cumo 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +27 -0
- data/.travis.yml +5 -0
- data/3rd_party/mkmf-cu/.gitignore +36 -0
- data/3rd_party/mkmf-cu/Gemfile +3 -0
- data/3rd_party/mkmf-cu/LICENSE +21 -0
- data/3rd_party/mkmf-cu/README.md +36 -0
- data/3rd_party/mkmf-cu/Rakefile +11 -0
- data/3rd_party/mkmf-cu/bin/mkmf-cu-nvcc +4 -0
- data/3rd_party/mkmf-cu/lib/mkmf-cu.rb +32 -0
- data/3rd_party/mkmf-cu/lib/mkmf-cu/cli.rb +80 -0
- data/3rd_party/mkmf-cu/lib/mkmf-cu/nvcc.rb +157 -0
- data/3rd_party/mkmf-cu/mkmf-cu.gemspec +16 -0
- data/3rd_party/mkmf-cu/test/test_mkmf-cu.rb +67 -0
- data/CODE_OF_CONDUCT.md +46 -0
- data/Gemfile +8 -0
- data/LICENSE.txt +82 -0
- data/README.md +252 -0
- data/Rakefile +43 -0
- data/bench/broadcast_fp32.rb +138 -0
- data/bench/cumo_bench.rb +193 -0
- data/bench/numo_bench.rb +138 -0
- data/bench/reduction_fp32.rb +117 -0
- data/bin/console +14 -0
- data/bin/setup +8 -0
- data/cumo.gemspec +32 -0
- data/ext/cumo/cuda/cublas.c +278 -0
- data/ext/cumo/cuda/driver.c +421 -0
- data/ext/cumo/cuda/memory_pool.cpp +185 -0
- data/ext/cumo/cuda/memory_pool_impl.cpp +308 -0
- data/ext/cumo/cuda/memory_pool_impl.hpp +370 -0
- data/ext/cumo/cuda/memory_pool_impl_test.cpp +554 -0
- data/ext/cumo/cuda/nvrtc.c +207 -0
- data/ext/cumo/cuda/runtime.c +167 -0
- data/ext/cumo/cumo.c +148 -0
- data/ext/cumo/depend.erb +58 -0
- data/ext/cumo/extconf.rb +179 -0
- data/ext/cumo/include/cumo.h +25 -0
- data/ext/cumo/include/cumo/compat.h +23 -0
- data/ext/cumo/include/cumo/cuda/cublas.h +153 -0
- data/ext/cumo/include/cumo/cuda/cumo_thrust.hpp +187 -0
- data/ext/cumo/include/cumo/cuda/cumo_thrust_complex.hpp +79 -0
- data/ext/cumo/include/cumo/cuda/driver.h +22 -0
- data/ext/cumo/include/cumo/cuda/memory_pool.h +28 -0
- data/ext/cumo/include/cumo/cuda/nvrtc.h +22 -0
- data/ext/cumo/include/cumo/cuda/runtime.h +40 -0
- data/ext/cumo/include/cumo/indexer.h +238 -0
- data/ext/cumo/include/cumo/intern.h +142 -0
- data/ext/cumo/include/cumo/intern_fwd.h +38 -0
- data/ext/cumo/include/cumo/intern_kernel.h +6 -0
- data/ext/cumo/include/cumo/narray.h +429 -0
- data/ext/cumo/include/cumo/narray_kernel.h +149 -0
- data/ext/cumo/include/cumo/ndloop.h +95 -0
- data/ext/cumo/include/cumo/reduce_kernel.h +126 -0
- data/ext/cumo/include/cumo/template.h +158 -0
- data/ext/cumo/include/cumo/template_kernel.h +77 -0
- data/ext/cumo/include/cumo/types/bit.h +40 -0
- data/ext/cumo/include/cumo/types/bit_kernel.h +34 -0
- data/ext/cumo/include/cumo/types/complex.h +402 -0
- data/ext/cumo/include/cumo/types/complex_kernel.h +414 -0
- data/ext/cumo/include/cumo/types/complex_macro.h +382 -0
- data/ext/cumo/include/cumo/types/complex_macro_kernel.h +186 -0
- data/ext/cumo/include/cumo/types/dcomplex.h +46 -0
- data/ext/cumo/include/cumo/types/dcomplex_kernel.h +13 -0
- data/ext/cumo/include/cumo/types/dfloat.h +47 -0
- data/ext/cumo/include/cumo/types/dfloat_kernel.h +14 -0
- data/ext/cumo/include/cumo/types/float_def.h +34 -0
- data/ext/cumo/include/cumo/types/float_def_kernel.h +39 -0
- data/ext/cumo/include/cumo/types/float_macro.h +191 -0
- data/ext/cumo/include/cumo/types/float_macro_kernel.h +158 -0
- data/ext/cumo/include/cumo/types/int16.h +24 -0
- data/ext/cumo/include/cumo/types/int16_kernel.h +23 -0
- data/ext/cumo/include/cumo/types/int32.h +24 -0
- data/ext/cumo/include/cumo/types/int32_kernel.h +19 -0
- data/ext/cumo/include/cumo/types/int64.h +24 -0
- data/ext/cumo/include/cumo/types/int64_kernel.h +19 -0
- data/ext/cumo/include/cumo/types/int8.h +24 -0
- data/ext/cumo/include/cumo/types/int8_kernel.h +19 -0
- data/ext/cumo/include/cumo/types/int_macro.h +67 -0
- data/ext/cumo/include/cumo/types/int_macro_kernel.h +48 -0
- data/ext/cumo/include/cumo/types/real_accum.h +486 -0
- data/ext/cumo/include/cumo/types/real_accum_kernel.h +101 -0
- data/ext/cumo/include/cumo/types/robj_macro.h +80 -0
- data/ext/cumo/include/cumo/types/robj_macro_kernel.h +0 -0
- data/ext/cumo/include/cumo/types/robject.h +27 -0
- data/ext/cumo/include/cumo/types/robject_kernel.h +7 -0
- data/ext/cumo/include/cumo/types/scomplex.h +46 -0
- data/ext/cumo/include/cumo/types/scomplex_kernel.h +13 -0
- data/ext/cumo/include/cumo/types/sfloat.h +48 -0
- data/ext/cumo/include/cumo/types/sfloat_kernel.h +14 -0
- data/ext/cumo/include/cumo/types/uint16.h +25 -0
- data/ext/cumo/include/cumo/types/uint16_kernel.h +20 -0
- data/ext/cumo/include/cumo/types/uint32.h +25 -0
- data/ext/cumo/include/cumo/types/uint32_kernel.h +20 -0
- data/ext/cumo/include/cumo/types/uint64.h +25 -0
- data/ext/cumo/include/cumo/types/uint64_kernel.h +20 -0
- data/ext/cumo/include/cumo/types/uint8.h +25 -0
- data/ext/cumo/include/cumo/types/uint8_kernel.h +20 -0
- data/ext/cumo/include/cumo/types/uint_macro.h +58 -0
- data/ext/cumo/include/cumo/types/uint_macro_kernel.h +38 -0
- data/ext/cumo/include/cumo/types/xint_macro.h +169 -0
- data/ext/cumo/include/cumo/types/xint_macro_kernel.h +88 -0
- data/ext/cumo/narray/SFMT-params.h +97 -0
- data/ext/cumo/narray/SFMT-params19937.h +46 -0
- data/ext/cumo/narray/SFMT.c +620 -0
- data/ext/cumo/narray/SFMT.h +167 -0
- data/ext/cumo/narray/array.c +638 -0
- data/ext/cumo/narray/data.c +961 -0
- data/ext/cumo/narray/gen/cogen.rb +56 -0
- data/ext/cumo/narray/gen/cogen_kernel.rb +58 -0
- data/ext/cumo/narray/gen/def/bit.rb +37 -0
- data/ext/cumo/narray/gen/def/dcomplex.rb +39 -0
- data/ext/cumo/narray/gen/def/dfloat.rb +37 -0
- data/ext/cumo/narray/gen/def/int16.rb +36 -0
- data/ext/cumo/narray/gen/def/int32.rb +36 -0
- data/ext/cumo/narray/gen/def/int64.rb +36 -0
- data/ext/cumo/narray/gen/def/int8.rb +36 -0
- data/ext/cumo/narray/gen/def/robject.rb +37 -0
- data/ext/cumo/narray/gen/def/scomplex.rb +39 -0
- data/ext/cumo/narray/gen/def/sfloat.rb +37 -0
- data/ext/cumo/narray/gen/def/uint16.rb +36 -0
- data/ext/cumo/narray/gen/def/uint32.rb +36 -0
- data/ext/cumo/narray/gen/def/uint64.rb +36 -0
- data/ext/cumo/narray/gen/def/uint8.rb +36 -0
- data/ext/cumo/narray/gen/erbpp2.rb +346 -0
- data/ext/cumo/narray/gen/narray_def.rb +268 -0
- data/ext/cumo/narray/gen/spec.rb +425 -0
- data/ext/cumo/narray/gen/tmpl/accum.c +86 -0
- data/ext/cumo/narray/gen/tmpl/accum_binary.c +121 -0
- data/ext/cumo/narray/gen/tmpl/accum_binary_kernel.cu +61 -0
- data/ext/cumo/narray/gen/tmpl/accum_index.c +119 -0
- data/ext/cumo/narray/gen/tmpl/accum_index_kernel.cu +66 -0
- data/ext/cumo/narray/gen/tmpl/accum_kernel.cu +12 -0
- data/ext/cumo/narray/gen/tmpl/alloc_func.c +107 -0
- data/ext/cumo/narray/gen/tmpl/allocate.c +37 -0
- data/ext/cumo/narray/gen/tmpl/aref.c +66 -0
- data/ext/cumo/narray/gen/tmpl/aref_cpu.c +50 -0
- data/ext/cumo/narray/gen/tmpl/aset.c +56 -0
- data/ext/cumo/narray/gen/tmpl/binary.c +162 -0
- data/ext/cumo/narray/gen/tmpl/binary2.c +70 -0
- data/ext/cumo/narray/gen/tmpl/binary2_kernel.cu +15 -0
- data/ext/cumo/narray/gen/tmpl/binary_kernel.cu +31 -0
- data/ext/cumo/narray/gen/tmpl/binary_s.c +45 -0
- data/ext/cumo/narray/gen/tmpl/binary_s_kernel.cu +15 -0
- data/ext/cumo/narray/gen/tmpl/bincount.c +181 -0
- data/ext/cumo/narray/gen/tmpl/cast.c +44 -0
- data/ext/cumo/narray/gen/tmpl/cast_array.c +13 -0
- data/ext/cumo/narray/gen/tmpl/class.c +9 -0
- data/ext/cumo/narray/gen/tmpl/class_kernel.cu +6 -0
- data/ext/cumo/narray/gen/tmpl/clip.c +121 -0
- data/ext/cumo/narray/gen/tmpl/coerce_cast.c +10 -0
- data/ext/cumo/narray/gen/tmpl/complex_accum_kernel.cu +129 -0
- data/ext/cumo/narray/gen/tmpl/cond_binary.c +68 -0
- data/ext/cumo/narray/gen/tmpl/cond_binary_kernel.cu +18 -0
- data/ext/cumo/narray/gen/tmpl/cond_unary.c +46 -0
- data/ext/cumo/narray/gen/tmpl/cum.c +50 -0
- data/ext/cumo/narray/gen/tmpl/each.c +47 -0
- data/ext/cumo/narray/gen/tmpl/each_with_index.c +70 -0
- data/ext/cumo/narray/gen/tmpl/ewcomp.c +79 -0
- data/ext/cumo/narray/gen/tmpl/ewcomp_kernel.cu +19 -0
- data/ext/cumo/narray/gen/tmpl/extract.c +22 -0
- data/ext/cumo/narray/gen/tmpl/extract_cpu.c +26 -0
- data/ext/cumo/narray/gen/tmpl/extract_data.c +53 -0
- data/ext/cumo/narray/gen/tmpl/eye.c +105 -0
- data/ext/cumo/narray/gen/tmpl/eye_kernel.cu +19 -0
- data/ext/cumo/narray/gen/tmpl/fill.c +52 -0
- data/ext/cumo/narray/gen/tmpl/fill_kernel.cu +29 -0
- data/ext/cumo/narray/gen/tmpl/float_accum_kernel.cu +106 -0
- data/ext/cumo/narray/gen/tmpl/format.c +62 -0
- data/ext/cumo/narray/gen/tmpl/format_to_a.c +49 -0
- data/ext/cumo/narray/gen/tmpl/frexp.c +38 -0
- data/ext/cumo/narray/gen/tmpl/gemm.c +203 -0
- data/ext/cumo/narray/gen/tmpl/init_class.c +20 -0
- data/ext/cumo/narray/gen/tmpl/init_module.c +12 -0
- data/ext/cumo/narray/gen/tmpl/inspect.c +21 -0
- data/ext/cumo/narray/gen/tmpl/lib.c +50 -0
- data/ext/cumo/narray/gen/tmpl/lib_kernel.cu +24 -0
- data/ext/cumo/narray/gen/tmpl/logseq.c +102 -0
- data/ext/cumo/narray/gen/tmpl/logseq_kernel.cu +31 -0
- data/ext/cumo/narray/gen/tmpl/map_with_index.c +98 -0
- data/ext/cumo/narray/gen/tmpl/median.c +66 -0
- data/ext/cumo/narray/gen/tmpl/minmax.c +47 -0
- data/ext/cumo/narray/gen/tmpl/module.c +9 -0
- data/ext/cumo/narray/gen/tmpl/module_kernel.cu +1 -0
- data/ext/cumo/narray/gen/tmpl/new_dim0.c +15 -0
- data/ext/cumo/narray/gen/tmpl/new_dim0_kernel.cu +8 -0
- data/ext/cumo/narray/gen/tmpl/poly.c +50 -0
- data/ext/cumo/narray/gen/tmpl/pow.c +97 -0
- data/ext/cumo/narray/gen/tmpl/pow_kernel.cu +29 -0
- data/ext/cumo/narray/gen/tmpl/powint.c +17 -0
- data/ext/cumo/narray/gen/tmpl/qsort.c +212 -0
- data/ext/cumo/narray/gen/tmpl/rand.c +168 -0
- data/ext/cumo/narray/gen/tmpl/rand_norm.c +121 -0
- data/ext/cumo/narray/gen/tmpl/real_accum_kernel.cu +75 -0
- data/ext/cumo/narray/gen/tmpl/seq.c +112 -0
- data/ext/cumo/narray/gen/tmpl/seq_kernel.cu +43 -0
- data/ext/cumo/narray/gen/tmpl/set2.c +57 -0
- data/ext/cumo/narray/gen/tmpl/sort.c +48 -0
- data/ext/cumo/narray/gen/tmpl/sort_index.c +111 -0
- data/ext/cumo/narray/gen/tmpl/store.c +41 -0
- data/ext/cumo/narray/gen/tmpl/store_array.c +187 -0
- data/ext/cumo/narray/gen/tmpl/store_array_kernel.cu +58 -0
- data/ext/cumo/narray/gen/tmpl/store_bit.c +86 -0
- data/ext/cumo/narray/gen/tmpl/store_bit_kernel.cu +66 -0
- data/ext/cumo/narray/gen/tmpl/store_from.c +81 -0
- data/ext/cumo/narray/gen/tmpl/store_from_kernel.cu +58 -0
- data/ext/cumo/narray/gen/tmpl/store_kernel.cu +3 -0
- data/ext/cumo/narray/gen/tmpl/store_numeric.c +9 -0
- data/ext/cumo/narray/gen/tmpl/to_a.c +43 -0
- data/ext/cumo/narray/gen/tmpl/unary.c +132 -0
- data/ext/cumo/narray/gen/tmpl/unary2.c +60 -0
- data/ext/cumo/narray/gen/tmpl/unary_kernel.cu +72 -0
- data/ext/cumo/narray/gen/tmpl/unary_ret2.c +34 -0
- data/ext/cumo/narray/gen/tmpl/unary_s.c +86 -0
- data/ext/cumo/narray/gen/tmpl/unary_s_kernel.cu +58 -0
- data/ext/cumo/narray/gen/tmpl_bit/allocate.c +24 -0
- data/ext/cumo/narray/gen/tmpl_bit/aref.c +54 -0
- data/ext/cumo/narray/gen/tmpl_bit/aref_cpu.c +57 -0
- data/ext/cumo/narray/gen/tmpl_bit/aset.c +56 -0
- data/ext/cumo/narray/gen/tmpl_bit/binary.c +98 -0
- data/ext/cumo/narray/gen/tmpl_bit/bit_count.c +64 -0
- data/ext/cumo/narray/gen/tmpl_bit/bit_count_cpu.c +88 -0
- data/ext/cumo/narray/gen/tmpl_bit/bit_count_kernel.cu +76 -0
- data/ext/cumo/narray/gen/tmpl_bit/bit_reduce.c +133 -0
- data/ext/cumo/narray/gen/tmpl_bit/each.c +48 -0
- data/ext/cumo/narray/gen/tmpl_bit/each_with_index.c +70 -0
- data/ext/cumo/narray/gen/tmpl_bit/extract.c +30 -0
- data/ext/cumo/narray/gen/tmpl_bit/extract_cpu.c +29 -0
- data/ext/cumo/narray/gen/tmpl_bit/fill.c +69 -0
- data/ext/cumo/narray/gen/tmpl_bit/format.c +64 -0
- data/ext/cumo/narray/gen/tmpl_bit/format_to_a.c +51 -0
- data/ext/cumo/narray/gen/tmpl_bit/inspect.c +21 -0
- data/ext/cumo/narray/gen/tmpl_bit/mask.c +136 -0
- data/ext/cumo/narray/gen/tmpl_bit/none_p.c +14 -0
- data/ext/cumo/narray/gen/tmpl_bit/store_array.c +108 -0
- data/ext/cumo/narray/gen/tmpl_bit/store_bit.c +70 -0
- data/ext/cumo/narray/gen/tmpl_bit/store_from.c +60 -0
- data/ext/cumo/narray/gen/tmpl_bit/to_a.c +47 -0
- data/ext/cumo/narray/gen/tmpl_bit/unary.c +81 -0
- data/ext/cumo/narray/gen/tmpl_bit/where.c +90 -0
- data/ext/cumo/narray/gen/tmpl_bit/where2.c +95 -0
- data/ext/cumo/narray/index.c +880 -0
- data/ext/cumo/narray/kwargs.c +153 -0
- data/ext/cumo/narray/math.c +142 -0
- data/ext/cumo/narray/narray.c +1948 -0
- data/ext/cumo/narray/ndloop.c +2105 -0
- data/ext/cumo/narray/rand.c +45 -0
- data/ext/cumo/narray/step.c +474 -0
- data/ext/cumo/narray/struct.c +886 -0
- data/lib/cumo.rb +3 -0
- data/lib/cumo/cuda.rb +11 -0
- data/lib/cumo/cuda/compile_error.rb +36 -0
- data/lib/cumo/cuda/compiler.rb +161 -0
- data/lib/cumo/cuda/device.rb +47 -0
- data/lib/cumo/cuda/link_state.rb +31 -0
- data/lib/cumo/cuda/module.rb +40 -0
- data/lib/cumo/cuda/nvrtc_program.rb +27 -0
- data/lib/cumo/linalg.rb +12 -0
- data/lib/cumo/narray.rb +2 -0
- data/lib/cumo/narray/extra.rb +1278 -0
- data/lib/erbpp.rb +294 -0
- data/lib/erbpp/line_number.rb +137 -0
- data/lib/erbpp/narray_def.rb +381 -0
- data/numo-narray-version +1 -0
- data/run.gdb +7 -0
- metadata +353 -0
|
@@ -0,0 +1,186 @@
|
|
|
1
|
+
#ifndef CUMO_COMPLEX_MACRO_KERNEL_H
|
|
2
|
+
#define CUMO_COMPLEX_MACRO_KERNEL_H
|
|
3
|
+
|
|
4
|
+
#include "float_def_kernel.h"
|
|
5
|
+
|
|
6
|
+
extern double round(double);
|
|
7
|
+
extern double log2(double);
|
|
8
|
+
extern double exp2(double);
|
|
9
|
+
extern double exp10(double);
|
|
10
|
+
|
|
11
|
+
#define r_abs(x) fabs(x)
|
|
12
|
+
#define r_sqrt(x) sqrt(x)
|
|
13
|
+
#define r_exp(x) exp(x)
|
|
14
|
+
#define r_log(x) log(x)
|
|
15
|
+
#define r_sin(x) sin(x)
|
|
16
|
+
#define r_cos(x) cos(x)
|
|
17
|
+
#define r_sinh(x) sinh(x)
|
|
18
|
+
#define r_cosh(x) cosh(x)
|
|
19
|
+
#define r_tanh(x) tanh(x)
|
|
20
|
+
#define r_atan2(y,x) atan2(y,x)
|
|
21
|
+
#define r_hypot(x,y) hypot(x,y)
|
|
22
|
+
|
|
23
|
+
#include "complex_kernel.h"
|
|
24
|
+
|
|
25
|
+
/* Build a dtype value from an scomplex value by copying its two
 * components (accessed through REAL/IMAG) one at a time. */
__host__ __device__ static inline dtype c_from_scomplex(scomplex x)
{
    dtype converted;

    IMAG(converted) = IMAG(x);
    REAL(converted) = REAL(x);
    return converted;
}
|
|
31
|
+
|
|
32
|
+
/* Build a dtype value from a dcomplex value by copying its two
 * components (accessed through REAL/IMAG) one at a time. */
__host__ __device__ static inline dtype c_from_dcomplex(dcomplex x)
{
    dtype converted;

    IMAG(converted) = IMAG(x);
    REAL(converted) = REAL(x);
    return converted;
}
|
|
38
|
+
|
|
39
|
+
/* --------------------------- */
|
|
40
|
+
|
|
41
|
+
#define m_zero c_zero()
|
|
42
|
+
#define m_one c_one()
|
|
43
|
+
|
|
44
|
+
//#define m_num_to_data(x) NUM2COMP(x)
|
|
45
|
+
//#define m_data_to_num(x) COMP2NUM(x)
|
|
46
|
+
|
|
47
|
+
#define m_from_double(x) c_new(x,0)
|
|
48
|
+
#define m_from_real(x) c_new(x,0)
|
|
49
|
+
#define m_from_sint(x) c_new(x,0)
|
|
50
|
+
#define m_from_int32(x) c_new(x,0)
|
|
51
|
+
#define m_from_int64(x) c_new(x,0)
|
|
52
|
+
#define m_from_uint32(x) c_new(x,0)
|
|
53
|
+
#define m_from_uint64(x) c_new(x,0)
|
|
54
|
+
#define m_from_scomplex(x) c_from_scomplex(x)
|
|
55
|
+
#define m_from_dcomplex(x) c_from_dcomplex(x)
|
|
56
|
+
|
|
57
|
+
//#define m_extract(x) COMP2NUM(*(dtype*)x)
|
|
58
|
+
|
|
59
|
+
#define m_real(x) REAL(x)
|
|
60
|
+
#define m_imag(x) IMAG(x)
|
|
61
|
+
#define m_set_real(x,y) c_set_real(x,y)
|
|
62
|
+
#define m_set_imag(x,y) c_set_imag(x,y)
|
|
63
|
+
|
|
64
|
+
#define m_add(x,y) c_add(x,y)
|
|
65
|
+
#define m_sub(x,y) c_sub(x,y)
|
|
66
|
+
#define m_mul(x,y) c_mul(x,y)
|
|
67
|
+
#define m_div(x,y) c_div(x,y)
|
|
68
|
+
#define m_mod(x,y) c_mod(x,y)
|
|
69
|
+
#define m_pow(x,y) c_pow(x,y)
|
|
70
|
+
#define m_pow_int(x,y) c_pow_int(x,y)
|
|
71
|
+
|
|
72
|
+
#define m_abs(x) c_abs(x)
|
|
73
|
+
#define m_minus(x) c_minus(x)
|
|
74
|
+
#define m_reciprocal(x) c_reciprocal(x)
|
|
75
|
+
#define m_square(x) c_square(x)
|
|
76
|
+
#define m_floor(x) c_new(floor(REAL(x)),floor(IMAG(x)))
|
|
77
|
+
#define m_round(x) c_new(round(REAL(x)),round(IMAG(x)))
|
|
78
|
+
#define m_ceil(x) c_new(ceil(REAL(x)),ceil(IMAG(x)))
|
|
79
|
+
#define m_trunc(x) c_new(trunc(REAL(x)),trunc(IMAG(x)))
|
|
80
|
+
#define m_rint(x) c_new(rint(REAL(x)),rint(IMAG(x)))
|
|
81
|
+
#define m_sign(x) c_new( \
|
|
82
|
+
((REAL(x)==0) ? 0.0:((REAL(x)>0) ? 1.0:((REAL(x)<0) ? -1.0:REAL(x)))), \
|
|
83
|
+
((IMAG(x)==0) ? 0.0:((IMAG(x)>0) ? 1.0:((IMAG(x)<0) ? -1.0:IMAG(x)))))
|
|
84
|
+
#define m_copysign(x,y) c_new(copysign(REAL(x),REAL(y)),copysign(IMAG(x),IMAG(y)))
|
|
85
|
+
|
|
86
|
+
#define m_im(x) c_im(x)
|
|
87
|
+
#define m_conj(x) c_new(REAL(x),-IMAG(x))
|
|
88
|
+
#define m_arg(x) atan2(IMAG(x),REAL(x))
|
|
89
|
+
|
|
90
|
+
#define m_eq(x,y) c_eq(x,y)
|
|
91
|
+
#define m_ne(x,y) c_ne(x,y)
|
|
92
|
+
#define m_nearly_eq(x,y) c_nearly_eq(x,y)
|
|
93
|
+
|
|
94
|
+
#define m_isnan(x) c_isnan(x)
|
|
95
|
+
#define m_isinf(x) c_isinf(x)
|
|
96
|
+
#define m_isposinf(x) c_isposinf(x)
|
|
97
|
+
#define m_isneginf(x) c_isneginf(x)
|
|
98
|
+
#define m_isfinite(x) c_isfinite(x)
|
|
99
|
+
|
|
100
|
+
#define m_sprintf(s,x) sprintf(s,"%g%+gi",REAL(x),IMAG(x))
|
|
101
|
+
|
|
102
|
+
#define m_sqrt(x) c_sqrt(x)
|
|
103
|
+
#define m_cbrt(x) c_cbrt(x)
|
|
104
|
+
#define m_log(x) c_log(x)
|
|
105
|
+
#define m_log2(x) c_log2(x)
|
|
106
|
+
#define m_log10(x) c_log10(x)
|
|
107
|
+
#define m_exp(x) c_exp(x)
|
|
108
|
+
#define m_exp2(x) c_exp2(x)
|
|
109
|
+
#define m_exp10(x) c_exp10(x)
|
|
110
|
+
#define m_sin(x) c_sin(x)
|
|
111
|
+
#define m_cos(x) c_cos(x)
|
|
112
|
+
#define m_tan(x) c_tan(x)
|
|
113
|
+
#define m_asin(x) c_asin(x)
|
|
114
|
+
#define m_acos(x) c_acos(x)
|
|
115
|
+
#define m_atan(x) c_atan(x)
|
|
116
|
+
#define m_sinh(x) c_sinh(x)
|
|
117
|
+
#define m_cosh(x) c_cosh(x)
|
|
118
|
+
#define m_tanh(x) c_tanh(x)
|
|
119
|
+
#define m_asinh(x) c_asinh(x)
|
|
120
|
+
#define m_acosh(x) c_acosh(x)
|
|
121
|
+
#define m_atanh(x) c_atanh(x)
|
|
122
|
+
#define m_hypot(x,y) c_hypot(x,y)
|
|
123
|
+
#define m_sinc(x) c_div(c_sin(x),x)
|
|
124
|
+
|
|
125
|
+
#define m_sum_init 0
|
|
126
|
+
#define m_mulsum_init 0
|
|
127
|
+
|
|
128
|
+
#define not_nan(x) (REAL(x)==REAL(x) && IMAG(x)==IMAG(x))
|
|
129
|
+
|
|
130
|
+
#define m_mulsum(x,y,z) {z = m_add(m_mul(x,y),z);}
|
|
131
|
+
#define m_mulsum_nan(x,y,z) { \
|
|
132
|
+
if(not_nan(x) && not_nan(y)) { \
|
|
133
|
+
z = m_add(m_mul(x,y),z); \
|
|
134
|
+
}}
|
|
135
|
+
|
|
136
|
+
#define m_cumsum(x,y) {(x)=m_add(x,y);}
|
|
137
|
+
#define m_cumsum_nan(x,y) { \
|
|
138
|
+
if (!not_nan(x)) { \
|
|
139
|
+
(x) = (y); \
|
|
140
|
+
} else if (not_nan(y)) { \
|
|
141
|
+
(x) = m_add(x,y); \
|
|
142
|
+
}}
|
|
143
|
+
|
|
144
|
+
#define m_cumprod(x,y) {(x)=m_mul(x,y);}
|
|
145
|
+
#define m_cumprod_nan(x,y) { \
|
|
146
|
+
if (!not_nan(x)) { \
|
|
147
|
+
(x) = (y); \
|
|
148
|
+
} else if (not_nan(y)) { \
|
|
149
|
+
(x) = m_mul(x,y); \
|
|
150
|
+
}}
|
|
151
|
+
|
|
152
|
+
/* Sequence helper: returns c_add(x, c_mul_r(y, c)), i.e. the start value x
 * advanced by the step y scaled by the real factor c. */
__host__ __device__ static inline dtype f_seq(dtype x, dtype y, double c)
{
    const dtype scaled_step = c_mul_r(y, c);

    return c_add(x, scaled_step);
}
|
|
156
|
+
|
|
157
|
+
/* --------- thrust ----------------- */
|
|
158
|
+
#include "cumo/cuda/cumo_thrust_complex.hpp"
|
|
159
|
+
|
|
160
|
+
/* Binary functor for Thrust algorithms: combines two dtype elements with m_add. */
struct cumo_thrust_plus : public thrust::binary_function<dtype, dtype, dtype>
{
    __host__ __device__ dtype operator()(dtype lhs, dtype rhs)
    {
        return m_add(lhs, rhs);
    }
};
|
|
164
|
+
|
|
165
|
+
/* Binary functor for Thrust algorithms: combines two dtype elements with m_mul. */
struct cumo_thrust_multiplies : public thrust::binary_function<dtype, dtype, dtype>
{
    __host__ __device__ dtype operator()(dtype lhs, dtype rhs)
    {
        return m_mul(lhs, rhs);
    }
};
|
|
169
|
+
|
|
170
|
+
/* NaN-skipping product functor: yields m_mul(lhs, rhs) when neither operand
 * has a NaN component (per not_nan), and m_zero otherwise so that the pair
 * contributes nothing to a subsequent sum. */
struct cumo_thrust_multiplies_mulsum_nan : public thrust::binary_function<dtype, dtype, dtype>
{
    __host__ __device__ dtype operator()(dtype lhs, dtype rhs)
    {
        /* Guard clause: any NaN component in either operand drops the pair. */
        if (!not_nan(lhs) || !not_nan(rhs)) {
            return m_zero;
        }
        return m_mul(lhs, rhs);
    }
};
|
|
180
|
+
|
|
181
|
+
/* Unary functor for Thrust transforms: maps a complex dtype element to the
 * real value c_abs_square(x) (presumably |x|^2 -- confirm in complex_kernel.h).
 *
 * The declared result type is rtype, matching what operator() actually
 * returns; it was previously declared as dtype, so the functor's
 * result_type typedef disagreed with the call operator. */
struct cumo_thrust_square : public thrust::unary_function<dtype, rtype>
{
    __host__ __device__ rtype operator()(const dtype& x) const { return c_abs_square(x); }
};
|
|
185
|
+
|
|
186
|
+
#endif // CUMO_COMPLEX_MACRO_KERNEL_H
|
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
typedef dcomplex dtype;
|
|
2
|
+
typedef double rtype;
|
|
3
|
+
#define cT cumo_cDComplex
|
|
4
|
+
#define cRT cumo_cDFloat
|
|
5
|
+
#define mTM cumo_mDComplexMath
|
|
6
|
+
|
|
7
|
+
#include "complex_macro.h"
|
|
8
|
+
#include "cublas_v2.h"
|
|
9
|
+
#include "cumo/cuda/cublas.h"
|
|
10
|
+
|
|
11
|
+
/* Approximate equality for complex values: true when the magnitude of the
 * difference is within 2*DBL_EPSILON times the summed magnitudes of the
 * operands (a relative tolerance). */
static inline bool c_nearly_eq(dtype x, dtype y)
{
    const rtype distance = c_abs(c_sub(x,y));
    const rtype magnitude = c_abs(x) + c_abs(y);

    return distance <= magnitude*DBL_EPSILON*2;
}
|
|
14
|
+
|
|
15
|
+
#ifdef SFMT_H
|
|
16
|
+
/* generates a random number on [0,1)-real-interval */
|
|
17
|
+
inline static dtype m_rand(dtype max)
|
|
18
|
+
{
|
|
19
|
+
dtype z;
|
|
20
|
+
REAL(z) = genrand_res53_mix() * REAL(max);
|
|
21
|
+
IMAG(z) = genrand_res53_mix() * IMAG(max);
|
|
22
|
+
return z;
|
|
23
|
+
}
|
|
24
|
+
|
|
25
|
+
/* generates random numbers from the normal distribution
|
|
26
|
+
using Box-Muller Transformation.
|
|
27
|
+
*/
|
|
28
|
+
inline static void m_rand_norm(dtype mu, rtype sigma, dtype *a0)
|
|
29
|
+
{
|
|
30
|
+
rtype x1, x2, w;
|
|
31
|
+
do {
|
|
32
|
+
x1 = genrand_res53_mix();
|
|
33
|
+
x1 = x1*2-1;
|
|
34
|
+
x2 = genrand_res53_mix();
|
|
35
|
+
x2 = x2*2-1;
|
|
36
|
+
w = x1 * x1 + x2 * x2;
|
|
37
|
+
} while (w>=1);
|
|
38
|
+
w = sqrt( (-2*log(w)) / w );
|
|
39
|
+
REAL(*a0) = x1*w * sigma + REAL(mu);
|
|
40
|
+
IMAG(*a0) = x2*w * sigma + IMAG(mu);
|
|
41
|
+
}
|
|
42
|
+
#endif
|
|
43
|
+
|
|
44
|
+
#define M_EPSILON rb_float_new(2.2204460492503131e-16)
|
|
45
|
+
#define M_MIN rb_float_new(2.2250738585072014e-308)
|
|
46
|
+
#define M_MAX rb_float_new(1.7976931348623157e+308)
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
#ifndef CUMO_DCOMPLEX_KERNEL_H
|
|
2
|
+
#define CUMO_DCOMPLEX_KERNEL_H
|
|
3
|
+
|
|
4
|
+
typedef dcomplex dtype;
|
|
5
|
+
typedef double rtype;
|
|
6
|
+
|
|
7
|
+
#include "complex_macro_kernel.h"
|
|
8
|
+
|
|
9
|
+
/* Device-side approximate equality for complex values: true when the
 * magnitude of the difference is within 2*DBL_EPSILON times the summed
 * magnitudes of the operands (a relative tolerance). */
__device__ static inline bool c_nearly_eq(dtype x, dtype y)
{
    const rtype distance = c_abs(c_sub(x,y));
    const rtype magnitude = c_abs(x) + c_abs(y);

    return distance <= magnitude*DBL_EPSILON*2;
}
|
|
12
|
+
|
|
13
|
+
#endif // CUMO_DCOMPLEX_KERNEL_H
|
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
typedef double dtype;
|
|
2
|
+
typedef double rtype;
|
|
3
|
+
#define cT cumo_cDFloat
|
|
4
|
+
#define cRT cumo_cDFloat
|
|
5
|
+
#define mTM cumo_mDFloatMath
|
|
6
|
+
|
|
7
|
+
#include "float_macro.h"
|
|
8
|
+
#include "cublas_v2.h"
|
|
9
|
+
#include "cumo/cuda/cublas.h"
|
|
10
|
+
|
|
11
|
+
#ifdef SFMT_H
|
|
12
|
+
/* generates a random number on [0,1)-real-interval */
|
|
13
|
+
inline static dtype m_rand(dtype max)
|
|
14
|
+
{
|
|
15
|
+
return genrand_res53_mix() * max;
|
|
16
|
+
}
|
|
17
|
+
|
|
18
|
+
/* generates random numbers from the normal distribution
|
|
19
|
+
using Box-Muller Transformation.
|
|
20
|
+
*/
|
|
21
|
+
inline static void m_rand_norm(dtype mu, dtype sigma, dtype *a0, dtype *a1)
|
|
22
|
+
{
|
|
23
|
+
dtype x1, x2, w;
|
|
24
|
+
do {
|
|
25
|
+
x1 = genrand_res53_mix();
|
|
26
|
+
x1 = x1*2-1;
|
|
27
|
+
x2 = genrand_res53_mix();
|
|
28
|
+
x2 = x2*2-1;
|
|
29
|
+
w = x1 * x1 + x2 * x2;
|
|
30
|
+
} while (w>=1);
|
|
31
|
+
w = sqrt( (-2*log(w)) / w );
|
|
32
|
+
if (a0) {*a0 = x1*w * sigma + mu;}
|
|
33
|
+
if (a1) {*a1 = x2*w * sigma + mu;}
|
|
34
|
+
}
|
|
35
|
+
#endif
|
|
36
|
+
|
|
37
|
+
#define m_min_init cumo_dfloat_new_dim0(0.0/0.0)
|
|
38
|
+
#define m_max_init cumo_dfloat_new_dim0(0.0/0.0)
|
|
39
|
+
#define m_extract(x) rb_float_new(*(double*)x)
|
|
40
|
+
#define m_nearly_eq(x,y) (fabs(x-y)<=(fabs(x)+fabs(y))*DBL_EPSILON*2)
|
|
41
|
+
|
|
42
|
+
#define M_EPSILON rb_float_new(2.2204460492503131e-16)
|
|
43
|
+
#define M_MIN rb_float_new(2.2250738585072014e-308)
|
|
44
|
+
#define M_MAX rb_float_new(1.7976931348623157e+308)
|
|
45
|
+
|
|
46
|
+
#define DATA_MIN DBL_MIN
|
|
47
|
+
#define DATA_MAX DBL_MAX
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
#ifndef CUMO_DFLOAT_KERNEL_H
|
|
2
|
+
#define CUMO_DFLOAT_KERNEL_H
|
|
3
|
+
|
|
4
|
+
typedef double dtype;
|
|
5
|
+
typedef double rtype;
|
|
6
|
+
|
|
7
|
+
#include "float_macro_kernel.h"
|
|
8
|
+
|
|
9
|
+
#define m_nearly_eq(x,y) (fabs(x-y)<=(fabs(x)+fabs(y))*DBL_EPSILON*2)
|
|
10
|
+
|
|
11
|
+
#define DATA_MIN DBL_MIN
|
|
12
|
+
#define DATA_MAX DBL_MAX
|
|
13
|
+
|
|
14
|
+
#endif // CUMO_DFLOAT_KERNEL_H
|
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
#ifndef DBL_EPSILON
|
|
2
|
+
#define DBL_EPSILON 2.2204460492503131e-16
|
|
3
|
+
#endif
|
|
4
|
+
#ifndef FLT_EPSILON
|
|
5
|
+
#define FLT_EPSILON 1.1920928955078125e-07
|
|
6
|
+
#endif
|
|
7
|
+
#ifndef DBL_MAX
|
|
8
|
+
#define DBL_MAX 1.7976931348623157e+308
|
|
9
|
+
#endif
|
|
10
|
+
/* Fallback for the smallest positive normalized double, used when <float.h>
 * has not supplied it. (This guard previously repeated the DBL_MAX fallback
 * directly above, leaving DBL_MIN without one.) */
#ifndef DBL_MIN
#define DBL_MIN 2.2250738585072014e-308
#endif
|
|
13
|
+
#ifndef FLT_MIN
|
|
14
|
+
#define FLT_MIN 1.1754943508222875e-38
|
|
15
|
+
#endif
|
|
16
|
+
#ifndef FLT_MAX
|
|
17
|
+
#define FLT_MAX 3.4028234663852886e+38
|
|
18
|
+
#endif
|
|
19
|
+
|
|
20
|
+
#ifndef M_PI_2
|
|
21
|
+
#define M_PI_2 1.57079632679489661923 /* pi/2 */
|
|
22
|
+
#endif
|
|
23
|
+
#ifndef M_LOG2E
|
|
24
|
+
#define M_LOG2E 1.4426950408889634074 /* log_2 e */
|
|
25
|
+
#endif
|
|
26
|
+
#ifndef M_LOG10E
|
|
27
|
+
#define M_LOG10E 0.43429448190325182765 /* log_10 e */
|
|
28
|
+
#endif
|
|
29
|
+
#ifndef M_LN2
|
|
30
|
+
#define M_LN2 0.69314718055994530942 /* log_e 2 */
|
|
31
|
+
#endif
|
|
32
|
+
#ifndef M_LN10
|
|
33
|
+
#define M_LN10 2.30258509299404568402 /* log_e 10 */
|
|
34
|
+
#endif
|
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
#ifndef CUMO_FLOAT_DEF_KERNEL_H
|
|
2
|
+
#define CUMO_FLOAT_DEF_KERNEL_H
|
|
3
|
+
|
|
4
|
+
#ifndef DBL_EPSILON
|
|
5
|
+
#define DBL_EPSILON 2.2204460492503131e-16
|
|
6
|
+
#endif
|
|
7
|
+
#ifndef FLT_EPSILON
|
|
8
|
+
#define FLT_EPSILON 1.1920928955078125e-07
|
|
9
|
+
#endif
|
|
10
|
+
#ifndef DBL_MAX
|
|
11
|
+
#define DBL_MAX 1.7976931348623157e+308
|
|
12
|
+
#endif
|
|
13
|
+
/* Fallback for the smallest positive normalized double, used when <float.h>
 * has not supplied it. (This guard previously repeated the DBL_MAX fallback
 * directly above, leaving DBL_MIN without one.) */
#ifndef DBL_MIN
#define DBL_MIN 2.2250738585072014e-308
#endif
|
|
16
|
+
#ifndef FLT_MIN
|
|
17
|
+
#define FLT_MIN 1.1754943508222875e-38
|
|
18
|
+
#endif
|
|
19
|
+
#ifndef FLT_MAX
|
|
20
|
+
#define FLT_MAX 3.4028234663852886e+38
|
|
21
|
+
#endif
|
|
22
|
+
|
|
23
|
+
#ifndef M_PI_2
|
|
24
|
+
#define M_PI_2 1.57079632679489661923 /* pi/2 */
|
|
25
|
+
#endif
|
|
26
|
+
#ifndef M_LOG2E
|
|
27
|
+
#define M_LOG2E 1.4426950408889634074 /* log_2 e */
|
|
28
|
+
#endif
|
|
29
|
+
#ifndef M_LOG10E
|
|
30
|
+
#define M_LOG10E 0.43429448190325182765 /* log_10 e */
|
|
31
|
+
#endif
|
|
32
|
+
#ifndef M_LN2
|
|
33
|
+
#define M_LN2 0.69314718055994530942 /* log_e 2 */
|
|
34
|
+
#endif
|
|
35
|
+
#ifndef M_LN10
|
|
36
|
+
#define M_LN10 2.30258509299404568402 /* log_e 10 */
|
|
37
|
+
#endif
|
|
38
|
+
|
|
39
|
+
#endif // CUMO_FLOAT_DEF_KERNEL_H
|
|
@@ -0,0 +1,191 @@
|
|
|
1
|
+
#include "float_def.h"
|
|
2
|
+
|
|
3
|
+
extern double round(double);
|
|
4
|
+
extern double log2(double);
|
|
5
|
+
extern double exp2(double);
|
|
6
|
+
#ifdef HAVE_EXP10
|
|
7
|
+
extern double exp10(double);
|
|
8
|
+
#else
|
|
9
|
+
extern double pow(double, double);
|
|
10
|
+
#endif
|
|
11
|
+
|
|
12
|
+
#define m_zero 0.0
|
|
13
|
+
#define m_one 1.0
|
|
14
|
+
|
|
15
|
+
#define m_num_to_data(x) NUM2DBL(x)
|
|
16
|
+
#define m_data_to_num(x) rb_float_new(x)
|
|
17
|
+
|
|
18
|
+
#define m_from_double(x) (x)
|
|
19
|
+
#define m_from_real(x) (x)
|
|
20
|
+
#define m_from_sint(x) (x)
|
|
21
|
+
#define m_from_int32(x) (x)
|
|
22
|
+
#define m_from_int64(x) (x)
|
|
23
|
+
#define m_from_uint32(x) (x)
|
|
24
|
+
#define m_from_uint64(x) (x)
|
|
25
|
+
|
|
26
|
+
#define m_add(x,y) ((x)+(y))
|
|
27
|
+
#define m_sub(x,y) ((x)-(y))
|
|
28
|
+
#define m_mul(x,y) ((x)*(y))
|
|
29
|
+
#define m_div(x,y) ((x)/(y))
|
|
30
|
+
#define m_div_check(x,y) ((y)==0)
|
|
31
|
+
#define m_mod(x,y) fmod(x,y)
|
|
32
|
+
#define m_divmod(x,y,a,b) {a=(x)/(y); b=m_mod(x,y);}
|
|
33
|
+
#define m_pow(x,y) pow(x,y)
|
|
34
|
+
#define m_pow_int(x,y) pow_int(x,y)
|
|
35
|
+
|
|
36
|
+
#define m_abs(x) fabs(x)
|
|
37
|
+
#define m_minus(x) (-(x))
|
|
38
|
+
#define m_reciprocal(x) (1/(x))
|
|
39
|
+
#define m_square(x) ((x)*(x))
|
|
40
|
+
#define m_floor(x) floor(x)
|
|
41
|
+
#define m_round(x) round(x)
|
|
42
|
+
#define m_ceil(x) ceil(x)
|
|
43
|
+
#define m_trunc(x) trunc(x)
|
|
44
|
+
#define m_rint(x) rint(x)
|
|
45
|
+
#define m_sign(x) (((x)==0) ? 0.0:(((x)>0) ? 1.0:(((x)<0) ? -1.0:(x))))
|
|
46
|
+
#define m_copysign(x,y) copysign(x,y)
|
|
47
|
+
#define m_signbit(x) signbit(x)
|
|
48
|
+
#define m_modf(x,y,z) {double d; y=modf(x,&d); z=d;}
|
|
49
|
+
|
|
50
|
+
#define m_eq(x,y) ((x)==(y))
|
|
51
|
+
#define m_ne(x,y) ((x)!=(y))
|
|
52
|
+
#define m_gt(x,y) ((x)>(y))
|
|
53
|
+
#define m_ge(x,y) ((x)>=(y))
|
|
54
|
+
#define m_lt(x,y) ((x)<(y))
|
|
55
|
+
#define m_le(x,y) ((x)<=(y))
|
|
56
|
+
|
|
57
|
+
#define m_isnan(x) isnan(x)
|
|
58
|
+
#define m_isinf(x) isinf(x)
|
|
59
|
+
#define m_isposinf(x) (isinf(x) && signbit(x)==0)
|
|
60
|
+
#define m_isneginf(x) (isinf(x) && signbit(x))
|
|
61
|
+
#define m_isfinite(x) isfinite(x)
|
|
62
|
+
|
|
63
|
+
#define m_mulsum_init INT2FIX(0)
|
|
64
|
+
|
|
65
|
+
#define m_sprintf(s,x) sprintf(s,"%g",x)
|
|
66
|
+
|
|
67
|
+
#define cmp_prnan(a,b) \
|
|
68
|
+
((qsort_cast(a)==qsort_cast(b)) ? 0 : \
|
|
69
|
+
(qsort_cast(a) > qsort_cast(b)) ? 1 : -1)
|
|
70
|
+
|
|
71
|
+
#define cmp_ignan(a,b) \
|
|
72
|
+
(m_isnan(qsort_cast(a)) ? (m_isnan(qsort_cast(b)) ? 0 : 1) : \
|
|
73
|
+
(m_isnan(qsort_cast(b)) ? -1 : \
|
|
74
|
+
((qsort_cast(a)==qsort_cast(b)) ? 0 : \
|
|
75
|
+
(qsort_cast(a) > qsort_cast(b)) ? 1 : -1)))
|
|
76
|
+
|
|
77
|
+
#define cmpgt_prnan(a,b) \
|
|
78
|
+
(qsort_cast(a) > qsort_cast(b))
|
|
79
|
+
|
|
80
|
+
#define cmpgt_ignan(a,b) \
|
|
81
|
+
((m_isnan(qsort_cast(a)) && !m_isnan(qsort_cast(b))) || \
|
|
82
|
+
(qsort_cast(a) > qsort_cast(b)))
|
|
83
|
+
|
|
84
|
+
#define m_sqrt(x) sqrt(x)
|
|
85
|
+
#define m_cbrt(x) cbrt(x)
|
|
86
|
+
#define m_log(x) log(x)
|
|
87
|
+
#define m_log2(x) log2(x)
|
|
88
|
+
#define m_log10(x) log10(x)
|
|
89
|
+
#define m_exp(x) exp(x)
|
|
90
|
+
#define m_exp2(x) exp2(x)
|
|
91
|
+
#ifdef HAVE_EXP10
|
|
92
|
+
#define m_exp10(x) exp10(x)
|
|
93
|
+
#else
|
|
94
|
+
#define m_exp10(x) pow(10, x)
|
|
95
|
+
#endif
|
|
96
|
+
#define m_expm1(x) expm1(x)
|
|
97
|
+
#define m_log1p(x) log1p(x)
|
|
98
|
+
|
|
99
|
+
#define m_sin(x) sin(x)
|
|
100
|
+
#define m_cos(x) cos(x)
|
|
101
|
+
#define m_tan(x) tan(x)
|
|
102
|
+
#define m_asin(x) asin(x)
|
|
103
|
+
#define m_acos(x) acos(x)
|
|
104
|
+
#define m_atan(x) atan(x)
|
|
105
|
+
#define m_sinh(x) sinh(x)
|
|
106
|
+
#define m_cosh(x) cosh(x)
|
|
107
|
+
#define m_tanh(x) tanh(x)
|
|
108
|
+
#define m_asinh(x) asinh(x)
|
|
109
|
+
#define m_acosh(x) acosh(x)
|
|
110
|
+
#define m_atanh(x) atanh(x)
|
|
111
|
+
#define m_atan2(x,y) atan2(x,y)
|
|
112
|
+
#define m_hypot(x,y) hypot(x,y)
|
|
113
|
+
/* sinc(x) = sin(x)/x, with the removable singularity at x == 0 defined as 1
 * (a plain sin(x)/x evaluates 0/0 there and yields NaN). */
#define m_sinc(x) (((x)==0) ? 1.0 : (sin(x)/(x)))
|
|
114
|
+
|
|
115
|
+
#define m_erf(x) erf(x)
|
|
116
|
+
#define m_erfc(x) erfc(x)
|
|
117
|
+
#define m_ldexp(x,y) ldexp(x,y)
|
|
118
|
+
#define m_frexp(x,exp) frexp(x,exp)
|
|
119
|
+
|
|
120
|
+
/* Raises x to the integer power p.
 *
 *   p in 0..4 : direct multiplication
 *   p < 0     : reciprocal of the positive power
 *   p > 64    : delegated to pow()
 *   otherwise : square-and-multiply (binary exponentiation)
 *
 * The magnitude of a negative exponent is computed in unsigned arithmetic:
 * evaluating -p directly is signed overflow (undefined behavior) when
 * p == INT_MIN. Results for every other p are unchanged.
 */
static inline dtype pow_int(dtype x, int p)
{
    dtype r=1;
    switch(p) {
    case 0: return 1;
    case 1: return x;
    case 2: return x*x;
    case 3: return x*x*x;
    case 4: x=x*x; return x*x;
    default: break;
    }
    if (p<0) {
        /* |p| without overflow, including p == INT_MIN */
        unsigned int n = 0u - (unsigned int)p;
        if (n>64) {
            /* same path the recursion would take for large exponents;
             * round to dtype before taking the reciprocal, as before */
            dtype d = pow(x,(double)n);
            return 1/d;
        }
        return 1/pow_int(x,(int)n);
    }
    if (p>64) return pow(x,p);
    while (p) {
        if (p&1) r *= x;
        x *= x;
        p >>= 1;
    }
    return r;
}
|
|
139
|
+
|
|
140
|
+
/* Sequence helper: returns the start value x advanced by the step y
 * scaled by the factor c. */
static inline dtype f_seq(dtype x, dtype y, double c)
{
    return (y * c) + x;
}
|
|
144
|
+
|
|
145
|
+
static inline dtype f_kahan_sum(size_t n, char *p, ssize_t stride)
|
|
146
|
+
{
|
|
147
|
+
size_t i=n;
|
|
148
|
+
dtype x;
|
|
149
|
+
volatile dtype y=0;
|
|
150
|
+
volatile dtype t,r=0;
|
|
151
|
+
|
|
152
|
+
for (; i--;) {
|
|
153
|
+
x = *(dtype*)p;
|
|
154
|
+
p += stride;
|
|
155
|
+
if (fabs(x) > fabs(y)) {
|
|
156
|
+
dtype z=x; x=y; y=z;
|
|
157
|
+
}
|
|
158
|
+
r += x;
|
|
159
|
+
t = y;
|
|
160
|
+
y += r;
|
|
161
|
+
t = y-t;
|
|
162
|
+
r -= t;
|
|
163
|
+
}
|
|
164
|
+
return y;
|
|
165
|
+
}
|
|
166
|
+
|
|
167
|
+
static inline dtype f_kahan_sum_nan(size_t n, char *p, ssize_t stride)
|
|
168
|
+
{
|
|
169
|
+
size_t i=n;
|
|
170
|
+
dtype x;
|
|
171
|
+
volatile dtype y=0;
|
|
172
|
+
volatile dtype t,r=0;
|
|
173
|
+
|
|
174
|
+
for (; i--;) {
|
|
175
|
+
x = *(dtype*)p;
|
|
176
|
+
p += stride;
|
|
177
|
+
if (!m_isnan(x)) {
|
|
178
|
+
if (fabs(x) > fabs(y)) {
|
|
179
|
+
dtype z=x; x=y; y=z;
|
|
180
|
+
}
|
|
181
|
+
r += x;
|
|
182
|
+
t = y;
|
|
183
|
+
y += r;
|
|
184
|
+
t = y-t;
|
|
185
|
+
r -= t;
|
|
186
|
+
}
|
|
187
|
+
}
|
|
188
|
+
return y;
|
|
189
|
+
}
|
|
190
|
+
|
|
191
|
+
#include "real_accum.h"
|