cumo 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +27 -0
- data/.travis.yml +5 -0
- data/3rd_party/mkmf-cu/.gitignore +36 -0
- data/3rd_party/mkmf-cu/Gemfile +3 -0
- data/3rd_party/mkmf-cu/LICENSE +21 -0
- data/3rd_party/mkmf-cu/README.md +36 -0
- data/3rd_party/mkmf-cu/Rakefile +11 -0
- data/3rd_party/mkmf-cu/bin/mkmf-cu-nvcc +4 -0
- data/3rd_party/mkmf-cu/lib/mkmf-cu.rb +32 -0
- data/3rd_party/mkmf-cu/lib/mkmf-cu/cli.rb +80 -0
- data/3rd_party/mkmf-cu/lib/mkmf-cu/nvcc.rb +157 -0
- data/3rd_party/mkmf-cu/mkmf-cu.gemspec +16 -0
- data/3rd_party/mkmf-cu/test/test_mkmf-cu.rb +67 -0
- data/CODE_OF_CONDUCT.md +46 -0
- data/Gemfile +8 -0
- data/LICENSE.txt +82 -0
- data/README.md +252 -0
- data/Rakefile +43 -0
- data/bench/broadcast_fp32.rb +138 -0
- data/bench/cumo_bench.rb +193 -0
- data/bench/numo_bench.rb +138 -0
- data/bench/reduction_fp32.rb +117 -0
- data/bin/console +14 -0
- data/bin/setup +8 -0
- data/cumo.gemspec +32 -0
- data/ext/cumo/cuda/cublas.c +278 -0
- data/ext/cumo/cuda/driver.c +421 -0
- data/ext/cumo/cuda/memory_pool.cpp +185 -0
- data/ext/cumo/cuda/memory_pool_impl.cpp +308 -0
- data/ext/cumo/cuda/memory_pool_impl.hpp +370 -0
- data/ext/cumo/cuda/memory_pool_impl_test.cpp +554 -0
- data/ext/cumo/cuda/nvrtc.c +207 -0
- data/ext/cumo/cuda/runtime.c +167 -0
- data/ext/cumo/cumo.c +148 -0
- data/ext/cumo/depend.erb +58 -0
- data/ext/cumo/extconf.rb +179 -0
- data/ext/cumo/include/cumo.h +25 -0
- data/ext/cumo/include/cumo/compat.h +23 -0
- data/ext/cumo/include/cumo/cuda/cublas.h +153 -0
- data/ext/cumo/include/cumo/cuda/cumo_thrust.hpp +187 -0
- data/ext/cumo/include/cumo/cuda/cumo_thrust_complex.hpp +79 -0
- data/ext/cumo/include/cumo/cuda/driver.h +22 -0
- data/ext/cumo/include/cumo/cuda/memory_pool.h +28 -0
- data/ext/cumo/include/cumo/cuda/nvrtc.h +22 -0
- data/ext/cumo/include/cumo/cuda/runtime.h +40 -0
- data/ext/cumo/include/cumo/indexer.h +238 -0
- data/ext/cumo/include/cumo/intern.h +142 -0
- data/ext/cumo/include/cumo/intern_fwd.h +38 -0
- data/ext/cumo/include/cumo/intern_kernel.h +6 -0
- data/ext/cumo/include/cumo/narray.h +429 -0
- data/ext/cumo/include/cumo/narray_kernel.h +149 -0
- data/ext/cumo/include/cumo/ndloop.h +95 -0
- data/ext/cumo/include/cumo/reduce_kernel.h +126 -0
- data/ext/cumo/include/cumo/template.h +158 -0
- data/ext/cumo/include/cumo/template_kernel.h +77 -0
- data/ext/cumo/include/cumo/types/bit.h +40 -0
- data/ext/cumo/include/cumo/types/bit_kernel.h +34 -0
- data/ext/cumo/include/cumo/types/complex.h +402 -0
- data/ext/cumo/include/cumo/types/complex_kernel.h +414 -0
- data/ext/cumo/include/cumo/types/complex_macro.h +382 -0
- data/ext/cumo/include/cumo/types/complex_macro_kernel.h +186 -0
- data/ext/cumo/include/cumo/types/dcomplex.h +46 -0
- data/ext/cumo/include/cumo/types/dcomplex_kernel.h +13 -0
- data/ext/cumo/include/cumo/types/dfloat.h +47 -0
- data/ext/cumo/include/cumo/types/dfloat_kernel.h +14 -0
- data/ext/cumo/include/cumo/types/float_def.h +34 -0
- data/ext/cumo/include/cumo/types/float_def_kernel.h +39 -0
- data/ext/cumo/include/cumo/types/float_macro.h +191 -0
- data/ext/cumo/include/cumo/types/float_macro_kernel.h +158 -0
- data/ext/cumo/include/cumo/types/int16.h +24 -0
- data/ext/cumo/include/cumo/types/int16_kernel.h +23 -0
- data/ext/cumo/include/cumo/types/int32.h +24 -0
- data/ext/cumo/include/cumo/types/int32_kernel.h +19 -0
- data/ext/cumo/include/cumo/types/int64.h +24 -0
- data/ext/cumo/include/cumo/types/int64_kernel.h +19 -0
- data/ext/cumo/include/cumo/types/int8.h +24 -0
- data/ext/cumo/include/cumo/types/int8_kernel.h +19 -0
- data/ext/cumo/include/cumo/types/int_macro.h +67 -0
- data/ext/cumo/include/cumo/types/int_macro_kernel.h +48 -0
- data/ext/cumo/include/cumo/types/real_accum.h +486 -0
- data/ext/cumo/include/cumo/types/real_accum_kernel.h +101 -0
- data/ext/cumo/include/cumo/types/robj_macro.h +80 -0
- data/ext/cumo/include/cumo/types/robj_macro_kernel.h +0 -0
- data/ext/cumo/include/cumo/types/robject.h +27 -0
- data/ext/cumo/include/cumo/types/robject_kernel.h +7 -0
- data/ext/cumo/include/cumo/types/scomplex.h +46 -0
- data/ext/cumo/include/cumo/types/scomplex_kernel.h +13 -0
- data/ext/cumo/include/cumo/types/sfloat.h +48 -0
- data/ext/cumo/include/cumo/types/sfloat_kernel.h +14 -0
- data/ext/cumo/include/cumo/types/uint16.h +25 -0
- data/ext/cumo/include/cumo/types/uint16_kernel.h +20 -0
- data/ext/cumo/include/cumo/types/uint32.h +25 -0
- data/ext/cumo/include/cumo/types/uint32_kernel.h +20 -0
- data/ext/cumo/include/cumo/types/uint64.h +25 -0
- data/ext/cumo/include/cumo/types/uint64_kernel.h +20 -0
- data/ext/cumo/include/cumo/types/uint8.h +25 -0
- data/ext/cumo/include/cumo/types/uint8_kernel.h +20 -0
- data/ext/cumo/include/cumo/types/uint_macro.h +58 -0
- data/ext/cumo/include/cumo/types/uint_macro_kernel.h +38 -0
- data/ext/cumo/include/cumo/types/xint_macro.h +169 -0
- data/ext/cumo/include/cumo/types/xint_macro_kernel.h +88 -0
- data/ext/cumo/narray/SFMT-params.h +97 -0
- data/ext/cumo/narray/SFMT-params19937.h +46 -0
- data/ext/cumo/narray/SFMT.c +620 -0
- data/ext/cumo/narray/SFMT.h +167 -0
- data/ext/cumo/narray/array.c +638 -0
- data/ext/cumo/narray/data.c +961 -0
- data/ext/cumo/narray/gen/cogen.rb +56 -0
- data/ext/cumo/narray/gen/cogen_kernel.rb +58 -0
- data/ext/cumo/narray/gen/def/bit.rb +37 -0
- data/ext/cumo/narray/gen/def/dcomplex.rb +39 -0
- data/ext/cumo/narray/gen/def/dfloat.rb +37 -0
- data/ext/cumo/narray/gen/def/int16.rb +36 -0
- data/ext/cumo/narray/gen/def/int32.rb +36 -0
- data/ext/cumo/narray/gen/def/int64.rb +36 -0
- data/ext/cumo/narray/gen/def/int8.rb +36 -0
- data/ext/cumo/narray/gen/def/robject.rb +37 -0
- data/ext/cumo/narray/gen/def/scomplex.rb +39 -0
- data/ext/cumo/narray/gen/def/sfloat.rb +37 -0
- data/ext/cumo/narray/gen/def/uint16.rb +36 -0
- data/ext/cumo/narray/gen/def/uint32.rb +36 -0
- data/ext/cumo/narray/gen/def/uint64.rb +36 -0
- data/ext/cumo/narray/gen/def/uint8.rb +36 -0
- data/ext/cumo/narray/gen/erbpp2.rb +346 -0
- data/ext/cumo/narray/gen/narray_def.rb +268 -0
- data/ext/cumo/narray/gen/spec.rb +425 -0
- data/ext/cumo/narray/gen/tmpl/accum.c +86 -0
- data/ext/cumo/narray/gen/tmpl/accum_binary.c +121 -0
- data/ext/cumo/narray/gen/tmpl/accum_binary_kernel.cu +61 -0
- data/ext/cumo/narray/gen/tmpl/accum_index.c +119 -0
- data/ext/cumo/narray/gen/tmpl/accum_index_kernel.cu +66 -0
- data/ext/cumo/narray/gen/tmpl/accum_kernel.cu +12 -0
- data/ext/cumo/narray/gen/tmpl/alloc_func.c +107 -0
- data/ext/cumo/narray/gen/tmpl/allocate.c +37 -0
- data/ext/cumo/narray/gen/tmpl/aref.c +66 -0
- data/ext/cumo/narray/gen/tmpl/aref_cpu.c +50 -0
- data/ext/cumo/narray/gen/tmpl/aset.c +56 -0
- data/ext/cumo/narray/gen/tmpl/binary.c +162 -0
- data/ext/cumo/narray/gen/tmpl/binary2.c +70 -0
- data/ext/cumo/narray/gen/tmpl/binary2_kernel.cu +15 -0
- data/ext/cumo/narray/gen/tmpl/binary_kernel.cu +31 -0
- data/ext/cumo/narray/gen/tmpl/binary_s.c +45 -0
- data/ext/cumo/narray/gen/tmpl/binary_s_kernel.cu +15 -0
- data/ext/cumo/narray/gen/tmpl/bincount.c +181 -0
- data/ext/cumo/narray/gen/tmpl/cast.c +44 -0
- data/ext/cumo/narray/gen/tmpl/cast_array.c +13 -0
- data/ext/cumo/narray/gen/tmpl/class.c +9 -0
- data/ext/cumo/narray/gen/tmpl/class_kernel.cu +6 -0
- data/ext/cumo/narray/gen/tmpl/clip.c +121 -0
- data/ext/cumo/narray/gen/tmpl/coerce_cast.c +10 -0
- data/ext/cumo/narray/gen/tmpl/complex_accum_kernel.cu +129 -0
- data/ext/cumo/narray/gen/tmpl/cond_binary.c +68 -0
- data/ext/cumo/narray/gen/tmpl/cond_binary_kernel.cu +18 -0
- data/ext/cumo/narray/gen/tmpl/cond_unary.c +46 -0
- data/ext/cumo/narray/gen/tmpl/cum.c +50 -0
- data/ext/cumo/narray/gen/tmpl/each.c +47 -0
- data/ext/cumo/narray/gen/tmpl/each_with_index.c +70 -0
- data/ext/cumo/narray/gen/tmpl/ewcomp.c +79 -0
- data/ext/cumo/narray/gen/tmpl/ewcomp_kernel.cu +19 -0
- data/ext/cumo/narray/gen/tmpl/extract.c +22 -0
- data/ext/cumo/narray/gen/tmpl/extract_cpu.c +26 -0
- data/ext/cumo/narray/gen/tmpl/extract_data.c +53 -0
- data/ext/cumo/narray/gen/tmpl/eye.c +105 -0
- data/ext/cumo/narray/gen/tmpl/eye_kernel.cu +19 -0
- data/ext/cumo/narray/gen/tmpl/fill.c +52 -0
- data/ext/cumo/narray/gen/tmpl/fill_kernel.cu +29 -0
- data/ext/cumo/narray/gen/tmpl/float_accum_kernel.cu +106 -0
- data/ext/cumo/narray/gen/tmpl/format.c +62 -0
- data/ext/cumo/narray/gen/tmpl/format_to_a.c +49 -0
- data/ext/cumo/narray/gen/tmpl/frexp.c +38 -0
- data/ext/cumo/narray/gen/tmpl/gemm.c +203 -0
- data/ext/cumo/narray/gen/tmpl/init_class.c +20 -0
- data/ext/cumo/narray/gen/tmpl/init_module.c +12 -0
- data/ext/cumo/narray/gen/tmpl/inspect.c +21 -0
- data/ext/cumo/narray/gen/tmpl/lib.c +50 -0
- data/ext/cumo/narray/gen/tmpl/lib_kernel.cu +24 -0
- data/ext/cumo/narray/gen/tmpl/logseq.c +102 -0
- data/ext/cumo/narray/gen/tmpl/logseq_kernel.cu +31 -0
- data/ext/cumo/narray/gen/tmpl/map_with_index.c +98 -0
- data/ext/cumo/narray/gen/tmpl/median.c +66 -0
- data/ext/cumo/narray/gen/tmpl/minmax.c +47 -0
- data/ext/cumo/narray/gen/tmpl/module.c +9 -0
- data/ext/cumo/narray/gen/tmpl/module_kernel.cu +1 -0
- data/ext/cumo/narray/gen/tmpl/new_dim0.c +15 -0
- data/ext/cumo/narray/gen/tmpl/new_dim0_kernel.cu +8 -0
- data/ext/cumo/narray/gen/tmpl/poly.c +50 -0
- data/ext/cumo/narray/gen/tmpl/pow.c +97 -0
- data/ext/cumo/narray/gen/tmpl/pow_kernel.cu +29 -0
- data/ext/cumo/narray/gen/tmpl/powint.c +17 -0
- data/ext/cumo/narray/gen/tmpl/qsort.c +212 -0
- data/ext/cumo/narray/gen/tmpl/rand.c +168 -0
- data/ext/cumo/narray/gen/tmpl/rand_norm.c +121 -0
- data/ext/cumo/narray/gen/tmpl/real_accum_kernel.cu +75 -0
- data/ext/cumo/narray/gen/tmpl/seq.c +112 -0
- data/ext/cumo/narray/gen/tmpl/seq_kernel.cu +43 -0
- data/ext/cumo/narray/gen/tmpl/set2.c +57 -0
- data/ext/cumo/narray/gen/tmpl/sort.c +48 -0
- data/ext/cumo/narray/gen/tmpl/sort_index.c +111 -0
- data/ext/cumo/narray/gen/tmpl/store.c +41 -0
- data/ext/cumo/narray/gen/tmpl/store_array.c +187 -0
- data/ext/cumo/narray/gen/tmpl/store_array_kernel.cu +58 -0
- data/ext/cumo/narray/gen/tmpl/store_bit.c +86 -0
- data/ext/cumo/narray/gen/tmpl/store_bit_kernel.cu +66 -0
- data/ext/cumo/narray/gen/tmpl/store_from.c +81 -0
- data/ext/cumo/narray/gen/tmpl/store_from_kernel.cu +58 -0
- data/ext/cumo/narray/gen/tmpl/store_kernel.cu +3 -0
- data/ext/cumo/narray/gen/tmpl/store_numeric.c +9 -0
- data/ext/cumo/narray/gen/tmpl/to_a.c +43 -0
- data/ext/cumo/narray/gen/tmpl/unary.c +132 -0
- data/ext/cumo/narray/gen/tmpl/unary2.c +60 -0
- data/ext/cumo/narray/gen/tmpl/unary_kernel.cu +72 -0
- data/ext/cumo/narray/gen/tmpl/unary_ret2.c +34 -0
- data/ext/cumo/narray/gen/tmpl/unary_s.c +86 -0
- data/ext/cumo/narray/gen/tmpl/unary_s_kernel.cu +58 -0
- data/ext/cumo/narray/gen/tmpl_bit/allocate.c +24 -0
- data/ext/cumo/narray/gen/tmpl_bit/aref.c +54 -0
- data/ext/cumo/narray/gen/tmpl_bit/aref_cpu.c +57 -0
- data/ext/cumo/narray/gen/tmpl_bit/aset.c +56 -0
- data/ext/cumo/narray/gen/tmpl_bit/binary.c +98 -0
- data/ext/cumo/narray/gen/tmpl_bit/bit_count.c +64 -0
- data/ext/cumo/narray/gen/tmpl_bit/bit_count_cpu.c +88 -0
- data/ext/cumo/narray/gen/tmpl_bit/bit_count_kernel.cu +76 -0
- data/ext/cumo/narray/gen/tmpl_bit/bit_reduce.c +133 -0
- data/ext/cumo/narray/gen/tmpl_bit/each.c +48 -0
- data/ext/cumo/narray/gen/tmpl_bit/each_with_index.c +70 -0
- data/ext/cumo/narray/gen/tmpl_bit/extract.c +30 -0
- data/ext/cumo/narray/gen/tmpl_bit/extract_cpu.c +29 -0
- data/ext/cumo/narray/gen/tmpl_bit/fill.c +69 -0
- data/ext/cumo/narray/gen/tmpl_bit/format.c +64 -0
- data/ext/cumo/narray/gen/tmpl_bit/format_to_a.c +51 -0
- data/ext/cumo/narray/gen/tmpl_bit/inspect.c +21 -0
- data/ext/cumo/narray/gen/tmpl_bit/mask.c +136 -0
- data/ext/cumo/narray/gen/tmpl_bit/none_p.c +14 -0
- data/ext/cumo/narray/gen/tmpl_bit/store_array.c +108 -0
- data/ext/cumo/narray/gen/tmpl_bit/store_bit.c +70 -0
- data/ext/cumo/narray/gen/tmpl_bit/store_from.c +60 -0
- data/ext/cumo/narray/gen/tmpl_bit/to_a.c +47 -0
- data/ext/cumo/narray/gen/tmpl_bit/unary.c +81 -0
- data/ext/cumo/narray/gen/tmpl_bit/where.c +90 -0
- data/ext/cumo/narray/gen/tmpl_bit/where2.c +95 -0
- data/ext/cumo/narray/index.c +880 -0
- data/ext/cumo/narray/kwargs.c +153 -0
- data/ext/cumo/narray/math.c +142 -0
- data/ext/cumo/narray/narray.c +1948 -0
- data/ext/cumo/narray/ndloop.c +2105 -0
- data/ext/cumo/narray/rand.c +45 -0
- data/ext/cumo/narray/step.c +474 -0
- data/ext/cumo/narray/struct.c +886 -0
- data/lib/cumo.rb +3 -0
- data/lib/cumo/cuda.rb +11 -0
- data/lib/cumo/cuda/compile_error.rb +36 -0
- data/lib/cumo/cuda/compiler.rb +161 -0
- data/lib/cumo/cuda/device.rb +47 -0
- data/lib/cumo/cuda/link_state.rb +31 -0
- data/lib/cumo/cuda/module.rb +40 -0
- data/lib/cumo/cuda/nvrtc_program.rb +27 -0
- data/lib/cumo/linalg.rb +12 -0
- data/lib/cumo/narray.rb +2 -0
- data/lib/cumo/narray/extra.rb +1278 -0
- data/lib/erbpp.rb +294 -0
- data/lib/erbpp/line_number.rb +137 -0
- data/lib/erbpp/narray_def.rb +381 -0
- data/numo-narray-version +1 -0
- data/run.gdb +7 -0
- metadata +353 -0
|
@@ -0,0 +1,158 @@
|
|
|
1
|
+
#ifndef CUMO_FLOAT_MACRO_KERNEL_H
|
|
2
|
+
#define CUMO_FLOAT_MACRO_KERNEL_H
|
|
3
|
+
|
|
4
|
+
#include "float_def_kernel.h"
|
|
5
|
+
|
|
6
|
+
extern double round(double);
|
|
7
|
+
extern double log2(double);
|
|
8
|
+
extern double exp2(double);
|
|
9
|
+
#ifdef HAVE_EXP10
|
|
10
|
+
extern double exp10(double);
|
|
11
|
+
#else
|
|
12
|
+
extern double pow(double, double);
|
|
13
|
+
#endif
|
|
14
|
+
|
|
15
|
+
#define m_zero 0.0
|
|
16
|
+
#define m_one 1.0
|
|
17
|
+
|
|
18
|
+
//#define m_num_to_data(x) NUM2DBL(x)
|
|
19
|
+
//#define m_data_to_num(x) rb_float_new(x)
|
|
20
|
+
|
|
21
|
+
#define m_from_double(x) (x)
|
|
22
|
+
#define m_from_real(x) (x)
|
|
23
|
+
#define m_from_sint(x) (x)
|
|
24
|
+
#define m_from_int32(x) (x)
|
|
25
|
+
#define m_from_int64(x) (x)
|
|
26
|
+
#define m_from_uint32(x) (x)
|
|
27
|
+
#define m_from_uint64(x) (x)
|
|
28
|
+
|
|
29
|
+
#define m_add(x,y) ((x)+(y))
|
|
30
|
+
#define m_sub(x,y) ((x)-(y))
|
|
31
|
+
#define m_mul(x,y) ((x)*(y))
|
|
32
|
+
#define m_div(x,y) ((x)/(y))
|
|
33
|
+
/* True when the divisor y compares equal to zero; the dividend x is not used. */
#define m_div_check(x,y) ((y)==0)
|
|
34
|
+
#define m_mod(x,y) fmod(x,y)
|
|
35
|
+
/* Stores the plain quotient x/y in a and m_mod(x,y) in b.
   Expands to a brace block (a statement), not an expression. */
#define m_divmod(x,y,a,b) {a=(x)/(y); b=m_mod(x,y);}
|
|
36
|
+
#define m_pow(x,y) pow(x,y)
|
|
37
|
+
#define m_pow_int(x,y) pow_int(x,y)
|
|
38
|
+
|
|
39
|
+
#define m_abs(x) fabs(x)
|
|
40
|
+
#define m_minus(x) (-(x))
|
|
41
|
+
#define m_reciprocal(x) (1/(x))
|
|
42
|
+
#define m_square(x) ((x)*(x))
|
|
43
|
+
#define m_floor(x) floor(x)
|
|
44
|
+
#define m_round(x) round(x)
|
|
45
|
+
#define m_ceil(x) ceil(x)
|
|
46
|
+
#define m_trunc(x) trunc(x)
|
|
47
|
+
#define m_rint(x) rint(x)
|
|
48
|
+
/* Sign of x as a double: 0.0 when x == 0, 1.0 when x > 0, -1.0 when x < 0.
   When all three comparisons are false (e.g. x is NaN) x itself is returned. */
#define m_sign(x) (((x)==0) ? 0.0:(((x)>0) ? 1.0:(((x)<0) ? -1.0:(x))))
|
|
49
|
+
#define m_copysign(x,y) copysign(x,y)
|
|
50
|
+
#define m_signbit(x) signbit(x)
|
|
51
|
+
/* Splits x with modf(): y receives modf's return value (the fractional part)
   and z receives the integral part that modf stores through its pointer argument.
   Expands to a brace block that declares a local double named d. */
#define m_modf(x,y,z) {double d; y=modf(x,&d); z=d;}
|
|
52
|
+
|
|
53
|
+
#define m_eq(x,y) ((x)==(y))
|
|
54
|
+
#define m_ne(x,y) ((x)!=(y))
|
|
55
|
+
#define m_gt(x,y) ((x)>(y))
|
|
56
|
+
#define m_ge(x,y) ((x)>=(y))
|
|
57
|
+
#define m_lt(x,y) ((x)<(y))
|
|
58
|
+
#define m_le(x,y) ((x)<=(y))
|
|
59
|
+
|
|
60
|
+
#define m_isnan(x) isnan(x)
|
|
61
|
+
#define m_isinf(x) isinf(x)
|
|
62
|
+
/* True when x is infinite and its sign bit is clear (positive infinity). */
#define m_isposinf(x) (isinf(x) && signbit(x)==0)
|
|
63
|
+
#define m_isneginf(x) (isinf(x) && signbit(x))
|
|
64
|
+
#define m_isfinite(x) isfinite(x)
|
|
65
|
+
|
|
66
|
+
#define m_mulsum_init 0
|
|
67
|
+
|
|
68
|
+
#define m_sprintf(s,x) sprintf(s,"%g",x)
|
|
69
|
+
|
|
70
|
+
/*
|
|
71
|
+
#define cmp_prnan(a,b) \
|
|
72
|
+
((qsort_cast(a)==qsort_cast(b)) ? 0 : \
|
|
73
|
+
(qsort_cast(a) > qsort_cast(b)) ? 1 : -1)
|
|
74
|
+
|
|
75
|
+
#define cmp_ignan(a,b) \
|
|
76
|
+
(m_isnan(qsort_cast(a)) ? (m_isnan(qsort_cast(b)) ? 0 : 1) : \
|
|
77
|
+
(m_isnan(qsort_cast(b)) ? -1 : \
|
|
78
|
+
((qsort_cast(a)==qsort_cast(b)) ? 0 : \
|
|
79
|
+
(qsort_cast(a) > qsort_cast(b)) ? 1 : -1)))
|
|
80
|
+
|
|
81
|
+
#define cmpgt_prnan(a,b) \
|
|
82
|
+
(qsort_cast(a) > qsort_cast(b))
|
|
83
|
+
|
|
84
|
+
#define cmpgt_ignan(a,b) \
|
|
85
|
+
((m_isnan(qsort_cast(a)) && !m_isnan(qsort_cast(b))) || \
|
|
86
|
+
(qsort_cast(a) > qsort_cast(b)))
|
|
87
|
+
*/
|
|
88
|
+
|
|
89
|
+
#define m_sqrt(x) sqrt(x)
|
|
90
|
+
#define m_cbrt(x) cbrt(x)
|
|
91
|
+
#define m_log(x) log(x)
|
|
92
|
+
#define m_log2(x) log2(x)
|
|
93
|
+
#define m_log10(x) log10(x)
|
|
94
|
+
#define m_exp(x) exp(x)
|
|
95
|
+
#define m_exp2(x) exp2(x)
|
|
96
|
+
#ifdef HAVE_EXP10
|
|
97
|
+
#define m_exp10(x) exp10(x)
|
|
98
|
+
#else
|
|
99
|
+
/* Fallback for builds where HAVE_EXP10 is not defined: 10 raised to x via pow(). */
#define m_exp10(x) pow(10, x)
|
|
100
|
+
#endif
|
|
101
|
+
#define m_expm1(x) expm1(x)
|
|
102
|
+
#define m_log1p(x) log1p(x)
|
|
103
|
+
|
|
104
|
+
#define m_sin(x) sin(x)
|
|
105
|
+
#define m_cos(x) cos(x)
|
|
106
|
+
#define m_tan(x) tan(x)
|
|
107
|
+
#define m_asin(x) asin(x)
|
|
108
|
+
#define m_acos(x) acos(x)
|
|
109
|
+
#define m_atan(x) atan(x)
|
|
110
|
+
#define m_sinh(x) sinh(x)
|
|
111
|
+
#define m_cosh(x) cosh(x)
|
|
112
|
+
#define m_tanh(x) tanh(x)
|
|
113
|
+
#define m_asinh(x) asinh(x)
|
|
114
|
+
#define m_acosh(x) acosh(x)
|
|
115
|
+
#define m_atanh(x) atanh(x)
|
|
116
|
+
#define m_atan2(x,y) atan2(x,y)
|
|
117
|
+
#define m_hypot(x,y) hypot(x,y)
|
|
118
|
+
/* Unnormalized sinc: sin(x)/x. There is no special case for x == 0,
   so a zero argument evaluates 0/0. */
#define m_sinc(x) (sin(x)/(x))
|
|
119
|
+
|
|
120
|
+
#define m_erf(x) erf(x)
|
|
121
|
+
#define m_erfc(x) erfc(x)
|
|
122
|
+
#define m_ldexp(x,y) ldexp(x,y)
|
|
123
|
+
#define m_frexp(x,exp) frexp(x,exp)
|
|
124
|
+
|
|
125
|
+
/* only internal use (called by pow_int) */
|
|
126
|
+
/*
 * Raises x to the integer power p.
 *
 * Exponents 0..4 are computed with explicit products, exponents above 64 are
 * delegated to pow(), and everything in between uses binary exponentiation
 * (square x each round, multiply it into the result when the low bit of p is
 * set). Callers are expected to pass p >= 0; pow_int() negates negative
 * exponents before calling this helper.
 */
__host__ __device__ static inline dtype pow_positive_int(dtype x, int p)
{
    dtype acc;

    if (p == 0) return 1;
    if (p == 1) return x;
    if (p == 2) return x*x;
    if (p == 3) return x*x*x;
    if (p == 4) {
        x = x*x;
        return x*x;
    }
    if (p > 64) return pow(x,p);

    acc = 1;
    for (; p; p >>= 1) {
        if (p & 1) acc *= x;
        x *= x;
    }
    return acc;
}
|
|
144
|
+
|
|
145
|
+
/*
 * Raises x to the integer power p.
 * A non-negative p is forwarded to pow_positive_int(); a negative p yields the
 * reciprocal of x raised to -p.
 */
__host__ __device__ static inline dtype pow_int(dtype x, int p)
{
    if (p >= 0) {
        return pow_positive_int(x, p);
    }
    return 1/pow_positive_int(x, -p);
}
|
|
150
|
+
|
|
151
|
+
/*
 * Returns x plus y scaled by c.
 * The product y * c is formed first (c is a double), x is added to it, and
 * the sum is converted to dtype on return.
 */
__host__ __device__ static inline dtype f_seq(dtype x, dtype y, double c)
{
    return (y * c) + x;
}
|
|
155
|
+
|
|
156
|
+
#include "real_accum_kernel.h"
|
|
157
|
+
|
|
158
|
+
#endif // CUMO_FLOAT_MACRO_KERNEL_H
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
typedef int16_t dtype;
|
|
2
|
+
typedef int16_t rtype;
|
|
3
|
+
#define cT cumo_cInt16
|
|
4
|
+
#define cRT cT
|
|
5
|
+
|
|
6
|
+
/* Converts x with NUM2INT() and casts the result to dtype. */
#define m_num_to_data(x) ((dtype)NUM2INT(x))
|
|
7
|
+
#define m_data_to_num(x) INT2NUM((int)(x))
|
|
8
|
+
/* Reads one dtype value from the address x, casts it to int and converts it with INT2NUM(). */
#define m_extract(x) INT2NUM((int)*(dtype*)(x))
|
|
9
|
+
#define m_sprintf(s,x) sprintf(s,"%d",(int)(x))
|
|
10
|
+
|
|
11
|
+
#ifndef INT16_MIN
|
|
12
|
+
#define INT16_MIN (-32767-1)
|
|
13
|
+
#endif
|
|
14
|
+
#ifndef INT16_MAX
|
|
15
|
+
#define INT16_MAX (32767)
|
|
16
|
+
#endif
|
|
17
|
+
|
|
18
|
+
#define DATA_MIN INT16_MIN
|
|
19
|
+
#define DATA_MAX INT16_MAX
|
|
20
|
+
|
|
21
|
+
#define M_MIN m_data_to_num(INT16_MIN)
|
|
22
|
+
#define M_MAX m_data_to_num(INT16_MAX)
|
|
23
|
+
|
|
24
|
+
#include "int_macro.h"
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
#ifndef CUMO_INT16_KERNEL_H
|
|
2
|
+
#define CUMO_INT16_KERNEL_H
|
|
3
|
+
|
|
4
|
+
typedef int16_t dtype;
|
|
5
|
+
typedef int16_t rtype;
|
|
6
|
+
#define cT cumo_cInt16
|
|
7
|
+
#define cRT cT
|
|
8
|
+
|
|
9
|
+
#define m_sprintf(s,x) sprintf(s,"%d",(int)(x))
|
|
10
|
+
|
|
11
|
+
#ifndef INT16_MIN
|
|
12
|
+
#define INT16_MIN (-32767-1)
|
|
13
|
+
#endif
|
|
14
|
+
#ifndef INT16_MAX
|
|
15
|
+
#define INT16_MAX (32767)
|
|
16
|
+
#endif
|
|
17
|
+
|
|
18
|
+
#define DATA_MIN INT16_MIN
|
|
19
|
+
#define DATA_MAX INT16_MAX
|
|
20
|
+
|
|
21
|
+
#include "int_macro_kernel.h"
|
|
22
|
+
|
|
23
|
+
#endif // CUMO_INT16_KERNEL_H
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
typedef int32_t dtype;
|
|
2
|
+
typedef int32_t rtype;
|
|
3
|
+
#define cT cumo_cInt32
|
|
4
|
+
#define cRT cT
|
|
5
|
+
|
|
6
|
+
#define m_num_to_data(x) ((dtype)NUM2INT32(x))
|
|
7
|
+
#define m_data_to_num(x) INT322NUM((int32_t)(x))
|
|
8
|
+
#define m_extract(x) INT322NUM((int32_t)*(dtype*)(x))
|
|
9
|
+
#define m_sprintf(s,x) sprintf(s,"%"PRId32,(int32_t)(x))
|
|
10
|
+
|
|
11
|
+
#ifndef INT32_MIN
|
|
12
|
+
#define INT32_MIN (-2147483647-1)
|
|
13
|
+
#endif
|
|
14
|
+
#ifndef INT32_MAX
|
|
15
|
+
#define INT32_MAX (2147483647)
|
|
16
|
+
#endif
|
|
17
|
+
|
|
18
|
+
#define DATA_MIN INT32_MIN
|
|
19
|
+
#define DATA_MAX INT32_MAX
|
|
20
|
+
|
|
21
|
+
#define M_MIN m_data_to_num(INT32_MIN)
|
|
22
|
+
#define M_MAX m_data_to_num(INT32_MAX)
|
|
23
|
+
|
|
24
|
+
#include "int_macro.h"
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
#ifndef CUMO_INT32_KERNEL_H
|
|
2
|
+
#define CUMO_INT32_KERNEL_H
|
|
3
|
+
|
|
4
|
+
typedef int32_t dtype;
|
|
5
|
+
typedef int32_t rtype;
|
|
6
|
+
|
|
7
|
+
#ifndef INT32_MIN
|
|
8
|
+
#define INT32_MIN (-2147483647-1)
|
|
9
|
+
#endif
|
|
10
|
+
#ifndef INT32_MAX
|
|
11
|
+
#define INT32_MAX (2147483647)
|
|
12
|
+
#endif
|
|
13
|
+
|
|
14
|
+
#define DATA_MIN INT32_MIN
|
|
15
|
+
#define DATA_MAX INT32_MAX
|
|
16
|
+
|
|
17
|
+
#include "int_macro_kernel.h"
|
|
18
|
+
|
|
19
|
+
#endif // CUMO_INT32_KERNEL_H
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
typedef int64_t dtype;
|
|
2
|
+
typedef int64_t rtype;
|
|
3
|
+
#define cT cumo_cInt64
|
|
4
|
+
#define cRT cT
|
|
5
|
+
|
|
6
|
+
#define m_num_to_data(x) ((dtype)NUM2INT64(x))
|
|
7
|
+
#define m_data_to_num(x) INT642NUM((int64_t)(x))
|
|
8
|
+
#define m_extract(x) INT642NUM((int64_t)*(dtype*)(x))
|
|
9
|
+
#define m_sprintf(s,x) sprintf(s,"%"PRId64,(int64_t)(x))
|
|
10
|
+
|
|
11
|
+
#ifndef INT64_MIN
|
|
12
|
+
/* Fallback used only when INT64_MIN is not already defined. The LL suffix
   makes the constant a long long even where long is only 32 bits wide. */
#define INT64_MIN (-9223372036854775807LL-1)
|
|
13
|
+
#endif
|
|
14
|
+
#ifndef INT64_MAX
|
|
15
|
+
/* Fallback used only when INT64_MAX is not already defined. The LL suffix
   makes the constant a long long even where long is only 32 bits wide. */
#define INT64_MAX (9223372036854775807LL)
|
|
16
|
+
#endif
|
|
17
|
+
|
|
18
|
+
#define DATA_MIN INT64_MIN
|
|
19
|
+
#define DATA_MAX INT64_MAX
|
|
20
|
+
|
|
21
|
+
#define M_MIN m_data_to_num(INT64_MIN)
|
|
22
|
+
#define M_MAX m_data_to_num(INT64_MAX)
|
|
23
|
+
|
|
24
|
+
#include "int_macro.h"
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
#ifndef CUMO_INT64_KERNEL_H
|
|
2
|
+
#define CUMO_INT64_KERNEL_H
|
|
3
|
+
|
|
4
|
+
typedef int64_t dtype;
|
|
5
|
+
typedef int64_t rtype;
|
|
6
|
+
|
|
7
|
+
#ifndef INT64_MIN
|
|
8
|
+
/* Fallback used only when INT64_MIN is not already defined. The LL suffix
   makes the constant a long long even where long is only 32 bits wide. */
#define INT64_MIN (-9223372036854775807LL-1)
|
|
9
|
+
#endif
|
|
10
|
+
#ifndef INT64_MAX
|
|
11
|
+
/* Fallback used only when INT64_MAX is not already defined. The LL suffix
   makes the constant a long long even where long is only 32 bits wide. */
#define INT64_MAX (9223372036854775807LL)
|
|
12
|
+
#endif
|
|
13
|
+
|
|
14
|
+
#define DATA_MIN INT64_MIN
|
|
15
|
+
#define DATA_MAX INT64_MAX
|
|
16
|
+
|
|
17
|
+
#include "int_macro_kernel.h"
|
|
18
|
+
|
|
19
|
+
#endif // CUMO_INT64_KERNEL_H
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
typedef int8_t dtype;
|
|
2
|
+
typedef int8_t rtype;
|
|
3
|
+
#define cT cumo_cInt8
|
|
4
|
+
#define cRT cT
|
|
5
|
+
|
|
6
|
+
#define m_num_to_data(x) ((dtype)NUM2INT(x))
|
|
7
|
+
#define m_data_to_num(x) INT2NUM((int)(x))
|
|
8
|
+
#define m_extract(x) INT2NUM((int)*(dtype*)(x))
|
|
9
|
+
#define m_sprintf(s,x) sprintf(s,"%d",(int)(x))
|
|
10
|
+
|
|
11
|
+
#ifndef INT8_MIN
|
|
12
|
+
#define INT8_MIN (-127-1)
|
|
13
|
+
#endif
|
|
14
|
+
#ifndef INT8_MAX
|
|
15
|
+
#define INT8_MAX (127)
|
|
16
|
+
#endif
|
|
17
|
+
|
|
18
|
+
#define DATA_MIN INT8_MIN
|
|
19
|
+
#define DATA_MAX INT8_MAX
|
|
20
|
+
|
|
21
|
+
#define M_MIN INT2FIX(INT8_MIN)
|
|
22
|
+
#define M_MAX INT2FIX(INT8_MAX)
|
|
23
|
+
|
|
24
|
+
#include "int_macro.h"
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
#ifndef CUMO_INT8_KERNEL_H
|
|
2
|
+
#define CUMO_INT8_KERNEL_H
|
|
3
|
+
|
|
4
|
+
typedef int8_t dtype;
|
|
5
|
+
typedef int8_t rtype;
|
|
6
|
+
|
|
7
|
+
#ifndef INT8_MIN
|
|
8
|
+
#define INT8_MIN (-127-1)
|
|
9
|
+
#endif
|
|
10
|
+
#ifndef INT8_MAX
|
|
11
|
+
#define INT8_MAX (127)
|
|
12
|
+
#endif
|
|
13
|
+
|
|
14
|
+
#define DATA_MIN INT8_MIN
|
|
15
|
+
#define DATA_MAX INT8_MAX
|
|
16
|
+
|
|
17
|
+
#include "int_macro_kernel.h"
|
|
18
|
+
|
|
19
|
+
#endif // CUMO_INT8_KERNEL_H
|
|
@@ -0,0 +1,67 @@
|
|
|
1
|
+
#include "xint_macro.h"
|
|
2
|
+
|
|
3
|
+
/* Integer sign of x: 0 when x == 0, 1 when x > 0, -1 otherwise. */
#define m_sign(x) (((x)==0) ? 0 : (((x)>0) ? 1 : -1))
|
|
4
|
+
|
|
5
|
+
/*
 * Absolute value of x.
 * Raises nary_eValueError when x equals DATA_MIN; otherwise returns -x for a
 * negative x and x unchanged for everything else.
 */
static inline dtype m_abs(dtype x) {
    if (x == DATA_MIN) {
        rb_raise(nary_eValueError, "cannot convert the minimum integer");
    }
    if (x < 0) {
        return -x;
    }
    return x;
}
|
|
11
|
+
|
|
12
|
+
/*
 * Integer reciprocal of x.
 * Raises rb_eZeroDivError for x == 0, returns x itself for 1 and -1, and
 * returns 0 for every other value.
 */
static inline dtype int_reciprocal(dtype x) {
    if (x == 0) {
        rb_raise(rb_eZeroDivError, "divided by 0");
    }
    if (x == 1 || x == -1) {
        return x;
    }
    return 0;
}
|
|
24
|
+
|
|
25
|
+
/*
|
|
26
|
+
static dtype pow_int(dtype x, int p)
|
|
27
|
+
{
|
|
28
|
+
dtype r = m_one;
|
|
29
|
+
switch(p) {
|
|
30
|
+
case 0: return 1;
|
|
31
|
+
case 1: return x;
|
|
32
|
+
case 2: return x*x;
|
|
33
|
+
case 3: return x*x*x;
|
|
34
|
+
}
|
|
35
|
+
if (p<0) return 0;
|
|
36
|
+
while (p) {
|
|
37
|
+
if (p&1) r *= x;
|
|
38
|
+
x *= x;
|
|
39
|
+
p >>= 1;
|
|
40
|
+
}
|
|
41
|
+
return r;
|
|
42
|
+
}
|
|
43
|
+
*/
|
|
44
|
+
|
|
45
|
+
static inline int64_t f_sum(size_t n, char *p, ssize_t stride)
|
|
46
|
+
{
|
|
47
|
+
int64_t x,y=0;
|
|
48
|
+
size_t i=n;
|
|
49
|
+
for (; i--;) {
|
|
50
|
+
x = *(dtype*)p;
|
|
51
|
+
y += x;
|
|
52
|
+
p += stride;
|
|
53
|
+
}
|
|
54
|
+
return y;
|
|
55
|
+
}
|
|
56
|
+
|
|
57
|
+
static inline int64_t f_prod(size_t n, char *p, ssize_t stride)
|
|
58
|
+
{
|
|
59
|
+
int64_t x,y=1;
|
|
60
|
+
size_t i=n;
|
|
61
|
+
for (; i--;) {
|
|
62
|
+
x = *(dtype*)p;
|
|
63
|
+
y *= x;
|
|
64
|
+
p += stride;
|
|
65
|
+
}
|
|
66
|
+
return y;
|
|
67
|
+
}
|
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
#ifndef CUMO_INT_MACRO_KERNEL_H
|
|
2
|
+
#define CUMO_INT_MACRO_KERNEL_H
|
|
3
|
+
|
|
4
|
+
#include "xint_macro_kernel.h"
|
|
5
|
+
|
|
6
|
+
/* Integer sign of x: 0 when x == 0, 1 when x > 0, -1 otherwise. */
#define m_sign(x) (((x)==0) ? 0 : (((x)>0) ? 1 : -1))
|
|
7
|
+
|
|
8
|
+
/*
 * Absolute value of x: -x for a negative x, x unchanged otherwise.
 * No DATA_MIN check is performed here; the check below is disabled.
 */
__host__ __device__ static inline dtype m_abs(dtype x) {
    // TODO(sonots): How to handle in CUDA kernel?
    // if (x==DATA_MIN) {
    //     rb_raise(nary_eValueError, "cannot convert the minimum integer");
    // }
    if (x < 0) {
        return -x;
    }
    return x;
}
|
|
15
|
+
|
|
16
|
+
/*
 * Integer reciprocal of x: x itself for 1 and -1, and 0 for every other
 * value, including 0 (no error is raised here).
 */
__host__ __device__ static inline dtype int_reciprocal(dtype x) {
    if (x == 1 || x == -1) {
        return x;
    }
    // x == 0 also ends up here: as CUDA kernel 1/0 results in 0.
    //rb_raise(rb_eZeroDivError, "divided by 0");
    return 0;
}
|
|
29
|
+
|
|
30
|
+
__device__ static dtype pow_int(dtype x, int p)
|
|
31
|
+
{
|
|
32
|
+
dtype r = m_one;
|
|
33
|
+
switch(p) {
|
|
34
|
+
case 0: return 1;
|
|
35
|
+
case 1: return x;
|
|
36
|
+
case 2: return x*x;
|
|
37
|
+
case 3: return x*x*x;
|
|
38
|
+
}
|
|
39
|
+
if (p<0) return 0;
|
|
40
|
+
while (p) {
|
|
41
|
+
if (p&1) r *= x;
|
|
42
|
+
x *= x;
|
|
43
|
+
p >>= 1;
|
|
44
|
+
}
|
|
45
|
+
return r;
|
|
46
|
+
}
|
|
47
|
+
|
|
48
|
+
#endif // CUMO_INT_MACRO_KERNEL_H
|