cumo 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +27 -0
- data/.travis.yml +5 -0
- data/3rd_party/mkmf-cu/.gitignore +36 -0
- data/3rd_party/mkmf-cu/Gemfile +3 -0
- data/3rd_party/mkmf-cu/LICENSE +21 -0
- data/3rd_party/mkmf-cu/README.md +36 -0
- data/3rd_party/mkmf-cu/Rakefile +11 -0
- data/3rd_party/mkmf-cu/bin/mkmf-cu-nvcc +4 -0
- data/3rd_party/mkmf-cu/lib/mkmf-cu.rb +32 -0
- data/3rd_party/mkmf-cu/lib/mkmf-cu/cli.rb +80 -0
- data/3rd_party/mkmf-cu/lib/mkmf-cu/nvcc.rb +157 -0
- data/3rd_party/mkmf-cu/mkmf-cu.gemspec +16 -0
- data/3rd_party/mkmf-cu/test/test_mkmf-cu.rb +67 -0
- data/CODE_OF_CONDUCT.md +46 -0
- data/Gemfile +8 -0
- data/LICENSE.txt +82 -0
- data/README.md +252 -0
- data/Rakefile +43 -0
- data/bench/broadcast_fp32.rb +138 -0
- data/bench/cumo_bench.rb +193 -0
- data/bench/numo_bench.rb +138 -0
- data/bench/reduction_fp32.rb +117 -0
- data/bin/console +14 -0
- data/bin/setup +8 -0
- data/cumo.gemspec +32 -0
- data/ext/cumo/cuda/cublas.c +278 -0
- data/ext/cumo/cuda/driver.c +421 -0
- data/ext/cumo/cuda/memory_pool.cpp +185 -0
- data/ext/cumo/cuda/memory_pool_impl.cpp +308 -0
- data/ext/cumo/cuda/memory_pool_impl.hpp +370 -0
- data/ext/cumo/cuda/memory_pool_impl_test.cpp +554 -0
- data/ext/cumo/cuda/nvrtc.c +207 -0
- data/ext/cumo/cuda/runtime.c +167 -0
- data/ext/cumo/cumo.c +148 -0
- data/ext/cumo/depend.erb +58 -0
- data/ext/cumo/extconf.rb +179 -0
- data/ext/cumo/include/cumo.h +25 -0
- data/ext/cumo/include/cumo/compat.h +23 -0
- data/ext/cumo/include/cumo/cuda/cublas.h +153 -0
- data/ext/cumo/include/cumo/cuda/cumo_thrust.hpp +187 -0
- data/ext/cumo/include/cumo/cuda/cumo_thrust_complex.hpp +79 -0
- data/ext/cumo/include/cumo/cuda/driver.h +22 -0
- data/ext/cumo/include/cumo/cuda/memory_pool.h +28 -0
- data/ext/cumo/include/cumo/cuda/nvrtc.h +22 -0
- data/ext/cumo/include/cumo/cuda/runtime.h +40 -0
- data/ext/cumo/include/cumo/indexer.h +238 -0
- data/ext/cumo/include/cumo/intern.h +142 -0
- data/ext/cumo/include/cumo/intern_fwd.h +38 -0
- data/ext/cumo/include/cumo/intern_kernel.h +6 -0
- data/ext/cumo/include/cumo/narray.h +429 -0
- data/ext/cumo/include/cumo/narray_kernel.h +149 -0
- data/ext/cumo/include/cumo/ndloop.h +95 -0
- data/ext/cumo/include/cumo/reduce_kernel.h +126 -0
- data/ext/cumo/include/cumo/template.h +158 -0
- data/ext/cumo/include/cumo/template_kernel.h +77 -0
- data/ext/cumo/include/cumo/types/bit.h +40 -0
- data/ext/cumo/include/cumo/types/bit_kernel.h +34 -0
- data/ext/cumo/include/cumo/types/complex.h +402 -0
- data/ext/cumo/include/cumo/types/complex_kernel.h +414 -0
- data/ext/cumo/include/cumo/types/complex_macro.h +382 -0
- data/ext/cumo/include/cumo/types/complex_macro_kernel.h +186 -0
- data/ext/cumo/include/cumo/types/dcomplex.h +46 -0
- data/ext/cumo/include/cumo/types/dcomplex_kernel.h +13 -0
- data/ext/cumo/include/cumo/types/dfloat.h +47 -0
- data/ext/cumo/include/cumo/types/dfloat_kernel.h +14 -0
- data/ext/cumo/include/cumo/types/float_def.h +34 -0
- data/ext/cumo/include/cumo/types/float_def_kernel.h +39 -0
- data/ext/cumo/include/cumo/types/float_macro.h +191 -0
- data/ext/cumo/include/cumo/types/float_macro_kernel.h +158 -0
- data/ext/cumo/include/cumo/types/int16.h +24 -0
- data/ext/cumo/include/cumo/types/int16_kernel.h +23 -0
- data/ext/cumo/include/cumo/types/int32.h +24 -0
- data/ext/cumo/include/cumo/types/int32_kernel.h +19 -0
- data/ext/cumo/include/cumo/types/int64.h +24 -0
- data/ext/cumo/include/cumo/types/int64_kernel.h +19 -0
- data/ext/cumo/include/cumo/types/int8.h +24 -0
- data/ext/cumo/include/cumo/types/int8_kernel.h +19 -0
- data/ext/cumo/include/cumo/types/int_macro.h +67 -0
- data/ext/cumo/include/cumo/types/int_macro_kernel.h +48 -0
- data/ext/cumo/include/cumo/types/real_accum.h +486 -0
- data/ext/cumo/include/cumo/types/real_accum_kernel.h +101 -0
- data/ext/cumo/include/cumo/types/robj_macro.h +80 -0
- data/ext/cumo/include/cumo/types/robj_macro_kernel.h +0 -0
- data/ext/cumo/include/cumo/types/robject.h +27 -0
- data/ext/cumo/include/cumo/types/robject_kernel.h +7 -0
- data/ext/cumo/include/cumo/types/scomplex.h +46 -0
- data/ext/cumo/include/cumo/types/scomplex_kernel.h +13 -0
- data/ext/cumo/include/cumo/types/sfloat.h +48 -0
- data/ext/cumo/include/cumo/types/sfloat_kernel.h +14 -0
- data/ext/cumo/include/cumo/types/uint16.h +25 -0
- data/ext/cumo/include/cumo/types/uint16_kernel.h +20 -0
- data/ext/cumo/include/cumo/types/uint32.h +25 -0
- data/ext/cumo/include/cumo/types/uint32_kernel.h +20 -0
- data/ext/cumo/include/cumo/types/uint64.h +25 -0
- data/ext/cumo/include/cumo/types/uint64_kernel.h +20 -0
- data/ext/cumo/include/cumo/types/uint8.h +25 -0
- data/ext/cumo/include/cumo/types/uint8_kernel.h +20 -0
- data/ext/cumo/include/cumo/types/uint_macro.h +58 -0
- data/ext/cumo/include/cumo/types/uint_macro_kernel.h +38 -0
- data/ext/cumo/include/cumo/types/xint_macro.h +169 -0
- data/ext/cumo/include/cumo/types/xint_macro_kernel.h +88 -0
- data/ext/cumo/narray/SFMT-params.h +97 -0
- data/ext/cumo/narray/SFMT-params19937.h +46 -0
- data/ext/cumo/narray/SFMT.c +620 -0
- data/ext/cumo/narray/SFMT.h +167 -0
- data/ext/cumo/narray/array.c +638 -0
- data/ext/cumo/narray/data.c +961 -0
- data/ext/cumo/narray/gen/cogen.rb +56 -0
- data/ext/cumo/narray/gen/cogen_kernel.rb +58 -0
- data/ext/cumo/narray/gen/def/bit.rb +37 -0
- data/ext/cumo/narray/gen/def/dcomplex.rb +39 -0
- data/ext/cumo/narray/gen/def/dfloat.rb +37 -0
- data/ext/cumo/narray/gen/def/int16.rb +36 -0
- data/ext/cumo/narray/gen/def/int32.rb +36 -0
- data/ext/cumo/narray/gen/def/int64.rb +36 -0
- data/ext/cumo/narray/gen/def/int8.rb +36 -0
- data/ext/cumo/narray/gen/def/robject.rb +37 -0
- data/ext/cumo/narray/gen/def/scomplex.rb +39 -0
- data/ext/cumo/narray/gen/def/sfloat.rb +37 -0
- data/ext/cumo/narray/gen/def/uint16.rb +36 -0
- data/ext/cumo/narray/gen/def/uint32.rb +36 -0
- data/ext/cumo/narray/gen/def/uint64.rb +36 -0
- data/ext/cumo/narray/gen/def/uint8.rb +36 -0
- data/ext/cumo/narray/gen/erbpp2.rb +346 -0
- data/ext/cumo/narray/gen/narray_def.rb +268 -0
- data/ext/cumo/narray/gen/spec.rb +425 -0
- data/ext/cumo/narray/gen/tmpl/accum.c +86 -0
- data/ext/cumo/narray/gen/tmpl/accum_binary.c +121 -0
- data/ext/cumo/narray/gen/tmpl/accum_binary_kernel.cu +61 -0
- data/ext/cumo/narray/gen/tmpl/accum_index.c +119 -0
- data/ext/cumo/narray/gen/tmpl/accum_index_kernel.cu +66 -0
- data/ext/cumo/narray/gen/tmpl/accum_kernel.cu +12 -0
- data/ext/cumo/narray/gen/tmpl/alloc_func.c +107 -0
- data/ext/cumo/narray/gen/tmpl/allocate.c +37 -0
- data/ext/cumo/narray/gen/tmpl/aref.c +66 -0
- data/ext/cumo/narray/gen/tmpl/aref_cpu.c +50 -0
- data/ext/cumo/narray/gen/tmpl/aset.c +56 -0
- data/ext/cumo/narray/gen/tmpl/binary.c +162 -0
- data/ext/cumo/narray/gen/tmpl/binary2.c +70 -0
- data/ext/cumo/narray/gen/tmpl/binary2_kernel.cu +15 -0
- data/ext/cumo/narray/gen/tmpl/binary_kernel.cu +31 -0
- data/ext/cumo/narray/gen/tmpl/binary_s.c +45 -0
- data/ext/cumo/narray/gen/tmpl/binary_s_kernel.cu +15 -0
- data/ext/cumo/narray/gen/tmpl/bincount.c +181 -0
- data/ext/cumo/narray/gen/tmpl/cast.c +44 -0
- data/ext/cumo/narray/gen/tmpl/cast_array.c +13 -0
- data/ext/cumo/narray/gen/tmpl/class.c +9 -0
- data/ext/cumo/narray/gen/tmpl/class_kernel.cu +6 -0
- data/ext/cumo/narray/gen/tmpl/clip.c +121 -0
- data/ext/cumo/narray/gen/tmpl/coerce_cast.c +10 -0
- data/ext/cumo/narray/gen/tmpl/complex_accum_kernel.cu +129 -0
- data/ext/cumo/narray/gen/tmpl/cond_binary.c +68 -0
- data/ext/cumo/narray/gen/tmpl/cond_binary_kernel.cu +18 -0
- data/ext/cumo/narray/gen/tmpl/cond_unary.c +46 -0
- data/ext/cumo/narray/gen/tmpl/cum.c +50 -0
- data/ext/cumo/narray/gen/tmpl/each.c +47 -0
- data/ext/cumo/narray/gen/tmpl/each_with_index.c +70 -0
- data/ext/cumo/narray/gen/tmpl/ewcomp.c +79 -0
- data/ext/cumo/narray/gen/tmpl/ewcomp_kernel.cu +19 -0
- data/ext/cumo/narray/gen/tmpl/extract.c +22 -0
- data/ext/cumo/narray/gen/tmpl/extract_cpu.c +26 -0
- data/ext/cumo/narray/gen/tmpl/extract_data.c +53 -0
- data/ext/cumo/narray/gen/tmpl/eye.c +105 -0
- data/ext/cumo/narray/gen/tmpl/eye_kernel.cu +19 -0
- data/ext/cumo/narray/gen/tmpl/fill.c +52 -0
- data/ext/cumo/narray/gen/tmpl/fill_kernel.cu +29 -0
- data/ext/cumo/narray/gen/tmpl/float_accum_kernel.cu +106 -0
- data/ext/cumo/narray/gen/tmpl/format.c +62 -0
- data/ext/cumo/narray/gen/tmpl/format_to_a.c +49 -0
- data/ext/cumo/narray/gen/tmpl/frexp.c +38 -0
- data/ext/cumo/narray/gen/tmpl/gemm.c +203 -0
- data/ext/cumo/narray/gen/tmpl/init_class.c +20 -0
- data/ext/cumo/narray/gen/tmpl/init_module.c +12 -0
- data/ext/cumo/narray/gen/tmpl/inspect.c +21 -0
- data/ext/cumo/narray/gen/tmpl/lib.c +50 -0
- data/ext/cumo/narray/gen/tmpl/lib_kernel.cu +24 -0
- data/ext/cumo/narray/gen/tmpl/logseq.c +102 -0
- data/ext/cumo/narray/gen/tmpl/logseq_kernel.cu +31 -0
- data/ext/cumo/narray/gen/tmpl/map_with_index.c +98 -0
- data/ext/cumo/narray/gen/tmpl/median.c +66 -0
- data/ext/cumo/narray/gen/tmpl/minmax.c +47 -0
- data/ext/cumo/narray/gen/tmpl/module.c +9 -0
- data/ext/cumo/narray/gen/tmpl/module_kernel.cu +1 -0
- data/ext/cumo/narray/gen/tmpl/new_dim0.c +15 -0
- data/ext/cumo/narray/gen/tmpl/new_dim0_kernel.cu +8 -0
- data/ext/cumo/narray/gen/tmpl/poly.c +50 -0
- data/ext/cumo/narray/gen/tmpl/pow.c +97 -0
- data/ext/cumo/narray/gen/tmpl/pow_kernel.cu +29 -0
- data/ext/cumo/narray/gen/tmpl/powint.c +17 -0
- data/ext/cumo/narray/gen/tmpl/qsort.c +212 -0
- data/ext/cumo/narray/gen/tmpl/rand.c +168 -0
- data/ext/cumo/narray/gen/tmpl/rand_norm.c +121 -0
- data/ext/cumo/narray/gen/tmpl/real_accum_kernel.cu +75 -0
- data/ext/cumo/narray/gen/tmpl/seq.c +112 -0
- data/ext/cumo/narray/gen/tmpl/seq_kernel.cu +43 -0
- data/ext/cumo/narray/gen/tmpl/set2.c +57 -0
- data/ext/cumo/narray/gen/tmpl/sort.c +48 -0
- data/ext/cumo/narray/gen/tmpl/sort_index.c +111 -0
- data/ext/cumo/narray/gen/tmpl/store.c +41 -0
- data/ext/cumo/narray/gen/tmpl/store_array.c +187 -0
- data/ext/cumo/narray/gen/tmpl/store_array_kernel.cu +58 -0
- data/ext/cumo/narray/gen/tmpl/store_bit.c +86 -0
- data/ext/cumo/narray/gen/tmpl/store_bit_kernel.cu +66 -0
- data/ext/cumo/narray/gen/tmpl/store_from.c +81 -0
- data/ext/cumo/narray/gen/tmpl/store_from_kernel.cu +58 -0
- data/ext/cumo/narray/gen/tmpl/store_kernel.cu +3 -0
- data/ext/cumo/narray/gen/tmpl/store_numeric.c +9 -0
- data/ext/cumo/narray/gen/tmpl/to_a.c +43 -0
- data/ext/cumo/narray/gen/tmpl/unary.c +132 -0
- data/ext/cumo/narray/gen/tmpl/unary2.c +60 -0
- data/ext/cumo/narray/gen/tmpl/unary_kernel.cu +72 -0
- data/ext/cumo/narray/gen/tmpl/unary_ret2.c +34 -0
- data/ext/cumo/narray/gen/tmpl/unary_s.c +86 -0
- data/ext/cumo/narray/gen/tmpl/unary_s_kernel.cu +58 -0
- data/ext/cumo/narray/gen/tmpl_bit/allocate.c +24 -0
- data/ext/cumo/narray/gen/tmpl_bit/aref.c +54 -0
- data/ext/cumo/narray/gen/tmpl_bit/aref_cpu.c +57 -0
- data/ext/cumo/narray/gen/tmpl_bit/aset.c +56 -0
- data/ext/cumo/narray/gen/tmpl_bit/binary.c +98 -0
- data/ext/cumo/narray/gen/tmpl_bit/bit_count.c +64 -0
- data/ext/cumo/narray/gen/tmpl_bit/bit_count_cpu.c +88 -0
- data/ext/cumo/narray/gen/tmpl_bit/bit_count_kernel.cu +76 -0
- data/ext/cumo/narray/gen/tmpl_bit/bit_reduce.c +133 -0
- data/ext/cumo/narray/gen/tmpl_bit/each.c +48 -0
- data/ext/cumo/narray/gen/tmpl_bit/each_with_index.c +70 -0
- data/ext/cumo/narray/gen/tmpl_bit/extract.c +30 -0
- data/ext/cumo/narray/gen/tmpl_bit/extract_cpu.c +29 -0
- data/ext/cumo/narray/gen/tmpl_bit/fill.c +69 -0
- data/ext/cumo/narray/gen/tmpl_bit/format.c +64 -0
- data/ext/cumo/narray/gen/tmpl_bit/format_to_a.c +51 -0
- data/ext/cumo/narray/gen/tmpl_bit/inspect.c +21 -0
- data/ext/cumo/narray/gen/tmpl_bit/mask.c +136 -0
- data/ext/cumo/narray/gen/tmpl_bit/none_p.c +14 -0
- data/ext/cumo/narray/gen/tmpl_bit/store_array.c +108 -0
- data/ext/cumo/narray/gen/tmpl_bit/store_bit.c +70 -0
- data/ext/cumo/narray/gen/tmpl_bit/store_from.c +60 -0
- data/ext/cumo/narray/gen/tmpl_bit/to_a.c +47 -0
- data/ext/cumo/narray/gen/tmpl_bit/unary.c +81 -0
- data/ext/cumo/narray/gen/tmpl_bit/where.c +90 -0
- data/ext/cumo/narray/gen/tmpl_bit/where2.c +95 -0
- data/ext/cumo/narray/index.c +880 -0
- data/ext/cumo/narray/kwargs.c +153 -0
- data/ext/cumo/narray/math.c +142 -0
- data/ext/cumo/narray/narray.c +1948 -0
- data/ext/cumo/narray/ndloop.c +2105 -0
- data/ext/cumo/narray/rand.c +45 -0
- data/ext/cumo/narray/step.c +474 -0
- data/ext/cumo/narray/struct.c +886 -0
- data/lib/cumo.rb +3 -0
- data/lib/cumo/cuda.rb +11 -0
- data/lib/cumo/cuda/compile_error.rb +36 -0
- data/lib/cumo/cuda/compiler.rb +161 -0
- data/lib/cumo/cuda/device.rb +47 -0
- data/lib/cumo/cuda/link_state.rb +31 -0
- data/lib/cumo/cuda/module.rb +40 -0
- data/lib/cumo/cuda/nvrtc_program.rb +27 -0
- data/lib/cumo/linalg.rb +12 -0
- data/lib/cumo/narray.rb +2 -0
- data/lib/cumo/narray/extra.rb +1278 -0
- data/lib/erbpp.rb +294 -0
- data/lib/erbpp/line_number.rb +137 -0
- data/lib/erbpp/narray_def.rb +381 -0
- data/numo-narray-version +1 -0
- data/run.gdb +7 -0
- metadata +353 -0
|
@@ -0,0 +1,101 @@
|
|
|
1
|
+
#ifndef CUMO_REAL_ACCUM_KERNEL_H
|
|
2
|
+
#define CUMO_REAL_ACCUM_KERNEL_H
|
|
3
|
+
|
|
4
|
+
#define not_nan(x) ((x)==(x))
|
|
5
|
+
|
|
6
|
+
#define m_mulsum(x,y,z) {z = m_add(m_mul(x,y),z);}
|
|
7
|
+
#define m_mulsum_nan(x,y,z) { \
|
|
8
|
+
if(not_nan(x) && not_nan(y)) { \
|
|
9
|
+
z = m_add(m_mul(x,y),z); \
|
|
10
|
+
}}
|
|
11
|
+
|
|
12
|
+
#define m_cumsum(x,y) {(x)=m_add(x,y);}
|
|
13
|
+
#define m_cumsum_nan(x,y) { \
|
|
14
|
+
if (!not_nan(x)) { \
|
|
15
|
+
(x) = (y); \
|
|
16
|
+
} else if (not_nan(y)) { \
|
|
17
|
+
(x) = m_add(x,y); \
|
|
18
|
+
}}
|
|
19
|
+
|
|
20
|
+
#define m_cumprod(x,y) {(x)=m_mul(x,y);}
|
|
21
|
+
#define m_cumprod_nan(x,y) { \
|
|
22
|
+
if (!not_nan(x)) { \
|
|
23
|
+
(x) = (y); \
|
|
24
|
+
} else if (not_nan(y)) { \
|
|
25
|
+
(x) = m_mul(x,y); \
|
|
26
|
+
}}
|
|
27
|
+
|
|
28
|
+
__host__ __device__ static inline dtype f_maximum(dtype x, dtype y)
|
|
29
|
+
{
|
|
30
|
+
if (m_ge(x,y)) {
|
|
31
|
+
return x;
|
|
32
|
+
}
|
|
33
|
+
if (not_nan(y)) {
|
|
34
|
+
return y;
|
|
35
|
+
}
|
|
36
|
+
return x;
|
|
37
|
+
}
|
|
38
|
+
|
|
39
|
+
__host__ __device__ static inline dtype f_maximum_nan(dtype x, dtype y)
|
|
40
|
+
{
|
|
41
|
+
if (m_ge(x,y)) {
|
|
42
|
+
return x;
|
|
43
|
+
}
|
|
44
|
+
if (!not_nan(x)) {
|
|
45
|
+
return x;
|
|
46
|
+
}
|
|
47
|
+
return y;
|
|
48
|
+
}
|
|
49
|
+
|
|
50
|
+
__host__ __device__ static inline dtype f_minimum(dtype x, dtype y)
|
|
51
|
+
{
|
|
52
|
+
if (m_le(x,y)) {
|
|
53
|
+
return x;
|
|
54
|
+
}
|
|
55
|
+
if (not_nan(y)) {
|
|
56
|
+
return y;
|
|
57
|
+
}
|
|
58
|
+
return x;
|
|
59
|
+
}
|
|
60
|
+
|
|
61
|
+
__host__ __device__ static inline dtype f_minimum_nan(dtype x, dtype y)
|
|
62
|
+
{
|
|
63
|
+
if (m_le(x,y)) {
|
|
64
|
+
return x;
|
|
65
|
+
}
|
|
66
|
+
if (!not_nan(x)) {
|
|
67
|
+
return x;
|
|
68
|
+
}
|
|
69
|
+
return y;
|
|
70
|
+
}
|
|
71
|
+
|
|
72
|
+
/* --------- thrust ----------------- */
|
|
73
|
+
#include "cumo/cuda/cumo_thrust.hpp"
|
|
74
|
+
|
|
75
|
+
struct cumo_thrust_plus : public thrust::binary_function<dtype, dtype, dtype>
|
|
76
|
+
{
|
|
77
|
+
__host__ __device__ dtype operator()(dtype x, dtype y) { return m_add(x,y); }
|
|
78
|
+
};
|
|
79
|
+
|
|
80
|
+
struct cumo_thrust_multiplies : public thrust::binary_function<dtype, dtype, dtype>
|
|
81
|
+
{
|
|
82
|
+
__host__ __device__ dtype operator()(dtype x, dtype y) { return m_mul(x,y); }
|
|
83
|
+
};
|
|
84
|
+
|
|
85
|
+
struct cumo_thrust_multiplies_mulsum_nan : public thrust::binary_function<dtype, dtype, dtype>
|
|
86
|
+
{
|
|
87
|
+
__host__ __device__ dtype operator()(dtype x, dtype y) {
|
|
88
|
+
if (not_nan(x) && not_nan(y)) {
|
|
89
|
+
return m_mul(x, y);
|
|
90
|
+
} else {
|
|
91
|
+
return m_zero;
|
|
92
|
+
}
|
|
93
|
+
}
|
|
94
|
+
};
|
|
95
|
+
|
|
96
|
+
struct cumo_thrust_square : public thrust::unary_function<dtype, dtype>
|
|
97
|
+
{
|
|
98
|
+
__host__ __device__ rtype operator()(const dtype& x) const { return m_square(x); }
|
|
99
|
+
};
|
|
100
|
+
|
|
101
|
+
#endif // CUMO_REAL_ACCUM_KERNEL_H
|
|
@@ -0,0 +1,80 @@
|
|
|
1
|
+
#define m_zero INT2FIX(0)
|
|
2
|
+
#define m_one INT2FIX(1)
|
|
3
|
+
|
|
4
|
+
#define m_num_to_data(x) (x)
|
|
5
|
+
#define m_data_to_num(x) (x)
|
|
6
|
+
|
|
7
|
+
#define m_from_double(x) rb_float_new(x)
|
|
8
|
+
#define m_from_real(x) rb_float_new(x)
|
|
9
|
+
#define m_from_sint(x) INT2FIX(x)
|
|
10
|
+
#define m_from_int32(x) INT322NUM(x)
|
|
11
|
+
#define m_from_int64(x) INT642NUM(x)
|
|
12
|
+
#define m_from_uint32(x) UINT322NUM(x)
|
|
13
|
+
#define m_from_uint64(x) UINT642NUM(x)
|
|
14
|
+
|
|
15
|
+
#define m_add(x,y) rb_funcall(x,'+',1,y)
|
|
16
|
+
#define m_sub(x,y) rb_funcall(x,'-',1,y)
|
|
17
|
+
#define m_mul(x,y) rb_funcall(x,'*',1,y)
|
|
18
|
+
#define m_div(x,y) rb_funcall(x,'/',1,y)
|
|
19
|
+
#define m_mod(x,y) rb_funcall(x,'%',1,y)
|
|
20
|
+
#define m_divmod(x,y,a,b) \
|
|
21
|
+
{x = rb_funcall(x,id_divmod,1,y); \
|
|
22
|
+
a = RARRAY_PTR(x)[0]; b = RARRAY_PTR(x)[1];} /* divmod yields [quotient, remainder]: a takes element 0, b takes element 1 */
|
|
23
|
+
#define m_pow(x,y) rb_funcall(x,id_pow,1,y)
|
|
24
|
+
#define m_pow_int(x,y) rb_funcall(x,id_pow,1,y)
|
|
25
|
+
|
|
26
|
+
#define m_abs(x) rb_funcall(x,id_abs,0)
|
|
27
|
+
#define m_minus(x) rb_funcall(x,id_minus,0)
|
|
28
|
+
#define m_reciprocal(x) rb_funcall(x,id_reciprocal,0)
|
|
29
|
+
#define m_square(x) rb_funcall(x,'*',1,x)
|
|
30
|
+
#define m_floor(x) rb_funcall(x,id_floor,0)
|
|
31
|
+
#define m_round(x) rb_funcall(x,id_round,0)
|
|
32
|
+
#define m_ceil(x) rb_funcall(x,id_ceil,0)
|
|
33
|
+
#define m_trunc(x) rb_funcall(x,id_truncate,0)
|
|
34
|
+
#define m_sign(x) rb_funcall(x,id_ufo,1,INT2FIX(0))
|
|
35
|
+
|
|
36
|
+
#define m_eq(x,y) RTEST(rb_funcall(x,id_eq,1,y))
|
|
37
|
+
#define m_ne(x,y) RTEST(rb_funcall(x,id_ne,1,y))
|
|
38
|
+
#define m_gt(x,y) RTEST(rb_funcall(x,id_gt,1,y))
|
|
39
|
+
#define m_ge(x,y) RTEST(rb_funcall(x,id_ge,1,y))
|
|
40
|
+
#define m_lt(x,y) RTEST(rb_funcall(x,id_lt,1,y))
|
|
41
|
+
#define m_le(x,y) RTEST(rb_funcall(x,id_le,1,y))
|
|
42
|
+
|
|
43
|
+
#define m_bit_and(x,y) rb_funcall(x,id_bit_and,1,y)
|
|
44
|
+
#define m_bit_or(x,y) rb_funcall(x,id_bit_or, 1,y)
|
|
45
|
+
#define m_bit_xor(x,y) rb_funcall(x,id_bit_xor,1,y)
|
|
46
|
+
#define m_bit_not(x) rb_funcall(x,id_bit_not,0)
|
|
47
|
+
|
|
48
|
+
#define m_left_shift(x,y) rb_funcall(x,id_left_shift,1,y)
|
|
49
|
+
#define m_right_shift(x,y) rb_funcall(x,id_right_shift,1,y)
|
|
50
|
+
|
|
51
|
+
#define m_isnan(x) ((rb_respond_to(x,id_nan_p)) ? RTEST(rb_funcall(x,id_nan_p,0)) : 0)
|
|
52
|
+
#define m_isinf(x) ((rb_respond_to(x,id_infinite_p)) ? RTEST(rb_funcall(x,id_infinite_p,0)) : 0)
|
|
53
|
+
#define m_isposinf(x) ((rb_respond_to(x,id_infinite_p)) ? \
|
|
54
|
+
((RTEST(rb_funcall(x,id_infinite_p,0))) ? \
|
|
55
|
+
m_gt(x,INT2FIX(0)) : 0) : 0)
|
|
56
|
+
#define m_isneginf(x) ((rb_respond_to(x,id_infinite_p)) ? \
|
|
57
|
+
((RTEST(rb_funcall(x,id_infinite_p,0))) ? \
|
|
58
|
+
m_lt(x,INT2FIX(0)) : 0) : 0)
|
|
59
|
+
#define m_isfinite(x) ((rb_respond_to(x,id_finite_p)) ? RTEST(rb_funcall(x,id_finite_p,0)) : 0)
|
|
60
|
+
|
|
61
|
+
#define m_mulsum_init INT2FIX(0)
|
|
62
|
+
|
|
63
|
+
#define m_sprintf(s,x) robj_sprintf(s,x)
|
|
64
|
+
|
|
65
|
+
static inline int robj_sprintf(char *s, VALUE x) {
|
|
66
|
+
VALUE v = rb_funcall(x,rb_intern("to_s"),0);
|
|
67
|
+
return sprintf(s,"%s",StringValuePtr(v));
|
|
68
|
+
}
|
|
69
|
+
|
|
70
|
+
#define m_sqrt(x) \
|
|
71
|
+
rb_funcall(rb_const_get(rb_mKernel,rb_intern("Math")), \
|
|
72
|
+
rb_intern("sqrt"),1,x) /* no trailing ';' so the macro stays usable as an expression */
|
|
73
|
+
|
|
74
|
+
static inline dtype f_seq(dtype x, dtype y, size_t c)
|
|
75
|
+
{
|
|
76
|
+
y = m_mul(y,SIZET2NUM(c));
|
|
77
|
+
return m_add(x,y);
|
|
78
|
+
}
|
|
79
|
+
|
|
80
|
+
#include "real_accum.h"
|
|
File without changes
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
typedef VALUE dtype;
|
|
2
|
+
typedef VALUE rtype;
|
|
3
|
+
#define cT cumo_cRObject
|
|
4
|
+
#define cRT cT
|
|
5
|
+
//#define mTM mRObjectMath
|
|
6
|
+
|
|
7
|
+
#include "float_def.h"
|
|
8
|
+
#include "robj_macro.h"
|
|
9
|
+
|
|
10
|
+
#define m_min_init (0.0/0.0)
|
|
11
|
+
#define m_max_init (0.0/0.0)
|
|
12
|
+
#define m_extract(x) (*(VALUE*)x)
|
|
13
|
+
#define m_nearly_eq(x,y) robj_nearly_eq(x,y)
|
|
14
|
+
|
|
15
|
+
inline static int robj_nearly_eq(VALUE vx, VALUE vy)
|
|
16
|
+
{
|
|
17
|
+
double x, y;
|
|
18
|
+
x = NUM2DBL(vx);
|
|
19
|
+
y = NUM2DBL(vy);
|
|
20
|
+
return (fabs(x-y)<=(fabs(x)+fabs(y))*DBL_EPSILON*2);
|
|
21
|
+
}
|
|
22
|
+
|
|
23
|
+
/* generates a random number on [0,1)-real-interval */
|
|
24
|
+
inline static dtype m_rand(dtype max)
|
|
25
|
+
{
|
|
26
|
+
return DBL2NUM(genrand_res53_mix() * NUM2DBL(max)); /* max is a VALUE: convert to double before scaling, not the raw handle */
|
|
27
|
+
}
|
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
typedef scomplex dtype;
|
|
2
|
+
typedef float rtype;
|
|
3
|
+
#define cT cumo_cSComplex
|
|
4
|
+
#define cRT cumo_cSFloat
|
|
5
|
+
#define mTM cumo_mSComplexMath
|
|
6
|
+
|
|
7
|
+
#include "complex_macro.h"
|
|
8
|
+
#include "cublas_v2.h"
|
|
9
|
+
#include "cumo/cuda/cublas.h"
|
|
10
|
+
|
|
11
|
+
static inline bool c_nearly_eq(dtype x, dtype y) {
|
|
12
|
+
return c_abs(c_sub(x,y)) <= (c_abs(x)+c_abs(y))*FLT_EPSILON*2;
|
|
13
|
+
}
|
|
14
|
+
|
|
15
|
+
#ifdef SFMT_H
|
|
16
|
+
/* generates a random number on [0,1)-real-interval */
|
|
17
|
+
inline static dtype m_rand(dtype max)
|
|
18
|
+
{
|
|
19
|
+
dtype z;
|
|
20
|
+
REAL(z) = to_real2(gen_rand32()) * REAL(max);
|
|
21
|
+
IMAG(z) = to_real2(gen_rand32()) * IMAG(max);
|
|
22
|
+
return z;
|
|
23
|
+
}
|
|
24
|
+
|
|
25
|
+
/* generates random numbers from the normal distribution
|
|
26
|
+
using Box-Muller Transformation.
|
|
27
|
+
*/
|
|
28
|
+
inline static void m_rand_norm(dtype mu, rtype sigma, dtype *a0)
|
|
29
|
+
{
|
|
30
|
+
rtype x1, x2, w;
|
|
31
|
+
do {
|
|
32
|
+
x1 = to_real2(gen_rand32());
|
|
33
|
+
x1 = x1*2-1;
|
|
34
|
+
x2 = to_real2(gen_rand32());
|
|
35
|
+
x2 = x2*2-1;
|
|
36
|
+
w = x1 * x1 + x2 * x2;
|
|
37
|
+
} while (w>=1 || w==0); /* also reject w==0: log(0)/0 below would produce inf/NaN */
|
|
38
|
+
w = sqrt( (-2*log(w)) / w );
|
|
39
|
+
REAL(*a0) = x1*w * sigma + REAL(mu);
|
|
40
|
+
IMAG(*a0) = x2*w * sigma + IMAG(mu);
|
|
41
|
+
}
|
|
42
|
+
#endif
|
|
43
|
+
|
|
44
|
+
#define M_EPSILON rb_float_new(1.1920928955078125e-07)
|
|
45
|
+
#define M_MIN rb_float_new(1.1754943508222875e-38)
|
|
46
|
+
#define M_MAX rb_float_new(3.4028234663852886e+38)
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
#ifndef CUMO_SCOMPLEX_KERNEL_H
|
|
2
|
+
#define CUMO_SCOMPLEX_KERNEL_H
|
|
3
|
+
|
|
4
|
+
typedef scomplex dtype;
|
|
5
|
+
typedef float rtype;
|
|
6
|
+
|
|
7
|
+
#include "complex_macro_kernel.h"
|
|
8
|
+
|
|
9
|
+
__device__ static inline bool c_nearly_eq(dtype x, dtype y) {
|
|
10
|
+
return c_abs(c_sub(x,y)) <= (c_abs(x)+c_abs(y))*FLT_EPSILON*2;
|
|
11
|
+
}
|
|
12
|
+
|
|
13
|
+
#endif // CUMO_SCOMPLEX_KERNEL_H
|
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
typedef float dtype;
|
|
2
|
+
typedef float rtype;
|
|
3
|
+
#define cT cumo_cSFloat
|
|
4
|
+
#define cRT cumo_cSFloat
|
|
5
|
+
#define mTM cumo_mSFloatMath
|
|
6
|
+
|
|
7
|
+
#include "float_macro.h"
|
|
8
|
+
#include "cublas_v2.h"
|
|
9
|
+
#include "cumo/cuda/cublas.h"
|
|
10
|
+
|
|
11
|
+
#ifdef SFMT_H
|
|
12
|
+
/* generates a random number on [0,1)-real-interval */
|
|
13
|
+
inline static dtype m_rand(dtype max)
|
|
14
|
+
{
|
|
15
|
+
return to_real2(gen_rand32()) * max;
|
|
16
|
+
}
|
|
17
|
+
|
|
18
|
+
/* generates random numbers from the normal distribution
|
|
19
|
+
using Box-Muller Transformation.
|
|
20
|
+
*/
|
|
21
|
+
inline static void m_rand_norm(dtype mu, dtype sigma, dtype *a0, dtype *a1)
|
|
22
|
+
{
|
|
23
|
+
dtype x1, x2, w;
|
|
24
|
+
do {
|
|
25
|
+
x1 = to_real2(gen_rand32());
|
|
26
|
+
x1 = x1*2-1;
|
|
27
|
+
x2 = to_real2(gen_rand32());
|
|
28
|
+
x2 = x2*2-1;
|
|
29
|
+
w = x1 * x1 + x2 * x2;
|
|
30
|
+
} while (w>=1 || w==0); /* also reject w==0: log(0)/0 below would produce inf/NaN */
|
|
31
|
+
w = sqrt( (-2*log(w)) / w );
|
|
32
|
+
if (a0) {*a0 = x1*w * sigma + mu;}
|
|
33
|
+
if (a1) {*a1 = x2*w * sigma + mu;}
|
|
34
|
+
}
|
|
35
|
+
#endif
|
|
36
|
+
|
|
37
|
+
#define m_min_init cumo_sfloat_new_dim0(0.0/0.0)
|
|
38
|
+
#define m_max_init cumo_sfloat_new_dim0(0.0/0.0)
|
|
39
|
+
|
|
40
|
+
#define m_extract(x) rb_float_new(*(float*)x)
|
|
41
|
+
#define m_nearly_eq(x,y) (fabs((x)-(y))<=(fabs(x)+fabs(y))*FLT_EPSILON*2) /* relative comparison; args parenthesized so expression arguments subtract correctly */
|
|
42
|
+
|
|
43
|
+
#define M_EPSILON rb_float_new(1.1920928955078125e-07)
|
|
44
|
+
#define M_MIN rb_float_new(1.1754943508222875e-38)
|
|
45
|
+
#define M_MAX rb_float_new(3.4028234663852886e+38)
|
|
46
|
+
|
|
47
|
+
#define DATA_MIN FLT_MIN
|
|
48
|
+
#define DATA_MAX FLT_MAX
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
#ifndef CUMO_SFLOAT_KERNEL_H
|
|
2
|
+
#define CUMO_SFLOAT_KERNEL_H
|
|
3
|
+
|
|
4
|
+
typedef float dtype;
|
|
5
|
+
typedef float rtype;
|
|
6
|
+
|
|
7
|
+
#include "float_macro_kernel.h"
|
|
8
|
+
|
|
9
|
+
#define m_nearly_eq(x,y) (fabs((x)-(y))<=(fabs(x)+fabs(y))*FLT_EPSILON*2) /* relative comparison; args parenthesized so expression arguments subtract correctly */
|
|
10
|
+
|
|
11
|
+
#define DATA_MIN FLT_MIN
|
|
12
|
+
#define DATA_MAX FLT_MAX
|
|
13
|
+
|
|
14
|
+
#endif // CUMO_SFLOAT_KERNEL_H
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
typedef u_int16_t dtype;
|
|
2
|
+
typedef u_int16_t rtype;
|
|
3
|
+
#define cT cumo_cUInt16
|
|
4
|
+
#define cRT cT
|
|
5
|
+
|
|
6
|
+
#define m_num_to_data(x) ((dtype)NUM2UINT(x))
|
|
7
|
+
#define m_data_to_num(x) UINT2NUM((unsigned int)(x))
|
|
8
|
+
#define m_extract(x) UINT2NUM((unsigned int)*(dtype*)(x))
|
|
9
|
+
#define m_sprintf(s,x) sprintf(s,"%u",(unsigned int)(x))
|
|
10
|
+
|
|
11
|
+
#ifndef UINT16_MIN
|
|
12
|
+
#define UINT16_MIN (0)
|
|
13
|
+
#endif
|
|
14
|
+
|
|
15
|
+
#ifndef UINT16_MAX
|
|
16
|
+
#define UINT16_MAX (65535)
|
|
17
|
+
#endif
|
|
18
|
+
|
|
19
|
+
#define DATA_MIN UINT16_MIN
|
|
20
|
+
#define DATA_MAX UINT16_MAX
|
|
21
|
+
|
|
22
|
+
#define M_MIN INT2FIX(0)
|
|
23
|
+
#define M_MAX m_data_to_num(UINT16_MAX)
|
|
24
|
+
|
|
25
|
+
#include "uint_macro.h"
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
#ifndef CUMO_UINT16_KERNEL_H
|
|
2
|
+
#define CUMO_UINT16_KERNEL_H
|
|
3
|
+
|
|
4
|
+
typedef u_int16_t dtype;
|
|
5
|
+
typedef u_int16_t rtype;
|
|
6
|
+
|
|
7
|
+
#ifndef UINT16_MIN
|
|
8
|
+
#define UINT16_MIN (0)
|
|
9
|
+
#endif
|
|
10
|
+
|
|
11
|
+
#ifndef UINT16_MAX
|
|
12
|
+
#define UINT16_MAX (65535)
|
|
13
|
+
#endif
|
|
14
|
+
|
|
15
|
+
#define DATA_MIN UINT16_MIN
|
|
16
|
+
#define DATA_MAX UINT16_MAX
|
|
17
|
+
|
|
18
|
+
#include "uint_macro_kernel.h"
|
|
19
|
+
|
|
20
|
+
#endif // CUMO_UINT16_KERNEL_H
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
typedef u_int32_t dtype;
|
|
2
|
+
typedef u_int32_t rtype;
|
|
3
|
+
#define cT cumo_cUInt32
|
|
4
|
+
#define cRT cT
|
|
5
|
+
|
|
6
|
+
#define m_num_to_data(x) ((dtype)NUM2UINT32(x))
|
|
7
|
+
#define m_data_to_num(x) UINT322NUM((u_int32_t)(x))
|
|
8
|
+
#define m_extract(x) UINT322NUM((u_int32_t)*(dtype*)(x))
|
|
9
|
+
#define m_sprintf(s,x) sprintf(s,"%"PRIu32,(u_int32_t)(x))
|
|
10
|
+
|
|
11
|
+
#ifndef UINT32_MIN
|
|
12
|
+
#define UINT32_MIN (0)
|
|
13
|
+
#endif
|
|
14
|
+
|
|
15
|
+
#ifndef UINT32_MAX
|
|
16
|
+
#define UINT32_MAX (4294967295u)
|
|
17
|
+
#endif
|
|
18
|
+
|
|
19
|
+
#define DATA_MIN UINT32_MIN
|
|
20
|
+
#define DATA_MAX UINT32_MAX
|
|
21
|
+
|
|
22
|
+
#define M_MIN INT2FIX(0)
|
|
23
|
+
#define M_MAX m_data_to_num(UINT32_MAX)
|
|
24
|
+
|
|
25
|
+
#include "uint_macro.h"
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
#ifndef CUMO_UINT32_KERNEL_H
|
|
2
|
+
#define CUMO_UINT32_KERNEL_H
|
|
3
|
+
|
|
4
|
+
typedef u_int32_t dtype;
|
|
5
|
+
typedef u_int32_t rtype;
|
|
6
|
+
|
|
7
|
+
#ifndef UINT32_MIN
|
|
8
|
+
#define UINT32_MIN (0)
|
|
9
|
+
#endif
|
|
10
|
+
|
|
11
|
+
#ifndef UINT32_MAX
|
|
12
|
+
#define UINT32_MAX (4294967295u)
|
|
13
|
+
#endif
|
|
14
|
+
|
|
15
|
+
#define DATA_MIN UINT32_MIN
|
|
16
|
+
#define DATA_MAX UINT32_MAX
|
|
17
|
+
|
|
18
|
+
#include "uint_macro_kernel.h"
|
|
19
|
+
|
|
20
|
+
#endif // CUMO_UINT32_KERNEL_H
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
typedef u_int64_t dtype;
|
|
2
|
+
typedef u_int64_t rtype;
|
|
3
|
+
#define cT cumo_cUInt64
|
|
4
|
+
#define cRT cT
|
|
5
|
+
|
|
6
|
+
#define m_num_to_data(x) ((dtype)NUM2UINT64(x))
|
|
7
|
+
#define m_data_to_num(x) UINT642NUM((u_int64_t)(x))
|
|
8
|
+
#define m_extract(x) UINT642NUM((u_int64_t)*(dtype*)(x))
|
|
9
|
+
#define m_sprintf(s,x) sprintf(s,"%"PRIu64,(u_int64_t)(x))
|
|
10
|
+
|
|
11
|
+
#ifndef UINT64_MIN
|
|
12
|
+
#define UINT64_MIN (0)
|
|
13
|
+
#endif
|
|
14
|
+
|
|
15
|
+
#ifndef UINT64_MAX
|
|
16
|
+
#define UINT64_MAX (18446744073709551615ull) /* ull: unsigned long is only 32 bits on LLP64 targets */
|
|
17
|
+
#endif
|
|
18
|
+
|
|
19
|
+
#define DATA_MIN UINT64_MIN
|
|
20
|
+
#define DATA_MAX UINT64_MAX
|
|
21
|
+
|
|
22
|
+
#define M_MIN INT2FIX(0)
|
|
23
|
+
#define M_MAX m_data_to_num(UINT64_MAX)
|
|
24
|
+
|
|
25
|
+
#include "uint_macro.h"
|