cumo 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +27 -0
- data/.travis.yml +5 -0
- data/3rd_party/mkmf-cu/.gitignore +36 -0
- data/3rd_party/mkmf-cu/Gemfile +3 -0
- data/3rd_party/mkmf-cu/LICENSE +21 -0
- data/3rd_party/mkmf-cu/README.md +36 -0
- data/3rd_party/mkmf-cu/Rakefile +11 -0
- data/3rd_party/mkmf-cu/bin/mkmf-cu-nvcc +4 -0
- data/3rd_party/mkmf-cu/lib/mkmf-cu.rb +32 -0
- data/3rd_party/mkmf-cu/lib/mkmf-cu/cli.rb +80 -0
- data/3rd_party/mkmf-cu/lib/mkmf-cu/nvcc.rb +157 -0
- data/3rd_party/mkmf-cu/mkmf-cu.gemspec +16 -0
- data/3rd_party/mkmf-cu/test/test_mkmf-cu.rb +67 -0
- data/CODE_OF_CONDUCT.md +46 -0
- data/Gemfile +8 -0
- data/LICENSE.txt +82 -0
- data/README.md +252 -0
- data/Rakefile +43 -0
- data/bench/broadcast_fp32.rb +138 -0
- data/bench/cumo_bench.rb +193 -0
- data/bench/numo_bench.rb +138 -0
- data/bench/reduction_fp32.rb +117 -0
- data/bin/console +14 -0
- data/bin/setup +8 -0
- data/cumo.gemspec +32 -0
- data/ext/cumo/cuda/cublas.c +278 -0
- data/ext/cumo/cuda/driver.c +421 -0
- data/ext/cumo/cuda/memory_pool.cpp +185 -0
- data/ext/cumo/cuda/memory_pool_impl.cpp +308 -0
- data/ext/cumo/cuda/memory_pool_impl.hpp +370 -0
- data/ext/cumo/cuda/memory_pool_impl_test.cpp +554 -0
- data/ext/cumo/cuda/nvrtc.c +207 -0
- data/ext/cumo/cuda/runtime.c +167 -0
- data/ext/cumo/cumo.c +148 -0
- data/ext/cumo/depend.erb +58 -0
- data/ext/cumo/extconf.rb +179 -0
- data/ext/cumo/include/cumo.h +25 -0
- data/ext/cumo/include/cumo/compat.h +23 -0
- data/ext/cumo/include/cumo/cuda/cublas.h +153 -0
- data/ext/cumo/include/cumo/cuda/cumo_thrust.hpp +187 -0
- data/ext/cumo/include/cumo/cuda/cumo_thrust_complex.hpp +79 -0
- data/ext/cumo/include/cumo/cuda/driver.h +22 -0
- data/ext/cumo/include/cumo/cuda/memory_pool.h +28 -0
- data/ext/cumo/include/cumo/cuda/nvrtc.h +22 -0
- data/ext/cumo/include/cumo/cuda/runtime.h +40 -0
- data/ext/cumo/include/cumo/indexer.h +238 -0
- data/ext/cumo/include/cumo/intern.h +142 -0
- data/ext/cumo/include/cumo/intern_fwd.h +38 -0
- data/ext/cumo/include/cumo/intern_kernel.h +6 -0
- data/ext/cumo/include/cumo/narray.h +429 -0
- data/ext/cumo/include/cumo/narray_kernel.h +149 -0
- data/ext/cumo/include/cumo/ndloop.h +95 -0
- data/ext/cumo/include/cumo/reduce_kernel.h +126 -0
- data/ext/cumo/include/cumo/template.h +158 -0
- data/ext/cumo/include/cumo/template_kernel.h +77 -0
- data/ext/cumo/include/cumo/types/bit.h +40 -0
- data/ext/cumo/include/cumo/types/bit_kernel.h +34 -0
- data/ext/cumo/include/cumo/types/complex.h +402 -0
- data/ext/cumo/include/cumo/types/complex_kernel.h +414 -0
- data/ext/cumo/include/cumo/types/complex_macro.h +382 -0
- data/ext/cumo/include/cumo/types/complex_macro_kernel.h +186 -0
- data/ext/cumo/include/cumo/types/dcomplex.h +46 -0
- data/ext/cumo/include/cumo/types/dcomplex_kernel.h +13 -0
- data/ext/cumo/include/cumo/types/dfloat.h +47 -0
- data/ext/cumo/include/cumo/types/dfloat_kernel.h +14 -0
- data/ext/cumo/include/cumo/types/float_def.h +34 -0
- data/ext/cumo/include/cumo/types/float_def_kernel.h +39 -0
- data/ext/cumo/include/cumo/types/float_macro.h +191 -0
- data/ext/cumo/include/cumo/types/float_macro_kernel.h +158 -0
- data/ext/cumo/include/cumo/types/int16.h +24 -0
- data/ext/cumo/include/cumo/types/int16_kernel.h +23 -0
- data/ext/cumo/include/cumo/types/int32.h +24 -0
- data/ext/cumo/include/cumo/types/int32_kernel.h +19 -0
- data/ext/cumo/include/cumo/types/int64.h +24 -0
- data/ext/cumo/include/cumo/types/int64_kernel.h +19 -0
- data/ext/cumo/include/cumo/types/int8.h +24 -0
- data/ext/cumo/include/cumo/types/int8_kernel.h +19 -0
- data/ext/cumo/include/cumo/types/int_macro.h +67 -0
- data/ext/cumo/include/cumo/types/int_macro_kernel.h +48 -0
- data/ext/cumo/include/cumo/types/real_accum.h +486 -0
- data/ext/cumo/include/cumo/types/real_accum_kernel.h +101 -0
- data/ext/cumo/include/cumo/types/robj_macro.h +80 -0
- data/ext/cumo/include/cumo/types/robj_macro_kernel.h +0 -0
- data/ext/cumo/include/cumo/types/robject.h +27 -0
- data/ext/cumo/include/cumo/types/robject_kernel.h +7 -0
- data/ext/cumo/include/cumo/types/scomplex.h +46 -0
- data/ext/cumo/include/cumo/types/scomplex_kernel.h +13 -0
- data/ext/cumo/include/cumo/types/sfloat.h +48 -0
- data/ext/cumo/include/cumo/types/sfloat_kernel.h +14 -0
- data/ext/cumo/include/cumo/types/uint16.h +25 -0
- data/ext/cumo/include/cumo/types/uint16_kernel.h +20 -0
- data/ext/cumo/include/cumo/types/uint32.h +25 -0
- data/ext/cumo/include/cumo/types/uint32_kernel.h +20 -0
- data/ext/cumo/include/cumo/types/uint64.h +25 -0
- data/ext/cumo/include/cumo/types/uint64_kernel.h +20 -0
- data/ext/cumo/include/cumo/types/uint8.h +25 -0
- data/ext/cumo/include/cumo/types/uint8_kernel.h +20 -0
- data/ext/cumo/include/cumo/types/uint_macro.h +58 -0
- data/ext/cumo/include/cumo/types/uint_macro_kernel.h +38 -0
- data/ext/cumo/include/cumo/types/xint_macro.h +169 -0
- data/ext/cumo/include/cumo/types/xint_macro_kernel.h +88 -0
- data/ext/cumo/narray/SFMT-params.h +97 -0
- data/ext/cumo/narray/SFMT-params19937.h +46 -0
- data/ext/cumo/narray/SFMT.c +620 -0
- data/ext/cumo/narray/SFMT.h +167 -0
- data/ext/cumo/narray/array.c +638 -0
- data/ext/cumo/narray/data.c +961 -0
- data/ext/cumo/narray/gen/cogen.rb +56 -0
- data/ext/cumo/narray/gen/cogen_kernel.rb +58 -0
- data/ext/cumo/narray/gen/def/bit.rb +37 -0
- data/ext/cumo/narray/gen/def/dcomplex.rb +39 -0
- data/ext/cumo/narray/gen/def/dfloat.rb +37 -0
- data/ext/cumo/narray/gen/def/int16.rb +36 -0
- data/ext/cumo/narray/gen/def/int32.rb +36 -0
- data/ext/cumo/narray/gen/def/int64.rb +36 -0
- data/ext/cumo/narray/gen/def/int8.rb +36 -0
- data/ext/cumo/narray/gen/def/robject.rb +37 -0
- data/ext/cumo/narray/gen/def/scomplex.rb +39 -0
- data/ext/cumo/narray/gen/def/sfloat.rb +37 -0
- data/ext/cumo/narray/gen/def/uint16.rb +36 -0
- data/ext/cumo/narray/gen/def/uint32.rb +36 -0
- data/ext/cumo/narray/gen/def/uint64.rb +36 -0
- data/ext/cumo/narray/gen/def/uint8.rb +36 -0
- data/ext/cumo/narray/gen/erbpp2.rb +346 -0
- data/ext/cumo/narray/gen/narray_def.rb +268 -0
- data/ext/cumo/narray/gen/spec.rb +425 -0
- data/ext/cumo/narray/gen/tmpl/accum.c +86 -0
- data/ext/cumo/narray/gen/tmpl/accum_binary.c +121 -0
- data/ext/cumo/narray/gen/tmpl/accum_binary_kernel.cu +61 -0
- data/ext/cumo/narray/gen/tmpl/accum_index.c +119 -0
- data/ext/cumo/narray/gen/tmpl/accum_index_kernel.cu +66 -0
- data/ext/cumo/narray/gen/tmpl/accum_kernel.cu +12 -0
- data/ext/cumo/narray/gen/tmpl/alloc_func.c +107 -0
- data/ext/cumo/narray/gen/tmpl/allocate.c +37 -0
- data/ext/cumo/narray/gen/tmpl/aref.c +66 -0
- data/ext/cumo/narray/gen/tmpl/aref_cpu.c +50 -0
- data/ext/cumo/narray/gen/tmpl/aset.c +56 -0
- data/ext/cumo/narray/gen/tmpl/binary.c +162 -0
- data/ext/cumo/narray/gen/tmpl/binary2.c +70 -0
- data/ext/cumo/narray/gen/tmpl/binary2_kernel.cu +15 -0
- data/ext/cumo/narray/gen/tmpl/binary_kernel.cu +31 -0
- data/ext/cumo/narray/gen/tmpl/binary_s.c +45 -0
- data/ext/cumo/narray/gen/tmpl/binary_s_kernel.cu +15 -0
- data/ext/cumo/narray/gen/tmpl/bincount.c +181 -0
- data/ext/cumo/narray/gen/tmpl/cast.c +44 -0
- data/ext/cumo/narray/gen/tmpl/cast_array.c +13 -0
- data/ext/cumo/narray/gen/tmpl/class.c +9 -0
- data/ext/cumo/narray/gen/tmpl/class_kernel.cu +6 -0
- data/ext/cumo/narray/gen/tmpl/clip.c +121 -0
- data/ext/cumo/narray/gen/tmpl/coerce_cast.c +10 -0
- data/ext/cumo/narray/gen/tmpl/complex_accum_kernel.cu +129 -0
- data/ext/cumo/narray/gen/tmpl/cond_binary.c +68 -0
- data/ext/cumo/narray/gen/tmpl/cond_binary_kernel.cu +18 -0
- data/ext/cumo/narray/gen/tmpl/cond_unary.c +46 -0
- data/ext/cumo/narray/gen/tmpl/cum.c +50 -0
- data/ext/cumo/narray/gen/tmpl/each.c +47 -0
- data/ext/cumo/narray/gen/tmpl/each_with_index.c +70 -0
- data/ext/cumo/narray/gen/tmpl/ewcomp.c +79 -0
- data/ext/cumo/narray/gen/tmpl/ewcomp_kernel.cu +19 -0
- data/ext/cumo/narray/gen/tmpl/extract.c +22 -0
- data/ext/cumo/narray/gen/tmpl/extract_cpu.c +26 -0
- data/ext/cumo/narray/gen/tmpl/extract_data.c +53 -0
- data/ext/cumo/narray/gen/tmpl/eye.c +105 -0
- data/ext/cumo/narray/gen/tmpl/eye_kernel.cu +19 -0
- data/ext/cumo/narray/gen/tmpl/fill.c +52 -0
- data/ext/cumo/narray/gen/tmpl/fill_kernel.cu +29 -0
- data/ext/cumo/narray/gen/tmpl/float_accum_kernel.cu +106 -0
- data/ext/cumo/narray/gen/tmpl/format.c +62 -0
- data/ext/cumo/narray/gen/tmpl/format_to_a.c +49 -0
- data/ext/cumo/narray/gen/tmpl/frexp.c +38 -0
- data/ext/cumo/narray/gen/tmpl/gemm.c +203 -0
- data/ext/cumo/narray/gen/tmpl/init_class.c +20 -0
- data/ext/cumo/narray/gen/tmpl/init_module.c +12 -0
- data/ext/cumo/narray/gen/tmpl/inspect.c +21 -0
- data/ext/cumo/narray/gen/tmpl/lib.c +50 -0
- data/ext/cumo/narray/gen/tmpl/lib_kernel.cu +24 -0
- data/ext/cumo/narray/gen/tmpl/logseq.c +102 -0
- data/ext/cumo/narray/gen/tmpl/logseq_kernel.cu +31 -0
- data/ext/cumo/narray/gen/tmpl/map_with_index.c +98 -0
- data/ext/cumo/narray/gen/tmpl/median.c +66 -0
- data/ext/cumo/narray/gen/tmpl/minmax.c +47 -0
- data/ext/cumo/narray/gen/tmpl/module.c +9 -0
- data/ext/cumo/narray/gen/tmpl/module_kernel.cu +1 -0
- data/ext/cumo/narray/gen/tmpl/new_dim0.c +15 -0
- data/ext/cumo/narray/gen/tmpl/new_dim0_kernel.cu +8 -0
- data/ext/cumo/narray/gen/tmpl/poly.c +50 -0
- data/ext/cumo/narray/gen/tmpl/pow.c +97 -0
- data/ext/cumo/narray/gen/tmpl/pow_kernel.cu +29 -0
- data/ext/cumo/narray/gen/tmpl/powint.c +17 -0
- data/ext/cumo/narray/gen/tmpl/qsort.c +212 -0
- data/ext/cumo/narray/gen/tmpl/rand.c +168 -0
- data/ext/cumo/narray/gen/tmpl/rand_norm.c +121 -0
- data/ext/cumo/narray/gen/tmpl/real_accum_kernel.cu +75 -0
- data/ext/cumo/narray/gen/tmpl/seq.c +112 -0
- data/ext/cumo/narray/gen/tmpl/seq_kernel.cu +43 -0
- data/ext/cumo/narray/gen/tmpl/set2.c +57 -0
- data/ext/cumo/narray/gen/tmpl/sort.c +48 -0
- data/ext/cumo/narray/gen/tmpl/sort_index.c +111 -0
- data/ext/cumo/narray/gen/tmpl/store.c +41 -0
- data/ext/cumo/narray/gen/tmpl/store_array.c +187 -0
- data/ext/cumo/narray/gen/tmpl/store_array_kernel.cu +58 -0
- data/ext/cumo/narray/gen/tmpl/store_bit.c +86 -0
- data/ext/cumo/narray/gen/tmpl/store_bit_kernel.cu +66 -0
- data/ext/cumo/narray/gen/tmpl/store_from.c +81 -0
- data/ext/cumo/narray/gen/tmpl/store_from_kernel.cu +58 -0
- data/ext/cumo/narray/gen/tmpl/store_kernel.cu +3 -0
- data/ext/cumo/narray/gen/tmpl/store_numeric.c +9 -0
- data/ext/cumo/narray/gen/tmpl/to_a.c +43 -0
- data/ext/cumo/narray/gen/tmpl/unary.c +132 -0
- data/ext/cumo/narray/gen/tmpl/unary2.c +60 -0
- data/ext/cumo/narray/gen/tmpl/unary_kernel.cu +72 -0
- data/ext/cumo/narray/gen/tmpl/unary_ret2.c +34 -0
- data/ext/cumo/narray/gen/tmpl/unary_s.c +86 -0
- data/ext/cumo/narray/gen/tmpl/unary_s_kernel.cu +58 -0
- data/ext/cumo/narray/gen/tmpl_bit/allocate.c +24 -0
- data/ext/cumo/narray/gen/tmpl_bit/aref.c +54 -0
- data/ext/cumo/narray/gen/tmpl_bit/aref_cpu.c +57 -0
- data/ext/cumo/narray/gen/tmpl_bit/aset.c +56 -0
- data/ext/cumo/narray/gen/tmpl_bit/binary.c +98 -0
- data/ext/cumo/narray/gen/tmpl_bit/bit_count.c +64 -0
- data/ext/cumo/narray/gen/tmpl_bit/bit_count_cpu.c +88 -0
- data/ext/cumo/narray/gen/tmpl_bit/bit_count_kernel.cu +76 -0
- data/ext/cumo/narray/gen/tmpl_bit/bit_reduce.c +133 -0
- data/ext/cumo/narray/gen/tmpl_bit/each.c +48 -0
- data/ext/cumo/narray/gen/tmpl_bit/each_with_index.c +70 -0
- data/ext/cumo/narray/gen/tmpl_bit/extract.c +30 -0
- data/ext/cumo/narray/gen/tmpl_bit/extract_cpu.c +29 -0
- data/ext/cumo/narray/gen/tmpl_bit/fill.c +69 -0
- data/ext/cumo/narray/gen/tmpl_bit/format.c +64 -0
- data/ext/cumo/narray/gen/tmpl_bit/format_to_a.c +51 -0
- data/ext/cumo/narray/gen/tmpl_bit/inspect.c +21 -0
- data/ext/cumo/narray/gen/tmpl_bit/mask.c +136 -0
- data/ext/cumo/narray/gen/tmpl_bit/none_p.c +14 -0
- data/ext/cumo/narray/gen/tmpl_bit/store_array.c +108 -0
- data/ext/cumo/narray/gen/tmpl_bit/store_bit.c +70 -0
- data/ext/cumo/narray/gen/tmpl_bit/store_from.c +60 -0
- data/ext/cumo/narray/gen/tmpl_bit/to_a.c +47 -0
- data/ext/cumo/narray/gen/tmpl_bit/unary.c +81 -0
- data/ext/cumo/narray/gen/tmpl_bit/where.c +90 -0
- data/ext/cumo/narray/gen/tmpl_bit/where2.c +95 -0
- data/ext/cumo/narray/index.c +880 -0
- data/ext/cumo/narray/kwargs.c +153 -0
- data/ext/cumo/narray/math.c +142 -0
- data/ext/cumo/narray/narray.c +1948 -0
- data/ext/cumo/narray/ndloop.c +2105 -0
- data/ext/cumo/narray/rand.c +45 -0
- data/ext/cumo/narray/step.c +474 -0
- data/ext/cumo/narray/struct.c +886 -0
- data/lib/cumo.rb +3 -0
- data/lib/cumo/cuda.rb +11 -0
- data/lib/cumo/cuda/compile_error.rb +36 -0
- data/lib/cumo/cuda/compiler.rb +161 -0
- data/lib/cumo/cuda/device.rb +47 -0
- data/lib/cumo/cuda/link_state.rb +31 -0
- data/lib/cumo/cuda/module.rb +40 -0
- data/lib/cumo/cuda/nvrtc_program.rb +27 -0
- data/lib/cumo/linalg.rb +12 -0
- data/lib/cumo/narray.rb +2 -0
- data/lib/cumo/narray/extra.rb +1278 -0
- data/lib/erbpp.rb +294 -0
- data/lib/erbpp/line_number.rb +137 -0
- data/lib/erbpp/narray_def.rb +381 -0
- data/numo-narray-version +1 -0
- data/run.gdb +7 -0
- metadata +353 -0
|
@@ -0,0 +1,79 @@
|
|
|
1
|
+
#ifndef CUMO_CUDA_THRUST_COMPLEX_H
|
|
2
|
+
#define CUMO_CUDA_THRUST_COMPLEX_H
|
|
3
|
+
|
|
4
|
+
#include "cumo/types/complex_kernel.h"
|
|
5
|
+
#include "cumo/cuda/cumo_thrust.hpp"
|
|
6
|
+
|
|
7
|
+
// ref. https://github.com/thrust/thrust/blob/master/examples/summary_statistics.cu
|
|
8
|
+
|
|
9
|
+
// structure used to accumulate the moments and other
|
|
10
|
+
// statistical properties encountered so far.
|
|
11
|
+
template <typename T, typename R>
|
|
12
|
+
struct cumo_thrust_complex_variance_data
|
|
13
|
+
{
|
|
14
|
+
R n;
|
|
15
|
+
T mean;
|
|
16
|
+
R M2;
|
|
17
|
+
|
|
18
|
+
// initialize to the identity element
|
|
19
|
+
void initialize()
|
|
20
|
+
{
|
|
21
|
+
n = M2 = 0;
|
|
22
|
+
mean = c_zero();
|
|
23
|
+
}
|
|
24
|
+
|
|
25
|
+
__host__ __device__ R variance() { return M2 / (n - 1); }
|
|
26
|
+
__host__ __device__ R variance_n() { return M2 / n; }
|
|
27
|
+
};
|
|
28
|
+
|
|
29
|
+
// stats_unary_op is a functor that takes in a value x and
|
|
30
|
+
// returns a variace_data whose mean value is initialized to x.
|
|
31
|
+
template <typename T, typename R>
|
|
32
|
+
struct cumo_thrust_complex_variance_unary_op
|
|
33
|
+
{
|
|
34
|
+
__host__ __device__
|
|
35
|
+
cumo_thrust_complex_variance_data<T,R> operator()(const T& x) const
|
|
36
|
+
{
|
|
37
|
+
cumo_thrust_complex_variance_data<T,R> result;
|
|
38
|
+
result.n = 1;
|
|
39
|
+
result.mean = x;
|
|
40
|
+
result.M2 = 0;
|
|
41
|
+
|
|
42
|
+
return result;
|
|
43
|
+
}
|
|
44
|
+
};
|
|
45
|
+
|
|
46
|
+
// cumo_thrust_variance_binary_op is a functor that accepts two cumo_thrust_variance_data
|
|
47
|
+
// structs and returns a new cumo_thrust_variance_data which are an
|
|
48
|
+
// approximation to the cumo_thrust_variance for
|
|
49
|
+
// all values that have been agregated so far
|
|
50
|
+
template <typename T, typename R>
|
|
51
|
+
struct cumo_thrust_complex_variance_binary_op
|
|
52
|
+
: public thrust::binary_function<const cumo_thrust_complex_variance_data<T,R>&,
|
|
53
|
+
const cumo_thrust_complex_variance_data<T,R>&,
|
|
54
|
+
cumo_thrust_complex_variance_data<T,R> >
|
|
55
|
+
{
|
|
56
|
+
__host__ __device__
|
|
57
|
+
cumo_thrust_complex_variance_data<T,R> operator()(const cumo_thrust_complex_variance_data<T,R>& x, const cumo_thrust_complex_variance_data<T,R>& y) const
|
|
58
|
+
{
|
|
59
|
+
cumo_thrust_complex_variance_data<T,R> result;
|
|
60
|
+
|
|
61
|
+
// precompute some common subexpressions
|
|
62
|
+
R n = x.n + y.n;
|
|
63
|
+
|
|
64
|
+
T delta = c_sub(y.mean, x.mean);
|
|
65
|
+
R delta2 = c_abs_square(delta);
|
|
66
|
+
|
|
67
|
+
//Basic number of samples (n)
|
|
68
|
+
result.n = n;
|
|
69
|
+
|
|
70
|
+
result.mean = c_add(x.mean, c_mul_r(delta, y.n / n));
|
|
71
|
+
|
|
72
|
+
result.M2 = x.M2 + y.M2;
|
|
73
|
+
result.M2 += delta2 * x.n * y.n / n;
|
|
74
|
+
|
|
75
|
+
return result;
|
|
76
|
+
}
|
|
77
|
+
};
|
|
78
|
+
|
|
79
|
+
#endif /* ifndef CUMO_CUDA_THRUST_COMPLEX_H */
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
#ifndef CUMO_CUDA_DRIVER_H
|
|
2
|
+
#define CUMO_CUDA_DRIVER_H
|
|
3
|
+
#include <cuda.h>
|
|
4
|
+
|
|
5
|
+
#if defined(__cplusplus)
|
|
6
|
+
extern "C" {
|
|
7
|
+
#if 0
|
|
8
|
+
} /* satisfy cc-mode */
|
|
9
|
+
#endif
|
|
10
|
+
#endif
|
|
11
|
+
|
|
12
|
+
extern VALUE cumo_cuda_eDriverError;
|
|
13
|
+
extern VALUE cumo_cuda_mDriver;
|
|
14
|
+
|
|
15
|
+
#if defined(__cplusplus)
|
|
16
|
+
#if 0
|
|
17
|
+
{ /* satisfy cc-mode */
|
|
18
|
+
#endif
|
|
19
|
+
} /* extern "C" { */
|
|
20
|
+
#endif
|
|
21
|
+
|
|
22
|
+
#endif /* ifndef CUMO_CUDA_DRIVER_H */
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
#ifndef CUMO_CUDA_MEMORY_POOL_H
|
|
2
|
+
#define CUMO_CUDA_MEMORY_POOL_H
|
|
3
|
+
|
|
4
|
+
#include "cumo/narray.h"
|
|
5
|
+
|
|
6
|
+
#if defined(__cplusplus)
|
|
7
|
+
extern "C" {
|
|
8
|
+
#if 0
|
|
9
|
+
} /* satisfy cc-mode */
|
|
10
|
+
#endif
|
|
11
|
+
#endif
|
|
12
|
+
|
|
13
|
+
extern VALUE cumo_cuda_eOutOfMemoryError;
|
|
14
|
+
|
|
15
|
+
char*
|
|
16
|
+
cumo_cuda_runtime_malloc(size_t size);
|
|
17
|
+
|
|
18
|
+
void
|
|
19
|
+
cumo_cuda_runtime_free(char *ptr);
|
|
20
|
+
|
|
21
|
+
#if defined(__cplusplus)
|
|
22
|
+
#if 0
|
|
23
|
+
{ /* satisfy cc-mode */
|
|
24
|
+
#endif
|
|
25
|
+
} /* extern "C" { */
|
|
26
|
+
#endif
|
|
27
|
+
|
|
28
|
+
#endif /* ifndef CUMO_CUDA_MEMORY_POOL_H */
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
#ifndef CUMO_CUDA_NVRTC_H
|
|
2
|
+
#define CUMO_CUDA_NVRTC_H
|
|
3
|
+
#include <nvrtc.h>
|
|
4
|
+
|
|
5
|
+
#if defined(__cplusplus)
|
|
6
|
+
extern "C" {
|
|
7
|
+
#if 0
|
|
8
|
+
} /* satisfy cc-mode */
|
|
9
|
+
#endif
|
|
10
|
+
#endif
|
|
11
|
+
|
|
12
|
+
extern VALUE cumo_cuda_eNVRTCError;
|
|
13
|
+
extern VALUE cumo_cuda_mNVRTC;
|
|
14
|
+
|
|
15
|
+
#if defined(__cplusplus)
|
|
16
|
+
#if 0
|
|
17
|
+
{ /* satisfy cc-mode */
|
|
18
|
+
#endif
|
|
19
|
+
} /* extern "C" { */
|
|
20
|
+
#endif
|
|
21
|
+
|
|
22
|
+
#endif /* ifndef CUMO_CUDA_NVRTC_H */
|
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
#ifndef CUMO_CUDA_RUNTIME_H
|
|
2
|
+
#define CUMO_CUDA_RUNTIME_H
|
|
3
|
+
|
|
4
|
+
#include "cumo/narray.h"
|
|
5
|
+
#include <cuda_runtime.h>
|
|
6
|
+
|
|
7
|
+
#if defined(__cplusplus)
|
|
8
|
+
extern "C" {
|
|
9
|
+
#if 0
|
|
10
|
+
} /* satisfy cc-mode */
|
|
11
|
+
#endif
|
|
12
|
+
#endif
|
|
13
|
+
|
|
14
|
+
extern VALUE cumo_cuda_eRuntimeError;
|
|
15
|
+
|
|
16
|
+
static inline void
|
|
17
|
+
cumo_cuda_runtime_check_status(cudaError_t status)
|
|
18
|
+
{
|
|
19
|
+
if (status != 0) {
|
|
20
|
+
rb_raise(cumo_cuda_eRuntimeError, "%s (error=%d)", cudaGetErrorString(status), status);
|
|
21
|
+
}
|
|
22
|
+
}
|
|
23
|
+
|
|
24
|
+
static inline bool
|
|
25
|
+
cumo_cuda_runtime_is_device_memory(void* ptr)
|
|
26
|
+
{
|
|
27
|
+
struct cudaPointerAttributes attrs;
|
|
28
|
+
cudaError_t status = cudaPointerGetAttributes(&attrs, ptr);
|
|
29
|
+
cudaGetLastError(); // reset last error to success
|
|
30
|
+
return (status != cudaErrorInvalidValue);
|
|
31
|
+
}
|
|
32
|
+
|
|
33
|
+
#if defined(__cplusplus)
|
|
34
|
+
#if 0
|
|
35
|
+
{ /* satisfy cc-mode */
|
|
36
|
+
#endif
|
|
37
|
+
} /* extern "C" { */
|
|
38
|
+
#endif
|
|
39
|
+
|
|
40
|
+
#endif /* ifndef CUMO_CUDA_RUNTIME_H */
|
|
@@ -0,0 +1,238 @@
|
|
|
1
|
+
#ifndef CUMO_INDEXER_H
|
|
2
|
+
#define CUMO_INDEXER_H
|
|
3
|
+
|
|
4
|
+
/* Add cumo_ prefix */
|
|
5
|
+
#define na_indexer_t cumo_na_indexer_t
|
|
6
|
+
#define na_iarray_t cumo_na_iarray_t
|
|
7
|
+
#define na_reduction_arg_t cumo_na_reduction_arg_t
|
|
8
|
+
|
|
9
|
+
#ifndef __CUDACC__
|
|
10
|
+
#include "cumo/narray.h"
|
|
11
|
+
#include "cumo/ndloop.h"
|
|
12
|
+
#else
|
|
13
|
+
#include "cumo/narray_kernel.h"
|
|
14
|
+
#endif
|
|
15
|
+
|
|
16
|
+
/* A structure to get indices for each dimension.
|
|
17
|
+
*
|
|
18
|
+
* Note that shapes of each argument NArray are typically equivalent, and
|
|
19
|
+
* thus indexer would point the same indicies for all NArrays.
|
|
20
|
+
*/
|
|
21
|
+
typedef struct {
|
|
22
|
+
unsigned char ndim; // # of dimensions
|
|
23
|
+
size_t total_size; // # of total elements
|
|
24
|
+
size_t shape[NA_MAX_DIMENSION]; // # of elements for each dimension
|
|
25
|
+
uint64_t index[NA_MAX_DIMENSION]; // indicies for each dimension
|
|
26
|
+
uint64_t raw_index;
|
|
27
|
+
} na_indexer_t;
|
|
28
|
+
|
|
29
|
+
/* A structure to get data address with indexer.
|
|
30
|
+
*
|
|
31
|
+
* Note that strides would be different for each NArray although indexer points same indicies.
|
|
32
|
+
*/
|
|
33
|
+
typedef struct {
|
|
34
|
+
char* ptr;
|
|
35
|
+
ssize_t step[NA_MAX_DIMENSION]; // or strides
|
|
36
|
+
} na_iarray_t;
|
|
37
|
+
|
|
38
|
+
typedef struct {
|
|
39
|
+
na_iarray_t in;
|
|
40
|
+
na_iarray_t out;
|
|
41
|
+
na_indexer_t in_indexer;
|
|
42
|
+
na_indexer_t out_indexer;
|
|
43
|
+
na_indexer_t reduce_indexer;
|
|
44
|
+
} na_reduction_arg_t;
|
|
45
|
+
|
|
46
|
+
#ifndef __CUDACC__
|
|
47
|
+
extern int na_debug_flag; // narray.c
|
|
48
|
+
|
|
49
|
+
static void
|
|
50
|
+
print_na_indexer_t(na_indexer_t* indexer)
|
|
51
|
+
{
|
|
52
|
+
printf("na_indexer_t = 0x%"SZF"x {\n", (size_t)indexer);
|
|
53
|
+
printf(" ndim = %d\n", indexer->ndim);
|
|
54
|
+
printf(" total_size = %ld\n", indexer->total_size);
|
|
55
|
+
printf(" shape = 0x%"SZF"x\n", (size_t)indexer->shape);
|
|
56
|
+
for (int i = 0; i < indexer->ndim; ++i) {
|
|
57
|
+
printf(" shape[%d] = %ld\n", i, indexer->shape[i]);
|
|
58
|
+
}
|
|
59
|
+
printf("}\n");
|
|
60
|
+
}
|
|
61
|
+
|
|
62
|
+
static void
|
|
63
|
+
print_na_iarray_t(na_iarray_t* iarray, unsigned char ndim)
|
|
64
|
+
{
|
|
65
|
+
printf("na_iarray_t = 0x%"SZF"x {\n", (size_t)iarray);
|
|
66
|
+
printf(" ptr = 0x%"SZF"x\n", (size_t)iarray->ptr);
|
|
67
|
+
printf(" step = 0x%"SZF"x\n", (size_t)iarray->step);
|
|
68
|
+
for (int i = 0; i < ndim; ++i) {
|
|
69
|
+
printf(" step[%d] = %ld\n", i, iarray->step[i]);
|
|
70
|
+
}
|
|
71
|
+
printf("}\n");
|
|
72
|
+
}
|
|
73
|
+
|
|
74
|
+
static void
|
|
75
|
+
print_na_reduction_arg_t(na_reduction_arg_t* arg)
|
|
76
|
+
{
|
|
77
|
+
printf("na_reduction_arg_t = 0x%"SZF"x {\n", (size_t)arg);
|
|
78
|
+
printf("--in--\n");
|
|
79
|
+
print_na_iarray_t(&arg->in, arg->in_indexer.ndim);
|
|
80
|
+
printf("--out--\n");
|
|
81
|
+
print_na_iarray_t(&arg->out, arg->out_indexer.ndim);
|
|
82
|
+
printf("--in_indexer--\n");
|
|
83
|
+
print_na_indexer_t(&arg->in_indexer);
|
|
84
|
+
printf("--out_indexer--\n");
|
|
85
|
+
print_na_indexer_t(&arg->out_indexer);
|
|
86
|
+
printf("--reduce_indexer--\n");
|
|
87
|
+
print_na_indexer_t(&arg->reduce_indexer);
|
|
88
|
+
printf("}\n");
|
|
89
|
+
}
|
|
90
|
+
|
|
91
|
+
// Note that you, then, have to call na_indexer_set to create index[]
|
|
92
|
+
static na_indexer_t
|
|
93
|
+
na_make_indexer(na_loop_args_t* arg)
|
|
94
|
+
{
|
|
95
|
+
na_indexer_t indexer;
|
|
96
|
+
indexer.ndim = arg->ndim;
|
|
97
|
+
indexer.total_size = 1;
|
|
98
|
+
for (int i = 0; i < arg->ndim; ++i) {
|
|
99
|
+
indexer.shape[i] = arg->shape[i];
|
|
100
|
+
indexer.total_size *= arg->shape[i];
|
|
101
|
+
}
|
|
102
|
+
return indexer;
|
|
103
|
+
}
|
|
104
|
+
|
|
105
|
+
static na_iarray_t
|
|
106
|
+
na_make_iarray_given_ndim(na_loop_args_t* arg, int ndim)
|
|
107
|
+
{
|
|
108
|
+
na_iarray_t iarray;
|
|
109
|
+
iarray.ptr = arg->ptr + arg->iter[0].pos;
|
|
110
|
+
for (int idim = ndim; --idim >= 0;) {
|
|
111
|
+
iarray.step[idim] = arg->iter[idim].step;
|
|
112
|
+
}
|
|
113
|
+
return iarray;
|
|
114
|
+
}
|
|
115
|
+
|
|
116
|
+
static na_iarray_t
|
|
117
|
+
na_make_iarray(na_loop_args_t* arg)
|
|
118
|
+
{
|
|
119
|
+
return na_make_iarray_given_ndim(arg, arg->ndim);
|
|
120
|
+
}
|
|
121
|
+
|
|
122
|
+
static na_reduction_arg_t
|
|
123
|
+
na_make_reduction_arg(na_loop_t* lp_user)
|
|
124
|
+
{
|
|
125
|
+
na_reduction_arg_t arg;
|
|
126
|
+
int i;
|
|
127
|
+
int in_ndim = lp_user->args[0].ndim;
|
|
128
|
+
|
|
129
|
+
// in shape = (2, 3, 4, 5, 6)
|
|
130
|
+
// axis = (1, 3)
|
|
131
|
+
// out shape = (2, 4, 6)
|
|
132
|
+
// reduce shape = (3, 5)
|
|
133
|
+
|
|
134
|
+
arg.in = na_make_iarray(&lp_user->args[0]);
|
|
135
|
+
arg.in_indexer = na_make_indexer(&lp_user->args[0]);
|
|
136
|
+
|
|
137
|
+
arg.reduce_indexer.ndim = 0;
|
|
138
|
+
arg.reduce_indexer.total_size = 1;
|
|
139
|
+
arg.out_indexer.ndim = 0;
|
|
140
|
+
arg.out_indexer.total_size = 1;
|
|
141
|
+
for (i = 0; i < in_ndim; ++i) {
|
|
142
|
+
if (na_test_reduce(lp_user->reduce, i)) {
|
|
143
|
+
arg.reduce_indexer.shape[arg.reduce_indexer.ndim] = arg.in_indexer.shape[i];
|
|
144
|
+
arg.reduce_indexer.total_size *= arg.in_indexer.shape[i];
|
|
145
|
+
++arg.reduce_indexer.ndim;
|
|
146
|
+
} else {
|
|
147
|
+
arg.out_indexer.shape[arg.out_indexer.ndim] = arg.in_indexer.shape[i];
|
|
148
|
+
arg.out_indexer.total_size *= arg.in_indexer.shape[i];
|
|
149
|
+
++arg.out_indexer.ndim;
|
|
150
|
+
}
|
|
151
|
+
}
|
|
152
|
+
arg.out = na_make_iarray_given_ndim(&lp_user->args[1], arg.out_indexer.ndim);
|
|
153
|
+
|
|
154
|
+
if (na_debug_flag) {
|
|
155
|
+
print_na_reduction_arg_t(&arg);
|
|
156
|
+
}
|
|
157
|
+
|
|
158
|
+
assert(arg.reduce_indexer.ndim == lp_user->reduce_dim);
|
|
159
|
+
assert(arg.in_indexer.ndim == arg.reduce_indexer.ndim + arg.out_indexer.ndim);
|
|
160
|
+
|
|
161
|
+
return arg;
|
|
162
|
+
}
|
|
163
|
+
|
|
164
|
+
#endif // #ifndef __CUDACC__
|
|
165
|
+
|
|
166
|
+
#define CUMO_NA_INDEXER_OPTIMIZED_NDIM 4
|
|
167
|
+
|
|
168
|
+
#ifdef __CUDACC__
|
|
169
|
+
|
|
170
|
+
// Point the indexer at flat element i.
//
// Stores i in raw_index, then decomposes it into index[] from the last
// dimension to the first: each step takes the remainder by that dimension's
// extent and carries the quotient to the next dimension. A zero extent
// would divide by zero; nothing here checks for it.
__host__ __device__
static inline void
cumo_na_indexer_set_dim(na_indexer_t* indexer, uint64_t i) {
    indexer->raw_index = i;
    for (int dim = indexer->ndim - 1; dim >= 0; --dim) {
        indexer->index[dim] = i % indexer->shape[dim];
        i /= indexer->shape[dim];
    }
}
|
|
179
|
+
|
|
180
|
+
// Let compiler optimize
|
|
181
|
+
#define CUMO_NA_INDEXER_SET(NDIM) \
|
|
182
|
+
__host__ __device__ \
|
|
183
|
+
static inline void \
|
|
184
|
+
cumo_na_indexer_set_dim##NDIM(na_indexer_t* indexer, uint64_t i) { \
|
|
185
|
+
indexer->raw_index = i; \
|
|
186
|
+
for (int j = NDIM; --j >= 0;) { \
|
|
187
|
+
indexer->index[j] = i % indexer->shape[j]; \
|
|
188
|
+
i /= indexer->shape[j]; \
|
|
189
|
+
} \
|
|
190
|
+
}
|
|
191
|
+
|
|
192
|
+
CUMO_NA_INDEXER_SET(4)
|
|
193
|
+
CUMO_NA_INDEXER_SET(3)
|
|
194
|
+
CUMO_NA_INDEXER_SET(2)
|
|
195
|
+
CUMO_NA_INDEXER_SET(0)
|
|
196
|
+
|
|
197
|
+
__host__ __device__
|
|
198
|
+
static inline void
|
|
199
|
+
cumo_na_indexer_set_dim1(na_indexer_t* indexer, uint64_t i) {
|
|
200
|
+
indexer->raw_index = i;
|
|
201
|
+
}
|
|
202
|
+
|
|
203
|
+
// Address of the element the indexer currently points at.
//
// Starts from iarray->ptr and adds step[d] * index[d] for every dimension
// d in [0, indexer->ndim).
//
// The index is converted to ssize_t before the multiplication so that the
// offset is computed in signed arithmetic. Multiplying the signed step
// directly by the uint64_t index would turn a negative step into a huge
// unsigned offset that only lands on the right address through pointer
// wrap-around.
__host__ __device__
static inline char*
cumo_na_iarray_at_dim(na_iarray_t* iarray, na_indexer_t* indexer) {
    char* ptr = iarray->ptr;
    for (int idim = 0; idim < indexer->ndim; ++idim) {
        ptr += iarray->step[idim] * (ssize_t)indexer->index[idim];
    }
    return ptr;
}
|
|
212
|
+
|
|
213
|
+
// Let compiler optimize
|
|
214
|
+
#define CUMO_NA_IARRAY_AT(NDIM) \
|
|
215
|
+
__host__ __device__ \
|
|
216
|
+
static inline char* \
|
|
217
|
+
cumo_na_iarray_at_dim##NDIM(na_iarray_t* iarray, na_indexer_t* indexer) { \
|
|
218
|
+
char* ptr = iarray->ptr; \
|
|
219
|
+
for (int idim = 0; idim < NDIM; ++idim) { \
|
|
220
|
+
ptr += iarray->step[idim] * indexer->index[idim]; \
|
|
221
|
+
} \
|
|
222
|
+
return ptr; \
|
|
223
|
+
}
|
|
224
|
+
|
|
225
|
+
CUMO_NA_IARRAY_AT(4)
|
|
226
|
+
CUMO_NA_IARRAY_AT(3)
|
|
227
|
+
CUMO_NA_IARRAY_AT(2)
|
|
228
|
+
CUMO_NA_IARRAY_AT(0)
|
|
229
|
+
|
|
230
|
+
__host__ __device__
|
|
231
|
+
static inline char*
|
|
232
|
+
cumo_na_iarray_at_dim1(na_iarray_t* iarray, na_indexer_t* indexer) {
|
|
233
|
+
return iarray->ptr + iarray->step[0] * indexer->raw_index;
|
|
234
|
+
}
|
|
235
|
+
|
|
236
|
+
#endif // #ifdef __CUDACC__
|
|
237
|
+
|
|
238
|
+
#endif // CUMO_INDEXER_H
|
|
@@ -0,0 +1,142 @@
|
|
|
1
|
+
#ifndef CUMO_INTERN_H
|
|
2
|
+
#define CUMO_INTERN_H
|
|
3
|
+
|
|
4
|
+
void cumo_debug_breakpoint(void);
|
|
5
|
+
|
|
6
|
+
/* Add cumo_ prefix to avoid C symbol collisions with Numo without modifying C implementations */
|
|
7
|
+
|
|
8
|
+
#define rb_narray_new cumo_nary_new
|
|
9
|
+
#define nary_new cumo_nary_new
|
|
10
|
+
VALUE cumo_nary_new(VALUE elem, int ndim, size_t *shape);
|
|
11
|
+
#define rb_narray_view_new cumo_nary_view_new
|
|
12
|
+
#define nary_view_new cumo_nary_view_new
|
|
13
|
+
VALUE cumo_nary_view_new(VALUE elem, int ndim, size_t *shape);
|
|
14
|
+
#define rb_narray_debug_info cumo_nary_debug_info
|
|
15
|
+
#define nary_debug_info cumo_nary_debug_info
|
|
16
|
+
VALUE cumo_nary_debug_info(VALUE);
|
|
17
|
+
|
|
18
|
+
#define na_make_view cumo_nary_make_view
|
|
19
|
+
VALUE cumo_nary_make_view(VALUE self);
|
|
20
|
+
|
|
21
|
+
#define na_s_allocate cumo_nary_s_allocate
|
|
22
|
+
VALUE cumo_nary_s_allocate(VALUE klass);
|
|
23
|
+
#define na_s_allocate_view cumo_nary_s_allocate_view
|
|
24
|
+
VALUE cumo_nary_s_allocate_view(VALUE klass);
|
|
25
|
+
#define na_s_new_like cumo_nary_s_new_like
|
|
26
|
+
VALUE cumo_nary_s_new_like(VALUE type, VALUE obj);
|
|
27
|
+
|
|
28
|
+
#define na_alloc_shape cumo_na_alloc_shape
|
|
29
|
+
void cumo_na_alloc_shape(narray_t *na, int ndim);
|
|
30
|
+
#define na_array_to_internal_shape cumo_na_array_to_internal_shape
|
|
31
|
+
void cumo_na_array_to_internal_shape(VALUE self, VALUE ary, size_t *shape);
|
|
32
|
+
#define na_index_arg_to_internal_order cumo_na_index_arg_to_internal_order
|
|
33
|
+
void cumo_na_index_arg_to_internal_order(int argc, VALUE *argv, VALUE self);
|
|
34
|
+
#define na_setup_shape cumo_na_setup_shape
|
|
35
|
+
void cumo_na_setup_shape(narray_t *na, int ndim, size_t *shape);
|
|
36
|
+
|
|
37
|
+
#define na_get_elmsz cumo_nary_element_stride
|
|
38
|
+
#define nary_element_stride cumo_nary_element_stride
|
|
39
|
+
//#define na_element_stride cumo_nary_element_stride
|
|
40
|
+
unsigned int cumo_nary_element_stride(VALUE nary);
|
|
41
|
+
#define na_dtype_elmsz cumo_nary_dtype_element_stride
|
|
42
|
+
size_t cumo_nary_dtype_element_stride(VALUE klass);
|
|
43
|
+
|
|
44
|
+
#define na_get_pointer cumo_nary_get_pointer
|
|
45
|
+
char *cumo_nary_get_pointer(VALUE);
|
|
46
|
+
#define na_get_pointer_for_write cumo_nary_get_pointer_for_write
|
|
47
|
+
char *cumo_nary_get_pointer_for_write(VALUE);
|
|
48
|
+
#define na_get_pointer_for_read cumo_nary_get_pointer_for_read
|
|
49
|
+
char *cumo_nary_get_pointer_for_read(VALUE);
|
|
50
|
+
#define na_get_pointer_for_read_write cumo_nary_get_pointer_for_read_write
|
|
51
|
+
char *cumo_nary_get_pointer_for_read_write(VALUE);
|
|
52
|
+
#define na_get_offset cumo_nary_get_offset
|
|
53
|
+
size_t cumo_nary_get_offset(VALUE self);
|
|
54
|
+
|
|
55
|
+
#define na_copy_flags cumo_nary_copy_flags
|
|
56
|
+
void cumo_nary_copy_flags(VALUE src, VALUE dst);
|
|
57
|
+
|
|
58
|
+
#define na_check_ladder cumo_nary_check_ladder
|
|
59
|
+
VALUE cumo_nary_check_ladder(VALUE self, int start_dim);
|
|
60
|
+
#define na_check_contiguous cumo_nary_check_contiguous
|
|
61
|
+
VALUE cumo_nary_check_contiguous(VALUE self);
|
|
62
|
+
|
|
63
|
+
#define na_flatten_dim cumo_nary_flatten_dim
|
|
64
|
+
VALUE cumo_nary_flatten_dim(VALUE self, int sd);
|
|
65
|
+
|
|
66
|
+
#define na_flatten cumo_nary_flatten
|
|
67
|
+
VALUE cumo_nary_flatten(VALUE);
|
|
68
|
+
|
|
69
|
+
#define na_copy cumo_nary_dup
|
|
70
|
+
VALUE cumo_nary_dup(VALUE);
|
|
71
|
+
|
|
72
|
+
#define na_store cumo_nary_store
|
|
73
|
+
VALUE cumo_nary_store(VALUE self, VALUE src);
|
|
74
|
+
|
|
75
|
+
#define na_upcast cumo_na_upcast
|
|
76
|
+
VALUE cumo_na_upcast(VALUE type1, VALUE type2);
|
|
77
|
+
|
|
78
|
+
#define na_release_lock cumo_na_release_lock
|
|
79
|
+
void cumo_na_release_lock(VALUE); // currently do nothing
|
|
80
|
+
|
|
81
|
+
// used in reduce methods
|
|
82
|
+
#define nary_reduce_dimension cumo_nary_reduce_dimension
|
|
83
|
+
#define na_reduce_dimension cumo_nary_reduce_dimension
|
|
84
|
+
VALUE cumo_nary_reduce_dimension(int argc, VALUE *argv, int naryc, VALUE *naryv,
|
|
85
|
+
ndfunc_t *ndf, na_iter_func_t nan_iter);
|
|
86
|
+
|
|
87
|
+
#define nary_reduce_options cumo_nary_reduce_options
|
|
88
|
+
#define na_reduce_options cumo_nary_reduce_options
|
|
89
|
+
VALUE cumo_nary_reduce_options(VALUE axes, VALUE *opts, int naryc, VALUE *naryv,
|
|
90
|
+
ndfunc_t *ndf);
|
|
91
|
+
|
|
92
|
+
// ndloop
|
|
93
|
+
#define na_ndloop cumo_na_ndloop
|
|
94
|
+
VALUE cumo_na_ndloop(ndfunc_t *nf, int argc, ...);
|
|
95
|
+
#define na_ndloop2 cumo_na_ndloop2
|
|
96
|
+
VALUE cumo_na_ndloop2(ndfunc_t *nf, VALUE args);
|
|
97
|
+
#define na_ndloop3 cumo_na_ndloop3
|
|
98
|
+
VALUE cumo_na_ndloop3(ndfunc_t *nf, void *ptr, int argc, ...);
|
|
99
|
+
#define na_ndloop4 cumo_na_ndloop4
|
|
100
|
+
VALUE cumo_na_ndloop4(ndfunc_t *nf, void *ptr, VALUE args);
|
|
101
|
+
|
|
102
|
+
#define na_ndloop_cast_narray_to_rarray cumo_na_ndloop_cast_narray_to_rarray
|
|
103
|
+
VALUE cumo_na_ndloop_cast_narray_to_rarray(ndfunc_t *nf, VALUE nary, VALUE fmt);
|
|
104
|
+
#define na_ndloop_store_rarray cumo_na_ndloop_store_rarray
|
|
105
|
+
VALUE cumo_na_ndloop_store_rarray(ndfunc_t *nf, VALUE nary, VALUE rary);
|
|
106
|
+
#define na_ndloop_store_rarray2 cumo_na_ndloop_store_rarray2
|
|
107
|
+
VALUE cumo_na_ndloop_store_rarray2(ndfunc_t *nf, VALUE nary, VALUE rary, VALUE opt);
|
|
108
|
+
#define na_ndloop_inspect cumo_na_ndloop_inspect
|
|
109
|
+
VALUE cumo_na_ndloop_inspect(VALUE nary, na_text_func_t func, VALUE opt);
|
|
110
|
+
#define na_ndloop_with_index cumo_na_ndloop_with_index
|
|
111
|
+
VALUE cumo_na_ndloop_with_index(ndfunc_t *nf, int argc, ...);
|
|
112
|
+
|
|
113
|
+
#define na_info_str cumo_nary_info_str
|
|
114
|
+
VALUE cumo_nary_info_str(VALUE);
|
|
115
|
+
|
|
116
|
+
#define na_test_reduce cumo_nary_test_reduce
|
|
117
|
+
bool cumo_nary_test_reduce(VALUE reduce, int dim);
|
|
118
|
+
|
|
119
|
+
#define nary_step_array_index cumo_nary_step_array_index
|
|
120
|
+
void cumo_nary_step_array_index(VALUE self, size_t ary_size, size_t *plen, ssize_t *pbeg, ssize_t *pstep);
|
|
121
|
+
#define nary_step_sequence cumo_nary_step_sequence
|
|
122
|
+
void cumo_nary_step_sequence(VALUE self, size_t *plen, double *pbeg, double *pstep);
|
|
123
|
+
|
|
124
|
+
// used in aref, aset
|
|
125
|
+
#define na_get_result_dimension cumo_nary_get_result_dimension
|
|
126
|
+
int cumo_nary_get_result_dimension(VALUE self, int argc, VALUE *argv, ssize_t stride, size_t *pos_idx);
|
|
127
|
+
#define na_aref_main cumo_nary_aref_main
|
|
128
|
+
VALUE cumo_nary_aref_main(int nidx, VALUE *idx, VALUE self, int keep_dim, int result_nd, size_t pos);
|
|
129
|
+
|
|
130
|
+
// defined in array, used in math
|
|
131
|
+
#define na_ary_composition_dtype cumo_na_ary_composition_dtype
|
|
132
|
+
VALUE cumo_na_ary_composition_dtype(VALUE ary);
|
|
133
|
+
|
|
134
|
+
#include "ruby/version.h"
|
|
135
|
+
|
|
136
|
+
#if RUBY_API_VERSION_CODE == 20100 // 2.1.0
|
|
137
|
+
int rb_get_kwargs(VALUE keyword_hash, const ID *table, int required, int optional, VALUE *);
|
|
138
|
+
VALUE rb_extract_keywords(VALUE *orighash);
|
|
139
|
+
#endif
|
|
140
|
+
|
|
141
|
+
|
|
142
|
+
#endif /* ifndef CUMO_INTERN_H */
|