cumo 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +27 -0
- data/.travis.yml +5 -0
- data/3rd_party/mkmf-cu/.gitignore +36 -0
- data/3rd_party/mkmf-cu/Gemfile +3 -0
- data/3rd_party/mkmf-cu/LICENSE +21 -0
- data/3rd_party/mkmf-cu/README.md +36 -0
- data/3rd_party/mkmf-cu/Rakefile +11 -0
- data/3rd_party/mkmf-cu/bin/mkmf-cu-nvcc +4 -0
- data/3rd_party/mkmf-cu/lib/mkmf-cu.rb +32 -0
- data/3rd_party/mkmf-cu/lib/mkmf-cu/cli.rb +80 -0
- data/3rd_party/mkmf-cu/lib/mkmf-cu/nvcc.rb +157 -0
- data/3rd_party/mkmf-cu/mkmf-cu.gemspec +16 -0
- data/3rd_party/mkmf-cu/test/test_mkmf-cu.rb +67 -0
- data/CODE_OF_CONDUCT.md +46 -0
- data/Gemfile +8 -0
- data/LICENSE.txt +82 -0
- data/README.md +252 -0
- data/Rakefile +43 -0
- data/bench/broadcast_fp32.rb +138 -0
- data/bench/cumo_bench.rb +193 -0
- data/bench/numo_bench.rb +138 -0
- data/bench/reduction_fp32.rb +117 -0
- data/bin/console +14 -0
- data/bin/setup +8 -0
- data/cumo.gemspec +32 -0
- data/ext/cumo/cuda/cublas.c +278 -0
- data/ext/cumo/cuda/driver.c +421 -0
- data/ext/cumo/cuda/memory_pool.cpp +185 -0
- data/ext/cumo/cuda/memory_pool_impl.cpp +308 -0
- data/ext/cumo/cuda/memory_pool_impl.hpp +370 -0
- data/ext/cumo/cuda/memory_pool_impl_test.cpp +554 -0
- data/ext/cumo/cuda/nvrtc.c +207 -0
- data/ext/cumo/cuda/runtime.c +167 -0
- data/ext/cumo/cumo.c +148 -0
- data/ext/cumo/depend.erb +58 -0
- data/ext/cumo/extconf.rb +179 -0
- data/ext/cumo/include/cumo.h +25 -0
- data/ext/cumo/include/cumo/compat.h +23 -0
- data/ext/cumo/include/cumo/cuda/cublas.h +153 -0
- data/ext/cumo/include/cumo/cuda/cumo_thrust.hpp +187 -0
- data/ext/cumo/include/cumo/cuda/cumo_thrust_complex.hpp +79 -0
- data/ext/cumo/include/cumo/cuda/driver.h +22 -0
- data/ext/cumo/include/cumo/cuda/memory_pool.h +28 -0
- data/ext/cumo/include/cumo/cuda/nvrtc.h +22 -0
- data/ext/cumo/include/cumo/cuda/runtime.h +40 -0
- data/ext/cumo/include/cumo/indexer.h +238 -0
- data/ext/cumo/include/cumo/intern.h +142 -0
- data/ext/cumo/include/cumo/intern_fwd.h +38 -0
- data/ext/cumo/include/cumo/intern_kernel.h +6 -0
- data/ext/cumo/include/cumo/narray.h +429 -0
- data/ext/cumo/include/cumo/narray_kernel.h +149 -0
- data/ext/cumo/include/cumo/ndloop.h +95 -0
- data/ext/cumo/include/cumo/reduce_kernel.h +126 -0
- data/ext/cumo/include/cumo/template.h +158 -0
- data/ext/cumo/include/cumo/template_kernel.h +77 -0
- data/ext/cumo/include/cumo/types/bit.h +40 -0
- data/ext/cumo/include/cumo/types/bit_kernel.h +34 -0
- data/ext/cumo/include/cumo/types/complex.h +402 -0
- data/ext/cumo/include/cumo/types/complex_kernel.h +414 -0
- data/ext/cumo/include/cumo/types/complex_macro.h +382 -0
- data/ext/cumo/include/cumo/types/complex_macro_kernel.h +186 -0
- data/ext/cumo/include/cumo/types/dcomplex.h +46 -0
- data/ext/cumo/include/cumo/types/dcomplex_kernel.h +13 -0
- data/ext/cumo/include/cumo/types/dfloat.h +47 -0
- data/ext/cumo/include/cumo/types/dfloat_kernel.h +14 -0
- data/ext/cumo/include/cumo/types/float_def.h +34 -0
- data/ext/cumo/include/cumo/types/float_def_kernel.h +39 -0
- data/ext/cumo/include/cumo/types/float_macro.h +191 -0
- data/ext/cumo/include/cumo/types/float_macro_kernel.h +158 -0
- data/ext/cumo/include/cumo/types/int16.h +24 -0
- data/ext/cumo/include/cumo/types/int16_kernel.h +23 -0
- data/ext/cumo/include/cumo/types/int32.h +24 -0
- data/ext/cumo/include/cumo/types/int32_kernel.h +19 -0
- data/ext/cumo/include/cumo/types/int64.h +24 -0
- data/ext/cumo/include/cumo/types/int64_kernel.h +19 -0
- data/ext/cumo/include/cumo/types/int8.h +24 -0
- data/ext/cumo/include/cumo/types/int8_kernel.h +19 -0
- data/ext/cumo/include/cumo/types/int_macro.h +67 -0
- data/ext/cumo/include/cumo/types/int_macro_kernel.h +48 -0
- data/ext/cumo/include/cumo/types/real_accum.h +486 -0
- data/ext/cumo/include/cumo/types/real_accum_kernel.h +101 -0
- data/ext/cumo/include/cumo/types/robj_macro.h +80 -0
- data/ext/cumo/include/cumo/types/robj_macro_kernel.h +0 -0
- data/ext/cumo/include/cumo/types/robject.h +27 -0
- data/ext/cumo/include/cumo/types/robject_kernel.h +7 -0
- data/ext/cumo/include/cumo/types/scomplex.h +46 -0
- data/ext/cumo/include/cumo/types/scomplex_kernel.h +13 -0
- data/ext/cumo/include/cumo/types/sfloat.h +48 -0
- data/ext/cumo/include/cumo/types/sfloat_kernel.h +14 -0
- data/ext/cumo/include/cumo/types/uint16.h +25 -0
- data/ext/cumo/include/cumo/types/uint16_kernel.h +20 -0
- data/ext/cumo/include/cumo/types/uint32.h +25 -0
- data/ext/cumo/include/cumo/types/uint32_kernel.h +20 -0
- data/ext/cumo/include/cumo/types/uint64.h +25 -0
- data/ext/cumo/include/cumo/types/uint64_kernel.h +20 -0
- data/ext/cumo/include/cumo/types/uint8.h +25 -0
- data/ext/cumo/include/cumo/types/uint8_kernel.h +20 -0
- data/ext/cumo/include/cumo/types/uint_macro.h +58 -0
- data/ext/cumo/include/cumo/types/uint_macro_kernel.h +38 -0
- data/ext/cumo/include/cumo/types/xint_macro.h +169 -0
- data/ext/cumo/include/cumo/types/xint_macro_kernel.h +88 -0
- data/ext/cumo/narray/SFMT-params.h +97 -0
- data/ext/cumo/narray/SFMT-params19937.h +46 -0
- data/ext/cumo/narray/SFMT.c +620 -0
- data/ext/cumo/narray/SFMT.h +167 -0
- data/ext/cumo/narray/array.c +638 -0
- data/ext/cumo/narray/data.c +961 -0
- data/ext/cumo/narray/gen/cogen.rb +56 -0
- data/ext/cumo/narray/gen/cogen_kernel.rb +58 -0
- data/ext/cumo/narray/gen/def/bit.rb +37 -0
- data/ext/cumo/narray/gen/def/dcomplex.rb +39 -0
- data/ext/cumo/narray/gen/def/dfloat.rb +37 -0
- data/ext/cumo/narray/gen/def/int16.rb +36 -0
- data/ext/cumo/narray/gen/def/int32.rb +36 -0
- data/ext/cumo/narray/gen/def/int64.rb +36 -0
- data/ext/cumo/narray/gen/def/int8.rb +36 -0
- data/ext/cumo/narray/gen/def/robject.rb +37 -0
- data/ext/cumo/narray/gen/def/scomplex.rb +39 -0
- data/ext/cumo/narray/gen/def/sfloat.rb +37 -0
- data/ext/cumo/narray/gen/def/uint16.rb +36 -0
- data/ext/cumo/narray/gen/def/uint32.rb +36 -0
- data/ext/cumo/narray/gen/def/uint64.rb +36 -0
- data/ext/cumo/narray/gen/def/uint8.rb +36 -0
- data/ext/cumo/narray/gen/erbpp2.rb +346 -0
- data/ext/cumo/narray/gen/narray_def.rb +268 -0
- data/ext/cumo/narray/gen/spec.rb +425 -0
- data/ext/cumo/narray/gen/tmpl/accum.c +86 -0
- data/ext/cumo/narray/gen/tmpl/accum_binary.c +121 -0
- data/ext/cumo/narray/gen/tmpl/accum_binary_kernel.cu +61 -0
- data/ext/cumo/narray/gen/tmpl/accum_index.c +119 -0
- data/ext/cumo/narray/gen/tmpl/accum_index_kernel.cu +66 -0
- data/ext/cumo/narray/gen/tmpl/accum_kernel.cu +12 -0
- data/ext/cumo/narray/gen/tmpl/alloc_func.c +107 -0
- data/ext/cumo/narray/gen/tmpl/allocate.c +37 -0
- data/ext/cumo/narray/gen/tmpl/aref.c +66 -0
- data/ext/cumo/narray/gen/tmpl/aref_cpu.c +50 -0
- data/ext/cumo/narray/gen/tmpl/aset.c +56 -0
- data/ext/cumo/narray/gen/tmpl/binary.c +162 -0
- data/ext/cumo/narray/gen/tmpl/binary2.c +70 -0
- data/ext/cumo/narray/gen/tmpl/binary2_kernel.cu +15 -0
- data/ext/cumo/narray/gen/tmpl/binary_kernel.cu +31 -0
- data/ext/cumo/narray/gen/tmpl/binary_s.c +45 -0
- data/ext/cumo/narray/gen/tmpl/binary_s_kernel.cu +15 -0
- data/ext/cumo/narray/gen/tmpl/bincount.c +181 -0
- data/ext/cumo/narray/gen/tmpl/cast.c +44 -0
- data/ext/cumo/narray/gen/tmpl/cast_array.c +13 -0
- data/ext/cumo/narray/gen/tmpl/class.c +9 -0
- data/ext/cumo/narray/gen/tmpl/class_kernel.cu +6 -0
- data/ext/cumo/narray/gen/tmpl/clip.c +121 -0
- data/ext/cumo/narray/gen/tmpl/coerce_cast.c +10 -0
- data/ext/cumo/narray/gen/tmpl/complex_accum_kernel.cu +129 -0
- data/ext/cumo/narray/gen/tmpl/cond_binary.c +68 -0
- data/ext/cumo/narray/gen/tmpl/cond_binary_kernel.cu +18 -0
- data/ext/cumo/narray/gen/tmpl/cond_unary.c +46 -0
- data/ext/cumo/narray/gen/tmpl/cum.c +50 -0
- data/ext/cumo/narray/gen/tmpl/each.c +47 -0
- data/ext/cumo/narray/gen/tmpl/each_with_index.c +70 -0
- data/ext/cumo/narray/gen/tmpl/ewcomp.c +79 -0
- data/ext/cumo/narray/gen/tmpl/ewcomp_kernel.cu +19 -0
- data/ext/cumo/narray/gen/tmpl/extract.c +22 -0
- data/ext/cumo/narray/gen/tmpl/extract_cpu.c +26 -0
- data/ext/cumo/narray/gen/tmpl/extract_data.c +53 -0
- data/ext/cumo/narray/gen/tmpl/eye.c +105 -0
- data/ext/cumo/narray/gen/tmpl/eye_kernel.cu +19 -0
- data/ext/cumo/narray/gen/tmpl/fill.c +52 -0
- data/ext/cumo/narray/gen/tmpl/fill_kernel.cu +29 -0
- data/ext/cumo/narray/gen/tmpl/float_accum_kernel.cu +106 -0
- data/ext/cumo/narray/gen/tmpl/format.c +62 -0
- data/ext/cumo/narray/gen/tmpl/format_to_a.c +49 -0
- data/ext/cumo/narray/gen/tmpl/frexp.c +38 -0
- data/ext/cumo/narray/gen/tmpl/gemm.c +203 -0
- data/ext/cumo/narray/gen/tmpl/init_class.c +20 -0
- data/ext/cumo/narray/gen/tmpl/init_module.c +12 -0
- data/ext/cumo/narray/gen/tmpl/inspect.c +21 -0
- data/ext/cumo/narray/gen/tmpl/lib.c +50 -0
- data/ext/cumo/narray/gen/tmpl/lib_kernel.cu +24 -0
- data/ext/cumo/narray/gen/tmpl/logseq.c +102 -0
- data/ext/cumo/narray/gen/tmpl/logseq_kernel.cu +31 -0
- data/ext/cumo/narray/gen/tmpl/map_with_index.c +98 -0
- data/ext/cumo/narray/gen/tmpl/median.c +66 -0
- data/ext/cumo/narray/gen/tmpl/minmax.c +47 -0
- data/ext/cumo/narray/gen/tmpl/module.c +9 -0
- data/ext/cumo/narray/gen/tmpl/module_kernel.cu +1 -0
- data/ext/cumo/narray/gen/tmpl/new_dim0.c +15 -0
- data/ext/cumo/narray/gen/tmpl/new_dim0_kernel.cu +8 -0
- data/ext/cumo/narray/gen/tmpl/poly.c +50 -0
- data/ext/cumo/narray/gen/tmpl/pow.c +97 -0
- data/ext/cumo/narray/gen/tmpl/pow_kernel.cu +29 -0
- data/ext/cumo/narray/gen/tmpl/powint.c +17 -0
- data/ext/cumo/narray/gen/tmpl/qsort.c +212 -0
- data/ext/cumo/narray/gen/tmpl/rand.c +168 -0
- data/ext/cumo/narray/gen/tmpl/rand_norm.c +121 -0
- data/ext/cumo/narray/gen/tmpl/real_accum_kernel.cu +75 -0
- data/ext/cumo/narray/gen/tmpl/seq.c +112 -0
- data/ext/cumo/narray/gen/tmpl/seq_kernel.cu +43 -0
- data/ext/cumo/narray/gen/tmpl/set2.c +57 -0
- data/ext/cumo/narray/gen/tmpl/sort.c +48 -0
- data/ext/cumo/narray/gen/tmpl/sort_index.c +111 -0
- data/ext/cumo/narray/gen/tmpl/store.c +41 -0
- data/ext/cumo/narray/gen/tmpl/store_array.c +187 -0
- data/ext/cumo/narray/gen/tmpl/store_array_kernel.cu +58 -0
- data/ext/cumo/narray/gen/tmpl/store_bit.c +86 -0
- data/ext/cumo/narray/gen/tmpl/store_bit_kernel.cu +66 -0
- data/ext/cumo/narray/gen/tmpl/store_from.c +81 -0
- data/ext/cumo/narray/gen/tmpl/store_from_kernel.cu +58 -0
- data/ext/cumo/narray/gen/tmpl/store_kernel.cu +3 -0
- data/ext/cumo/narray/gen/tmpl/store_numeric.c +9 -0
- data/ext/cumo/narray/gen/tmpl/to_a.c +43 -0
- data/ext/cumo/narray/gen/tmpl/unary.c +132 -0
- data/ext/cumo/narray/gen/tmpl/unary2.c +60 -0
- data/ext/cumo/narray/gen/tmpl/unary_kernel.cu +72 -0
- data/ext/cumo/narray/gen/tmpl/unary_ret2.c +34 -0
- data/ext/cumo/narray/gen/tmpl/unary_s.c +86 -0
- data/ext/cumo/narray/gen/tmpl/unary_s_kernel.cu +58 -0
- data/ext/cumo/narray/gen/tmpl_bit/allocate.c +24 -0
- data/ext/cumo/narray/gen/tmpl_bit/aref.c +54 -0
- data/ext/cumo/narray/gen/tmpl_bit/aref_cpu.c +57 -0
- data/ext/cumo/narray/gen/tmpl_bit/aset.c +56 -0
- data/ext/cumo/narray/gen/tmpl_bit/binary.c +98 -0
- data/ext/cumo/narray/gen/tmpl_bit/bit_count.c +64 -0
- data/ext/cumo/narray/gen/tmpl_bit/bit_count_cpu.c +88 -0
- data/ext/cumo/narray/gen/tmpl_bit/bit_count_kernel.cu +76 -0
- data/ext/cumo/narray/gen/tmpl_bit/bit_reduce.c +133 -0
- data/ext/cumo/narray/gen/tmpl_bit/each.c +48 -0
- data/ext/cumo/narray/gen/tmpl_bit/each_with_index.c +70 -0
- data/ext/cumo/narray/gen/tmpl_bit/extract.c +30 -0
- data/ext/cumo/narray/gen/tmpl_bit/extract_cpu.c +29 -0
- data/ext/cumo/narray/gen/tmpl_bit/fill.c +69 -0
- data/ext/cumo/narray/gen/tmpl_bit/format.c +64 -0
- data/ext/cumo/narray/gen/tmpl_bit/format_to_a.c +51 -0
- data/ext/cumo/narray/gen/tmpl_bit/inspect.c +21 -0
- data/ext/cumo/narray/gen/tmpl_bit/mask.c +136 -0
- data/ext/cumo/narray/gen/tmpl_bit/none_p.c +14 -0
- data/ext/cumo/narray/gen/tmpl_bit/store_array.c +108 -0
- data/ext/cumo/narray/gen/tmpl_bit/store_bit.c +70 -0
- data/ext/cumo/narray/gen/tmpl_bit/store_from.c +60 -0
- data/ext/cumo/narray/gen/tmpl_bit/to_a.c +47 -0
- data/ext/cumo/narray/gen/tmpl_bit/unary.c +81 -0
- data/ext/cumo/narray/gen/tmpl_bit/where.c +90 -0
- data/ext/cumo/narray/gen/tmpl_bit/where2.c +95 -0
- data/ext/cumo/narray/index.c +880 -0
- data/ext/cumo/narray/kwargs.c +153 -0
- data/ext/cumo/narray/math.c +142 -0
- data/ext/cumo/narray/narray.c +1948 -0
- data/ext/cumo/narray/ndloop.c +2105 -0
- data/ext/cumo/narray/rand.c +45 -0
- data/ext/cumo/narray/step.c +474 -0
- data/ext/cumo/narray/struct.c +886 -0
- data/lib/cumo.rb +3 -0
- data/lib/cumo/cuda.rb +11 -0
- data/lib/cumo/cuda/compile_error.rb +36 -0
- data/lib/cumo/cuda/compiler.rb +161 -0
- data/lib/cumo/cuda/device.rb +47 -0
- data/lib/cumo/cuda/link_state.rb +31 -0
- data/lib/cumo/cuda/module.rb +40 -0
- data/lib/cumo/cuda/nvrtc_program.rb +27 -0
- data/lib/cumo/linalg.rb +12 -0
- data/lib/cumo/narray.rb +2 -0
- data/lib/cumo/narray/extra.rb +1278 -0
- data/lib/erbpp.rb +294 -0
- data/lib/erbpp/line_number.rb +137 -0
- data/lib/erbpp/narray_def.rb +381 -0
- data/numo-narray-version +1 -0
- data/run.gdb +7 -0
- metadata +353 -0
@@ -0,0 +1,79 @@
|
|
1
|
+
#ifndef CUMO_CUDA_THRUST_COMPLEX_H
|
2
|
+
#define CUMO_CUDA_THRUST_COMPLEX_H
|
3
|
+
|
4
|
+
#include "cumo/types/complex_kernel.h"
|
5
|
+
#include "cumo/cuda/cumo_thrust.hpp"
|
6
|
+
|
7
|
+
// ref. https://github.com/thrust/thrust/blob/master/examples/summary_statistics.cu
|
8
|
+
|
9
|
+
// structure used to accumulate the moments and other
|
10
|
+
// statistical properties encountered so far.
|
11
|
+
template <typename T, typename R>
|
12
|
+
struct cumo_thrust_complex_variance_data
|
13
|
+
{
|
14
|
+
R n;
|
15
|
+
T mean;
|
16
|
+
R M2;
|
17
|
+
|
18
|
+
// initialize to the identity element
|
19
|
+
void initialize()
|
20
|
+
{
|
21
|
+
n = M2 = 0;
|
22
|
+
mean = c_zero();
|
23
|
+
}
|
24
|
+
|
25
|
+
__host__ __device__ R variance() { return M2 / (n - 1); }
|
26
|
+
__host__ __device__ R variance_n() { return M2 / n; }
|
27
|
+
};
|
28
|
+
|
29
|
+
// stats_unary_op is a functor that takes in a value x and
|
30
|
+
// returns a variace_data whose mean value is initialized to x.
|
31
|
+
template <typename T, typename R>
|
32
|
+
struct cumo_thrust_complex_variance_unary_op
|
33
|
+
{
|
34
|
+
__host__ __device__
|
35
|
+
cumo_thrust_complex_variance_data<T,R> operator()(const T& x) const
|
36
|
+
{
|
37
|
+
cumo_thrust_complex_variance_data<T,R> result;
|
38
|
+
result.n = 1;
|
39
|
+
result.mean = x;
|
40
|
+
result.M2 = 0;
|
41
|
+
|
42
|
+
return result;
|
43
|
+
}
|
44
|
+
};
|
45
|
+
|
46
|
+
// cumo_thrust_variance_binary_op is a functor that accepts two cumo_thrust_variance_data
|
47
|
+
// structs and returns a new cumo_thrust_variance_data which are an
|
48
|
+
// approximation to the cumo_thrust_variance for
|
49
|
+
// all values that have been agregated so far
|
50
|
+
template <typename T, typename R>
|
51
|
+
struct cumo_thrust_complex_variance_binary_op
|
52
|
+
: public thrust::binary_function<const cumo_thrust_complex_variance_data<T,R>&,
|
53
|
+
const cumo_thrust_complex_variance_data<T,R>&,
|
54
|
+
cumo_thrust_complex_variance_data<T,R> >
|
55
|
+
{
|
56
|
+
__host__ __device__
|
57
|
+
cumo_thrust_complex_variance_data<T,R> operator()(const cumo_thrust_complex_variance_data<T,R>& x, const cumo_thrust_complex_variance_data<T,R>& y) const
|
58
|
+
{
|
59
|
+
cumo_thrust_complex_variance_data<T,R> result;
|
60
|
+
|
61
|
+
// precompute some common subexpressions
|
62
|
+
R n = x.n + y.n;
|
63
|
+
|
64
|
+
T delta = c_sub(y.mean, x.mean);
|
65
|
+
R delta2 = c_abs_square(delta);
|
66
|
+
|
67
|
+
//Basic number of samples (n)
|
68
|
+
result.n = n;
|
69
|
+
|
70
|
+
result.mean = c_add(x.mean, c_mul_r(delta, y.n / n));
|
71
|
+
|
72
|
+
result.M2 = x.M2 + y.M2;
|
73
|
+
result.M2 += delta2 * x.n * y.n / n;
|
74
|
+
|
75
|
+
return result;
|
76
|
+
}
|
77
|
+
};
|
78
|
+
|
79
|
+
#endif /* ifndef CUMO_CUDA_THRUST_COMPLEX_H */
|
@@ -0,0 +1,22 @@
|
|
1
|
+
#ifndef CUMO_CUDA_DRIVER_H
#define CUMO_CUDA_DRIVER_H

/* VALUE is used below, so include Ruby's header here instead of relying on
 * every includer having pulled it in first. */
#include <ruby.h>
#include <cuda.h>

#if defined(__cplusplus)
extern "C" {
#if 0
} /* satisfy cc-mode */
#endif
#endif

/* Ruby objects for the CUDA driver binding; only declared here.
 * NOTE(review): by the e/m prefixes these are presumably the error class and
 * the module object -- confirm against the defining .c file. */
extern VALUE cumo_cuda_eDriverError;
extern VALUE cumo_cuda_mDriver;

#if defined(__cplusplus)
#if 0
{ /* satisfy cc-mode */
#endif
} /* extern "C" { */
#endif

#endif /* ifndef CUMO_CUDA_DRIVER_H */
|
@@ -0,0 +1,28 @@
|
|
1
|
+
#ifndef CUMO_CUDA_MEMORY_POOL_H
|
2
|
+
#define CUMO_CUDA_MEMORY_POOL_H
|
3
|
+
|
4
|
+
#include "cumo/narray.h"
|
5
|
+
|
6
|
+
#if defined(__cplusplus)
|
7
|
+
extern "C" {
|
8
|
+
#if 0
|
9
|
+
} /* satisfy cc-mode */
|
10
|
+
#endif
|
11
|
+
#endif
|
12
|
+
|
13
|
+
extern VALUE cumo_cuda_eOutOfMemoryError;
|
14
|
+
|
15
|
+
char*
|
16
|
+
cumo_cuda_runtime_malloc(size_t size);
|
17
|
+
|
18
|
+
void
|
19
|
+
cumo_cuda_runtime_free(char *ptr);
|
20
|
+
|
21
|
+
#if defined(__cplusplus)
|
22
|
+
#if 0
|
23
|
+
{ /* satisfy cc-mode */
|
24
|
+
#endif
|
25
|
+
} /* extern "C" { */
|
26
|
+
#endif
|
27
|
+
|
28
|
+
#endif /* ifndef CUMO_CUDA_MEMORY_POOL_H */
|
@@ -0,0 +1,22 @@
|
|
1
|
+
#ifndef CUMO_CUDA_NVRTC_H
#define CUMO_CUDA_NVRTC_H

/* VALUE is used below, so include Ruby's header here instead of relying on
 * every includer having pulled it in first. */
#include <ruby.h>
#include <nvrtc.h>

#if defined(__cplusplus)
extern "C" {
#if 0
} /* satisfy cc-mode */
#endif
#endif

/* Ruby objects for the NVRTC binding; only declared here.
 * NOTE(review): by the e/m prefixes these are presumably the error class and
 * the module object -- confirm against the defining .c file. */
extern VALUE cumo_cuda_eNVRTCError;
extern VALUE cumo_cuda_mNVRTC;

#if defined(__cplusplus)
#if 0
{ /* satisfy cc-mode */
#endif
} /* extern "C" { */
#endif

#endif /* ifndef CUMO_CUDA_NVRTC_H */
|
@@ -0,0 +1,40 @@
|
|
1
|
+
#ifndef CUMO_CUDA_RUNTIME_H
|
2
|
+
#define CUMO_CUDA_RUNTIME_H
|
3
|
+
|
4
|
+
#include "cumo/narray.h"
|
5
|
+
#include <cuda_runtime.h>
|
6
|
+
|
7
|
+
#if defined(__cplusplus)
|
8
|
+
extern "C" {
|
9
|
+
#if 0
|
10
|
+
} /* satisfy cc-mode */
|
11
|
+
#endif
|
12
|
+
#endif
|
13
|
+
|
14
|
+
extern VALUE cumo_cuda_eRuntimeError;
|
15
|
+
|
16
|
+
static inline void
|
17
|
+
cumo_cuda_runtime_check_status(cudaError_t status)
|
18
|
+
{
|
19
|
+
if (status != 0) {
|
20
|
+
rb_raise(cumo_cuda_eRuntimeError, "%s (error=%d)", cudaGetErrorString(status), status);
|
21
|
+
}
|
22
|
+
}
|
23
|
+
|
24
|
+
static inline bool
|
25
|
+
cumo_cuda_runtime_is_device_memory(void* ptr)
|
26
|
+
{
|
27
|
+
struct cudaPointerAttributes attrs;
|
28
|
+
cudaError_t status = cudaPointerGetAttributes(&attrs, ptr);
|
29
|
+
cudaGetLastError(); // reset last error to success
|
30
|
+
return (status != cudaErrorInvalidValue);
|
31
|
+
}
|
32
|
+
|
33
|
+
#if defined(__cplusplus)
|
34
|
+
#if 0
|
35
|
+
{ /* satisfy cc-mode */
|
36
|
+
#endif
|
37
|
+
} /* extern "C" { */
|
38
|
+
#endif
|
39
|
+
|
40
|
+
#endif /* ifndef CUMO_CUDA_RUNTIME_H */
|
@@ -0,0 +1,238 @@
|
|
1
|
+
#ifndef CUMO_INDEXER_H
|
2
|
+
#define CUMO_INDEXER_H
|
3
|
+
|
4
|
+
/* Add cumo_ prefix */
|
5
|
+
#define na_indexer_t cumo_na_indexer_t
|
6
|
+
#define na_iarray_t cumo_na_iarray_t
|
7
|
+
#define na_reduction_arg_t cumo_na_reduction_arg_t
|
8
|
+
|
9
|
+
#ifndef __CUDACC__
|
10
|
+
#include "cumo/narray.h"
|
11
|
+
#include "cumo/ndloop.h"
|
12
|
+
#else
|
13
|
+
#include "cumo/narray_kernel.h"
|
14
|
+
#endif
|
15
|
+
|
16
|
+
/* A structure to get indices for each dimension.
|
17
|
+
*
|
18
|
+
* Note that shapes of each argument NArray are typically equivalent, and
|
19
|
+
* thus the indexer points to the same indices for all NArrays.
|
20
|
+
*/
|
21
|
+
typedef struct {
|
22
|
+
unsigned char ndim; // # of dimensions
|
23
|
+
size_t total_size; // # of total elements (product of shape[0..ndim), see na_make_indexer)
|
24
|
+
size_t shape[NA_MAX_DIMENSION]; // # of elements for each dimension
|
25
|
+
uint64_t index[NA_MAX_DIMENSION]; // indices for each dimension
|
26
|
+
uint64_t raw_index; // flat element index, stored as given by the cumo_na_indexer_set_dim* helpers
|
27
|
+
} na_indexer_t;
|
28
|
+
|
29
|
+
/* A structure to get data address with indexer.
|
30
|
+
*
|
31
|
+
* Note that strides may differ for each NArray even though the indexer points to the same indices.
|
32
|
+
*/
|
33
|
+
typedef struct {
|
34
|
+
char* ptr; // base address; the cumo_na_iarray_at_dim* helpers add step * index to it
|
35
|
+
ssize_t step[NA_MAX_DIMENSION]; // or strides; signed, applied to a char* so counted in bytes
|
36
|
+
} na_iarray_t;
|
37
|
+
|
38
|
+
typedef struct {
|
39
|
+
na_iarray_t in; // address/steps of the input (built from loop argument 0)
|
40
|
+
na_iarray_t out; // address/steps of the output (built from loop argument 1)
|
41
|
+
na_indexer_t in_indexer; // full input shape
|
42
|
+
na_indexer_t out_indexer; // input dimensions that are NOT reduced, in order
|
43
|
+
na_indexer_t reduce_indexer; // input dimensions that are reduced, in order
|
44
|
+
} na_reduction_arg_t;
|
45
|
+
|
46
|
+
#ifndef __CUDACC__
|
47
|
+
extern int na_debug_flag; // narray.c
|
48
|
+
|
49
|
+
static void
|
50
|
+
print_na_indexer_t(na_indexer_t* indexer)
|
51
|
+
{
|
52
|
+
printf("na_indexer_t = 0x%"SZF"x {\n", (size_t)indexer);
|
53
|
+
printf(" ndim = %d\n", indexer->ndim);
|
54
|
+
printf(" total_size = %ld\n", indexer->total_size);
|
55
|
+
printf(" shape = 0x%"SZF"x\n", (size_t)indexer->shape);
|
56
|
+
for (int i = 0; i < indexer->ndim; ++i) {
|
57
|
+
printf(" shape[%d] = %ld\n", i, indexer->shape[i]);
|
58
|
+
}
|
59
|
+
printf("}\n");
|
60
|
+
}
|
61
|
+
|
62
|
+
static void
|
63
|
+
print_na_iarray_t(na_iarray_t* iarray, unsigned char ndim)
|
64
|
+
{
|
65
|
+
printf("na_iarray_t = 0x%"SZF"x {\n", (size_t)iarray);
|
66
|
+
printf(" ptr = 0x%"SZF"x\n", (size_t)iarray->ptr);
|
67
|
+
printf(" step = 0x%"SZF"x\n", (size_t)iarray->step);
|
68
|
+
for (int i = 0; i < ndim; ++i) {
|
69
|
+
printf(" step[%d] = %ld\n", i, iarray->step[i]);
|
70
|
+
}
|
71
|
+
printf("}\n");
|
72
|
+
}
|
73
|
+
|
74
|
+
static void
|
75
|
+
print_na_reduction_arg_t(na_reduction_arg_t* arg)
|
76
|
+
{
|
77
|
+
printf("na_reduction_arg_t = 0x%"SZF"x {\n", (size_t)arg);
|
78
|
+
printf("--in--\n");
|
79
|
+
print_na_iarray_t(&arg->in, arg->in_indexer.ndim);
|
80
|
+
printf("--out--\n");
|
81
|
+
print_na_iarray_t(&arg->out, arg->out_indexer.ndim);
|
82
|
+
printf("--in_indexer--\n");
|
83
|
+
print_na_indexer_t(&arg->in_indexer);
|
84
|
+
printf("--out_indexer--\n");
|
85
|
+
print_na_indexer_t(&arg->out_indexer);
|
86
|
+
printf("--reduce_indexer--\n");
|
87
|
+
print_na_indexer_t(&arg->reduce_indexer);
|
88
|
+
printf("}\n");
|
89
|
+
}
|
90
|
+
|
91
|
+
// Note that you, then, have to call na_indexer_set to create index[]
|
92
|
+
static na_indexer_t
|
93
|
+
na_make_indexer(na_loop_args_t* arg)
|
94
|
+
{
|
95
|
+
na_indexer_t indexer;
|
96
|
+
indexer.ndim = arg->ndim;
|
97
|
+
indexer.total_size = 1;
|
98
|
+
for (int i = 0; i < arg->ndim; ++i) {
|
99
|
+
indexer.shape[i] = arg->shape[i];
|
100
|
+
indexer.total_size *= arg->shape[i];
|
101
|
+
}
|
102
|
+
return indexer;
|
103
|
+
}
|
104
|
+
|
105
|
+
static na_iarray_t
|
106
|
+
na_make_iarray_given_ndim(na_loop_args_t* arg, int ndim)
|
107
|
+
{
|
108
|
+
na_iarray_t iarray;
|
109
|
+
iarray.ptr = arg->ptr + arg->iter[0].pos;
|
110
|
+
for (int idim = ndim; --idim >= 0;) {
|
111
|
+
iarray.step[idim] = arg->iter[idim].step;
|
112
|
+
}
|
113
|
+
return iarray;
|
114
|
+
}
|
115
|
+
|
116
|
+
static na_iarray_t
|
117
|
+
na_make_iarray(na_loop_args_t* arg)
|
118
|
+
{
|
119
|
+
return na_make_iarray_given_ndim(arg, arg->ndim);
|
120
|
+
}
|
121
|
+
|
122
|
+
static na_reduction_arg_t
|
123
|
+
na_make_reduction_arg(na_loop_t* lp_user)
|
124
|
+
{
|
125
|
+
na_reduction_arg_t arg;
|
126
|
+
int i;
|
127
|
+
int in_ndim = lp_user->args[0].ndim;
|
128
|
+
|
129
|
+
// in shape = (2, 3, 4, 5, 6)
|
130
|
+
// axis = (1, 3)
|
131
|
+
// out shape = (2, 4, 6)
|
132
|
+
// reduce shape = (3, 5)
|
133
|
+
|
134
|
+
arg.in = na_make_iarray(&lp_user->args[0]);
|
135
|
+
arg.in_indexer = na_make_indexer(&lp_user->args[0]);
|
136
|
+
|
137
|
+
arg.reduce_indexer.ndim = 0;
|
138
|
+
arg.reduce_indexer.total_size = 1;
|
139
|
+
arg.out_indexer.ndim = 0;
|
140
|
+
arg.out_indexer.total_size = 1;
|
141
|
+
for (i = 0; i < in_ndim; ++i) {
|
142
|
+
if (na_test_reduce(lp_user->reduce, i)) {
|
143
|
+
arg.reduce_indexer.shape[arg.reduce_indexer.ndim] = arg.in_indexer.shape[i];
|
144
|
+
arg.reduce_indexer.total_size *= arg.in_indexer.shape[i];
|
145
|
+
++arg.reduce_indexer.ndim;
|
146
|
+
} else {
|
147
|
+
arg.out_indexer.shape[arg.out_indexer.ndim] = arg.in_indexer.shape[i];
|
148
|
+
arg.out_indexer.total_size *= arg.in_indexer.shape[i];
|
149
|
+
++arg.out_indexer.ndim;
|
150
|
+
}
|
151
|
+
}
|
152
|
+
arg.out = na_make_iarray_given_ndim(&lp_user->args[1], arg.out_indexer.ndim);
|
153
|
+
|
154
|
+
if (na_debug_flag) {
|
155
|
+
print_na_reduction_arg_t(&arg);
|
156
|
+
}
|
157
|
+
|
158
|
+
assert(arg.reduce_indexer.ndim == lp_user->reduce_dim);
|
159
|
+
assert(arg.in_indexer.ndim == arg.reduce_indexer.ndim + arg.out_indexer.ndim);
|
160
|
+
|
161
|
+
return arg;
|
162
|
+
}
|
163
|
+
|
164
|
+
#endif // #ifndef __CUDACC__
|
165
|
+
|
166
|
+
#define CUMO_NA_INDEXER_OPTIMIZED_NDIM 4
|
167
|
+
|
168
|
+
#ifdef __CUDACC__
|
169
|
+
|
170
|
+
// Store the flat index i in raw_index and decompose it into index[0..ndim),
// peeling off the last dimension first (index[j] = rest % shape[j], then
// rest /= shape[j]). No guard for a zero extent.
__host__ __device__
static inline void
cumo_na_indexer_set_dim(na_indexer_t* indexer, uint64_t i) {
    uint64_t rest = i;
    int d = indexer->ndim;

    indexer->raw_index = i;
    while (d-- > 0) {
        indexer->index[d] = rest % indexer->shape[d];
        rest /= indexer->shape[d];
    }
}
|
179
|
+
|
180
|
+
// Let compiler optimize
|
181
|
+
#define CUMO_NA_INDEXER_SET(NDIM) \
|
182
|
+
__host__ __device__ \
|
183
|
+
static inline void \
|
184
|
+
cumo_na_indexer_set_dim##NDIM(na_indexer_t* indexer, uint64_t i) { \
|
185
|
+
indexer->raw_index = i; \
|
186
|
+
for (int j = NDIM; --j >= 0;) { \
|
187
|
+
indexer->index[j] = i % indexer->shape[j]; \
|
188
|
+
i /= indexer->shape[j]; \
|
189
|
+
} \
|
190
|
+
}
|
191
|
+
|
192
|
+
CUMO_NA_INDEXER_SET(4)
|
193
|
+
CUMO_NA_INDEXER_SET(3)
|
194
|
+
CUMO_NA_INDEXER_SET(2)
|
195
|
+
CUMO_NA_INDEXER_SET(0)
|
196
|
+
|
197
|
+
__host__ __device__
|
198
|
+
static inline void
|
199
|
+
cumo_na_indexer_set_dim1(na_indexer_t* indexer, uint64_t i) {
|
200
|
+
indexer->raw_index = i;
|
201
|
+
}
|
202
|
+
|
203
|
+
// Return the element address selected by indexer->index[0..ndim):
//   iarray->ptr + sum(step[d] * index[d])
// The offset is accumulated as a signed value and added to the base pointer
// once. Multiplying the signed step by the unsigned 64-bit index directly
// would turn a negative step into a huge unsigned offset and step the pointer
// through out-of-range intermediate values.
__host__ __device__
static inline char*
cumo_na_iarray_at_dim(na_iarray_t* iarray, na_indexer_t* indexer) {
    ssize_t offset = 0;
    for (int idim = 0; idim < indexer->ndim; ++idim) {
        offset += iarray->step[idim] * (ssize_t)indexer->index[idim];
    }
    return iarray->ptr + offset;
}
|
212
|
+
|
213
|
+
// Let compiler optimize
|
214
|
+
#define CUMO_NA_IARRAY_AT(NDIM) \
|
215
|
+
__host__ __device__ \
|
216
|
+
static inline char* \
|
217
|
+
cumo_na_iarray_at_dim##NDIM(na_iarray_t* iarray, na_indexer_t* indexer) { \
|
218
|
+
char* ptr = iarray->ptr; \
|
219
|
+
for (int idim = 0; idim < NDIM; ++idim) { \
|
220
|
+
ptr += iarray->step[idim] * indexer->index[idim]; \
|
221
|
+
} \
|
222
|
+
return ptr; \
|
223
|
+
}
|
224
|
+
|
225
|
+
CUMO_NA_IARRAY_AT(4)
|
226
|
+
CUMO_NA_IARRAY_AT(3)
|
227
|
+
CUMO_NA_IARRAY_AT(2)
|
228
|
+
CUMO_NA_IARRAY_AT(0)
|
229
|
+
|
230
|
+
__host__ __device__
|
231
|
+
static inline char*
|
232
|
+
cumo_na_iarray_at_dim1(na_iarray_t* iarray, na_indexer_t* indexer) {
|
233
|
+
return iarray->ptr + iarray->step[0] * indexer->raw_index;
|
234
|
+
}
|
235
|
+
|
236
|
+
#endif // #ifdef __CUDACC__
|
237
|
+
|
238
|
+
#endif // CUMO_INDEXER_H
|
@@ -0,0 +1,142 @@
|
|
1
|
+
#ifndef CUMO_INTERN_H
|
2
|
+
#define CUMO_INTERN_H
|
3
|
+
|
4
|
+
void cumo_debug_breakpoint(void);
|
5
|
+
|
6
|
+
/* Add cumo_ prefix to avoid C symbol collisions with Numo without modifying C implementations */
|
7
|
+
|
8
|
+
#define rb_narray_new cumo_nary_new
|
9
|
+
#define nary_new cumo_nary_new
|
10
|
+
VALUE cumo_nary_new(VALUE elem, int ndim, size_t *shape);
|
11
|
+
#define rb_narray_view_new cumo_nary_view_new
|
12
|
+
#define nary_view_new cumo_nary_view_new
|
13
|
+
VALUE cumo_nary_view_new(VALUE elem, int ndim, size_t *shape);
|
14
|
+
#define rb_narray_debug_info cumo_nary_debug_info
|
15
|
+
#define nary_debug_info cumo_nary_debug_info
|
16
|
+
VALUE cumo_nary_debug_info(VALUE);
|
17
|
+
|
18
|
+
#define na_make_view cumo_nary_make_view
|
19
|
+
VALUE cumo_nary_make_view(VALUE self);
|
20
|
+
|
21
|
+
#define na_s_allocate cumo_nary_s_allocate
|
22
|
+
VALUE cumo_nary_s_allocate(VALUE klass);
|
23
|
+
#define na_s_allocate_view cumo_nary_s_allocate_view
|
24
|
+
VALUE cumo_nary_s_allocate_view(VALUE klass);
|
25
|
+
#define na_s_new_like cumo_nary_s_new_like
|
26
|
+
VALUE cumo_nary_s_new_like(VALUE type, VALUE obj);
|
27
|
+
|
28
|
+
#define na_alloc_shape cumo_na_alloc_shape
|
29
|
+
void cumo_na_alloc_shape(narray_t *na, int ndim);
|
30
|
+
#define na_array_to_internal_shape cumo_na_array_to_internal_shape
|
31
|
+
void cumo_na_array_to_internal_shape(VALUE self, VALUE ary, size_t *shape);
|
32
|
+
#define na_index_arg_to_internal_order cumo_na_index_arg_to_internal_order
|
33
|
+
void cumo_na_index_arg_to_internal_order(int argc, VALUE *argv, VALUE self);
|
34
|
+
#define na_setup_shape cumo_na_setup_shape
|
35
|
+
void cumo_na_setup_shape(narray_t *na, int ndim, size_t *shape);
|
36
|
+
|
37
|
+
#define na_get_elmsz cumo_nary_element_stride
|
38
|
+
#define nary_element_stride cumo_nary_element_stride
|
39
|
+
//#define na_element_stride cumo_nary_element_stride
|
40
|
+
unsigned int cumo_nary_element_stride(VALUE nary);
|
41
|
+
#define na_dtype_elmsz cumo_nary_dtype_element_stride
|
42
|
+
size_t cumo_nary_dtype_element_stride(VALUE klass);
|
43
|
+
|
44
|
+
#define na_get_pointer cumo_nary_get_pointer
|
45
|
+
char *cumo_nary_get_pointer(VALUE);
|
46
|
+
#define na_get_pointer_for_write cumo_nary_get_pointer_for_write
|
47
|
+
char *cumo_nary_get_pointer_for_write(VALUE);
|
48
|
+
#define na_get_pointer_for_read cumo_nary_get_pointer_for_read
|
49
|
+
char *cumo_nary_get_pointer_for_read(VALUE);
|
50
|
+
#define na_get_pointer_for_read_write cumo_nary_get_pointer_for_read_write
|
51
|
+
char *cumo_nary_get_pointer_for_read_write(VALUE);
|
52
|
+
#define na_get_offset cumo_nary_get_offset
|
53
|
+
size_t cumo_nary_get_offset(VALUE self);
|
54
|
+
|
55
|
+
#define na_copy_flags cumo_nary_copy_flags
|
56
|
+
void cumo_nary_copy_flags(VALUE src, VALUE dst);
|
57
|
+
|
58
|
+
#define na_check_ladder cumo_nary_check_ladder
|
59
|
+
VALUE cumo_nary_check_ladder(VALUE self, int start_dim);
|
60
|
+
#define na_check_contiguous cumo_nary_check_contiguous
|
61
|
+
VALUE cumo_nary_check_contiguous(VALUE self);
|
62
|
+
|
63
|
+
#define na_flatten_dim cumo_nary_flatten_dim
|
64
|
+
VALUE cumo_nary_flatten_dim(VALUE self, int sd);
|
65
|
+
|
66
|
+
#define na_flatten cumo_nary_flatten
|
67
|
+
VALUE cumo_nary_flatten(VALUE);
|
68
|
+
|
69
|
+
#define na_copy cumo_nary_dup
|
70
|
+
VALUE cumo_nary_dup(VALUE);
|
71
|
+
|
72
|
+
#define na_store cumo_nary_store
|
73
|
+
VALUE cumo_nary_store(VALUE self, VALUE src);
|
74
|
+
|
75
|
+
#define na_upcast cumo_na_upcast
|
76
|
+
VALUE cumo_na_upcast(VALUE type1, VALUE type2);
|
77
|
+
|
78
|
+
#define na_release_lock cumo_na_release_lock
|
79
|
+
void cumo_na_release_lock(VALUE); // currently do nothing
|
80
|
+
|
81
|
+
// used in reduce methods
|
82
|
+
#define nary_reduce_dimension cumo_nary_reduce_dimension
|
83
|
+
#define na_reduce_dimension cumo_nary_reduce_dimension
|
84
|
+
VALUE cumo_nary_reduce_dimension(int argc, VALUE *argv, int naryc, VALUE *naryv,
|
85
|
+
ndfunc_t *ndf, na_iter_func_t nan_iter);
|
86
|
+
|
87
|
+
#define nary_reduce_options cumo_nary_reduce_options
|
88
|
+
#define na_reduce_options cumo_nary_reduce_options
|
89
|
+
VALUE cumo_nary_reduce_options(VALUE axes, VALUE *opts, int naryc, VALUE *naryv,
|
90
|
+
ndfunc_t *ndf);
|
91
|
+
|
92
|
+
// ndloop
|
93
|
+
#define na_ndloop cumo_na_ndloop
|
94
|
+
VALUE cumo_na_ndloop(ndfunc_t *nf, int argc, ...);
|
95
|
+
#define na_ndloop2 cumo_na_ndloop2
|
96
|
+
VALUE cumo_na_ndloop2(ndfunc_t *nf, VALUE args);
|
97
|
+
#define na_ndloop3 cumo_na_ndloop3
|
98
|
+
VALUE cumo_na_ndloop3(ndfunc_t *nf, void *ptr, int argc, ...);
|
99
|
+
#define na_ndloop4 cumo_na_ndloop4
|
100
|
+
VALUE cumo_na_ndloop4(ndfunc_t *nf, void *ptr, VALUE args);
|
101
|
+
|
102
|
+
#define na_ndloop_cast_narray_to_rarray cumo_na_ndloop_cast_narray_to_rarray
|
103
|
+
VALUE cumo_na_ndloop_cast_narray_to_rarray(ndfunc_t *nf, VALUE nary, VALUE fmt);
|
104
|
+
#define na_ndloop_store_rarray cumo_na_ndloop_store_rarray
|
105
|
+
VALUE cumo_na_ndloop_store_rarray(ndfunc_t *nf, VALUE nary, VALUE rary);
|
106
|
+
#define na_ndloop_store_rarray2 cumo_na_ndloop_store_rarray2
|
107
|
+
VALUE cumo_na_ndloop_store_rarray2(ndfunc_t *nf, VALUE nary, VALUE rary, VALUE opt);
|
108
|
+
#define na_ndloop_inspect cumo_na_ndloop_inspect
|
109
|
+
VALUE cumo_na_ndloop_inspect(VALUE nary, na_text_func_t func, VALUE opt);
|
110
|
+
#define na_ndloop_with_index cumo_na_ndloop_with_index
|
111
|
+
VALUE cumo_na_ndloop_with_index(ndfunc_t *nf, int argc, ...);
|
112
|
+
|
113
|
+
#define na_info_str cumo_nary_info_str
|
114
|
+
VALUE cumo_nary_info_str(VALUE);
|
115
|
+
|
116
|
+
#define na_test_reduce cumo_nary_test_reduce
|
117
|
+
bool cumo_nary_test_reduce(VALUE reduce, int dim);
|
118
|
+
|
119
|
+
#define nary_step_array_index cumo_nary_step_array_index
|
120
|
+
void cumo_nary_step_array_index(VALUE self, size_t ary_size, size_t *plen, ssize_t *pbeg, ssize_t *pstep);
|
121
|
+
#define nary_step_sequence cumo_nary_step_sequence
|
122
|
+
void cumo_nary_step_sequence(VALUE self, size_t *plen, double *pbeg, double *pstep);
|
123
|
+
|
124
|
+
// used in aref, aset
|
125
|
+
#define na_get_result_dimension cumo_nary_get_result_dimension
|
126
|
+
int cumo_nary_get_result_dimension(VALUE self, int argc, VALUE *argv, ssize_t stride, size_t *pos_idx);
|
127
|
+
#define na_aref_main cumo_nary_aref_main
|
128
|
+
VALUE cumo_nary_aref_main(int nidx, VALUE *idx, VALUE self, int keep_dim, int result_nd, size_t pos);
|
129
|
+
|
130
|
+
// defined in array, used in math
|
131
|
+
#define na_ary_composition_dtype cumo_na_ary_composition_dtype
|
132
|
+
VALUE cumo_na_ary_composition_dtype(VALUE ary);
|
133
|
+
|
134
|
+
#include "ruby/version.h"
|
135
|
+
|
136
|
+
#if RUBY_API_VERSION_CODE == 20100 // 2.1.0
|
137
|
+
int rb_get_kwargs(VALUE keyword_hash, const ID *table, int required, int optional, VALUE *);
|
138
|
+
VALUE rb_extract_keywords(VALUE *orighash);
|
139
|
+
#endif
|
140
|
+
|
141
|
+
|
142
|
+
#endif /* ifndef CUMO_INTERN_H */
|