cumo 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +27 -0
- data/.travis.yml +5 -0
- data/3rd_party/mkmf-cu/.gitignore +36 -0
- data/3rd_party/mkmf-cu/Gemfile +3 -0
- data/3rd_party/mkmf-cu/LICENSE +21 -0
- data/3rd_party/mkmf-cu/README.md +36 -0
- data/3rd_party/mkmf-cu/Rakefile +11 -0
- data/3rd_party/mkmf-cu/bin/mkmf-cu-nvcc +4 -0
- data/3rd_party/mkmf-cu/lib/mkmf-cu.rb +32 -0
- data/3rd_party/mkmf-cu/lib/mkmf-cu/cli.rb +80 -0
- data/3rd_party/mkmf-cu/lib/mkmf-cu/nvcc.rb +157 -0
- data/3rd_party/mkmf-cu/mkmf-cu.gemspec +16 -0
- data/3rd_party/mkmf-cu/test/test_mkmf-cu.rb +67 -0
- data/CODE_OF_CONDUCT.md +46 -0
- data/Gemfile +8 -0
- data/LICENSE.txt +82 -0
- data/README.md +252 -0
- data/Rakefile +43 -0
- data/bench/broadcast_fp32.rb +138 -0
- data/bench/cumo_bench.rb +193 -0
- data/bench/numo_bench.rb +138 -0
- data/bench/reduction_fp32.rb +117 -0
- data/bin/console +14 -0
- data/bin/setup +8 -0
- data/cumo.gemspec +32 -0
- data/ext/cumo/cuda/cublas.c +278 -0
- data/ext/cumo/cuda/driver.c +421 -0
- data/ext/cumo/cuda/memory_pool.cpp +185 -0
- data/ext/cumo/cuda/memory_pool_impl.cpp +308 -0
- data/ext/cumo/cuda/memory_pool_impl.hpp +370 -0
- data/ext/cumo/cuda/memory_pool_impl_test.cpp +554 -0
- data/ext/cumo/cuda/nvrtc.c +207 -0
- data/ext/cumo/cuda/runtime.c +167 -0
- data/ext/cumo/cumo.c +148 -0
- data/ext/cumo/depend.erb +58 -0
- data/ext/cumo/extconf.rb +179 -0
- data/ext/cumo/include/cumo.h +25 -0
- data/ext/cumo/include/cumo/compat.h +23 -0
- data/ext/cumo/include/cumo/cuda/cublas.h +153 -0
- data/ext/cumo/include/cumo/cuda/cumo_thrust.hpp +187 -0
- data/ext/cumo/include/cumo/cuda/cumo_thrust_complex.hpp +79 -0
- data/ext/cumo/include/cumo/cuda/driver.h +22 -0
- data/ext/cumo/include/cumo/cuda/memory_pool.h +28 -0
- data/ext/cumo/include/cumo/cuda/nvrtc.h +22 -0
- data/ext/cumo/include/cumo/cuda/runtime.h +40 -0
- data/ext/cumo/include/cumo/indexer.h +238 -0
- data/ext/cumo/include/cumo/intern.h +142 -0
- data/ext/cumo/include/cumo/intern_fwd.h +38 -0
- data/ext/cumo/include/cumo/intern_kernel.h +6 -0
- data/ext/cumo/include/cumo/narray.h +429 -0
- data/ext/cumo/include/cumo/narray_kernel.h +149 -0
- data/ext/cumo/include/cumo/ndloop.h +95 -0
- data/ext/cumo/include/cumo/reduce_kernel.h +126 -0
- data/ext/cumo/include/cumo/template.h +158 -0
- data/ext/cumo/include/cumo/template_kernel.h +77 -0
- data/ext/cumo/include/cumo/types/bit.h +40 -0
- data/ext/cumo/include/cumo/types/bit_kernel.h +34 -0
- data/ext/cumo/include/cumo/types/complex.h +402 -0
- data/ext/cumo/include/cumo/types/complex_kernel.h +414 -0
- data/ext/cumo/include/cumo/types/complex_macro.h +382 -0
- data/ext/cumo/include/cumo/types/complex_macro_kernel.h +186 -0
- data/ext/cumo/include/cumo/types/dcomplex.h +46 -0
- data/ext/cumo/include/cumo/types/dcomplex_kernel.h +13 -0
- data/ext/cumo/include/cumo/types/dfloat.h +47 -0
- data/ext/cumo/include/cumo/types/dfloat_kernel.h +14 -0
- data/ext/cumo/include/cumo/types/float_def.h +34 -0
- data/ext/cumo/include/cumo/types/float_def_kernel.h +39 -0
- data/ext/cumo/include/cumo/types/float_macro.h +191 -0
- data/ext/cumo/include/cumo/types/float_macro_kernel.h +158 -0
- data/ext/cumo/include/cumo/types/int16.h +24 -0
- data/ext/cumo/include/cumo/types/int16_kernel.h +23 -0
- data/ext/cumo/include/cumo/types/int32.h +24 -0
- data/ext/cumo/include/cumo/types/int32_kernel.h +19 -0
- data/ext/cumo/include/cumo/types/int64.h +24 -0
- data/ext/cumo/include/cumo/types/int64_kernel.h +19 -0
- data/ext/cumo/include/cumo/types/int8.h +24 -0
- data/ext/cumo/include/cumo/types/int8_kernel.h +19 -0
- data/ext/cumo/include/cumo/types/int_macro.h +67 -0
- data/ext/cumo/include/cumo/types/int_macro_kernel.h +48 -0
- data/ext/cumo/include/cumo/types/real_accum.h +486 -0
- data/ext/cumo/include/cumo/types/real_accum_kernel.h +101 -0
- data/ext/cumo/include/cumo/types/robj_macro.h +80 -0
- data/ext/cumo/include/cumo/types/robj_macro_kernel.h +0 -0
- data/ext/cumo/include/cumo/types/robject.h +27 -0
- data/ext/cumo/include/cumo/types/robject_kernel.h +7 -0
- data/ext/cumo/include/cumo/types/scomplex.h +46 -0
- data/ext/cumo/include/cumo/types/scomplex_kernel.h +13 -0
- data/ext/cumo/include/cumo/types/sfloat.h +48 -0
- data/ext/cumo/include/cumo/types/sfloat_kernel.h +14 -0
- data/ext/cumo/include/cumo/types/uint16.h +25 -0
- data/ext/cumo/include/cumo/types/uint16_kernel.h +20 -0
- data/ext/cumo/include/cumo/types/uint32.h +25 -0
- data/ext/cumo/include/cumo/types/uint32_kernel.h +20 -0
- data/ext/cumo/include/cumo/types/uint64.h +25 -0
- data/ext/cumo/include/cumo/types/uint64_kernel.h +20 -0
- data/ext/cumo/include/cumo/types/uint8.h +25 -0
- data/ext/cumo/include/cumo/types/uint8_kernel.h +20 -0
- data/ext/cumo/include/cumo/types/uint_macro.h +58 -0
- data/ext/cumo/include/cumo/types/uint_macro_kernel.h +38 -0
- data/ext/cumo/include/cumo/types/xint_macro.h +169 -0
- data/ext/cumo/include/cumo/types/xint_macro_kernel.h +88 -0
- data/ext/cumo/narray/SFMT-params.h +97 -0
- data/ext/cumo/narray/SFMT-params19937.h +46 -0
- data/ext/cumo/narray/SFMT.c +620 -0
- data/ext/cumo/narray/SFMT.h +167 -0
- data/ext/cumo/narray/array.c +638 -0
- data/ext/cumo/narray/data.c +961 -0
- data/ext/cumo/narray/gen/cogen.rb +56 -0
- data/ext/cumo/narray/gen/cogen_kernel.rb +58 -0
- data/ext/cumo/narray/gen/def/bit.rb +37 -0
- data/ext/cumo/narray/gen/def/dcomplex.rb +39 -0
- data/ext/cumo/narray/gen/def/dfloat.rb +37 -0
- data/ext/cumo/narray/gen/def/int16.rb +36 -0
- data/ext/cumo/narray/gen/def/int32.rb +36 -0
- data/ext/cumo/narray/gen/def/int64.rb +36 -0
- data/ext/cumo/narray/gen/def/int8.rb +36 -0
- data/ext/cumo/narray/gen/def/robject.rb +37 -0
- data/ext/cumo/narray/gen/def/scomplex.rb +39 -0
- data/ext/cumo/narray/gen/def/sfloat.rb +37 -0
- data/ext/cumo/narray/gen/def/uint16.rb +36 -0
- data/ext/cumo/narray/gen/def/uint32.rb +36 -0
- data/ext/cumo/narray/gen/def/uint64.rb +36 -0
- data/ext/cumo/narray/gen/def/uint8.rb +36 -0
- data/ext/cumo/narray/gen/erbpp2.rb +346 -0
- data/ext/cumo/narray/gen/narray_def.rb +268 -0
- data/ext/cumo/narray/gen/spec.rb +425 -0
- data/ext/cumo/narray/gen/tmpl/accum.c +86 -0
- data/ext/cumo/narray/gen/tmpl/accum_binary.c +121 -0
- data/ext/cumo/narray/gen/tmpl/accum_binary_kernel.cu +61 -0
- data/ext/cumo/narray/gen/tmpl/accum_index.c +119 -0
- data/ext/cumo/narray/gen/tmpl/accum_index_kernel.cu +66 -0
- data/ext/cumo/narray/gen/tmpl/accum_kernel.cu +12 -0
- data/ext/cumo/narray/gen/tmpl/alloc_func.c +107 -0
- data/ext/cumo/narray/gen/tmpl/allocate.c +37 -0
- data/ext/cumo/narray/gen/tmpl/aref.c +66 -0
- data/ext/cumo/narray/gen/tmpl/aref_cpu.c +50 -0
- data/ext/cumo/narray/gen/tmpl/aset.c +56 -0
- data/ext/cumo/narray/gen/tmpl/binary.c +162 -0
- data/ext/cumo/narray/gen/tmpl/binary2.c +70 -0
- data/ext/cumo/narray/gen/tmpl/binary2_kernel.cu +15 -0
- data/ext/cumo/narray/gen/tmpl/binary_kernel.cu +31 -0
- data/ext/cumo/narray/gen/tmpl/binary_s.c +45 -0
- data/ext/cumo/narray/gen/tmpl/binary_s_kernel.cu +15 -0
- data/ext/cumo/narray/gen/tmpl/bincount.c +181 -0
- data/ext/cumo/narray/gen/tmpl/cast.c +44 -0
- data/ext/cumo/narray/gen/tmpl/cast_array.c +13 -0
- data/ext/cumo/narray/gen/tmpl/class.c +9 -0
- data/ext/cumo/narray/gen/tmpl/class_kernel.cu +6 -0
- data/ext/cumo/narray/gen/tmpl/clip.c +121 -0
- data/ext/cumo/narray/gen/tmpl/coerce_cast.c +10 -0
- data/ext/cumo/narray/gen/tmpl/complex_accum_kernel.cu +129 -0
- data/ext/cumo/narray/gen/tmpl/cond_binary.c +68 -0
- data/ext/cumo/narray/gen/tmpl/cond_binary_kernel.cu +18 -0
- data/ext/cumo/narray/gen/tmpl/cond_unary.c +46 -0
- data/ext/cumo/narray/gen/tmpl/cum.c +50 -0
- data/ext/cumo/narray/gen/tmpl/each.c +47 -0
- data/ext/cumo/narray/gen/tmpl/each_with_index.c +70 -0
- data/ext/cumo/narray/gen/tmpl/ewcomp.c +79 -0
- data/ext/cumo/narray/gen/tmpl/ewcomp_kernel.cu +19 -0
- data/ext/cumo/narray/gen/tmpl/extract.c +22 -0
- data/ext/cumo/narray/gen/tmpl/extract_cpu.c +26 -0
- data/ext/cumo/narray/gen/tmpl/extract_data.c +53 -0
- data/ext/cumo/narray/gen/tmpl/eye.c +105 -0
- data/ext/cumo/narray/gen/tmpl/eye_kernel.cu +19 -0
- data/ext/cumo/narray/gen/tmpl/fill.c +52 -0
- data/ext/cumo/narray/gen/tmpl/fill_kernel.cu +29 -0
- data/ext/cumo/narray/gen/tmpl/float_accum_kernel.cu +106 -0
- data/ext/cumo/narray/gen/tmpl/format.c +62 -0
- data/ext/cumo/narray/gen/tmpl/format_to_a.c +49 -0
- data/ext/cumo/narray/gen/tmpl/frexp.c +38 -0
- data/ext/cumo/narray/gen/tmpl/gemm.c +203 -0
- data/ext/cumo/narray/gen/tmpl/init_class.c +20 -0
- data/ext/cumo/narray/gen/tmpl/init_module.c +12 -0
- data/ext/cumo/narray/gen/tmpl/inspect.c +21 -0
- data/ext/cumo/narray/gen/tmpl/lib.c +50 -0
- data/ext/cumo/narray/gen/tmpl/lib_kernel.cu +24 -0
- data/ext/cumo/narray/gen/tmpl/logseq.c +102 -0
- data/ext/cumo/narray/gen/tmpl/logseq_kernel.cu +31 -0
- data/ext/cumo/narray/gen/tmpl/map_with_index.c +98 -0
- data/ext/cumo/narray/gen/tmpl/median.c +66 -0
- data/ext/cumo/narray/gen/tmpl/minmax.c +47 -0
- data/ext/cumo/narray/gen/tmpl/module.c +9 -0
- data/ext/cumo/narray/gen/tmpl/module_kernel.cu +1 -0
- data/ext/cumo/narray/gen/tmpl/new_dim0.c +15 -0
- data/ext/cumo/narray/gen/tmpl/new_dim0_kernel.cu +8 -0
- data/ext/cumo/narray/gen/tmpl/poly.c +50 -0
- data/ext/cumo/narray/gen/tmpl/pow.c +97 -0
- data/ext/cumo/narray/gen/tmpl/pow_kernel.cu +29 -0
- data/ext/cumo/narray/gen/tmpl/powint.c +17 -0
- data/ext/cumo/narray/gen/tmpl/qsort.c +212 -0
- data/ext/cumo/narray/gen/tmpl/rand.c +168 -0
- data/ext/cumo/narray/gen/tmpl/rand_norm.c +121 -0
- data/ext/cumo/narray/gen/tmpl/real_accum_kernel.cu +75 -0
- data/ext/cumo/narray/gen/tmpl/seq.c +112 -0
- data/ext/cumo/narray/gen/tmpl/seq_kernel.cu +43 -0
- data/ext/cumo/narray/gen/tmpl/set2.c +57 -0
- data/ext/cumo/narray/gen/tmpl/sort.c +48 -0
- data/ext/cumo/narray/gen/tmpl/sort_index.c +111 -0
- data/ext/cumo/narray/gen/tmpl/store.c +41 -0
- data/ext/cumo/narray/gen/tmpl/store_array.c +187 -0
- data/ext/cumo/narray/gen/tmpl/store_array_kernel.cu +58 -0
- data/ext/cumo/narray/gen/tmpl/store_bit.c +86 -0
- data/ext/cumo/narray/gen/tmpl/store_bit_kernel.cu +66 -0
- data/ext/cumo/narray/gen/tmpl/store_from.c +81 -0
- data/ext/cumo/narray/gen/tmpl/store_from_kernel.cu +58 -0
- data/ext/cumo/narray/gen/tmpl/store_kernel.cu +3 -0
- data/ext/cumo/narray/gen/tmpl/store_numeric.c +9 -0
- data/ext/cumo/narray/gen/tmpl/to_a.c +43 -0
- data/ext/cumo/narray/gen/tmpl/unary.c +132 -0
- data/ext/cumo/narray/gen/tmpl/unary2.c +60 -0
- data/ext/cumo/narray/gen/tmpl/unary_kernel.cu +72 -0
- data/ext/cumo/narray/gen/tmpl/unary_ret2.c +34 -0
- data/ext/cumo/narray/gen/tmpl/unary_s.c +86 -0
- data/ext/cumo/narray/gen/tmpl/unary_s_kernel.cu +58 -0
- data/ext/cumo/narray/gen/tmpl_bit/allocate.c +24 -0
- data/ext/cumo/narray/gen/tmpl_bit/aref.c +54 -0
- data/ext/cumo/narray/gen/tmpl_bit/aref_cpu.c +57 -0
- data/ext/cumo/narray/gen/tmpl_bit/aset.c +56 -0
- data/ext/cumo/narray/gen/tmpl_bit/binary.c +98 -0
- data/ext/cumo/narray/gen/tmpl_bit/bit_count.c +64 -0
- data/ext/cumo/narray/gen/tmpl_bit/bit_count_cpu.c +88 -0
- data/ext/cumo/narray/gen/tmpl_bit/bit_count_kernel.cu +76 -0
- data/ext/cumo/narray/gen/tmpl_bit/bit_reduce.c +133 -0
- data/ext/cumo/narray/gen/tmpl_bit/each.c +48 -0
- data/ext/cumo/narray/gen/tmpl_bit/each_with_index.c +70 -0
- data/ext/cumo/narray/gen/tmpl_bit/extract.c +30 -0
- data/ext/cumo/narray/gen/tmpl_bit/extract_cpu.c +29 -0
- data/ext/cumo/narray/gen/tmpl_bit/fill.c +69 -0
- data/ext/cumo/narray/gen/tmpl_bit/format.c +64 -0
- data/ext/cumo/narray/gen/tmpl_bit/format_to_a.c +51 -0
- data/ext/cumo/narray/gen/tmpl_bit/inspect.c +21 -0
- data/ext/cumo/narray/gen/tmpl_bit/mask.c +136 -0
- data/ext/cumo/narray/gen/tmpl_bit/none_p.c +14 -0
- data/ext/cumo/narray/gen/tmpl_bit/store_array.c +108 -0
- data/ext/cumo/narray/gen/tmpl_bit/store_bit.c +70 -0
- data/ext/cumo/narray/gen/tmpl_bit/store_from.c +60 -0
- data/ext/cumo/narray/gen/tmpl_bit/to_a.c +47 -0
- data/ext/cumo/narray/gen/tmpl_bit/unary.c +81 -0
- data/ext/cumo/narray/gen/tmpl_bit/where.c +90 -0
- data/ext/cumo/narray/gen/tmpl_bit/where2.c +95 -0
- data/ext/cumo/narray/index.c +880 -0
- data/ext/cumo/narray/kwargs.c +153 -0
- data/ext/cumo/narray/math.c +142 -0
- data/ext/cumo/narray/narray.c +1948 -0
- data/ext/cumo/narray/ndloop.c +2105 -0
- data/ext/cumo/narray/rand.c +45 -0
- data/ext/cumo/narray/step.c +474 -0
- data/ext/cumo/narray/struct.c +886 -0
- data/lib/cumo.rb +3 -0
- data/lib/cumo/cuda.rb +11 -0
- data/lib/cumo/cuda/compile_error.rb +36 -0
- data/lib/cumo/cuda/compiler.rb +161 -0
- data/lib/cumo/cuda/device.rb +47 -0
- data/lib/cumo/cuda/link_state.rb +31 -0
- data/lib/cumo/cuda/module.rb +40 -0
- data/lib/cumo/cuda/nvrtc_program.rb +27 -0
- data/lib/cumo/linalg.rb +12 -0
- data/lib/cumo/narray.rb +2 -0
- data/lib/cumo/narray/extra.rb +1278 -0
- data/lib/erbpp.rb +294 -0
- data/lib/erbpp/line_number.rb +137 -0
- data/lib/erbpp/narray_def.rb +381 -0
- data/numo-narray-version +1 -0
- data/run.gdb +7 -0
- metadata +353 -0
|
@@ -0,0 +1,95 @@
|
|
|
1
|
+
static void
|
|
2
|
+
<%=c_iter%>(na_loop_t *const lp)
|
|
3
|
+
{
|
|
4
|
+
size_t i;
|
|
5
|
+
BIT_DIGIT *a;
|
|
6
|
+
size_t p;
|
|
7
|
+
ssize_t s;
|
|
8
|
+
size_t *idx;
|
|
9
|
+
BIT_DIGIT x=0;
|
|
10
|
+
char *idx0, *idx1;
|
|
11
|
+
size_t count;
|
|
12
|
+
size_t e;
|
|
13
|
+
where_opt_t *g;
|
|
14
|
+
|
|
15
|
+
// TODO(sonots): CUDA kernelize
|
|
16
|
+
SHOW_SYNCHRONIZE_FIXME_WARNING_ONCE("<%=name%>", "<%=type_name%>");
|
|
17
|
+
cumo_cuda_runtime_check_status(cudaDeviceSynchronize());
|
|
18
|
+
|
|
19
|
+
g = (where_opt_t*)(lp->opt_ptr);
|
|
20
|
+
count = g->count;
|
|
21
|
+
idx0 = g->idx0;
|
|
22
|
+
idx1 = g->idx1;
|
|
23
|
+
e = g->elmsz;
|
|
24
|
+
INIT_COUNTER(lp, i);
|
|
25
|
+
INIT_PTR_BIT_IDX(lp, 0, a, p, s, idx);
|
|
26
|
+
if (idx) {
|
|
27
|
+
for (; i--;) {
|
|
28
|
+
LOAD_BIT(a, p+*idx, x);
|
|
29
|
+
idx++;
|
|
30
|
+
if (x==0) {
|
|
31
|
+
STORE_INT(idx0,e,count);
|
|
32
|
+
idx0 += e;
|
|
33
|
+
} else {
|
|
34
|
+
STORE_INT(idx1,e,count);
|
|
35
|
+
idx1 += e;
|
|
36
|
+
}
|
|
37
|
+
count++;
|
|
38
|
+
}
|
|
39
|
+
} else {
|
|
40
|
+
for (; i--;) {
|
|
41
|
+
LOAD_BIT(a, p, x);
|
|
42
|
+
p+=s;
|
|
43
|
+
if (x==0) {
|
|
44
|
+
STORE_INT(idx0,e,count);
|
|
45
|
+
idx0 += e;
|
|
46
|
+
} else {
|
|
47
|
+
STORE_INT(idx1,e,count);
|
|
48
|
+
idx1 += e;
|
|
49
|
+
}
|
|
50
|
+
count++;
|
|
51
|
+
}
|
|
52
|
+
}
|
|
53
|
+
g->count = count;
|
|
54
|
+
g->idx0 = idx0;
|
|
55
|
+
g->idx1 = idx1;
|
|
56
|
+
}
|
|
57
|
+
|
|
58
|
+
/*
|
|
59
|
+
Returns two index arrays.
|
|
60
|
+
The first array contains index where the bit is one (true).
|
|
61
|
+
The second array contains index where the bit is zero (false).
|
|
62
|
+
@overload <%=op_map%>
|
|
63
|
+
@return [Cumo::Int32,Cumo::Int64]*2
|
|
64
|
+
*/
|
|
65
|
+
static VALUE
|
|
66
|
+
<%=c_func(0)%>(VALUE self)
|
|
67
|
+
{
|
|
68
|
+
VALUE idx_1, idx_0;
|
|
69
|
+
size_t size, n_1, n_0;
|
|
70
|
+
where_opt_t *g;
|
|
71
|
+
|
|
72
|
+
ndfunc_arg_in_t ain[1] = {{cT,0}};
|
|
73
|
+
ndfunc_t ndf = { <%=c_iter%>, FULL_LOOP, 1, 0, ain, 0 };
|
|
74
|
+
|
|
75
|
+
size = RNARRAY_SIZE(self);
|
|
76
|
+
n_1 = NUM2SIZET(<%=find_tmpl("count_true_cpu").c_func%>(0, NULL, self));
|
|
77
|
+
n_0 = size - n_1;
|
|
78
|
+
g = ALLOCA_N(where_opt_t,1);
|
|
79
|
+
g->count = 0;
|
|
80
|
+
if (size>4294967295ul) {
|
|
81
|
+
idx_1 = nary_new(cumo_cInt64, 1, &n_1);
|
|
82
|
+
idx_0 = nary_new(cumo_cInt64, 1, &n_0);
|
|
83
|
+
g->elmsz = 8;
|
|
84
|
+
} else {
|
|
85
|
+
idx_1 = nary_new(cumo_cInt32, 1, &n_1);
|
|
86
|
+
idx_0 = nary_new(cumo_cInt32, 1, &n_0);
|
|
87
|
+
g->elmsz = 4;
|
|
88
|
+
}
|
|
89
|
+
g->idx1 = na_get_pointer_for_write(idx_1);
|
|
90
|
+
g->idx0 = na_get_pointer_for_write(idx_0);
|
|
91
|
+
na_ndloop3(&ndf, g, 1, self);
|
|
92
|
+
na_release_lock(idx_0);
|
|
93
|
+
na_release_lock(idx_1);
|
|
94
|
+
return rb_assoc_new(idx_1,idx_0);
|
|
95
|
+
}
|
|
@@ -0,0 +1,880 @@
|
|
|
1
|
+
#include <string.h>
|
|
2
|
+
#include <ruby.h>
|
|
3
|
+
#include "cumo/narray.h"
|
|
4
|
+
#include "cumo/cuda/runtime.h"
|
|
5
|
+
#include "cumo/template.h"
|
|
6
|
+
|
|
7
|
+
#if SIZEOF_VOIDP == 8
|
|
8
|
+
#define cIndex cumo_cInt64
|
|
9
|
+
#elif SIZEOF_VOIDP == 4
|
|
10
|
+
#define cIndex cumo_cInt32
|
|
11
|
+
#endif
|
|
12
|
+
|
|
13
|
+
// from ruby/enumerator.c
|
|
14
|
+
struct enumerator {
|
|
15
|
+
VALUE obj;
|
|
16
|
+
ID meth;
|
|
17
|
+
VALUE args;
|
|
18
|
+
// use only above in this source
|
|
19
|
+
VALUE fib;
|
|
20
|
+
VALUE dst;
|
|
21
|
+
VALUE lookahead;
|
|
22
|
+
VALUE feedvalue;
|
|
23
|
+
VALUE stop_exc;
|
|
24
|
+
VALUE size;
|
|
25
|
+
// incompatible below depending on ruby version
|
|
26
|
+
//VALUE procs; // ruby 2.4
|
|
27
|
+
//rb_enumerator_size_func *size_fn; // ruby 2.1-2.4
|
|
28
|
+
//VALUE (*size_fn)(ANYARGS); // ruby 2.0
|
|
29
|
+
};
|
|
30
|
+
|
|
31
|
+
// Parsed form of one index argument (one entry per indexed dimension).
//
// note: the memory referenced by this pointer is not freed and causes a
// memory leak.
//
// @example
//   a[1..3,1] generates two na_index_arg_t(s). The first is for 1..3, and
//   the second is for 1.
typedef struct {
    size_t  n;        // the number of elements of the dimension
    size_t  beg;      // the starting point in the dimension
    ssize_t step;     // the step size of the dimension
    size_t *idx;      // list of indices
    int     reduce;   // true if the dimension is reduced by addition
    int     orig_dim; // the dimension of the original array
} na_index_arg_t;
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
static void
|
|
46
|
+
print_index_arg(na_index_arg_t *q, int n)
|
|
47
|
+
{
|
|
48
|
+
int i;
|
|
49
|
+
printf("na_index_arg_t = 0x%"SZF"x {\n",(size_t)q);
|
|
50
|
+
for (i=0; i<n; i++) {
|
|
51
|
+
printf(" q[%d].n=%"SZF"d\n",i,q[i].n);
|
|
52
|
+
printf(" q[%d].beg=%"SZF"d\n",i,q[i].beg);
|
|
53
|
+
printf(" q[%d].step=%"SZF"d\n",i,q[i].step);
|
|
54
|
+
printf(" q[%d].idx=0x%"SZF"x\n",i,(size_t)q[i].idx);
|
|
55
|
+
printf(" q[%d].reduce=0x%x\n",i,q[i].reduce);
|
|
56
|
+
printf(" q[%d].orig_dim=%d\n",i,q[i].orig_dim);
|
|
57
|
+
}
|
|
58
|
+
printf("}\n");
|
|
59
|
+
}
|
|
60
|
+
|
|
61
|
+
static VALUE sym_ast;
|
|
62
|
+
static VALUE sym_all;
|
|
63
|
+
//static VALUE sym_reduce;
|
|
64
|
+
static VALUE sym_minus;
|
|
65
|
+
static VALUE sym_new;
|
|
66
|
+
static VALUE sym_reverse;
|
|
67
|
+
static VALUE sym_plus;
|
|
68
|
+
static VALUE sym_sum;
|
|
69
|
+
static VALUE sym_tilde;
|
|
70
|
+
static VALUE sym_rest;
|
|
71
|
+
static ID id_beg;
|
|
72
|
+
static ID id_end;
|
|
73
|
+
static ID id_exclude_end;
|
|
74
|
+
static ID id_each;
|
|
75
|
+
static ID id_step;
|
|
76
|
+
static ID id_dup;
|
|
77
|
+
static ID id_bracket;
|
|
78
|
+
static ID id_shift_left;
|
|
79
|
+
static ID id_mask;
|
|
80
|
+
|
|
81
|
+
|
|
82
|
+
static void
|
|
83
|
+
na_index_set_step(na_index_arg_t *q, int i, size_t n, size_t beg, ssize_t step)
|
|
84
|
+
{
|
|
85
|
+
q->n = n;
|
|
86
|
+
q->beg = beg;
|
|
87
|
+
q->step = step;
|
|
88
|
+
q->idx = NULL;
|
|
89
|
+
q->reduce = 0;
|
|
90
|
+
q->orig_dim = i;
|
|
91
|
+
}
|
|
92
|
+
|
|
93
|
+
static void
|
|
94
|
+
na_index_set_scalar(na_index_arg_t *q, int i, ssize_t size, ssize_t x)
|
|
95
|
+
{
|
|
96
|
+
if (x < -size || x >= size)
|
|
97
|
+
rb_raise(rb_eRangeError,
|
|
98
|
+
"array index (%"SZF"d) is out of array size (%"SZF"d)",
|
|
99
|
+
x, size);
|
|
100
|
+
if (x < 0)
|
|
101
|
+
x += size;
|
|
102
|
+
q->n = 1;
|
|
103
|
+
q->beg = x;
|
|
104
|
+
q->step = 0;
|
|
105
|
+
q->idx = NULL;
|
|
106
|
+
q->reduce = 0;
|
|
107
|
+
q->orig_dim = i;
|
|
108
|
+
}
|
|
109
|
+
|
|
110
|
+
static inline ssize_t
|
|
111
|
+
na_range_check(ssize_t pos, ssize_t size, int dim)
|
|
112
|
+
{
|
|
113
|
+
ssize_t idx=pos;
|
|
114
|
+
|
|
115
|
+
if (idx < 0) idx += size;
|
|
116
|
+
if (idx < 0 || idx >= size) {
|
|
117
|
+
rb_raise(rb_eIndexError, "index=%"SZF"d out of shape[%d]=%"SZF"d",
|
|
118
|
+
pos, dim, size);
|
|
119
|
+
}
|
|
120
|
+
return idx;
|
|
121
|
+
}
|
|
122
|
+
|
|
123
|
+
static void
|
|
124
|
+
na_parse_array(VALUE ary, int orig_dim, ssize_t size, na_index_arg_t *q)
|
|
125
|
+
{
|
|
126
|
+
int k;
|
|
127
|
+
int n = RARRAY_LEN(ary);
|
|
128
|
+
q->idx = ALLOC_N(size_t, n);
|
|
129
|
+
for (k=0; k<n; k++) {
|
|
130
|
+
q->idx[k] = na_range_check(NUM2SSIZET(RARRAY_AREF(ary,k)), size, orig_dim);
|
|
131
|
+
}
|
|
132
|
+
q->n = n;
|
|
133
|
+
q->beg = 0;
|
|
134
|
+
q->step = 1;
|
|
135
|
+
q->reduce = 0;
|
|
136
|
+
q->orig_dim = orig_dim;
|
|
137
|
+
}
|
|
138
|
+
|
|
139
|
+
static void
|
|
140
|
+
na_parse_narray_index(VALUE a, int orig_dim, ssize_t size, na_index_arg_t *q)
|
|
141
|
+
{
|
|
142
|
+
VALUE idx;
|
|
143
|
+
narray_t *na;
|
|
144
|
+
narray_data_t *nidx;
|
|
145
|
+
size_t k, n;
|
|
146
|
+
ssize_t *nidxp;
|
|
147
|
+
|
|
148
|
+
GetNArray(a,na);
|
|
149
|
+
if (NA_NDIM(na) != 1) {
|
|
150
|
+
rb_raise(rb_eIndexError, "should be 1-d NArray");
|
|
151
|
+
}
|
|
152
|
+
n = NA_SIZE(na);
|
|
153
|
+
idx = nary_new(cIndex,1,&n);
|
|
154
|
+
na_store(idx,a);
|
|
155
|
+
|
|
156
|
+
GetNArrayData(idx,nidx);
|
|
157
|
+
nidxp = (ssize_t*)nidx->ptr;
|
|
158
|
+
q->idx = ALLOC_N(size_t, n);
|
|
159
|
+
|
|
160
|
+
// ndixp is cuda memory (cuda narray)
|
|
161
|
+
SHOW_SYNCHRONIZE_WARNING_ONCE("na_parse_narray_index", "any");
|
|
162
|
+
cumo_cuda_runtime_check_status(cudaDeviceSynchronize());
|
|
163
|
+
|
|
164
|
+
for (k=0; k<n; k++) {
|
|
165
|
+
q->idx[k] = na_range_check(nidxp[k], size, orig_dim);
|
|
166
|
+
}
|
|
167
|
+
q->n = n;
|
|
168
|
+
q->beg = 0;
|
|
169
|
+
q->step = 1;
|
|
170
|
+
q->reduce = 0;
|
|
171
|
+
q->orig_dim = orig_dim;
|
|
172
|
+
}
|
|
173
|
+
|
|
174
|
+
static void
|
|
175
|
+
na_parse_range(VALUE range, ssize_t step, int orig_dim, ssize_t size, na_index_arg_t *q)
|
|
176
|
+
{
|
|
177
|
+
int n;
|
|
178
|
+
VALUE excl_end;
|
|
179
|
+
ssize_t beg, end, beg_orig, end_orig;
|
|
180
|
+
const char *dot = "..", *edot = "...";
|
|
181
|
+
|
|
182
|
+
beg = beg_orig = NUM2SSIZET(rb_funcall(range,id_beg,0));
|
|
183
|
+
if (beg < 0) {
|
|
184
|
+
beg += size;
|
|
185
|
+
}
|
|
186
|
+
end = end_orig = NUM2SSIZET(rb_funcall(range,id_end,0));
|
|
187
|
+
if (end < 0) {
|
|
188
|
+
end += size;
|
|
189
|
+
}
|
|
190
|
+
excl_end = rb_funcall(range,id_exclude_end,0);
|
|
191
|
+
if (RTEST(excl_end)) {
|
|
192
|
+
end--;
|
|
193
|
+
dot = edot;
|
|
194
|
+
}
|
|
195
|
+
if (beg < 0 || beg >= size || end < 0 || end >= size) {
|
|
196
|
+
rb_raise(rb_eRangeError,
|
|
197
|
+
"%"SZF"d%s%"SZF"d is out of range for size=%"SZF"d",
|
|
198
|
+
beg_orig, dot, end_orig, size);
|
|
199
|
+
}
|
|
200
|
+
n = (end-beg)/step+1;
|
|
201
|
+
if (n<0) n=0;
|
|
202
|
+
na_index_set_step(q,orig_dim,n,beg,step);
|
|
203
|
+
|
|
204
|
+
}
|
|
205
|
+
|
|
206
|
+
static void
|
|
207
|
+
na_parse_enumerator(VALUE enum_obj, int orig_dim, ssize_t size, na_index_arg_t *q)
|
|
208
|
+
{
|
|
209
|
+
int len;
|
|
210
|
+
ssize_t step;
|
|
211
|
+
struct enumerator *e;
|
|
212
|
+
|
|
213
|
+
if (!RB_TYPE_P(enum_obj, T_DATA)) {
|
|
214
|
+
rb_raise(rb_eTypeError,"wrong argument type (not T_DATA)");
|
|
215
|
+
}
|
|
216
|
+
e = (struct enumerator *)DATA_PTR(enum_obj);
|
|
217
|
+
|
|
218
|
+
if (rb_obj_is_kind_of(e->obj, rb_cRange)) {
|
|
219
|
+
if (e->meth == id_each) {
|
|
220
|
+
na_parse_range(e->obj, 1, orig_dim, size, q);
|
|
221
|
+
}
|
|
222
|
+
else if (e->meth == id_step) {
|
|
223
|
+
if (TYPE(e->args) != T_ARRAY) {
|
|
224
|
+
rb_raise(rb_eArgError,"no argument for step");
|
|
225
|
+
}
|
|
226
|
+
len = RARRAY_LEN(e->args);
|
|
227
|
+
if (len != 1) {
|
|
228
|
+
rb_raise(rb_eArgError,"invalid number of step argument (1 for %d)",len);
|
|
229
|
+
}
|
|
230
|
+
step = NUM2SSIZET(RARRAY_AREF(e->args,0));
|
|
231
|
+
na_parse_range(e->obj, step, orig_dim, size, q);
|
|
232
|
+
} else {
|
|
233
|
+
rb_raise(rb_eTypeError,"unknown Range method: %s",rb_id2name(e->meth));
|
|
234
|
+
}
|
|
235
|
+
} else {
|
|
236
|
+
rb_raise(rb_eTypeError,"not Range object");
|
|
237
|
+
}
|
|
238
|
+
}
|
|
239
|
+
|
|
240
|
+
// Analyze *a* which is *i*-th index object and store the information to q
|
|
241
|
+
//
|
|
242
|
+
// a: a ruby object of i-th index
|
|
243
|
+
// size: size of i-th dimension of original NArray
|
|
244
|
+
// i: parse i-th index
|
|
245
|
+
// q: parsed information is stored to *q
|
|
246
|
+
static void
|
|
247
|
+
na_index_parse_each(volatile VALUE a, ssize_t size, int i, na_index_arg_t *q)
|
|
248
|
+
{
|
|
249
|
+
switch(TYPE(a)) {
|
|
250
|
+
|
|
251
|
+
case T_FIXNUM:
|
|
252
|
+
na_index_set_scalar(q,i,size,FIX2LONG(a));
|
|
253
|
+
break;
|
|
254
|
+
|
|
255
|
+
case T_BIGNUM:
|
|
256
|
+
na_index_set_scalar(q,i,size,NUM2SSIZET(a));
|
|
257
|
+
break;
|
|
258
|
+
|
|
259
|
+
case T_FLOAT:
|
|
260
|
+
na_index_set_scalar(q,i,size,NUM2SSIZET(a));
|
|
261
|
+
break;
|
|
262
|
+
|
|
263
|
+
case T_NIL:
|
|
264
|
+
case T_TRUE:
|
|
265
|
+
na_index_set_step(q,i,size,0,1);
|
|
266
|
+
break;
|
|
267
|
+
|
|
268
|
+
case T_SYMBOL:
|
|
269
|
+
if (a==sym_all || a==sym_ast) {
|
|
270
|
+
na_index_set_step(q,i,size,0,1);
|
|
271
|
+
}
|
|
272
|
+
else if (a==sym_reverse) {
|
|
273
|
+
na_index_set_step(q,i,size,size-1,-1);
|
|
274
|
+
}
|
|
275
|
+
else if (a==sym_new) {
|
|
276
|
+
na_index_set_step(q,i,1,0,1);
|
|
277
|
+
}
|
|
278
|
+
else if (a==sym_reduce || a==sym_sum || a==sym_plus) {
|
|
279
|
+
na_index_set_step(q,i,size,0,1);
|
|
280
|
+
q->reduce = 1;
|
|
281
|
+
} else {
|
|
282
|
+
rb_raise(rb_eIndexError, "invalid symbol for index");
|
|
283
|
+
}
|
|
284
|
+
break;
|
|
285
|
+
|
|
286
|
+
case T_ARRAY:
|
|
287
|
+
na_parse_array(a, i, size, q);
|
|
288
|
+
break;
|
|
289
|
+
|
|
290
|
+
default:
|
|
291
|
+
if (rb_obj_is_kind_of(a, rb_cRange)) {
|
|
292
|
+
na_parse_range(a, 1, i, size, q);
|
|
293
|
+
}
|
|
294
|
+
else if (rb_obj_is_kind_of(a, rb_cEnumerator)) {
|
|
295
|
+
na_parse_enumerator(a, i, size, q);
|
|
296
|
+
}
|
|
297
|
+
else if (rb_obj_is_kind_of(a, na_cStep)) {
|
|
298
|
+
ssize_t beg, step, n;
|
|
299
|
+
nary_step_array_index(a, size, (size_t*)(&n), &beg, &step);
|
|
300
|
+
na_index_set_step(q,i,n,beg,step);
|
|
301
|
+
}
|
|
302
|
+
// NArray index
|
|
303
|
+
else if (NA_IsNArray(a)) {
|
|
304
|
+
na_parse_narray_index(a, i, size, q);
|
|
305
|
+
}
|
|
306
|
+
else {
|
|
307
|
+
rb_raise(rb_eIndexError, "not allowed type");
|
|
308
|
+
}
|
|
309
|
+
}
|
|
310
|
+
}
|
|
311
|
+
|
|
312
|
+
|
|
313
|
+
static size_t
|
|
314
|
+
na_index_parse_args(VALUE args, narray_t *na, na_index_arg_t *q, int ndim)
|
|
315
|
+
{
|
|
316
|
+
int i, j, k, l, nidx;
|
|
317
|
+
size_t total=1;
|
|
318
|
+
VALUE v;
|
|
319
|
+
|
|
320
|
+
if (ndim == 0) {
|
|
321
|
+
return /*total*/1;
|
|
322
|
+
}
|
|
323
|
+
|
|
324
|
+
nidx = RARRAY_LEN(args);
|
|
325
|
+
|
|
326
|
+
for (i=j=k=0; i<nidx; i++) {
|
|
327
|
+
v = RARRAY_AREF(args,i);
|
|
328
|
+
// rest (ellipsis) dimension
|
|
329
|
+
if (v==Qfalse) {
|
|
330
|
+
for (l = ndim - (nidx-1); l>0; l--) {
|
|
331
|
+
//printf("i=%d j=%d k=%d l=%d ndim=%d nidx=%d\n",i,j,k,l,ndim,nidx);
|
|
332
|
+
na_index_parse_each(Qtrue, na->shape[k], k, &q[j]);
|
|
333
|
+
if (q[j].n > 1) {
|
|
334
|
+
total *= q[j].n;
|
|
335
|
+
}
|
|
336
|
+
j++;
|
|
337
|
+
k++;
|
|
338
|
+
}
|
|
339
|
+
}
|
|
340
|
+
// new dimension
|
|
341
|
+
else if (v==sym_new) {
|
|
342
|
+
na_index_parse_each(v, 1, k, &q[j]);
|
|
343
|
+
j++;
|
|
344
|
+
}
|
|
345
|
+
// other dimention
|
|
346
|
+
else {
|
|
347
|
+
na_index_parse_each(v, na->shape[k], k, &q[j]);
|
|
348
|
+
if (q[j].n > 1) {
|
|
349
|
+
total *= q[j].n;
|
|
350
|
+
}
|
|
351
|
+
j++;
|
|
352
|
+
k++;
|
|
353
|
+
}
|
|
354
|
+
}
|
|
355
|
+
return total;
|
|
356
|
+
}
|
|
357
|
+
|
|
358
|
+
|
|
359
|
+
static void
|
|
360
|
+
na_get_strides_nadata(const narray_data_t *na, ssize_t *strides, ssize_t elmsz)
|
|
361
|
+
{
|
|
362
|
+
int i = na->base.ndim - 1;
|
|
363
|
+
strides[i] = elmsz;
|
|
364
|
+
for (; i>0; i--) {
|
|
365
|
+
strides[i-1] = strides[i] * na->base.shape[i];
|
|
366
|
+
}
|
|
367
|
+
}
|
|
368
|
+
|
|
369
|
+
static void
|
|
370
|
+
na_index_aref_nadata(narray_data_t *na1, narray_view_t *na2,
|
|
371
|
+
na_index_arg_t *q, ssize_t elmsz, int ndim, int keep_dim)
|
|
372
|
+
{
|
|
373
|
+
int i, j;
|
|
374
|
+
ssize_t size, k, total=1;
|
|
375
|
+
ssize_t stride1;
|
|
376
|
+
ssize_t *strides_na1;
|
|
377
|
+
size_t *index;
|
|
378
|
+
ssize_t beg, step;
|
|
379
|
+
VALUE m;
|
|
380
|
+
|
|
381
|
+
strides_na1 = ALLOCA_N(ssize_t, na1->base.ndim);
|
|
382
|
+
na_get_strides_nadata(na1, strides_na1, elmsz);
|
|
383
|
+
|
|
384
|
+
for (i=j=0; i<ndim; i++) {
|
|
385
|
+
stride1 = strides_na1[q[i].orig_dim];
|
|
386
|
+
|
|
387
|
+
// numeric index -- trim dimension
|
|
388
|
+
if (!keep_dim && q[i].n==1 && q[i].step==0) {
|
|
389
|
+
beg = q[i].beg;
|
|
390
|
+
na2->offset += stride1 * beg;
|
|
391
|
+
continue;
|
|
392
|
+
}
|
|
393
|
+
|
|
394
|
+
na2->base.shape[j] = size = q[i].n;
|
|
395
|
+
|
|
396
|
+
if (q[i].reduce != 0) {
|
|
397
|
+
m = rb_funcall(INT2FIX(1),id_shift_left,1,INT2FIX(j));
|
|
398
|
+
na2->base.reduce = rb_funcall(m,'|',1,na2->base.reduce);
|
|
399
|
+
}
|
|
400
|
+
|
|
401
|
+
// array index
|
|
402
|
+
if (q[i].idx != NULL) {
|
|
403
|
+
index = q[i].idx;
|
|
404
|
+
SDX_SET_INDEX(na2->stridx[j],index);
|
|
405
|
+
q[i].idx = NULL;
|
|
406
|
+
for (k=0; k<size; k++) {
|
|
407
|
+
index[k] = index[k] * stride1;
|
|
408
|
+
}
|
|
409
|
+
} else {
|
|
410
|
+
beg = q[i].beg;
|
|
411
|
+
step = q[i].step;
|
|
412
|
+
na2->offset += stride1*beg;
|
|
413
|
+
SDX_SET_STRIDE(na2->stridx[j], stride1*step);
|
|
414
|
+
}
|
|
415
|
+
j++;
|
|
416
|
+
total *= size;
|
|
417
|
+
}
|
|
418
|
+
na2->base.size = total;
|
|
419
|
+
}
|
|
420
|
+
|
|
421
|
+
|
|
422
|
+
static void
|
|
423
|
+
na_index_aref_naview(narray_view_t *na1, narray_view_t *na2,
|
|
424
|
+
na_index_arg_t *q, ssize_t elmsz, int ndim, int keep_dim)
|
|
425
|
+
{
|
|
426
|
+
int i, j;
|
|
427
|
+
ssize_t total=1;
|
|
428
|
+
|
|
429
|
+
for (i=j=0; i<ndim; i++) {
|
|
430
|
+
stridx_t sdx1 = na1->stridx[q[i].orig_dim];
|
|
431
|
+
ssize_t size;
|
|
432
|
+
|
|
433
|
+
// numeric index -- trim dimension
|
|
434
|
+
if (!keep_dim && q[i].n==1 && q[i].step==0) {
|
|
435
|
+
if (SDX_IS_INDEX(sdx1)) {
|
|
436
|
+
na2->offset += SDX_GET_INDEX(sdx1)[q[i].beg];
|
|
437
|
+
} else {
|
|
438
|
+
na2->offset += SDX_GET_STRIDE(sdx1)*q[i].beg;
|
|
439
|
+
}
|
|
440
|
+
continue;
|
|
441
|
+
}
|
|
442
|
+
|
|
443
|
+
na2->base.shape[j] = size = q[i].n;
|
|
444
|
+
|
|
445
|
+
if (q[i].reduce != 0) {
|
|
446
|
+
VALUE m = rb_funcall(INT2FIX(1),id_shift_left,1,INT2FIX(j));
|
|
447
|
+
na2->base.reduce = rb_funcall(m,'|',1,na2->base.reduce);
|
|
448
|
+
}
|
|
449
|
+
|
|
450
|
+
if (q[i].orig_dim >= na1->base.ndim) {
|
|
451
|
+
// new dimension
|
|
452
|
+
SDX_SET_STRIDE(na2->stridx[j], elmsz);
|
|
453
|
+
}
|
|
454
|
+
else if (q[i].idx != NULL && SDX_IS_INDEX(sdx1)) {
|
|
455
|
+
// index <- index
|
|
456
|
+
int k;
|
|
457
|
+
size_t *index = q[i].idx;
|
|
458
|
+
SDX_SET_INDEX(na2->stridx[j], index);
|
|
459
|
+
q[i].idx = NULL;
|
|
460
|
+
|
|
461
|
+
for (k=0; k<size; k++) {
|
|
462
|
+
index[k] = SDX_GET_INDEX(sdx1)[index[k]];
|
|
463
|
+
}
|
|
464
|
+
}
|
|
465
|
+
else if (q[i].idx != NULL && SDX_IS_STRIDE(sdx1)) {
|
|
466
|
+
// index <- step
|
|
467
|
+
ssize_t stride1 = SDX_GET_STRIDE(sdx1);
|
|
468
|
+
size_t *index = q[i].idx;
|
|
469
|
+
SDX_SET_INDEX(na2->stridx[j],index);
|
|
470
|
+
q[i].idx = NULL;
|
|
471
|
+
|
|
472
|
+
if (stride1<0) {
|
|
473
|
+
size_t last;
|
|
474
|
+
int k;
|
|
475
|
+
stride1 = -stride1;
|
|
476
|
+
last = na1->base.shape[q[i].orig_dim] - 1;
|
|
477
|
+
if (na2->offset < last * stride1) {
|
|
478
|
+
rb_raise(rb_eStandardError,"bug: negative offset");
|
|
479
|
+
}
|
|
480
|
+
na2->offset -= last * stride1;
|
|
481
|
+
for (k=0; k<size; k++) {
|
|
482
|
+
index[k] = (last - index[k]) * stride1;
|
|
483
|
+
}
|
|
484
|
+
} else {
|
|
485
|
+
int k;
|
|
486
|
+
for (k=0; k<size; k++) {
|
|
487
|
+
index[k] = index[k] * stride1;
|
|
488
|
+
}
|
|
489
|
+
}
|
|
490
|
+
}
|
|
491
|
+
else if (q[i].idx == NULL && SDX_IS_INDEX(sdx1)) {
|
|
492
|
+
// step <- index
|
|
493
|
+
int k;
|
|
494
|
+
size_t beg = q[i].beg;
|
|
495
|
+
ssize_t step = q[i].step;
|
|
496
|
+
size_t *index = ALLOC_N(size_t, size);
|
|
497
|
+
SDX_SET_INDEX(na2->stridx[j],index);
|
|
498
|
+
for (k=0; k<size; k++) {
|
|
499
|
+
index[k] = SDX_GET_INDEX(sdx1)[beg+step*k];
|
|
500
|
+
}
|
|
501
|
+
}
|
|
502
|
+
else if (q[i].idx == NULL && SDX_IS_STRIDE(sdx1)) {
|
|
503
|
+
// step <- step
|
|
504
|
+
size_t beg = q[i].beg;
|
|
505
|
+
ssize_t step = q[i].step;
|
|
506
|
+
ssize_t stride1 = SDX_GET_STRIDE(sdx1);
|
|
507
|
+
na2->offset += stride1*beg;
|
|
508
|
+
SDX_SET_STRIDE(na2->stridx[j], stride1*step);
|
|
509
|
+
}
|
|
510
|
+
|
|
511
|
+
j++;
|
|
512
|
+
total *= size;
|
|
513
|
+
}
|
|
514
|
+
na2->base.size = total;
|
|
515
|
+
}
|
|
516
|
+
|
|
517
|
+
|
|
518
|
+
static int
|
|
519
|
+
na_ndim_new_narray(int ndim, const na_index_arg_t *q)
|
|
520
|
+
{
|
|
521
|
+
int i, ndim_new=0;
|
|
522
|
+
for (i=0; i<ndim; i++) {
|
|
523
|
+
if (q[i].n>1 || q[i].step!=0) {
|
|
524
|
+
ndim_new++;
|
|
525
|
+
}
|
|
526
|
+
}
|
|
527
|
+
return ndim_new;
|
|
528
|
+
}
|
|
529
|
+
|
|
530
|
+
typedef struct {
|
|
531
|
+
VALUE args, self, store;
|
|
532
|
+
int ndim;
|
|
533
|
+
na_index_arg_t *q; // multi-dimensional index args
|
|
534
|
+
narray_t *na1;
|
|
535
|
+
int keep_dim;
|
|
536
|
+
size_t pos; // offset position for 0-dimensional narray. 0-dimensional array does not use q.
|
|
537
|
+
} na_aref_md_data_t;
|
|
538
|
+
|
|
539
|
+
static na_index_arg_t*
|
|
540
|
+
na_allocate_index_args(int ndim)
|
|
541
|
+
{
|
|
542
|
+
na_index_arg_t *q;
|
|
543
|
+
int i;
|
|
544
|
+
if (ndim == 0) return NULL;
|
|
545
|
+
|
|
546
|
+
q = ALLOC_N(na_index_arg_t, ndim);
|
|
547
|
+
for (i=0; i<ndim; i++) {
|
|
548
|
+
q[i].idx = NULL;
|
|
549
|
+
}
|
|
550
|
+
return q;
|
|
551
|
+
}
|
|
552
|
+
|
|
553
|
+
static
|
|
554
|
+
VALUE na_aref_md_protected(VALUE data_value)
|
|
555
|
+
{
|
|
556
|
+
na_aref_md_data_t *data = (na_aref_md_data_t*)(data_value);
|
|
557
|
+
VALUE self = data->self;
|
|
558
|
+
VALUE args = data->args;
|
|
559
|
+
VALUE store = data->store;
|
|
560
|
+
int ndim = data->ndim;
|
|
561
|
+
na_index_arg_t *q = data->q;
|
|
562
|
+
narray_t *na1 = data->na1;
|
|
563
|
+
int keep_dim = data->keep_dim;
|
|
564
|
+
|
|
565
|
+
int ndim_new;
|
|
566
|
+
VALUE view;
|
|
567
|
+
narray_view_t *na2;
|
|
568
|
+
ssize_t elmsz;
|
|
569
|
+
|
|
570
|
+
na_index_parse_args(args, na1, q, ndim);
|
|
571
|
+
|
|
572
|
+
if (na_debug_flag) print_index_arg(q,ndim);
|
|
573
|
+
|
|
574
|
+
if (keep_dim) {
|
|
575
|
+
ndim_new = ndim;
|
|
576
|
+
} else {
|
|
577
|
+
ndim_new = na_ndim_new_narray(ndim, q);
|
|
578
|
+
}
|
|
579
|
+
view = na_s_allocate_view(CLASS_OF(self));
|
|
580
|
+
|
|
581
|
+
na_copy_flags(self, view);
|
|
582
|
+
GetNArrayView(view,na2);
|
|
583
|
+
|
|
584
|
+
na_alloc_shape((narray_t*)na2, ndim_new);
|
|
585
|
+
|
|
586
|
+
na2->stridx = ALLOC_N(stridx_t,ndim_new);
|
|
587
|
+
|
|
588
|
+
elmsz = nary_element_stride(self);
|
|
589
|
+
|
|
590
|
+
switch(na1->type) {
|
|
591
|
+
case NARRAY_DATA_T:
|
|
592
|
+
case NARRAY_FILEMAP_T:
|
|
593
|
+
if (ndim == 0) {
|
|
594
|
+
na2->offset = data->pos;
|
|
595
|
+
na2->base.size = 1;
|
|
596
|
+
} else {
|
|
597
|
+
na_index_aref_nadata((narray_data_t *)na1,na2,q,elmsz,ndim,keep_dim);
|
|
598
|
+
}
|
|
599
|
+
na2->data = self;
|
|
600
|
+
break;
|
|
601
|
+
case NARRAY_VIEW_T:
|
|
602
|
+
if (ndim == 0) {
|
|
603
|
+
na2->offset = ((narray_view_t *)na1)->offset + data->pos;
|
|
604
|
+
na2->data = ((narray_view_t *)na1)->data;
|
|
605
|
+
na2->base.size = 1;
|
|
606
|
+
} else {
|
|
607
|
+
na2->offset = ((narray_view_t *)na1)->offset;
|
|
608
|
+
na2->data = ((narray_view_t *)na1)->data;
|
|
609
|
+
na_index_aref_naview((narray_view_t *)na1,na2,q,elmsz,ndim,keep_dim);
|
|
610
|
+
}
|
|
611
|
+
break;
|
|
612
|
+
}
|
|
613
|
+
if (store) {
|
|
614
|
+
na_get_pointer_for_write(store); // allocate memory
|
|
615
|
+
na_store(na_flatten_dim(store,0),view);
|
|
616
|
+
return store;
|
|
617
|
+
}
|
|
618
|
+
return view;
|
|
619
|
+
}
|
|
620
|
+
|
|
621
|
+
static VALUE
|
|
622
|
+
na_aref_md_ensure(VALUE data_value)
|
|
623
|
+
{
|
|
624
|
+
na_aref_md_data_t *data = (na_aref_md_data_t*)(data_value);
|
|
625
|
+
int i;
|
|
626
|
+
for (i=0; i<data->ndim; i++) {
|
|
627
|
+
xfree(data->q[i].idx);
|
|
628
|
+
}
|
|
629
|
+
if (data->q) xfree(data->q);
|
|
630
|
+
return Qnil;
|
|
631
|
+
}
|
|
632
|
+
|
|
633
|
+
static VALUE
|
|
634
|
+
na_aref_md(int argc, VALUE *argv, VALUE self, int keep_dim, int result_nd, size_t pos)
|
|
635
|
+
{
|
|
636
|
+
VALUE args; // should be GC protected
|
|
637
|
+
narray_t *na1;
|
|
638
|
+
na_aref_md_data_t data;
|
|
639
|
+
VALUE store = 0;
|
|
640
|
+
VALUE idx;
|
|
641
|
+
narray_t *nidx;
|
|
642
|
+
|
|
643
|
+
GetNArray(self,na1);
|
|
644
|
+
|
|
645
|
+
args = rb_ary_new4(argc,argv);
|
|
646
|
+
|
|
647
|
+
if (argc == 1 && result_nd == 1) {
|
|
648
|
+
idx = argv[0];
|
|
649
|
+
if (rb_obj_is_kind_of(idx, rb_cArray)) {
|
|
650
|
+
idx = rb_apply(cumo_cNArray,id_bracket,idx);
|
|
651
|
+
}
|
|
652
|
+
if (rb_obj_is_kind_of(idx, cumo_cNArray)) {
|
|
653
|
+
GetNArray(idx,nidx);
|
|
654
|
+
if (NA_NDIM(nidx)>1) {
|
|
655
|
+
store = nary_new(CLASS_OF(self),NA_NDIM(nidx),NA_SHAPE(nidx));
|
|
656
|
+
idx = na_flatten(idx);
|
|
657
|
+
RARRAY_ASET(args,0,idx);
|
|
658
|
+
}
|
|
659
|
+
}
|
|
660
|
+
// flatten should be done only for narray-view with non-uniform stride.
|
|
661
|
+
if (na1->ndim > 1) {
|
|
662
|
+
self = na_flatten(self);
|
|
663
|
+
GetNArray(self,na1);
|
|
664
|
+
}
|
|
665
|
+
}
|
|
666
|
+
|
|
667
|
+
data.args = args;
|
|
668
|
+
data.self = self;
|
|
669
|
+
data.store = store;
|
|
670
|
+
data.ndim = result_nd;
|
|
671
|
+
data.q = na_allocate_index_args(result_nd);
|
|
672
|
+
data.na1 = na1;
|
|
673
|
+
data.keep_dim = keep_dim;
|
|
674
|
+
|
|
675
|
+
switch(na1->type) {
|
|
676
|
+
case NARRAY_DATA_T:
|
|
677
|
+
data.pos = pos;
|
|
678
|
+
break;
|
|
679
|
+
case NARRAY_FILEMAP_T:
|
|
680
|
+
data.pos = pos; // correct? I have never used..
|
|
681
|
+
break;
|
|
682
|
+
case NARRAY_VIEW_T:
|
|
683
|
+
{
|
|
684
|
+
narray_view_t *nv;
|
|
685
|
+
GetNArrayView(self,nv);
|
|
686
|
+
// pos obtained by na_get_result_dimension adds view->offset.
|
|
687
|
+
data.pos = pos - nv->offset;
|
|
688
|
+
}
|
|
689
|
+
break;
|
|
690
|
+
}
|
|
691
|
+
|
|
692
|
+
return rb_ensure(na_aref_md_protected, (VALUE)&data, na_aref_md_ensure, (VALUE)&data);
|
|
693
|
+
}
|
|
694
|
+
|
|
695
|
+
|
|
696
|
+
/* method: [](idx1,idx2,...,idxN) */
|
|
697
|
+
VALUE
|
|
698
|
+
na_aref_main(int nidx, VALUE *idx, VALUE self, int keep_dim, int result_nd, size_t pos)
|
|
699
|
+
{
|
|
700
|
+
na_index_arg_to_internal_order(nidx, idx, self);
|
|
701
|
+
|
|
702
|
+
if (nidx==0) {
|
|
703
|
+
return rb_funcall(self,id_dup,0);
|
|
704
|
+
}
|
|
705
|
+
if (nidx==1) {
|
|
706
|
+
if (CLASS_OF(*idx)==cumo_cBit) {
|
|
707
|
+
return rb_funcall(*idx,id_mask,1,self);
|
|
708
|
+
}
|
|
709
|
+
}
|
|
710
|
+
return na_aref_md(nidx, idx, self, keep_dim, result_nd, pos);
|
|
711
|
+
}
|
|
712
|
+
|
|
713
|
+
|
|
714
|
+
/* method: slice(idx1,idx2,...,idxN) */
|
|
715
|
+
static VALUE na_slice(int argc, VALUE *argv, VALUE self)
|
|
716
|
+
{
|
|
717
|
+
int result_nd;
|
|
718
|
+
size_t pos;
|
|
719
|
+
|
|
720
|
+
result_nd = na_get_result_dimension(self, argc, argv, 0, &pos);
|
|
721
|
+
return na_aref_main(argc, argv, self, 1, result_nd, pos);
|
|
722
|
+
}
|
|
723
|
+
|
|
724
|
+
|
|
725
|
+
static int
|
|
726
|
+
check_index_count(int argc, int na_ndim, int count_new, int count_rest)
|
|
727
|
+
{
|
|
728
|
+
int result_nd = na_ndim + count_new;
|
|
729
|
+
|
|
730
|
+
switch(count_rest) {
|
|
731
|
+
case 0:
|
|
732
|
+
if (count_new == 0 && argc == 1) return 1;
|
|
733
|
+
if (argc == result_nd) return result_nd;
|
|
734
|
+
rb_raise(rb_eIndexError,"# of index(=%i) should be "
|
|
735
|
+
"equal to ndim(=%i)",argc,na_ndim);
|
|
736
|
+
break;
|
|
737
|
+
case 1:
|
|
738
|
+
if (argc-1 <= result_nd) return result_nd;
|
|
739
|
+
rb_raise(rb_eIndexError,"# of index(=%i) > ndim(=%i) with :rest",
|
|
740
|
+
argc,na_ndim);
|
|
741
|
+
break;
|
|
742
|
+
}
|
|
743
|
+
return -1;
|
|
744
|
+
}
|
|
745
|
+
|
|
746
|
+
int
|
|
747
|
+
na_get_result_dimension(VALUE self, int argc, VALUE *argv, ssize_t stride, size_t *pos_idx)
|
|
748
|
+
{
|
|
749
|
+
int i, j;
|
|
750
|
+
int count_new=0;
|
|
751
|
+
int count_rest=0;
|
|
752
|
+
int count_else=0;
|
|
753
|
+
ssize_t x, s, m, pos, *idx;
|
|
754
|
+
narray_t *na;
|
|
755
|
+
narray_view_t *nv;
|
|
756
|
+
stridx_t sdx;
|
|
757
|
+
VALUE a;
|
|
758
|
+
|
|
759
|
+
GetNArray(self,na);
|
|
760
|
+
if (na->size == 0) {
|
|
761
|
+
rb_raise(rb_eRuntimeError, "cannot get index of empty array");
|
|
762
|
+
return -1;
|
|
763
|
+
}
|
|
764
|
+
idx = ALLOCA_N(ssize_t, argc);
|
|
765
|
+
for (i=j=0; i<argc; i++) {
|
|
766
|
+
a = argv[i];
|
|
767
|
+
switch(TYPE(a)) {
|
|
768
|
+
case T_FIXNUM:
|
|
769
|
+
idx[j++] = FIX2LONG(a);
|
|
770
|
+
break;
|
|
771
|
+
case T_BIGNUM:
|
|
772
|
+
case T_FLOAT:
|
|
773
|
+
idx[j++] = NUM2SSIZET(a);
|
|
774
|
+
break;
|
|
775
|
+
case T_FALSE:
|
|
776
|
+
case T_SYMBOL:
|
|
777
|
+
if (a==sym_rest || a==sym_tilde || a==Qfalse) {
|
|
778
|
+
argv[i] = Qfalse;
|
|
779
|
+
count_rest++;
|
|
780
|
+
break;
|
|
781
|
+
} else if (a==sym_new || a==sym_minus) {
|
|
782
|
+
argv[i] = sym_new;
|
|
783
|
+
count_new++;
|
|
784
|
+
}
|
|
785
|
+
// not break
|
|
786
|
+
default:
|
|
787
|
+
count_else++;
|
|
788
|
+
}
|
|
789
|
+
}
|
|
790
|
+
|
|
791
|
+
if (count_rest > 1) {
|
|
792
|
+
rb_raise(rb_eIndexError,"multiple rest-dimension is not allowd");
|
|
793
|
+
}
|
|
794
|
+
if (count_else != 0) {
|
|
795
|
+
return check_index_count(argc, na->ndim, count_new, count_rest);
|
|
796
|
+
}
|
|
797
|
+
|
|
798
|
+
switch(na->type) {
|
|
799
|
+
case NARRAY_VIEW_T:
|
|
800
|
+
GetNArrayView(self,nv);
|
|
801
|
+
pos = nv->offset;
|
|
802
|
+
if (j == na->ndim) {
|
|
803
|
+
for (i=j-1; i>=0; i--) {
|
|
804
|
+
x = na_range_check(idx[i], na->shape[i], i);
|
|
805
|
+
sdx = nv->stridx[i];
|
|
806
|
+
if (SDX_IS_INDEX(sdx)) {
|
|
807
|
+
pos += SDX_GET_INDEX(sdx)[x];
|
|
808
|
+
} else {
|
|
809
|
+
pos += SDX_GET_STRIDE(sdx)*x;
|
|
810
|
+
}
|
|
811
|
+
}
|
|
812
|
+
*pos_idx = pos;
|
|
813
|
+
}
|
|
814
|
+
else if (argc==1 && j==1) {
|
|
815
|
+
x = na_range_check(idx[0], na->size, 0);
|
|
816
|
+
for (i=na->ndim-1; i>=0; i--) {
|
|
817
|
+
s = na->shape[i];
|
|
818
|
+
m = x % s;
|
|
819
|
+
x = x / s;
|
|
820
|
+
sdx = nv->stridx[i];
|
|
821
|
+
if (SDX_IS_INDEX(sdx)) {
|
|
822
|
+
pos += SDX_GET_INDEX(sdx)[m];
|
|
823
|
+
} else {
|
|
824
|
+
pos += SDX_GET_STRIDE(sdx)*m;
|
|
825
|
+
}
|
|
826
|
+
}
|
|
827
|
+
*pos_idx = pos;
|
|
828
|
+
} else {
|
|
829
|
+
return check_index_count(argc, na->ndim, count_new, count_rest);
|
|
830
|
+
}
|
|
831
|
+
break;
|
|
832
|
+
default:
|
|
833
|
+
if (!stride) {
|
|
834
|
+
stride = nary_element_stride(self);
|
|
835
|
+
}
|
|
836
|
+
if (argc==1 && j==1) {
|
|
837
|
+
x = na_range_check(idx[0], na->size, 0);
|
|
838
|
+
*pos_idx = stride * x;
|
|
839
|
+
}
|
|
840
|
+
else if (j == na->ndim) {
|
|
841
|
+
pos = 0;
|
|
842
|
+
for (i=j-1; i>=0; i--) {
|
|
843
|
+
x = na_range_check(idx[i], na->shape[i], i);
|
|
844
|
+
pos += stride * x;
|
|
845
|
+
stride *= na->shape[i];
|
|
846
|
+
}
|
|
847
|
+
*pos_idx = pos;
|
|
848
|
+
} else {
|
|
849
|
+
return check_index_count(argc, na->ndim, count_new, count_rest);
|
|
850
|
+
}
|
|
851
|
+
}
|
|
852
|
+
return 0;
|
|
853
|
+
}
|
|
854
|
+
|
|
855
|
+
|
|
856
|
+
void
|
|
857
|
+
Init_cumo_nary_index()
|
|
858
|
+
{
|
|
859
|
+
rb_define_method(cNArray, "slice", na_slice, -1);
|
|
860
|
+
|
|
861
|
+
sym_ast = ID2SYM(rb_intern("*"));
|
|
862
|
+
sym_all = ID2SYM(rb_intern("all"));
|
|
863
|
+
sym_minus = ID2SYM(rb_intern("-"));
|
|
864
|
+
sym_new = ID2SYM(rb_intern("new"));
|
|
865
|
+
sym_reverse = ID2SYM(rb_intern("reverse"));
|
|
866
|
+
sym_plus = ID2SYM(rb_intern("+"));
|
|
867
|
+
//sym_reduce = ID2SYM(rb_intern("reduce"));
|
|
868
|
+
sym_sum = ID2SYM(rb_intern("sum"));
|
|
869
|
+
sym_tilde = ID2SYM(rb_intern("~"));
|
|
870
|
+
sym_rest = ID2SYM(rb_intern("rest"));
|
|
871
|
+
id_beg = rb_intern("begin");
|
|
872
|
+
id_end = rb_intern("end");
|
|
873
|
+
id_exclude_end = rb_intern("exclude_end?");
|
|
874
|
+
id_each = rb_intern("each");
|
|
875
|
+
id_step = rb_intern("step");
|
|
876
|
+
id_dup = rb_intern("dup");
|
|
877
|
+
id_bracket = rb_intern("[]");
|
|
878
|
+
id_shift_left = rb_intern("<<");
|
|
879
|
+
id_mask = rb_intern("mask");
|
|
880
|
+
}
|