cumo 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (266) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +27 -0
  3. data/.travis.yml +5 -0
  4. data/3rd_party/mkmf-cu/.gitignore +36 -0
  5. data/3rd_party/mkmf-cu/Gemfile +3 -0
  6. data/3rd_party/mkmf-cu/LICENSE +21 -0
  7. data/3rd_party/mkmf-cu/README.md +36 -0
  8. data/3rd_party/mkmf-cu/Rakefile +11 -0
  9. data/3rd_party/mkmf-cu/bin/mkmf-cu-nvcc +4 -0
  10. data/3rd_party/mkmf-cu/lib/mkmf-cu.rb +32 -0
  11. data/3rd_party/mkmf-cu/lib/mkmf-cu/cli.rb +80 -0
  12. data/3rd_party/mkmf-cu/lib/mkmf-cu/nvcc.rb +157 -0
  13. data/3rd_party/mkmf-cu/mkmf-cu.gemspec +16 -0
  14. data/3rd_party/mkmf-cu/test/test_mkmf-cu.rb +67 -0
  15. data/CODE_OF_CONDUCT.md +46 -0
  16. data/Gemfile +8 -0
  17. data/LICENSE.txt +82 -0
  18. data/README.md +252 -0
  19. data/Rakefile +43 -0
  20. data/bench/broadcast_fp32.rb +138 -0
  21. data/bench/cumo_bench.rb +193 -0
  22. data/bench/numo_bench.rb +138 -0
  23. data/bench/reduction_fp32.rb +117 -0
  24. data/bin/console +14 -0
  25. data/bin/setup +8 -0
  26. data/cumo.gemspec +32 -0
  27. data/ext/cumo/cuda/cublas.c +278 -0
  28. data/ext/cumo/cuda/driver.c +421 -0
  29. data/ext/cumo/cuda/memory_pool.cpp +185 -0
  30. data/ext/cumo/cuda/memory_pool_impl.cpp +308 -0
  31. data/ext/cumo/cuda/memory_pool_impl.hpp +370 -0
  32. data/ext/cumo/cuda/memory_pool_impl_test.cpp +554 -0
  33. data/ext/cumo/cuda/nvrtc.c +207 -0
  34. data/ext/cumo/cuda/runtime.c +167 -0
  35. data/ext/cumo/cumo.c +148 -0
  36. data/ext/cumo/depend.erb +58 -0
  37. data/ext/cumo/extconf.rb +179 -0
  38. data/ext/cumo/include/cumo.h +25 -0
  39. data/ext/cumo/include/cumo/compat.h +23 -0
  40. data/ext/cumo/include/cumo/cuda/cublas.h +153 -0
  41. data/ext/cumo/include/cumo/cuda/cumo_thrust.hpp +187 -0
  42. data/ext/cumo/include/cumo/cuda/cumo_thrust_complex.hpp +79 -0
  43. data/ext/cumo/include/cumo/cuda/driver.h +22 -0
  44. data/ext/cumo/include/cumo/cuda/memory_pool.h +28 -0
  45. data/ext/cumo/include/cumo/cuda/nvrtc.h +22 -0
  46. data/ext/cumo/include/cumo/cuda/runtime.h +40 -0
  47. data/ext/cumo/include/cumo/indexer.h +238 -0
  48. data/ext/cumo/include/cumo/intern.h +142 -0
  49. data/ext/cumo/include/cumo/intern_fwd.h +38 -0
  50. data/ext/cumo/include/cumo/intern_kernel.h +6 -0
  51. data/ext/cumo/include/cumo/narray.h +429 -0
  52. data/ext/cumo/include/cumo/narray_kernel.h +149 -0
  53. data/ext/cumo/include/cumo/ndloop.h +95 -0
  54. data/ext/cumo/include/cumo/reduce_kernel.h +126 -0
  55. data/ext/cumo/include/cumo/template.h +158 -0
  56. data/ext/cumo/include/cumo/template_kernel.h +77 -0
  57. data/ext/cumo/include/cumo/types/bit.h +40 -0
  58. data/ext/cumo/include/cumo/types/bit_kernel.h +34 -0
  59. data/ext/cumo/include/cumo/types/complex.h +402 -0
  60. data/ext/cumo/include/cumo/types/complex_kernel.h +414 -0
  61. data/ext/cumo/include/cumo/types/complex_macro.h +382 -0
  62. data/ext/cumo/include/cumo/types/complex_macro_kernel.h +186 -0
  63. data/ext/cumo/include/cumo/types/dcomplex.h +46 -0
  64. data/ext/cumo/include/cumo/types/dcomplex_kernel.h +13 -0
  65. data/ext/cumo/include/cumo/types/dfloat.h +47 -0
  66. data/ext/cumo/include/cumo/types/dfloat_kernel.h +14 -0
  67. data/ext/cumo/include/cumo/types/float_def.h +34 -0
  68. data/ext/cumo/include/cumo/types/float_def_kernel.h +39 -0
  69. data/ext/cumo/include/cumo/types/float_macro.h +191 -0
  70. data/ext/cumo/include/cumo/types/float_macro_kernel.h +158 -0
  71. data/ext/cumo/include/cumo/types/int16.h +24 -0
  72. data/ext/cumo/include/cumo/types/int16_kernel.h +23 -0
  73. data/ext/cumo/include/cumo/types/int32.h +24 -0
  74. data/ext/cumo/include/cumo/types/int32_kernel.h +19 -0
  75. data/ext/cumo/include/cumo/types/int64.h +24 -0
  76. data/ext/cumo/include/cumo/types/int64_kernel.h +19 -0
  77. data/ext/cumo/include/cumo/types/int8.h +24 -0
  78. data/ext/cumo/include/cumo/types/int8_kernel.h +19 -0
  79. data/ext/cumo/include/cumo/types/int_macro.h +67 -0
  80. data/ext/cumo/include/cumo/types/int_macro_kernel.h +48 -0
  81. data/ext/cumo/include/cumo/types/real_accum.h +486 -0
  82. data/ext/cumo/include/cumo/types/real_accum_kernel.h +101 -0
  83. data/ext/cumo/include/cumo/types/robj_macro.h +80 -0
  84. data/ext/cumo/include/cumo/types/robj_macro_kernel.h +0 -0
  85. data/ext/cumo/include/cumo/types/robject.h +27 -0
  86. data/ext/cumo/include/cumo/types/robject_kernel.h +7 -0
  87. data/ext/cumo/include/cumo/types/scomplex.h +46 -0
  88. data/ext/cumo/include/cumo/types/scomplex_kernel.h +13 -0
  89. data/ext/cumo/include/cumo/types/sfloat.h +48 -0
  90. data/ext/cumo/include/cumo/types/sfloat_kernel.h +14 -0
  91. data/ext/cumo/include/cumo/types/uint16.h +25 -0
  92. data/ext/cumo/include/cumo/types/uint16_kernel.h +20 -0
  93. data/ext/cumo/include/cumo/types/uint32.h +25 -0
  94. data/ext/cumo/include/cumo/types/uint32_kernel.h +20 -0
  95. data/ext/cumo/include/cumo/types/uint64.h +25 -0
  96. data/ext/cumo/include/cumo/types/uint64_kernel.h +20 -0
  97. data/ext/cumo/include/cumo/types/uint8.h +25 -0
  98. data/ext/cumo/include/cumo/types/uint8_kernel.h +20 -0
  99. data/ext/cumo/include/cumo/types/uint_macro.h +58 -0
  100. data/ext/cumo/include/cumo/types/uint_macro_kernel.h +38 -0
  101. data/ext/cumo/include/cumo/types/xint_macro.h +169 -0
  102. data/ext/cumo/include/cumo/types/xint_macro_kernel.h +88 -0
  103. data/ext/cumo/narray/SFMT-params.h +97 -0
  104. data/ext/cumo/narray/SFMT-params19937.h +46 -0
  105. data/ext/cumo/narray/SFMT.c +620 -0
  106. data/ext/cumo/narray/SFMT.h +167 -0
  107. data/ext/cumo/narray/array.c +638 -0
  108. data/ext/cumo/narray/data.c +961 -0
  109. data/ext/cumo/narray/gen/cogen.rb +56 -0
  110. data/ext/cumo/narray/gen/cogen_kernel.rb +58 -0
  111. data/ext/cumo/narray/gen/def/bit.rb +37 -0
  112. data/ext/cumo/narray/gen/def/dcomplex.rb +39 -0
  113. data/ext/cumo/narray/gen/def/dfloat.rb +37 -0
  114. data/ext/cumo/narray/gen/def/int16.rb +36 -0
  115. data/ext/cumo/narray/gen/def/int32.rb +36 -0
  116. data/ext/cumo/narray/gen/def/int64.rb +36 -0
  117. data/ext/cumo/narray/gen/def/int8.rb +36 -0
  118. data/ext/cumo/narray/gen/def/robject.rb +37 -0
  119. data/ext/cumo/narray/gen/def/scomplex.rb +39 -0
  120. data/ext/cumo/narray/gen/def/sfloat.rb +37 -0
  121. data/ext/cumo/narray/gen/def/uint16.rb +36 -0
  122. data/ext/cumo/narray/gen/def/uint32.rb +36 -0
  123. data/ext/cumo/narray/gen/def/uint64.rb +36 -0
  124. data/ext/cumo/narray/gen/def/uint8.rb +36 -0
  125. data/ext/cumo/narray/gen/erbpp2.rb +346 -0
  126. data/ext/cumo/narray/gen/narray_def.rb +268 -0
  127. data/ext/cumo/narray/gen/spec.rb +425 -0
  128. data/ext/cumo/narray/gen/tmpl/accum.c +86 -0
  129. data/ext/cumo/narray/gen/tmpl/accum_binary.c +121 -0
  130. data/ext/cumo/narray/gen/tmpl/accum_binary_kernel.cu +61 -0
  131. data/ext/cumo/narray/gen/tmpl/accum_index.c +119 -0
  132. data/ext/cumo/narray/gen/tmpl/accum_index_kernel.cu +66 -0
  133. data/ext/cumo/narray/gen/tmpl/accum_kernel.cu +12 -0
  134. data/ext/cumo/narray/gen/tmpl/alloc_func.c +107 -0
  135. data/ext/cumo/narray/gen/tmpl/allocate.c +37 -0
  136. data/ext/cumo/narray/gen/tmpl/aref.c +66 -0
  137. data/ext/cumo/narray/gen/tmpl/aref_cpu.c +50 -0
  138. data/ext/cumo/narray/gen/tmpl/aset.c +56 -0
  139. data/ext/cumo/narray/gen/tmpl/binary.c +162 -0
  140. data/ext/cumo/narray/gen/tmpl/binary2.c +70 -0
  141. data/ext/cumo/narray/gen/tmpl/binary2_kernel.cu +15 -0
  142. data/ext/cumo/narray/gen/tmpl/binary_kernel.cu +31 -0
  143. data/ext/cumo/narray/gen/tmpl/binary_s.c +45 -0
  144. data/ext/cumo/narray/gen/tmpl/binary_s_kernel.cu +15 -0
  145. data/ext/cumo/narray/gen/tmpl/bincount.c +181 -0
  146. data/ext/cumo/narray/gen/tmpl/cast.c +44 -0
  147. data/ext/cumo/narray/gen/tmpl/cast_array.c +13 -0
  148. data/ext/cumo/narray/gen/tmpl/class.c +9 -0
  149. data/ext/cumo/narray/gen/tmpl/class_kernel.cu +6 -0
  150. data/ext/cumo/narray/gen/tmpl/clip.c +121 -0
  151. data/ext/cumo/narray/gen/tmpl/coerce_cast.c +10 -0
  152. data/ext/cumo/narray/gen/tmpl/complex_accum_kernel.cu +129 -0
  153. data/ext/cumo/narray/gen/tmpl/cond_binary.c +68 -0
  154. data/ext/cumo/narray/gen/tmpl/cond_binary_kernel.cu +18 -0
  155. data/ext/cumo/narray/gen/tmpl/cond_unary.c +46 -0
  156. data/ext/cumo/narray/gen/tmpl/cum.c +50 -0
  157. data/ext/cumo/narray/gen/tmpl/each.c +47 -0
  158. data/ext/cumo/narray/gen/tmpl/each_with_index.c +70 -0
  159. data/ext/cumo/narray/gen/tmpl/ewcomp.c +79 -0
  160. data/ext/cumo/narray/gen/tmpl/ewcomp_kernel.cu +19 -0
  161. data/ext/cumo/narray/gen/tmpl/extract.c +22 -0
  162. data/ext/cumo/narray/gen/tmpl/extract_cpu.c +26 -0
  163. data/ext/cumo/narray/gen/tmpl/extract_data.c +53 -0
  164. data/ext/cumo/narray/gen/tmpl/eye.c +105 -0
  165. data/ext/cumo/narray/gen/tmpl/eye_kernel.cu +19 -0
  166. data/ext/cumo/narray/gen/tmpl/fill.c +52 -0
  167. data/ext/cumo/narray/gen/tmpl/fill_kernel.cu +29 -0
  168. data/ext/cumo/narray/gen/tmpl/float_accum_kernel.cu +106 -0
  169. data/ext/cumo/narray/gen/tmpl/format.c +62 -0
  170. data/ext/cumo/narray/gen/tmpl/format_to_a.c +49 -0
  171. data/ext/cumo/narray/gen/tmpl/frexp.c +38 -0
  172. data/ext/cumo/narray/gen/tmpl/gemm.c +203 -0
  173. data/ext/cumo/narray/gen/tmpl/init_class.c +20 -0
  174. data/ext/cumo/narray/gen/tmpl/init_module.c +12 -0
  175. data/ext/cumo/narray/gen/tmpl/inspect.c +21 -0
  176. data/ext/cumo/narray/gen/tmpl/lib.c +50 -0
  177. data/ext/cumo/narray/gen/tmpl/lib_kernel.cu +24 -0
  178. data/ext/cumo/narray/gen/tmpl/logseq.c +102 -0
  179. data/ext/cumo/narray/gen/tmpl/logseq_kernel.cu +31 -0
  180. data/ext/cumo/narray/gen/tmpl/map_with_index.c +98 -0
  181. data/ext/cumo/narray/gen/tmpl/median.c +66 -0
  182. data/ext/cumo/narray/gen/tmpl/minmax.c +47 -0
  183. data/ext/cumo/narray/gen/tmpl/module.c +9 -0
  184. data/ext/cumo/narray/gen/tmpl/module_kernel.cu +1 -0
  185. data/ext/cumo/narray/gen/tmpl/new_dim0.c +15 -0
  186. data/ext/cumo/narray/gen/tmpl/new_dim0_kernel.cu +8 -0
  187. data/ext/cumo/narray/gen/tmpl/poly.c +50 -0
  188. data/ext/cumo/narray/gen/tmpl/pow.c +97 -0
  189. data/ext/cumo/narray/gen/tmpl/pow_kernel.cu +29 -0
  190. data/ext/cumo/narray/gen/tmpl/powint.c +17 -0
  191. data/ext/cumo/narray/gen/tmpl/qsort.c +212 -0
  192. data/ext/cumo/narray/gen/tmpl/rand.c +168 -0
  193. data/ext/cumo/narray/gen/tmpl/rand_norm.c +121 -0
  194. data/ext/cumo/narray/gen/tmpl/real_accum_kernel.cu +75 -0
  195. data/ext/cumo/narray/gen/tmpl/seq.c +112 -0
  196. data/ext/cumo/narray/gen/tmpl/seq_kernel.cu +43 -0
  197. data/ext/cumo/narray/gen/tmpl/set2.c +57 -0
  198. data/ext/cumo/narray/gen/tmpl/sort.c +48 -0
  199. data/ext/cumo/narray/gen/tmpl/sort_index.c +111 -0
  200. data/ext/cumo/narray/gen/tmpl/store.c +41 -0
  201. data/ext/cumo/narray/gen/tmpl/store_array.c +187 -0
  202. data/ext/cumo/narray/gen/tmpl/store_array_kernel.cu +58 -0
  203. data/ext/cumo/narray/gen/tmpl/store_bit.c +86 -0
  204. data/ext/cumo/narray/gen/tmpl/store_bit_kernel.cu +66 -0
  205. data/ext/cumo/narray/gen/tmpl/store_from.c +81 -0
  206. data/ext/cumo/narray/gen/tmpl/store_from_kernel.cu +58 -0
  207. data/ext/cumo/narray/gen/tmpl/store_kernel.cu +3 -0
  208. data/ext/cumo/narray/gen/tmpl/store_numeric.c +9 -0
  209. data/ext/cumo/narray/gen/tmpl/to_a.c +43 -0
  210. data/ext/cumo/narray/gen/tmpl/unary.c +132 -0
  211. data/ext/cumo/narray/gen/tmpl/unary2.c +60 -0
  212. data/ext/cumo/narray/gen/tmpl/unary_kernel.cu +72 -0
  213. data/ext/cumo/narray/gen/tmpl/unary_ret2.c +34 -0
  214. data/ext/cumo/narray/gen/tmpl/unary_s.c +86 -0
  215. data/ext/cumo/narray/gen/tmpl/unary_s_kernel.cu +58 -0
  216. data/ext/cumo/narray/gen/tmpl_bit/allocate.c +24 -0
  217. data/ext/cumo/narray/gen/tmpl_bit/aref.c +54 -0
  218. data/ext/cumo/narray/gen/tmpl_bit/aref_cpu.c +57 -0
  219. data/ext/cumo/narray/gen/tmpl_bit/aset.c +56 -0
  220. data/ext/cumo/narray/gen/tmpl_bit/binary.c +98 -0
  221. data/ext/cumo/narray/gen/tmpl_bit/bit_count.c +64 -0
  222. data/ext/cumo/narray/gen/tmpl_bit/bit_count_cpu.c +88 -0
  223. data/ext/cumo/narray/gen/tmpl_bit/bit_count_kernel.cu +76 -0
  224. data/ext/cumo/narray/gen/tmpl_bit/bit_reduce.c +133 -0
  225. data/ext/cumo/narray/gen/tmpl_bit/each.c +48 -0
  226. data/ext/cumo/narray/gen/tmpl_bit/each_with_index.c +70 -0
  227. data/ext/cumo/narray/gen/tmpl_bit/extract.c +30 -0
  228. data/ext/cumo/narray/gen/tmpl_bit/extract_cpu.c +29 -0
  229. data/ext/cumo/narray/gen/tmpl_bit/fill.c +69 -0
  230. data/ext/cumo/narray/gen/tmpl_bit/format.c +64 -0
  231. data/ext/cumo/narray/gen/tmpl_bit/format_to_a.c +51 -0
  232. data/ext/cumo/narray/gen/tmpl_bit/inspect.c +21 -0
  233. data/ext/cumo/narray/gen/tmpl_bit/mask.c +136 -0
  234. data/ext/cumo/narray/gen/tmpl_bit/none_p.c +14 -0
  235. data/ext/cumo/narray/gen/tmpl_bit/store_array.c +108 -0
  236. data/ext/cumo/narray/gen/tmpl_bit/store_bit.c +70 -0
  237. data/ext/cumo/narray/gen/tmpl_bit/store_from.c +60 -0
  238. data/ext/cumo/narray/gen/tmpl_bit/to_a.c +47 -0
  239. data/ext/cumo/narray/gen/tmpl_bit/unary.c +81 -0
  240. data/ext/cumo/narray/gen/tmpl_bit/where.c +90 -0
  241. data/ext/cumo/narray/gen/tmpl_bit/where2.c +95 -0
  242. data/ext/cumo/narray/index.c +880 -0
  243. data/ext/cumo/narray/kwargs.c +153 -0
  244. data/ext/cumo/narray/math.c +142 -0
  245. data/ext/cumo/narray/narray.c +1948 -0
  246. data/ext/cumo/narray/ndloop.c +2105 -0
  247. data/ext/cumo/narray/rand.c +45 -0
  248. data/ext/cumo/narray/step.c +474 -0
  249. data/ext/cumo/narray/struct.c +886 -0
  250. data/lib/cumo.rb +3 -0
  251. data/lib/cumo/cuda.rb +11 -0
  252. data/lib/cumo/cuda/compile_error.rb +36 -0
  253. data/lib/cumo/cuda/compiler.rb +161 -0
  254. data/lib/cumo/cuda/device.rb +47 -0
  255. data/lib/cumo/cuda/link_state.rb +31 -0
  256. data/lib/cumo/cuda/module.rb +40 -0
  257. data/lib/cumo/cuda/nvrtc_program.rb +27 -0
  258. data/lib/cumo/linalg.rb +12 -0
  259. data/lib/cumo/narray.rb +2 -0
  260. data/lib/cumo/narray/extra.rb +1278 -0
  261. data/lib/erbpp.rb +294 -0
  262. data/lib/erbpp/line_number.rb +137 -0
  263. data/lib/erbpp/narray_def.rb +381 -0
  264. data/numo-narray-version +1 -0
  265. data/run.gdb +7 -0
  266. metadata +353 -0
@@ -0,0 +1,95 @@
1
+ static void
2
+ <%=c_iter%>(na_loop_t *const lp)
3
+ {
4
+ size_t i;
5
+ BIT_DIGIT *a;
6
+ size_t p;
7
+ ssize_t s;
8
+ size_t *idx;
9
+ BIT_DIGIT x=0;
10
+ char *idx0, *idx1;
11
+ size_t count;
12
+ size_t e;
13
+ where_opt_t *g;
14
+
15
+ // TODO(sonots): CUDA kernelize
16
+ SHOW_SYNCHRONIZE_FIXME_WARNING_ONCE("<%=name%>", "<%=type_name%>");
17
+ cumo_cuda_runtime_check_status(cudaDeviceSynchronize());
18
+
19
+ g = (where_opt_t*)(lp->opt_ptr);
20
+ count = g->count;
21
+ idx0 = g->idx0;
22
+ idx1 = g->idx1;
23
+ e = g->elmsz;
24
+ INIT_COUNTER(lp, i);
25
+ INIT_PTR_BIT_IDX(lp, 0, a, p, s, idx);
26
+ if (idx) {
27
+ for (; i--;) {
28
+ LOAD_BIT(a, p+*idx, x);
29
+ idx++;
30
+ if (x==0) {
31
+ STORE_INT(idx0,e,count);
32
+ idx0 += e;
33
+ } else {
34
+ STORE_INT(idx1,e,count);
35
+ idx1 += e;
36
+ }
37
+ count++;
38
+ }
39
+ } else {
40
+ for (; i--;) {
41
+ LOAD_BIT(a, p, x);
42
+ p+=s;
43
+ if (x==0) {
44
+ STORE_INT(idx0,e,count);
45
+ idx0 += e;
46
+ } else {
47
+ STORE_INT(idx1,e,count);
48
+ idx1 += e;
49
+ }
50
+ count++;
51
+ }
52
+ }
53
+ g->count = count;
54
+ g->idx0 = idx0;
55
+ g->idx1 = idx1;
56
+ }
57
+
58
+ /*
59
+ Returns two index arrays.
60
+ The first array contains index where the bit is one (true).
61
+ The second array contains index where the bit is zero (false).
62
+ @overload <%=op_map%>
63
+ @return [Cumo::Int32,Cumo::Int64]*2
64
+ */
65
+ static VALUE
66
+ <%=c_func(0)%>(VALUE self)
67
+ {
68
+ VALUE idx_1, idx_0;
69
+ size_t size, n_1, n_0;
70
+ where_opt_t *g;
71
+
72
+ ndfunc_arg_in_t ain[1] = {{cT,0}};
73
+ ndfunc_t ndf = { <%=c_iter%>, FULL_LOOP, 1, 0, ain, 0 };
74
+
75
+ size = RNARRAY_SIZE(self);
76
+ n_1 = NUM2SIZET(<%=find_tmpl("count_true_cpu").c_func%>(0, NULL, self));
77
+ n_0 = size - n_1;
78
+ g = ALLOCA_N(where_opt_t,1);
79
+ g->count = 0;
80
+ if (size>4294967295ul) {
81
+ idx_1 = nary_new(cumo_cInt64, 1, &n_1);
82
+ idx_0 = nary_new(cumo_cInt64, 1, &n_0);
83
+ g->elmsz = 8;
84
+ } else {
85
+ idx_1 = nary_new(cumo_cInt32, 1, &n_1);
86
+ idx_0 = nary_new(cumo_cInt32, 1, &n_0);
87
+ g->elmsz = 4;
88
+ }
89
+ g->idx1 = na_get_pointer_for_write(idx_1);
90
+ g->idx0 = na_get_pointer_for_write(idx_0);
91
+ na_ndloop3(&ndf, g, 1, self);
92
+ na_release_lock(idx_0);
93
+ na_release_lock(idx_1);
94
+ return rb_assoc_new(idx_1,idx_0);
95
+ }
@@ -0,0 +1,880 @@
1
+ #include <string.h>
2
+ #include <ruby.h>
3
+ #include "cumo/narray.h"
4
+ #include "cumo/cuda/runtime.h"
5
+ #include "cumo/template.h"
6
+
7
+ #if SIZEOF_VOIDP == 8
8
+ #define cIndex cumo_cInt64
9
+ #elif SIZEOF_VOIDP == 4
10
+ #define cIndex cumo_cInt32
11
+ #endif
12
+
13
+ // from ruby/enumerator.c
14
+ struct enumerator {
15
+ VALUE obj;
16
+ ID meth;
17
+ VALUE args;
18
+ // use only above in this source
19
+ VALUE fib;
20
+ VALUE dst;
21
+ VALUE lookahead;
22
+ VALUE feedvalue;
23
+ VALUE stop_exc;
24
+ VALUE size;
25
+ // incompatible below depending on ruby version
26
+ //VALUE procs; // ruby 2.4
27
+ //rb_enumerator_size_func *size_fn; // ruby 2.1-2.4
28
+ //VALUE (*size_fn)(ANYARGS); // ruby 2.0
29
+ };
30
+
31
+ // note: the memory referenced by the idx pointer is not freed and causes a memory leak.
32
+ //
33
+ // @example
34
+ // a[1..3,1] generates two na_index_arg_t(s). First is for 1..3, and second is for 1.
35
+ typedef struct {
36
+ size_t n; // the number of elements of the dimension
37
+ size_t beg; // the starting point in the dimension
38
+ ssize_t step; // the step size of the dimension
39
+ size_t *idx; // list of indices (NULL when the index is given as beg/step)
40
+ int reduce; // true if the dimension is reduced by addition
41
+ int orig_dim; // the dimension of original array
42
+ } na_index_arg_t;
43
+
44
+
45
+ static void
46
+ print_index_arg(na_index_arg_t *q, int n)
47
+ {
48
+ int i;
49
+ printf("na_index_arg_t = 0x%"SZF"x {\n",(size_t)q);
50
+ for (i=0; i<n; i++) {
51
+ printf(" q[%d].n=%"SZF"d\n",i,q[i].n);
52
+ printf(" q[%d].beg=%"SZF"d\n",i,q[i].beg);
53
+ printf(" q[%d].step=%"SZF"d\n",i,q[i].step);
54
+ printf(" q[%d].idx=0x%"SZF"x\n",i,(size_t)q[i].idx);
55
+ printf(" q[%d].reduce=0x%x\n",i,q[i].reduce);
56
+ printf(" q[%d].orig_dim=%d\n",i,q[i].orig_dim);
57
+ }
58
+ printf("}\n");
59
+ }
60
+
61
+ static VALUE sym_ast;
62
+ static VALUE sym_all;
63
+ //static VALUE sym_reduce;
64
+ static VALUE sym_minus;
65
+ static VALUE sym_new;
66
+ static VALUE sym_reverse;
67
+ static VALUE sym_plus;
68
+ static VALUE sym_sum;
69
+ static VALUE sym_tilde;
70
+ static VALUE sym_rest;
71
+ static ID id_beg;
72
+ static ID id_end;
73
+ static ID id_exclude_end;
74
+ static ID id_each;
75
+ static ID id_step;
76
+ static ID id_dup;
77
+ static ID id_bracket;
78
+ static ID id_shift_left;
79
+ static ID id_mask;
80
+
81
+
82
+ static void
83
+ na_index_set_step(na_index_arg_t *q, int i, size_t n, size_t beg, ssize_t step)
84
+ {
85
+ q->n = n;
86
+ q->beg = beg;
87
+ q->step = step;
88
+ q->idx = NULL;
89
+ q->reduce = 0;
90
+ q->orig_dim = i;
91
+ }
92
+
93
+ static void
94
+ na_index_set_scalar(na_index_arg_t *q, int i, ssize_t size, ssize_t x)
95
+ {
96
+ if (x < -size || x >= size)
97
+ rb_raise(rb_eRangeError,
98
+ "array index (%"SZF"d) is out of array size (%"SZF"d)",
99
+ x, size);
100
+ if (x < 0)
101
+ x += size;
102
+ q->n = 1;
103
+ q->beg = x;
104
+ q->step = 0;
105
+ q->idx = NULL;
106
+ q->reduce = 0;
107
+ q->orig_dim = i;
108
+ }
109
+
110
+ static inline ssize_t
111
+ na_range_check(ssize_t pos, ssize_t size, int dim)
112
+ {
113
+ ssize_t idx=pos;
114
+
115
+ if (idx < 0) idx += size;
116
+ if (idx < 0 || idx >= size) {
117
+ rb_raise(rb_eIndexError, "index=%"SZF"d out of shape[%d]=%"SZF"d",
118
+ pos, dim, size);
119
+ }
120
+ return idx;
121
+ }
122
+
123
+ static void
124
+ na_parse_array(VALUE ary, int orig_dim, ssize_t size, na_index_arg_t *q)
125
+ {
126
+ int k;
127
+ int n = RARRAY_LEN(ary);
128
+ q->idx = ALLOC_N(size_t, n);
129
+ for (k=0; k<n; k++) {
130
+ q->idx[k] = na_range_check(NUM2SSIZET(RARRAY_AREF(ary,k)), size, orig_dim);
131
+ }
132
+ q->n = n;
133
+ q->beg = 0;
134
+ q->step = 1;
135
+ q->reduce = 0;
136
+ q->orig_dim = orig_dim;
137
+ }
138
+
139
+ static void
140
+ na_parse_narray_index(VALUE a, int orig_dim, ssize_t size, na_index_arg_t *q)
141
+ {
142
+ VALUE idx;
143
+ narray_t *na;
144
+ narray_data_t *nidx;
145
+ size_t k, n;
146
+ ssize_t *nidxp;
147
+
148
+ GetNArray(a,na);
149
+ if (NA_NDIM(na) != 1) {
150
+ rb_raise(rb_eIndexError, "should be 1-d NArray");
151
+ }
152
+ n = NA_SIZE(na);
153
+ idx = nary_new(cIndex,1,&n);
154
+ na_store(idx,a);
155
+
156
+ GetNArrayData(idx,nidx);
157
+ nidxp = (ssize_t*)nidx->ptr;
158
+ q->idx = ALLOC_N(size_t, n);
159
+
160
+ // ndixp is cuda memory (cuda narray)
161
+ SHOW_SYNCHRONIZE_WARNING_ONCE("na_parse_narray_index", "any");
162
+ cumo_cuda_runtime_check_status(cudaDeviceSynchronize());
163
+
164
+ for (k=0; k<n; k++) {
165
+ q->idx[k] = na_range_check(nidxp[k], size, orig_dim);
166
+ }
167
+ q->n = n;
168
+ q->beg = 0;
169
+ q->step = 1;
170
+ q->reduce = 0;
171
+ q->orig_dim = orig_dim;
172
+ }
173
+
174
+ static void
175
+ na_parse_range(VALUE range, ssize_t step, int orig_dim, ssize_t size, na_index_arg_t *q)
176
+ {
177
+ int n;
178
+ VALUE excl_end;
179
+ ssize_t beg, end, beg_orig, end_orig;
180
+ const char *dot = "..", *edot = "...";
181
+
182
+ beg = beg_orig = NUM2SSIZET(rb_funcall(range,id_beg,0));
183
+ if (beg < 0) {
184
+ beg += size;
185
+ }
186
+ end = end_orig = NUM2SSIZET(rb_funcall(range,id_end,0));
187
+ if (end < 0) {
188
+ end += size;
189
+ }
190
+ excl_end = rb_funcall(range,id_exclude_end,0);
191
+ if (RTEST(excl_end)) {
192
+ end--;
193
+ dot = edot;
194
+ }
195
+ if (beg < 0 || beg >= size || end < 0 || end >= size) {
196
+ rb_raise(rb_eRangeError,
197
+ "%"SZF"d%s%"SZF"d is out of range for size=%"SZF"d",
198
+ beg_orig, dot, end_orig, size);
199
+ }
200
+ n = (end-beg)/step+1;
201
+ if (n<0) n=0;
202
+ na_index_set_step(q,orig_dim,n,beg,step);
203
+
204
+ }
205
+
206
+ static void
207
+ na_parse_enumerator(VALUE enum_obj, int orig_dim, ssize_t size, na_index_arg_t *q)
208
+ {
209
+ int len;
210
+ ssize_t step;
211
+ struct enumerator *e;
212
+
213
+ if (!RB_TYPE_P(enum_obj, T_DATA)) {
214
+ rb_raise(rb_eTypeError,"wrong argument type (not T_DATA)");
215
+ }
216
+ e = (struct enumerator *)DATA_PTR(enum_obj);
217
+
218
+ if (rb_obj_is_kind_of(e->obj, rb_cRange)) {
219
+ if (e->meth == id_each) {
220
+ na_parse_range(e->obj, 1, orig_dim, size, q);
221
+ }
222
+ else if (e->meth == id_step) {
223
+ if (TYPE(e->args) != T_ARRAY) {
224
+ rb_raise(rb_eArgError,"no argument for step");
225
+ }
226
+ len = RARRAY_LEN(e->args);
227
+ if (len != 1) {
228
+ rb_raise(rb_eArgError,"invalid number of step argument (1 for %d)",len);
229
+ }
230
+ step = NUM2SSIZET(RARRAY_AREF(e->args,0));
231
+ na_parse_range(e->obj, step, orig_dim, size, q);
232
+ } else {
233
+ rb_raise(rb_eTypeError,"unknown Range method: %s",rb_id2name(e->meth));
234
+ }
235
+ } else {
236
+ rb_raise(rb_eTypeError,"not Range object");
237
+ }
238
+ }
239
+
240
+ // Analyze *a* which is *i*-th index object and store the information to q
241
+ //
242
+ // a: a ruby object of i-th index
243
+ // size: size of i-th dimension of original NArray
244
+ // i: parse i-th index
245
+ // q: parsed information is stored to *q
246
+ static void
247
+ na_index_parse_each(volatile VALUE a, ssize_t size, int i, na_index_arg_t *q)
248
+ {
249
+ switch(TYPE(a)) {
250
+
251
+ case T_FIXNUM:
252
+ na_index_set_scalar(q,i,size,FIX2LONG(a));
253
+ break;
254
+
255
+ case T_BIGNUM:
256
+ na_index_set_scalar(q,i,size,NUM2SSIZET(a));
257
+ break;
258
+
259
+ case T_FLOAT:
260
+ na_index_set_scalar(q,i,size,NUM2SSIZET(a));
261
+ break;
262
+
263
+ case T_NIL:
264
+ case T_TRUE:
265
+ na_index_set_step(q,i,size,0,1);
266
+ break;
267
+
268
+ case T_SYMBOL:
269
+ if (a==sym_all || a==sym_ast) {
270
+ na_index_set_step(q,i,size,0,1);
271
+ }
272
+ else if (a==sym_reverse) {
273
+ na_index_set_step(q,i,size,size-1,-1);
274
+ }
275
+ else if (a==sym_new) {
276
+ na_index_set_step(q,i,1,0,1);
277
+ }
278
+ else if (a==sym_reduce || a==sym_sum || a==sym_plus) {
279
+ na_index_set_step(q,i,size,0,1);
280
+ q->reduce = 1;
281
+ } else {
282
+ rb_raise(rb_eIndexError, "invalid symbol for index");
283
+ }
284
+ break;
285
+
286
+ case T_ARRAY:
287
+ na_parse_array(a, i, size, q);
288
+ break;
289
+
290
+ default:
291
+ if (rb_obj_is_kind_of(a, rb_cRange)) {
292
+ na_parse_range(a, 1, i, size, q);
293
+ }
294
+ else if (rb_obj_is_kind_of(a, rb_cEnumerator)) {
295
+ na_parse_enumerator(a, i, size, q);
296
+ }
297
+ else if (rb_obj_is_kind_of(a, na_cStep)) {
298
+ ssize_t beg, step, n;
299
+ nary_step_array_index(a, size, (size_t*)(&n), &beg, &step);
300
+ na_index_set_step(q,i,n,beg,step);
301
+ }
302
+ // NArray index
303
+ else if (NA_IsNArray(a)) {
304
+ na_parse_narray_index(a, i, size, q);
305
+ }
306
+ else {
307
+ rb_raise(rb_eIndexError, "not allowed type");
308
+ }
309
+ }
310
+ }
311
+
312
+
313
+ static size_t
314
+ na_index_parse_args(VALUE args, narray_t *na, na_index_arg_t *q, int ndim)
315
+ {
316
+ int i, j, k, l, nidx;
317
+ size_t total=1;
318
+ VALUE v;
319
+
320
+ if (ndim == 0) {
321
+ return /*total*/1;
322
+ }
323
+
324
+ nidx = RARRAY_LEN(args);
325
+
326
+ for (i=j=k=0; i<nidx; i++) {
327
+ v = RARRAY_AREF(args,i);
328
+ // rest (ellipsis) dimension
329
+ if (v==Qfalse) {
330
+ for (l = ndim - (nidx-1); l>0; l--) {
331
+ //printf("i=%d j=%d k=%d l=%d ndim=%d nidx=%d\n",i,j,k,l,ndim,nidx);
332
+ na_index_parse_each(Qtrue, na->shape[k], k, &q[j]);
333
+ if (q[j].n > 1) {
334
+ total *= q[j].n;
335
+ }
336
+ j++;
337
+ k++;
338
+ }
339
+ }
340
+ // new dimension
341
+ else if (v==sym_new) {
342
+ na_index_parse_each(v, 1, k, &q[j]);
343
+ j++;
344
+ }
345
+ // other dimention
346
+ else {
347
+ na_index_parse_each(v, na->shape[k], k, &q[j]);
348
+ if (q[j].n > 1) {
349
+ total *= q[j].n;
350
+ }
351
+ j++;
352
+ k++;
353
+ }
354
+ }
355
+ return total;
356
+ }
357
+
358
+
359
+ static void
360
+ na_get_strides_nadata(const narray_data_t *na, ssize_t *strides, ssize_t elmsz)
361
+ {
362
+ int i = na->base.ndim - 1;
363
+ strides[i] = elmsz;
364
+ for (; i>0; i--) {
365
+ strides[i-1] = strides[i] * na->base.shape[i];
366
+ }
367
+ }
368
+
369
+ static void
370
+ na_index_aref_nadata(narray_data_t *na1, narray_view_t *na2,
371
+ na_index_arg_t *q, ssize_t elmsz, int ndim, int keep_dim)
372
+ {
373
+ int i, j;
374
+ ssize_t size, k, total=1;
375
+ ssize_t stride1;
376
+ ssize_t *strides_na1;
377
+ size_t *index;
378
+ ssize_t beg, step;
379
+ VALUE m;
380
+
381
+ strides_na1 = ALLOCA_N(ssize_t, na1->base.ndim);
382
+ na_get_strides_nadata(na1, strides_na1, elmsz);
383
+
384
+ for (i=j=0; i<ndim; i++) {
385
+ stride1 = strides_na1[q[i].orig_dim];
386
+
387
+ // numeric index -- trim dimension
388
+ if (!keep_dim && q[i].n==1 && q[i].step==0) {
389
+ beg = q[i].beg;
390
+ na2->offset += stride1 * beg;
391
+ continue;
392
+ }
393
+
394
+ na2->base.shape[j] = size = q[i].n;
395
+
396
+ if (q[i].reduce != 0) {
397
+ m = rb_funcall(INT2FIX(1),id_shift_left,1,INT2FIX(j));
398
+ na2->base.reduce = rb_funcall(m,'|',1,na2->base.reduce);
399
+ }
400
+
401
+ // array index
402
+ if (q[i].idx != NULL) {
403
+ index = q[i].idx;
404
+ SDX_SET_INDEX(na2->stridx[j],index);
405
+ q[i].idx = NULL;
406
+ for (k=0; k<size; k++) {
407
+ index[k] = index[k] * stride1;
408
+ }
409
+ } else {
410
+ beg = q[i].beg;
411
+ step = q[i].step;
412
+ na2->offset += stride1*beg;
413
+ SDX_SET_STRIDE(na2->stridx[j], stride1*step);
414
+ }
415
+ j++;
416
+ total *= size;
417
+ }
418
+ na2->base.size = total;
419
+ }
420
+
421
+
422
+ static void
423
+ na_index_aref_naview(narray_view_t *na1, narray_view_t *na2,
424
+ na_index_arg_t *q, ssize_t elmsz, int ndim, int keep_dim)
425
+ {
426
+ int i, j;
427
+ ssize_t total=1;
428
+
429
+ for (i=j=0; i<ndim; i++) {
430
+ stridx_t sdx1 = na1->stridx[q[i].orig_dim];
431
+ ssize_t size;
432
+
433
+ // numeric index -- trim dimension
434
+ if (!keep_dim && q[i].n==1 && q[i].step==0) {
435
+ if (SDX_IS_INDEX(sdx1)) {
436
+ na2->offset += SDX_GET_INDEX(sdx1)[q[i].beg];
437
+ } else {
438
+ na2->offset += SDX_GET_STRIDE(sdx1)*q[i].beg;
439
+ }
440
+ continue;
441
+ }
442
+
443
+ na2->base.shape[j] = size = q[i].n;
444
+
445
+ if (q[i].reduce != 0) {
446
+ VALUE m = rb_funcall(INT2FIX(1),id_shift_left,1,INT2FIX(j));
447
+ na2->base.reduce = rb_funcall(m,'|',1,na2->base.reduce);
448
+ }
449
+
450
+ if (q[i].orig_dim >= na1->base.ndim) {
451
+ // new dimension
452
+ SDX_SET_STRIDE(na2->stridx[j], elmsz);
453
+ }
454
+ else if (q[i].idx != NULL && SDX_IS_INDEX(sdx1)) {
455
+ // index <- index
456
+ int k;
457
+ size_t *index = q[i].idx;
458
+ SDX_SET_INDEX(na2->stridx[j], index);
459
+ q[i].idx = NULL;
460
+
461
+ for (k=0; k<size; k++) {
462
+ index[k] = SDX_GET_INDEX(sdx1)[index[k]];
463
+ }
464
+ }
465
+ else if (q[i].idx != NULL && SDX_IS_STRIDE(sdx1)) {
466
+ // index <- step
467
+ ssize_t stride1 = SDX_GET_STRIDE(sdx1);
468
+ size_t *index = q[i].idx;
469
+ SDX_SET_INDEX(na2->stridx[j],index);
470
+ q[i].idx = NULL;
471
+
472
+ if (stride1<0) {
473
+ size_t last;
474
+ int k;
475
+ stride1 = -stride1;
476
+ last = na1->base.shape[q[i].orig_dim] - 1;
477
+ if (na2->offset < last * stride1) {
478
+ rb_raise(rb_eStandardError,"bug: negative offset");
479
+ }
480
+ na2->offset -= last * stride1;
481
+ for (k=0; k<size; k++) {
482
+ index[k] = (last - index[k]) * stride1;
483
+ }
484
+ } else {
485
+ int k;
486
+ for (k=0; k<size; k++) {
487
+ index[k] = index[k] * stride1;
488
+ }
489
+ }
490
+ }
491
+ else if (q[i].idx == NULL && SDX_IS_INDEX(sdx1)) {
492
+ // step <- index
493
+ int k;
494
+ size_t beg = q[i].beg;
495
+ ssize_t step = q[i].step;
496
+ size_t *index = ALLOC_N(size_t, size);
497
+ SDX_SET_INDEX(na2->stridx[j],index);
498
+ for (k=0; k<size; k++) {
499
+ index[k] = SDX_GET_INDEX(sdx1)[beg+step*k];
500
+ }
501
+ }
502
+ else if (q[i].idx == NULL && SDX_IS_STRIDE(sdx1)) {
503
+ // step <- step
504
+ size_t beg = q[i].beg;
505
+ ssize_t step = q[i].step;
506
+ ssize_t stride1 = SDX_GET_STRIDE(sdx1);
507
+ na2->offset += stride1*beg;
508
+ SDX_SET_STRIDE(na2->stridx[j], stride1*step);
509
+ }
510
+
511
+ j++;
512
+ total *= size;
513
+ }
514
+ na2->base.size = total;
515
+ }
516
+
517
+
518
+ static int
519
+ na_ndim_new_narray(int ndim, const na_index_arg_t *q)
520
+ {
521
+ int i, ndim_new=0;
522
+ for (i=0; i<ndim; i++) {
523
+ if (q[i].n>1 || q[i].step!=0) {
524
+ ndim_new++;
525
+ }
526
+ }
527
+ return ndim_new;
528
+ }
529
+
530
+ typedef struct {
531
+ VALUE args, self, store;
532
+ int ndim;
533
+ na_index_arg_t *q; // multi-dimensional index args
534
+ narray_t *na1;
535
+ int keep_dim;
536
+ size_t pos; // offset position for 0-dimensional narray. 0-dimensional array does not use q.
537
+ } na_aref_md_data_t;
538
+
539
+ static na_index_arg_t*
540
+ na_allocate_index_args(int ndim)
541
+ {
542
+ na_index_arg_t *q;
543
+ int i;
544
+ if (ndim == 0) return NULL;
545
+
546
+ q = ALLOC_N(na_index_arg_t, ndim);
547
+ for (i=0; i<ndim; i++) {
548
+ q[i].idx = NULL;
549
+ }
550
+ return q;
551
+ }
552
+
553
+ static
554
+ VALUE na_aref_md_protected(VALUE data_value)
555
+ {
556
+ na_aref_md_data_t *data = (na_aref_md_data_t*)(data_value);
557
+ VALUE self = data->self;
558
+ VALUE args = data->args;
559
+ VALUE store = data->store;
560
+ int ndim = data->ndim;
561
+ na_index_arg_t *q = data->q;
562
+ narray_t *na1 = data->na1;
563
+ int keep_dim = data->keep_dim;
564
+
565
+ int ndim_new;
566
+ VALUE view;
567
+ narray_view_t *na2;
568
+ ssize_t elmsz;
569
+
570
+ na_index_parse_args(args, na1, q, ndim);
571
+
572
+ if (na_debug_flag) print_index_arg(q,ndim);
573
+
574
+ if (keep_dim) {
575
+ ndim_new = ndim;
576
+ } else {
577
+ ndim_new = na_ndim_new_narray(ndim, q);
578
+ }
579
+ view = na_s_allocate_view(CLASS_OF(self));
580
+
581
+ na_copy_flags(self, view);
582
+ GetNArrayView(view,na2);
583
+
584
+ na_alloc_shape((narray_t*)na2, ndim_new);
585
+
586
+ na2->stridx = ALLOC_N(stridx_t,ndim_new);
587
+
588
+ elmsz = nary_element_stride(self);
589
+
590
+ switch(na1->type) {
591
+ case NARRAY_DATA_T:
592
+ case NARRAY_FILEMAP_T:
593
+ if (ndim == 0) {
594
+ na2->offset = data->pos;
595
+ na2->base.size = 1;
596
+ } else {
597
+ na_index_aref_nadata((narray_data_t *)na1,na2,q,elmsz,ndim,keep_dim);
598
+ }
599
+ na2->data = self;
600
+ break;
601
+ case NARRAY_VIEW_T:
602
+ if (ndim == 0) {
603
+ na2->offset = ((narray_view_t *)na1)->offset + data->pos;
604
+ na2->data = ((narray_view_t *)na1)->data;
605
+ na2->base.size = 1;
606
+ } else {
607
+ na2->offset = ((narray_view_t *)na1)->offset;
608
+ na2->data = ((narray_view_t *)na1)->data;
609
+ na_index_aref_naview((narray_view_t *)na1,na2,q,elmsz,ndim,keep_dim);
610
+ }
611
+ break;
612
+ }
613
+ if (store) {
614
+ na_get_pointer_for_write(store); // allocate memory
615
+ na_store(na_flatten_dim(store,0),view);
616
+ return store;
617
+ }
618
+ return view;
619
+ }
620
+
621
+ static VALUE
622
+ na_aref_md_ensure(VALUE data_value)
623
+ {
624
+ na_aref_md_data_t *data = (na_aref_md_data_t*)(data_value);
625
+ int i;
626
+ for (i=0; i<data->ndim; i++) {
627
+ xfree(data->q[i].idx);
628
+ }
629
+ if (data->q) xfree(data->q);
630
+ return Qnil;
631
+ }
632
+
633
+ static VALUE
634
+ na_aref_md(int argc, VALUE *argv, VALUE self, int keep_dim, int result_nd, size_t pos)
635
+ {
636
+ VALUE args; // should be GC protected
637
+ narray_t *na1;
638
+ na_aref_md_data_t data;
639
+ VALUE store = 0;
640
+ VALUE idx;
641
+ narray_t *nidx;
642
+
643
+ GetNArray(self,na1);
644
+
645
+ args = rb_ary_new4(argc,argv);
646
+
647
+ if (argc == 1 && result_nd == 1) {
648
+ idx = argv[0];
649
+ if (rb_obj_is_kind_of(idx, rb_cArray)) {
650
+ idx = rb_apply(cumo_cNArray,id_bracket,idx);
651
+ }
652
+ if (rb_obj_is_kind_of(idx, cumo_cNArray)) {
653
+ GetNArray(idx,nidx);
654
+ if (NA_NDIM(nidx)>1) {
655
+ store = nary_new(CLASS_OF(self),NA_NDIM(nidx),NA_SHAPE(nidx));
656
+ idx = na_flatten(idx);
657
+ RARRAY_ASET(args,0,idx);
658
+ }
659
+ }
660
+ // flatten should be done only for narray-view with non-uniform stride.
661
+ if (na1->ndim > 1) {
662
+ self = na_flatten(self);
663
+ GetNArray(self,na1);
664
+ }
665
+ }
666
+
667
+ data.args = args;
668
+ data.self = self;
669
+ data.store = store;
670
+ data.ndim = result_nd;
671
+ data.q = na_allocate_index_args(result_nd);
672
+ data.na1 = na1;
673
+ data.keep_dim = keep_dim;
674
+
675
+ switch(na1->type) {
676
+ case NARRAY_DATA_T:
677
+ data.pos = pos;
678
+ break;
679
+ case NARRAY_FILEMAP_T:
680
+ data.pos = pos; // correct? I have never used..
681
+ break;
682
+ case NARRAY_VIEW_T:
683
+ {
684
+ narray_view_t *nv;
685
+ GetNArrayView(self,nv);
686
+ // pos obtained by na_get_result_dimension adds view->offset.
687
+ data.pos = pos - nv->offset;
688
+ }
689
+ break;
690
+ }
691
+
692
+ return rb_ensure(na_aref_md_protected, (VALUE)&data, na_aref_md_ensure, (VALUE)&data);
693
+ }
694
+
695
+
696
+ /* method: [](idx1,idx2,...,idxN) */
697
+ VALUE
698
+ na_aref_main(int nidx, VALUE *idx, VALUE self, int keep_dim, int result_nd, size_t pos)
699
+ {
700
+ na_index_arg_to_internal_order(nidx, idx, self);
701
+
702
+ if (nidx==0) {
703
+ return rb_funcall(self,id_dup,0);
704
+ }
705
+ if (nidx==1) {
706
+ if (CLASS_OF(*idx)==cumo_cBit) {
707
+ return rb_funcall(*idx,id_mask,1,self);
708
+ }
709
+ }
710
+ return na_aref_md(nidx, idx, self, keep_dim, result_nd, pos);
711
+ }
712
+
713
+
714
+ /* method: slice(idx1,idx2,...,idxN) */
715
+ static VALUE na_slice(int argc, VALUE *argv, VALUE self)
716
+ {
717
+ int result_nd;
718
+ size_t pos;
719
+
720
+ result_nd = na_get_result_dimension(self, argc, argv, 0, &pos);
721
+ return na_aref_main(argc, argv, self, 1, result_nd, pos);
722
+ }
723
+
724
+
725
+ static int
726
+ check_index_count(int argc, int na_ndim, int count_new, int count_rest)
727
+ {
728
+ int result_nd = na_ndim + count_new;
729
+
730
+ switch(count_rest) {
731
+ case 0:
732
+ if (count_new == 0 && argc == 1) return 1;
733
+ if (argc == result_nd) return result_nd;
734
+ rb_raise(rb_eIndexError,"# of index(=%i) should be "
735
+ "equal to ndim(=%i)",argc,na_ndim);
736
+ break;
737
+ case 1:
738
+ if (argc-1 <= result_nd) return result_nd;
739
+ rb_raise(rb_eIndexError,"# of index(=%i) > ndim(=%i) with :rest",
740
+ argc,na_ndim);
741
+ break;
742
+ }
743
+ return -1;
744
+ }
745
+
746
+ int
747
+ na_get_result_dimension(VALUE self, int argc, VALUE *argv, ssize_t stride, size_t *pos_idx)
748
+ {
749
+ int i, j;
750
+ int count_new=0;
751
+ int count_rest=0;
752
+ int count_else=0;
753
+ ssize_t x, s, m, pos, *idx;
754
+ narray_t *na;
755
+ narray_view_t *nv;
756
+ stridx_t sdx;
757
+ VALUE a;
758
+
759
+ GetNArray(self,na);
760
+ if (na->size == 0) {
761
+ rb_raise(rb_eRuntimeError, "cannot get index of empty array");
762
+ return -1;
763
+ }
764
+ idx = ALLOCA_N(ssize_t, argc);
765
+ for (i=j=0; i<argc; i++) {
766
+ a = argv[i];
767
+ switch(TYPE(a)) {
768
+ case T_FIXNUM:
769
+ idx[j++] = FIX2LONG(a);
770
+ break;
771
+ case T_BIGNUM:
772
+ case T_FLOAT:
773
+ idx[j++] = NUM2SSIZET(a);
774
+ break;
775
+ case T_FALSE:
776
+ case T_SYMBOL:
777
+ if (a==sym_rest || a==sym_tilde || a==Qfalse) {
778
+ argv[i] = Qfalse;
779
+ count_rest++;
780
+ break;
781
+ } else if (a==sym_new || a==sym_minus) {
782
+ argv[i] = sym_new;
783
+ count_new++;
784
+ }
785
+ // not break
786
+ default:
787
+ count_else++;
788
+ }
789
+ }
790
+
791
+ if (count_rest > 1) {
792
+ rb_raise(rb_eIndexError,"multiple rest-dimension is not allowd");
793
+ }
794
+ if (count_else != 0) {
795
+ return check_index_count(argc, na->ndim, count_new, count_rest);
796
+ }
797
+
798
+ switch(na->type) {
799
+ case NARRAY_VIEW_T:
800
+ GetNArrayView(self,nv);
801
+ pos = nv->offset;
802
+ if (j == na->ndim) {
803
+ for (i=j-1; i>=0; i--) {
804
+ x = na_range_check(idx[i], na->shape[i], i);
805
+ sdx = nv->stridx[i];
806
+ if (SDX_IS_INDEX(sdx)) {
807
+ pos += SDX_GET_INDEX(sdx)[x];
808
+ } else {
809
+ pos += SDX_GET_STRIDE(sdx)*x;
810
+ }
811
+ }
812
+ *pos_idx = pos;
813
+ }
814
+ else if (argc==1 && j==1) {
815
+ x = na_range_check(idx[0], na->size, 0);
816
+ for (i=na->ndim-1; i>=0; i--) {
817
+ s = na->shape[i];
818
+ m = x % s;
819
+ x = x / s;
820
+ sdx = nv->stridx[i];
821
+ if (SDX_IS_INDEX(sdx)) {
822
+ pos += SDX_GET_INDEX(sdx)[m];
823
+ } else {
824
+ pos += SDX_GET_STRIDE(sdx)*m;
825
+ }
826
+ }
827
+ *pos_idx = pos;
828
+ } else {
829
+ return check_index_count(argc, na->ndim, count_new, count_rest);
830
+ }
831
+ break;
832
+ default:
833
+ if (!stride) {
834
+ stride = nary_element_stride(self);
835
+ }
836
+ if (argc==1 && j==1) {
837
+ x = na_range_check(idx[0], na->size, 0);
838
+ *pos_idx = stride * x;
839
+ }
840
+ else if (j == na->ndim) {
841
+ pos = 0;
842
+ for (i=j-1; i>=0; i--) {
843
+ x = na_range_check(idx[i], na->shape[i], i);
844
+ pos += stride * x;
845
+ stride *= na->shape[i];
846
+ }
847
+ *pos_idx = pos;
848
+ } else {
849
+ return check_index_count(argc, na->ndim, count_new, count_rest);
850
+ }
851
+ }
852
+ return 0;
853
+ }
854
+
855
+
856
+ void
857
+ Init_cumo_nary_index()
858
+ {
859
+ rb_define_method(cNArray, "slice", na_slice, -1);
860
+
861
+ sym_ast = ID2SYM(rb_intern("*"));
862
+ sym_all = ID2SYM(rb_intern("all"));
863
+ sym_minus = ID2SYM(rb_intern("-"));
864
+ sym_new = ID2SYM(rb_intern("new"));
865
+ sym_reverse = ID2SYM(rb_intern("reverse"));
866
+ sym_plus = ID2SYM(rb_intern("+"));
867
+ //sym_reduce = ID2SYM(rb_intern("reduce"));
868
+ sym_sum = ID2SYM(rb_intern("sum"));
869
+ sym_tilde = ID2SYM(rb_intern("~"));
870
+ sym_rest = ID2SYM(rb_intern("rest"));
871
+ id_beg = rb_intern("begin");
872
+ id_end = rb_intern("end");
873
+ id_exclude_end = rb_intern("exclude_end?");
874
+ id_each = rb_intern("each");
875
+ id_step = rb_intern("step");
876
+ id_dup = rb_intern("dup");
877
+ id_bracket = rb_intern("[]");
878
+ id_shift_left = rb_intern("<<");
879
+ id_mask = rb_intern("mask");
880
+ }