cumo 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (266) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +27 -0
  3. data/.travis.yml +5 -0
  4. data/3rd_party/mkmf-cu/.gitignore +36 -0
  5. data/3rd_party/mkmf-cu/Gemfile +3 -0
  6. data/3rd_party/mkmf-cu/LICENSE +21 -0
  7. data/3rd_party/mkmf-cu/README.md +36 -0
  8. data/3rd_party/mkmf-cu/Rakefile +11 -0
  9. data/3rd_party/mkmf-cu/bin/mkmf-cu-nvcc +4 -0
  10. data/3rd_party/mkmf-cu/lib/mkmf-cu.rb +32 -0
  11. data/3rd_party/mkmf-cu/lib/mkmf-cu/cli.rb +80 -0
  12. data/3rd_party/mkmf-cu/lib/mkmf-cu/nvcc.rb +157 -0
  13. data/3rd_party/mkmf-cu/mkmf-cu.gemspec +16 -0
  14. data/3rd_party/mkmf-cu/test/test_mkmf-cu.rb +67 -0
  15. data/CODE_OF_CONDUCT.md +46 -0
  16. data/Gemfile +8 -0
  17. data/LICENSE.txt +82 -0
  18. data/README.md +252 -0
  19. data/Rakefile +43 -0
  20. data/bench/broadcast_fp32.rb +138 -0
  21. data/bench/cumo_bench.rb +193 -0
  22. data/bench/numo_bench.rb +138 -0
  23. data/bench/reduction_fp32.rb +117 -0
  24. data/bin/console +14 -0
  25. data/bin/setup +8 -0
  26. data/cumo.gemspec +32 -0
  27. data/ext/cumo/cuda/cublas.c +278 -0
  28. data/ext/cumo/cuda/driver.c +421 -0
  29. data/ext/cumo/cuda/memory_pool.cpp +185 -0
  30. data/ext/cumo/cuda/memory_pool_impl.cpp +308 -0
  31. data/ext/cumo/cuda/memory_pool_impl.hpp +370 -0
  32. data/ext/cumo/cuda/memory_pool_impl_test.cpp +554 -0
  33. data/ext/cumo/cuda/nvrtc.c +207 -0
  34. data/ext/cumo/cuda/runtime.c +167 -0
  35. data/ext/cumo/cumo.c +148 -0
  36. data/ext/cumo/depend.erb +58 -0
  37. data/ext/cumo/extconf.rb +179 -0
  38. data/ext/cumo/include/cumo.h +25 -0
  39. data/ext/cumo/include/cumo/compat.h +23 -0
  40. data/ext/cumo/include/cumo/cuda/cublas.h +153 -0
  41. data/ext/cumo/include/cumo/cuda/cumo_thrust.hpp +187 -0
  42. data/ext/cumo/include/cumo/cuda/cumo_thrust_complex.hpp +79 -0
  43. data/ext/cumo/include/cumo/cuda/driver.h +22 -0
  44. data/ext/cumo/include/cumo/cuda/memory_pool.h +28 -0
  45. data/ext/cumo/include/cumo/cuda/nvrtc.h +22 -0
  46. data/ext/cumo/include/cumo/cuda/runtime.h +40 -0
  47. data/ext/cumo/include/cumo/indexer.h +238 -0
  48. data/ext/cumo/include/cumo/intern.h +142 -0
  49. data/ext/cumo/include/cumo/intern_fwd.h +38 -0
  50. data/ext/cumo/include/cumo/intern_kernel.h +6 -0
  51. data/ext/cumo/include/cumo/narray.h +429 -0
  52. data/ext/cumo/include/cumo/narray_kernel.h +149 -0
  53. data/ext/cumo/include/cumo/ndloop.h +95 -0
  54. data/ext/cumo/include/cumo/reduce_kernel.h +126 -0
  55. data/ext/cumo/include/cumo/template.h +158 -0
  56. data/ext/cumo/include/cumo/template_kernel.h +77 -0
  57. data/ext/cumo/include/cumo/types/bit.h +40 -0
  58. data/ext/cumo/include/cumo/types/bit_kernel.h +34 -0
  59. data/ext/cumo/include/cumo/types/complex.h +402 -0
  60. data/ext/cumo/include/cumo/types/complex_kernel.h +414 -0
  61. data/ext/cumo/include/cumo/types/complex_macro.h +382 -0
  62. data/ext/cumo/include/cumo/types/complex_macro_kernel.h +186 -0
  63. data/ext/cumo/include/cumo/types/dcomplex.h +46 -0
  64. data/ext/cumo/include/cumo/types/dcomplex_kernel.h +13 -0
  65. data/ext/cumo/include/cumo/types/dfloat.h +47 -0
  66. data/ext/cumo/include/cumo/types/dfloat_kernel.h +14 -0
  67. data/ext/cumo/include/cumo/types/float_def.h +34 -0
  68. data/ext/cumo/include/cumo/types/float_def_kernel.h +39 -0
  69. data/ext/cumo/include/cumo/types/float_macro.h +191 -0
  70. data/ext/cumo/include/cumo/types/float_macro_kernel.h +158 -0
  71. data/ext/cumo/include/cumo/types/int16.h +24 -0
  72. data/ext/cumo/include/cumo/types/int16_kernel.h +23 -0
  73. data/ext/cumo/include/cumo/types/int32.h +24 -0
  74. data/ext/cumo/include/cumo/types/int32_kernel.h +19 -0
  75. data/ext/cumo/include/cumo/types/int64.h +24 -0
  76. data/ext/cumo/include/cumo/types/int64_kernel.h +19 -0
  77. data/ext/cumo/include/cumo/types/int8.h +24 -0
  78. data/ext/cumo/include/cumo/types/int8_kernel.h +19 -0
  79. data/ext/cumo/include/cumo/types/int_macro.h +67 -0
  80. data/ext/cumo/include/cumo/types/int_macro_kernel.h +48 -0
  81. data/ext/cumo/include/cumo/types/real_accum.h +486 -0
  82. data/ext/cumo/include/cumo/types/real_accum_kernel.h +101 -0
  83. data/ext/cumo/include/cumo/types/robj_macro.h +80 -0
  84. data/ext/cumo/include/cumo/types/robj_macro_kernel.h +0 -0
  85. data/ext/cumo/include/cumo/types/robject.h +27 -0
  86. data/ext/cumo/include/cumo/types/robject_kernel.h +7 -0
  87. data/ext/cumo/include/cumo/types/scomplex.h +46 -0
  88. data/ext/cumo/include/cumo/types/scomplex_kernel.h +13 -0
  89. data/ext/cumo/include/cumo/types/sfloat.h +48 -0
  90. data/ext/cumo/include/cumo/types/sfloat_kernel.h +14 -0
  91. data/ext/cumo/include/cumo/types/uint16.h +25 -0
  92. data/ext/cumo/include/cumo/types/uint16_kernel.h +20 -0
  93. data/ext/cumo/include/cumo/types/uint32.h +25 -0
  94. data/ext/cumo/include/cumo/types/uint32_kernel.h +20 -0
  95. data/ext/cumo/include/cumo/types/uint64.h +25 -0
  96. data/ext/cumo/include/cumo/types/uint64_kernel.h +20 -0
  97. data/ext/cumo/include/cumo/types/uint8.h +25 -0
  98. data/ext/cumo/include/cumo/types/uint8_kernel.h +20 -0
  99. data/ext/cumo/include/cumo/types/uint_macro.h +58 -0
  100. data/ext/cumo/include/cumo/types/uint_macro_kernel.h +38 -0
  101. data/ext/cumo/include/cumo/types/xint_macro.h +169 -0
  102. data/ext/cumo/include/cumo/types/xint_macro_kernel.h +88 -0
  103. data/ext/cumo/narray/SFMT-params.h +97 -0
  104. data/ext/cumo/narray/SFMT-params19937.h +46 -0
  105. data/ext/cumo/narray/SFMT.c +620 -0
  106. data/ext/cumo/narray/SFMT.h +167 -0
  107. data/ext/cumo/narray/array.c +638 -0
  108. data/ext/cumo/narray/data.c +961 -0
  109. data/ext/cumo/narray/gen/cogen.rb +56 -0
  110. data/ext/cumo/narray/gen/cogen_kernel.rb +58 -0
  111. data/ext/cumo/narray/gen/def/bit.rb +37 -0
  112. data/ext/cumo/narray/gen/def/dcomplex.rb +39 -0
  113. data/ext/cumo/narray/gen/def/dfloat.rb +37 -0
  114. data/ext/cumo/narray/gen/def/int16.rb +36 -0
  115. data/ext/cumo/narray/gen/def/int32.rb +36 -0
  116. data/ext/cumo/narray/gen/def/int64.rb +36 -0
  117. data/ext/cumo/narray/gen/def/int8.rb +36 -0
  118. data/ext/cumo/narray/gen/def/robject.rb +37 -0
  119. data/ext/cumo/narray/gen/def/scomplex.rb +39 -0
  120. data/ext/cumo/narray/gen/def/sfloat.rb +37 -0
  121. data/ext/cumo/narray/gen/def/uint16.rb +36 -0
  122. data/ext/cumo/narray/gen/def/uint32.rb +36 -0
  123. data/ext/cumo/narray/gen/def/uint64.rb +36 -0
  124. data/ext/cumo/narray/gen/def/uint8.rb +36 -0
  125. data/ext/cumo/narray/gen/erbpp2.rb +346 -0
  126. data/ext/cumo/narray/gen/narray_def.rb +268 -0
  127. data/ext/cumo/narray/gen/spec.rb +425 -0
  128. data/ext/cumo/narray/gen/tmpl/accum.c +86 -0
  129. data/ext/cumo/narray/gen/tmpl/accum_binary.c +121 -0
  130. data/ext/cumo/narray/gen/tmpl/accum_binary_kernel.cu +61 -0
  131. data/ext/cumo/narray/gen/tmpl/accum_index.c +119 -0
  132. data/ext/cumo/narray/gen/tmpl/accum_index_kernel.cu +66 -0
  133. data/ext/cumo/narray/gen/tmpl/accum_kernel.cu +12 -0
  134. data/ext/cumo/narray/gen/tmpl/alloc_func.c +107 -0
  135. data/ext/cumo/narray/gen/tmpl/allocate.c +37 -0
  136. data/ext/cumo/narray/gen/tmpl/aref.c +66 -0
  137. data/ext/cumo/narray/gen/tmpl/aref_cpu.c +50 -0
  138. data/ext/cumo/narray/gen/tmpl/aset.c +56 -0
  139. data/ext/cumo/narray/gen/tmpl/binary.c +162 -0
  140. data/ext/cumo/narray/gen/tmpl/binary2.c +70 -0
  141. data/ext/cumo/narray/gen/tmpl/binary2_kernel.cu +15 -0
  142. data/ext/cumo/narray/gen/tmpl/binary_kernel.cu +31 -0
  143. data/ext/cumo/narray/gen/tmpl/binary_s.c +45 -0
  144. data/ext/cumo/narray/gen/tmpl/binary_s_kernel.cu +15 -0
  145. data/ext/cumo/narray/gen/tmpl/bincount.c +181 -0
  146. data/ext/cumo/narray/gen/tmpl/cast.c +44 -0
  147. data/ext/cumo/narray/gen/tmpl/cast_array.c +13 -0
  148. data/ext/cumo/narray/gen/tmpl/class.c +9 -0
  149. data/ext/cumo/narray/gen/tmpl/class_kernel.cu +6 -0
  150. data/ext/cumo/narray/gen/tmpl/clip.c +121 -0
  151. data/ext/cumo/narray/gen/tmpl/coerce_cast.c +10 -0
  152. data/ext/cumo/narray/gen/tmpl/complex_accum_kernel.cu +129 -0
  153. data/ext/cumo/narray/gen/tmpl/cond_binary.c +68 -0
  154. data/ext/cumo/narray/gen/tmpl/cond_binary_kernel.cu +18 -0
  155. data/ext/cumo/narray/gen/tmpl/cond_unary.c +46 -0
  156. data/ext/cumo/narray/gen/tmpl/cum.c +50 -0
  157. data/ext/cumo/narray/gen/tmpl/each.c +47 -0
  158. data/ext/cumo/narray/gen/tmpl/each_with_index.c +70 -0
  159. data/ext/cumo/narray/gen/tmpl/ewcomp.c +79 -0
  160. data/ext/cumo/narray/gen/tmpl/ewcomp_kernel.cu +19 -0
  161. data/ext/cumo/narray/gen/tmpl/extract.c +22 -0
  162. data/ext/cumo/narray/gen/tmpl/extract_cpu.c +26 -0
  163. data/ext/cumo/narray/gen/tmpl/extract_data.c +53 -0
  164. data/ext/cumo/narray/gen/tmpl/eye.c +105 -0
  165. data/ext/cumo/narray/gen/tmpl/eye_kernel.cu +19 -0
  166. data/ext/cumo/narray/gen/tmpl/fill.c +52 -0
  167. data/ext/cumo/narray/gen/tmpl/fill_kernel.cu +29 -0
  168. data/ext/cumo/narray/gen/tmpl/float_accum_kernel.cu +106 -0
  169. data/ext/cumo/narray/gen/tmpl/format.c +62 -0
  170. data/ext/cumo/narray/gen/tmpl/format_to_a.c +49 -0
  171. data/ext/cumo/narray/gen/tmpl/frexp.c +38 -0
  172. data/ext/cumo/narray/gen/tmpl/gemm.c +203 -0
  173. data/ext/cumo/narray/gen/tmpl/init_class.c +20 -0
  174. data/ext/cumo/narray/gen/tmpl/init_module.c +12 -0
  175. data/ext/cumo/narray/gen/tmpl/inspect.c +21 -0
  176. data/ext/cumo/narray/gen/tmpl/lib.c +50 -0
  177. data/ext/cumo/narray/gen/tmpl/lib_kernel.cu +24 -0
  178. data/ext/cumo/narray/gen/tmpl/logseq.c +102 -0
  179. data/ext/cumo/narray/gen/tmpl/logseq_kernel.cu +31 -0
  180. data/ext/cumo/narray/gen/tmpl/map_with_index.c +98 -0
  181. data/ext/cumo/narray/gen/tmpl/median.c +66 -0
  182. data/ext/cumo/narray/gen/tmpl/minmax.c +47 -0
  183. data/ext/cumo/narray/gen/tmpl/module.c +9 -0
  184. data/ext/cumo/narray/gen/tmpl/module_kernel.cu +1 -0
  185. data/ext/cumo/narray/gen/tmpl/new_dim0.c +15 -0
  186. data/ext/cumo/narray/gen/tmpl/new_dim0_kernel.cu +8 -0
  187. data/ext/cumo/narray/gen/tmpl/poly.c +50 -0
  188. data/ext/cumo/narray/gen/tmpl/pow.c +97 -0
  189. data/ext/cumo/narray/gen/tmpl/pow_kernel.cu +29 -0
  190. data/ext/cumo/narray/gen/tmpl/powint.c +17 -0
  191. data/ext/cumo/narray/gen/tmpl/qsort.c +212 -0
  192. data/ext/cumo/narray/gen/tmpl/rand.c +168 -0
  193. data/ext/cumo/narray/gen/tmpl/rand_norm.c +121 -0
  194. data/ext/cumo/narray/gen/tmpl/real_accum_kernel.cu +75 -0
  195. data/ext/cumo/narray/gen/tmpl/seq.c +112 -0
  196. data/ext/cumo/narray/gen/tmpl/seq_kernel.cu +43 -0
  197. data/ext/cumo/narray/gen/tmpl/set2.c +57 -0
  198. data/ext/cumo/narray/gen/tmpl/sort.c +48 -0
  199. data/ext/cumo/narray/gen/tmpl/sort_index.c +111 -0
  200. data/ext/cumo/narray/gen/tmpl/store.c +41 -0
  201. data/ext/cumo/narray/gen/tmpl/store_array.c +187 -0
  202. data/ext/cumo/narray/gen/tmpl/store_array_kernel.cu +58 -0
  203. data/ext/cumo/narray/gen/tmpl/store_bit.c +86 -0
  204. data/ext/cumo/narray/gen/tmpl/store_bit_kernel.cu +66 -0
  205. data/ext/cumo/narray/gen/tmpl/store_from.c +81 -0
  206. data/ext/cumo/narray/gen/tmpl/store_from_kernel.cu +58 -0
  207. data/ext/cumo/narray/gen/tmpl/store_kernel.cu +3 -0
  208. data/ext/cumo/narray/gen/tmpl/store_numeric.c +9 -0
  209. data/ext/cumo/narray/gen/tmpl/to_a.c +43 -0
  210. data/ext/cumo/narray/gen/tmpl/unary.c +132 -0
  211. data/ext/cumo/narray/gen/tmpl/unary2.c +60 -0
  212. data/ext/cumo/narray/gen/tmpl/unary_kernel.cu +72 -0
  213. data/ext/cumo/narray/gen/tmpl/unary_ret2.c +34 -0
  214. data/ext/cumo/narray/gen/tmpl/unary_s.c +86 -0
  215. data/ext/cumo/narray/gen/tmpl/unary_s_kernel.cu +58 -0
  216. data/ext/cumo/narray/gen/tmpl_bit/allocate.c +24 -0
  217. data/ext/cumo/narray/gen/tmpl_bit/aref.c +54 -0
  218. data/ext/cumo/narray/gen/tmpl_bit/aref_cpu.c +57 -0
  219. data/ext/cumo/narray/gen/tmpl_bit/aset.c +56 -0
  220. data/ext/cumo/narray/gen/tmpl_bit/binary.c +98 -0
  221. data/ext/cumo/narray/gen/tmpl_bit/bit_count.c +64 -0
  222. data/ext/cumo/narray/gen/tmpl_bit/bit_count_cpu.c +88 -0
  223. data/ext/cumo/narray/gen/tmpl_bit/bit_count_kernel.cu +76 -0
  224. data/ext/cumo/narray/gen/tmpl_bit/bit_reduce.c +133 -0
  225. data/ext/cumo/narray/gen/tmpl_bit/each.c +48 -0
  226. data/ext/cumo/narray/gen/tmpl_bit/each_with_index.c +70 -0
  227. data/ext/cumo/narray/gen/tmpl_bit/extract.c +30 -0
  228. data/ext/cumo/narray/gen/tmpl_bit/extract_cpu.c +29 -0
  229. data/ext/cumo/narray/gen/tmpl_bit/fill.c +69 -0
  230. data/ext/cumo/narray/gen/tmpl_bit/format.c +64 -0
  231. data/ext/cumo/narray/gen/tmpl_bit/format_to_a.c +51 -0
  232. data/ext/cumo/narray/gen/tmpl_bit/inspect.c +21 -0
  233. data/ext/cumo/narray/gen/tmpl_bit/mask.c +136 -0
  234. data/ext/cumo/narray/gen/tmpl_bit/none_p.c +14 -0
  235. data/ext/cumo/narray/gen/tmpl_bit/store_array.c +108 -0
  236. data/ext/cumo/narray/gen/tmpl_bit/store_bit.c +70 -0
  237. data/ext/cumo/narray/gen/tmpl_bit/store_from.c +60 -0
  238. data/ext/cumo/narray/gen/tmpl_bit/to_a.c +47 -0
  239. data/ext/cumo/narray/gen/tmpl_bit/unary.c +81 -0
  240. data/ext/cumo/narray/gen/tmpl_bit/where.c +90 -0
  241. data/ext/cumo/narray/gen/tmpl_bit/where2.c +95 -0
  242. data/ext/cumo/narray/index.c +880 -0
  243. data/ext/cumo/narray/kwargs.c +153 -0
  244. data/ext/cumo/narray/math.c +142 -0
  245. data/ext/cumo/narray/narray.c +1948 -0
  246. data/ext/cumo/narray/ndloop.c +2105 -0
  247. data/ext/cumo/narray/rand.c +45 -0
  248. data/ext/cumo/narray/step.c +474 -0
  249. data/ext/cumo/narray/struct.c +886 -0
  250. data/lib/cumo.rb +3 -0
  251. data/lib/cumo/cuda.rb +11 -0
  252. data/lib/cumo/cuda/compile_error.rb +36 -0
  253. data/lib/cumo/cuda/compiler.rb +161 -0
  254. data/lib/cumo/cuda/device.rb +47 -0
  255. data/lib/cumo/cuda/link_state.rb +31 -0
  256. data/lib/cumo/cuda/module.rb +40 -0
  257. data/lib/cumo/cuda/nvrtc_program.rb +27 -0
  258. data/lib/cumo/linalg.rb +12 -0
  259. data/lib/cumo/narray.rb +2 -0
  260. data/lib/cumo/narray/extra.rb +1278 -0
  261. data/lib/erbpp.rb +294 -0
  262. data/lib/erbpp/line_number.rb +137 -0
  263. data/lib/erbpp/narray_def.rb +381 -0
  264. data/numo-narray-version +1 -0
  265. data/run.gdb +7 -0
  266. metadata +353 -0
@@ -0,0 +1,70 @@
1
+ <% unless type_name == 'robject' %>
2
+ void <%="cumo_#{c_iter}_stride_kernel_launch"%>(char *p1, char *p2, char *p3, char *p4, ssize_t s1, ssize_t s2, ssize_t s3, ssize_t s4, uint64_t n);
3
+ <% end %>
4
+
5
+ static void
6
+ <%=c_iter%>(na_loop_t *const lp)
7
+ {
8
+ size_t i, n;
9
+ char *p1, *p2, *p3, *p4;
10
+ ssize_t s1, s2, s3, s4;
11
+ INIT_COUNTER(lp, n);
12
+ INIT_PTR(lp, 0, p1, s1);
13
+ INIT_PTR(lp, 1, p2, s2);
14
+ INIT_PTR(lp, 2, p3, s3);
15
+ INIT_PTR(lp, 3, p4, s4);
16
+ for (i=n; i--;) {
17
+ <% if type_name == 'robject' %>
18
+ {
19
+ dtype x, y, a, b;
20
+ SHOW_SYNCHRONIZE_FIXME_WARNING_ONCE("<%=name%>", "<%=type_name%>");
21
+ GET_DATA_STRIDE(p1,s1,dtype,x);
22
+ GET_DATA_STRIDE(p2,s2,dtype,y);
23
+ <% if is_int and %w[divmod].include? name %>
24
+ if (y==0) {
25
+ lp->err_type = rb_eZeroDivError;
26
+ return;
27
+ }
28
+ <% end %>
29
+ m_<%=name%>(x,y,a,b);
30
+ SET_DATA_STRIDE(p3,s3,dtype,a);
31
+ SET_DATA_STRIDE(p4,s4,dtype,b);
32
+ }
33
+ <% else %>
34
+ <%="cumo_#{c_iter}_stride_kernel_launch"%>(p1,p2,p3,p4,s1,s2,s3,s4,n);
35
+ <% end %>
36
+ }
37
+ }
38
+
39
+ static VALUE
40
+ <%=c_func%>_self(VALUE self, VALUE other)
41
+ {
42
+ ndfunc_arg_in_t ain[2] = {{cT,0},{cT,0}};
43
+ ndfunc_arg_out_t aout[2] = {{cT,0},{cT,0}};
44
+ ndfunc_t ndf = { <%=c_iter%>, STRIDE_LOOP, 2, 2, ain, aout };
45
+
46
+ return na_ndloop(&ndf, 2, self, other);
47
+ }
48
+
49
+ /*
50
+ Binary <%=name%>.
51
+ @overload <%=op_map%> other
52
+ @param [Cumo::NArray,Numeric] other
53
+ @return [Cumo::NArray] <%=name%> of self and other.
54
+ */
55
+ static VALUE
56
+ <%=c_func(1)%>(VALUE self, VALUE other)
57
+ {
58
+ <% if is_object %>
59
+ return <%=c_func%>_self(self, other);
60
+ <% else %>
61
+ VALUE klass, v;
62
+ klass = na_upcast(CLASS_OF(self),CLASS_OF(other));
63
+ if (klass==cT) {
64
+ return <%=c_func%>_self(self, other);
65
+ } else {
66
+ v = rb_funcall(klass, id_cast, 1, self);
67
+ return rb_funcall(v, <%=id_op%>, 1, other);
68
+ }
69
+ <% end %>
70
+ }
@@ -0,0 +1,15 @@
1
<% unless type_name == 'robject' %>
/*
  Device kernel.  Each thread starts at its global index and advances by
  the total number of launched threads until n is reached.  For index i
  the operation receives the elements located at p1+i*s1, p2+i*s2,
  p3+i*s3 and p4+i*s4, in that order.
*/
__global__ void <%="cumo_#{c_iter}_stride_kernel"%>(char *p1, char *p2, char *p3, char *p4, ssize_t s1, ssize_t s2, ssize_t s3, ssize_t s4, uint64_t n)
{
    const uint64_t tid_first = blockIdx.x * blockDim.x + threadIdx.x;
    const uint64_t tid_step  = blockDim.x * gridDim.x;

    for (uint64_t i = tid_first; i < n; i += tid_step) {
        m_<%=name%>(*(dtype*)(p1+(i*s1)),*(dtype*)(p2+(i*s2)),*(dtype*)(p3+(i*s3)), *(dtype*)(p4+(i*s4)));
    }
}

/*
  Host-side entry point: asks get_gridDim / get_blockDim for a launch
  configuration for n elements and launches the kernel above with it.
*/
void <%="cumo_#{c_iter}_stride_kernel_launch"%>(char *p1, char *p2, char *p3, char *p4, ssize_t s1, ssize_t s2, ssize_t s3, ssize_t s4, uint64_t n)
{
    const size_t grid_size  = get_gridDim(n);
    const size_t block_size = get_blockDim(n);

    <%="cumo_#{c_iter}_stride_kernel"%><<<grid_size, block_size>>>(p1,p2,p3,p4,s1,s2,s3,s4,n);
}
<% end %>
@@ -0,0 +1,31 @@
1
<% unless type_name == 'robject' %>

<% ((0..opt_indexer_ndim).to_a << '').each do |idim| %>
/*
  Device kernel, one instance generated per supported dimension count
  plus one with an empty suffix.  For every flat index i handled by this
  thread, the indexer is positioned at i, the element addresses of the
  three arrays are looked up, and the result of the operation on the
  first two is stored into the third.
*/
__global__ void <%="cumo_#{c_iter}_kernel_dim#{idim}"%>(na_iarray_t a1, na_iarray_t a2, na_iarray_t a3, na_indexer_t indexer)
{
    const uint64_t tid_first = blockIdx.x * blockDim.x + threadIdx.x;
    const uint64_t tid_step  = blockDim.x * gridDim.x;

    for (uint64_t i = tid_first; i < indexer.total_size; i += tid_step) {
        cumo_na_indexer_set_dim<%=idim%>(&indexer, i);
        char* src1 = cumo_na_iarray_at_dim<%=idim%>(&a1, &indexer);
        char* src2 = cumo_na_iarray_at_dim<%=idim%>(&a2, &indexer);
        char* dst  = cumo_na_iarray_at_dim<%=idim%>(&a3, &indexer);
        *(dtype*)(dst) = m_<%=name%>(*(dtype*)(src1),*(dtype*)(src2));
    }
}
<% end %>

/*
  Host-side entry point: picks the kernel specialised for indexer->ndim
  when one was generated, otherwise the suffix-less kernel, and launches
  it with the configuration returned by get_gridDim / get_blockDim.
*/
void <%="cumo_#{c_iter}_kernel_launch"%>(na_iarray_t* a1, na_iarray_t* a2, na_iarray_t* a3, na_indexer_t* indexer)
{
    const size_t grid_size  = get_gridDim(indexer->total_size);
    const size_t block_size = get_blockDim(indexer->total_size);

    switch (indexer->ndim) {
<% (0..opt_indexer_ndim).each do |idim| %>
    case <%=idim%>:
        <%="cumo_#{c_iter}_kernel_dim#{idim}"%><<<grid_size, block_size>>>(*a1,*a2,*a3,*indexer);
        break;
<% end %>
    default:
        <%="cumo_#{c_iter}_kernel_dim"%><<<grid_size, block_size>>>(*a1,*a2,*a3,*indexer);
        break;
    }
}
<% end %>
@@ -0,0 +1,45 @@
1
+ <% unless type_name == 'robject' %>
2
+ void <%="cumo_#{c_iter}_stride_kernel_launch"%>(char *p1, char *p2, char *p3, ssize_t s1, ssize_t s2, ssize_t s3, uint64_t n);
3
+ <% end %>
4
+
5
+ static void
6
+ <%=c_iter%>(na_loop_t *const lp)
7
+ {
8
+ size_t i;
9
+ char *p1, *p2, *p3;
10
+ ssize_t s1, s2, s3;
11
+ INIT_COUNTER(lp, i);
12
+ INIT_PTR(lp, 0, p1, s1);
13
+ INIT_PTR(lp, 1, p2, s2);
14
+ INIT_PTR(lp, 2, p3, s3);
15
+ <% if type_name == 'robject' %>
16
+ {
17
+ dtype x, y;
18
+ SHOW_SYNCHRONIZE_FIXME_WARNING_ONCE("<%=name%>", "<%=type_name%>");
19
+ for (; i--;) {
20
+ GET_DATA_STRIDE(p1,s1,dtype,x);
21
+ GET_DATA_STRIDE(p2,s2,dtype,y);
22
+ x = m_<%=name%>(x,y);
23
+ SET_DATA_STRIDE(p3,s3,dtype,x);
24
+ }
25
+ }
26
+ <% else %>
27
+ <%="cumo_#{c_iter}_stride_kernel_launch"%>(p1,p2,p3,s1,s2,s3,i);
28
+ <% end %>
29
+ }
30
+
31
+ /*
32
+ Calculate <%=name%>(a1,a2).
33
+ @overload <%=name%>(a1,a2)
34
+ @param [Cumo::NArray,Numeric] a1 first value
35
+ @param [Cumo::NArray,Numeric] a2 second value
36
+ @return [Cumo::<%=class_name%>] <%=name%>(a1,a2).
37
+ */
38
+ static VALUE
39
+ <%=c_func(2)%>(VALUE mod, VALUE a1, VALUE a2)
40
+ {
41
+ ndfunc_arg_in_t ain[2] = {{cT,0},{cT,0}};
42
+ ndfunc_arg_out_t aout[1] = {{cT,0}};
43
+ ndfunc_t ndf = { <%=c_iter%>, STRIDE_LOOP, 2, 1, ain, aout };
44
+ return na_ndloop(&ndf, 2, a1, a2);
45
+ }
@@ -0,0 +1,15 @@
1
<% unless type_name == 'robject' %>
/*
  Device kernel.  Each thread starts at its global index and advances by
  the total number of launched threads until n is reached.  For index i
  it stores the result of the operation on the elements at p1+i*s1 and
  p2+i*s2 into the element at p3+i*s3.
*/
__global__ void <%="cumo_#{c_iter}_stride_kernel"%>(char *p1, char *p2, char *p3, ssize_t s1, ssize_t s2, ssize_t s3, uint64_t n)
{
    const uint64_t tid_first = blockIdx.x * blockDim.x + threadIdx.x;
    const uint64_t tid_step  = blockDim.x * gridDim.x;

    for (uint64_t i = tid_first; i < n; i += tid_step) {
        *(dtype*)(p3+(i*s3)) = m_<%=name%>(*(dtype*)(p1+(i*s1)),*(dtype*)(p2+(i*s2)));
    }
}

/*
  Host-side entry point: obtains a launch configuration for n elements
  from get_gridDim / get_blockDim and launches the kernel above.
*/
void <%="cumo_#{c_iter}_stride_kernel_launch"%>(char *p1, char *p2, char *p3, ssize_t s1, ssize_t s2, ssize_t s3, uint64_t n)
{
    const size_t grid_size  = get_gridDim(n);
    const size_t block_size = get_blockDim(n);

    <%="cumo_#{c_iter}_stride_kernel"%><<<grid_size, block_size>>>(p1,p2,p3,s1,s2,s3,n);
}
<% end %>
@@ -0,0 +1,181 @@
1
+ // ------- Integer count without weights -------
2
+ <%
3
+ [32,64].each do |bits|
4
+ cnt_cT = "cumo_cUInt#{bits}"
5
+ cnt_type = "u_int#{bits}_t"
6
+ %>
7
+ static void
8
+ <%=c_iter%>_<%=bits%>(na_loop_t *const lp)
9
+ {
10
+ size_t i, x, n;
11
+ char *p1, *p2;
12
+ ssize_t s1, s2;
13
+ size_t *idx1;
14
+
15
+ INIT_PTR_IDX(lp, 0, p1, s1, idx1);
16
+ INIT_PTR(lp, 1, p2, s2);
17
+ i = lp->args[0].shape[0];
18
+ n = lp->args[1].shape[0];
19
+
20
+ // initialize
21
+ for (x=0; x < n; x++) {
22
+ *(<%=cnt_type%>*)(p2 + s2*x) = 0;
23
+ }
24
+
25
+ SHOW_SYNCHRONIZE_FIXME_WARNING_ONCE("<%=name%>_<%=bits%>", "<%=type_name%>");
26
+ if (idx1) {
27
+ for (; i--;) {
28
+ GET_DATA_INDEX(p1,idx1,dtype,x);
29
+ (*(<%=cnt_type%>*)(p2 + s2*x))++;
30
+ }
31
+ } else {
32
+ for (; i--;) {
33
+ GET_DATA_STRIDE(p1,s1,dtype,x);
34
+ (*(<%=cnt_type%>*)(p2 + s2*x))++;
35
+ }
36
+ }
37
+ }
38
+
39
+ static VALUE
40
+ <%=c_func%>_<%=bits%>(VALUE self, size_t length)
41
+ {
42
+ size_t shape_out[1] = {length};
43
+ ndfunc_arg_in_t ain[1] = {{cT,1}};
44
+ ndfunc_arg_out_t aout[1] = {{<%=cnt_cT%>,1,shape_out}};
45
+ ndfunc_t ndf = {<%=c_iter%>_<%=bits%>, NO_LOOP|NDF_STRIDE_LOOP|NDF_INDEX_LOOP,
46
+ 1, 1, ain, aout};
47
+
48
+ return na_ndloop(&ndf, 1, self);
49
+ }
50
+ <% end %>
51
+ // ------- end of Integer count without weights -------
52
+
53
+ // ------- Float count with weights -------
54
+ <%
55
+ [["SF","float"],
56
+ ["DF","double"]].each do |fn,cnt_type|
57
+ cnt_cT = "cumo_c#{fn}loat"
58
+ fn = fn.downcase
59
+ %>
60
+ static void
61
+ <%=c_iter%>_<%=fn%>(na_loop_t *const lp)
62
+ {
63
+ <%=cnt_type%> w;
64
+ size_t i, x, n, m;
65
+ char *p1, *p2, *p3;
66
+ ssize_t s1, s2, s3;
67
+
68
+ INIT_PTR(lp, 0, p1, s1);
69
+ INIT_PTR(lp, 1, p2, s2);
70
+ INIT_PTR(lp, 2, p3, s3);
71
+ i = lp->args[0].shape[0];
72
+ m = lp->args[1].shape[0];
73
+ n = lp->args[2].shape[0];
74
+
75
+ if (i != m) {
76
+ rb_raise(nary_eShapeError,
77
+ "size mismatch along last axis between self and weight");
78
+ }
79
+
80
+ // initialize
81
+ for (x=0; x < n; x++) {
82
+ *(<%=cnt_type%>*)(p3 + s3*x) = 0;
83
+ }
84
+ for (; i--;) {
85
+ GET_DATA_STRIDE(p1,s1,dtype,x);
86
+ GET_DATA_STRIDE(p2,s2,<%=cnt_type%>,w);
87
+ (*(<%=cnt_type%>*)(p3 + s3*x)) += w;
88
+ }
89
+ }
90
+
91
+ static VALUE
92
+ <%=c_func%>_<%=fn%>(VALUE self, VALUE weight, size_t length)
93
+ {
94
+ size_t shape_out[1] = {length};
95
+ ndfunc_arg_in_t ain[2] = {{cT,1},{<%=cnt_cT%>,1}};
96
+ ndfunc_arg_out_t aout[1] = {{<%=cnt_cT%>,1,shape_out}};
97
+ ndfunc_t ndf = {<%=c_iter%>_<%=fn%>, NO_LOOP|NDF_STRIDE_LOOP,
98
+ 2, 1, ain, aout};
99
+
100
+ return na_ndloop(&ndf, 2, self, weight);
101
+ }
102
+ <% end %>
103
+ // ------- end of Float count with weights -------
104
+
105
+ /*
106
+ Count the number of occurrences of each non-negative integer value.
107
+ Only Integer-types has this method.
108
+
109
+ @overload <%=name%>([weight], minlength:nil)
110
+ @param [SFloat or DFloat or Array] weight (optional) Array of
111
+ float values. Its size along last axis should be same as that of self.
112
+ @param [Integer] minlength (keyword, optional) Minimum size along
113
+ last axis for the output array.
114
+ @return [UInt32 or UInt64 or SFloat or DFloat]
115
+ Returns Float NArray if weight array is supplied,
116
+ otherwise returns UInt32 or UInt64 depending on the size along last axis.
117
+ @example
118
+ Cumo::Int32[0..4].bincount
119
+ => Cumo::UInt32#shape=[5]
120
+ [1, 1, 1, 1, 1]
121
+
122
+ Cumo::Int32[0, 1, 1, 3, 2, 1, 7].bincount
123
+ => Cumo::UInt32#shape=[8]
124
+ [1, 3, 1, 1, 0, 0, 0, 1]
125
+
126
+ x = Cumo::Int32[0, 1, 1, 3, 2, 1, 7, 23]
127
+ x.bincount.size == x.max+1
128
+ => true
129
+
130
+ w = Cumo::DFloat[0.3, 0.5, 0.2, 0.7, 1.0, -0.6]
131
+ x = Cumo::Int32[0, 1, 1, 2, 2, 2]
132
+ x.bincount(w)
133
+ => Cumo::DFloat#shape=[3]
134
+ [0.3, 0.7, 1.1]
135
+
136
+ */
137
+ static VALUE
138
+ <%=c_func(-1)%>(int argc, VALUE *argv, VALUE self)
139
+ {
140
+ VALUE weight=Qnil, kw=Qnil;
141
+ VALUE opts[1] = {Qundef};
142
+ VALUE v, wclass;
143
+ ID table[1] = {id_minlength};
144
+ size_t length, minlength;
145
+
146
+ rb_scan_args(argc, argv, "01:", &weight, &kw);
147
+ rb_get_kwargs(kw, table, 0, 1, opts);
148
+
149
+ <% if is_unsigned %>
150
+ v = <%=type_name%>_max(0,0,self);
151
+ <% else %>
152
+ v = <%=type_name%>_minmax(0,0,self);
153
+ if (m_num_to_data(RARRAY_AREF(v,0)) < 0) {
154
+ rb_raise(rb_eArgError,"array items must be non-netagive");
155
+ }
156
+ v = RARRAY_AREF(v,1);
157
+ <% end %>
158
+ length = NUM2SIZET(v) + 1;
159
+
160
+ if (opts[0] != Qundef) {
161
+ minlength = NUM2SIZET(opts[0]);
162
+ if (minlength > length) {
163
+ length = minlength;
164
+ }
165
+ }
166
+
167
+ if (NIL_P(weight)) {
168
+ if (length > 4294967295ul) {
169
+ return <%=c_func%>_64(self, length);
170
+ } else {
171
+ return <%=c_func%>_32(self, length);
172
+ }
173
+ } else {
174
+ wclass = CLASS_OF(weight);
175
+ if (wclass == cumo_cSFloat) {
176
+ return <%=c_func%>_sf(self, weight, length);
177
+ } else {
178
+ return <%=c_func%>_df(self, weight, length);
179
+ }
180
+ }
181
+ }
@@ -0,0 +1,44 @@
1
+ <% children.each do |c|%>
2
+ <%= c.result %>
3
+
4
+ <% end %>
5
+ /*
6
+ Cast object to Cumo::<%=class_name%>.
7
+ @overload [](elements)
8
+ @overload <%=name%>(array)
9
+ @param [Numeric,Array] elements
10
+ @param [Array] array
11
+ @return [Cumo::<%=class_name%>]
12
+ */
13
+ static VALUE
14
+ <%=c_func(1)%>(VALUE type, VALUE obj)
15
+ {
16
+ VALUE v;
17
+ narray_t *na;
18
+ dtype x;
19
+
20
+ if (CLASS_OF(obj)==cT) {
21
+ return obj;
22
+ }
23
+ if (RTEST(rb_obj_is_kind_of(obj,rb_cNumeric))) {
24
+ x = m_num_to_data(obj);
25
+ return <%=type_name%>_new_dim0(x);
26
+ }
27
+ if (RTEST(rb_obj_is_kind_of(obj,rb_cArray))) {
28
+ return <%=find_tmpl("cast_array").c_func%>(obj);
29
+ }
30
+ if (IsNArray(obj)) {
31
+ GetNArray(obj,na);
32
+ v = nary_new(cT, NA_NDIM(na), NA_SHAPE(na));
33
+ if (NA_SIZE(na) > 0) {
34
+ <%=find_tmpl("store").c_func%>(v,obj);
35
+ }
36
+ return v;
37
+ }
38
+ <% if is_object %>
39
+ return robject_new_dim0(obj);
40
+ <% else %>
41
+ rb_raise(nary_eCastError,"cannot cast to %s",rb_class2name(type));
42
+ return Qnil;
43
+ <% end %>
44
+ }
@@ -0,0 +1,13 @@
1
+ static VALUE
2
+ <%=c_func(:nodef)%>(VALUE rary)
3
+ {
4
+ VALUE nary;
5
+ narray_t *na;
6
+
7
+ nary = na_s_new_like(cT, rary);
8
+ GetNArray(nary,na);
9
+ if (na->size > 0) {
10
+ <%=find_tmpl("store").find("array").c_func%>(nary,rary);
11
+ }
12
+ return nary;
13
+ }
@@ -0,0 +1,9 @@
1
+ /*
2
+ class definition: <%= full_class_name %>
3
+ */
4
+
5
+ VALUE <%=class_var%>; /* file-scope VALUE for this class; presumably assigned during class initialisation -- confirm */
6
+
7
+ static VALUE <%= find('store').c_func %>(VALUE,VALUE); /* forward declaration so code emitted earlier (e.g. the cast template) can call store before its definition */
8
+
9
+ <%= method_code %>
@@ -0,0 +1,6 @@
1
+ /*
2
+ class definition: <%= full_class_name %>
3
+ */
4
+
5
+ <%= method_code %>
6
+
@@ -0,0 +1,121 @@
1
+ static void
2
+ <%=c_iter%>(na_loop_t *const lp)
3
+ {
4
+ size_t i;
5
+ char *p1, *p2, *p3, *p4;
6
+ ssize_t s1, s2, s3, s4;
7
+ dtype x, min, max;
8
+ INIT_COUNTER(lp, i);
9
+ INIT_PTR(lp, 0, p1, s1);
10
+ INIT_PTR(lp, 1, p2, s2);
11
+ INIT_PTR(lp, 2, p3, s3);
12
+ INIT_PTR(lp, 3, p4, s4);
13
+ SHOW_SYNCHRONIZE_FIXME_WARNING_ONCE("<%=name%>", "<%=type_name%>");
14
+ for (; i--;) {
15
+ GET_DATA_STRIDE(p1,s1,dtype,x);
16
+ GET_DATA_STRIDE(p2,s2,dtype,min);
17
+ GET_DATA_STRIDE(p3,s3,dtype,max);
18
+ if (m_gt(min,max)) {rb_raise(nary_eOperationError,"min is greater than max");}
19
+ if (m_lt(x,min)) {x=min;}
20
+ if (m_gt(x,max)) {x=max;}
21
+ SET_DATA_STRIDE(p4,s4,dtype,x);
22
+ }
23
+ }
24
+
25
+ static void
26
+ <%=c_iter%>_min(na_loop_t *const lp)
27
+ {
28
+ size_t i;
29
+ char *p1, *p2, *p3;
30
+ ssize_t s1, s2, s3;
31
+ dtype x, min;
32
+ INIT_COUNTER(lp, i);
33
+ INIT_PTR(lp, 0, p1, s1);
34
+ INIT_PTR(lp, 1, p2, s2);
35
+ INIT_PTR(lp, 2, p3, s3);
36
+ SHOW_SYNCHRONIZE_FIXME_WARNING_ONCE("<%=name%>_min", "<%=type_name%>");
37
+ for (; i--;) {
38
+ GET_DATA_STRIDE(p1,s1,dtype,x);
39
+ GET_DATA_STRIDE(p2,s2,dtype,min);
40
+ if (m_lt(x,min)) {x=min;}
41
+ SET_DATA_STRIDE(p3,s3,dtype,x);
42
+ }
43
+ }
44
+
45
+ static void
46
+ <%=c_iter%>_max(na_loop_t *const lp)
47
+ {
48
+ size_t i;
49
+ char *p1, *p2, *p3;
50
+ ssize_t s1, s2, s3;
51
+ dtype x, max;
52
+ INIT_COUNTER(lp, i);
53
+ INIT_PTR(lp, 0, p1, s1);
54
+ INIT_PTR(lp, 1, p2, s2);
55
+ INIT_PTR(lp, 2, p3, s3);
56
+ SHOW_SYNCHRONIZE_FIXME_WARNING_ONCE("<%=name%>_max", "<%=type_name%>");
57
+ for (; i--;) {
58
+ GET_DATA_STRIDE(p1,s1,dtype,x);
59
+ GET_DATA_STRIDE(p2,s2,dtype,max);
60
+ if (m_gt(x,max)) {x=max;}
61
+ SET_DATA_STRIDE(p3,s3,dtype,x);
62
+ }
63
+ }
64
+
65
+ /*
66
+ Clip array elements by [min,max].
67
+ If either of min or max is nil, one side is clipped.
68
+ @overload <%=name%>(min,max)
69
+ @param [Cumo::NArray,Numeric] min
70
+ @param [Cumo::NArray,Numeric] max
71
+ @return [Cumo::NArray] result of clip.
72
+
73
+ @example
74
+ a = Cumo::Int32.new(10).seq
75
+ p a.clip(1,8)
76
+ # Cumo::Int32#shape=[10]
77
+ # [1, 1, 2, 3, 4, 5, 6, 7, 8, 8]
78
+
79
+ p a
80
+ # Cumo::Int32#shape=[10]
81
+ # [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
82
+
83
+ p a.inplace.clip(3,6)
84
+ # Cumo::Int32(view)#shape=[10]
85
+ # [3, 3, 3, 3, 4, 5, 6, 6, 6, 6]
86
+
87
+ p a
88
+ # Cumo::Int32#shape=[10]
89
+ # [3, 3, 3, 3, 4, 5, 6, 6, 6, 6]
90
+
91
+ p a = Cumo::Int32.new(10).seq
92
+ # Cumo::Int32#shape=[10]
93
+ # [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
94
+
95
+ p a.clip([3,4,1,1,1,4,4,4,4,4], 8)
96
+ # Cumo::Int32#shape=[10]
97
+ # [3, 4, 2, 3, 4, 5, 6, 7, 8, 8]
98
+ */
99
+ static VALUE
100
+ <%=c_func(2)%>(VALUE self, VALUE min, VALUE max)
101
+ {
102
+ ndfunc_arg_in_t ain[3] = {{Qnil,0},{cT,0},{cT,0}};
103
+ ndfunc_arg_out_t aout[1] = {{cT,0}};
104
+ ndfunc_t ndf_min = { <%=c_iter%>_min, STRIDE_LOOP, 2, 1, ain, aout };
105
+ ndfunc_t ndf_max = { <%=c_iter%>_max, STRIDE_LOOP, 2, 1, ain, aout };
106
+ ndfunc_t ndf_both = { <%=c_iter%>, STRIDE_LOOP, 3, 1, ain, aout };
107
+
108
+ if (RTEST(min)) {
109
+ if (RTEST(max)) {
110
+ return na_ndloop(&ndf_both, 3, self, min, max);
111
+ } else {
112
+ return na_ndloop(&ndf_min, 2, self, min);
113
+ }
114
+ } else {
115
+ if (RTEST(max)) {
116
+ return na_ndloop(&ndf_max, 2, self, max);
117
+ }
118
+ }
119
+ rb_raise(rb_eArgError,"min and max are not given");
120
+ return Qnil;
121
+ }