cumo 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (266) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +27 -0
  3. data/.travis.yml +5 -0
  4. data/3rd_party/mkmf-cu/.gitignore +36 -0
  5. data/3rd_party/mkmf-cu/Gemfile +3 -0
  6. data/3rd_party/mkmf-cu/LICENSE +21 -0
  7. data/3rd_party/mkmf-cu/README.md +36 -0
  8. data/3rd_party/mkmf-cu/Rakefile +11 -0
  9. data/3rd_party/mkmf-cu/bin/mkmf-cu-nvcc +4 -0
  10. data/3rd_party/mkmf-cu/lib/mkmf-cu.rb +32 -0
  11. data/3rd_party/mkmf-cu/lib/mkmf-cu/cli.rb +80 -0
  12. data/3rd_party/mkmf-cu/lib/mkmf-cu/nvcc.rb +157 -0
  13. data/3rd_party/mkmf-cu/mkmf-cu.gemspec +16 -0
  14. data/3rd_party/mkmf-cu/test/test_mkmf-cu.rb +67 -0
  15. data/CODE_OF_CONDUCT.md +46 -0
  16. data/Gemfile +8 -0
  17. data/LICENSE.txt +82 -0
  18. data/README.md +252 -0
  19. data/Rakefile +43 -0
  20. data/bench/broadcast_fp32.rb +138 -0
  21. data/bench/cumo_bench.rb +193 -0
  22. data/bench/numo_bench.rb +138 -0
  23. data/bench/reduction_fp32.rb +117 -0
  24. data/bin/console +14 -0
  25. data/bin/setup +8 -0
  26. data/cumo.gemspec +32 -0
  27. data/ext/cumo/cuda/cublas.c +278 -0
  28. data/ext/cumo/cuda/driver.c +421 -0
  29. data/ext/cumo/cuda/memory_pool.cpp +185 -0
  30. data/ext/cumo/cuda/memory_pool_impl.cpp +308 -0
  31. data/ext/cumo/cuda/memory_pool_impl.hpp +370 -0
  32. data/ext/cumo/cuda/memory_pool_impl_test.cpp +554 -0
  33. data/ext/cumo/cuda/nvrtc.c +207 -0
  34. data/ext/cumo/cuda/runtime.c +167 -0
  35. data/ext/cumo/cumo.c +148 -0
  36. data/ext/cumo/depend.erb +58 -0
  37. data/ext/cumo/extconf.rb +179 -0
  38. data/ext/cumo/include/cumo.h +25 -0
  39. data/ext/cumo/include/cumo/compat.h +23 -0
  40. data/ext/cumo/include/cumo/cuda/cublas.h +153 -0
  41. data/ext/cumo/include/cumo/cuda/cumo_thrust.hpp +187 -0
  42. data/ext/cumo/include/cumo/cuda/cumo_thrust_complex.hpp +79 -0
  43. data/ext/cumo/include/cumo/cuda/driver.h +22 -0
  44. data/ext/cumo/include/cumo/cuda/memory_pool.h +28 -0
  45. data/ext/cumo/include/cumo/cuda/nvrtc.h +22 -0
  46. data/ext/cumo/include/cumo/cuda/runtime.h +40 -0
  47. data/ext/cumo/include/cumo/indexer.h +238 -0
  48. data/ext/cumo/include/cumo/intern.h +142 -0
  49. data/ext/cumo/include/cumo/intern_fwd.h +38 -0
  50. data/ext/cumo/include/cumo/intern_kernel.h +6 -0
  51. data/ext/cumo/include/cumo/narray.h +429 -0
  52. data/ext/cumo/include/cumo/narray_kernel.h +149 -0
  53. data/ext/cumo/include/cumo/ndloop.h +95 -0
  54. data/ext/cumo/include/cumo/reduce_kernel.h +126 -0
  55. data/ext/cumo/include/cumo/template.h +158 -0
  56. data/ext/cumo/include/cumo/template_kernel.h +77 -0
  57. data/ext/cumo/include/cumo/types/bit.h +40 -0
  58. data/ext/cumo/include/cumo/types/bit_kernel.h +34 -0
  59. data/ext/cumo/include/cumo/types/complex.h +402 -0
  60. data/ext/cumo/include/cumo/types/complex_kernel.h +414 -0
  61. data/ext/cumo/include/cumo/types/complex_macro.h +382 -0
  62. data/ext/cumo/include/cumo/types/complex_macro_kernel.h +186 -0
  63. data/ext/cumo/include/cumo/types/dcomplex.h +46 -0
  64. data/ext/cumo/include/cumo/types/dcomplex_kernel.h +13 -0
  65. data/ext/cumo/include/cumo/types/dfloat.h +47 -0
  66. data/ext/cumo/include/cumo/types/dfloat_kernel.h +14 -0
  67. data/ext/cumo/include/cumo/types/float_def.h +34 -0
  68. data/ext/cumo/include/cumo/types/float_def_kernel.h +39 -0
  69. data/ext/cumo/include/cumo/types/float_macro.h +191 -0
  70. data/ext/cumo/include/cumo/types/float_macro_kernel.h +158 -0
  71. data/ext/cumo/include/cumo/types/int16.h +24 -0
  72. data/ext/cumo/include/cumo/types/int16_kernel.h +23 -0
  73. data/ext/cumo/include/cumo/types/int32.h +24 -0
  74. data/ext/cumo/include/cumo/types/int32_kernel.h +19 -0
  75. data/ext/cumo/include/cumo/types/int64.h +24 -0
  76. data/ext/cumo/include/cumo/types/int64_kernel.h +19 -0
  77. data/ext/cumo/include/cumo/types/int8.h +24 -0
  78. data/ext/cumo/include/cumo/types/int8_kernel.h +19 -0
  79. data/ext/cumo/include/cumo/types/int_macro.h +67 -0
  80. data/ext/cumo/include/cumo/types/int_macro_kernel.h +48 -0
  81. data/ext/cumo/include/cumo/types/real_accum.h +486 -0
  82. data/ext/cumo/include/cumo/types/real_accum_kernel.h +101 -0
  83. data/ext/cumo/include/cumo/types/robj_macro.h +80 -0
  84. data/ext/cumo/include/cumo/types/robj_macro_kernel.h +0 -0
  85. data/ext/cumo/include/cumo/types/robject.h +27 -0
  86. data/ext/cumo/include/cumo/types/robject_kernel.h +7 -0
  87. data/ext/cumo/include/cumo/types/scomplex.h +46 -0
  88. data/ext/cumo/include/cumo/types/scomplex_kernel.h +13 -0
  89. data/ext/cumo/include/cumo/types/sfloat.h +48 -0
  90. data/ext/cumo/include/cumo/types/sfloat_kernel.h +14 -0
  91. data/ext/cumo/include/cumo/types/uint16.h +25 -0
  92. data/ext/cumo/include/cumo/types/uint16_kernel.h +20 -0
  93. data/ext/cumo/include/cumo/types/uint32.h +25 -0
  94. data/ext/cumo/include/cumo/types/uint32_kernel.h +20 -0
  95. data/ext/cumo/include/cumo/types/uint64.h +25 -0
  96. data/ext/cumo/include/cumo/types/uint64_kernel.h +20 -0
  97. data/ext/cumo/include/cumo/types/uint8.h +25 -0
  98. data/ext/cumo/include/cumo/types/uint8_kernel.h +20 -0
  99. data/ext/cumo/include/cumo/types/uint_macro.h +58 -0
  100. data/ext/cumo/include/cumo/types/uint_macro_kernel.h +38 -0
  101. data/ext/cumo/include/cumo/types/xint_macro.h +169 -0
  102. data/ext/cumo/include/cumo/types/xint_macro_kernel.h +88 -0
  103. data/ext/cumo/narray/SFMT-params.h +97 -0
  104. data/ext/cumo/narray/SFMT-params19937.h +46 -0
  105. data/ext/cumo/narray/SFMT.c +620 -0
  106. data/ext/cumo/narray/SFMT.h +167 -0
  107. data/ext/cumo/narray/array.c +638 -0
  108. data/ext/cumo/narray/data.c +961 -0
  109. data/ext/cumo/narray/gen/cogen.rb +56 -0
  110. data/ext/cumo/narray/gen/cogen_kernel.rb +58 -0
  111. data/ext/cumo/narray/gen/def/bit.rb +37 -0
  112. data/ext/cumo/narray/gen/def/dcomplex.rb +39 -0
  113. data/ext/cumo/narray/gen/def/dfloat.rb +37 -0
  114. data/ext/cumo/narray/gen/def/int16.rb +36 -0
  115. data/ext/cumo/narray/gen/def/int32.rb +36 -0
  116. data/ext/cumo/narray/gen/def/int64.rb +36 -0
  117. data/ext/cumo/narray/gen/def/int8.rb +36 -0
  118. data/ext/cumo/narray/gen/def/robject.rb +37 -0
  119. data/ext/cumo/narray/gen/def/scomplex.rb +39 -0
  120. data/ext/cumo/narray/gen/def/sfloat.rb +37 -0
  121. data/ext/cumo/narray/gen/def/uint16.rb +36 -0
  122. data/ext/cumo/narray/gen/def/uint32.rb +36 -0
  123. data/ext/cumo/narray/gen/def/uint64.rb +36 -0
  124. data/ext/cumo/narray/gen/def/uint8.rb +36 -0
  125. data/ext/cumo/narray/gen/erbpp2.rb +346 -0
  126. data/ext/cumo/narray/gen/narray_def.rb +268 -0
  127. data/ext/cumo/narray/gen/spec.rb +425 -0
  128. data/ext/cumo/narray/gen/tmpl/accum.c +86 -0
  129. data/ext/cumo/narray/gen/tmpl/accum_binary.c +121 -0
  130. data/ext/cumo/narray/gen/tmpl/accum_binary_kernel.cu +61 -0
  131. data/ext/cumo/narray/gen/tmpl/accum_index.c +119 -0
  132. data/ext/cumo/narray/gen/tmpl/accum_index_kernel.cu +66 -0
  133. data/ext/cumo/narray/gen/tmpl/accum_kernel.cu +12 -0
  134. data/ext/cumo/narray/gen/tmpl/alloc_func.c +107 -0
  135. data/ext/cumo/narray/gen/tmpl/allocate.c +37 -0
  136. data/ext/cumo/narray/gen/tmpl/aref.c +66 -0
  137. data/ext/cumo/narray/gen/tmpl/aref_cpu.c +50 -0
  138. data/ext/cumo/narray/gen/tmpl/aset.c +56 -0
  139. data/ext/cumo/narray/gen/tmpl/binary.c +162 -0
  140. data/ext/cumo/narray/gen/tmpl/binary2.c +70 -0
  141. data/ext/cumo/narray/gen/tmpl/binary2_kernel.cu +15 -0
  142. data/ext/cumo/narray/gen/tmpl/binary_kernel.cu +31 -0
  143. data/ext/cumo/narray/gen/tmpl/binary_s.c +45 -0
  144. data/ext/cumo/narray/gen/tmpl/binary_s_kernel.cu +15 -0
  145. data/ext/cumo/narray/gen/tmpl/bincount.c +181 -0
  146. data/ext/cumo/narray/gen/tmpl/cast.c +44 -0
  147. data/ext/cumo/narray/gen/tmpl/cast_array.c +13 -0
  148. data/ext/cumo/narray/gen/tmpl/class.c +9 -0
  149. data/ext/cumo/narray/gen/tmpl/class_kernel.cu +6 -0
  150. data/ext/cumo/narray/gen/tmpl/clip.c +121 -0
  151. data/ext/cumo/narray/gen/tmpl/coerce_cast.c +10 -0
  152. data/ext/cumo/narray/gen/tmpl/complex_accum_kernel.cu +129 -0
  153. data/ext/cumo/narray/gen/tmpl/cond_binary.c +68 -0
  154. data/ext/cumo/narray/gen/tmpl/cond_binary_kernel.cu +18 -0
  155. data/ext/cumo/narray/gen/tmpl/cond_unary.c +46 -0
  156. data/ext/cumo/narray/gen/tmpl/cum.c +50 -0
  157. data/ext/cumo/narray/gen/tmpl/each.c +47 -0
  158. data/ext/cumo/narray/gen/tmpl/each_with_index.c +70 -0
  159. data/ext/cumo/narray/gen/tmpl/ewcomp.c +79 -0
  160. data/ext/cumo/narray/gen/tmpl/ewcomp_kernel.cu +19 -0
  161. data/ext/cumo/narray/gen/tmpl/extract.c +22 -0
  162. data/ext/cumo/narray/gen/tmpl/extract_cpu.c +26 -0
  163. data/ext/cumo/narray/gen/tmpl/extract_data.c +53 -0
  164. data/ext/cumo/narray/gen/tmpl/eye.c +105 -0
  165. data/ext/cumo/narray/gen/tmpl/eye_kernel.cu +19 -0
  166. data/ext/cumo/narray/gen/tmpl/fill.c +52 -0
  167. data/ext/cumo/narray/gen/tmpl/fill_kernel.cu +29 -0
  168. data/ext/cumo/narray/gen/tmpl/float_accum_kernel.cu +106 -0
  169. data/ext/cumo/narray/gen/tmpl/format.c +62 -0
  170. data/ext/cumo/narray/gen/tmpl/format_to_a.c +49 -0
  171. data/ext/cumo/narray/gen/tmpl/frexp.c +38 -0
  172. data/ext/cumo/narray/gen/tmpl/gemm.c +203 -0
  173. data/ext/cumo/narray/gen/tmpl/init_class.c +20 -0
  174. data/ext/cumo/narray/gen/tmpl/init_module.c +12 -0
  175. data/ext/cumo/narray/gen/tmpl/inspect.c +21 -0
  176. data/ext/cumo/narray/gen/tmpl/lib.c +50 -0
  177. data/ext/cumo/narray/gen/tmpl/lib_kernel.cu +24 -0
  178. data/ext/cumo/narray/gen/tmpl/logseq.c +102 -0
  179. data/ext/cumo/narray/gen/tmpl/logseq_kernel.cu +31 -0
  180. data/ext/cumo/narray/gen/tmpl/map_with_index.c +98 -0
  181. data/ext/cumo/narray/gen/tmpl/median.c +66 -0
  182. data/ext/cumo/narray/gen/tmpl/minmax.c +47 -0
  183. data/ext/cumo/narray/gen/tmpl/module.c +9 -0
  184. data/ext/cumo/narray/gen/tmpl/module_kernel.cu +1 -0
  185. data/ext/cumo/narray/gen/tmpl/new_dim0.c +15 -0
  186. data/ext/cumo/narray/gen/tmpl/new_dim0_kernel.cu +8 -0
  187. data/ext/cumo/narray/gen/tmpl/poly.c +50 -0
  188. data/ext/cumo/narray/gen/tmpl/pow.c +97 -0
  189. data/ext/cumo/narray/gen/tmpl/pow_kernel.cu +29 -0
  190. data/ext/cumo/narray/gen/tmpl/powint.c +17 -0
  191. data/ext/cumo/narray/gen/tmpl/qsort.c +212 -0
  192. data/ext/cumo/narray/gen/tmpl/rand.c +168 -0
  193. data/ext/cumo/narray/gen/tmpl/rand_norm.c +121 -0
  194. data/ext/cumo/narray/gen/tmpl/real_accum_kernel.cu +75 -0
  195. data/ext/cumo/narray/gen/tmpl/seq.c +112 -0
  196. data/ext/cumo/narray/gen/tmpl/seq_kernel.cu +43 -0
  197. data/ext/cumo/narray/gen/tmpl/set2.c +57 -0
  198. data/ext/cumo/narray/gen/tmpl/sort.c +48 -0
  199. data/ext/cumo/narray/gen/tmpl/sort_index.c +111 -0
  200. data/ext/cumo/narray/gen/tmpl/store.c +41 -0
  201. data/ext/cumo/narray/gen/tmpl/store_array.c +187 -0
  202. data/ext/cumo/narray/gen/tmpl/store_array_kernel.cu +58 -0
  203. data/ext/cumo/narray/gen/tmpl/store_bit.c +86 -0
  204. data/ext/cumo/narray/gen/tmpl/store_bit_kernel.cu +66 -0
  205. data/ext/cumo/narray/gen/tmpl/store_from.c +81 -0
  206. data/ext/cumo/narray/gen/tmpl/store_from_kernel.cu +58 -0
  207. data/ext/cumo/narray/gen/tmpl/store_kernel.cu +3 -0
  208. data/ext/cumo/narray/gen/tmpl/store_numeric.c +9 -0
  209. data/ext/cumo/narray/gen/tmpl/to_a.c +43 -0
  210. data/ext/cumo/narray/gen/tmpl/unary.c +132 -0
  211. data/ext/cumo/narray/gen/tmpl/unary2.c +60 -0
  212. data/ext/cumo/narray/gen/tmpl/unary_kernel.cu +72 -0
  213. data/ext/cumo/narray/gen/tmpl/unary_ret2.c +34 -0
  214. data/ext/cumo/narray/gen/tmpl/unary_s.c +86 -0
  215. data/ext/cumo/narray/gen/tmpl/unary_s_kernel.cu +58 -0
  216. data/ext/cumo/narray/gen/tmpl_bit/allocate.c +24 -0
  217. data/ext/cumo/narray/gen/tmpl_bit/aref.c +54 -0
  218. data/ext/cumo/narray/gen/tmpl_bit/aref_cpu.c +57 -0
  219. data/ext/cumo/narray/gen/tmpl_bit/aset.c +56 -0
  220. data/ext/cumo/narray/gen/tmpl_bit/binary.c +98 -0
  221. data/ext/cumo/narray/gen/tmpl_bit/bit_count.c +64 -0
  222. data/ext/cumo/narray/gen/tmpl_bit/bit_count_cpu.c +88 -0
  223. data/ext/cumo/narray/gen/tmpl_bit/bit_count_kernel.cu +76 -0
  224. data/ext/cumo/narray/gen/tmpl_bit/bit_reduce.c +133 -0
  225. data/ext/cumo/narray/gen/tmpl_bit/each.c +48 -0
  226. data/ext/cumo/narray/gen/tmpl_bit/each_with_index.c +70 -0
  227. data/ext/cumo/narray/gen/tmpl_bit/extract.c +30 -0
  228. data/ext/cumo/narray/gen/tmpl_bit/extract_cpu.c +29 -0
  229. data/ext/cumo/narray/gen/tmpl_bit/fill.c +69 -0
  230. data/ext/cumo/narray/gen/tmpl_bit/format.c +64 -0
  231. data/ext/cumo/narray/gen/tmpl_bit/format_to_a.c +51 -0
  232. data/ext/cumo/narray/gen/tmpl_bit/inspect.c +21 -0
  233. data/ext/cumo/narray/gen/tmpl_bit/mask.c +136 -0
  234. data/ext/cumo/narray/gen/tmpl_bit/none_p.c +14 -0
  235. data/ext/cumo/narray/gen/tmpl_bit/store_array.c +108 -0
  236. data/ext/cumo/narray/gen/tmpl_bit/store_bit.c +70 -0
  237. data/ext/cumo/narray/gen/tmpl_bit/store_from.c +60 -0
  238. data/ext/cumo/narray/gen/tmpl_bit/to_a.c +47 -0
  239. data/ext/cumo/narray/gen/tmpl_bit/unary.c +81 -0
  240. data/ext/cumo/narray/gen/tmpl_bit/where.c +90 -0
  241. data/ext/cumo/narray/gen/tmpl_bit/where2.c +95 -0
  242. data/ext/cumo/narray/index.c +880 -0
  243. data/ext/cumo/narray/kwargs.c +153 -0
  244. data/ext/cumo/narray/math.c +142 -0
  245. data/ext/cumo/narray/narray.c +1948 -0
  246. data/ext/cumo/narray/ndloop.c +2105 -0
  247. data/ext/cumo/narray/rand.c +45 -0
  248. data/ext/cumo/narray/step.c +474 -0
  249. data/ext/cumo/narray/struct.c +886 -0
  250. data/lib/cumo.rb +3 -0
  251. data/lib/cumo/cuda.rb +11 -0
  252. data/lib/cumo/cuda/compile_error.rb +36 -0
  253. data/lib/cumo/cuda/compiler.rb +161 -0
  254. data/lib/cumo/cuda/device.rb +47 -0
  255. data/lib/cumo/cuda/link_state.rb +31 -0
  256. data/lib/cumo/cuda/module.rb +40 -0
  257. data/lib/cumo/cuda/nvrtc_program.rb +27 -0
  258. data/lib/cumo/linalg.rb +12 -0
  259. data/lib/cumo/narray.rb +2 -0
  260. data/lib/cumo/narray/extra.rb +1278 -0
  261. data/lib/erbpp.rb +294 -0
  262. data/lib/erbpp/line_number.rb +137 -0
  263. data/lib/erbpp/narray_def.rb +381 -0
  264. data/numo-narray-version +1 -0
  265. data/run.gdb +7 -0
  266. metadata +353 -0
@@ -0,0 +1,153 @@
1
+ /**********************************************************************
2
+
3
+ Function to extract Keyword argument for ruby-2.1.x
4
+ Copied from class.c in ruby-2.4.2
5
+
6
+ Copyright (C) 1993-2007 Yukihiro Matsumoto
7
+
8
+ **********************************************************************/
9
+ #include <ruby.h>
10
+ #define rb_hash_tbl_raw(hash) rb_hash_tbl(hash)
11
+
12
+ /* from internal.h */
13
+ struct RBasicRaw {
14
+ VALUE flags;
15
+ VALUE klass;
16
+ };
17
+
18
+ #define RBASIC_SET_CLASS(obj, cls) do { \
19
+ VALUE _obj_ = (obj); \
20
+ RB_OBJ_WRITE(_obj_, &((struct RBasicRaw *)(_obj_))->klass, cls); \
21
+ } while (0)
22
+
23
+ /* from class.c */
24
+ VALUE
25
+ rb_keyword_error_new(const char *error, VALUE keys)
26
+ {
27
+ const char *msg = "";
28
+ VALUE error_message;
29
+
30
+ if (RARRAY_LEN(keys) == 1) {
31
+ keys = RARRAY_AREF(keys, 0);
32
+ }
33
+ else {
34
+ keys = rb_ary_join(keys, rb_usascii_str_new2(", "));
35
+ msg = "s";
36
+ }
37
+
38
+ error_message = rb_sprintf("%s keyword%s: %"PRIsVALUE, error, msg, keys);
39
+
40
+ return rb_exc_new_str(rb_eArgError, error_message);
41
+ }
42
+
43
+ NORETURN(static void rb_keyword_error(const char *error, VALUE keys));
44
+ static void
45
+ rb_keyword_error(const char *error, VALUE keys)
46
+ {
47
+ rb_exc_raise(rb_keyword_error_new(error, keys));
48
+ }
49
+
50
+ NORETURN(static void unknown_keyword_error(VALUE hash, const ID *table, int keywords));
51
+ static void
52
+ unknown_keyword_error(VALUE hash, const ID *table, int keywords)
53
+ {
54
+ st_table *tbl = rb_hash_tbl_raw(hash);
55
+ VALUE keys;
56
+ int i;
57
+ for (i = 0; i < keywords; i++) {
58
+ st_data_t key = ID2SYM(table[i]);
59
+ st_delete(tbl, &key, NULL);
60
+ }
61
+ keys = rb_funcallv(hash, rb_intern("keys"), 0, 0);
62
+ if (!RB_TYPE_P(keys, T_ARRAY)) rb_raise(rb_eArgError, "unknown keyword");
63
+ rb_keyword_error("unknown", keys);
64
+ }
65
+
66
+ static int
67
+ separate_symbol(st_data_t key, st_data_t value, st_data_t arg)
68
+ {
69
+ VALUE *kwdhash = (VALUE *)arg;
70
+
71
+ if (!SYMBOL_P(key)) kwdhash++;
72
+ if (!*kwdhash) *kwdhash = rb_hash_new();
73
+ rb_hash_aset(*kwdhash, (VALUE)key, (VALUE)value);
74
+ return ST_CONTINUE;
75
+ }
76
+
77
+ VALUE
78
+ rb_extract_keywords(VALUE *orighash)
79
+ {
80
+ VALUE parthash[2] = {0, 0};
81
+ VALUE hash = *orighash;
82
+
83
+ if (RHASH_EMPTY_P(hash)) {
84
+ *orighash = 0;
85
+ return hash;
86
+ }
87
+ st_foreach(rb_hash_tbl_raw(hash), separate_symbol, (st_data_t)&parthash);
88
+ *orighash = parthash[1];
89
+ if (parthash[1] && RBASIC_CLASS(hash) != rb_cHash) {
90
+ RBASIC_SET_CLASS(parthash[1], RBASIC_CLASS(hash));
91
+ }
92
+ return parthash[0];
93
+ }
94
+
95
+ int
96
+ rb_get_kwargs(VALUE keyword_hash, const ID *table, int required, int optional, VALUE *values)
97
+ {
98
+ int i = 0, j;
99
+ int rest = 0;
100
+ VALUE missing = Qnil;
101
+ st_data_t key;
102
+
103
+ #define extract_kwarg(keyword, val) \
104
+ (key = (st_data_t)(keyword), values ? \
105
+ st_delete(rb_hash_tbl_raw(keyword_hash), &key, (val)) : \
106
+ st_lookup(rb_hash_tbl_raw(keyword_hash), key, (val)))
107
+
108
+ if (NIL_P(keyword_hash)) keyword_hash = 0;
109
+
110
+ if (optional < 0) {
111
+ rest = 1;
112
+ optional = -1-optional;
113
+ }
114
+ if (values) {
115
+ for (j = 0; j < required + optional; j++) {
116
+ values[j] = Qundef;
117
+ }
118
+ }
119
+ if (required) {
120
+ for (; i < required; i++) {
121
+ VALUE keyword = ID2SYM(table[i]);
122
+ if (keyword_hash) {
123
+ st_data_t val;
124
+ if (extract_kwarg(keyword, &val)) {
125
+ if (values) values[i] = (VALUE)val;
126
+ continue;
127
+ }
128
+ }
129
+ if (NIL_P(missing)) missing = rb_ary_tmp_new(1);
130
+ rb_ary_push(missing, keyword);
131
+ }
132
+ if (!NIL_P(missing)) {
133
+ rb_keyword_error("missing", missing);
134
+ }
135
+ }
136
+ j = i;
137
+ if (optional && keyword_hash) {
138
+ for (i = 0; i < optional; i++) {
139
+ st_data_t val;
140
+ if (extract_kwarg(ID2SYM(table[required+i]), &val)) {
141
+ if (values) values[required+i] = (VALUE)val;
142
+ j++;
143
+ }
144
+ }
145
+ }
146
+ if (!rest && keyword_hash) {
147
+ if (RHASH_SIZE(keyword_hash) > (unsigned int)(values ? 0 : j)) {
148
+ unknown_keyword_error(keyword_hash, table, required+optional);
149
+ }
150
+ }
151
+ return j;
152
+ #undef extract_kwarg
153
+ }
@@ -0,0 +1,142 @@
1
+ #include <ruby.h>
2
+ #include "cumo/narray.h"
3
+
4
+ VALUE cumo_mNMath;
5
+ extern VALUE cumo_mDFloatMath, cumo_mDComplexMath;
6
+ extern VALUE cumo_mSFloatMath, cumo_mSComplexMath;
7
+ static ID id_send;
8
+ static ID id_UPCAST;
9
+ static ID id_DISPATCH;
10
+ static ID id_extract;
11
+
12
+ static VALUE
13
+ nary_type_s_upcast(VALUE type1, VALUE type2)
14
+ {
15
+ VALUE upcast_hash;
16
+ VALUE result_type;
17
+
18
+ if (type1==type2) return type1;
19
+ upcast_hash = rb_const_get(type1, id_UPCAST);
20
+ result_type = rb_hash_aref(upcast_hash, type2);
21
+ if (NIL_P(result_type)) {
22
+ if (TYPE(type2)==T_CLASS) {
23
+ if ( RTEST(rb_class_inherited_p(type2,cNArray)) ) {
24
+ upcast_hash = rb_const_get(type2, id_UPCAST);
25
+ result_type = rb_hash_aref(upcast_hash, type1);
26
+ }
27
+ }
28
+ }
29
+ return result_type;
30
+ }
31
+
32
+
33
+ static VALUE nary_math_cast2(VALUE type1, VALUE type2)
34
+ {
35
+ if ( RTEST(rb_class_inherited_p( type1, cNArray )) ){
36
+ return nary_type_s_upcast( type1, type2 );
37
+ }
38
+ if ( RTEST(rb_class_inherited_p( type2, cNArray )) ){
39
+ return nary_type_s_upcast( type2, type1 );
40
+ }
41
+ if ( RTEST(rb_class_inherited_p( type1, rb_cNumeric )) &&
42
+ RTEST(rb_class_inherited_p( type2, rb_cNumeric )) ){
43
+ if ( RTEST(rb_class_inherited_p( type1, rb_cComplex)) ||
44
+ RTEST(rb_class_inherited_p( type2, rb_cComplex )) ){
45
+ return rb_cComplex;
46
+ }
47
+ return rb_cFloat;
48
+ }
49
+ return type2;
50
+ }
51
+
52
+
53
+ VALUE na_ary_composition_dtype(VALUE);
54
+
55
+ static VALUE nary_mathcast(int argc, VALUE *argv)
56
+ {
57
+ VALUE type, type2;
58
+ int i;
59
+
60
+ type = na_ary_composition_dtype(argv[0]);
61
+ for (i=1; i<argc; i++) {
62
+ type2 = na_ary_composition_dtype(argv[i]);
63
+ type = nary_math_cast2(type, type2);
64
+ if (NIL_P(type)) {
65
+ rb_raise(rb_eTypeError,"includes unknown DataType for upcast");
66
+ }
67
+ }
68
+ return type;
69
+ }
70
+
71
+
72
+ /*
73
+ Dispatches method to Math module of upcasted type,
74
+ eg, Cumo::DFloat::Math.
75
+ @overload method_missing(name,x,...)
76
+ @param [Symbol] name method name.
77
+ @param [NArray,Numeric] x input array.
78
+ @return [NArray] result.
79
+ */
80
+ static VALUE nary_math_method_missing(int argc, VALUE *argv, VALUE mod)
81
+ {
82
+ VALUE type, ans, typemod, hash;
83
+ if (argc>1) {
84
+ type = nary_mathcast(argc-1,argv+1);
85
+
86
+ hash = rb_const_get(mod, id_DISPATCH);
87
+ typemod = rb_hash_aref( hash, type );
88
+ if (NIL_P(typemod)) {
89
+ rb_raise(rb_eTypeError,"%s is unknown for Cumo::NMath",
90
+ rb_class2name(type));
91
+ }
92
+
93
+ ans = rb_funcall2(typemod,id_send,argc,argv);
94
+
95
+ if (!RTEST(rb_class_inherited_p(type,cNArray)) &&
96
+ IsNArray(ans) ) {
97
+ ans = rb_funcall(ans,id_extract,0);
98
+ }
99
+ return ans;
100
+ }
101
+ rb_raise(rb_eArgError,"argument or method missing");
102
+ return Qnil;
103
+ }
104
+
105
+
106
+ void
107
+ Init_cumo_nary_math()
108
+ {
109
+ VALUE hCast;
110
+
111
+ cumo_mNMath = rb_define_module_under(mCumo, "NMath");
112
+ rb_define_singleton_method(cumo_mNMath, "method_missing", nary_math_method_missing, -1);
113
+
114
+ hCast = rb_hash_new();
115
+ rb_define_const(cumo_mNMath, "DISPATCH", hCast);
116
+ rb_hash_aset(hCast, cumo_cInt64, cumo_mDFloatMath);
117
+ rb_hash_aset(hCast, cumo_cInt32, cumo_mDFloatMath);
118
+ rb_hash_aset(hCast, cumo_cInt16, cumo_mDFloatMath);
119
+ rb_hash_aset(hCast, cumo_cInt8, cumo_mDFloatMath);
120
+ rb_hash_aset(hCast, cumo_cUInt64, cumo_mDFloatMath);
121
+ rb_hash_aset(hCast, cumo_cUInt32, cumo_mDFloatMath);
122
+ rb_hash_aset(hCast, cumo_cUInt16, cumo_mDFloatMath);
123
+ rb_hash_aset(hCast, cumo_cUInt8, cumo_mDFloatMath);
124
+ rb_hash_aset(hCast, cumo_cDFloat, cumo_mDFloatMath);
125
+ rb_hash_aset(hCast, cumo_cDFloat, cumo_mDFloatMath);
126
+ rb_hash_aset(hCast, cumo_cDComplex, cumo_mDComplexMath);
127
+ rb_hash_aset(hCast, cumo_cSFloat, cumo_mSFloatMath);
128
+ rb_hash_aset(hCast, cumo_cSComplex, cumo_mSComplexMath);
129
+ #ifdef RUBY_INTEGER_UNIFICATION
130
+ rb_hash_aset(hCast, rb_cInteger, rb_mMath);
131
+ #else
132
+ rb_hash_aset(hCast, rb_cFixnum, rb_mMath);
133
+ rb_hash_aset(hCast, rb_cBignum, rb_mMath);
134
+ #endif
135
+ rb_hash_aset(hCast, rb_cFloat, rb_mMath);
136
+ rb_hash_aset(hCast, rb_cComplex, cumo_mDComplexMath);
137
+
138
+ id_send = rb_intern("send");
139
+ id_UPCAST = rb_intern("UPCAST");
140
+ id_DISPATCH = rb_intern("DISPATCH");
141
+ id_extract = rb_intern("extract");
142
+ }
@@ -0,0 +1,1948 @@
1
+ #define CUMO_NARRAY_C
2
+ #include <ruby.h>
3
+ #include <assert.h>
4
+ #include "cumo.h"
5
+ #include "cumo/narray.h"
6
+ #include "cumo/cuda/memory_pool.h"
7
+ #include "cumo/cuda/runtime.h"
8
+
9
+ /* global variables within this module */
10
+ VALUE cumo_cNArray;
11
+ VALUE rb_mCumo;
12
+ VALUE nary_eCastError;
13
+ VALUE nary_eShapeError;
14
+ VALUE nary_eOperationError;
15
+ VALUE nary_eDimensionError;
16
+ VALUE nary_eValueError;
17
+
18
+ static ID id_contiguous_stride;
19
+ static ID id_allocate;
20
+ static ID id_element_byte_size;
21
+ static ID id_fill;
22
+ static ID id_seq;
23
+ static ID id_logseq;
24
+ static ID id_eye;
25
+ static ID id_UPCAST;
26
+ static ID id_cast;
27
+ static ID id_dup;
28
+ static ID id_to_host;
29
+ static ID id_bracket;
30
+ static ID id_shift_left;
31
+ static ID id_eq;
32
+ static ID id_count_false;
33
+ static ID id_count_false_cpu;
34
+ static ID id_axis;
35
+ static ID id_nan;
36
+ static ID id_keepdims;
37
+
38
+ VALUE sym_reduce;
39
+ VALUE sym_option;
40
+ VALUE sym_loop_opt;
41
+ VALUE sym_init;
42
+
43
+ VALUE na_cStep;
44
+ #ifndef HAVE_RB_CCOMPLEX
45
+ VALUE rb_cComplex;
46
+ #endif
47
+
48
+ int cumo_na_inspect_rows=20;
49
+ int cumo_na_inspect_cols=80;
50
+
51
+ const rb_data_type_t na_data_type = {
52
+ "Cumo::NArray",
53
+ {0, 0, 0,}, 0, 0, 0,
54
+ };
55
+
56
+ static void
57
+ nary_debug_info_nadata(VALUE self)
58
+ {
59
+ narray_data_t *na;
60
+ GetNArrayData(self,na);
61
+
62
+ printf(" ptr = 0x%"SZF"x\n", (size_t)(na->ptr));
63
+ }
64
+
65
+
66
+ static VALUE
67
+ nary_debug_info_naview(VALUE self)
68
+ {
69
+ int i;
70
+ narray_view_t *na;
71
+ size_t *idx;
72
+ size_t j;
73
+ GetNArrayView(self,na);
74
+
75
+ printf(" data = 0x%"SZF"x\n", (size_t)na->data);
76
+ printf(" offset = %"SZF"d\n", (size_t)na->offset);
77
+ printf(" stridx = 0x%"SZF"x\n", (size_t)na->stridx);
78
+
79
+ if (na->stridx) {
80
+ printf(" stridx = [");
81
+ for (i=0; i<na->base.ndim; i++) {
82
+ if (SDX_IS_INDEX(na->stridx[i])) {
83
+
84
+ idx = SDX_GET_INDEX(na->stridx[i]);
85
+ printf(" index[%d]=[", i);
86
+ for (j=0; j<na->base.shape[i]; j++) {
87
+ printf(" %"SZF"d", idx[j]);
88
+ }
89
+ printf(" ] ");
90
+
91
+ } else {
92
+ printf(" %"SZF"d", SDX_GET_STRIDE(na->stridx[i]));
93
+ }
94
+ }
95
+ printf(" ]\n");
96
+ }
97
+ return Qnil;
98
+ }
99
+
100
+
101
+ VALUE
102
+ nary_debug_info(VALUE self)
103
+ {
104
+ int i;
105
+ narray_t *na;
106
+ GetNArray(self,na);
107
+
108
+ printf("%s:\n",rb_class2name(CLASS_OF(self)));
109
+ printf(" id = 0x%"PRI_VALUE_PREFIX"x\n", self);
110
+ printf(" type = %d\n", na->type);
111
+ printf(" flag = [%d,%d]\n", na->flag[0], na->flag[1]);
112
+ printf(" size = %"SZF"d\n", na->size);
113
+ printf(" ndim = %d\n", na->ndim);
114
+ printf(" shape = 0x%"SZF"x\n", (size_t)na->shape);
115
+ if (na->shape) {
116
+ printf(" shape = [");
117
+ for (i=0;i<na->ndim;i++)
118
+ printf(" %"SZF"d", na->shape[i]);
119
+ printf(" ]\n");
120
+ }
121
+
122
+ switch(na->type) {
123
+ case NARRAY_DATA_T:
124
+ case NARRAY_FILEMAP_T:
125
+ nary_debug_info_nadata(self);
126
+ break;
127
+ case NARRAY_VIEW_T:
128
+ nary_debug_info_naview(self);
129
+ break;
130
+ }
131
+ return Qnil;
132
+ }
133
+
134
+
135
+ static size_t
136
+ na_view_memsize(const void* ptr)
137
+ {
138
+ int i;
139
+ size_t size = sizeof(narray_view_t);
140
+ const narray_view_t *na = ptr;
141
+
142
+ assert(na->base.type == NARRAY_VIEW_T);
143
+
144
+ if (na->stridx != NULL) {
145
+ for (i=0; i<na->base.ndim; i++) {
146
+ if (SDX_IS_INDEX(na->stridx[i])) {
147
+ size += sizeof(size_t) * na->base.shape[i];
148
+ }
149
+ }
150
+ size += sizeof(stridx_t) * na->base.ndim;
151
+ }
152
+ if (na->base.size > 0) {
153
+ if (na->base.shape != NULL && na->base.shape != &(na->base.size)) {
154
+ size += sizeof(size_t) * na->base.ndim;
155
+ }
156
+ }
157
+ return size;
158
+ }
159
+
160
+ static void
161
+ na_view_free(void* ptr)
162
+ {
163
+ int i;
164
+ narray_view_t *na = (narray_view_t*)ptr;
165
+
166
+ assert(na->base.type == NARRAY_VIEW_T);
167
+
168
+ if (na->stridx != NULL) {
169
+ for (i=0; i<na->base.ndim; i++) {
170
+ if (SDX_IS_INDEX(na->stridx[i])) {
171
+ void *p = SDX_GET_INDEX(na->stridx[i]);
172
+ if (cumo_cuda_runtime_is_device_memory(p)) {
173
+ cumo_cuda_runtime_free(p);
174
+ } else {
175
+ xfree(p);
176
+ }
177
+ }
178
+ }
179
+ xfree(na->stridx);
180
+ na->stridx = NULL;
181
+ }
182
+ if (na->base.size > 0) {
183
+ if (na->base.shape != NULL && na->base.shape != &(na->base.size)) {
184
+ xfree(na->base.shape);
185
+ na->base.shape = NULL;
186
+ }
187
+ }
188
+ xfree(na);
189
+ }
190
+
191
+ static void
192
+ na_view_gc_mark(void* na)
193
+ {
194
+ if (((narray_t*)na)->type == NARRAY_VIEW_T) {
195
+ rb_gc_mark(((narray_view_t*)na)->data);
196
+ }
197
+ }
198
+
199
+ const rb_data_type_t na_data_type_view = {
200
+ "Cumo::NArrayView",
201
+ {na_view_gc_mark, na_view_free, na_view_memsize,},
202
+ &na_data_type, 0, 0,
203
+ };
204
+
205
+ VALUE
206
+ na_s_allocate_view(VALUE klass)
207
+ {
208
+ narray_view_t *na = ALLOC(narray_view_t);
209
+
210
+ na->base.ndim = 0;
211
+ na->base.type = NARRAY_VIEW_T;
212
+ na->base.flag[0] = NA_FL0_INIT;
213
+ na->base.flag[1] = NA_FL1_INIT;
214
+ na->base.size = 0;
215
+ na->base.shape = NULL;
216
+ na->base.reduce = INT2FIX(0);
217
+ na->data = Qnil;
218
+ na->offset = 0;
219
+ na->stridx = NULL;
220
+ return TypedData_Wrap_Struct(klass, &na_data_type_view, (void*)na);
221
+ }
222
+
223
+
224
+ //static const size_t zero=0;
225
+
226
+ void
227
+ na_array_to_internal_shape(VALUE self, VALUE ary, size_t *shape)
228
+ {
229
+ size_t i, n, c, s;
230
+ ssize_t x;
231
+ VALUE v;
232
+ int flag = 0;
233
+
234
+ n = RARRAY_LEN(ary);
235
+
236
+ if (RTEST(self)) {
237
+ flag = TEST_COLUMN_MAJOR(self);
238
+ }
239
+ if (flag) {
240
+ c = n-1;
241
+ s = -1;
242
+ } else {
243
+ c = 0;
244
+ s = 1;
245
+ }
246
+ for (i=0; i<n; i++) {
247
+ v = RARRAY_AREF(ary,i);
248
+ x = NUM2SSIZET(v);
249
+ if (x < 0) {
250
+ rb_raise(rb_eArgError,"size must be non-negative");
251
+ }
252
+ shape[c] = x;
253
+ c += s;
254
+ }
255
+ }
256
+
257
+
258
+
259
+ void
260
+ na_alloc_shape(narray_t *na, int ndim)
261
+ {
262
+ na->ndim = ndim;
263
+ na->size = 0;
264
+ switch(ndim) {
265
+ case 0:
266
+ case 1:
267
+ na->shape = &(na->size);
268
+ break;
269
+ default:
270
+ if (ndim < 0) {
271
+ rb_raise(nary_eDimensionError,"ndim=%d is negative", ndim);
272
+ }
273
+ if (ndim > NA_MAX_DIMENSION) {
274
+ rb_raise(nary_eDimensionError,"ndim=%d is too many", ndim);
275
+ }
276
+ na->shape = ALLOC_N(size_t, ndim);
277
+ }
278
+ }
279
+
280
+ void
281
+ na_setup_shape(narray_t *na, int ndim, size_t *shape)
282
+ {
283
+ int i;
284
+ size_t size;
285
+
286
+ na_alloc_shape(na, ndim);
287
+
288
+ if (ndim==0) {
289
+ na->size = 1;
290
+ }
291
+ else if (ndim==1) {
292
+ na->size = shape[0];
293
+ }
294
+ else {
295
+ for (i=0, size=1; i<ndim; i++) {
296
+ na->shape[i] = shape[i];
297
+ size *= shape[i];
298
+ }
299
+ na->size = size;
300
+ }
301
+ }
302
+
303
+ static void
304
+ na_setup(VALUE self, int ndim, size_t *shape)
305
+ {
306
+ narray_t *na;
307
+ GetNArray(self,na);
308
+ na_setup_shape(na, ndim, shape);
309
+ }
310
+
311
+
312
+ /*
313
+ @overload initialize(shape)
314
+ @overload initialize(size0, size1, ...)
315
+ @param [Array] shape (array of sizes along each dimension)
316
+ @param [Integer] sizeN (size along Nth-dimension)
317
+ @return [Cumo::NArray] unallocated narray.
318
+
319
+ Constructs an instance of NArray class using the given
320
+ <i>shape</i> or <i>sizes</i>.
321
+ Note that NArray itself is an abstract super class and
322
+ not suitable to create instances.
323
+ Use Typed Subclasses of NArray (DFloat, Int32, etc) to create instances.
324
+ This method does not allocate memory for array data.
325
+ Memory is allocated on write method such as #fill, #store, #seq, etc.
326
+
327
+ @example
328
+ i = Cumo::Int64.new([2,4,3])
329
+ #=> Cumo::Int64#shape=[2,4,3](empty)
330
+
331
+ f = Cumo::DFloat.new(3,4)
332
+ #=> Cumo::DFloat#shape=[3,4](empty)
333
+
334
+ f.fill(2)
335
+ #=> Cumo::DFloat#shape=[3,4]
336
+ # [[2, 2, 2, 2],
337
+ # [2, 2, 2, 2],
338
+ # [2, 2, 2, 2]]
339
+
340
+ x = Cumo::NArray.new(5)
341
+ #=> in `new': allocator undefined for Cumo::NArray (TypeError)
342
+ # from t.rb:9:in `<main>'
343
+
344
+ */
345
+ static VALUE
346
+ na_initialize(VALUE self, VALUE args)
347
+ {
348
+ VALUE v;
349
+ size_t *shape=NULL;
350
+ int ndim;
351
+
352
+ if (RARRAY_LEN(args) == 1) {
353
+ v = RARRAY_AREF(args,0);
354
+ if (TYPE(v) != T_ARRAY) {
355
+ v = args;
356
+ }
357
+ } else {
358
+ v = args;
359
+ }
360
+ ndim = RARRAY_LEN(v);
361
+ if (ndim > NA_MAX_DIMENSION) {
362
+ rb_raise(rb_eArgError,"ndim=%d exceeds maximum dimension",ndim);
363
+ }
364
+ shape = ALLOCA_N(size_t, ndim);
365
+ // setup size_t shape[] from VALUE shape argument
366
+ na_array_to_internal_shape(self, v, shape);
367
+ na_setup(self, ndim, shape);
368
+
369
+ return self;
370
+ }
371
+
372
+
373
+ VALUE
374
+ nary_new(VALUE klass, int ndim, size_t *shape)
375
+ {
376
+ volatile VALUE obj;
377
+
378
+ obj = rb_funcall(klass, id_allocate, 0);
379
+ na_setup(obj, ndim, shape);
380
+ return obj;
381
+ }
382
+
383
+
384
+ VALUE
385
+ nary_view_new(VALUE klass, int ndim, size_t *shape)
386
+ {
387
+ volatile VALUE obj;
388
+
389
+ obj = na_s_allocate_view(klass);
390
+ na_setup(obj, ndim, shape);
391
+ return obj;
392
+ }
393
+
394
+
395
+ /*
396
+ Replaces the contents of self with the contents of other narray.
397
+ Used in dup and clone method.
398
+ @overload initialize_copy(other)
399
+ @param [Cumo::NArray] other
400
+ @return [Cumo::NArray] self
401
+ */
402
+ static VALUE
403
+ na_initialize_copy(VALUE self, VALUE orig)
404
+ {
405
+ narray_t *na;
406
+ GetNArray(orig,na);
407
+
408
+ na_setup(self,NA_NDIM(na),NA_SHAPE(na));
409
+ na_store(self,orig);
410
+ na_copy_flags(orig,self);
411
+ return self;
412
+ }
413
+
414
+
415
+ /*
416
+ * call-seq:
417
+ * zeros(shape) => narray
418
+ * zeros(size1,size2,...) => narray
419
+ *
420
+ * Returns a zero-filled narray with <i>shape</i>.
421
+ * This singleton method is valid not for NArray class itself
422
+ * but for typed NArray subclasses, e.g., DFloat, Int64.
423
+ * @example
424
+ * a = Cumo::DFloat.zeros(3,5)
425
+ * => Cumo::DFloat#shape=[3,5]
426
+ * [[0, 0, 0, 0, 0],
427
+ * [0, 0, 0, 0, 0],
428
+ * [0, 0, 0, 0, 0]]
429
+ */
430
+ static VALUE
431
+ na_s_zeros(int argc, VALUE *argv, VALUE klass)
432
+ {
433
+ VALUE obj;
434
+ obj = rb_class_new_instance(argc, argv, klass);
435
+ return rb_funcall(obj, id_fill, 1, INT2FIX(0));
436
+ }
437
+
438
+
439
+ /*
440
+ * call-seq:
441
+ * ones(shape) => narray
442
+ * ones(size1,size2,...) => narray
443
+ *
444
+ * Returns a one-filled narray with <i>shape</i>.
445
+ * This singleton method is valid not for NArray class itself
446
+ * but for typed NArray subclasses, e.g., DFloat, Int64.
447
+ * @example
448
+ * a = Cumo::DFloat.ones(3,5)
449
+ * => Cumo::DFloat#shape=[3,5]
450
+ * [[1, 1, 1, 1, 1],
451
+ * [1, 1, 1, 1, 1],
452
+ * [1, 1, 1, 1, 1]]
453
+ */
454
+ static VALUE
455
+ na_s_ones(int argc, VALUE *argv, VALUE klass)
456
+ {
457
+ VALUE obj;
458
+ obj = rb_class_new_instance(argc, argv, klass);
459
+ return rb_funcall(obj, id_fill, 1, INT2FIX(1));
460
+ }
461
+
462
+
463
+ /*
464
+ Returns an array of N linearly spaced points between x1 and x2.
465
+ This singleton method is valid not for NArray class itself
466
+ but for typed NArray subclasses, e.g., DFloat, Int64.
467
+
468
+ @overload linspace(x1, x2, [n])
469
+ @param [Numeric] x1 The start value
470
+ @param [Numeric] x2 The end value
471
+ @param [Integer] n The number of elements. (default is 100).
472
+ @return [Cumo::NArray] result array.
473
+
474
+ @example
475
+ a = Cumo::DFloat.linspace(-5,5,7)
476
+ => Cumo::DFloat#shape=[7]
477
+ [-5, -3.33333, -1.66667, 0, 1.66667, 3.33333, 5]
478
+ */
479
+ static VALUE
480
+ na_s_linspace(int argc, VALUE *argv, VALUE klass)
481
+ {
482
+ VALUE obj, vx1, vx2, vstep, vsize;
483
+ double n;
484
+ int narg;
485
+
486
+ narg = rb_scan_args(argc,argv,"21",&vx1,&vx2,&vsize);
487
+ if (narg==3) {
488
+ n = NUM2DBL(vsize);
489
+ } else {
490
+ n = 100;
491
+ vsize = INT2FIX(100);
492
+ }
493
+
494
+ obj = rb_funcall(vx2, '-', 1, vx1);
495
+ vstep = rb_funcall(obj, '/', 1, DBL2NUM(n-1));
496
+
497
+ obj = rb_class_new_instance(1, &vsize, klass);
498
+ return rb_funcall(obj, id_seq, 2, vx1, vstep);
499
+ }
500
+
501
+ /*
502
+ Returns an array of N logarithmically spaced points between 10^a and 10^b.
503
+ This singleton method is valid not for NArray having +logseq+ method,
504
+ i.e., DFloat, SFloat, DComplex, and SComplex.
505
+
506
+ @overload logspace(a, b, [n, base])
507
+ @param [Numeric] a The start value
508
+ @param [Numeric] b The end value
509
+ @param [Integer] n The number of elements. (default is 50)
510
+ @param [Numeric] base The base of log space. (default is 10)
511
+ @return [Cumo::NArray] result array.
512
+
513
+ @example
514
+ Cumo::DFloat.logspace(4,0,5,2)
515
+ => Cumo::DFloat#shape=[5]
516
+ [16, 8, 4, 2, 1]
517
+ Cumo::DComplex.logspace(0,1i*Math::PI,5,Math::E)
518
+ => Cumo::DComplex#shape=[5]
519
+ [1+4.44659e-323i, 0.707107+0.707107i, 6.12323e-17+1i, -0.707107+0.707107i, ...]
520
+ */
521
+ static VALUE
522
+ na_s_logspace(int argc, VALUE *argv, VALUE klass)
523
+ {
524
+ VALUE obj, vx1, vx2, vstep, vsize, vbase;
525
+ double n;
526
+
527
+ rb_scan_args(argc,argv,"22",&vx1,&vx2,&vsize,&vbase);
528
+ if (vsize == Qnil) {
529
+ vsize = INT2FIX(50);
530
+ n = 50;
531
+ } else {
532
+ n = NUM2DBL(vsize);
533
+ }
534
+ if (vbase == Qnil) {
535
+ vbase = DBL2NUM(10);
536
+ }
537
+
538
+ obj = rb_funcall(vx2, '-', 1, vx1);
539
+ vstep = rb_funcall(obj, '/', 1, DBL2NUM(n-1));
540
+
541
+ obj = rb_class_new_instance(1, &vsize, klass);
542
+ return rb_funcall(obj, id_logseq, 3, vx1, vstep, vbase);
543
+ }
544
+
545
+
546
+ /*
547
+ Returns a NArray with shape=(n,n) whose diagonal elements are 1, otherwise 0.
548
+ @overload eye(n)
549
+ @param [Integer] n Size of NArray. Creates 2-D NArray with shape=(n,n)
550
+ @return [Cumo::NArray] created NArray.
551
+ @example
552
+ a = Cumo::DFloat.eye(3)
553
+ => Cumo::DFloat#shape=[3,3]
554
+ [[1, 0, 0],
555
+ [0, 1, 0],
556
+ [0, 0, 1]]
557
+ */
558
+ static VALUE
559
+ na_s_eye(int argc, VALUE *argv, VALUE klass)
560
+ {
561
+ VALUE obj;
562
+ VALUE tmp[2];
563
+
564
+ if (argc==0) {
565
+ rb_raise(rb_eArgError,"No argument");
566
+ }
567
+ else if (argc==1) {
568
+ tmp[0] = tmp[1] = argv[0];
569
+ argv = tmp;
570
+ argc = 2;
571
+ }
572
+ obj = rb_class_new_instance(argc, argv, klass);
573
+ return rb_funcall(obj, id_eye, 0);
574
+ }
575
+
576
+
577
+
578
+ #define READ 1
579
+ #define WRITE 2
580
+
581
+ static char *
582
+ na_get_pointer_for_rw(VALUE self, int flag)
583
+ {
584
+ char *ptr;
585
+ VALUE obj;
586
+ narray_t *na;
587
+
588
+ if ((flag & WRITE) && OBJ_FROZEN(self)) {
589
+ rb_raise(rb_eRuntimeError, "cannot write to frozen NArray.");
590
+ }
591
+
592
+ GetNArray(self,na);
593
+
594
+ switch(NA_TYPE(na)) {
595
+ case NARRAY_DATA_T:
596
+ ptr = NA_DATA_PTR(na);
597
+ if (NA_SIZE(na) > 0 && ptr == NULL) {
598
+ if (flag & READ) {
599
+ rb_raise(rb_eRuntimeError,"cannot read unallocated NArray");
600
+ }
601
+ if (flag & WRITE) {
602
+ rb_funcall(self, id_allocate, 0);
603
+ ptr = NA_DATA_PTR(na);
604
+ }
605
+ }
606
+ return ptr;
607
+ case NARRAY_VIEW_T:
608
+ obj = NA_VIEW_DATA(na);
609
+ if ((flag & WRITE) && OBJ_FROZEN(obj)) {
610
+ rb_raise(rb_eRuntimeError, "cannot write to frozen NArray.");
611
+ }
612
+ GetNArray(obj,na);
613
+ switch(NA_TYPE(na)) {
614
+ case NARRAY_DATA_T:
615
+ ptr = NA_DATA_PTR(na);
616
+ if (flag & (READ|WRITE)) {
617
+ if (NA_SIZE(na) > 0 && ptr == NULL) {
618
+ rb_raise(rb_eRuntimeError,"cannot read/write unallocated NArray");
619
+ }
620
+ }
621
+ return ptr;
622
+ default:
623
+ rb_raise(rb_eRuntimeError,"invalid NA_TYPE of view: %d",NA_TYPE(na));
624
+ }
625
+ default:
626
+ rb_raise(rb_eRuntimeError,"invalid NA_TYPE: %d",NA_TYPE(na));
627
+ }
628
+
629
+ return NULL;
630
+ }
631
+
632
+ char *
633
+ na_get_pointer_for_read(VALUE self)
634
+ {
635
+ return na_get_pointer_for_rw(self, READ);
636
+ }
637
+
638
+ char *
639
+ na_get_pointer_for_write(VALUE self)
640
+ {
641
+ return na_get_pointer_for_rw(self, WRITE);
642
+ }
643
+
644
+ char *
645
+ na_get_pointer_for_read_write(VALUE self)
646
+ {
647
+ return na_get_pointer_for_rw(self, READ|WRITE);
648
+ }
649
+
650
+ char *
651
+ na_get_pointer(VALUE self)
652
+ {
653
+ return na_get_pointer_for_rw(self, 0);
654
+ }
655
+
656
+
657
+ void
658
+ na_release_lock(VALUE self)
659
+ {
660
+ narray_t *na;
661
+
662
+ UNSET_LOCK(self);
663
+ GetNArray(self,na);
664
+
665
+ switch(NA_TYPE(na)) {
666
+ case NARRAY_VIEW_T:
667
+ na_release_lock(NA_VIEW_DATA(na));
668
+ break;
669
+ }
670
+ }
671
+
672
+
673
+ /* method: size() -- returns the total number of typeents */
674
+ static VALUE
675
+ na_size(VALUE self)
676
+ {
677
+ narray_t *na;
678
+ GetNArray(self,na);
679
+ return SIZET2NUM(na->size);
680
+ }
681
+
682
+
683
+ /* method: size() -- returns the total number of typeents */
684
+ static VALUE
685
+ na_ndim(VALUE self)
686
+ {
687
+ narray_t *na;
688
+ GetNArray(self,na);
689
+ return INT2NUM(na->ndim);
690
+ }
691
+
692
+
693
+ /*
694
+ Returns true if self.size == 0.
695
+ @overload empty?
696
+ */
697
+ static VALUE
698
+ na_empty_p(VALUE self)
699
+ {
700
+ narray_t *na;
701
+ GetNArray(self,na);
702
+ if (NA_SIZE(na)==0) {
703
+ return Qtrue;
704
+ }
705
+ return Qfalse;
706
+ }
707
+
708
+
709
+ /* method: shape() -- returns shape, array of the size of dimensions */
710
+ static VALUE
711
+ na_shape(VALUE self)
712
+ {
713
+ volatile VALUE v;
714
+ narray_t *na;
715
+ size_t i, n, c, s;
716
+
717
+ GetNArray(self,na);
718
+ n = NA_NDIM(na);
719
+ if (TEST_COLUMN_MAJOR(self)) {
720
+ c = n-1;
721
+ s = -1;
722
+ } else {
723
+ c = 0;
724
+ s = 1;
725
+ }
726
+ v = rb_ary_new2(n);
727
+ for (i=0; i<n; i++) {
728
+ rb_ary_push(v, SIZET2NUM(na->shape[c]));
729
+ c += s;
730
+ }
731
+ return v;
732
+ }
733
+
734
+
735
+ unsigned int
736
+ nary_element_stride(VALUE v)
737
+ {
738
+ narray_type_info_t *info;
739
+ narray_t *na;
740
+
741
+ GetNArray(v,na);
742
+ if (na->type == NARRAY_VIEW_T) {
743
+ v = NA_VIEW_DATA(na);
744
+ GetNArray(v,na);
745
+ }
746
+ assert(na->type == NARRAY_DATA_T);
747
+
748
+ info = (narray_type_info_t *)(RTYPEDDATA_TYPE(v)->data);
749
+ return info->element_stride;
750
+ }
751
+
752
+ size_t
753
+ na_dtype_elmsz(VALUE klass)
754
+ {
755
+ return NUM2SIZET(rb_const_get(klass, id_contiguous_stride));
756
+ }
757
+
758
+ size_t
759
+ na_get_offset(VALUE self)
760
+ {
761
+ narray_t *na;
762
+ GetNArray(self,na);
763
+
764
+ switch(na->type) {
765
+ case NARRAY_DATA_T:
766
+ case NARRAY_FILEMAP_T:
767
+ return 0;
768
+ case NARRAY_VIEW_T:
769
+ return NA_VIEW_OFFSET(na);
770
+ }
771
+ return 0;
772
+ }
773
+
774
+
775
+ void
776
+ na_index_arg_to_internal_order(int argc, VALUE *argv, VALUE self)
777
+ {
778
+ int i,j;
779
+ VALUE tmp;
780
+
781
+ if (TEST_COLUMN_MAJOR(self)) {
782
+ for (i=0,j=argc-1; i<argc/2; i++,j--) {
783
+ tmp = argv[i];
784
+ argv[i] = argv[j];
785
+ argv[j] = tmp;
786
+ }
787
+ }
788
+ }
789
+
790
+ void
791
+ na_copy_flags(VALUE src, VALUE dst)
792
+ {
793
+ narray_t *na1, *na2;
794
+
795
+ GetNArray(src,na1);
796
+ GetNArray(dst,na2);
797
+
798
+ na2->flag[0] = na1->flag[0];
799
+ //na2->flag[1] = NA_FL1_INIT;
800
+
801
+ RBASIC(dst)->flags |= (RBASIC(src)->flags) &
802
+ (FL_USER1|FL_USER2|FL_USER3|FL_USER4|FL_USER5|FL_USER6|FL_USER7);
803
+ }
804
+
805
+
806
+ // fix name, ex, allow_stride_for_flatten_view
807
+ VALUE
808
+ na_check_ladder(VALUE self, int start_dim)
809
+ {
810
+ int i;
811
+ ssize_t st0, st1;
812
+ narray_t *na;
813
+ GetNArray(self,na);
814
+
815
+ if (start_dim < -na->ndim || start_dim >= na->ndim) {
816
+ rb_bug("start_dim (%d) out of range",start_dim);
817
+ }
818
+
819
+ switch(na->type) {
820
+ case NARRAY_DATA_T:
821
+ case NARRAY_FILEMAP_T:
822
+ return Qtrue;
823
+ case NARRAY_VIEW_T:
824
+ // negative dim -> position from last dim
825
+ if (start_dim < 0) {
826
+ start_dim += NA_NDIM(na);
827
+ }
828
+ // not ladder if it has index
829
+ for (i=start_dim; i<NA_NDIM(na); i++) {
830
+ if (NA_IS_INDEX_AT(na,i))
831
+ return Qfalse;
832
+ }
833
+ // check stride
834
+ st0 = NA_STRIDE_AT(na,start_dim);
835
+ for (i=start_dim+1; i<NA_NDIM(na); i++) {
836
+ st1 = NA_STRIDE_AT(na,i);
837
+ if (st0 != (ssize_t)(st1 * NA_SHAPE(na)[i])) {
838
+ return Qfalse;
839
+ }
840
+ st0 = st1;
841
+ }
842
+ }
843
+ return Qtrue;
844
+ }
845
+
846
+ VALUE
847
+ na_check_contiguous(VALUE self)
848
+ {
849
+ ssize_t elmsz;
850
+ narray_t *na;
851
+ GetNArray(self,na);
852
+
853
+ switch(na->type) {
854
+ case NARRAY_DATA_T:
855
+ case NARRAY_FILEMAP_T:
856
+ return Qtrue;
857
+ case NARRAY_VIEW_T:
858
+ if (NA_VIEW_STRIDX(na)==0) {
859
+ return Qtrue;
860
+ }
861
+ if (na_check_ladder(self,0)==Qtrue) {
862
+ elmsz = nary_element_stride(self);
863
+ if (elmsz == NA_STRIDE_AT(na,NA_NDIM(na)-1)) {
864
+ return Qtrue;
865
+ }
866
+ }
867
+ }
868
+ return Qfalse;
869
+ }
870
+
871
+ //----------------------------------------------------------------------
872
+
873
+ /*
874
+ * call-seq:
875
+ * narray.view => narray
876
+ *
877
+ * Return view of NArray
878
+ */
879
+ VALUE
880
+ na_make_view(VALUE self)
881
+ {
882
+ int i, nd;
883
+ size_t j;
884
+ size_t *idx1, *idx2;
885
+ ssize_t stride;
886
+ narray_t *na;
887
+ narray_view_t *na1, *na2;
888
+ volatile VALUE view;
889
+
890
+ GetNArray(self,na);
891
+ nd = na->ndim;
892
+
893
+ view = na_s_allocate_view(CLASS_OF(self));
894
+
895
+ na_copy_flags(self, view);
896
+ GetNArrayView(view, na2);
897
+
898
+ na_setup_shape((narray_t*)na2, nd, na->shape);
899
+ na2->stridx = ALLOC_N(stridx_t,nd);
900
+
901
+ switch(na->type) {
902
+ case NARRAY_DATA_T:
903
+ case NARRAY_FILEMAP_T:
904
+ stride = nary_element_stride(self);
905
+ for (i=nd; i--;) {
906
+ SDX_SET_STRIDE(na2->stridx[i],stride);
907
+ stride *= na->shape[i];
908
+ }
909
+ na2->offset = 0;
910
+ na2->data = self;
911
+ break;
912
+ case NARRAY_VIEW_T:
913
+ GetNArrayView(self, na1);
914
+ for (i=0; i<nd; i++) {
915
+ if (SDX_IS_INDEX(na1->stridx[i])) {
916
+ idx1 = SDX_GET_INDEX(na1->stridx[i]);
917
+ idx2 = ALLOC_N(size_t,na1->base.shape[i]);
918
+ for (j=0; j<na1->base.shape[i]; j++) {
919
+ idx2[j] = idx1[j];
920
+ }
921
+ SDX_SET_INDEX(na2->stridx[i],idx2);
922
+ } else {
923
+ na2->stridx[i] = na1->stridx[i];
924
+ }
925
+ }
926
+ na2->offset = na1->offset;
927
+ na2->data = na1->data;
928
+ break;
929
+ }
930
+
931
+ return view;
932
+ }
933
+
934
+
935
+ //----------------------------------------------------------------------
936
+
937
+ /*
938
+ * call-seq:
939
+ * narray.expand_dims(dim) => narray view
940
+ *
941
+ * Expand the shape of an array. Insert a new axis with size=1
942
+ * at a given dimension.
943
+ * @param [Integer] dim dimension at which new axis is inserted.
944
+ * @return [Cumo::NArray] result narray view.
945
+ */
946
+ static VALUE
947
+ na_expand_dims(VALUE self, VALUE vdim)
948
+ {
949
+ int i, j, nd, dim;
950
+ size_t *shape, *na_shape;
951
+ stridx_t *stridx, *na_stridx;
952
+ narray_t *na;
953
+ narray_view_t *na2;
954
+ VALUE view;
955
+
956
+ GetNArray(self,na);
957
+ nd = na->ndim;
958
+
959
+ dim = NUM2INT(vdim);
960
+ if (dim < -nd-1 || dim > nd) {
961
+ rb_raise(nary_eDimensionError,"invalid axis (%d for %dD NArray)",
962
+ dim,nd);
963
+ }
964
+ if (dim < 0) {
965
+ dim += nd+1;
966
+ }
967
+
968
+ view = na_make_view(self);
969
+ GetNArrayView(view, na2);
970
+
971
+ shape = ALLOC_N(size_t,nd+1);
972
+ stridx = ALLOC_N(stridx_t,nd+1);
973
+ na_shape = na2->base.shape;
974
+ na_stridx = na2->stridx;
975
+
976
+ for (i=j=0; i<=nd; i++) {
977
+ if (i==dim) {
978
+ shape[i] = 1;
979
+ SDX_SET_STRIDE(stridx[i],0);
980
+ } else {
981
+ shape[i] = na_shape[j];
982
+ stridx[i] = na_stridx[j];
983
+ j++;
984
+ }
985
+ }
986
+
987
+ na2->stridx = stridx;
988
+ xfree(na_stridx);
989
+ na2->base.shape = shape;
990
+ if (na_shape != &(na2->base.size)) {
991
+ xfree(na_shape);
992
+ }
993
+ na2->base.ndim++;
994
+ return view;
995
+ }
996
+
997
+ //----------------------------------------------------------------------
998
+
999
+ /*
1000
+ * call-seq:
1001
+ * narray.reverse([dim0,dim1,..]) => narray
1002
+ *
1003
+ * Return reversed view along specified dimeinsion
1004
+ */
1005
+ static VALUE
1006
+ nary_reverse(int argc, VALUE *argv, VALUE self)
1007
+ {
1008
+ int i, nd;
1009
+ size_t j, n;
1010
+ size_t offset;
1011
+ size_t *idx1, *idx2;
1012
+ ssize_t stride;
1013
+ ssize_t sign;
1014
+ narray_t *na;
1015
+ narray_view_t *na1, *na2;
1016
+ VALUE view;
1017
+ VALUE reduce;
1018
+
1019
+ reduce = na_reduce_dimension(argc, argv, 1, &self, 0, 0);
1020
+
1021
+ GetNArray(self,na);
1022
+ nd = na->ndim;
1023
+
1024
+ view = na_s_allocate_view(CLASS_OF(self));
1025
+
1026
+ na_copy_flags(self, view);
1027
+ GetNArrayView(view, na2);
1028
+
1029
+ na_setup_shape((narray_t*)na2, nd, na->shape);
1030
+ na2->stridx = ALLOC_N(stridx_t,nd);
1031
+
1032
+ switch(na->type) {
1033
+ case NARRAY_DATA_T:
1034
+ case NARRAY_FILEMAP_T:
1035
+ stride = nary_element_stride(self);
1036
+ offset = 0;
1037
+ for (i=nd; i--;) {
1038
+ if (na_test_reduce(reduce,i)) {
1039
+ offset += (na->shape[i]-1)*stride;
1040
+ sign = -1;
1041
+ } else {
1042
+ sign = 1;
1043
+ }
1044
+ SDX_SET_STRIDE(na2->stridx[i],stride*sign);
1045
+ stride *= na->shape[i];
1046
+ }
1047
+ na2->offset = offset;
1048
+ na2->data = self;
1049
+ break;
1050
+ case NARRAY_VIEW_T:
1051
+ GetNArrayView(self, na1);
1052
+ offset = na1->offset;
1053
+ for (i=0; i<nd; i++) {
1054
+ n = na1->base.shape[i];
1055
+ if (SDX_IS_INDEX(na1->stridx[i])) {
1056
+ idx1 = SDX_GET_INDEX(na1->stridx[i]);
1057
+ idx2 = ALLOC_N(size_t,n);
1058
+ if (na_test_reduce(reduce,i)) {
1059
+ for (j=0; j<n; j++) {
1060
+ idx2[n-1-j] = idx1[j];
1061
+ }
1062
+ } else {
1063
+ for (j=0; j<n; j++) {
1064
+ idx2[j] = idx1[j];
1065
+ }
1066
+ }
1067
+ SDX_SET_INDEX(na2->stridx[i],idx2);
1068
+ } else {
1069
+ stride = SDX_GET_STRIDE(na1->stridx[i]);
1070
+ if (na_test_reduce(reduce,i)) {
1071
+ offset += (n-1)*stride;
1072
+ SDX_SET_STRIDE(na2->stridx[i],-stride);
1073
+ } else {
1074
+ na2->stridx[i] = na1->stridx[i];
1075
+ }
1076
+ }
1077
+ }
1078
+ na2->offset = offset;
1079
+ na2->data = na1->data;
1080
+ break;
1081
+ }
1082
+
1083
+ return view;
1084
+ }
1085
+
1086
+ //----------------------------------------------------------------------
1087
+
1088
+ VALUE
1089
+ cumo_na_upcast(VALUE type1, VALUE type2)
1090
+ {
1091
+ VALUE upcast_hash;
1092
+ VALUE result_type;
1093
+
1094
+ if (type1==type2) {
1095
+ return type1;
1096
+ }
1097
+ upcast_hash = rb_const_get(type1, id_UPCAST);
1098
+ result_type = rb_hash_aref(upcast_hash, type2);
1099
+ if (NIL_P(result_type)) {
1100
+ if (TYPE(type2)==T_CLASS) {
1101
+ if (RTEST(rb_class_inherited_p(type2,cNArray))) {
1102
+ upcast_hash = rb_const_get(type2, id_UPCAST);
1103
+ result_type = rb_hash_aref(upcast_hash, type1);
1104
+ }
1105
+ }
1106
+ }
1107
+ return result_type;
1108
+ }
1109
+
1110
+ /*
1111
+ Returns an array containing other and self,
1112
+ both are converted to upcasted type of NArray.
1113
+ Note that NArray has distinct UPCAST mechanism.
1114
+ Coerce is used for operation between non-NArray and NArray.
1115
+ @overload coerce(other)
1116
+ @param [Object] other numeric object.
1117
+ @return [Array] NArray-casted [other,self]
1118
+ */
1119
+ static VALUE
1120
+ nary_coerce(VALUE x, VALUE y)
1121
+ {
1122
+ VALUE type;
1123
+
1124
+ type = cumo_na_upcast(CLASS_OF(x), CLASS_OF(y));
1125
+ y = rb_funcall(type,id_cast,1,y);
1126
+ return rb_assoc_new(y , x);
1127
+ }
1128
+
1129
+
1130
+ /*
1131
+ Returns total byte size of NArray.
1132
+ @return [Integer] byte size.
1133
+ */
1134
+ static VALUE
1135
+ nary_byte_size(VALUE self)
1136
+ {
1137
+ VALUE velmsz;
1138
+ narray_t *na;
1139
+
1140
+ GetNArray(self,na);
1141
+ velmsz = rb_const_get(CLASS_OF(self), id_element_byte_size);
1142
+ if (FIXNUM_P(velmsz)) {
1143
+ return SIZET2NUM(NUM2SIZET(velmsz) * na->size);
1144
+ }
1145
+ return SIZET2NUM(ceil(NUM2DBL(velmsz) * na->size));
1146
+ }
1147
+
1148
+ /*
1149
+ Returns byte size of one element of NArray.
1150
+ @return [Numeric] byte size.
1151
+ */
1152
+ static VALUE
1153
+ nary_s_byte_size(VALUE type)
1154
+ {
1155
+ return rb_const_get(type, id_element_byte_size);
1156
+ }
1157
+
1158
+
1159
+ /*
1160
+ Returns a new 1-D array initialized from binary raw data in a string.
1161
+ @overload from_binary(string,[shape])
1162
+ @param [String] string Binary raw data.
1163
+ @param [Array] shape array of integers representing array shape.
1164
+ @return [Cumo::NArray] NArray containing binary data.
1165
+ */
1166
+ static VALUE
1167
+ nary_s_from_binary(int argc, VALUE *argv, VALUE type)
1168
+ {
1169
+ size_t len, str_len, byte_size;
1170
+ size_t *shape;
1171
+ char *ptr;
1172
+ int i, nd, narg;
1173
+ VALUE vstr, vshape, vna;
1174
+ VALUE velmsz;
1175
+
1176
+ narg = rb_scan_args(argc,argv,"11",&vstr,&vshape);
1177
+ Check_Type(vstr,T_STRING);
1178
+ str_len = RSTRING_LEN(vstr);
1179
+ velmsz = rb_const_get(type, id_element_byte_size);
1180
+ if (narg==2) {
1181
+ switch(TYPE(vshape)) {
1182
+ case T_FIXNUM:
1183
+ nd = 1;
1184
+ len = NUM2SIZET(vshape);
1185
+ shape = &len;
1186
+ break;
1187
+ case T_ARRAY:
1188
+ nd = RARRAY_LEN(vshape);
1189
+ if (nd == 0 || nd > NA_MAX_DIMENSION) {
1190
+ rb_raise(nary_eDimensionError,"too long or empty shape (%d)", nd);
1191
+ }
1192
+ shape = ALLOCA_N(size_t,nd);
1193
+ len = 1;
1194
+ for (i=0; i<nd; ++i) {
1195
+ len *= shape[i] = NUM2SIZET(RARRAY_AREF(vshape,i));
1196
+ }
1197
+ break;
1198
+ default:
1199
+ rb_raise(rb_eArgError,"second argument must be size or shape");
1200
+ }
1201
+ if (FIXNUM_P(velmsz)) {
1202
+ byte_size = len * NUM2SIZET(velmsz);
1203
+ } else {
1204
+ byte_size = ceil(len * NUM2DBL(velmsz));
1205
+ }
1206
+ if (byte_size > str_len) {
1207
+ rb_raise(rb_eArgError, "specified size is too large");
1208
+ }
1209
+ } else {
1210
+ nd = 1;
1211
+ if (FIXNUM_P(velmsz)) {
1212
+ len = str_len / NUM2SIZET(velmsz);
1213
+ byte_size = len * NUM2SIZET(velmsz);
1214
+ } else {
1215
+ len = floor(str_len / NUM2DBL(velmsz));
1216
+ byte_size = str_len;
1217
+ }
1218
+ if (len == 0) {
1219
+ rb_raise(rb_eArgError, "string is empty or too short");
1220
+ }
1221
+ shape = ALLOCA_N(size_t,nd);
1222
+ shape[0] = len;
1223
+ }
1224
+
1225
+ vna = nary_new(type, nd, shape);
1226
+ ptr = na_get_pointer_for_write(vna);
1227
+
1228
+ memcpy(ptr, RSTRING_PTR(vstr), byte_size);
1229
+
1230
+ return vna;
1231
+ }
1232
+
1233
+ /*
1234
+ Returns a new 1-D array initialized from binary raw data in a string.
1235
+ @overload store_binary(string,[offset])
1236
+ @param [String] string Binary raw data.
1237
+ @param [Integer] (optional) offset Byte offset in string.
1238
+ @return [Integer] stored length.
1239
+ */
1240
+ static VALUE
1241
+ nary_store_binary(int argc, VALUE *argv, VALUE self)
1242
+ {
1243
+ size_t size, str_len, byte_size, offset;
1244
+ char *ptr;
1245
+ int narg;
1246
+ VALUE vstr, voffset;
1247
+ VALUE velmsz;
1248
+ narray_t *na;
1249
+
1250
+ narg = rb_scan_args(argc,argv,"11",&vstr,&voffset);
1251
+ str_len = RSTRING_LEN(vstr);
1252
+ if (narg==2) {
1253
+ offset = NUM2SIZET(voffset);
1254
+ if (str_len < offset) {
1255
+ rb_raise(rb_eArgError, "offset is larger than string length");
1256
+ }
1257
+ str_len -= offset;
1258
+ } else {
1259
+ offset = 0;
1260
+ }
1261
+
1262
+ GetNArray(self,na);
1263
+ size = NA_SIZE(na);
1264
+ velmsz = rb_const_get(CLASS_OF(self), id_element_byte_size);
1265
+ if (FIXNUM_P(velmsz)) {
1266
+ byte_size = size * NUM2SIZET(velmsz);
1267
+ } else {
1268
+ byte_size = ceil(size * NUM2DBL(velmsz));
1269
+ }
1270
+ if (byte_size > str_len) {
1271
+ rb_raise(rb_eArgError, "string is too short to store");
1272
+ }
1273
+
1274
+ ptr = na_get_pointer_for_write(self);
1275
+ memcpy(ptr, RSTRING_PTR(vstr)+offset, byte_size);
1276
+
1277
+ return SIZET2NUM(byte_size);
1278
+ }
1279
+
1280
+ /*
1281
+ Returns string containing the raw data bytes in NArray.
1282
+ @overload to_binary()
1283
+ @return [String] String object containing binary raw data.
1284
+ */
1285
+ static VALUE
1286
+ nary_to_binary(VALUE self)
1287
+ {
1288
+ size_t len, offset=0;
1289
+ char *ptr;
1290
+ VALUE str;
1291
+ narray_t *na;
1292
+
1293
+ SHOW_SYNCHRONIZE_WARNING_ONCE("nary_to_binary", "any");
1294
+ cumo_cuda_runtime_check_status(cudaDeviceSynchronize());
1295
+
1296
+ GetNArray(self,na);
1297
+ if (na->type == NARRAY_VIEW_T) {
1298
+ if (na_check_contiguous(self)==Qtrue) {
1299
+ offset = NA_VIEW_OFFSET(na);
1300
+ } else {
1301
+ self = rb_funcall(self,id_dup,0);
1302
+ }
1303
+ }
1304
+ len = NUM2SIZET(nary_byte_size(self));
1305
+ ptr = na_get_pointer_for_read(self);
1306
+ str = rb_usascii_str_new(ptr+offset,len);
1307
+ RB_GC_GUARD(self);
1308
+ return str;
1309
+ }
1310
+
1311
+ /*
1312
+ Dump marshal data.
1313
+ @overload marshal_dump()
1314
+ @return [Array] Array containing marshal data.
1315
+ */
1316
+ static VALUE
1317
+ nary_marshal_dump(VALUE self)
1318
+ {
1319
+ VALUE a;
1320
+
1321
+ SHOW_SYNCHRONIZE_WARNING_ONCE("nary_marshal_dump", "any");
1322
+ cumo_cuda_runtime_check_status(cudaDeviceSynchronize());
1323
+
1324
+ a = rb_ary_new();
1325
+ rb_ary_push(a, INT2FIX(1)); // version
1326
+ rb_ary_push(a, na_shape(self));
1327
+ rb_ary_push(a, INT2FIX(NA_FLAG0(self)));
1328
+ if (CLASS_OF(self) == cumo_cRObject) {
1329
+ narray_t *na;
1330
+ VALUE *ptr;
1331
+ size_t offset=0;
1332
+ GetNArray(self,na);
1333
+ if (na->type == NARRAY_VIEW_T) {
1334
+ if (na_check_contiguous(self)==Qtrue) {
1335
+ offset = NA_VIEW_OFFSET(na);
1336
+ } else {
1337
+ self = rb_funcall(self,id_dup,0);
1338
+ }
1339
+ }
1340
+ ptr = (VALUE*)na_get_pointer_for_read(self);
1341
+ rb_ary_push(a, rb_ary_new4(NA_SIZE(na), ptr+offset));
1342
+ } else {
1343
+ rb_ary_push(a, nary_to_binary(self));
1344
+ }
1345
+ RB_GC_GUARD(self);
1346
+ return a;
1347
+ }
1348
+
1349
+ static VALUE na_inplace( VALUE self );
1350
+ /*
1351
+ Load marshal data.
1352
+ @overload marshal_load(data)
1353
+ @params [Array] Array containing marshal data.
1354
+ @return [nil]
1355
+ */
1356
+ static VALUE
1357
+ nary_marshal_load(VALUE self, VALUE a)
1358
+ {
1359
+ VALUE v;
1360
+
1361
+ if (TYPE(a) != T_ARRAY) {
1362
+ rb_raise(rb_eArgError,"marshal argument should be array");
1363
+ }
1364
+ if (RARRAY_LEN(a) != 4) {
1365
+ rb_raise(rb_eArgError,"marshal array size should be 4");
1366
+ }
1367
+ if (RARRAY_AREF(a,0) != INT2FIX(1)) {
1368
+ rb_raise(rb_eArgError,"NArray marshal version %d is not supported "
1369
+ "(only version 1)", NUM2INT(RARRAY_AREF(a,0)));
1370
+ }
1371
+ na_initialize(self,RARRAY_AREF(a,1));
1372
+ NA_FL0_SET(self,FIX2INT(RARRAY_AREF(a,2)));
1373
+ v = RARRAY_AREF(a,3);
1374
+ if (CLASS_OF(self) == cumo_cRObject) {
1375
+ narray_t *na;
1376
+ char *ptr;
1377
+ if (TYPE(v) != T_ARRAY) {
1378
+ rb_raise(rb_eArgError,"RObject content should be array");
1379
+ }
1380
+ GetNArray(self,na);
1381
+ if (RARRAY_LEN(v) != (long)NA_SIZE(na)) {
1382
+ rb_raise(rb_eArgError,"RObject content size mismatch");
1383
+ }
1384
+ ptr = na_get_pointer_for_write(self);
1385
+ memcpy(ptr, RARRAY_PTR(v), NA_SIZE(na)*sizeof(VALUE));
1386
+ } else {
1387
+ nary_store_binary(1,&v,self);
1388
+ if (TEST_BYTE_SWAPPED(self)) {
1389
+ rb_funcall(na_inplace(self),id_to_host,0);
1390
+ REVERSE_ENDIAN(self); // correct behavior??
1391
+ }
1392
+ }
1393
+ RB_GC_GUARD(a);
1394
+ return self;
1395
+ }
1396
+
1397
+
1398
+ /*
1399
+ Cast self to another NArray datatype.
1400
+ @overload cast_to(datatype)
1401
+ @param [Class] datatype NArray datatype.
1402
+ @return [Cumo::NArray]
1403
+ */
1404
+ static VALUE
1405
+ nary_cast_to(VALUE obj, VALUE type)
1406
+ {
1407
+ return rb_funcall(type, id_cast, 1, obj);
1408
+ }
1409
+
1410
+
1411
+
1412
+ // reduce is dimension indicies to reduce in reduction kernel (in bits), e.g., for an array of shape:
1413
+ // [2,3,4], 111b for sum(), 010b for sum(axis: 1), 110b for sum(axis: [1,2])
1414
+ bool
1415
+ na_test_reduce(VALUE reduce, int dim)
1416
+ {
1417
+ size_t m;
1418
+
1419
+ if (!RTEST(reduce))
1420
+ return 0;
1421
+ if (FIXNUM_P(reduce)) {
1422
+ m = FIX2LONG(reduce);
1423
+ if (m==0) return 1;
1424
+ return (m & (1u<<dim)) ? 1 : 0;
1425
+ } else {
1426
+ return (rb_funcall(reduce,id_bracket,1,INT2FIX(dim))==INT2FIX(1)) ?
1427
+ 1 : 0 ;
1428
+ }
1429
+ }
1430
+
1431
+
1432
+ static VALUE
1433
+ na_get_reduce_flag_from_narray(int naryc, VALUE *naryv, int *max_arg)
1434
+ {
1435
+ int ndim, ndim0;
1436
+ int rowmaj;
1437
+ int i;
1438
+ size_t j;
1439
+ narray_t *na;
1440
+ VALUE reduce;
1441
+
1442
+ if (naryc<1) {
1443
+ rb_raise(rb_eRuntimeError,"must be positive: naryc=%d", naryc);
1444
+ }
1445
+ GetNArray(naryv[0],na);
1446
+ if (na->size==0) {
1447
+ rb_raise(nary_eShapeError,"cannot reduce empty NArray");
1448
+ }
1449
+ reduce = na->reduce;
1450
+ ndim = ndim0 = na->ndim;
1451
+ if (max_arg) *max_arg = 0;
1452
+ rowmaj = TEST_COLUMN_MAJOR(naryv[0]);
1453
+ for (i=0; i<naryc; i++) {
1454
+ GetNArray(naryv[i],na);
1455
+ if (na->size==0) {
1456
+ rb_raise(nary_eShapeError,"cannot reduce empty NArray");
1457
+ }
1458
+ if (TEST_COLUMN_MAJOR(naryv[i]) != rowmaj) {
1459
+ rb_raise(nary_eDimensionError,"dimension order is different");
1460
+ }
1461
+ if (na->ndim > ndim) { // maximum dimension
1462
+ ndim = na->ndim;
1463
+ if (max_arg) *max_arg = i;
1464
+ }
1465
+ }
1466
+ if (ndim != ndim0) {
1467
+ j = NUM2SIZET(reduce) << (ndim-ndim0);
1468
+ reduce = SIZET2NUM(j);
1469
+ }
1470
+ return reduce;
1471
+ }
1472
+
1473
+
1474
+ static VALUE
1475
+ na_get_reduce_flag_from_axes(VALUE na_obj, VALUE axes)
1476
+ {
1477
+ int i, r;
1478
+ int ndim, rowmaj;
1479
+ long narg;
1480
+ size_t j;
1481
+ size_t len;
1482
+ ssize_t beg, step;
1483
+ VALUE v;
1484
+ size_t m;
1485
+ VALUE reduce;
1486
+ narray_t *na;
1487
+
1488
+ GetNArray(na_obj,na);
1489
+ ndim = na->ndim;
1490
+ rowmaj = TEST_COLUMN_MAJOR(na_obj);
1491
+
1492
+ m = 0;
1493
+ reduce = Qnil;
1494
+ narg = RARRAY_LEN(axes);
1495
+ for (i=0; i<narg; i++) {
1496
+ v = RARRAY_AREF(axes,i);
1497
+ //printf("argv[%d]=",i);rb_p(v);
1498
+ if (TYPE(v)==T_FIXNUM) {
1499
+ beg = FIX2INT(v);
1500
+ if (beg<0) beg+=ndim;
1501
+ if (beg>=ndim || beg<0) {
1502
+ rb_raise(nary_eDimensionError,"dimension is out of range");
1503
+ }
1504
+ len = 1;
1505
+ step = 0;
1506
+ //printf("beg=%d step=%d len=%d\n",beg,step,len);
1507
+ } else if (rb_obj_is_kind_of(v,rb_cRange) ||
1508
+ rb_obj_is_kind_of(v,na_cStep)) {
1509
+ nary_step_array_index( v, ndim, &len, &beg, &step );
1510
+ } else {
1511
+ rb_raise(nary_eDimensionError, "invalid dimension argument %s",
1512
+ rb_obj_classname(v));
1513
+ }
1514
+ for (j=0; j<len; j++) {
1515
+ r = beg + step*j;
1516
+ if (rowmaj) {
1517
+ r = ndim-1-r;
1518
+ }
1519
+ if (reduce==Qnil) {
1520
+ if ( r < (ssize_t)sizeof(size_t) ) {
1521
+ m |= ((size_t)1) << r;
1522
+ continue;
1523
+ } else {
1524
+ reduce = SIZET2NUM(m);
1525
+ }
1526
+ }
1527
+ v = rb_funcall( INT2FIX(1), id_shift_left, 1, INT2FIX(r) );
1528
+ reduce = rb_funcall( reduce, '|', 1, v );
1529
+ }
1530
+ }
1531
+ if (NIL_P(reduce)) reduce = SIZET2NUM(m);
1532
+ return reduce;
1533
+ }
1534
+
1535
+
1536
+ VALUE
1537
+ nary_reduce_options(VALUE axes, VALUE *opts, int naryc, VALUE *naryv,
1538
+ ndfunc_t *ndf)
1539
+ {
1540
+ int max_arg;
1541
+ VALUE reduce;
1542
+
1543
+ // option: axis
1544
+ if (opts[0] != Qundef && RTEST(opts[0])) {
1545
+ if (!NIL_P(axes)) {
1546
+ rb_raise(rb_eArgError,
1547
+ "cannot specify axis-arguments and axis-keyword simultaneously");
1548
+ }
1549
+ if (TYPE(opts[0]) == T_ARRAY) {
1550
+ axes = opts[0];
1551
+ } else {
1552
+ axes = rb_ary_new3(1,opts[0]);
1553
+ }
1554
+ }
1555
+ if (ndf) {
1556
+ // option: keepdims
1557
+ if (opts[1] != Qundef) {
1558
+ if (RTEST(opts[1]))
1559
+ ndf->flag |= NDF_KEEP_DIM;
1560
+ }
1561
+ }
1562
+
1563
+ reduce = na_get_reduce_flag_from_narray(naryc, naryv, &max_arg);
1564
+
1565
+ if (NIL_P(axes)) return reduce;
1566
+
1567
+ return na_get_reduce_flag_from_axes(naryv[max_arg], axes);
1568
+ }
1569
+
1570
+
1571
+ VALUE
1572
+ nary_reduce_dimension(int argc, VALUE *argv, int naryc, VALUE *naryv,
1573
+ ndfunc_t *ndf, na_iter_func_t iter_nan)
1574
+ {
1575
+ long narg;
1576
+ VALUE axes;
1577
+ VALUE kw_hash = Qnil;
1578
+ ID kw_table[3] = {id_axis,id_keepdims,id_nan};
1579
+ VALUE opts[3] = {Qundef,Qundef,Qundef};
1580
+
1581
+ narg = rb_scan_args(argc, argv, "*:", &axes, &kw_hash);
1582
+ rb_get_kwargs(kw_hash, kw_table, 0, 3, opts);
1583
+
1584
+ if (ndf) {
1585
+ // option: nan
1586
+ if (iter_nan && opts[2] != Qundef) {
1587
+ if (RTEST(opts[2]))
1588
+ ndf->func = iter_nan; // replace to nan-aware iterator function
1589
+ }
1590
+ }
1591
+
1592
+ return na_reduce_options((narg)?axes:Qnil, opts, naryc, naryv, ndf);
1593
+ }
1594
+
1595
+ /*
1596
+ Return true if column major.
1597
+ */
1598
+ static VALUE na_column_major_p( VALUE self )
1599
+ {
1600
+ if (TEST_COLUMN_MAJOR(self))
1601
+ return Qtrue;
1602
+ else
1603
+ return Qfalse;
1604
+ }
1605
+
1606
+ /*
1607
+ Return true if row major.
1608
+ */
1609
+ static VALUE na_row_major_p( VALUE self )
1610
+ {
1611
+ if (TEST_ROW_MAJOR(self))
1612
+ return Qtrue;
1613
+ else
1614
+ return Qfalse;
1615
+ }
1616
+
1617
+
1618
+ /*
1619
+ Return true if byte swapped.
1620
+ */
1621
+ static VALUE na_byte_swapped_p( VALUE self )
1622
+ {
1623
+ if (TEST_BYTE_SWAPPED(self))
1624
+ return Qtrue;
1625
+ return Qfalse;
1626
+ }
1627
+
1628
+ /*
1629
+ Return true if not byte swapped.
1630
+ */
1631
+ static VALUE na_host_order_p( VALUE self )
1632
+ {
1633
+ if (TEST_BYTE_SWAPPED(self))
1634
+ return Qfalse;
1635
+ return Qtrue;
1636
+ }
1637
+
1638
+
1639
+ /*
1640
+ Returns view of narray with inplace flagged.
1641
+ @return [Cumo::NArray] view of narray with inplace flag.
1642
+ */
1643
+ static VALUE na_inplace( VALUE self )
1644
+ {
1645
+ VALUE view = self;
1646
+ view = na_make_view(self);
1647
+ SET_INPLACE(view);
1648
+ return view;
1649
+ }
1650
+
1651
+ /*
1652
+ Set inplace flag to self.
1653
+ @return [Cumo::NArray] self
1654
+ */
1655
+ static VALUE na_inplace_bang( VALUE self )
1656
+ {
1657
+ SET_INPLACE(self);
1658
+ return self;
1659
+ }
1660
+
1661
+ /*
1662
+ Return true if inplace flagged.
1663
+ */
1664
+ static VALUE na_inplace_p( VALUE self )
1665
+ {
1666
+ if (TEST_INPLACE(self))
1667
+ return Qtrue;
1668
+ else
1669
+ return Qfalse;
1670
+ }
1671
+
1672
+ /*
1673
+ Unset inplace flag to self.
1674
+ @return [Cumo::NArray] self
1675
+ */
1676
+ static VALUE na_out_of_place_bang( VALUE self )
1677
+ {
1678
+ UNSET_INPLACE(self);
1679
+ return self;
1680
+ }
1681
+
1682
+ int na_debug_flag=0;
1683
+
1684
+ static VALUE na_debug_set(VALUE mod, VALUE flag)
1685
+ {
1686
+ na_debug_flag = RTEST(flag);
1687
+ return Qnil;
1688
+ }
1689
+
1690
+ static double na_profile_value=0;
1691
+
1692
+ static VALUE na_profile(VALUE mod)
1693
+ {
1694
+ return rb_float_new(na_profile_value);
1695
+ }
1696
+
1697
+ static VALUE na_profile_set(VALUE mod, VALUE val)
1698
+ {
1699
+ na_profile_value = NUM2DBL(val);
1700
+ return val;
1701
+ }
1702
+
1703
+
1704
+ /*
1705
+ Returns the number of rows used for NArray#inspect
1706
+ @overload inspect_rows
1707
+ @return [Integer or nil] the number of rows.
1708
+ */
1709
+ static VALUE na_inspect_rows(VALUE mod)
1710
+ {
1711
+ if (cumo_na_inspect_rows > 0) {
1712
+ return INT2NUM(cumo_na_inspect_rows);
1713
+ } else {
1714
+ return Qnil;
1715
+ }
1716
+ }
1717
+
1718
+ /*
1719
+ Set the number of rows used for NArray#inspect
1720
+ @overload inspect_rows=(rows)
1721
+ @param [Integer or nil] rows the number of rows
1722
+ @return [nil]
1723
+ */
1724
+ static VALUE na_inspect_rows_set(VALUE mod, VALUE num)
1725
+ {
1726
+ if (RTEST(num)) {
1727
+ cumo_na_inspect_rows = NUM2INT(num);
1728
+ } else {
1729
+ cumo_na_inspect_rows = 0;
1730
+ }
1731
+ return Qnil;
1732
+ }
1733
+
1734
+ /*
1735
+ Returns the number of cols used for NArray#inspect
1736
+ @overload inspect_cols
1737
+ @return [Integer or nil] the number of cols.
1738
+ */
1739
+ static VALUE na_inspect_cols(VALUE mod)
1740
+ {
1741
+ if (cumo_na_inspect_cols > 0) {
1742
+ return INT2NUM(cumo_na_inspect_cols);
1743
+ } else {
1744
+ return Qnil;
1745
+ }
1746
+ }
1747
+
1748
+ /*
1749
+ Set the number of cols used for NArray#inspect
1750
+ @overload inspect_cols=(cols)
1751
+ @param [Integer or nil] cols the number of cols
1752
+ @return [nil]
1753
+ */
1754
+ static VALUE na_inspect_cols_set(VALUE mod, VALUE num)
1755
+ {
1756
+ if (RTEST(num)) {
1757
+ cumo_na_inspect_cols = NUM2INT(num);
1758
+ } else {
1759
+ cumo_na_inspect_cols = 0;
1760
+ }
1761
+ return Qnil;
1762
+ }
1763
+
1764
+
1765
+ /*
1766
+ Equality of self and other in view of numerical array.
1767
+ i.e., both arrays have same shape and corresponding elements are equal.
1768
+ @overload == other
1769
+ @param [Object] other
1770
+ @return [Boolean] true if self and other is equal.
1771
+ */
1772
+ static VALUE
1773
+ na_equal(VALUE self, volatile VALUE other)
1774
+ {
1775
+ volatile VALUE vbool;
1776
+ narray_t *na1, *na2;
1777
+ int i;
1778
+
1779
+ GetNArray(self,na1);
1780
+
1781
+ if (!rb_obj_is_kind_of(other,cNArray)) {
1782
+ other = rb_funcall(CLASS_OF(self), id_cast, 1, other);
1783
+ }
1784
+
1785
+ GetNArray(other,na2);
1786
+ if (na1->ndim != na2->ndim) {
1787
+ return Qfalse;
1788
+ }
1789
+ for (i=0; i<na1->ndim; i++) {
1790
+ if (na1->shape[i] != na2->shape[i]) {
1791
+ return Qfalse;
1792
+ }
1793
+ }
1794
+ vbool = rb_funcall(self, id_eq, 1, other);
1795
+ return (rb_funcall(vbool, id_count_false_cpu, 0)==INT2FIX(0)) ? Qtrue : Qfalse;
1796
+ }
1797
+
1798
+ /*
1799
+ Free data memory explicitly without waiting GC.
1800
+
1801
+ @return [Boolean] true if free
1802
+ */
1803
+ VALUE
1804
+ cumo_na_free_data(VALUE self)
1805
+ {
1806
+ narray_t *na;
1807
+ GetNArray(self, na);
1808
+
1809
+ if (na->type == NARRAY_DATA_T) {
1810
+ void *ptr = NA_DATA_PTR(na);
1811
+ if (ptr != NULL) {
1812
+ if (cumo_cuda_runtime_is_device_memory(ptr)) {
1813
+ cumo_cuda_runtime_free(ptr);
1814
+ } else {
1815
+ xfree(ptr);
1816
+ }
1817
+ NA_DATA_PTR(na) = NULL;
1818
+ return Qtrue;
1819
+ }
1820
+ }
1821
+
1822
+ return Qfalse;
1823
+ }
1824
+
1825
+ /* initialization of NArray Class */
1826
+ void
1827
+ Init_cumo_narray()
1828
+ {
1829
+ mCumo = rb_define_module("Cumo");
1830
+
1831
+ /*
1832
+ Document-class: Cumo::NArray
1833
+
1834
+ Cumo::NArray is the abstract super class for
1835
+ Numerical N-dimensional Array in the Ruby/Cumo module.
1836
+ Use Typed Subclasses of NArray (Cumo::DFloat, Int32, etc)
1837
+ to create data array instances.
1838
+ */
1839
+ cNArray = rb_define_class_under(mCumo, "NArray", rb_cObject);
1840
+
1841
+ #ifndef HAVE_RB_CCOMPLEX
1842
+ rb_require("complex");
1843
+ rb_cComplex = rb_const_get(rb_cObject, rb_intern("Complex"));
1844
+ #endif
1845
+
1846
+ rb_define_const(cNArray, "VERSION", rb_str_new2(CUMO_VERSION));
1847
+
1848
+ nary_eCastError = rb_define_class_under(cNArray, "CastError", rb_eStandardError);
1849
+ nary_eShapeError = rb_define_class_under(cNArray, "ShapeError", rb_eStandardError);
1850
+ nary_eOperationError = rb_define_class_under(cNArray, "OperationError", rb_eStandardError);
1851
+ nary_eDimensionError = rb_define_class_under(cNArray, "DimensionError", rb_eStandardError);
1852
+ nary_eValueError = rb_define_class_under(cNArray, "ValueError", rb_eStandardError);
1853
+
1854
+ rb_define_singleton_method(cNArray, "debug=", na_debug_set, 1);
1855
+ rb_define_singleton_method(cNArray, "profile", na_profile, 0);
1856
+ rb_define_singleton_method(cNArray, "profile=", na_profile_set, 1);
1857
+
1858
+ rb_define_singleton_method(cNArray, "inspect_rows", na_inspect_rows, 0);
1859
+ rb_define_singleton_method(cNArray, "inspect_rows=", na_inspect_rows_set, 1);
1860
+ rb_define_singleton_method(cNArray, "inspect_cols", na_inspect_cols, 0);
1861
+ rb_define_singleton_method(cNArray, "inspect_cols=", na_inspect_cols_set, 1);
1862
+
1863
+ /* Ruby allocation framework */
1864
+ rb_undef_alloc_func(cNArray);
1865
+ rb_define_method(cNArray, "initialize", na_initialize, -2);
1866
+ rb_define_method(cNArray, "initialize_copy", na_initialize_copy, 1);
1867
+
1868
+ rb_define_method(cNArray, "free", cumo_na_free_data, 0);
1869
+
1870
+ rb_define_singleton_method(cNArray, "zeros", na_s_zeros, -1);
1871
+ rb_define_singleton_method(cNArray, "ones", na_s_ones, -1);
1872
+ rb_define_singleton_method(cNArray, "linspace", na_s_linspace, -1);
1873
+ rb_define_singleton_method(cNArray, "logspace", na_s_logspace, -1);
1874
+ rb_define_singleton_method(cNArray, "eye", na_s_eye, -1);
1875
+
1876
+ rb_define_method(cNArray, "size", na_size, 0);
1877
+ rb_define_alias (cNArray, "length","size");
1878
+ rb_define_alias (cNArray, "total","size");
1879
+ rb_define_method(cNArray, "shape", na_shape, 0);
1880
+ rb_define_method(cNArray, "ndim", na_ndim,0);
1881
+ rb_define_alias (cNArray, "rank","ndim");
1882
+ rb_define_method(cNArray, "empty?", na_empty_p, 0);
1883
+
1884
+ rb_define_method(cNArray, "debug_info", nary_debug_info, 0);
1885
+
1886
+ rb_define_method(cNArray, "contiguous?", na_check_contiguous, 0);
1887
+
1888
+ rb_define_method(cNArray, "view", na_make_view, 0);
1889
+ rb_define_method(cNArray, "expand_dims", na_expand_dims, 1);
1890
+ rb_define_method(cNArray, "reverse", nary_reverse, -1);
1891
+
1892
+ rb_define_singleton_method(cNArray, "upcast", cumo_na_upcast, 1);
1893
+ rb_define_singleton_method(cNArray, "byte_size", nary_s_byte_size, 0);
1894
+
1895
+ rb_define_singleton_method(cNArray, "from_binary", nary_s_from_binary, -1);
1896
+ rb_define_alias (rb_singleton_class(cNArray), "from_string", "from_binary");
1897
+ rb_define_method(cNArray, "store_binary", nary_store_binary, -1);
1898
+ rb_define_method(cNArray, "to_binary", nary_to_binary, 0);
1899
+ rb_define_alias (cNArray, "to_string", "to_binary");
1900
+ rb_define_method(cNArray, "marshal_dump", nary_marshal_dump, 0);
1901
+ rb_define_method(cNArray, "marshal_load", nary_marshal_load, 1);
1902
+
1903
+ rb_define_method(cNArray, "byte_size", nary_byte_size, 0);
1904
+
1905
+ rb_define_method(cNArray, "cast_to", nary_cast_to, 1);
1906
+
1907
+ rb_define_method(cNArray, "coerce", nary_coerce, 1);
1908
+
1909
+ rb_define_method(cNArray, "column_major?", na_column_major_p, 0);
1910
+ rb_define_method(cNArray, "row_major?", na_row_major_p, 0);
1911
+ rb_define_method(cNArray, "byte_swapped?", na_byte_swapped_p, 0);
1912
+ rb_define_method(cNArray, "host_order?", na_host_order_p, 0);
1913
+
1914
+ rb_define_method(cNArray, "inplace", na_inplace, 0);
1915
+ rb_define_method(cNArray, "inplace?", na_inplace_p, 0);
1916
+ rb_define_method(cNArray, "inplace!", na_inplace_bang, 0);
1917
+ rb_define_method(cNArray, "out_of_place!", na_out_of_place_bang, 0);
1918
+ rb_define_alias (cNArray, "not_inplace!", "out_of_place!");
1919
+
1920
+ rb_define_method(cNArray, "==", na_equal, 1);
1921
+
1922
+ id_allocate = rb_intern("allocate");
1923
+ id_contiguous_stride = rb_intern(CONTIGUOUS_STRIDE);
1924
+ //id_element_bit_size = rb_intern(ELEMENT_BIT_SIZE);
1925
+ id_element_byte_size = rb_intern(ELEMENT_BYTE_SIZE);
1926
+
1927
+ id_fill = rb_intern("fill");
1928
+ id_seq = rb_intern("seq");
1929
+ id_logseq = rb_intern("logseq");
1930
+ id_eye = rb_intern("eye");
1931
+ id_UPCAST = rb_intern("UPCAST");
1932
+ id_cast = rb_intern("cast");
1933
+ id_dup = rb_intern("dup");
1934
+ id_to_host = rb_intern("to_host");
1935
+ id_bracket = rb_intern("[]");
1936
+ id_shift_left = rb_intern("<<");
1937
+ id_eq = rb_intern("eq");
1938
+ id_count_false = rb_intern("count_false");
1939
+ id_count_false_cpu = rb_intern("count_false_cpu");
1940
+ id_axis = rb_intern("axis");
1941
+ id_nan = rb_intern("nan");
1942
+ id_keepdims = rb_intern("keepdims");
1943
+
1944
+ sym_reduce = ID2SYM(rb_intern("reduce"));
1945
+ sym_option = ID2SYM(rb_intern("option"));
1946
+ sym_loop_opt = ID2SYM(rb_intern("loop_opt"));
1947
+ sym_init = ID2SYM(rb_intern("init"));
1948
+ }