cumo 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (266) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +27 -0
  3. data/.travis.yml +5 -0
  4. data/3rd_party/mkmf-cu/.gitignore +36 -0
  5. data/3rd_party/mkmf-cu/Gemfile +3 -0
  6. data/3rd_party/mkmf-cu/LICENSE +21 -0
  7. data/3rd_party/mkmf-cu/README.md +36 -0
  8. data/3rd_party/mkmf-cu/Rakefile +11 -0
  9. data/3rd_party/mkmf-cu/bin/mkmf-cu-nvcc +4 -0
  10. data/3rd_party/mkmf-cu/lib/mkmf-cu.rb +32 -0
  11. data/3rd_party/mkmf-cu/lib/mkmf-cu/cli.rb +80 -0
  12. data/3rd_party/mkmf-cu/lib/mkmf-cu/nvcc.rb +157 -0
  13. data/3rd_party/mkmf-cu/mkmf-cu.gemspec +16 -0
  14. data/3rd_party/mkmf-cu/test/test_mkmf-cu.rb +67 -0
  15. data/CODE_OF_CONDUCT.md +46 -0
  16. data/Gemfile +8 -0
  17. data/LICENSE.txt +82 -0
  18. data/README.md +252 -0
  19. data/Rakefile +43 -0
  20. data/bench/broadcast_fp32.rb +138 -0
  21. data/bench/cumo_bench.rb +193 -0
  22. data/bench/numo_bench.rb +138 -0
  23. data/bench/reduction_fp32.rb +117 -0
  24. data/bin/console +14 -0
  25. data/bin/setup +8 -0
  26. data/cumo.gemspec +32 -0
  27. data/ext/cumo/cuda/cublas.c +278 -0
  28. data/ext/cumo/cuda/driver.c +421 -0
  29. data/ext/cumo/cuda/memory_pool.cpp +185 -0
  30. data/ext/cumo/cuda/memory_pool_impl.cpp +308 -0
  31. data/ext/cumo/cuda/memory_pool_impl.hpp +370 -0
  32. data/ext/cumo/cuda/memory_pool_impl_test.cpp +554 -0
  33. data/ext/cumo/cuda/nvrtc.c +207 -0
  34. data/ext/cumo/cuda/runtime.c +167 -0
  35. data/ext/cumo/cumo.c +148 -0
  36. data/ext/cumo/depend.erb +58 -0
  37. data/ext/cumo/extconf.rb +179 -0
  38. data/ext/cumo/include/cumo.h +25 -0
  39. data/ext/cumo/include/cumo/compat.h +23 -0
  40. data/ext/cumo/include/cumo/cuda/cublas.h +153 -0
  41. data/ext/cumo/include/cumo/cuda/cumo_thrust.hpp +187 -0
  42. data/ext/cumo/include/cumo/cuda/cumo_thrust_complex.hpp +79 -0
  43. data/ext/cumo/include/cumo/cuda/driver.h +22 -0
  44. data/ext/cumo/include/cumo/cuda/memory_pool.h +28 -0
  45. data/ext/cumo/include/cumo/cuda/nvrtc.h +22 -0
  46. data/ext/cumo/include/cumo/cuda/runtime.h +40 -0
  47. data/ext/cumo/include/cumo/indexer.h +238 -0
  48. data/ext/cumo/include/cumo/intern.h +142 -0
  49. data/ext/cumo/include/cumo/intern_fwd.h +38 -0
  50. data/ext/cumo/include/cumo/intern_kernel.h +6 -0
  51. data/ext/cumo/include/cumo/narray.h +429 -0
  52. data/ext/cumo/include/cumo/narray_kernel.h +149 -0
  53. data/ext/cumo/include/cumo/ndloop.h +95 -0
  54. data/ext/cumo/include/cumo/reduce_kernel.h +126 -0
  55. data/ext/cumo/include/cumo/template.h +158 -0
  56. data/ext/cumo/include/cumo/template_kernel.h +77 -0
  57. data/ext/cumo/include/cumo/types/bit.h +40 -0
  58. data/ext/cumo/include/cumo/types/bit_kernel.h +34 -0
  59. data/ext/cumo/include/cumo/types/complex.h +402 -0
  60. data/ext/cumo/include/cumo/types/complex_kernel.h +414 -0
  61. data/ext/cumo/include/cumo/types/complex_macro.h +382 -0
  62. data/ext/cumo/include/cumo/types/complex_macro_kernel.h +186 -0
  63. data/ext/cumo/include/cumo/types/dcomplex.h +46 -0
  64. data/ext/cumo/include/cumo/types/dcomplex_kernel.h +13 -0
  65. data/ext/cumo/include/cumo/types/dfloat.h +47 -0
  66. data/ext/cumo/include/cumo/types/dfloat_kernel.h +14 -0
  67. data/ext/cumo/include/cumo/types/float_def.h +34 -0
  68. data/ext/cumo/include/cumo/types/float_def_kernel.h +39 -0
  69. data/ext/cumo/include/cumo/types/float_macro.h +191 -0
  70. data/ext/cumo/include/cumo/types/float_macro_kernel.h +158 -0
  71. data/ext/cumo/include/cumo/types/int16.h +24 -0
  72. data/ext/cumo/include/cumo/types/int16_kernel.h +23 -0
  73. data/ext/cumo/include/cumo/types/int32.h +24 -0
  74. data/ext/cumo/include/cumo/types/int32_kernel.h +19 -0
  75. data/ext/cumo/include/cumo/types/int64.h +24 -0
  76. data/ext/cumo/include/cumo/types/int64_kernel.h +19 -0
  77. data/ext/cumo/include/cumo/types/int8.h +24 -0
  78. data/ext/cumo/include/cumo/types/int8_kernel.h +19 -0
  79. data/ext/cumo/include/cumo/types/int_macro.h +67 -0
  80. data/ext/cumo/include/cumo/types/int_macro_kernel.h +48 -0
  81. data/ext/cumo/include/cumo/types/real_accum.h +486 -0
  82. data/ext/cumo/include/cumo/types/real_accum_kernel.h +101 -0
  83. data/ext/cumo/include/cumo/types/robj_macro.h +80 -0
  84. data/ext/cumo/include/cumo/types/robj_macro_kernel.h +0 -0
  85. data/ext/cumo/include/cumo/types/robject.h +27 -0
  86. data/ext/cumo/include/cumo/types/robject_kernel.h +7 -0
  87. data/ext/cumo/include/cumo/types/scomplex.h +46 -0
  88. data/ext/cumo/include/cumo/types/scomplex_kernel.h +13 -0
  89. data/ext/cumo/include/cumo/types/sfloat.h +48 -0
  90. data/ext/cumo/include/cumo/types/sfloat_kernel.h +14 -0
  91. data/ext/cumo/include/cumo/types/uint16.h +25 -0
  92. data/ext/cumo/include/cumo/types/uint16_kernel.h +20 -0
  93. data/ext/cumo/include/cumo/types/uint32.h +25 -0
  94. data/ext/cumo/include/cumo/types/uint32_kernel.h +20 -0
  95. data/ext/cumo/include/cumo/types/uint64.h +25 -0
  96. data/ext/cumo/include/cumo/types/uint64_kernel.h +20 -0
  97. data/ext/cumo/include/cumo/types/uint8.h +25 -0
  98. data/ext/cumo/include/cumo/types/uint8_kernel.h +20 -0
  99. data/ext/cumo/include/cumo/types/uint_macro.h +58 -0
  100. data/ext/cumo/include/cumo/types/uint_macro_kernel.h +38 -0
  101. data/ext/cumo/include/cumo/types/xint_macro.h +169 -0
  102. data/ext/cumo/include/cumo/types/xint_macro_kernel.h +88 -0
  103. data/ext/cumo/narray/SFMT-params.h +97 -0
  104. data/ext/cumo/narray/SFMT-params19937.h +46 -0
  105. data/ext/cumo/narray/SFMT.c +620 -0
  106. data/ext/cumo/narray/SFMT.h +167 -0
  107. data/ext/cumo/narray/array.c +638 -0
  108. data/ext/cumo/narray/data.c +961 -0
  109. data/ext/cumo/narray/gen/cogen.rb +56 -0
  110. data/ext/cumo/narray/gen/cogen_kernel.rb +58 -0
  111. data/ext/cumo/narray/gen/def/bit.rb +37 -0
  112. data/ext/cumo/narray/gen/def/dcomplex.rb +39 -0
  113. data/ext/cumo/narray/gen/def/dfloat.rb +37 -0
  114. data/ext/cumo/narray/gen/def/int16.rb +36 -0
  115. data/ext/cumo/narray/gen/def/int32.rb +36 -0
  116. data/ext/cumo/narray/gen/def/int64.rb +36 -0
  117. data/ext/cumo/narray/gen/def/int8.rb +36 -0
  118. data/ext/cumo/narray/gen/def/robject.rb +37 -0
  119. data/ext/cumo/narray/gen/def/scomplex.rb +39 -0
  120. data/ext/cumo/narray/gen/def/sfloat.rb +37 -0
  121. data/ext/cumo/narray/gen/def/uint16.rb +36 -0
  122. data/ext/cumo/narray/gen/def/uint32.rb +36 -0
  123. data/ext/cumo/narray/gen/def/uint64.rb +36 -0
  124. data/ext/cumo/narray/gen/def/uint8.rb +36 -0
  125. data/ext/cumo/narray/gen/erbpp2.rb +346 -0
  126. data/ext/cumo/narray/gen/narray_def.rb +268 -0
  127. data/ext/cumo/narray/gen/spec.rb +425 -0
  128. data/ext/cumo/narray/gen/tmpl/accum.c +86 -0
  129. data/ext/cumo/narray/gen/tmpl/accum_binary.c +121 -0
  130. data/ext/cumo/narray/gen/tmpl/accum_binary_kernel.cu +61 -0
  131. data/ext/cumo/narray/gen/tmpl/accum_index.c +119 -0
  132. data/ext/cumo/narray/gen/tmpl/accum_index_kernel.cu +66 -0
  133. data/ext/cumo/narray/gen/tmpl/accum_kernel.cu +12 -0
  134. data/ext/cumo/narray/gen/tmpl/alloc_func.c +107 -0
  135. data/ext/cumo/narray/gen/tmpl/allocate.c +37 -0
  136. data/ext/cumo/narray/gen/tmpl/aref.c +66 -0
  137. data/ext/cumo/narray/gen/tmpl/aref_cpu.c +50 -0
  138. data/ext/cumo/narray/gen/tmpl/aset.c +56 -0
  139. data/ext/cumo/narray/gen/tmpl/binary.c +162 -0
  140. data/ext/cumo/narray/gen/tmpl/binary2.c +70 -0
  141. data/ext/cumo/narray/gen/tmpl/binary2_kernel.cu +15 -0
  142. data/ext/cumo/narray/gen/tmpl/binary_kernel.cu +31 -0
  143. data/ext/cumo/narray/gen/tmpl/binary_s.c +45 -0
  144. data/ext/cumo/narray/gen/tmpl/binary_s_kernel.cu +15 -0
  145. data/ext/cumo/narray/gen/tmpl/bincount.c +181 -0
  146. data/ext/cumo/narray/gen/tmpl/cast.c +44 -0
  147. data/ext/cumo/narray/gen/tmpl/cast_array.c +13 -0
  148. data/ext/cumo/narray/gen/tmpl/class.c +9 -0
  149. data/ext/cumo/narray/gen/tmpl/class_kernel.cu +6 -0
  150. data/ext/cumo/narray/gen/tmpl/clip.c +121 -0
  151. data/ext/cumo/narray/gen/tmpl/coerce_cast.c +10 -0
  152. data/ext/cumo/narray/gen/tmpl/complex_accum_kernel.cu +129 -0
  153. data/ext/cumo/narray/gen/tmpl/cond_binary.c +68 -0
  154. data/ext/cumo/narray/gen/tmpl/cond_binary_kernel.cu +18 -0
  155. data/ext/cumo/narray/gen/tmpl/cond_unary.c +46 -0
  156. data/ext/cumo/narray/gen/tmpl/cum.c +50 -0
  157. data/ext/cumo/narray/gen/tmpl/each.c +47 -0
  158. data/ext/cumo/narray/gen/tmpl/each_with_index.c +70 -0
  159. data/ext/cumo/narray/gen/tmpl/ewcomp.c +79 -0
  160. data/ext/cumo/narray/gen/tmpl/ewcomp_kernel.cu +19 -0
  161. data/ext/cumo/narray/gen/tmpl/extract.c +22 -0
  162. data/ext/cumo/narray/gen/tmpl/extract_cpu.c +26 -0
  163. data/ext/cumo/narray/gen/tmpl/extract_data.c +53 -0
  164. data/ext/cumo/narray/gen/tmpl/eye.c +105 -0
  165. data/ext/cumo/narray/gen/tmpl/eye_kernel.cu +19 -0
  166. data/ext/cumo/narray/gen/tmpl/fill.c +52 -0
  167. data/ext/cumo/narray/gen/tmpl/fill_kernel.cu +29 -0
  168. data/ext/cumo/narray/gen/tmpl/float_accum_kernel.cu +106 -0
  169. data/ext/cumo/narray/gen/tmpl/format.c +62 -0
  170. data/ext/cumo/narray/gen/tmpl/format_to_a.c +49 -0
  171. data/ext/cumo/narray/gen/tmpl/frexp.c +38 -0
  172. data/ext/cumo/narray/gen/tmpl/gemm.c +203 -0
  173. data/ext/cumo/narray/gen/tmpl/init_class.c +20 -0
  174. data/ext/cumo/narray/gen/tmpl/init_module.c +12 -0
  175. data/ext/cumo/narray/gen/tmpl/inspect.c +21 -0
  176. data/ext/cumo/narray/gen/tmpl/lib.c +50 -0
  177. data/ext/cumo/narray/gen/tmpl/lib_kernel.cu +24 -0
  178. data/ext/cumo/narray/gen/tmpl/logseq.c +102 -0
  179. data/ext/cumo/narray/gen/tmpl/logseq_kernel.cu +31 -0
  180. data/ext/cumo/narray/gen/tmpl/map_with_index.c +98 -0
  181. data/ext/cumo/narray/gen/tmpl/median.c +66 -0
  182. data/ext/cumo/narray/gen/tmpl/minmax.c +47 -0
  183. data/ext/cumo/narray/gen/tmpl/module.c +9 -0
  184. data/ext/cumo/narray/gen/tmpl/module_kernel.cu +1 -0
  185. data/ext/cumo/narray/gen/tmpl/new_dim0.c +15 -0
  186. data/ext/cumo/narray/gen/tmpl/new_dim0_kernel.cu +8 -0
  187. data/ext/cumo/narray/gen/tmpl/poly.c +50 -0
  188. data/ext/cumo/narray/gen/tmpl/pow.c +97 -0
  189. data/ext/cumo/narray/gen/tmpl/pow_kernel.cu +29 -0
  190. data/ext/cumo/narray/gen/tmpl/powint.c +17 -0
  191. data/ext/cumo/narray/gen/tmpl/qsort.c +212 -0
  192. data/ext/cumo/narray/gen/tmpl/rand.c +168 -0
  193. data/ext/cumo/narray/gen/tmpl/rand_norm.c +121 -0
  194. data/ext/cumo/narray/gen/tmpl/real_accum_kernel.cu +75 -0
  195. data/ext/cumo/narray/gen/tmpl/seq.c +112 -0
  196. data/ext/cumo/narray/gen/tmpl/seq_kernel.cu +43 -0
  197. data/ext/cumo/narray/gen/tmpl/set2.c +57 -0
  198. data/ext/cumo/narray/gen/tmpl/sort.c +48 -0
  199. data/ext/cumo/narray/gen/tmpl/sort_index.c +111 -0
  200. data/ext/cumo/narray/gen/tmpl/store.c +41 -0
  201. data/ext/cumo/narray/gen/tmpl/store_array.c +187 -0
  202. data/ext/cumo/narray/gen/tmpl/store_array_kernel.cu +58 -0
  203. data/ext/cumo/narray/gen/tmpl/store_bit.c +86 -0
  204. data/ext/cumo/narray/gen/tmpl/store_bit_kernel.cu +66 -0
  205. data/ext/cumo/narray/gen/tmpl/store_from.c +81 -0
  206. data/ext/cumo/narray/gen/tmpl/store_from_kernel.cu +58 -0
  207. data/ext/cumo/narray/gen/tmpl/store_kernel.cu +3 -0
  208. data/ext/cumo/narray/gen/tmpl/store_numeric.c +9 -0
  209. data/ext/cumo/narray/gen/tmpl/to_a.c +43 -0
  210. data/ext/cumo/narray/gen/tmpl/unary.c +132 -0
  211. data/ext/cumo/narray/gen/tmpl/unary2.c +60 -0
  212. data/ext/cumo/narray/gen/tmpl/unary_kernel.cu +72 -0
  213. data/ext/cumo/narray/gen/tmpl/unary_ret2.c +34 -0
  214. data/ext/cumo/narray/gen/tmpl/unary_s.c +86 -0
  215. data/ext/cumo/narray/gen/tmpl/unary_s_kernel.cu +58 -0
  216. data/ext/cumo/narray/gen/tmpl_bit/allocate.c +24 -0
  217. data/ext/cumo/narray/gen/tmpl_bit/aref.c +54 -0
  218. data/ext/cumo/narray/gen/tmpl_bit/aref_cpu.c +57 -0
  219. data/ext/cumo/narray/gen/tmpl_bit/aset.c +56 -0
  220. data/ext/cumo/narray/gen/tmpl_bit/binary.c +98 -0
  221. data/ext/cumo/narray/gen/tmpl_bit/bit_count.c +64 -0
  222. data/ext/cumo/narray/gen/tmpl_bit/bit_count_cpu.c +88 -0
  223. data/ext/cumo/narray/gen/tmpl_bit/bit_count_kernel.cu +76 -0
  224. data/ext/cumo/narray/gen/tmpl_bit/bit_reduce.c +133 -0
  225. data/ext/cumo/narray/gen/tmpl_bit/each.c +48 -0
  226. data/ext/cumo/narray/gen/tmpl_bit/each_with_index.c +70 -0
  227. data/ext/cumo/narray/gen/tmpl_bit/extract.c +30 -0
  228. data/ext/cumo/narray/gen/tmpl_bit/extract_cpu.c +29 -0
  229. data/ext/cumo/narray/gen/tmpl_bit/fill.c +69 -0
  230. data/ext/cumo/narray/gen/tmpl_bit/format.c +64 -0
  231. data/ext/cumo/narray/gen/tmpl_bit/format_to_a.c +51 -0
  232. data/ext/cumo/narray/gen/tmpl_bit/inspect.c +21 -0
  233. data/ext/cumo/narray/gen/tmpl_bit/mask.c +136 -0
  234. data/ext/cumo/narray/gen/tmpl_bit/none_p.c +14 -0
  235. data/ext/cumo/narray/gen/tmpl_bit/store_array.c +108 -0
  236. data/ext/cumo/narray/gen/tmpl_bit/store_bit.c +70 -0
  237. data/ext/cumo/narray/gen/tmpl_bit/store_from.c +60 -0
  238. data/ext/cumo/narray/gen/tmpl_bit/to_a.c +47 -0
  239. data/ext/cumo/narray/gen/tmpl_bit/unary.c +81 -0
  240. data/ext/cumo/narray/gen/tmpl_bit/where.c +90 -0
  241. data/ext/cumo/narray/gen/tmpl_bit/where2.c +95 -0
  242. data/ext/cumo/narray/index.c +880 -0
  243. data/ext/cumo/narray/kwargs.c +153 -0
  244. data/ext/cumo/narray/math.c +142 -0
  245. data/ext/cumo/narray/narray.c +1948 -0
  246. data/ext/cumo/narray/ndloop.c +2105 -0
  247. data/ext/cumo/narray/rand.c +45 -0
  248. data/ext/cumo/narray/step.c +474 -0
  249. data/ext/cumo/narray/struct.c +886 -0
  250. data/lib/cumo.rb +3 -0
  251. data/lib/cumo/cuda.rb +11 -0
  252. data/lib/cumo/cuda/compile_error.rb +36 -0
  253. data/lib/cumo/cuda/compiler.rb +161 -0
  254. data/lib/cumo/cuda/device.rb +47 -0
  255. data/lib/cumo/cuda/link_state.rb +31 -0
  256. data/lib/cumo/cuda/module.rb +40 -0
  257. data/lib/cumo/cuda/nvrtc_program.rb +27 -0
  258. data/lib/cumo/linalg.rb +12 -0
  259. data/lib/cumo/narray.rb +2 -0
  260. data/lib/cumo/narray/extra.rb +1278 -0
  261. data/lib/erbpp.rb +294 -0
  262. data/lib/erbpp/line_number.rb +137 -0
  263. data/lib/erbpp/narray_def.rb +381 -0
  264. data/numo-narray-version +1 -0
  265. data/run.gdb +7 -0
  266. metadata +353 -0
@@ -0,0 +1,153 @@
1
+ /**********************************************************************
2
+
3
+ Function to extract Keyword argument for ruby-2.1.x
4
+ Copied from class.c in ruby-2.4.2
5
+
6
+ Copyright (C) 1993-2007 Yukihiro Matsumoto
7
+
8
+ **********************************************************************/
9
+ #include <ruby.h>
10
+ #define rb_hash_tbl_raw(hash) rb_hash_tbl(hash)
11
+
12
+ /* from internal.h */
13
+ struct RBasicRaw {
14
+ VALUE flags;
15
+ VALUE klass;
16
+ };
17
+
18
+ #define RBASIC_SET_CLASS(obj, cls) do { \
19
+ VALUE _obj_ = (obj); \
20
+ RB_OBJ_WRITE(_obj_, &((struct RBasicRaw *)(_obj_))->klass, cls); \
21
+ } while (0)
22
+
23
+ /* from class.c */
24
+ VALUE
25
+ rb_keyword_error_new(const char *error, VALUE keys)
26
+ {
27
+ const char *msg = "";
28
+ VALUE error_message;
29
+
30
+ if (RARRAY_LEN(keys) == 1) {
31
+ keys = RARRAY_AREF(keys, 0);
32
+ }
33
+ else {
34
+ keys = rb_ary_join(keys, rb_usascii_str_new2(", "));
35
+ msg = "s";
36
+ }
37
+
38
+ error_message = rb_sprintf("%s keyword%s: %"PRIsVALUE, error, msg, keys);
39
+
40
+ return rb_exc_new_str(rb_eArgError, error_message);
41
+ }
42
+
43
+ NORETURN(static void rb_keyword_error(const char *error, VALUE keys));
44
+ static void
45
+ rb_keyword_error(const char *error, VALUE keys)
46
+ {
47
+ rb_exc_raise(rb_keyword_error_new(error, keys));
48
+ }
49
+
50
+ NORETURN(static void unknown_keyword_error(VALUE hash, const ID *table, int keywords));
51
+ static void
52
+ unknown_keyword_error(VALUE hash, const ID *table, int keywords)
53
+ {
54
+ st_table *tbl = rb_hash_tbl_raw(hash);
55
+ VALUE keys;
56
+ int i;
57
+ for (i = 0; i < keywords; i++) {
58
+ st_data_t key = ID2SYM(table[i]);
59
+ st_delete(tbl, &key, NULL);
60
+ }
61
+ keys = rb_funcallv(hash, rb_intern("keys"), 0, 0);
62
+ if (!RB_TYPE_P(keys, T_ARRAY)) rb_raise(rb_eArgError, "unknown keyword");
63
+ rb_keyword_error("unknown", keys);
64
+ }
65
+
66
+ static int
67
+ separate_symbol(st_data_t key, st_data_t value, st_data_t arg)
68
+ {
69
+ VALUE *kwdhash = (VALUE *)arg;
70
+
71
+ if (!SYMBOL_P(key)) kwdhash++;
72
+ if (!*kwdhash) *kwdhash = rb_hash_new();
73
+ rb_hash_aset(*kwdhash, (VALUE)key, (VALUE)value);
74
+ return ST_CONTINUE;
75
+ }
76
+
77
+ VALUE
78
+ rb_extract_keywords(VALUE *orighash)
79
+ {
80
+ VALUE parthash[2] = {0, 0};
81
+ VALUE hash = *orighash;
82
+
83
+ if (RHASH_EMPTY_P(hash)) {
84
+ *orighash = 0;
85
+ return hash;
86
+ }
87
+ st_foreach(rb_hash_tbl_raw(hash), separate_symbol, (st_data_t)&parthash);
88
+ *orighash = parthash[1];
89
+ if (parthash[1] && RBASIC_CLASS(hash) != rb_cHash) {
90
+ RBASIC_SET_CLASS(parthash[1], RBASIC_CLASS(hash));
91
+ }
92
+ return parthash[0];
93
+ }
94
+
95
+ int
96
+ rb_get_kwargs(VALUE keyword_hash, const ID *table, int required, int optional, VALUE *values)
97
+ {
98
+ int i = 0, j;
99
+ int rest = 0;
100
+ VALUE missing = Qnil;
101
+ st_data_t key;
102
+
103
+ #define extract_kwarg(keyword, val) \
104
+ (key = (st_data_t)(keyword), values ? \
105
+ st_delete(rb_hash_tbl_raw(keyword_hash), &key, (val)) : \
106
+ st_lookup(rb_hash_tbl_raw(keyword_hash), key, (val)))
107
+
108
+ if (NIL_P(keyword_hash)) keyword_hash = 0;
109
+
110
+ if (optional < 0) {
111
+ rest = 1;
112
+ optional = -1-optional;
113
+ }
114
+ if (values) {
115
+ for (j = 0; j < required + optional; j++) {
116
+ values[j] = Qundef;
117
+ }
118
+ }
119
+ if (required) {
120
+ for (; i < required; i++) {
121
+ VALUE keyword = ID2SYM(table[i]);
122
+ if (keyword_hash) {
123
+ st_data_t val;
124
+ if (extract_kwarg(keyword, &val)) {
125
+ if (values) values[i] = (VALUE)val;
126
+ continue;
127
+ }
128
+ }
129
+ if (NIL_P(missing)) missing = rb_ary_tmp_new(1);
130
+ rb_ary_push(missing, keyword);
131
+ }
132
+ if (!NIL_P(missing)) {
133
+ rb_keyword_error("missing", missing);
134
+ }
135
+ }
136
+ j = i;
137
+ if (optional && keyword_hash) {
138
+ for (i = 0; i < optional; i++) {
139
+ st_data_t val;
140
+ if (extract_kwarg(ID2SYM(table[required+i]), &val)) {
141
+ if (values) values[required+i] = (VALUE)val;
142
+ j++;
143
+ }
144
+ }
145
+ }
146
+ if (!rest && keyword_hash) {
147
+ if (RHASH_SIZE(keyword_hash) > (unsigned int)(values ? 0 : j)) {
148
+ unknown_keyword_error(keyword_hash, table, required+optional);
149
+ }
150
+ }
151
+ return j;
152
+ #undef extract_kwarg
153
+ }
@@ -0,0 +1,142 @@
1
+ #include <ruby.h>
2
+ #include "cumo/narray.h"
3
+
4
+ VALUE cumo_mNMath;
5
+ extern VALUE cumo_mDFloatMath, cumo_mDComplexMath;
6
+ extern VALUE cumo_mSFloatMath, cumo_mSComplexMath;
7
+ static ID id_send;
8
+ static ID id_UPCAST;
9
+ static ID id_DISPATCH;
10
+ static ID id_extract;
11
+
12
+ static VALUE
13
+ nary_type_s_upcast(VALUE type1, VALUE type2)
14
+ {
15
+ VALUE upcast_hash;
16
+ VALUE result_type;
17
+
18
+ if (type1==type2) return type1;
19
+ upcast_hash = rb_const_get(type1, id_UPCAST);
20
+ result_type = rb_hash_aref(upcast_hash, type2);
21
+ if (NIL_P(result_type)) {
22
+ if (TYPE(type2)==T_CLASS) {
23
+ if ( RTEST(rb_class_inherited_p(type2,cNArray)) ) {
24
+ upcast_hash = rb_const_get(type2, id_UPCAST);
25
+ result_type = rb_hash_aref(upcast_hash, type1);
26
+ }
27
+ }
28
+ }
29
+ return result_type;
30
+ }
31
+
32
+
33
+ static VALUE nary_math_cast2(VALUE type1, VALUE type2)
34
+ {
35
+ if ( RTEST(rb_class_inherited_p( type1, cNArray )) ){
36
+ return nary_type_s_upcast( type1, type2 );
37
+ }
38
+ if ( RTEST(rb_class_inherited_p( type2, cNArray )) ){
39
+ return nary_type_s_upcast( type2, type1 );
40
+ }
41
+ if ( RTEST(rb_class_inherited_p( type1, rb_cNumeric )) &&
42
+ RTEST(rb_class_inherited_p( type2, rb_cNumeric )) ){
43
+ if ( RTEST(rb_class_inherited_p( type1, rb_cComplex)) ||
44
+ RTEST(rb_class_inherited_p( type2, rb_cComplex )) ){
45
+ return rb_cComplex;
46
+ }
47
+ return rb_cFloat;
48
+ }
49
+ return type2;
50
+ }
51
+
52
+
53
+ VALUE na_ary_composition_dtype(VALUE);
54
+
55
+ static VALUE nary_mathcast(int argc, VALUE *argv)
56
+ {
57
+ VALUE type, type2;
58
+ int i;
59
+
60
+ type = na_ary_composition_dtype(argv[0]);
61
+ for (i=1; i<argc; i++) {
62
+ type2 = na_ary_composition_dtype(argv[i]);
63
+ type = nary_math_cast2(type, type2);
64
+ if (NIL_P(type)) {
65
+ rb_raise(rb_eTypeError,"includes unknown DataType for upcast");
66
+ }
67
+ }
68
+ return type;
69
+ }
70
+
71
+
72
+ /*
73
+ Dispatches method to Math module of upcasted type,
74
+ eg, Cumo::DFloat::Math.
75
+ @overload method_missing(name,x,...)
76
+ @param [Symbol] name method name.
77
+ @param [NArray,Numeric] x input array.
78
+ @return [NArray] result.
79
+ */
80
+ static VALUE nary_math_method_missing(int argc, VALUE *argv, VALUE mod)
81
+ {
82
+ VALUE type, ans, typemod, hash;
83
+ if (argc>1) {
84
+ type = nary_mathcast(argc-1,argv+1);
85
+
86
+ hash = rb_const_get(mod, id_DISPATCH);
87
+ typemod = rb_hash_aref( hash, type );
88
+ if (NIL_P(typemod)) {
89
+ rb_raise(rb_eTypeError,"%s is unknown for Cumo::NMath",
90
+ rb_class2name(type));
91
+ }
92
+
93
+ ans = rb_funcall2(typemod,id_send,argc,argv);
94
+
95
+ if (!RTEST(rb_class_inherited_p(type,cNArray)) &&
96
+ IsNArray(ans) ) {
97
+ ans = rb_funcall(ans,id_extract,0);
98
+ }
99
+ return ans;
100
+ }
101
+ rb_raise(rb_eArgError,"argument or method missing");
102
+ return Qnil;
103
+ }
104
+
105
+
106
+ void
107
+ Init_cumo_nary_math()
108
+ {
109
+ VALUE hCast;
110
+
111
+ cumo_mNMath = rb_define_module_under(mCumo, "NMath");
112
+ rb_define_singleton_method(cumo_mNMath, "method_missing", nary_math_method_missing, -1);
113
+
114
+ hCast = rb_hash_new();
115
+ rb_define_const(cumo_mNMath, "DISPATCH", hCast);
116
+ rb_hash_aset(hCast, cumo_cInt64, cumo_mDFloatMath);
117
+ rb_hash_aset(hCast, cumo_cInt32, cumo_mDFloatMath);
118
+ rb_hash_aset(hCast, cumo_cInt16, cumo_mDFloatMath);
119
+ rb_hash_aset(hCast, cumo_cInt8, cumo_mDFloatMath);
120
+ rb_hash_aset(hCast, cumo_cUInt64, cumo_mDFloatMath);
121
+ rb_hash_aset(hCast, cumo_cUInt32, cumo_mDFloatMath);
122
+ rb_hash_aset(hCast, cumo_cUInt16, cumo_mDFloatMath);
123
+ rb_hash_aset(hCast, cumo_cUInt8, cumo_mDFloatMath);
124
+ rb_hash_aset(hCast, cumo_cDFloat, cumo_mDFloatMath);
125
+ rb_hash_aset(hCast, cumo_cDFloat, cumo_mDFloatMath);
126
+ rb_hash_aset(hCast, cumo_cDComplex, cumo_mDComplexMath);
127
+ rb_hash_aset(hCast, cumo_cSFloat, cumo_mSFloatMath);
128
+ rb_hash_aset(hCast, cumo_cSComplex, cumo_mSComplexMath);
129
+ #ifdef RUBY_INTEGER_UNIFICATION
130
+ rb_hash_aset(hCast, rb_cInteger, rb_mMath);
131
+ #else
132
+ rb_hash_aset(hCast, rb_cFixnum, rb_mMath);
133
+ rb_hash_aset(hCast, rb_cBignum, rb_mMath);
134
+ #endif
135
+ rb_hash_aset(hCast, rb_cFloat, rb_mMath);
136
+ rb_hash_aset(hCast, rb_cComplex, cumo_mDComplexMath);
137
+
138
+ id_send = rb_intern("send");
139
+ id_UPCAST = rb_intern("UPCAST");
140
+ id_DISPATCH = rb_intern("DISPATCH");
141
+ id_extract = rb_intern("extract");
142
+ }
@@ -0,0 +1,1948 @@
1
+ #define CUMO_NARRAY_C
2
+ #include <ruby.h>
3
+ #include <assert.h>
4
+ #include "cumo.h"
5
+ #include "cumo/narray.h"
6
+ #include "cumo/cuda/memory_pool.h"
7
+ #include "cumo/cuda/runtime.h"
8
+
9
+ /* global variables within this module */
10
+ VALUE cumo_cNArray;
11
+ VALUE rb_mCumo;
12
+ VALUE nary_eCastError;
13
+ VALUE nary_eShapeError;
14
+ VALUE nary_eOperationError;
15
+ VALUE nary_eDimensionError;
16
+ VALUE nary_eValueError;
17
+
18
+ static ID id_contiguous_stride;
19
+ static ID id_allocate;
20
+ static ID id_element_byte_size;
21
+ static ID id_fill;
22
+ static ID id_seq;
23
+ static ID id_logseq;
24
+ static ID id_eye;
25
+ static ID id_UPCAST;
26
+ static ID id_cast;
27
+ static ID id_dup;
28
+ static ID id_to_host;
29
+ static ID id_bracket;
30
+ static ID id_shift_left;
31
+ static ID id_eq;
32
+ static ID id_count_false;
33
+ static ID id_count_false_cpu;
34
+ static ID id_axis;
35
+ static ID id_nan;
36
+ static ID id_keepdims;
37
+
38
+ VALUE sym_reduce;
39
+ VALUE sym_option;
40
+ VALUE sym_loop_opt;
41
+ VALUE sym_init;
42
+
43
+ VALUE na_cStep;
44
+ #ifndef HAVE_RB_CCOMPLEX
45
+ VALUE rb_cComplex;
46
+ #endif
47
+
48
+ int cumo_na_inspect_rows=20;
49
+ int cumo_na_inspect_cols=80;
50
+
51
+ const rb_data_type_t na_data_type = {
52
+ "Cumo::NArray",
53
+ {0, 0, 0,}, 0, 0, 0,
54
+ };
55
+
56
+ static void
57
+ nary_debug_info_nadata(VALUE self)
58
+ {
59
+ narray_data_t *na;
60
+ GetNArrayData(self,na);
61
+
62
+ printf(" ptr = 0x%"SZF"x\n", (size_t)(na->ptr));
63
+ }
64
+
65
+
66
+ static VALUE
67
+ nary_debug_info_naview(VALUE self)
68
+ {
69
+ int i;
70
+ narray_view_t *na;
71
+ size_t *idx;
72
+ size_t j;
73
+ GetNArrayView(self,na);
74
+
75
+ printf(" data = 0x%"SZF"x\n", (size_t)na->data);
76
+ printf(" offset = %"SZF"d\n", (size_t)na->offset);
77
+ printf(" stridx = 0x%"SZF"x\n", (size_t)na->stridx);
78
+
79
+ if (na->stridx) {
80
+ printf(" stridx = [");
81
+ for (i=0; i<na->base.ndim; i++) {
82
+ if (SDX_IS_INDEX(na->stridx[i])) {
83
+
84
+ idx = SDX_GET_INDEX(na->stridx[i]);
85
+ printf(" index[%d]=[", i);
86
+ for (j=0; j<na->base.shape[i]; j++) {
87
+ printf(" %"SZF"d", idx[j]);
88
+ }
89
+ printf(" ] ");
90
+
91
+ } else {
92
+ printf(" %"SZF"d", SDX_GET_STRIDE(na->stridx[i]));
93
+ }
94
+ }
95
+ printf(" ]\n");
96
+ }
97
+ return Qnil;
98
+ }
99
+
100
+
101
+ VALUE
102
+ nary_debug_info(VALUE self)
103
+ {
104
+ int i;
105
+ narray_t *na;
106
+ GetNArray(self,na);
107
+
108
+ printf("%s:\n",rb_class2name(CLASS_OF(self)));
109
+ printf(" id = 0x%"PRI_VALUE_PREFIX"x\n", self);
110
+ printf(" type = %d\n", na->type);
111
+ printf(" flag = [%d,%d]\n", na->flag[0], na->flag[1]);
112
+ printf(" size = %"SZF"d\n", na->size);
113
+ printf(" ndim = %d\n", na->ndim);
114
+ printf(" shape = 0x%"SZF"x\n", (size_t)na->shape);
115
+ if (na->shape) {
116
+ printf(" shape = [");
117
+ for (i=0;i<na->ndim;i++)
118
+ printf(" %"SZF"d", na->shape[i]);
119
+ printf(" ]\n");
120
+ }
121
+
122
+ switch(na->type) {
123
+ case NARRAY_DATA_T:
124
+ case NARRAY_FILEMAP_T:
125
+ nary_debug_info_nadata(self);
126
+ break;
127
+ case NARRAY_VIEW_T:
128
+ nary_debug_info_naview(self);
129
+ break;
130
+ }
131
+ return Qnil;
132
+ }
133
+
134
+
135
+ static size_t
136
+ na_view_memsize(const void* ptr)
137
+ {
138
+ int i;
139
+ size_t size = sizeof(narray_view_t);
140
+ const narray_view_t *na = ptr;
141
+
142
+ assert(na->base.type == NARRAY_VIEW_T);
143
+
144
+ if (na->stridx != NULL) {
145
+ for (i=0; i<na->base.ndim; i++) {
146
+ if (SDX_IS_INDEX(na->stridx[i])) {
147
+ size += sizeof(size_t) * na->base.shape[i];
148
+ }
149
+ }
150
+ size += sizeof(stridx_t) * na->base.ndim;
151
+ }
152
+ if (na->base.size > 0) {
153
+ if (na->base.shape != NULL && na->base.shape != &(na->base.size)) {
154
+ size += sizeof(size_t) * na->base.ndim;
155
+ }
156
+ }
157
+ return size;
158
+ }
159
+
160
+ static void
161
+ na_view_free(void* ptr)
162
+ {
163
+ int i;
164
+ narray_view_t *na = (narray_view_t*)ptr;
165
+
166
+ assert(na->base.type == NARRAY_VIEW_T);
167
+
168
+ if (na->stridx != NULL) {
169
+ for (i=0; i<na->base.ndim; i++) {
170
+ if (SDX_IS_INDEX(na->stridx[i])) {
171
+ void *p = SDX_GET_INDEX(na->stridx[i]);
172
+ if (cumo_cuda_runtime_is_device_memory(p)) {
173
+ cumo_cuda_runtime_free(p);
174
+ } else {
175
+ xfree(p);
176
+ }
177
+ }
178
+ }
179
+ xfree(na->stridx);
180
+ na->stridx = NULL;
181
+ }
182
+ if (na->base.size > 0) {
183
+ if (na->base.shape != NULL && na->base.shape != &(na->base.size)) {
184
+ xfree(na->base.shape);
185
+ na->base.shape = NULL;
186
+ }
187
+ }
188
+ xfree(na);
189
+ }
190
+
191
+ static void
192
+ na_view_gc_mark(void* na)
193
+ {
194
+ if (((narray_t*)na)->type == NARRAY_VIEW_T) {
195
+ rb_gc_mark(((narray_view_t*)na)->data);
196
+ }
197
+ }
198
+
199
+ const rb_data_type_t na_data_type_view = {
200
+ "Cumo::NArrayView",
201
+ {na_view_gc_mark, na_view_free, na_view_memsize,},
202
+ &na_data_type, 0, 0,
203
+ };
204
+
205
+ VALUE
206
+ na_s_allocate_view(VALUE klass)
207
+ {
208
+ narray_view_t *na = ALLOC(narray_view_t);
209
+
210
+ na->base.ndim = 0;
211
+ na->base.type = NARRAY_VIEW_T;
212
+ na->base.flag[0] = NA_FL0_INIT;
213
+ na->base.flag[1] = NA_FL1_INIT;
214
+ na->base.size = 0;
215
+ na->base.shape = NULL;
216
+ na->base.reduce = INT2FIX(0);
217
+ na->data = Qnil;
218
+ na->offset = 0;
219
+ na->stridx = NULL;
220
+ return TypedData_Wrap_Struct(klass, &na_data_type_view, (void*)na);
221
+ }
222
+
223
+
224
+ //static const size_t zero=0;
225
+
226
+ void
227
+ na_array_to_internal_shape(VALUE self, VALUE ary, size_t *shape)
228
+ {
229
+ size_t i, n, c, s;
230
+ ssize_t x;
231
+ VALUE v;
232
+ int flag = 0;
233
+
234
+ n = RARRAY_LEN(ary);
235
+
236
+ if (RTEST(self)) {
237
+ flag = TEST_COLUMN_MAJOR(self);
238
+ }
239
+ if (flag) {
240
+ c = n-1;
241
+ s = -1;
242
+ } else {
243
+ c = 0;
244
+ s = 1;
245
+ }
246
+ for (i=0; i<n; i++) {
247
+ v = RARRAY_AREF(ary,i);
248
+ x = NUM2SSIZET(v);
249
+ if (x < 0) {
250
+ rb_raise(rb_eArgError,"size must be non-negative");
251
+ }
252
+ shape[c] = x;
253
+ c += s;
254
+ }
255
+ }
256
+
257
+
258
+
259
+ void
260
+ na_alloc_shape(narray_t *na, int ndim)
261
+ {
262
+ na->ndim = ndim;
263
+ na->size = 0;
264
+ switch(ndim) {
265
+ case 0:
266
+ case 1:
267
+ na->shape = &(na->size);
268
+ break;
269
+ default:
270
+ if (ndim < 0) {
271
+ rb_raise(nary_eDimensionError,"ndim=%d is negative", ndim);
272
+ }
273
+ if (ndim > NA_MAX_DIMENSION) {
274
+ rb_raise(nary_eDimensionError,"ndim=%d is too many", ndim);
275
+ }
276
+ na->shape = ALLOC_N(size_t, ndim);
277
+ }
278
+ }
279
+
280
+ void
281
+ na_setup_shape(narray_t *na, int ndim, size_t *shape)
282
+ {
283
+ int i;
284
+ size_t size;
285
+
286
+ na_alloc_shape(na, ndim);
287
+
288
+ if (ndim==0) {
289
+ na->size = 1;
290
+ }
291
+ else if (ndim==1) {
292
+ na->size = shape[0];
293
+ }
294
+ else {
295
+ for (i=0, size=1; i<ndim; i++) {
296
+ na->shape[i] = shape[i];
297
+ size *= shape[i];
298
+ }
299
+ na->size = size;
300
+ }
301
+ }
302
+
303
+ static void
304
+ na_setup(VALUE self, int ndim, size_t *shape)
305
+ {
306
+ narray_t *na;
307
+ GetNArray(self,na);
308
+ na_setup_shape(na, ndim, shape);
309
+ }
310
+
311
+
312
+ /*
313
+ @overload initialize(shape)
314
+ @overload initialize(size0, size1, ...)
315
+ @param [Array] shape (array of sizes along each dimension)
316
+ @param [Integer] sizeN (size along Nth-dimension)
317
+ @return [Cumo::NArray] unallocated narray.
318
+
319
+ Constructs an instance of NArray class using the given
320
+ <i>shape</i> or <i>sizes</i>.
321
+ Note that NArray itself is an abstract super class and
322
+ not suitable to create instances.
323
+ Use Typed Subclasses of NArray (DFloat, Int32, etc) to create instances.
324
+ This method does not allocate memory for array data.
325
+ Memory is allocated on write method such as #fill, #store, #seq, etc.
326
+
327
+ @example
328
+ i = Cumo::Int64.new([2,4,3])
329
+ #=> Cumo::Int64#shape=[2,4,3](empty)
330
+
331
+ f = Cumo::DFloat.new(3,4)
332
+ #=> Cumo::DFloat#shape=[3,4](empty)
333
+
334
+ f.fill(2)
335
+ #=> Cumo::DFloat#shape=[3,4]
336
+ # [[2, 2, 2, 2],
337
+ # [2, 2, 2, 2],
338
+ # [2, 2, 2, 2]]
339
+
340
+ x = Cumo::NArray.new(5)
341
+ #=> in `new': allocator undefined for Cumo::NArray (TypeError)
342
+ # from t.rb:9:in `<main>'
343
+
344
+ */
345
+ static VALUE
346
+ na_initialize(VALUE self, VALUE args)
347
+ {
348
+ VALUE v;
349
+ size_t *shape=NULL;
350
+ int ndim;
351
+
352
+ if (RARRAY_LEN(args) == 1) {
353
+ v = RARRAY_AREF(args,0);
354
+ if (TYPE(v) != T_ARRAY) {
355
+ v = args;
356
+ }
357
+ } else {
358
+ v = args;
359
+ }
360
+ ndim = RARRAY_LEN(v);
361
+ if (ndim > NA_MAX_DIMENSION) {
362
+ rb_raise(rb_eArgError,"ndim=%d exceeds maximum dimension",ndim);
363
+ }
364
+ shape = ALLOCA_N(size_t, ndim);
365
+ // setup size_t shape[] from VALUE shape argument
366
+ na_array_to_internal_shape(self, v, shape);
367
+ na_setup(self, ndim, shape);
368
+
369
+ return self;
370
+ }
371
+
372
+
373
+ VALUE
374
+ nary_new(VALUE klass, int ndim, size_t *shape)
375
+ {
376
+ volatile VALUE obj;
377
+
378
+ obj = rb_funcall(klass, id_allocate, 0);
379
+ na_setup(obj, ndim, shape);
380
+ return obj;
381
+ }
382
+
383
+
384
+ VALUE
385
+ nary_view_new(VALUE klass, int ndim, size_t *shape)
386
+ {
387
+ volatile VALUE obj;
388
+
389
+ obj = na_s_allocate_view(klass);
390
+ na_setup(obj, ndim, shape);
391
+ return obj;
392
+ }
393
+
394
+
395
+ /*
396
+ Replaces the contents of self with the contents of other narray.
397
+ Used in dup and clone method.
398
+ @overload initialize_copy(other)
399
+ @param [Cumo::NArray] other
400
+ @return [Cumo::NArray] self
401
+ */
402
+ static VALUE
403
+ na_initialize_copy(VALUE self, VALUE orig)
404
+ {
405
+ narray_t *na;
406
+ GetNArray(orig,na);
407
+
408
+ na_setup(self,NA_NDIM(na),NA_SHAPE(na));
409
+ na_store(self,orig);
410
+ na_copy_flags(orig,self);
411
+ return self;
412
+ }
413
+
414
+
415
+ /*
416
+ * call-seq:
417
+ * zeros(shape) => narray
418
+ * zeros(size1,size2,...) => narray
419
+ *
420
+ * Returns a zero-filled narray with <i>shape</i>.
421
+ * This singleton method is valid not for NArray class itself
422
+ * but for typed NArray subclasses, e.g., DFloat, Int64.
423
+ * @example
424
+ * a = Cumo::DFloat.zeros(3,5)
425
+ * => Cumo::DFloat#shape=[3,5]
426
+ * [[0, 0, 0, 0, 0],
427
+ * [0, 0, 0, 0, 0],
428
+ * [0, 0, 0, 0, 0]]
429
+ */
430
+ static VALUE
431
+ na_s_zeros(int argc, VALUE *argv, VALUE klass)
432
+ {
433
+ VALUE obj;
434
+ obj = rb_class_new_instance(argc, argv, klass);
435
+ return rb_funcall(obj, id_fill, 1, INT2FIX(0));
436
+ }
437
+
438
+
439
+ /*
440
+ * call-seq:
441
+ * ones(shape) => narray
442
+ * ones(size1,size2,...) => narray
443
+ *
444
+ * Returns a one-filled narray with <i>shape</i>.
445
+ * This singleton method is valid not for NArray class itself
446
+ * but for typed NArray subclasses, e.g., DFloat, Int64.
447
+ * @example
448
+ * a = Cumo::DFloat.ones(3,5)
449
+ * => Cumo::DFloat#shape=[3,5]
450
+ * [[1, 1, 1, 1, 1],
451
+ * [1, 1, 1, 1, 1],
452
+ * [1, 1, 1, 1, 1]]
453
+ */
454
+ static VALUE
455
+ na_s_ones(int argc, VALUE *argv, VALUE klass)
456
+ {
457
+ VALUE obj;
458
+ obj = rb_class_new_instance(argc, argv, klass);
459
+ return rb_funcall(obj, id_fill, 1, INT2FIX(1));
460
+ }
461
+
462
+
463
+ /*
464
+ Returns an array of N linearly spaced points between x1 and x2.
465
+ This singleton method is valid not for NArray class itself
466
+ but for typed NArray subclasses, e.g., DFloat, Int64.
467
+
468
+ @overload linspace(x1, x2, [n])
469
+ @param [Numeric] x1 The start value
470
+ @param [Numeric] x2 The end value
471
+ @param [Integer] n The number of elements. (default is 100).
472
+ @return [Cumo::NArray] result array.
473
+
474
+ @example
475
+ a = Cumo::DFloat.linspace(-5,5,7)
476
+ => Cumo::DFloat#shape=[7]
477
+ [-5, -3.33333, -1.66667, 0, 1.66667, 3.33333, 5]
478
+ */
479
+ static VALUE
480
+ na_s_linspace(int argc, VALUE *argv, VALUE klass)
481
+ {
482
+ VALUE obj, vx1, vx2, vstep, vsize;
483
+ double n;
484
+ int narg;
485
+
486
+ narg = rb_scan_args(argc,argv,"21",&vx1,&vx2,&vsize);
487
+ if (narg==3) {
488
+ n = NUM2DBL(vsize);
489
+ } else {
490
+ n = 100;
491
+ vsize = INT2FIX(100);
492
+ }
493
+
494
+ obj = rb_funcall(vx2, '-', 1, vx1);
495
+ vstep = rb_funcall(obj, '/', 1, DBL2NUM(n-1));
496
+
497
+ obj = rb_class_new_instance(1, &vsize, klass);
498
+ return rb_funcall(obj, id_seq, 2, vx1, vstep);
499
+ }
500
+
501
+ /*
502
+ Returns an array of N logarithmically spaced points between 10^a and 10^b.
503
+ This singleton method is valid not for NArray having +logseq+ method,
504
+ i.e., DFloat, SFloat, DComplex, and SComplex.
505
+
506
+ @overload logspace(a, b, [n, base])
507
+ @param [Numeric] a The start value
508
+ @param [Numeric] b The end value
509
+ @param [Integer] n The number of elements. (default is 50)
510
+ @param [Numeric] base The base of log space. (default is 10)
511
+ @return [Cumo::NArray] result array.
512
+
513
+ @example
514
+ Cumo::DFloat.logspace(4,0,5,2)
515
+ => Cumo::DFloat#shape=[5]
516
+ [16, 8, 4, 2, 1]
517
+ Cumo::DComplex.logspace(0,1i*Math::PI,5,Math::E)
518
+ => Cumo::DComplex#shape=[5]
519
+ [1+4.44659e-323i, 0.707107+0.707107i, 6.12323e-17+1i, -0.707107+0.707107i, ...]
520
+ */
521
+ static VALUE
522
+ na_s_logspace(int argc, VALUE *argv, VALUE klass)
523
+ {
524
+ VALUE obj, vx1, vx2, vstep, vsize, vbase;
525
+ double n;
526
+
527
+ rb_scan_args(argc,argv,"22",&vx1,&vx2,&vsize,&vbase);
528
+ if (vsize == Qnil) {
529
+ vsize = INT2FIX(50);
530
+ n = 50;
531
+ } else {
532
+ n = NUM2DBL(vsize);
533
+ }
534
+ if (vbase == Qnil) {
535
+ vbase = DBL2NUM(10);
536
+ }
537
+
538
+ obj = rb_funcall(vx2, '-', 1, vx1);
539
+ vstep = rb_funcall(obj, '/', 1, DBL2NUM(n-1));
540
+
541
+ obj = rb_class_new_instance(1, &vsize, klass);
542
+ return rb_funcall(obj, id_logseq, 3, vx1, vstep, vbase);
543
+ }
544
+
545
+
546
+ /*
547
+ Returns a NArray with shape=(n,n) whose diagonal elements are 1, otherwise 0.
548
+ @overload eye(n)
549
+ @param [Integer] n Size of NArray. Creates 2-D NArray with shape=(n,n)
550
+ @return [Cumo::NArray] created NArray.
551
+ @example
552
+ a = Cumo::DFloat.eye(3)
553
+ => Cumo::DFloat#shape=[3,3]
554
+ [[1, 0, 0],
555
+ [0, 1, 0],
556
+ [0, 0, 1]]
557
+ */
558
+ static VALUE
559
+ na_s_eye(int argc, VALUE *argv, VALUE klass)
560
+ {
561
+ VALUE obj;
562
+ VALUE tmp[2];
563
+
564
+ if (argc==0) {
565
+ rb_raise(rb_eArgError,"No argument");
566
+ }
567
+ else if (argc==1) {
568
+ tmp[0] = tmp[1] = argv[0];
569
+ argv = tmp;
570
+ argc = 2;
571
+ }
572
+ obj = rb_class_new_instance(argc, argv, klass);
573
+ return rb_funcall(obj, id_eye, 0);
574
+ }
575
+
576
+
577
+
578
+ #define READ 1
579
+ #define WRITE 2
580
+
581
+ static char *
582
+ na_get_pointer_for_rw(VALUE self, int flag)
583
+ {
584
+ char *ptr;
585
+ VALUE obj;
586
+ narray_t *na;
587
+
588
+ if ((flag & WRITE) && OBJ_FROZEN(self)) {
589
+ rb_raise(rb_eRuntimeError, "cannot write to frozen NArray.");
590
+ }
591
+
592
+ GetNArray(self,na);
593
+
594
+ switch(NA_TYPE(na)) {
595
+ case NARRAY_DATA_T:
596
+ ptr = NA_DATA_PTR(na);
597
+ if (NA_SIZE(na) > 0 && ptr == NULL) {
598
+ if (flag & READ) {
599
+ rb_raise(rb_eRuntimeError,"cannot read unallocated NArray");
600
+ }
601
+ if (flag & WRITE) {
602
+ rb_funcall(self, id_allocate, 0);
603
+ ptr = NA_DATA_PTR(na);
604
+ }
605
+ }
606
+ return ptr;
607
+ case NARRAY_VIEW_T:
608
+ obj = NA_VIEW_DATA(na);
609
+ if ((flag & WRITE) && OBJ_FROZEN(obj)) {
610
+ rb_raise(rb_eRuntimeError, "cannot write to frozen NArray.");
611
+ }
612
+ GetNArray(obj,na);
613
+ switch(NA_TYPE(na)) {
614
+ case NARRAY_DATA_T:
615
+ ptr = NA_DATA_PTR(na);
616
+ if (flag & (READ|WRITE)) {
617
+ if (NA_SIZE(na) > 0 && ptr == NULL) {
618
+ rb_raise(rb_eRuntimeError,"cannot read/write unallocated NArray");
619
+ }
620
+ }
621
+ return ptr;
622
+ default:
623
+ rb_raise(rb_eRuntimeError,"invalid NA_TYPE of view: %d",NA_TYPE(na));
624
+ }
625
+ default:
626
+ rb_raise(rb_eRuntimeError,"invalid NA_TYPE: %d",NA_TYPE(na));
627
+ }
628
+
629
+ return NULL;
630
+ }
631
+
632
+ char *
633
+ na_get_pointer_for_read(VALUE self)
634
+ {
635
+ return na_get_pointer_for_rw(self, READ);
636
+ }
637
+
638
+ char *
639
+ na_get_pointer_for_write(VALUE self)
640
+ {
641
+ return na_get_pointer_for_rw(self, WRITE);
642
+ }
643
+
644
+ char *
645
+ na_get_pointer_for_read_write(VALUE self)
646
+ {
647
+ return na_get_pointer_for_rw(self, READ|WRITE);
648
+ }
649
+
650
+ char *
651
+ na_get_pointer(VALUE self)
652
+ {
653
+ return na_get_pointer_for_rw(self, 0);
654
+ }
655
+
656
+
657
+ void
658
+ na_release_lock(VALUE self)
659
+ {
660
+ narray_t *na;
661
+
662
+ UNSET_LOCK(self);
663
+ GetNArray(self,na);
664
+
665
+ switch(NA_TYPE(na)) {
666
+ case NARRAY_VIEW_T:
667
+ na_release_lock(NA_VIEW_DATA(na));
668
+ break;
669
+ }
670
+ }
671
+
672
+
673
+ /* method: size() -- returns the total number of typeents */
674
+ static VALUE
675
+ na_size(VALUE self)
676
+ {
677
+ narray_t *na;
678
+ GetNArray(self,na);
679
+ return SIZET2NUM(na->size);
680
+ }
681
+
682
+
683
+ /* method: size() -- returns the total number of typeents */
684
+ static VALUE
685
+ na_ndim(VALUE self)
686
+ {
687
+ narray_t *na;
688
+ GetNArray(self,na);
689
+ return INT2NUM(na->ndim);
690
+ }
691
+
692
+
693
+ /*
694
+ Returns true if self.size == 0.
695
+ @overload empty?
696
+ */
697
+ static VALUE
698
+ na_empty_p(VALUE self)
699
+ {
700
+ narray_t *na;
701
+ GetNArray(self,na);
702
+ if (NA_SIZE(na)==0) {
703
+ return Qtrue;
704
+ }
705
+ return Qfalse;
706
+ }
707
+
708
+
709
+ /* method: shape() -- returns shape, array of the size of dimensions */
710
+ static VALUE
711
+ na_shape(VALUE self)
712
+ {
713
+ volatile VALUE v;
714
+ narray_t *na;
715
+ size_t i, n, c, s;
716
+
717
+ GetNArray(self,na);
718
+ n = NA_NDIM(na);
719
+ if (TEST_COLUMN_MAJOR(self)) {
720
+ c = n-1;
721
+ s = -1;
722
+ } else {
723
+ c = 0;
724
+ s = 1;
725
+ }
726
+ v = rb_ary_new2(n);
727
+ for (i=0; i<n; i++) {
728
+ rb_ary_push(v, SIZET2NUM(na->shape[c]));
729
+ c += s;
730
+ }
731
+ return v;
732
+ }
733
+
734
+
735
+ unsigned int
736
+ nary_element_stride(VALUE v)
737
+ {
738
+ narray_type_info_t *info;
739
+ narray_t *na;
740
+
741
+ GetNArray(v,na);
742
+ if (na->type == NARRAY_VIEW_T) {
743
+ v = NA_VIEW_DATA(na);
744
+ GetNArray(v,na);
745
+ }
746
+ assert(na->type == NARRAY_DATA_T);
747
+
748
+ info = (narray_type_info_t *)(RTYPEDDATA_TYPE(v)->data);
749
+ return info->element_stride;
750
+ }
751
+
752
+ size_t
753
+ na_dtype_elmsz(VALUE klass)
754
+ {
755
+ return NUM2SIZET(rb_const_get(klass, id_contiguous_stride));
756
+ }
757
+
758
+ size_t
759
+ na_get_offset(VALUE self)
760
+ {
761
+ narray_t *na;
762
+ GetNArray(self,na);
763
+
764
+ switch(na->type) {
765
+ case NARRAY_DATA_T:
766
+ case NARRAY_FILEMAP_T:
767
+ return 0;
768
+ case NARRAY_VIEW_T:
769
+ return NA_VIEW_OFFSET(na);
770
+ }
771
+ return 0;
772
+ }
773
+
774
+
775
+ void
776
+ na_index_arg_to_internal_order(int argc, VALUE *argv, VALUE self)
777
+ {
778
+ int i,j;
779
+ VALUE tmp;
780
+
781
+ if (TEST_COLUMN_MAJOR(self)) {
782
+ for (i=0,j=argc-1; i<argc/2; i++,j--) {
783
+ tmp = argv[i];
784
+ argv[i] = argv[j];
785
+ argv[j] = tmp;
786
+ }
787
+ }
788
+ }
789
+
790
+ void
791
+ na_copy_flags(VALUE src, VALUE dst)
792
+ {
793
+ narray_t *na1, *na2;
794
+
795
+ GetNArray(src,na1);
796
+ GetNArray(dst,na2);
797
+
798
+ na2->flag[0] = na1->flag[0];
799
+ //na2->flag[1] = NA_FL1_INIT;
800
+
801
+ RBASIC(dst)->flags |= (RBASIC(src)->flags) &
802
+ (FL_USER1|FL_USER2|FL_USER3|FL_USER4|FL_USER5|FL_USER6|FL_USER7);
803
+ }
804
+
805
+
806
+ // fix name, ex, allow_stride_for_flatten_view
807
+ VALUE
808
+ na_check_ladder(VALUE self, int start_dim)
809
+ {
810
+ int i;
811
+ ssize_t st0, st1;
812
+ narray_t *na;
813
+ GetNArray(self,na);
814
+
815
+ if (start_dim < -na->ndim || start_dim >= na->ndim) {
816
+ rb_bug("start_dim (%d) out of range",start_dim);
817
+ }
818
+
819
+ switch(na->type) {
820
+ case NARRAY_DATA_T:
821
+ case NARRAY_FILEMAP_T:
822
+ return Qtrue;
823
+ case NARRAY_VIEW_T:
824
+ // negative dim -> position from last dim
825
+ if (start_dim < 0) {
826
+ start_dim += NA_NDIM(na);
827
+ }
828
+ // not ladder if it has index
829
+ for (i=start_dim; i<NA_NDIM(na); i++) {
830
+ if (NA_IS_INDEX_AT(na,i))
831
+ return Qfalse;
832
+ }
833
+ // check stride
834
+ st0 = NA_STRIDE_AT(na,start_dim);
835
+ for (i=start_dim+1; i<NA_NDIM(na); i++) {
836
+ st1 = NA_STRIDE_AT(na,i);
837
+ if (st0 != (ssize_t)(st1 * NA_SHAPE(na)[i])) {
838
+ return Qfalse;
839
+ }
840
+ st0 = st1;
841
+ }
842
+ }
843
+ return Qtrue;
844
+ }
845
+
846
+ VALUE
847
+ na_check_contiguous(VALUE self)
848
+ {
849
+ ssize_t elmsz;
850
+ narray_t *na;
851
+ GetNArray(self,na);
852
+
853
+ switch(na->type) {
854
+ case NARRAY_DATA_T:
855
+ case NARRAY_FILEMAP_T:
856
+ return Qtrue;
857
+ case NARRAY_VIEW_T:
858
+ if (NA_VIEW_STRIDX(na)==0) {
859
+ return Qtrue;
860
+ }
861
+ if (na_check_ladder(self,0)==Qtrue) {
862
+ elmsz = nary_element_stride(self);
863
+ if (elmsz == NA_STRIDE_AT(na,NA_NDIM(na)-1)) {
864
+ return Qtrue;
865
+ }
866
+ }
867
+ }
868
+ return Qfalse;
869
+ }
870
+
871
+ //----------------------------------------------------------------------
872
+
873
+ /*
874
+ * call-seq:
875
+ * narray.view => narray
876
+ *
877
+ * Return view of NArray
878
+ */
879
+ VALUE
880
+ na_make_view(VALUE self)
881
+ {
882
+ int i, nd;
883
+ size_t j;
884
+ size_t *idx1, *idx2;
885
+ ssize_t stride;
886
+ narray_t *na;
887
+ narray_view_t *na1, *na2;
888
+ volatile VALUE view;
889
+
890
+ GetNArray(self,na);
891
+ nd = na->ndim;
892
+
893
+ view = na_s_allocate_view(CLASS_OF(self));
894
+
895
+ na_copy_flags(self, view);
896
+ GetNArrayView(view, na2);
897
+
898
+ na_setup_shape((narray_t*)na2, nd, na->shape);
899
+ na2->stridx = ALLOC_N(stridx_t,nd);
900
+
901
+ switch(na->type) {
902
+ case NARRAY_DATA_T:
903
+ case NARRAY_FILEMAP_T:
904
+ stride = nary_element_stride(self);
905
+ for (i=nd; i--;) {
906
+ SDX_SET_STRIDE(na2->stridx[i],stride);
907
+ stride *= na->shape[i];
908
+ }
909
+ na2->offset = 0;
910
+ na2->data = self;
911
+ break;
912
+ case NARRAY_VIEW_T:
913
+ GetNArrayView(self, na1);
914
+ for (i=0; i<nd; i++) {
915
+ if (SDX_IS_INDEX(na1->stridx[i])) {
916
+ idx1 = SDX_GET_INDEX(na1->stridx[i]);
917
+ idx2 = ALLOC_N(size_t,na1->base.shape[i]);
918
+ for (j=0; j<na1->base.shape[i]; j++) {
919
+ idx2[j] = idx1[j];
920
+ }
921
+ SDX_SET_INDEX(na2->stridx[i],idx2);
922
+ } else {
923
+ na2->stridx[i] = na1->stridx[i];
924
+ }
925
+ }
926
+ na2->offset = na1->offset;
927
+ na2->data = na1->data;
928
+ break;
929
+ }
930
+
931
+ return view;
932
+ }
933
+
934
+
935
+ //----------------------------------------------------------------------
936
+
937
+ /*
938
+ * call-seq:
939
+ * narray.expand_dims(dim) => narray view
940
+ *
941
+ * Expand the shape of an array. Insert a new axis with size=1
942
+ * at a given dimension.
943
+ * @param [Integer] dim dimension at which new axis is inserted.
944
+ * @return [Cumo::NArray] result narray view.
945
+ */
946
+ static VALUE
947
+ na_expand_dims(VALUE self, VALUE vdim)
948
+ {
949
+ int i, j, nd, dim;
950
+ size_t *shape, *na_shape;
951
+ stridx_t *stridx, *na_stridx;
952
+ narray_t *na;
953
+ narray_view_t *na2;
954
+ VALUE view;
955
+
956
+ GetNArray(self,na);
957
+ nd = na->ndim;
958
+
959
+ dim = NUM2INT(vdim);
960
+ if (dim < -nd-1 || dim > nd) {
961
+ rb_raise(nary_eDimensionError,"invalid axis (%d for %dD NArray)",
962
+ dim,nd);
963
+ }
964
+ if (dim < 0) {
965
+ dim += nd+1;
966
+ }
967
+
968
+ view = na_make_view(self);
969
+ GetNArrayView(view, na2);
970
+
971
+ shape = ALLOC_N(size_t,nd+1);
972
+ stridx = ALLOC_N(stridx_t,nd+1);
973
+ na_shape = na2->base.shape;
974
+ na_stridx = na2->stridx;
975
+
976
+ for (i=j=0; i<=nd; i++) {
977
+ if (i==dim) {
978
+ shape[i] = 1;
979
+ SDX_SET_STRIDE(stridx[i],0);
980
+ } else {
981
+ shape[i] = na_shape[j];
982
+ stridx[i] = na_stridx[j];
983
+ j++;
984
+ }
985
+ }
986
+
987
+ na2->stridx = stridx;
988
+ xfree(na_stridx);
989
+ na2->base.shape = shape;
990
+ if (na_shape != &(na2->base.size)) {
991
+ xfree(na_shape);
992
+ }
993
+ na2->base.ndim++;
994
+ return view;
995
+ }
996
+
997
+ //----------------------------------------------------------------------
998
+
999
+ /*
1000
+ * call-seq:
1001
+ * narray.reverse([dim0,dim1,..]) => narray
1002
+ *
1003
+ * Return reversed view along specified dimeinsion
1004
+ */
1005
+ static VALUE
1006
+ nary_reverse(int argc, VALUE *argv, VALUE self)
1007
+ {
1008
+ int i, nd;
1009
+ size_t j, n;
1010
+ size_t offset;
1011
+ size_t *idx1, *idx2;
1012
+ ssize_t stride;
1013
+ ssize_t sign;
1014
+ narray_t *na;
1015
+ narray_view_t *na1, *na2;
1016
+ VALUE view;
1017
+ VALUE reduce;
1018
+
1019
+ reduce = na_reduce_dimension(argc, argv, 1, &self, 0, 0);
1020
+
1021
+ GetNArray(self,na);
1022
+ nd = na->ndim;
1023
+
1024
+ view = na_s_allocate_view(CLASS_OF(self));
1025
+
1026
+ na_copy_flags(self, view);
1027
+ GetNArrayView(view, na2);
1028
+
1029
+ na_setup_shape((narray_t*)na2, nd, na->shape);
1030
+ na2->stridx = ALLOC_N(stridx_t,nd);
1031
+
1032
+ switch(na->type) {
1033
+ case NARRAY_DATA_T:
1034
+ case NARRAY_FILEMAP_T:
1035
+ stride = nary_element_stride(self);
1036
+ offset = 0;
1037
+ for (i=nd; i--;) {
1038
+ if (na_test_reduce(reduce,i)) {
1039
+ offset += (na->shape[i]-1)*stride;
1040
+ sign = -1;
1041
+ } else {
1042
+ sign = 1;
1043
+ }
1044
+ SDX_SET_STRIDE(na2->stridx[i],stride*sign);
1045
+ stride *= na->shape[i];
1046
+ }
1047
+ na2->offset = offset;
1048
+ na2->data = self;
1049
+ break;
1050
+ case NARRAY_VIEW_T:
1051
+ GetNArrayView(self, na1);
1052
+ offset = na1->offset;
1053
+ for (i=0; i<nd; i++) {
1054
+ n = na1->base.shape[i];
1055
+ if (SDX_IS_INDEX(na1->stridx[i])) {
1056
+ idx1 = SDX_GET_INDEX(na1->stridx[i]);
1057
+ idx2 = ALLOC_N(size_t,n);
1058
+ if (na_test_reduce(reduce,i)) {
1059
+ for (j=0; j<n; j++) {
1060
+ idx2[n-1-j] = idx1[j];
1061
+ }
1062
+ } else {
1063
+ for (j=0; j<n; j++) {
1064
+ idx2[j] = idx1[j];
1065
+ }
1066
+ }
1067
+ SDX_SET_INDEX(na2->stridx[i],idx2);
1068
+ } else {
1069
+ stride = SDX_GET_STRIDE(na1->stridx[i]);
1070
+ if (na_test_reduce(reduce,i)) {
1071
+ offset += (n-1)*stride;
1072
+ SDX_SET_STRIDE(na2->stridx[i],-stride);
1073
+ } else {
1074
+ na2->stridx[i] = na1->stridx[i];
1075
+ }
1076
+ }
1077
+ }
1078
+ na2->offset = offset;
1079
+ na2->data = na1->data;
1080
+ break;
1081
+ }
1082
+
1083
+ return view;
1084
+ }
1085
+
1086
+ //----------------------------------------------------------------------
1087
+
1088
+ VALUE
1089
+ cumo_na_upcast(VALUE type1, VALUE type2)
1090
+ {
1091
+ VALUE upcast_hash;
1092
+ VALUE result_type;
1093
+
1094
+ if (type1==type2) {
1095
+ return type1;
1096
+ }
1097
+ upcast_hash = rb_const_get(type1, id_UPCAST);
1098
+ result_type = rb_hash_aref(upcast_hash, type2);
1099
+ if (NIL_P(result_type)) {
1100
+ if (TYPE(type2)==T_CLASS) {
1101
+ if (RTEST(rb_class_inherited_p(type2,cNArray))) {
1102
+ upcast_hash = rb_const_get(type2, id_UPCAST);
1103
+ result_type = rb_hash_aref(upcast_hash, type1);
1104
+ }
1105
+ }
1106
+ }
1107
+ return result_type;
1108
+ }
1109
+
1110
+ /*
1111
+ Returns an array containing other and self,
1112
+ both are converted to upcasted type of NArray.
1113
+ Note that NArray has distinct UPCAST mechanism.
1114
+ Coerce is used for operation between non-NArray and NArray.
1115
+ @overload coerce(other)
1116
+ @param [Object] other numeric object.
1117
+ @return [Array] NArray-casted [other,self]
1118
+ */
1119
+ static VALUE
1120
+ nary_coerce(VALUE x, VALUE y)
1121
+ {
1122
+ VALUE type;
1123
+
1124
+ type = cumo_na_upcast(CLASS_OF(x), CLASS_OF(y));
1125
+ y = rb_funcall(type,id_cast,1,y);
1126
+ return rb_assoc_new(y , x);
1127
+ }
1128
+
1129
+
1130
+ /*
1131
+ Returns total byte size of NArray.
1132
+ @return [Integer] byte size.
1133
+ */
1134
+ static VALUE
1135
+ nary_byte_size(VALUE self)
1136
+ {
1137
+ VALUE velmsz;
1138
+ narray_t *na;
1139
+
1140
+ GetNArray(self,na);
1141
+ velmsz = rb_const_get(CLASS_OF(self), id_element_byte_size);
1142
+ if (FIXNUM_P(velmsz)) {
1143
+ return SIZET2NUM(NUM2SIZET(velmsz) * na->size);
1144
+ }
1145
+ return SIZET2NUM(ceil(NUM2DBL(velmsz) * na->size));
1146
+ }
1147
+
1148
+ /*
1149
+ Returns byte size of one element of NArray.
1150
+ @return [Numeric] byte size.
1151
+ */
1152
+ static VALUE
1153
+ nary_s_byte_size(VALUE type)
1154
+ {
1155
+ return rb_const_get(type, id_element_byte_size);
1156
+ }
1157
+
1158
+
1159
+ /*
1160
+ Returns a new 1-D array initialized from binary raw data in a string.
1161
+ @overload from_binary(string,[shape])
1162
+ @param [String] string Binary raw data.
1163
+ @param [Array] shape array of integers representing array shape.
1164
+ @return [Cumo::NArray] NArray containing binary data.
1165
+ */
1166
+ static VALUE
1167
+ nary_s_from_binary(int argc, VALUE *argv, VALUE type)
1168
+ {
1169
+ size_t len, str_len, byte_size;
1170
+ size_t *shape;
1171
+ char *ptr;
1172
+ int i, nd, narg;
1173
+ VALUE vstr, vshape, vna;
1174
+ VALUE velmsz;
1175
+
1176
+ narg = rb_scan_args(argc,argv,"11",&vstr,&vshape);
1177
+ Check_Type(vstr,T_STRING);
1178
+ str_len = RSTRING_LEN(vstr);
1179
+ velmsz = rb_const_get(type, id_element_byte_size);
1180
+ if (narg==2) {
1181
+ switch(TYPE(vshape)) {
1182
+ case T_FIXNUM:
1183
+ nd = 1;
1184
+ len = NUM2SIZET(vshape);
1185
+ shape = &len;
1186
+ break;
1187
+ case T_ARRAY:
1188
+ nd = RARRAY_LEN(vshape);
1189
+ if (nd == 0 || nd > NA_MAX_DIMENSION) {
1190
+ rb_raise(nary_eDimensionError,"too long or empty shape (%d)", nd);
1191
+ }
1192
+ shape = ALLOCA_N(size_t,nd);
1193
+ len = 1;
1194
+ for (i=0; i<nd; ++i) {
1195
+ len *= shape[i] = NUM2SIZET(RARRAY_AREF(vshape,i));
1196
+ }
1197
+ break;
1198
+ default:
1199
+ rb_raise(rb_eArgError,"second argument must be size or shape");
1200
+ }
1201
+ if (FIXNUM_P(velmsz)) {
1202
+ byte_size = len * NUM2SIZET(velmsz);
1203
+ } else {
1204
+ byte_size = ceil(len * NUM2DBL(velmsz));
1205
+ }
1206
+ if (byte_size > str_len) {
1207
+ rb_raise(rb_eArgError, "specified size is too large");
1208
+ }
1209
+ } else {
1210
+ nd = 1;
1211
+ if (FIXNUM_P(velmsz)) {
1212
+ len = str_len / NUM2SIZET(velmsz);
1213
+ byte_size = len * NUM2SIZET(velmsz);
1214
+ } else {
1215
+ len = floor(str_len / NUM2DBL(velmsz));
1216
+ byte_size = str_len;
1217
+ }
1218
+ if (len == 0) {
1219
+ rb_raise(rb_eArgError, "string is empty or too short");
1220
+ }
1221
+ shape = ALLOCA_N(size_t,nd);
1222
+ shape[0] = len;
1223
+ }
1224
+
1225
+ vna = nary_new(type, nd, shape);
1226
+ ptr = na_get_pointer_for_write(vna);
1227
+
1228
+ memcpy(ptr, RSTRING_PTR(vstr), byte_size);
1229
+
1230
+ return vna;
1231
+ }
1232
+
1233
+ /*
1234
+ Returns a new 1-D array initialized from binary raw data in a string.
1235
+ @overload store_binary(string,[offset])
1236
+ @param [String] string Binary raw data.
1237
+ @param [Integer] (optional) offset Byte offset in string.
1238
+ @return [Integer] stored length.
1239
+ */
1240
+ static VALUE
1241
+ nary_store_binary(int argc, VALUE *argv, VALUE self)
1242
+ {
1243
+ size_t size, str_len, byte_size, offset;
1244
+ char *ptr;
1245
+ int narg;
1246
+ VALUE vstr, voffset;
1247
+ VALUE velmsz;
1248
+ narray_t *na;
1249
+
1250
+ narg = rb_scan_args(argc,argv,"11",&vstr,&voffset);
1251
+ str_len = RSTRING_LEN(vstr);
1252
+ if (narg==2) {
1253
+ offset = NUM2SIZET(voffset);
1254
+ if (str_len < offset) {
1255
+ rb_raise(rb_eArgError, "offset is larger than string length");
1256
+ }
1257
+ str_len -= offset;
1258
+ } else {
1259
+ offset = 0;
1260
+ }
1261
+
1262
+ GetNArray(self,na);
1263
+ size = NA_SIZE(na);
1264
+ velmsz = rb_const_get(CLASS_OF(self), id_element_byte_size);
1265
+ if (FIXNUM_P(velmsz)) {
1266
+ byte_size = size * NUM2SIZET(velmsz);
1267
+ } else {
1268
+ byte_size = ceil(size * NUM2DBL(velmsz));
1269
+ }
1270
+ if (byte_size > str_len) {
1271
+ rb_raise(rb_eArgError, "string is too short to store");
1272
+ }
1273
+
1274
+ ptr = na_get_pointer_for_write(self);
1275
+ memcpy(ptr, RSTRING_PTR(vstr)+offset, byte_size);
1276
+
1277
+ return SIZET2NUM(byte_size);
1278
+ }
1279
+
1280
+ /*
1281
+ Returns string containing the raw data bytes in NArray.
1282
+ @overload to_binary()
1283
+ @return [String] String object containing binary raw data.
1284
+ */
1285
+ static VALUE
1286
+ nary_to_binary(VALUE self)
1287
+ {
1288
+ size_t len, offset=0;
1289
+ char *ptr;
1290
+ VALUE str;
1291
+ narray_t *na;
1292
+
1293
+ SHOW_SYNCHRONIZE_WARNING_ONCE("nary_to_binary", "any");
1294
+ cumo_cuda_runtime_check_status(cudaDeviceSynchronize());
1295
+
1296
+ GetNArray(self,na);
1297
+ if (na->type == NARRAY_VIEW_T) {
1298
+ if (na_check_contiguous(self)==Qtrue) {
1299
+ offset = NA_VIEW_OFFSET(na);
1300
+ } else {
1301
+ self = rb_funcall(self,id_dup,0);
1302
+ }
1303
+ }
1304
+ len = NUM2SIZET(nary_byte_size(self));
1305
+ ptr = na_get_pointer_for_read(self);
1306
+ str = rb_usascii_str_new(ptr+offset,len);
1307
+ RB_GC_GUARD(self);
1308
+ return str;
1309
+ }
1310
+
1311
+ /*
1312
+ Dump marshal data.
1313
+ @overload marshal_dump()
1314
+ @return [Array] Array containing marshal data.
1315
+ */
1316
+ static VALUE
1317
+ nary_marshal_dump(VALUE self)
1318
+ {
1319
+ VALUE a;
1320
+
1321
+ SHOW_SYNCHRONIZE_WARNING_ONCE("nary_marshal_dump", "any");
1322
+ cumo_cuda_runtime_check_status(cudaDeviceSynchronize());
1323
+
1324
+ a = rb_ary_new();
1325
+ rb_ary_push(a, INT2FIX(1)); // version
1326
+ rb_ary_push(a, na_shape(self));
1327
+ rb_ary_push(a, INT2FIX(NA_FLAG0(self)));
1328
+ if (CLASS_OF(self) == cumo_cRObject) {
1329
+ narray_t *na;
1330
+ VALUE *ptr;
1331
+ size_t offset=0;
1332
+ GetNArray(self,na);
1333
+ if (na->type == NARRAY_VIEW_T) {
1334
+ if (na_check_contiguous(self)==Qtrue) {
1335
+ offset = NA_VIEW_OFFSET(na);
1336
+ } else {
1337
+ self = rb_funcall(self,id_dup,0);
1338
+ }
1339
+ }
1340
+ ptr = (VALUE*)na_get_pointer_for_read(self);
1341
+ rb_ary_push(a, rb_ary_new4(NA_SIZE(na), ptr+offset));
1342
+ } else {
1343
+ rb_ary_push(a, nary_to_binary(self));
1344
+ }
1345
+ RB_GC_GUARD(self);
1346
+ return a;
1347
+ }
1348
+
1349
+ static VALUE na_inplace( VALUE self );
1350
+ /*
1351
+ Load marshal data.
1352
+ @overload marshal_load(data)
1353
+ @params [Array] Array containing marshal data.
1354
+ @return [nil]
1355
+ */
1356
+ static VALUE
1357
+ nary_marshal_load(VALUE self, VALUE a)
1358
+ {
1359
+ VALUE v;
1360
+
1361
+ if (TYPE(a) != T_ARRAY) {
1362
+ rb_raise(rb_eArgError,"marshal argument should be array");
1363
+ }
1364
+ if (RARRAY_LEN(a) != 4) {
1365
+ rb_raise(rb_eArgError,"marshal array size should be 4");
1366
+ }
1367
+ if (RARRAY_AREF(a,0) != INT2FIX(1)) {
1368
+ rb_raise(rb_eArgError,"NArray marshal version %d is not supported "
1369
+ "(only version 1)", NUM2INT(RARRAY_AREF(a,0)));
1370
+ }
1371
+ na_initialize(self,RARRAY_AREF(a,1));
1372
+ NA_FL0_SET(self,FIX2INT(RARRAY_AREF(a,2)));
1373
+ v = RARRAY_AREF(a,3);
1374
+ if (CLASS_OF(self) == cumo_cRObject) {
1375
+ narray_t *na;
1376
+ char *ptr;
1377
+ if (TYPE(v) != T_ARRAY) {
1378
+ rb_raise(rb_eArgError,"RObject content should be array");
1379
+ }
1380
+ GetNArray(self,na);
1381
+ if (RARRAY_LEN(v) != (long)NA_SIZE(na)) {
1382
+ rb_raise(rb_eArgError,"RObject content size mismatch");
1383
+ }
1384
+ ptr = na_get_pointer_for_write(self);
1385
+ memcpy(ptr, RARRAY_PTR(v), NA_SIZE(na)*sizeof(VALUE));
1386
+ } else {
1387
+ nary_store_binary(1,&v,self);
1388
+ if (TEST_BYTE_SWAPPED(self)) {
1389
+ rb_funcall(na_inplace(self),id_to_host,0);
1390
+ REVERSE_ENDIAN(self); // correct behavior??
1391
+ }
1392
+ }
1393
+ RB_GC_GUARD(a);
1394
+ return self;
1395
+ }
1396
+
1397
+
1398
+ /*
1399
+ Cast self to another NArray datatype.
1400
+ @overload cast_to(datatype)
1401
+ @param [Class] datatype NArray datatype.
1402
+ @return [Cumo::NArray]
1403
+ */
1404
+ static VALUE
1405
+ nary_cast_to(VALUE obj, VALUE type)
1406
+ {
1407
+ return rb_funcall(type, id_cast, 1, obj);
1408
+ }
1409
+
1410
+
1411
+
1412
+ // reduce is dimension indicies to reduce in reduction kernel (in bits), e.g., for an array of shape:
1413
+ // [2,3,4], 111b for sum(), 010b for sum(axis: 1), 110b for sum(axis: [1,2])
1414
+ bool
1415
+ na_test_reduce(VALUE reduce, int dim)
1416
+ {
1417
+ size_t m;
1418
+
1419
+ if (!RTEST(reduce))
1420
+ return 0;
1421
+ if (FIXNUM_P(reduce)) {
1422
+ m = FIX2LONG(reduce);
1423
+ if (m==0) return 1;
1424
+ return (m & (1u<<dim)) ? 1 : 0;
1425
+ } else {
1426
+ return (rb_funcall(reduce,id_bracket,1,INT2FIX(dim))==INT2FIX(1)) ?
1427
+ 1 : 0 ;
1428
+ }
1429
+ }
1430
+
1431
+
1432
+ static VALUE
1433
+ na_get_reduce_flag_from_narray(int naryc, VALUE *naryv, int *max_arg)
1434
+ {
1435
+ int ndim, ndim0;
1436
+ int rowmaj;
1437
+ int i;
1438
+ size_t j;
1439
+ narray_t *na;
1440
+ VALUE reduce;
1441
+
1442
+ if (naryc<1) {
1443
+ rb_raise(rb_eRuntimeError,"must be positive: naryc=%d", naryc);
1444
+ }
1445
+ GetNArray(naryv[0],na);
1446
+ if (na->size==0) {
1447
+ rb_raise(nary_eShapeError,"cannot reduce empty NArray");
1448
+ }
1449
+ reduce = na->reduce;
1450
+ ndim = ndim0 = na->ndim;
1451
+ if (max_arg) *max_arg = 0;
1452
+ rowmaj = TEST_COLUMN_MAJOR(naryv[0]);
1453
+ for (i=0; i<naryc; i++) {
1454
+ GetNArray(naryv[i],na);
1455
+ if (na->size==0) {
1456
+ rb_raise(nary_eShapeError,"cannot reduce empty NArray");
1457
+ }
1458
+ if (TEST_COLUMN_MAJOR(naryv[i]) != rowmaj) {
1459
+ rb_raise(nary_eDimensionError,"dimension order is different");
1460
+ }
1461
+ if (na->ndim > ndim) { // maximum dimension
1462
+ ndim = na->ndim;
1463
+ if (max_arg) *max_arg = i;
1464
+ }
1465
+ }
1466
+ if (ndim != ndim0) {
1467
+ j = NUM2SIZET(reduce) << (ndim-ndim0);
1468
+ reduce = SIZET2NUM(j);
1469
+ }
1470
+ return reduce;
1471
+ }
1472
+
1473
+
1474
+ static VALUE
1475
+ na_get_reduce_flag_from_axes(VALUE na_obj, VALUE axes)
1476
+ {
1477
+ int i, r;
1478
+ int ndim, rowmaj;
1479
+ long narg;
1480
+ size_t j;
1481
+ size_t len;
1482
+ ssize_t beg, step;
1483
+ VALUE v;
1484
+ size_t m;
1485
+ VALUE reduce;
1486
+ narray_t *na;
1487
+
1488
+ GetNArray(na_obj,na);
1489
+ ndim = na->ndim;
1490
+ rowmaj = TEST_COLUMN_MAJOR(na_obj);
1491
+
1492
+ m = 0;
1493
+ reduce = Qnil;
1494
+ narg = RARRAY_LEN(axes);
1495
+ for (i=0; i<narg; i++) {
1496
+ v = RARRAY_AREF(axes,i);
1497
+ //printf("argv[%d]=",i);rb_p(v);
1498
+ if (TYPE(v)==T_FIXNUM) {
1499
+ beg = FIX2INT(v);
1500
+ if (beg<0) beg+=ndim;
1501
+ if (beg>=ndim || beg<0) {
1502
+ rb_raise(nary_eDimensionError,"dimension is out of range");
1503
+ }
1504
+ len = 1;
1505
+ step = 0;
1506
+ //printf("beg=%d step=%d len=%d\n",beg,step,len);
1507
+ } else if (rb_obj_is_kind_of(v,rb_cRange) ||
1508
+ rb_obj_is_kind_of(v,na_cStep)) {
1509
+ nary_step_array_index( v, ndim, &len, &beg, &step );
1510
+ } else {
1511
+ rb_raise(nary_eDimensionError, "invalid dimension argument %s",
1512
+ rb_obj_classname(v));
1513
+ }
1514
+ for (j=0; j<len; j++) {
1515
+ r = beg + step*j;
1516
+ if (rowmaj) {
1517
+ r = ndim-1-r;
1518
+ }
1519
+ if (reduce==Qnil) {
1520
+ if ( r < (ssize_t)sizeof(size_t) ) {
1521
+ m |= ((size_t)1) << r;
1522
+ continue;
1523
+ } else {
1524
+ reduce = SIZET2NUM(m);
1525
+ }
1526
+ }
1527
+ v = rb_funcall( INT2FIX(1), id_shift_left, 1, INT2FIX(r) );
1528
+ reduce = rb_funcall( reduce, '|', 1, v );
1529
+ }
1530
+ }
1531
+ if (NIL_P(reduce)) reduce = SIZET2NUM(m);
1532
+ return reduce;
1533
+ }
1534
+
1535
+
1536
+ VALUE
1537
+ nary_reduce_options(VALUE axes, VALUE *opts, int naryc, VALUE *naryv,
1538
+ ndfunc_t *ndf)
1539
+ {
1540
+ int max_arg;
1541
+ VALUE reduce;
1542
+
1543
+ // option: axis
1544
+ if (opts[0] != Qundef && RTEST(opts[0])) {
1545
+ if (!NIL_P(axes)) {
1546
+ rb_raise(rb_eArgError,
1547
+ "cannot specify axis-arguments and axis-keyword simultaneously");
1548
+ }
1549
+ if (TYPE(opts[0]) == T_ARRAY) {
1550
+ axes = opts[0];
1551
+ } else {
1552
+ axes = rb_ary_new3(1,opts[0]);
1553
+ }
1554
+ }
1555
+ if (ndf) {
1556
+ // option: keepdims
1557
+ if (opts[1] != Qundef) {
1558
+ if (RTEST(opts[1]))
1559
+ ndf->flag |= NDF_KEEP_DIM;
1560
+ }
1561
+ }
1562
+
1563
+ reduce = na_get_reduce_flag_from_narray(naryc, naryv, &max_arg);
1564
+
1565
+ if (NIL_P(axes)) return reduce;
1566
+
1567
+ return na_get_reduce_flag_from_axes(naryv[max_arg], axes);
1568
+ }
1569
+
1570
+
1571
+ VALUE
1572
+ nary_reduce_dimension(int argc, VALUE *argv, int naryc, VALUE *naryv,
1573
+ ndfunc_t *ndf, na_iter_func_t iter_nan)
1574
+ {
1575
+ long narg;
1576
+ VALUE axes;
1577
+ VALUE kw_hash = Qnil;
1578
+ ID kw_table[3] = {id_axis,id_keepdims,id_nan};
1579
+ VALUE opts[3] = {Qundef,Qundef,Qundef};
1580
+
1581
+ narg = rb_scan_args(argc, argv, "*:", &axes, &kw_hash);
1582
+ rb_get_kwargs(kw_hash, kw_table, 0, 3, opts);
1583
+
1584
+ if (ndf) {
1585
+ // option: nan
1586
+ if (iter_nan && opts[2] != Qundef) {
1587
+ if (RTEST(opts[2]))
1588
+ ndf->func = iter_nan; // replace to nan-aware iterator function
1589
+ }
1590
+ }
1591
+
1592
+ return na_reduce_options((narg)?axes:Qnil, opts, naryc, naryv, ndf);
1593
+ }
1594
+
1595
+ /*
1596
+ Return true if column major.
1597
+ */
1598
+ static VALUE na_column_major_p( VALUE self )
1599
+ {
1600
+ if (TEST_COLUMN_MAJOR(self))
1601
+ return Qtrue;
1602
+ else
1603
+ return Qfalse;
1604
+ }
1605
+
1606
+ /*
1607
+ Return true if row major.
1608
+ */
1609
+ static VALUE na_row_major_p( VALUE self )
1610
+ {
1611
+ if (TEST_ROW_MAJOR(self))
1612
+ return Qtrue;
1613
+ else
1614
+ return Qfalse;
1615
+ }
1616
+
1617
+
1618
+ /*
1619
+ Return true if byte swapped.
1620
+ */
1621
+ static VALUE na_byte_swapped_p( VALUE self )
1622
+ {
1623
+ if (TEST_BYTE_SWAPPED(self))
1624
+ return Qtrue;
1625
+ return Qfalse;
1626
+ }
1627
+
1628
+ /*
1629
+ Return true if not byte swapped.
1630
+ */
1631
+ static VALUE na_host_order_p( VALUE self )
1632
+ {
1633
+ if (TEST_BYTE_SWAPPED(self))
1634
+ return Qfalse;
1635
+ return Qtrue;
1636
+ }
1637
+
1638
+
1639
+ /*
1640
+ Returns view of narray with inplace flagged.
1641
+ @return [Cumo::NArray] view of narray with inplace flag.
1642
+ */
1643
+ static VALUE na_inplace( VALUE self )
1644
+ {
1645
+ VALUE view = self;
1646
+ view = na_make_view(self);
1647
+ SET_INPLACE(view);
1648
+ return view;
1649
+ }
1650
+
1651
+ /*
1652
+ Set inplace flag to self.
1653
+ @return [Cumo::NArray] self
1654
+ */
1655
+ static VALUE na_inplace_bang( VALUE self )
1656
+ {
1657
+ SET_INPLACE(self);
1658
+ return self;
1659
+ }
1660
+
1661
+ /*
1662
+ Return true if inplace flagged.
1663
+ */
1664
+ static VALUE na_inplace_p( VALUE self )
1665
+ {
1666
+ if (TEST_INPLACE(self))
1667
+ return Qtrue;
1668
+ else
1669
+ return Qfalse;
1670
+ }
1671
+
1672
+ /*
1673
+ Unset inplace flag to self.
1674
+ @return [Cumo::NArray] self
1675
+ */
1676
+ static VALUE na_out_of_place_bang( VALUE self )
1677
+ {
1678
+ UNSET_INPLACE(self);
1679
+ return self;
1680
+ }
1681
+
1682
+ int na_debug_flag=0;
1683
+
1684
+ static VALUE na_debug_set(VALUE mod, VALUE flag)
1685
+ {
1686
+ na_debug_flag = RTEST(flag);
1687
+ return Qnil;
1688
+ }
1689
+
1690
+ static double na_profile_value=0;
1691
+
1692
+ static VALUE na_profile(VALUE mod)
1693
+ {
1694
+ return rb_float_new(na_profile_value);
1695
+ }
1696
+
1697
+ static VALUE na_profile_set(VALUE mod, VALUE val)
1698
+ {
1699
+ na_profile_value = NUM2DBL(val);
1700
+ return val;
1701
+ }
1702
+
1703
+
1704
+ /*
1705
+ Returns the number of rows used for NArray#inspect
1706
+ @overload inspect_rows
1707
+ @return [Integer or nil] the number of rows.
1708
+ */
1709
+ static VALUE na_inspect_rows(VALUE mod)
1710
+ {
1711
+ if (cumo_na_inspect_rows > 0) {
1712
+ return INT2NUM(cumo_na_inspect_rows);
1713
+ } else {
1714
+ return Qnil;
1715
+ }
1716
+ }
1717
+
1718
+ /*
1719
+ Set the number of rows used for NArray#inspect
1720
+ @overload inspect_rows=(rows)
1721
+ @param [Integer or nil] rows the number of rows
1722
+ @return [nil]
1723
+ */
1724
+ static VALUE na_inspect_rows_set(VALUE mod, VALUE num)
1725
+ {
1726
+ if (RTEST(num)) {
1727
+ cumo_na_inspect_rows = NUM2INT(num);
1728
+ } else {
1729
+ cumo_na_inspect_rows = 0;
1730
+ }
1731
+ return Qnil;
1732
+ }
1733
+
1734
+ /*
1735
+ Returns the number of cols used for NArray#inspect
1736
+ @overload inspect_cols
1737
+ @return [Integer or nil] the number of cols.
1738
+ */
1739
+ static VALUE na_inspect_cols(VALUE mod)
1740
+ {
1741
+ if (cumo_na_inspect_cols > 0) {
1742
+ return INT2NUM(cumo_na_inspect_cols);
1743
+ } else {
1744
+ return Qnil;
1745
+ }
1746
+ }
1747
+
1748
+ /*
1749
+ Set the number of cols used for NArray#inspect
1750
+ @overload inspect_cols=(cols)
1751
+ @param [Integer or nil] cols the number of cols
1752
+ @return [nil]
1753
+ */
1754
+ static VALUE na_inspect_cols_set(VALUE mod, VALUE num)
1755
+ {
1756
+ if (RTEST(num)) {
1757
+ cumo_na_inspect_cols = NUM2INT(num);
1758
+ } else {
1759
+ cumo_na_inspect_cols = 0;
1760
+ }
1761
+ return Qnil;
1762
+ }
1763
+
1764
+
1765
+ /*
1766
+ Equality of self and other in view of numerical array.
1767
+ i.e., both arrays have same shape and corresponding elements are equal.
1768
+ @overload == other
1769
+ @param [Object] other
1770
+ @return [Boolean] true if self and other is equal.
1771
+ */
1772
+ static VALUE
1773
+ na_equal(VALUE self, volatile VALUE other)
1774
+ {
1775
+ volatile VALUE vbool;
1776
+ narray_t *na1, *na2;
1777
+ int i;
1778
+
1779
+ GetNArray(self,na1);
1780
+
1781
+ if (!rb_obj_is_kind_of(other,cNArray)) {
1782
+ other = rb_funcall(CLASS_OF(self), id_cast, 1, other);
1783
+ }
1784
+
1785
+ GetNArray(other,na2);
1786
+ if (na1->ndim != na2->ndim) {
1787
+ return Qfalse;
1788
+ }
1789
+ for (i=0; i<na1->ndim; i++) {
1790
+ if (na1->shape[i] != na2->shape[i]) {
1791
+ return Qfalse;
1792
+ }
1793
+ }
1794
+ vbool = rb_funcall(self, id_eq, 1, other);
1795
+ return (rb_funcall(vbool, id_count_false_cpu, 0)==INT2FIX(0)) ? Qtrue : Qfalse;
1796
+ }
1797
+
1798
+ /*
1799
+ Free data memory explicitly without waiting GC.
1800
+
1801
+ @return [Boolean] true if free
1802
+ */
1803
+ VALUE
1804
+ cumo_na_free_data(VALUE self)
1805
+ {
1806
+ narray_t *na;
1807
+ GetNArray(self, na);
1808
+
1809
+ if (na->type == NARRAY_DATA_T) {
1810
+ void *ptr = NA_DATA_PTR(na);
1811
+ if (ptr != NULL) {
1812
+ if (cumo_cuda_runtime_is_device_memory(ptr)) {
1813
+ cumo_cuda_runtime_free(ptr);
1814
+ } else {
1815
+ xfree(ptr);
1816
+ }
1817
+ NA_DATA_PTR(na) = NULL;
1818
+ return Qtrue;
1819
+ }
1820
+ }
1821
+
1822
+ return Qfalse;
1823
+ }
1824
+
1825
+ /* initialization of NArray Class */
1826
+ void
1827
+ Init_cumo_narray()
1828
+ {
1829
+ mCumo = rb_define_module("Cumo");
1830
+
1831
+ /*
1832
+ Document-class: Cumo::NArray
1833
+
1834
+ Cumo::NArray is the abstract super class for
1835
+ Numerical N-dimensional Array in the Ruby/Cumo module.
1836
+ Use Typed Subclasses of NArray (Cumo::DFloat, Int32, etc)
1837
+ to create data array instances.
1838
+ */
1839
+ cNArray = rb_define_class_under(mCumo, "NArray", rb_cObject);
1840
+
1841
+ #ifndef HAVE_RB_CCOMPLEX
1842
+ rb_require("complex");
1843
+ rb_cComplex = rb_const_get(rb_cObject, rb_intern("Complex"));
1844
+ #endif
1845
+
1846
+ rb_define_const(cNArray, "VERSION", rb_str_new2(CUMO_VERSION));
1847
+
1848
+ nary_eCastError = rb_define_class_under(cNArray, "CastError", rb_eStandardError);
1849
+ nary_eShapeError = rb_define_class_under(cNArray, "ShapeError", rb_eStandardError);
1850
+ nary_eOperationError = rb_define_class_under(cNArray, "OperationError", rb_eStandardError);
1851
+ nary_eDimensionError = rb_define_class_under(cNArray, "DimensionError", rb_eStandardError);
1852
+ nary_eValueError = rb_define_class_under(cNArray, "ValueError", rb_eStandardError);
1853
+
1854
+ rb_define_singleton_method(cNArray, "debug=", na_debug_set, 1);
1855
+ rb_define_singleton_method(cNArray, "profile", na_profile, 0);
1856
+ rb_define_singleton_method(cNArray, "profile=", na_profile_set, 1);
1857
+
1858
+ rb_define_singleton_method(cNArray, "inspect_rows", na_inspect_rows, 0);
1859
+ rb_define_singleton_method(cNArray, "inspect_rows=", na_inspect_rows_set, 1);
1860
+ rb_define_singleton_method(cNArray, "inspect_cols", na_inspect_cols, 0);
1861
+ rb_define_singleton_method(cNArray, "inspect_cols=", na_inspect_cols_set, 1);
1862
+
1863
+ /* Ruby allocation framework */
1864
+ rb_undef_alloc_func(cNArray);
1865
+ rb_define_method(cNArray, "initialize", na_initialize, -2);
1866
+ rb_define_method(cNArray, "initialize_copy", na_initialize_copy, 1);
1867
+
1868
+ rb_define_method(cNArray, "free", cumo_na_free_data, 0);
1869
+
1870
+ rb_define_singleton_method(cNArray, "zeros", na_s_zeros, -1);
1871
+ rb_define_singleton_method(cNArray, "ones", na_s_ones, -1);
1872
+ rb_define_singleton_method(cNArray, "linspace", na_s_linspace, -1);
1873
+ rb_define_singleton_method(cNArray, "logspace", na_s_logspace, -1);
1874
+ rb_define_singleton_method(cNArray, "eye", na_s_eye, -1);
1875
+
1876
+ rb_define_method(cNArray, "size", na_size, 0);
1877
+ rb_define_alias (cNArray, "length","size");
1878
+ rb_define_alias (cNArray, "total","size");
1879
+ rb_define_method(cNArray, "shape", na_shape, 0);
1880
+ rb_define_method(cNArray, "ndim", na_ndim,0);
1881
+ rb_define_alias (cNArray, "rank","ndim");
1882
+ rb_define_method(cNArray, "empty?", na_empty_p, 0);
1883
+
1884
+ rb_define_method(cNArray, "debug_info", nary_debug_info, 0);
1885
+
1886
+ rb_define_method(cNArray, "contiguous?", na_check_contiguous, 0);
1887
+
1888
+ rb_define_method(cNArray, "view", na_make_view, 0);
1889
+ rb_define_method(cNArray, "expand_dims", na_expand_dims, 1);
1890
+ rb_define_method(cNArray, "reverse", nary_reverse, -1);
1891
+
1892
+ rb_define_singleton_method(cNArray, "upcast", cumo_na_upcast, 1);
1893
+ rb_define_singleton_method(cNArray, "byte_size", nary_s_byte_size, 0);
1894
+
1895
+ rb_define_singleton_method(cNArray, "from_binary", nary_s_from_binary, -1);
1896
+ rb_define_alias (rb_singleton_class(cNArray), "from_string", "from_binary");
1897
+ rb_define_method(cNArray, "store_binary", nary_store_binary, -1);
1898
+ rb_define_method(cNArray, "to_binary", nary_to_binary, 0);
1899
+ rb_define_alias (cNArray, "to_string", "to_binary");
1900
+ rb_define_method(cNArray, "marshal_dump", nary_marshal_dump, 0);
1901
+ rb_define_method(cNArray, "marshal_load", nary_marshal_load, 1);
1902
+
1903
+ rb_define_method(cNArray, "byte_size", nary_byte_size, 0);
1904
+
1905
+ rb_define_method(cNArray, "cast_to", nary_cast_to, 1);
1906
+
1907
+ rb_define_method(cNArray, "coerce", nary_coerce, 1);
1908
+
1909
+ rb_define_method(cNArray, "column_major?", na_column_major_p, 0);
1910
+ rb_define_method(cNArray, "row_major?", na_row_major_p, 0);
1911
+ rb_define_method(cNArray, "byte_swapped?", na_byte_swapped_p, 0);
1912
+ rb_define_method(cNArray, "host_order?", na_host_order_p, 0);
1913
+
1914
+ rb_define_method(cNArray, "inplace", na_inplace, 0);
1915
+ rb_define_method(cNArray, "inplace?", na_inplace_p, 0);
1916
+ rb_define_method(cNArray, "inplace!", na_inplace_bang, 0);
1917
+ rb_define_method(cNArray, "out_of_place!", na_out_of_place_bang, 0);
1918
+ rb_define_alias (cNArray, "not_inplace!", "out_of_place!");
1919
+
1920
+ rb_define_method(cNArray, "==", na_equal, 1);
1921
+
1922
+ id_allocate = rb_intern("allocate");
1923
+ id_contiguous_stride = rb_intern(CONTIGUOUS_STRIDE);
1924
+ //id_element_bit_size = rb_intern(ELEMENT_BIT_SIZE);
1925
+ id_element_byte_size = rb_intern(ELEMENT_BYTE_SIZE);
1926
+
1927
+ id_fill = rb_intern("fill");
1928
+ id_seq = rb_intern("seq");
1929
+ id_logseq = rb_intern("logseq");
1930
+ id_eye = rb_intern("eye");
1931
+ id_UPCAST = rb_intern("UPCAST");
1932
+ id_cast = rb_intern("cast");
1933
+ id_dup = rb_intern("dup");
1934
+ id_to_host = rb_intern("to_host");
1935
+ id_bracket = rb_intern("[]");
1936
+ id_shift_left = rb_intern("<<");
1937
+ id_eq = rb_intern("eq");
1938
+ id_count_false = rb_intern("count_false");
1939
+ id_count_false_cpu = rb_intern("count_false_cpu");
1940
+ id_axis = rb_intern("axis");
1941
+ id_nan = rb_intern("nan");
1942
+ id_keepdims = rb_intern("keepdims");
1943
+
1944
+ sym_reduce = ID2SYM(rb_intern("reduce"));
1945
+ sym_option = ID2SYM(rb_intern("option"));
1946
+ sym_loop_opt = ID2SYM(rb_intern("loop_opt"));
1947
+ sym_init = ID2SYM(rb_intern("init"));
1948
+ }