cumo 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (266) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +27 -0
  3. data/.travis.yml +5 -0
  4. data/3rd_party/mkmf-cu/.gitignore +36 -0
  5. data/3rd_party/mkmf-cu/Gemfile +3 -0
  6. data/3rd_party/mkmf-cu/LICENSE +21 -0
  7. data/3rd_party/mkmf-cu/README.md +36 -0
  8. data/3rd_party/mkmf-cu/Rakefile +11 -0
  9. data/3rd_party/mkmf-cu/bin/mkmf-cu-nvcc +4 -0
  10. data/3rd_party/mkmf-cu/lib/mkmf-cu.rb +32 -0
  11. data/3rd_party/mkmf-cu/lib/mkmf-cu/cli.rb +80 -0
  12. data/3rd_party/mkmf-cu/lib/mkmf-cu/nvcc.rb +157 -0
  13. data/3rd_party/mkmf-cu/mkmf-cu.gemspec +16 -0
  14. data/3rd_party/mkmf-cu/test/test_mkmf-cu.rb +67 -0
  15. data/CODE_OF_CONDUCT.md +46 -0
  16. data/Gemfile +8 -0
  17. data/LICENSE.txt +82 -0
  18. data/README.md +252 -0
  19. data/Rakefile +43 -0
  20. data/bench/broadcast_fp32.rb +138 -0
  21. data/bench/cumo_bench.rb +193 -0
  22. data/bench/numo_bench.rb +138 -0
  23. data/bench/reduction_fp32.rb +117 -0
  24. data/bin/console +14 -0
  25. data/bin/setup +8 -0
  26. data/cumo.gemspec +32 -0
  27. data/ext/cumo/cuda/cublas.c +278 -0
  28. data/ext/cumo/cuda/driver.c +421 -0
  29. data/ext/cumo/cuda/memory_pool.cpp +185 -0
  30. data/ext/cumo/cuda/memory_pool_impl.cpp +308 -0
  31. data/ext/cumo/cuda/memory_pool_impl.hpp +370 -0
  32. data/ext/cumo/cuda/memory_pool_impl_test.cpp +554 -0
  33. data/ext/cumo/cuda/nvrtc.c +207 -0
  34. data/ext/cumo/cuda/runtime.c +167 -0
  35. data/ext/cumo/cumo.c +148 -0
  36. data/ext/cumo/depend.erb +58 -0
  37. data/ext/cumo/extconf.rb +179 -0
  38. data/ext/cumo/include/cumo.h +25 -0
  39. data/ext/cumo/include/cumo/compat.h +23 -0
  40. data/ext/cumo/include/cumo/cuda/cublas.h +153 -0
  41. data/ext/cumo/include/cumo/cuda/cumo_thrust.hpp +187 -0
  42. data/ext/cumo/include/cumo/cuda/cumo_thrust_complex.hpp +79 -0
  43. data/ext/cumo/include/cumo/cuda/driver.h +22 -0
  44. data/ext/cumo/include/cumo/cuda/memory_pool.h +28 -0
  45. data/ext/cumo/include/cumo/cuda/nvrtc.h +22 -0
  46. data/ext/cumo/include/cumo/cuda/runtime.h +40 -0
  47. data/ext/cumo/include/cumo/indexer.h +238 -0
  48. data/ext/cumo/include/cumo/intern.h +142 -0
  49. data/ext/cumo/include/cumo/intern_fwd.h +38 -0
  50. data/ext/cumo/include/cumo/intern_kernel.h +6 -0
  51. data/ext/cumo/include/cumo/narray.h +429 -0
  52. data/ext/cumo/include/cumo/narray_kernel.h +149 -0
  53. data/ext/cumo/include/cumo/ndloop.h +95 -0
  54. data/ext/cumo/include/cumo/reduce_kernel.h +126 -0
  55. data/ext/cumo/include/cumo/template.h +158 -0
  56. data/ext/cumo/include/cumo/template_kernel.h +77 -0
  57. data/ext/cumo/include/cumo/types/bit.h +40 -0
  58. data/ext/cumo/include/cumo/types/bit_kernel.h +34 -0
  59. data/ext/cumo/include/cumo/types/complex.h +402 -0
  60. data/ext/cumo/include/cumo/types/complex_kernel.h +414 -0
  61. data/ext/cumo/include/cumo/types/complex_macro.h +382 -0
  62. data/ext/cumo/include/cumo/types/complex_macro_kernel.h +186 -0
  63. data/ext/cumo/include/cumo/types/dcomplex.h +46 -0
  64. data/ext/cumo/include/cumo/types/dcomplex_kernel.h +13 -0
  65. data/ext/cumo/include/cumo/types/dfloat.h +47 -0
  66. data/ext/cumo/include/cumo/types/dfloat_kernel.h +14 -0
  67. data/ext/cumo/include/cumo/types/float_def.h +34 -0
  68. data/ext/cumo/include/cumo/types/float_def_kernel.h +39 -0
  69. data/ext/cumo/include/cumo/types/float_macro.h +191 -0
  70. data/ext/cumo/include/cumo/types/float_macro_kernel.h +158 -0
  71. data/ext/cumo/include/cumo/types/int16.h +24 -0
  72. data/ext/cumo/include/cumo/types/int16_kernel.h +23 -0
  73. data/ext/cumo/include/cumo/types/int32.h +24 -0
  74. data/ext/cumo/include/cumo/types/int32_kernel.h +19 -0
  75. data/ext/cumo/include/cumo/types/int64.h +24 -0
  76. data/ext/cumo/include/cumo/types/int64_kernel.h +19 -0
  77. data/ext/cumo/include/cumo/types/int8.h +24 -0
  78. data/ext/cumo/include/cumo/types/int8_kernel.h +19 -0
  79. data/ext/cumo/include/cumo/types/int_macro.h +67 -0
  80. data/ext/cumo/include/cumo/types/int_macro_kernel.h +48 -0
  81. data/ext/cumo/include/cumo/types/real_accum.h +486 -0
  82. data/ext/cumo/include/cumo/types/real_accum_kernel.h +101 -0
  83. data/ext/cumo/include/cumo/types/robj_macro.h +80 -0
  84. data/ext/cumo/include/cumo/types/robj_macro_kernel.h +0 -0
  85. data/ext/cumo/include/cumo/types/robject.h +27 -0
  86. data/ext/cumo/include/cumo/types/robject_kernel.h +7 -0
  87. data/ext/cumo/include/cumo/types/scomplex.h +46 -0
  88. data/ext/cumo/include/cumo/types/scomplex_kernel.h +13 -0
  89. data/ext/cumo/include/cumo/types/sfloat.h +48 -0
  90. data/ext/cumo/include/cumo/types/sfloat_kernel.h +14 -0
  91. data/ext/cumo/include/cumo/types/uint16.h +25 -0
  92. data/ext/cumo/include/cumo/types/uint16_kernel.h +20 -0
  93. data/ext/cumo/include/cumo/types/uint32.h +25 -0
  94. data/ext/cumo/include/cumo/types/uint32_kernel.h +20 -0
  95. data/ext/cumo/include/cumo/types/uint64.h +25 -0
  96. data/ext/cumo/include/cumo/types/uint64_kernel.h +20 -0
  97. data/ext/cumo/include/cumo/types/uint8.h +25 -0
  98. data/ext/cumo/include/cumo/types/uint8_kernel.h +20 -0
  99. data/ext/cumo/include/cumo/types/uint_macro.h +58 -0
  100. data/ext/cumo/include/cumo/types/uint_macro_kernel.h +38 -0
  101. data/ext/cumo/include/cumo/types/xint_macro.h +169 -0
  102. data/ext/cumo/include/cumo/types/xint_macro_kernel.h +88 -0
  103. data/ext/cumo/narray/SFMT-params.h +97 -0
  104. data/ext/cumo/narray/SFMT-params19937.h +46 -0
  105. data/ext/cumo/narray/SFMT.c +620 -0
  106. data/ext/cumo/narray/SFMT.h +167 -0
  107. data/ext/cumo/narray/array.c +638 -0
  108. data/ext/cumo/narray/data.c +961 -0
  109. data/ext/cumo/narray/gen/cogen.rb +56 -0
  110. data/ext/cumo/narray/gen/cogen_kernel.rb +58 -0
  111. data/ext/cumo/narray/gen/def/bit.rb +37 -0
  112. data/ext/cumo/narray/gen/def/dcomplex.rb +39 -0
  113. data/ext/cumo/narray/gen/def/dfloat.rb +37 -0
  114. data/ext/cumo/narray/gen/def/int16.rb +36 -0
  115. data/ext/cumo/narray/gen/def/int32.rb +36 -0
  116. data/ext/cumo/narray/gen/def/int64.rb +36 -0
  117. data/ext/cumo/narray/gen/def/int8.rb +36 -0
  118. data/ext/cumo/narray/gen/def/robject.rb +37 -0
  119. data/ext/cumo/narray/gen/def/scomplex.rb +39 -0
  120. data/ext/cumo/narray/gen/def/sfloat.rb +37 -0
  121. data/ext/cumo/narray/gen/def/uint16.rb +36 -0
  122. data/ext/cumo/narray/gen/def/uint32.rb +36 -0
  123. data/ext/cumo/narray/gen/def/uint64.rb +36 -0
  124. data/ext/cumo/narray/gen/def/uint8.rb +36 -0
  125. data/ext/cumo/narray/gen/erbpp2.rb +346 -0
  126. data/ext/cumo/narray/gen/narray_def.rb +268 -0
  127. data/ext/cumo/narray/gen/spec.rb +425 -0
  128. data/ext/cumo/narray/gen/tmpl/accum.c +86 -0
  129. data/ext/cumo/narray/gen/tmpl/accum_binary.c +121 -0
  130. data/ext/cumo/narray/gen/tmpl/accum_binary_kernel.cu +61 -0
  131. data/ext/cumo/narray/gen/tmpl/accum_index.c +119 -0
  132. data/ext/cumo/narray/gen/tmpl/accum_index_kernel.cu +66 -0
  133. data/ext/cumo/narray/gen/tmpl/accum_kernel.cu +12 -0
  134. data/ext/cumo/narray/gen/tmpl/alloc_func.c +107 -0
  135. data/ext/cumo/narray/gen/tmpl/allocate.c +37 -0
  136. data/ext/cumo/narray/gen/tmpl/aref.c +66 -0
  137. data/ext/cumo/narray/gen/tmpl/aref_cpu.c +50 -0
  138. data/ext/cumo/narray/gen/tmpl/aset.c +56 -0
  139. data/ext/cumo/narray/gen/tmpl/binary.c +162 -0
  140. data/ext/cumo/narray/gen/tmpl/binary2.c +70 -0
  141. data/ext/cumo/narray/gen/tmpl/binary2_kernel.cu +15 -0
  142. data/ext/cumo/narray/gen/tmpl/binary_kernel.cu +31 -0
  143. data/ext/cumo/narray/gen/tmpl/binary_s.c +45 -0
  144. data/ext/cumo/narray/gen/tmpl/binary_s_kernel.cu +15 -0
  145. data/ext/cumo/narray/gen/tmpl/bincount.c +181 -0
  146. data/ext/cumo/narray/gen/tmpl/cast.c +44 -0
  147. data/ext/cumo/narray/gen/tmpl/cast_array.c +13 -0
  148. data/ext/cumo/narray/gen/tmpl/class.c +9 -0
  149. data/ext/cumo/narray/gen/tmpl/class_kernel.cu +6 -0
  150. data/ext/cumo/narray/gen/tmpl/clip.c +121 -0
  151. data/ext/cumo/narray/gen/tmpl/coerce_cast.c +10 -0
  152. data/ext/cumo/narray/gen/tmpl/complex_accum_kernel.cu +129 -0
  153. data/ext/cumo/narray/gen/tmpl/cond_binary.c +68 -0
  154. data/ext/cumo/narray/gen/tmpl/cond_binary_kernel.cu +18 -0
  155. data/ext/cumo/narray/gen/tmpl/cond_unary.c +46 -0
  156. data/ext/cumo/narray/gen/tmpl/cum.c +50 -0
  157. data/ext/cumo/narray/gen/tmpl/each.c +47 -0
  158. data/ext/cumo/narray/gen/tmpl/each_with_index.c +70 -0
  159. data/ext/cumo/narray/gen/tmpl/ewcomp.c +79 -0
  160. data/ext/cumo/narray/gen/tmpl/ewcomp_kernel.cu +19 -0
  161. data/ext/cumo/narray/gen/tmpl/extract.c +22 -0
  162. data/ext/cumo/narray/gen/tmpl/extract_cpu.c +26 -0
  163. data/ext/cumo/narray/gen/tmpl/extract_data.c +53 -0
  164. data/ext/cumo/narray/gen/tmpl/eye.c +105 -0
  165. data/ext/cumo/narray/gen/tmpl/eye_kernel.cu +19 -0
  166. data/ext/cumo/narray/gen/tmpl/fill.c +52 -0
  167. data/ext/cumo/narray/gen/tmpl/fill_kernel.cu +29 -0
  168. data/ext/cumo/narray/gen/tmpl/float_accum_kernel.cu +106 -0
  169. data/ext/cumo/narray/gen/tmpl/format.c +62 -0
  170. data/ext/cumo/narray/gen/tmpl/format_to_a.c +49 -0
  171. data/ext/cumo/narray/gen/tmpl/frexp.c +38 -0
  172. data/ext/cumo/narray/gen/tmpl/gemm.c +203 -0
  173. data/ext/cumo/narray/gen/tmpl/init_class.c +20 -0
  174. data/ext/cumo/narray/gen/tmpl/init_module.c +12 -0
  175. data/ext/cumo/narray/gen/tmpl/inspect.c +21 -0
  176. data/ext/cumo/narray/gen/tmpl/lib.c +50 -0
  177. data/ext/cumo/narray/gen/tmpl/lib_kernel.cu +24 -0
  178. data/ext/cumo/narray/gen/tmpl/logseq.c +102 -0
  179. data/ext/cumo/narray/gen/tmpl/logseq_kernel.cu +31 -0
  180. data/ext/cumo/narray/gen/tmpl/map_with_index.c +98 -0
  181. data/ext/cumo/narray/gen/tmpl/median.c +66 -0
  182. data/ext/cumo/narray/gen/tmpl/minmax.c +47 -0
  183. data/ext/cumo/narray/gen/tmpl/module.c +9 -0
  184. data/ext/cumo/narray/gen/tmpl/module_kernel.cu +1 -0
  185. data/ext/cumo/narray/gen/tmpl/new_dim0.c +15 -0
  186. data/ext/cumo/narray/gen/tmpl/new_dim0_kernel.cu +8 -0
  187. data/ext/cumo/narray/gen/tmpl/poly.c +50 -0
  188. data/ext/cumo/narray/gen/tmpl/pow.c +97 -0
  189. data/ext/cumo/narray/gen/tmpl/pow_kernel.cu +29 -0
  190. data/ext/cumo/narray/gen/tmpl/powint.c +17 -0
  191. data/ext/cumo/narray/gen/tmpl/qsort.c +212 -0
  192. data/ext/cumo/narray/gen/tmpl/rand.c +168 -0
  193. data/ext/cumo/narray/gen/tmpl/rand_norm.c +121 -0
  194. data/ext/cumo/narray/gen/tmpl/real_accum_kernel.cu +75 -0
  195. data/ext/cumo/narray/gen/tmpl/seq.c +112 -0
  196. data/ext/cumo/narray/gen/tmpl/seq_kernel.cu +43 -0
  197. data/ext/cumo/narray/gen/tmpl/set2.c +57 -0
  198. data/ext/cumo/narray/gen/tmpl/sort.c +48 -0
  199. data/ext/cumo/narray/gen/tmpl/sort_index.c +111 -0
  200. data/ext/cumo/narray/gen/tmpl/store.c +41 -0
  201. data/ext/cumo/narray/gen/tmpl/store_array.c +187 -0
  202. data/ext/cumo/narray/gen/tmpl/store_array_kernel.cu +58 -0
  203. data/ext/cumo/narray/gen/tmpl/store_bit.c +86 -0
  204. data/ext/cumo/narray/gen/tmpl/store_bit_kernel.cu +66 -0
  205. data/ext/cumo/narray/gen/tmpl/store_from.c +81 -0
  206. data/ext/cumo/narray/gen/tmpl/store_from_kernel.cu +58 -0
  207. data/ext/cumo/narray/gen/tmpl/store_kernel.cu +3 -0
  208. data/ext/cumo/narray/gen/tmpl/store_numeric.c +9 -0
  209. data/ext/cumo/narray/gen/tmpl/to_a.c +43 -0
  210. data/ext/cumo/narray/gen/tmpl/unary.c +132 -0
  211. data/ext/cumo/narray/gen/tmpl/unary2.c +60 -0
  212. data/ext/cumo/narray/gen/tmpl/unary_kernel.cu +72 -0
  213. data/ext/cumo/narray/gen/tmpl/unary_ret2.c +34 -0
  214. data/ext/cumo/narray/gen/tmpl/unary_s.c +86 -0
  215. data/ext/cumo/narray/gen/tmpl/unary_s_kernel.cu +58 -0
  216. data/ext/cumo/narray/gen/tmpl_bit/allocate.c +24 -0
  217. data/ext/cumo/narray/gen/tmpl_bit/aref.c +54 -0
  218. data/ext/cumo/narray/gen/tmpl_bit/aref_cpu.c +57 -0
  219. data/ext/cumo/narray/gen/tmpl_bit/aset.c +56 -0
  220. data/ext/cumo/narray/gen/tmpl_bit/binary.c +98 -0
  221. data/ext/cumo/narray/gen/tmpl_bit/bit_count.c +64 -0
  222. data/ext/cumo/narray/gen/tmpl_bit/bit_count_cpu.c +88 -0
  223. data/ext/cumo/narray/gen/tmpl_bit/bit_count_kernel.cu +76 -0
  224. data/ext/cumo/narray/gen/tmpl_bit/bit_reduce.c +133 -0
  225. data/ext/cumo/narray/gen/tmpl_bit/each.c +48 -0
  226. data/ext/cumo/narray/gen/tmpl_bit/each_with_index.c +70 -0
  227. data/ext/cumo/narray/gen/tmpl_bit/extract.c +30 -0
  228. data/ext/cumo/narray/gen/tmpl_bit/extract_cpu.c +29 -0
  229. data/ext/cumo/narray/gen/tmpl_bit/fill.c +69 -0
  230. data/ext/cumo/narray/gen/tmpl_bit/format.c +64 -0
  231. data/ext/cumo/narray/gen/tmpl_bit/format_to_a.c +51 -0
  232. data/ext/cumo/narray/gen/tmpl_bit/inspect.c +21 -0
  233. data/ext/cumo/narray/gen/tmpl_bit/mask.c +136 -0
  234. data/ext/cumo/narray/gen/tmpl_bit/none_p.c +14 -0
  235. data/ext/cumo/narray/gen/tmpl_bit/store_array.c +108 -0
  236. data/ext/cumo/narray/gen/tmpl_bit/store_bit.c +70 -0
  237. data/ext/cumo/narray/gen/tmpl_bit/store_from.c +60 -0
  238. data/ext/cumo/narray/gen/tmpl_bit/to_a.c +47 -0
  239. data/ext/cumo/narray/gen/tmpl_bit/unary.c +81 -0
  240. data/ext/cumo/narray/gen/tmpl_bit/where.c +90 -0
  241. data/ext/cumo/narray/gen/tmpl_bit/where2.c +95 -0
  242. data/ext/cumo/narray/index.c +880 -0
  243. data/ext/cumo/narray/kwargs.c +153 -0
  244. data/ext/cumo/narray/math.c +142 -0
  245. data/ext/cumo/narray/narray.c +1948 -0
  246. data/ext/cumo/narray/ndloop.c +2105 -0
  247. data/ext/cumo/narray/rand.c +45 -0
  248. data/ext/cumo/narray/step.c +474 -0
  249. data/ext/cumo/narray/struct.c +886 -0
  250. data/lib/cumo.rb +3 -0
  251. data/lib/cumo/cuda.rb +11 -0
  252. data/lib/cumo/cuda/compile_error.rb +36 -0
  253. data/lib/cumo/cuda/compiler.rb +161 -0
  254. data/lib/cumo/cuda/device.rb +47 -0
  255. data/lib/cumo/cuda/link_state.rb +31 -0
  256. data/lib/cumo/cuda/module.rb +40 -0
  257. data/lib/cumo/cuda/nvrtc_program.rb +27 -0
  258. data/lib/cumo/linalg.rb +12 -0
  259. data/lib/cumo/narray.rb +2 -0
  260. data/lib/cumo/narray/extra.rb +1278 -0
  261. data/lib/erbpp.rb +294 -0
  262. data/lib/erbpp/line_number.rb +137 -0
  263. data/lib/erbpp/narray_def.rb +381 -0
  264. data/numo-narray-version +1 -0
  265. data/run.gdb +7 -0
  266. metadata +353 -0
@@ -0,0 +1,961 @@
1
+ #include <ruby.h>
2
+ #include "cumo/narray.h"
3
+ #include "cumo/template.h"
4
+
5
+ static VALUE sym_mulsum;
6
+ static ID id_mulsum;
7
+ static ID id_respond_to_p;
8
+ static ID id_store;
9
+ static ID id_swap_byte;
10
+
11
+ // ---------------------------------------------------------------------
12
+
13
/*
 * LOOP_UNARY_PTR(lp, proc)
 *
 * Runs the innermost loop described by `lp` and calls `proc(src, dst)` once
 * per element, where `src` addresses loop argument 0 and `dst` addresses
 * loop argument 1 (both as char*).
 *
 * The iteration count and, for each argument, the base pointer, byte step
 * and optional index array are obtained through INIT_COUNTER / INIT_PTR_IDX
 * (defined elsewhere).  For each argument independently:
 *   - index array non-NULL: element k lives at base + idx[k];
 *   - index array NULL:     the pointer advances by the byte step each turn.
 *
 * `proc` may be a function or a function-like macro.  The expansion is a
 * brace block, so any locals `proc` refers to by name (e.g. an element size)
 * must be visible at the point of use.
 *
 * Fix: in the "arg 0 strided / arg 1 indexed" branch the destination was
 * computed from p1 (the source base) instead of p2, so an indexed
 * destination caused writes into the source buffer.
 */
#define LOOP_UNARY_PTR(lp,proc)                         \
{                                                       \
    size_t  i;                                          \
    ssize_t s1, s2;                                     \
    char   *p1, *p2;                                    \
    size_t *idx1, *idx2;                                \
    INIT_COUNTER(lp, i);                                \
    INIT_PTR_IDX(lp, 0, p1, s1, idx1);                  \
    INIT_PTR_IDX(lp, 1, p2, s2, idx2);                  \
    if (idx1) {                                         \
        if (idx2) {                                     \
            for (; i--;) {                              \
                proc((p1+*idx1), (p2+*idx2));           \
                idx1++;                                 \
                idx2++;                                 \
            }                                           \
        } else {                                        \
            for (; i--;) {                              \
                proc((p1+*idx1), p2);                   \
                idx1++;                                 \
                p2 += s2;                               \
            }                                           \
        }                                               \
    } else {                                            \
        if (idx2) {                                     \
            for (; i--;) {                              \
                proc(p1, (p2+*idx2));                   \
                p1 += s1;                               \
                idx2++;                                 \
            }                                           \
        } else {                                        \
            for (; i--;) {                              \
                proc(p1, p2);                           \
                p1 += s1;                               \
                p2 += s2;                               \
            }                                           \
        }                                               \
    }                                                   \
}
52
+
53
+ #define m_memcpy(src,dst) memcpy(dst,src,e)
54
+ static void
55
+ iter_copy_bytes(na_loop_t *const lp)
56
+ {
57
+ size_t e;
58
+ e = lp->args[0].elmsz;
59
+ // TODO(sonots): CUDA kernelize
60
+ LOOP_UNARY_PTR(lp,m_memcpy);
61
+ }
62
+
63
+ VALUE
64
+ na_copy(VALUE self)
65
+ {
66
+ VALUE v;
67
+ ndfunc_arg_in_t ain[1] = {{Qnil,0}};
68
+ ndfunc_arg_out_t aout[1] = {{INT2FIX(0),0}};
69
+ ndfunc_t ndf = { iter_copy_bytes, FULL_LOOP, 1, 1, ain, aout };
70
+
71
+ v = na_ndloop(&ndf, 1, self);
72
+ return v;
73
+ }
74
+
75
+ VALUE
76
+ na_store(VALUE self, VALUE src)
77
+ {
78
+ return rb_funcall(self,id_store,1,src);
79
+ }
80
+
81
+ // ---------------------------------------------------------------------
82
+
83
+ #define m_swap_byte(q1,q2) \
84
+ { \
85
+ size_t j; \
86
+ memcpy(b1,q1,e); \
87
+ for (j=0; j<e; j++) { \
88
+ b2[e-1-j] = b1[j]; \
89
+ } \
90
+ memcpy(q2,b2,e); \
91
+ }
92
+
93
+ static void
94
+ iter_swap_byte(na_loop_t *const lp)
95
+ {
96
+ char *b1, *b2;
97
+ size_t e;
98
+
99
+ e = lp->args[0].elmsz;
100
+ b1 = ALLOCA_N(char, e);
101
+ b2 = ALLOCA_N(char, e);
102
+ LOOP_UNARY_PTR(lp,m_swap_byte);
103
+ }
104
+
105
+ static VALUE
106
+ nary_swap_byte(VALUE self)
107
+ {
108
+ VALUE v;
109
+ ndfunc_arg_in_t ain[1] = {{Qnil,0}};
110
+ ndfunc_arg_out_t aout[1] = {{INT2FIX(0),0}};
111
+ ndfunc_t ndf = { iter_swap_byte, FULL_LOOP|NDF_ACCEPT_BYTESWAP,
112
+ 1, 1, ain, aout };
113
+
114
+ v = na_ndloop(&ndf, 1, self);
115
+ if (self!=v) {
116
+ na_copy_flags(self, v);
117
+ }
118
+ REVERSE_ENDIAN(v);
119
+ return v;
120
+ }
121
+
122
+
123
+ static VALUE
124
+ nary_to_network(VALUE self)
125
+ {
126
+ if (TEST_BIG_ENDIAN(self)) {
127
+ return self;
128
+ }
129
+ return rb_funcall(self, id_swap_byte, 0);
130
+ }
131
+
132
+ static VALUE
133
+ nary_to_vacs(VALUE self)
134
+ {
135
+ if (TEST_LITTLE_ENDIAN(self)) {
136
+ return self;
137
+ }
138
+ return rb_funcall(self, id_swap_byte, 0);
139
+ }
140
+
141
+ static VALUE
142
+ nary_to_host(VALUE self)
143
+ {
144
+ if (TEST_HOST_ORDER(self)) {
145
+ return self;
146
+ }
147
+ return rb_funcall(self, id_swap_byte, 0);
148
+ }
149
+
150
+ static VALUE
151
+ nary_to_swapped(VALUE self)
152
+ {
153
+ if (TEST_BYTE_SWAPPED(self)) {
154
+ return self;
155
+ }
156
+ return rb_funcall(self, id_swap_byte, 0);
157
+ }
158
+
159
+
160
+ //----------------------------------------------------------------------
161
+
162
+ static inline int
163
+ check_axis(int axis, int ndim)
164
+ {
165
+ if (axis < -ndim || axis >= ndim) {
166
+ rb_raise(nary_eDimensionError,"invalid axis (%d for %d-dimension)",
167
+ axis, ndim);
168
+ }
169
+ if (axis < 0) {
170
+ axis += ndim;
171
+ }
172
+ return axis;
173
+ }
174
+
175
+ /*
176
+ Interchange two axes.
177
+ @overload swapaxes(axis1,axis2)
178
+ @param [Integer] axis1
179
+ @param [Integer] axis2
180
+ @return [Cumo::NArray] view of NArray.
181
+ @example
182
+ x = Cumo::Int32[[1,2,3]]
183
+
184
+ p x.swapaxes(0,1)
185
+ # Cumo::Int32(view)#shape=[3,1]
186
+ # [[1],
187
+ # [2],
188
+ # [3]]
189
+
190
+ p x = Cumo::Int32[[[0,1],[2,3]],[[4,5],[6,7]]]
191
+ # Cumo::Int32#shape=[2,2,2]
192
+ # [[[0, 1],
193
+ # [2, 3]],
194
+ # [[4, 5],
195
+ # [6, 7]]]
196
+
197
+ p x.swapaxes(0,2)
198
+ # Cumo::Int32(view)#shape=[2,2,2]
199
+ # [[[0, 4],
200
+ # [2, 6]],
201
+ # [[1, 5],
202
+ # [3, 7]]]
203
+ */
204
+ static VALUE
205
+ na_swapaxes(VALUE self, VALUE a1, VALUE a2)
206
+ {
207
+ int i, j, ndim;
208
+ size_t tmp_shape;
209
+ stridx_t tmp_stridx;
210
+ narray_view_t *na;
211
+ volatile VALUE view;
212
+
213
+ view = na_make_view(self);
214
+ GetNArrayView(view,na);
215
+
216
+ ndim = na->base.ndim;
217
+ i = check_axis(NUM2INT(a1), ndim);
218
+ j = check_axis(NUM2INT(a2), ndim);
219
+
220
+ tmp_shape = na->base.shape[i];
221
+ tmp_stridx = na->stridx[i];
222
+ na->base.shape[i] = na->base.shape[j];
223
+ na->stridx[i] = na->stridx[j];
224
+ na->base.shape[j] = tmp_shape;
225
+ na->stridx[j] = tmp_stridx;
226
+
227
+ return view;
228
+ }
229
+
230
+ static VALUE
231
+ na_transpose_map(VALUE self, int *map)
232
+ {
233
+ int i, ndim;
234
+ size_t *shape;
235
+ stridx_t *stridx;
236
+ narray_view_t *na;
237
+ volatile VALUE view;
238
+
239
+ view = na_make_view(self);
240
+ GetNArrayView(view,na);
241
+
242
+ ndim = na->base.ndim;
243
+ shape = ALLOCA_N(size_t,ndim);
244
+ stridx = ALLOCA_N(stridx_t,ndim);
245
+
246
+ for (i=0; i<ndim; i++) {
247
+ shape[i] = na->base.shape[i];
248
+ stridx[i] = na->stridx[i];
249
+ }
250
+ for (i=0; i<ndim; i++) {
251
+ na->base.shape[i] = shape[map[i]];
252
+ na->stridx[i] = stridx[map[i]];
253
+ }
254
+ return view;
255
+ }
256
+
257
+
258
+ #define SWAP(a,b,tmp) {tmp=a;a=b;b=tmp;}
259
+
260
+ static VALUE
261
+ na_transpose(int argc, VALUE *argv, VALUE self)
262
+ {
263
+ int ndim, *map, *permute;
264
+ int i, d;
265
+ bool is_positive, is_negative;
266
+ narray_t *na1;
267
+
268
+ GetNArray(self,na1);
269
+ ndim = na1->ndim;
270
+ if (ndim < 2) {
271
+ if (argc > 0) {
272
+ rb_raise(rb_eArgError, "unnecessary argument for 1-d array");
273
+ }
274
+ return na_make_view(self);
275
+ }
276
+ map = ALLOCA_N(int,ndim);
277
+ if (argc == 0) {
278
+ for (i=0; i < ndim; i++) {
279
+ map[i] = ndim-1-i;
280
+ }
281
+ return na_transpose_map(self,map);
282
+ }
283
+ // with argument
284
+ if (argc > ndim) {
285
+ rb_raise(rb_eArgError, "more arguments than ndim");
286
+ }
287
+ for (i=0; i < ndim; i++) {
288
+ map[i] = i;
289
+ }
290
+ permute = ALLOCA_N(int,argc);
291
+ for (i=0; i < argc; i++) {
292
+ permute[i] = 0;
293
+ }
294
+ is_positive = is_negative = 0;
295
+ for (i=0; i < argc; i++) {
296
+ if (TYPE(argv[i]) != T_FIXNUM) {
297
+ rb_raise(rb_eArgError, "invalid argument");
298
+ }
299
+ d = FIX2INT(argv[i]);
300
+ if (d >= 0) {
301
+ if (d >= argc) {
302
+ rb_raise(rb_eArgError, "out of dimension range");
303
+ }
304
+ if (is_negative) {
305
+ rb_raise(rb_eArgError, "dimension must be non-negative only or negative only");
306
+ }
307
+ if (permute[d]) {
308
+ rb_raise(rb_eArgError, "not permutation");
309
+ }
310
+ map[i] = d;
311
+ permute[d] = 1;
312
+ is_positive = 1;
313
+ } else {
314
+ if (d < -argc) {
315
+ rb_raise(rb_eArgError, "out of dimension range");
316
+ }
317
+ if (is_positive) {
318
+ rb_raise(rb_eArgError, "dimension must be non-negative only or negative only");
319
+ }
320
+ if (permute[argc+d]) {
321
+ rb_raise(rb_eArgError, "not permutation");
322
+ }
323
+ map[ndim-argc+i] = ndim+d;
324
+ permute[argc+d] = 1;
325
+ is_negative = 1;
326
+ }
327
+ }
328
+ return na_transpose_map(self,map);
329
+ }
330
+
331
+ //----------------------------------------------------------------------
332
+
333
+ static void
334
+ na_check_reshape(int argc, VALUE *argv, VALUE self, size_t *shape)
335
+ {
336
+ int i, unfixed=-1;
337
+ size_t total=1;
338
+ narray_t *na;
339
+
340
+ if (argc == 0) {
341
+ rb_raise(rb_eArgError, "No argrument");
342
+ }
343
+ GetNArray(self,na);
344
+ if (NA_SIZE(na) == 0) {
345
+ rb_raise(rb_eRuntimeError, "cannot reshape empty array");
346
+ }
347
+
348
+ /* get shape from argument */
349
+ for (i=0; i<argc; ++i) {
350
+ switch(TYPE(argv[i])) {
351
+ case T_FIXNUM:
352
+ total *= shape[i] = NUM2INT(argv[i]);
353
+ break;
354
+ case T_NIL:
355
+ case T_TRUE:
356
+ if (unfixed >= 0) {
357
+ rb_raise(rb_eArgError,"multiple unfixed dimension");
358
+ }
359
+ unfixed = i;
360
+ break;
361
+ default:
362
+ rb_raise(rb_eArgError,"illegal type");
363
+ }
364
+ }
365
+
366
+ if (unfixed>=0) {
367
+ if (NA_SIZE(na) % total != 0) {
368
+ rb_raise(rb_eArgError, "Total size size must be divisor");
369
+ }
370
+ shape[unfixed] = NA_SIZE(na) / total;
371
+ }
372
+ else if (total != NA_SIZE(na)) {
373
+ rb_raise(rb_eArgError, "Total size must be same");
374
+ }
375
+ }
376
+
377
+ /*
378
+ Change the shape of self NArray without coping.
379
+ Raise exception if self is non-contiguous.
380
+
381
+ @overload reshape!(size0,size1,...)
382
+ @param sizeN [Integer] new shape
383
+ @return [Cumo::NArray] return self.
384
+ @example
385
+ */
386
+ static VALUE
387
+ na_reshape_bang(int argc, VALUE *argv, VALUE self)
388
+ {
389
+ size_t *shape;
390
+ narray_t *na;
391
+
392
+ if (na_check_contiguous(self)==Qfalse) {
393
+ rb_raise(rb_eStandardError, "cannot change shape of non-contiguous NArray");
394
+ }
395
+ shape = ALLOCA_N(size_t, argc);
396
+ na_check_reshape(argc, argv, self, shape);
397
+
398
+ GetNArray(self, na);
399
+ na_setup_shape(na, argc, shape);
400
+ return self;
401
+ }
402
+
403
+ /*
404
+ Copy and change the shape of NArray.
405
+ Returns a copied NArray.
406
+
407
+ @overload reshape(size0,size1,...)
408
+ @param sizeN [Integer] new shape
409
+ @return [Cumo::NArray] return self.
410
+ @example
411
+ */
412
+ static VALUE
413
+ na_reshape(int argc, VALUE *argv, VALUE self)
414
+ {
415
+ size_t *shape;
416
+ narray_t *na;
417
+ VALUE copy;
418
+
419
+ shape = ALLOCA_N(size_t, argc);
420
+ na_check_reshape(argc, argv, self, shape);
421
+
422
+ copy = rb_funcall(self, rb_intern("dup"), 0);
423
+ GetNArray(copy, na);
424
+ na_setup_shape(na, argc, shape);
425
+ return copy;
426
+ }
427
+
428
+ //----------------------------------------------------------------------
429
+
430
+ VALUE
431
+ na_flatten_dim(VALUE self, int sd)
432
+ {
433
+ int i, nd, fd;
434
+ size_t j;
435
+ size_t *c, *pos, *idx1, *idx2;
436
+ size_t stride;
437
+ size_t *shape, size;
438
+ stridx_t sdx;
439
+ narray_t *na;
440
+ narray_view_t *na1, *na2;
441
+ volatile VALUE view;
442
+
443
+ GetNArray(self,na);
444
+ nd = na->ndim;
445
+
446
+ if (nd==0) {
447
+ return na_make_view(self);
448
+ }
449
+ if (sd<0 || sd>=nd) {
450
+ rb_bug("na_flaten_dim: start_dim (%d) out of range",sd);
451
+ }
452
+
453
+ // new shape
454
+ shape = ALLOCA_N(size_t,sd+1);
455
+ for (i=0; i<sd; i++) {
456
+ shape[i] = na->shape[i];
457
+ }
458
+ size = 1;
459
+ for (i=sd; i<nd; i++) {
460
+ size *= na->shape[i];
461
+ }
462
+ shape[sd] = size;
463
+
464
+ // new object
465
+ view = na_s_allocate_view(CLASS_OF(self));
466
+ na_copy_flags(self, view);
467
+ GetNArrayView(view, na2);
468
+
469
+ // new stride
470
+ na_setup_shape((narray_t*)na2, sd+1, shape);
471
+ na2->stridx = ALLOC_N(stridx_t,sd+1);
472
+
473
+ switch(na->type) {
474
+ case NARRAY_DATA_T:
475
+ case NARRAY_FILEMAP_T:
476
+ stride = nary_element_stride(self);
477
+ for (i=sd+1; i--; ) {
478
+ //printf("data: i=%d shpae[i]=%ld stride=%ld\n",i,shape[i],stride);
479
+ SDX_SET_STRIDE(na2->stridx[i],stride);
480
+ stride *= shape[i];
481
+ }
482
+ na2->offset = 0;
483
+ na2->data = self;
484
+ break;
485
+ case NARRAY_VIEW_T:
486
+ GetNArrayView(self, na1);
487
+ na2->data = na1->data;
488
+ na2->offset = na1->offset;
489
+ for (i=0; i<sd; i++) {
490
+ if (SDX_IS_INDEX(na1->stridx[i])) {
491
+ idx1 = SDX_GET_INDEX(na1->stridx[i]);
492
+ idx2 = ALLOC_N(size_t, shape[i]);
493
+ for (j=0; j<shape[i]; j++) {
494
+ idx2[j] = idx1[j];
495
+ }
496
+ SDX_SET_INDEX(na2->stridx[i],idx2);
497
+ } else {
498
+ na2->stridx[i] = na1->stridx[i];
499
+ //printf("view: i=%d stridx=%d\n",i,SDX_GET_STRIDE(sdx));
500
+ }
501
+ }
502
+ // flat dimenion == last dimension
503
+ if (RTEST(na_check_ladder(self,sd))) {
504
+ //if (0) {
505
+ na2->stridx[sd] = na1->stridx[nd-1];
506
+ } else {
507
+ // set index
508
+ idx2 = ALLOC_N(size_t, shape[sd]);
509
+ SDX_SET_INDEX(na2->stridx[sd],idx2);
510
+ // init for md-loop
511
+ fd = nd-sd;
512
+ c = ALLOC_N(size_t, fd);
513
+ for (i=0; i<fd; i++) c[i]=0;
514
+ pos = ALLOC_N(size_t, fd+1);
515
+ pos[0] = 0;
516
+ // md-loop
517
+ for (i=j=0;;) {
518
+ for (; i<fd; i++) {
519
+ sdx = na1->stridx[i+sd];
520
+ if (SDX_IS_INDEX(sdx)) {
521
+ pos[i+1] = pos[i] + SDX_GET_INDEX(sdx)[c[i]];
522
+ } else {
523
+ pos[i+1] = pos[i] + SDX_GET_STRIDE(sdx)*c[i];
524
+ }
525
+ }
526
+ idx2[j++] = pos[i];
527
+ for (;;) {
528
+ if (i==0) goto loop_end;
529
+ i--;
530
+ c[i]++;
531
+ if (c[i] < na1->base.shape[i+sd]) break;
532
+ c[i] = 0;
533
+ }
534
+ }
535
+ loop_end:
536
+ xfree(pos);
537
+ xfree(c);
538
+ }
539
+ break;
540
+ }
541
+ return view;
542
+ }
543
+
544
+ VALUE
545
+ na_flatten(VALUE self)
546
+ {
547
+ return na_flatten_dim(self,0);
548
+ }
549
+
550
+ //----------------------------------------------------------------------
551
+
552
+ #define MIN(a,b) (((a)<(b))?(a):(b))
553
+
554
+ /*
555
+ Returns a diagonal view of NArray
556
+ @overload diagonal([offset,axes])
557
+ @param [Integer] offset Diagonal offset from the main diagonal.
558
+ The default is 0. k>0 for diagonals above the main diagonal,
559
+ and k<0 for diagonals below the main diagonal.
560
+ @param [Array] axes Array of axes to be used as the 2-d sub-arrays
561
+ from which the diagonals should be taken. Defaults to last-two
562
+ axes ([-2,-1]).
563
+ @return [Cumo::NArray] diagonal view of NArray.
564
+ @example
565
+ a = Cumo::DFloat.new(4,5).seq
566
+ => Cumo::DFloat#shape=[4,5]
567
+ [[0, 1, 2, 3, 4],
568
+ [5, 6, 7, 8, 9],
569
+ [10, 11, 12, 13, 14],
570
+ [15, 16, 17, 18, 19]]
571
+ b = a.diagonal(1)
572
+ => Cumo::DFloat(view)#shape=[4]
573
+ [1, 7, 13, 19]
574
+ b.store(0)
575
+ a
576
+ => Cumo::DFloat#shape=[4,5]
577
+ [[0, 0, 2, 3, 4],
578
+ [5, 6, 0, 8, 9],
579
+ [10, 11, 12, 0, 14],
580
+ [15, 16, 17, 18, 0]]
581
+ b.store([1,2,3,4])
582
+ a
583
+ => Cumo::DFloat#shape=[4,5]
584
+ [[0, 1, 2, 3, 4],
585
+ [5, 6, 2, 8, 9],
586
+ [10, 11, 12, 3, 14],
587
+ [15, 16, 17, 18, 4]]
588
+ */
589
+ static VALUE
590
+ na_diagonal(int argc, VALUE *argv, VALUE self)
591
+ {
592
+ int i, k, nd;
593
+ size_t j;
594
+ size_t *idx0, *idx1, *diag_idx;
595
+ size_t *shape;
596
+ size_t diag_size;
597
+ ssize_t stride, stride0, stride1;
598
+ narray_t *na;
599
+ narray_view_t *na1, *na2;
600
+ VALUE view;
601
+ VALUE vofs=0, vaxes=0;
602
+ ssize_t kofs;
603
+ size_t k0, k1;
604
+ int ax[2];
605
+
606
+ // check arguments
607
+ if (argc>2) {
608
+ rb_raise(rb_eArgError,"too many arguments (%d for 0..2)",argc);
609
+ }
610
+
611
+ for (i=0; i<argc; i++) {
612
+ switch(TYPE(argv[i])) {
613
+ case T_FIXNUM:
614
+ if (vofs) {
615
+ rb_raise(rb_eArgError,"offset is given twice");
616
+ }
617
+ vofs = argv[i];
618
+ break;
619
+ case T_ARRAY:
620
+ if (vaxes) {
621
+ rb_raise(rb_eArgError,"axes-array is given twice");
622
+ }
623
+ vaxes = argv[i];
624
+ break;
625
+ }
626
+ }
627
+
628
+ if (vofs) {
629
+ kofs = NUM2SSIZET(vofs);
630
+ } else {
631
+ kofs = 0;
632
+ }
633
+
634
+ GetNArray(self,na);
635
+ nd = na->ndim;
636
+ if (nd < 2) {
637
+ rb_raise(nary_eDimensionError,"less than 2-d array");
638
+ }
639
+
640
+ if (vaxes) {
641
+ if (RARRAY_LEN(vaxes) != 2) {
642
+ rb_raise(rb_eArgError,"axes must be 2-element array");
643
+ }
644
+ ax[0] = NUM2INT(RARRAY_AREF(vaxes,0));
645
+ ax[1] = NUM2INT(RARRAY_AREF(vaxes,1));
646
+ if (ax[0]<-nd || ax[0]>=nd || ax[1]<-nd || ax[1]>=nd) {
647
+ rb_raise(rb_eArgError,"axis out of range:[%d,%d]",ax[0],ax[1]);
648
+ }
649
+ if (ax[0]<0) {ax[0] += nd;}
650
+ if (ax[1]<0) {ax[1] += nd;}
651
+ if (ax[0]==ax[1]) {
652
+ rb_raise(rb_eArgError,"same axes:[%d,%d]",ax[0],ax[1]);
653
+ }
654
+ } else {
655
+ ax[0] = nd-2;
656
+ ax[1] = nd-1;
657
+ }
658
+
659
+ // Diagonal offset from the main diagonal.
660
+ if (kofs >= 0) {
661
+ k0 = 0;
662
+ k1 = kofs;
663
+ if (k1 >= na->shape[ax[1]]) {
664
+ rb_raise(rb_eArgError,"invalid diagonal offset(%"SZF"d) for "
665
+ "last dimension size(%"SZF"d)",kofs,na->shape[ax[1]]);
666
+ }
667
+ } else {
668
+ k0 = -kofs;
669
+ k1 = 0;
670
+ if (k0 >= na->shape[ax[0]]) {
671
+ rb_raise(rb_eArgError,"invalid diagonal offset(=%"SZF"d) for "
672
+ "last-1 dimension size(%"SZF"d)",kofs,na->shape[ax[0]]);
673
+ }
674
+ }
675
+
676
+ diag_size = MIN(na->shape[ax[0]]-k0,na->shape[ax[1]]-k1);
677
+
678
+ // new shape
679
+ shape = ALLOCA_N(size_t,nd-1);
680
+ for (i=k=0; i<nd; i++) {
681
+ if (i != ax[0] && i != ax[1]) {
682
+ shape[k++] = na->shape[i];
683
+ }
684
+ }
685
+ shape[k] = diag_size;
686
+
687
+ // new object
688
+ view = na_s_allocate_view(CLASS_OF(self));
689
+ na_copy_flags(self, view);
690
+ GetNArrayView(view, na2);
691
+
692
+ // new stride
693
+ na_setup_shape((narray_t*)na2, nd-1, shape);
694
+ na2->stridx = ALLOC_N(stridx_t, nd-1);
695
+
696
+ switch(na->type) {
697
+ case NARRAY_DATA_T:
698
+ case NARRAY_FILEMAP_T:
699
+ na2->offset = 0;
700
+ na2->data = self;
701
+ stride = stride0 = stride1 = nary_element_stride(self);
702
+ for (i=nd,k=nd-2; i--; ) {
703
+ if (i==ax[1]) {
704
+ stride1 = stride;
705
+ if (kofs > 0) {
706
+ na2->offset = kofs*stride;
707
+ }
708
+ } else if (i==ax[0]) {
709
+ stride0 = stride;
710
+ if (kofs < 0) {
711
+ na2->offset = (-kofs)*stride;
712
+ }
713
+ } else {
714
+ SDX_SET_STRIDE(na2->stridx[--k],stride);
715
+ }
716
+ stride *= na->shape[i];
717
+ }
718
+ SDX_SET_STRIDE(na2->stridx[nd-2],stride0+stride1);
719
+ break;
720
+
721
+ case NARRAY_VIEW_T:
722
+ GetNArrayView(self, na1);
723
+ na2->data = na1->data;
724
+ na2->offset = na1->offset;
725
+ for (i=k=0; i<nd; i++) {
726
+ if (i != ax[0] && i != ax[1]) {
727
+ if (SDX_IS_INDEX(na1->stridx[i])) {
728
+ idx0 = SDX_GET_INDEX(na1->stridx[i]);
729
+ idx1 = ALLOC_N(size_t, na->shape[i]);
730
+ for (j=0; j<na->shape[i]; j++) {
731
+ idx1[j] = idx0[j];
732
+ }
733
+ SDX_SET_INDEX(na2->stridx[k],idx1);
734
+ } else {
735
+ na2->stridx[k] = na1->stridx[i];
736
+ }
737
+ k++;
738
+ }
739
+ }
740
+ if (SDX_IS_INDEX(na1->stridx[ax[0]])) {
741
+ idx0 = SDX_GET_INDEX(na1->stridx[ax[0]]);
742
+ diag_idx = ALLOC_N(size_t, diag_size);
743
+ if (SDX_IS_INDEX(na1->stridx[ax[1]])) {
744
+ idx1 = SDX_GET_INDEX(na1->stridx[ax[1]]);
745
+ for (j=0; j<diag_size; j++) {
746
+ diag_idx[j] = idx0[j+k0] + idx1[j+k1];
747
+ }
748
+ } else {
749
+ stride1 = SDX_GET_STRIDE(na1->stridx[ax[1]]);
750
+ for (j=0; j<diag_size; j++) {
751
+ diag_idx[j] = idx0[j+k0] + stride1*(j+k1);
752
+ }
753
+ }
754
+ SDX_SET_INDEX(na2->stridx[nd-2],diag_idx);
755
+ } else {
756
+ stride0 = SDX_GET_STRIDE(na1->stridx[ax[0]]);
757
+ if (SDX_IS_INDEX(na1->stridx[ax[1]])) {
758
+ idx1 = SDX_GET_INDEX(na1->stridx[ax[1]]);
759
+ diag_idx = ALLOC_N(size_t, diag_size);
760
+ for (j=0; j<diag_size; j++) {
761
+ diag_idx[j] = stride0*(j+k0) + idx1[j+k1];
762
+ }
763
+ SDX_SET_INDEX(na2->stridx[nd-2],diag_idx);
764
+ } else {
765
+ stride1 = SDX_GET_STRIDE(na1->stridx[ax[1]]);
766
+ na2->offset += stride0*k0 + stride1*k1;
767
+ SDX_SET_STRIDE(na2->stridx[nd-2],stride0+stride1);
768
+ }
769
+ }
770
+ break;
771
+ }
772
+ return view;
773
+ }
774
+
775
+ //----------------------------------------------------------------------
776
+
777
+
778
+ #if 0
779
#ifdef SWAP
#undef SWAP
#endif
/*
 * SWAP(a, b, t): exchange the values of lvalues `a` and `b`, using the
 * caller-supplied lvalue `t` of a compatible type as scratch storage.
 *
 * Wrapped in do { ... } while (0) so the expansion is a single statement
 * and can be followed by `;` anywhere a statement is allowed (including
 * the body of an unbraced if/else).  Each argument is evaluated twice,
 * so arguments must not have side effects.
 */
#define SWAP(a,b,t) do { (t) = (a); (a) = (b); (b) = (t); } while (0)
783
+
784
+ static VALUE
785
+ na_new_dimension_for_dot(VALUE self, int pos, int len, bool transpose)
786
+ {
787
+ int i, k, l, nd;
788
+ size_t j;
789
+ size_t *idx1, *idx2;
790
+ size_t *shape;
791
+ ssize_t stride;
792
+ narray_t *na;
793
+ narray_view_t *na1, *na2;
794
+ size_t shape_n;
795
+ stridx_t stridx_n;
796
+ volatile VALUE view;
797
+
798
+ GetNArray(self,na);
799
+ nd = na->ndim;
800
+
801
+ view = na_s_allocate_view(CLASS_OF(self));
802
+
803
+ na_copy_flags(self, view);
804
+ GetNArrayView(view, na2);
805
+
806
+ // new dimension
807
+ if (pos < 0) pos += nd;
808
+ if (pos > nd || pos < 0) {
809
+ rb_raise(rb_eRangeError,"new dimension is out of range");
810
+ }
811
+ nd += len;
812
+ shape = ALLOCA_N(size_t,nd);
813
+ na2->stridx = ALLOC_N(stridx_t,nd);
814
+
815
+ switch(na->type) {
816
+ case NARRAY_DATA_T:
817
+ case NARRAY_FILEMAP_T:
818
+ i = k = 0;
819
+ while (i < nd) {
820
+ if (i == pos && len > 0) {
821
+ for (l=0; l<len; l++) {
822
+ shape[i++] = 1;
823
+ }
824
+ } else {
825
+ shape[i++] = na->shape[k++];
826
+ }
827
+ }
828
+ na_setup_shape((narray_t*)na2, nd, shape);
829
+ stride = nary_element_stride(self);
830
+ for (i=nd; i--;) {
831
+ SDX_SET_STRIDE(na2->stridx[i], stride);
832
+ stride *= shape[i];
833
+ }
834
+ na2->offset = 0;
835
+ na2->data = self;
836
+ break;
837
+ case NARRAY_VIEW_T:
838
+ GetNArrayView(self, na1);
839
+ i = k = 0;
840
+ while (i < nd) {
841
+ if (i == pos && len > 0) {
842
+ if (SDX_IS_INDEX(na1->stridx[k])) {
843
+ stride = SDX_GET_INDEX(na1->stridx[k])[0];
844
+ } else {
845
+ stride = SDX_GET_STRIDE(na1->stridx[k]);
846
+ }
847
+ for (l=0; l<len; l++) {
848
+ shape[i] = 1;
849
+ SDX_SET_STRIDE(na2->stridx[i], stride);
850
+ i++;
851
+ }
852
+ } else {
853
+ shape[i] = na1->base.shape[k];
854
+ if (SDX_IS_INDEX(na1->stridx[k])) {
855
+ idx1 = SDX_GET_INDEX(na1->stridx[k]);
856
+ idx2 = ALLOC_N(size_t,na1->base.shape[k]);
857
+ for (j=0; j<na1->base.shape[k]; j++) {
858
+ idx2[j] = idx1[j];
859
+ }
860
+ SDX_SET_INDEX(na2->stridx[i], idx2);
861
+ } else {
862
+ na2->stridx[i] = na1->stridx[k];
863
+ }
864
+ i++; k++;
865
+ }
866
+ }
867
+ na_setup_shape((narray_t*)na2, nd, shape);
868
+ na2->offset = na1->offset;
869
+ na2->data = na1->data;
870
+ break;
871
+ }
872
+
873
+ if (transpose) {
874
+ SWAP(na2->base.shape[nd-1], na2->base.shape[nd-2], shape_n);
875
+ SWAP(na2->stridx[nd-1], na2->stridx[nd-2], stridx_n);
876
+ }
877
+
878
+ return view;
879
+ }
880
+
881
+
882
+ //----------------------------------------------------------------------
883
+
884
+ /*
885
+ * call-seq:
886
+ * narray.dot(other) => narray
887
+ *
888
+ * Returns dot product.
889
+ *
890
+ */
891
+
892
+ static VALUE
893
+ cumo_na_dot(VALUE self, VALUE other)
894
+ {
895
+ VALUE test;
896
+ volatile VALUE a1=self, a2=other;
897
+ narray_t *na1, *na2;
898
+
899
+ test = rb_funcall(a1, id_respond_to_p, 1, sym_mulsum);
900
+ if (!RTEST(test)) {
901
+ rb_raise(rb_eNoMethodError,"requires mulsum method for dot method");
902
+ }
903
+ GetNArray(a1,na1);
904
+ GetNArray(a2,na2);
905
+ if (na1->ndim==0 || na2->ndim==0) {
906
+ rb_raise(nary_eDimensionError,"zero dimensional narray");
907
+ }
908
+ if (na2->ndim > 1) {
909
+ if (na1->shape[na1->ndim-1] != na2->shape[na2->ndim-2]) {
910
+ rb_raise(nary_eShapeError,"shape mismatch: self.shape[-1](=%"SZF"d) != other.shape[-2](=%"SZF"d)",
911
+ na1->shape[na1->ndim-1], na2->shape[na2->ndim-2]);
912
+ }
913
+ // insert new axis [ ..., last-1-dim, newaxis*other.ndim, last-dim ]
914
+ a1 = na_new_dimension_for_dot(a1, na1->ndim-1, na2->ndim-1, 0);
915
+ // insert & transpose [ newaxis*self.ndim, ..., last-dim, last-1-dim ]
916
+ a2 = na_new_dimension_for_dot(a2, 0, na1->ndim-1, 1);
917
+ }
918
+ return rb_funcall(a1,id_mulsum,2,a2,INT2FIX(-1));
919
+ }
920
+ #endif
921
+
922
+ void
923
+ Init_cumo_nary_data()
924
+ {
925
+ rb_define_method(cNArray, "copy", na_copy, 0); // deprecated
926
+
927
+ rb_define_method(cNArray, "flatten", na_flatten, 0);
928
+ rb_define_method(cNArray, "swapaxes", na_swapaxes, 2);
929
+ rb_define_method(cNArray, "transpose", na_transpose, -1);
930
+
931
+ rb_define_method(cNArray, "reshape", na_reshape,-1);
932
+ rb_define_method(cNArray, "reshape!", na_reshape_bang,-1);
933
+ /*
934
+ rb_define_alias(cNArray, "shape=","reshape!");
935
+ */
936
+ rb_define_method(cNArray, "diagonal", na_diagonal,-1);
937
+
938
+ rb_define_method(cNArray, "swap_byte", nary_swap_byte, 0);
939
+ #ifdef DYNAMIC_ENDIAN
940
+ #else
941
+ #ifdef WORDS_BIGENDIAN
942
+ #else // LITTLE_ENDIAN
943
+ rb_define_alias(cNArray, "hton", "swap_byte");
944
+ rb_define_alias(cNArray, "network_order?", "byte_swapped?");
945
+ rb_define_alias(cNArray, "little_endian?", "host_order?");
946
+ rb_define_alias(cNArray, "vacs_order?", "host_order?");
947
+ #endif
948
+ #endif
949
+ rb_define_method(cNArray, "to_network", nary_to_network, 0);
950
+ rb_define_method(cNArray, "to_vacs", nary_to_vacs, 0);
951
+ rb_define_method(cNArray, "to_host", nary_to_host, 0);
952
+ rb_define_method(cNArray, "to_swapped", nary_to_swapped, 0);
953
+
954
+ //rb_define_method(cNArray, "dot", cumo_na_dot, 1);
955
+
956
+ id_mulsum = rb_intern("mulsum");
957
+ sym_mulsum = ID2SYM(id_mulsum);
958
+ id_respond_to_p = rb_intern("respond_to?");
959
+ id_store = rb_intern("store");
960
+ id_swap_byte = rb_intern("swap_byte");
961
+ }