cumo 0.1.0

Files changed (266)
  1. checksums.yaml +7 -0
  2. data/.gitignore +27 -0
  3. data/.travis.yml +5 -0
  4. data/3rd_party/mkmf-cu/.gitignore +36 -0
  5. data/3rd_party/mkmf-cu/Gemfile +3 -0
  6. data/3rd_party/mkmf-cu/LICENSE +21 -0
  7. data/3rd_party/mkmf-cu/README.md +36 -0
  8. data/3rd_party/mkmf-cu/Rakefile +11 -0
  9. data/3rd_party/mkmf-cu/bin/mkmf-cu-nvcc +4 -0
  10. data/3rd_party/mkmf-cu/lib/mkmf-cu.rb +32 -0
  11. data/3rd_party/mkmf-cu/lib/mkmf-cu/cli.rb +80 -0
  12. data/3rd_party/mkmf-cu/lib/mkmf-cu/nvcc.rb +157 -0
  13. data/3rd_party/mkmf-cu/mkmf-cu.gemspec +16 -0
  14. data/3rd_party/mkmf-cu/test/test_mkmf-cu.rb +67 -0
  15. data/CODE_OF_CONDUCT.md +46 -0
  16. data/Gemfile +8 -0
  17. data/LICENSE.txt +82 -0
  18. data/README.md +252 -0
  19. data/Rakefile +43 -0
  20. data/bench/broadcast_fp32.rb +138 -0
  21. data/bench/cumo_bench.rb +193 -0
  22. data/bench/numo_bench.rb +138 -0
  23. data/bench/reduction_fp32.rb +117 -0
  24. data/bin/console +14 -0
  25. data/bin/setup +8 -0
  26. data/cumo.gemspec +32 -0
  27. data/ext/cumo/cuda/cublas.c +278 -0
  28. data/ext/cumo/cuda/driver.c +421 -0
  29. data/ext/cumo/cuda/memory_pool.cpp +185 -0
  30. data/ext/cumo/cuda/memory_pool_impl.cpp +308 -0
  31. data/ext/cumo/cuda/memory_pool_impl.hpp +370 -0
  32. data/ext/cumo/cuda/memory_pool_impl_test.cpp +554 -0
  33. data/ext/cumo/cuda/nvrtc.c +207 -0
  34. data/ext/cumo/cuda/runtime.c +167 -0
  35. data/ext/cumo/cumo.c +148 -0
  36. data/ext/cumo/depend.erb +58 -0
  37. data/ext/cumo/extconf.rb +179 -0
  38. data/ext/cumo/include/cumo.h +25 -0
  39. data/ext/cumo/include/cumo/compat.h +23 -0
  40. data/ext/cumo/include/cumo/cuda/cublas.h +153 -0
  41. data/ext/cumo/include/cumo/cuda/cumo_thrust.hpp +187 -0
  42. data/ext/cumo/include/cumo/cuda/cumo_thrust_complex.hpp +79 -0
  43. data/ext/cumo/include/cumo/cuda/driver.h +22 -0
  44. data/ext/cumo/include/cumo/cuda/memory_pool.h +28 -0
  45. data/ext/cumo/include/cumo/cuda/nvrtc.h +22 -0
  46. data/ext/cumo/include/cumo/cuda/runtime.h +40 -0
  47. data/ext/cumo/include/cumo/indexer.h +238 -0
  48. data/ext/cumo/include/cumo/intern.h +142 -0
  49. data/ext/cumo/include/cumo/intern_fwd.h +38 -0
  50. data/ext/cumo/include/cumo/intern_kernel.h +6 -0
  51. data/ext/cumo/include/cumo/narray.h +429 -0
  52. data/ext/cumo/include/cumo/narray_kernel.h +149 -0
  53. data/ext/cumo/include/cumo/ndloop.h +95 -0
  54. data/ext/cumo/include/cumo/reduce_kernel.h +126 -0
  55. data/ext/cumo/include/cumo/template.h +158 -0
  56. data/ext/cumo/include/cumo/template_kernel.h +77 -0
  57. data/ext/cumo/include/cumo/types/bit.h +40 -0
  58. data/ext/cumo/include/cumo/types/bit_kernel.h +34 -0
  59. data/ext/cumo/include/cumo/types/complex.h +402 -0
  60. data/ext/cumo/include/cumo/types/complex_kernel.h +414 -0
  61. data/ext/cumo/include/cumo/types/complex_macro.h +382 -0
  62. data/ext/cumo/include/cumo/types/complex_macro_kernel.h +186 -0
  63. data/ext/cumo/include/cumo/types/dcomplex.h +46 -0
  64. data/ext/cumo/include/cumo/types/dcomplex_kernel.h +13 -0
  65. data/ext/cumo/include/cumo/types/dfloat.h +47 -0
  66. data/ext/cumo/include/cumo/types/dfloat_kernel.h +14 -0
  67. data/ext/cumo/include/cumo/types/float_def.h +34 -0
  68. data/ext/cumo/include/cumo/types/float_def_kernel.h +39 -0
  69. data/ext/cumo/include/cumo/types/float_macro.h +191 -0
  70. data/ext/cumo/include/cumo/types/float_macro_kernel.h +158 -0
  71. data/ext/cumo/include/cumo/types/int16.h +24 -0
  72. data/ext/cumo/include/cumo/types/int16_kernel.h +23 -0
  73. data/ext/cumo/include/cumo/types/int32.h +24 -0
  74. data/ext/cumo/include/cumo/types/int32_kernel.h +19 -0
  75. data/ext/cumo/include/cumo/types/int64.h +24 -0
  76. data/ext/cumo/include/cumo/types/int64_kernel.h +19 -0
  77. data/ext/cumo/include/cumo/types/int8.h +24 -0
  78. data/ext/cumo/include/cumo/types/int8_kernel.h +19 -0
  79. data/ext/cumo/include/cumo/types/int_macro.h +67 -0
  80. data/ext/cumo/include/cumo/types/int_macro_kernel.h +48 -0
  81. data/ext/cumo/include/cumo/types/real_accum.h +486 -0
  82. data/ext/cumo/include/cumo/types/real_accum_kernel.h +101 -0
  83. data/ext/cumo/include/cumo/types/robj_macro.h +80 -0
  84. data/ext/cumo/include/cumo/types/robj_macro_kernel.h +0 -0
  85. data/ext/cumo/include/cumo/types/robject.h +27 -0
  86. data/ext/cumo/include/cumo/types/robject_kernel.h +7 -0
  87. data/ext/cumo/include/cumo/types/scomplex.h +46 -0
  88. data/ext/cumo/include/cumo/types/scomplex_kernel.h +13 -0
  89. data/ext/cumo/include/cumo/types/sfloat.h +48 -0
  90. data/ext/cumo/include/cumo/types/sfloat_kernel.h +14 -0
  91. data/ext/cumo/include/cumo/types/uint16.h +25 -0
  92. data/ext/cumo/include/cumo/types/uint16_kernel.h +20 -0
  93. data/ext/cumo/include/cumo/types/uint32.h +25 -0
  94. data/ext/cumo/include/cumo/types/uint32_kernel.h +20 -0
  95. data/ext/cumo/include/cumo/types/uint64.h +25 -0
  96. data/ext/cumo/include/cumo/types/uint64_kernel.h +20 -0
  97. data/ext/cumo/include/cumo/types/uint8.h +25 -0
  98. data/ext/cumo/include/cumo/types/uint8_kernel.h +20 -0
  99. data/ext/cumo/include/cumo/types/uint_macro.h +58 -0
  100. data/ext/cumo/include/cumo/types/uint_macro_kernel.h +38 -0
  101. data/ext/cumo/include/cumo/types/xint_macro.h +169 -0
  102. data/ext/cumo/include/cumo/types/xint_macro_kernel.h +88 -0
  103. data/ext/cumo/narray/SFMT-params.h +97 -0
  104. data/ext/cumo/narray/SFMT-params19937.h +46 -0
  105. data/ext/cumo/narray/SFMT.c +620 -0
  106. data/ext/cumo/narray/SFMT.h +167 -0
  107. data/ext/cumo/narray/array.c +638 -0
  108. data/ext/cumo/narray/data.c +961 -0
  109. data/ext/cumo/narray/gen/cogen.rb +56 -0
  110. data/ext/cumo/narray/gen/cogen_kernel.rb +58 -0
  111. data/ext/cumo/narray/gen/def/bit.rb +37 -0
  112. data/ext/cumo/narray/gen/def/dcomplex.rb +39 -0
  113. data/ext/cumo/narray/gen/def/dfloat.rb +37 -0
  114. data/ext/cumo/narray/gen/def/int16.rb +36 -0
  115. data/ext/cumo/narray/gen/def/int32.rb +36 -0
  116. data/ext/cumo/narray/gen/def/int64.rb +36 -0
  117. data/ext/cumo/narray/gen/def/int8.rb +36 -0
  118. data/ext/cumo/narray/gen/def/robject.rb +37 -0
  119. data/ext/cumo/narray/gen/def/scomplex.rb +39 -0
  120. data/ext/cumo/narray/gen/def/sfloat.rb +37 -0
  121. data/ext/cumo/narray/gen/def/uint16.rb +36 -0
  122. data/ext/cumo/narray/gen/def/uint32.rb +36 -0
  123. data/ext/cumo/narray/gen/def/uint64.rb +36 -0
  124. data/ext/cumo/narray/gen/def/uint8.rb +36 -0
  125. data/ext/cumo/narray/gen/erbpp2.rb +346 -0
  126. data/ext/cumo/narray/gen/narray_def.rb +268 -0
  127. data/ext/cumo/narray/gen/spec.rb +425 -0
  128. data/ext/cumo/narray/gen/tmpl/accum.c +86 -0
  129. data/ext/cumo/narray/gen/tmpl/accum_binary.c +121 -0
  130. data/ext/cumo/narray/gen/tmpl/accum_binary_kernel.cu +61 -0
  131. data/ext/cumo/narray/gen/tmpl/accum_index.c +119 -0
  132. data/ext/cumo/narray/gen/tmpl/accum_index_kernel.cu +66 -0
  133. data/ext/cumo/narray/gen/tmpl/accum_kernel.cu +12 -0
  134. data/ext/cumo/narray/gen/tmpl/alloc_func.c +107 -0
  135. data/ext/cumo/narray/gen/tmpl/allocate.c +37 -0
  136. data/ext/cumo/narray/gen/tmpl/aref.c +66 -0
  137. data/ext/cumo/narray/gen/tmpl/aref_cpu.c +50 -0
  138. data/ext/cumo/narray/gen/tmpl/aset.c +56 -0
  139. data/ext/cumo/narray/gen/tmpl/binary.c +162 -0
  140. data/ext/cumo/narray/gen/tmpl/binary2.c +70 -0
  141. data/ext/cumo/narray/gen/tmpl/binary2_kernel.cu +15 -0
  142. data/ext/cumo/narray/gen/tmpl/binary_kernel.cu +31 -0
  143. data/ext/cumo/narray/gen/tmpl/binary_s.c +45 -0
  144. data/ext/cumo/narray/gen/tmpl/binary_s_kernel.cu +15 -0
  145. data/ext/cumo/narray/gen/tmpl/bincount.c +181 -0
  146. data/ext/cumo/narray/gen/tmpl/cast.c +44 -0
  147. data/ext/cumo/narray/gen/tmpl/cast_array.c +13 -0
  148. data/ext/cumo/narray/gen/tmpl/class.c +9 -0
  149. data/ext/cumo/narray/gen/tmpl/class_kernel.cu +6 -0
  150. data/ext/cumo/narray/gen/tmpl/clip.c +121 -0
  151. data/ext/cumo/narray/gen/tmpl/coerce_cast.c +10 -0
  152. data/ext/cumo/narray/gen/tmpl/complex_accum_kernel.cu +129 -0
  153. data/ext/cumo/narray/gen/tmpl/cond_binary.c +68 -0
  154. data/ext/cumo/narray/gen/tmpl/cond_binary_kernel.cu +18 -0
  155. data/ext/cumo/narray/gen/tmpl/cond_unary.c +46 -0
  156. data/ext/cumo/narray/gen/tmpl/cum.c +50 -0
  157. data/ext/cumo/narray/gen/tmpl/each.c +47 -0
  158. data/ext/cumo/narray/gen/tmpl/each_with_index.c +70 -0
  159. data/ext/cumo/narray/gen/tmpl/ewcomp.c +79 -0
  160. data/ext/cumo/narray/gen/tmpl/ewcomp_kernel.cu +19 -0
  161. data/ext/cumo/narray/gen/tmpl/extract.c +22 -0
  162. data/ext/cumo/narray/gen/tmpl/extract_cpu.c +26 -0
  163. data/ext/cumo/narray/gen/tmpl/extract_data.c +53 -0
  164. data/ext/cumo/narray/gen/tmpl/eye.c +105 -0
  165. data/ext/cumo/narray/gen/tmpl/eye_kernel.cu +19 -0
  166. data/ext/cumo/narray/gen/tmpl/fill.c +52 -0
  167. data/ext/cumo/narray/gen/tmpl/fill_kernel.cu +29 -0
  168. data/ext/cumo/narray/gen/tmpl/float_accum_kernel.cu +106 -0
  169. data/ext/cumo/narray/gen/tmpl/format.c +62 -0
  170. data/ext/cumo/narray/gen/tmpl/format_to_a.c +49 -0
  171. data/ext/cumo/narray/gen/tmpl/frexp.c +38 -0
  172. data/ext/cumo/narray/gen/tmpl/gemm.c +203 -0
  173. data/ext/cumo/narray/gen/tmpl/init_class.c +20 -0
  174. data/ext/cumo/narray/gen/tmpl/init_module.c +12 -0
  175. data/ext/cumo/narray/gen/tmpl/inspect.c +21 -0
  176. data/ext/cumo/narray/gen/tmpl/lib.c +50 -0
  177. data/ext/cumo/narray/gen/tmpl/lib_kernel.cu +24 -0
  178. data/ext/cumo/narray/gen/tmpl/logseq.c +102 -0
  179. data/ext/cumo/narray/gen/tmpl/logseq_kernel.cu +31 -0
  180. data/ext/cumo/narray/gen/tmpl/map_with_index.c +98 -0
  181. data/ext/cumo/narray/gen/tmpl/median.c +66 -0
  182. data/ext/cumo/narray/gen/tmpl/minmax.c +47 -0
  183. data/ext/cumo/narray/gen/tmpl/module.c +9 -0
  184. data/ext/cumo/narray/gen/tmpl/module_kernel.cu +1 -0
  185. data/ext/cumo/narray/gen/tmpl/new_dim0.c +15 -0
  186. data/ext/cumo/narray/gen/tmpl/new_dim0_kernel.cu +8 -0
  187. data/ext/cumo/narray/gen/tmpl/poly.c +50 -0
  188. data/ext/cumo/narray/gen/tmpl/pow.c +97 -0
  189. data/ext/cumo/narray/gen/tmpl/pow_kernel.cu +29 -0
  190. data/ext/cumo/narray/gen/tmpl/powint.c +17 -0
  191. data/ext/cumo/narray/gen/tmpl/qsort.c +212 -0
  192. data/ext/cumo/narray/gen/tmpl/rand.c +168 -0
  193. data/ext/cumo/narray/gen/tmpl/rand_norm.c +121 -0
  194. data/ext/cumo/narray/gen/tmpl/real_accum_kernel.cu +75 -0
  195. data/ext/cumo/narray/gen/tmpl/seq.c +112 -0
  196. data/ext/cumo/narray/gen/tmpl/seq_kernel.cu +43 -0
  197. data/ext/cumo/narray/gen/tmpl/set2.c +57 -0
  198. data/ext/cumo/narray/gen/tmpl/sort.c +48 -0
  199. data/ext/cumo/narray/gen/tmpl/sort_index.c +111 -0
  200. data/ext/cumo/narray/gen/tmpl/store.c +41 -0
  201. data/ext/cumo/narray/gen/tmpl/store_array.c +187 -0
  202. data/ext/cumo/narray/gen/tmpl/store_array_kernel.cu +58 -0
  203. data/ext/cumo/narray/gen/tmpl/store_bit.c +86 -0
  204. data/ext/cumo/narray/gen/tmpl/store_bit_kernel.cu +66 -0
  205. data/ext/cumo/narray/gen/tmpl/store_from.c +81 -0
  206. data/ext/cumo/narray/gen/tmpl/store_from_kernel.cu +58 -0
  207. data/ext/cumo/narray/gen/tmpl/store_kernel.cu +3 -0
  208. data/ext/cumo/narray/gen/tmpl/store_numeric.c +9 -0
  209. data/ext/cumo/narray/gen/tmpl/to_a.c +43 -0
  210. data/ext/cumo/narray/gen/tmpl/unary.c +132 -0
  211. data/ext/cumo/narray/gen/tmpl/unary2.c +60 -0
  212. data/ext/cumo/narray/gen/tmpl/unary_kernel.cu +72 -0
  213. data/ext/cumo/narray/gen/tmpl/unary_ret2.c +34 -0
  214. data/ext/cumo/narray/gen/tmpl/unary_s.c +86 -0
  215. data/ext/cumo/narray/gen/tmpl/unary_s_kernel.cu +58 -0
  216. data/ext/cumo/narray/gen/tmpl_bit/allocate.c +24 -0
  217. data/ext/cumo/narray/gen/tmpl_bit/aref.c +54 -0
  218. data/ext/cumo/narray/gen/tmpl_bit/aref_cpu.c +57 -0
  219. data/ext/cumo/narray/gen/tmpl_bit/aset.c +56 -0
  220. data/ext/cumo/narray/gen/tmpl_bit/binary.c +98 -0
  221. data/ext/cumo/narray/gen/tmpl_bit/bit_count.c +64 -0
  222. data/ext/cumo/narray/gen/tmpl_bit/bit_count_cpu.c +88 -0
  223. data/ext/cumo/narray/gen/tmpl_bit/bit_count_kernel.cu +76 -0
  224. data/ext/cumo/narray/gen/tmpl_bit/bit_reduce.c +133 -0
  225. data/ext/cumo/narray/gen/tmpl_bit/each.c +48 -0
  226. data/ext/cumo/narray/gen/tmpl_bit/each_with_index.c +70 -0
  227. data/ext/cumo/narray/gen/tmpl_bit/extract.c +30 -0
  228. data/ext/cumo/narray/gen/tmpl_bit/extract_cpu.c +29 -0
  229. data/ext/cumo/narray/gen/tmpl_bit/fill.c +69 -0
  230. data/ext/cumo/narray/gen/tmpl_bit/format.c +64 -0
  231. data/ext/cumo/narray/gen/tmpl_bit/format_to_a.c +51 -0
  232. data/ext/cumo/narray/gen/tmpl_bit/inspect.c +21 -0
  233. data/ext/cumo/narray/gen/tmpl_bit/mask.c +136 -0
  234. data/ext/cumo/narray/gen/tmpl_bit/none_p.c +14 -0
  235. data/ext/cumo/narray/gen/tmpl_bit/store_array.c +108 -0
  236. data/ext/cumo/narray/gen/tmpl_bit/store_bit.c +70 -0
  237. data/ext/cumo/narray/gen/tmpl_bit/store_from.c +60 -0
  238. data/ext/cumo/narray/gen/tmpl_bit/to_a.c +47 -0
  239. data/ext/cumo/narray/gen/tmpl_bit/unary.c +81 -0
  240. data/ext/cumo/narray/gen/tmpl_bit/where.c +90 -0
  241. data/ext/cumo/narray/gen/tmpl_bit/where2.c +95 -0
  242. data/ext/cumo/narray/index.c +880 -0
  243. data/ext/cumo/narray/kwargs.c +153 -0
  244. data/ext/cumo/narray/math.c +142 -0
  245. data/ext/cumo/narray/narray.c +1948 -0
  246. data/ext/cumo/narray/ndloop.c +2105 -0
  247. data/ext/cumo/narray/rand.c +45 -0
  248. data/ext/cumo/narray/step.c +474 -0
  249. data/ext/cumo/narray/struct.c +886 -0
  250. data/lib/cumo.rb +3 -0
  251. data/lib/cumo/cuda.rb +11 -0
  252. data/lib/cumo/cuda/compile_error.rb +36 -0
  253. data/lib/cumo/cuda/compiler.rb +161 -0
  254. data/lib/cumo/cuda/device.rb +47 -0
  255. data/lib/cumo/cuda/link_state.rb +31 -0
  256. data/lib/cumo/cuda/module.rb +40 -0
  257. data/lib/cumo/cuda/nvrtc_program.rb +27 -0
  258. data/lib/cumo/linalg.rb +12 -0
  259. data/lib/cumo/narray.rb +2 -0
  260. data/lib/cumo/narray/extra.rb +1278 -0
  261. data/lib/erbpp.rb +294 -0
  262. data/lib/erbpp/line_number.rb +137 -0
  263. data/lib/erbpp/narray_def.rb +381 -0
  264. data/numo-narray-version +1 -0
  265. data/run.gdb +7 -0
  266. metadata +353 -0
data/ext/cumo/narray/data.c
@@ -0,0 +1,961 @@
+ #include <ruby.h>
+ #include "cumo/narray.h"
+ #include "cumo/template.h"
+
+ static VALUE sym_mulsum;
+ static ID id_mulsum;
+ static ID id_respond_to_p;
+ static ID id_store;
+ static ID id_swap_byte;
+
+ // ---------------------------------------------------------------------
+
+ #define LOOP_UNARY_PTR(lp,proc) \
+ { \
+     size_t i; \
+     ssize_t s1, s2; \
+     char *p1, *p2; \
+     size_t *idx1, *idx2; \
+     INIT_COUNTER(lp, i); \
+     INIT_PTR_IDX(lp, 0, p1, s1, idx1); \
+     INIT_PTR_IDX(lp, 1, p2, s2, idx2); \
+     if (idx1) { \
+         if (idx2) { \
+             for (; i--;) { \
+                 proc((p1+*idx1), (p2+*idx2)); \
+                 idx1++; \
+                 idx2++; \
+             } \
+         } else { \
+             for (; i--;) { \
+                 proc((p1+*idx1), p2); \
+                 idx1++; \
+                 p2 += s2; \
+             } \
+         } \
+     } else { \
+         if (idx2) { \
+             for (; i--;) { \
+                 proc(p1, (p2+*idx2)); \
+                 p1 += s1; \
+                 idx2++; \
+             } \
+         } else { \
+             for (; i--;) { \
+                 proc(p1, p2); \
+                 p1 += s1; \
+                 p2 += s2; \
+             } \
+         } \
+     } \
+ }
+
+ #define m_memcpy(src,dst) memcpy(dst,src,e)
+ static void
+ iter_copy_bytes(na_loop_t *const lp)
+ {
+     size_t e;
+     e = lp->args[0].elmsz;
+     // TODO(sonots): CUDA kernelize
+     LOOP_UNARY_PTR(lp,m_memcpy);
+ }
+
+ VALUE
+ na_copy(VALUE self)
+ {
+     VALUE v;
+     ndfunc_arg_in_t ain[1] = {{Qnil,0}};
+     ndfunc_arg_out_t aout[1] = {{INT2FIX(0),0}};
+     ndfunc_t ndf = { iter_copy_bytes, FULL_LOOP, 1, 1, ain, aout };
+
+     v = na_ndloop(&ndf, 1, self);
+     return v;
+ }
+
+ VALUE
+ na_store(VALUE self, VALUE src)
+ {
+     return rb_funcall(self,id_store,1,src);
+ }
+
+ // ---------------------------------------------------------------------
+
+ #define m_swap_byte(q1,q2) \
+ { \
+     size_t j; \
+     memcpy(b1,q1,e); \
+     for (j=0; j<e; j++) { \
+         b2[e-1-j] = b1[j]; \
+     } \
+     memcpy(q2,b2,e); \
+ }
+
+ static void
+ iter_swap_byte(na_loop_t *const lp)
+ {
+     char *b1, *b2;
+     size_t e;
+
+     e = lp->args[0].elmsz;
+     b1 = ALLOCA_N(char, e);
+     b2 = ALLOCA_N(char, e);
+     LOOP_UNARY_PTR(lp,m_swap_byte);
+ }
+
+ static VALUE
+ nary_swap_byte(VALUE self)
+ {
+     VALUE v;
+     ndfunc_arg_in_t ain[1] = {{Qnil,0}};
+     ndfunc_arg_out_t aout[1] = {{INT2FIX(0),0}};
+     ndfunc_t ndf = { iter_swap_byte, FULL_LOOP|NDF_ACCEPT_BYTESWAP,
+                      1, 1, ain, aout };
+
+     v = na_ndloop(&ndf, 1, self);
+     if (self!=v) {
+         na_copy_flags(self, v);
+     }
+     REVERSE_ENDIAN(v);
+     return v;
+ }
+
+
+ static VALUE
+ nary_to_network(VALUE self)
+ {
+     if (TEST_BIG_ENDIAN(self)) {
+         return self;
+     }
+     return rb_funcall(self, id_swap_byte, 0);
+ }
+
+ static VALUE
+ nary_to_vacs(VALUE self)
+ {
+     if (TEST_LITTLE_ENDIAN(self)) {
+         return self;
+     }
+     return rb_funcall(self, id_swap_byte, 0);
+ }
+
+ static VALUE
+ nary_to_host(VALUE self)
+ {
+     if (TEST_HOST_ORDER(self)) {
+         return self;
+     }
+     return rb_funcall(self, id_swap_byte, 0);
+ }
+
+ static VALUE
+ nary_to_swapped(VALUE self)
+ {
+     if (TEST_BYTE_SWAPPED(self)) {
+         return self;
+     }
+     return rb_funcall(self, id_swap_byte, 0);
+ }
+
+
+ //----------------------------------------------------------------------
+
+ static inline int
+ check_axis(int axis, int ndim)
+ {
+     if (axis < -ndim || axis >= ndim) {
+         rb_raise(nary_eDimensionError,"invalid axis (%d for %d-dimension)",
+                  axis, ndim);
+     }
+     if (axis < 0) {
+         axis += ndim;
+     }
+     return axis;
+ }
+
+ /*
+   Interchange two axes.
+   @overload swapaxes(axis1,axis2)
+   @param [Integer] axis1
+   @param [Integer] axis2
+   @return [Cumo::NArray] view of NArray.
+   @example
+     x = Cumo::Int32[[1,2,3]]
+
+     p x.swapaxes(0,1)
+     # Cumo::Int32(view)#shape=[3,1]
+     # [[1],
+     #  [2],
+     #  [3]]
+
+     p x = Cumo::Int32[[[0,1],[2,3]],[[4,5],[6,7]]]
+     # Cumo::Int32#shape=[2,2,2]
+     # [[[0, 1],
+     #   [2, 3]],
+     #  [[4, 5],
+     #   [6, 7]]]
+
+     p x.swapaxes(0,2)
+     # Cumo::Int32(view)#shape=[2,2,2]
+     # [[[0, 4],
+     #   [2, 6]],
+     #  [[1, 5],
+     #   [3, 7]]]
+ */
+ static VALUE
+ na_swapaxes(VALUE self, VALUE a1, VALUE a2)
+ {
+     int i, j, ndim;
+     size_t tmp_shape;
+     stridx_t tmp_stridx;
+     narray_view_t *na;
+     volatile VALUE view;
+
+     view = na_make_view(self);
+     GetNArrayView(view,na);
+
+     ndim = na->base.ndim;
+     i = check_axis(NUM2INT(a1), ndim);
+     j = check_axis(NUM2INT(a2), ndim);
+
+     tmp_shape = na->base.shape[i];
+     tmp_stridx = na->stridx[i];
+     na->base.shape[i] = na->base.shape[j];
+     na->stridx[i] = na->stridx[j];
+     na->base.shape[j] = tmp_shape;
+     na->stridx[j] = tmp_stridx;
+
+     return view;
+ }
+
+ static VALUE
+ na_transpose_map(VALUE self, int *map)
+ {
+     int i, ndim;
+     size_t *shape;
+     stridx_t *stridx;
+     narray_view_t *na;
+     volatile VALUE view;
+
+     view = na_make_view(self);
+     GetNArrayView(view,na);
+
+     ndim = na->base.ndim;
+     shape = ALLOCA_N(size_t,ndim);
+     stridx = ALLOCA_N(stridx_t,ndim);
+
+     for (i=0; i<ndim; i++) {
+         shape[i] = na->base.shape[i];
+         stridx[i] = na->stridx[i];
+     }
+     for (i=0; i<ndim; i++) {
+         na->base.shape[i] = shape[map[i]];
+         na->stridx[i] = stridx[map[i]];
+     }
+     return view;
+ }
+
+
+ #define SWAP(a,b,tmp) {tmp=a;a=b;b=tmp;}
+
+ static VALUE
+ na_transpose(int argc, VALUE *argv, VALUE self)
+ {
+     int ndim, *map, *permute;
+     int i, d;
+     bool is_positive, is_negative;
+     narray_t *na1;
+
+     GetNArray(self,na1);
+     ndim = na1->ndim;
+     if (ndim < 2) {
+         if (argc > 0) {
+             rb_raise(rb_eArgError, "unnecessary argument for 1-d array");
+         }
+         return na_make_view(self);
+     }
+     map = ALLOCA_N(int,ndim);
+     if (argc == 0) {
+         for (i=0; i < ndim; i++) {
+             map[i] = ndim-1-i;
+         }
+         return na_transpose_map(self,map);
+     }
+     // with argument
+     if (argc > ndim) {
+         rb_raise(rb_eArgError, "more arguments than ndim");
+     }
+     for (i=0; i < ndim; i++) {
+         map[i] = i;
+     }
+     permute = ALLOCA_N(int,argc);
+     for (i=0; i < argc; i++) {
+         permute[i] = 0;
+     }
+     is_positive = is_negative = 0;
+     for (i=0; i < argc; i++) {
+         if (TYPE(argv[i]) != T_FIXNUM) {
+             rb_raise(rb_eArgError, "invalid argument");
+         }
+         d = FIX2INT(argv[i]);
+         if (d >= 0) {
+             if (d >= argc) {
+                 rb_raise(rb_eArgError, "out of dimension range");
+             }
+             if (is_negative) {
+                 rb_raise(rb_eArgError, "dimension must be non-negative only or negative only");
+             }
+             if (permute[d]) {
+                 rb_raise(rb_eArgError, "not permutation");
+             }
+             map[i] = d;
+             permute[d] = 1;
+             is_positive = 1;
+         } else {
+             if (d < -argc) {
+                 rb_raise(rb_eArgError, "out of dimension range");
+             }
+             if (is_positive) {
+                 rb_raise(rb_eArgError, "dimension must be non-negative only or negative only");
+             }
+             if (permute[argc+d]) {
+                 rb_raise(rb_eArgError, "not permutation");
+             }
+             map[ndim-argc+i] = ndim+d;
+             permute[argc+d] = 1;
+             is_negative = 1;
+         }
+     }
+     return na_transpose_map(self,map);
+ }
+
+ //----------------------------------------------------------------------
+
+ static void
+ na_check_reshape(int argc, VALUE *argv, VALUE self, size_t *shape)
+ {
+     int i, unfixed=-1;
+     size_t total=1;
+     narray_t *na;
+
+     if (argc == 0) {
+         rb_raise(rb_eArgError, "No argument");
+     }
+     GetNArray(self,na);
+     if (NA_SIZE(na) == 0) {
+         rb_raise(rb_eRuntimeError, "cannot reshape empty array");
+     }
+
+     /* get shape from argument */
+     for (i=0; i<argc; ++i) {
+         switch(TYPE(argv[i])) {
+         case T_FIXNUM:
+             total *= shape[i] = NUM2INT(argv[i]);
+             break;
+         case T_NIL:
+         case T_TRUE:
+             if (unfixed >= 0) {
+                 rb_raise(rb_eArgError,"multiple unfixed dimension");
+             }
+             unfixed = i;
+             break;
+         default:
+             rb_raise(rb_eArgError,"illegal type");
+         }
+     }
+
+     if (unfixed>=0) {
+         if (NA_SIZE(na) % total != 0) {
+             rb_raise(rb_eArgError, "Total size must be divisor");
+         }
+         shape[unfixed] = NA_SIZE(na) / total;
+     }
+     else if (total != NA_SIZE(na)) {
+         rb_raise(rb_eArgError, "Total size must be same");
+     }
+ }
+
+ /*
+   Change the shape of self NArray without copying.
+   Raise an exception if self is non-contiguous.
+
+   @overload reshape!(size0,size1,...)
+   @param sizeN [Integer] new shape
+   @return [Cumo::NArray] return self.
+   @example
+ */
+ static VALUE
+ na_reshape_bang(int argc, VALUE *argv, VALUE self)
+ {
+     size_t *shape;
+     narray_t *na;
+
+     if (na_check_contiguous(self)==Qfalse) {
+         rb_raise(rb_eStandardError, "cannot change shape of non-contiguous NArray");
+     }
+     shape = ALLOCA_N(size_t, argc);
+     na_check_reshape(argc, argv, self, shape);
+
+     GetNArray(self, na);
+     na_setup_shape(na, argc, shape);
+     return self;
+ }
+
+ /*
+   Copy and change the shape of NArray.
+   Returns a copied NArray.
+
+   @overload reshape(size0,size1,...)
+   @param sizeN [Integer] new shape
+   @return [Cumo::NArray] the reshaped copy.
+   @example
+ */
+ static VALUE
+ na_reshape(int argc, VALUE *argv, VALUE self)
+ {
+     size_t *shape;
+     narray_t *na;
+     VALUE copy;
+
+     shape = ALLOCA_N(size_t, argc);
+     na_check_reshape(argc, argv, self, shape);
+
+     copy = rb_funcall(self, rb_intern("dup"), 0);
+     GetNArray(copy, na);
+     na_setup_shape(na, argc, shape);
+     return copy;
+ }
+
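For reference, na_check_reshape above defines the calling convention shared by reshape and reshape!: each argument is an Integer dimension size, and at most one argument may be nil (or true) to mark a dimension whose size is inferred from the total number of elements. A minimal Ruby sketch of that convention, assuming the usual Cumo::NArray API:

    a = Cumo::Int32.new(2, 6).seq   # 12 elements, shape [2,6]
    b = a.reshape(3, 4)             # reshaped copy, shape [3,4]
    c = a.reshape(nil, 3)           # nil dimension inferred: shape [4,3]
    a.reshape!(12)                  # in place; raises if a is a non-contiguous view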
+ //----------------------------------------------------------------------
+
+ VALUE
+ na_flatten_dim(VALUE self, int sd)
+ {
+     int i, nd, fd;
+     size_t j;
+     size_t *c, *pos, *idx1, *idx2;
+     size_t stride;
+     size_t *shape, size;
+     stridx_t sdx;
+     narray_t *na;
+     narray_view_t *na1, *na2;
+     volatile VALUE view;
+
+     GetNArray(self,na);
+     nd = na->ndim;
+
+     if (nd==0) {
+         return na_make_view(self);
+     }
+     if (sd<0 || sd>=nd) {
+         rb_bug("na_flatten_dim: start_dim (%d) out of range",sd);
+     }
+
+     // new shape
+     shape = ALLOCA_N(size_t,sd+1);
+     for (i=0; i<sd; i++) {
+         shape[i] = na->shape[i];
+     }
+     size = 1;
+     for (i=sd; i<nd; i++) {
+         size *= na->shape[i];
+     }
+     shape[sd] = size;
+
+     // new object
+     view = na_s_allocate_view(CLASS_OF(self));
+     na_copy_flags(self, view);
+     GetNArrayView(view, na2);
+
+     // new stride
+     na_setup_shape((narray_t*)na2, sd+1, shape);
+     na2->stridx = ALLOC_N(stridx_t,sd+1);
+
+     switch(na->type) {
+     case NARRAY_DATA_T:
+     case NARRAY_FILEMAP_T:
+         stride = nary_element_stride(self);
+         for (i=sd+1; i--; ) {
+             //printf("data: i=%d shape[i]=%ld stride=%ld\n",i,shape[i],stride);
+             SDX_SET_STRIDE(na2->stridx[i],stride);
+             stride *= shape[i];
+         }
+         na2->offset = 0;
+         na2->data = self;
+         break;
+     case NARRAY_VIEW_T:
+         GetNArrayView(self, na1);
+         na2->data = na1->data;
+         na2->offset = na1->offset;
+         for (i=0; i<sd; i++) {
+             if (SDX_IS_INDEX(na1->stridx[i])) {
+                 idx1 = SDX_GET_INDEX(na1->stridx[i]);
+                 idx2 = ALLOC_N(size_t, shape[i]);
+                 for (j=0; j<shape[i]; j++) {
+                     idx2[j] = idx1[j];
+                 }
+                 SDX_SET_INDEX(na2->stridx[i],idx2);
+             } else {
+                 na2->stridx[i] = na1->stridx[i];
+                 //printf("view: i=%d stridx=%d\n",i,SDX_GET_STRIDE(sdx));
+             }
+         }
+         // flat dimension == last dimension
+         if (RTEST(na_check_ladder(self,sd))) {
+         //if (0) {
+             na2->stridx[sd] = na1->stridx[nd-1];
+         } else {
+             // set index
+             idx2 = ALLOC_N(size_t, shape[sd]);
+             SDX_SET_INDEX(na2->stridx[sd],idx2);
+             // init for md-loop
+             fd = nd-sd;
+             c = ALLOC_N(size_t, fd);
+             for (i=0; i<fd; i++) c[i]=0;
+             pos = ALLOC_N(size_t, fd+1);
+             pos[0] = 0;
+             // md-loop
+             for (i=j=0;;) {
+                 for (; i<fd; i++) {
+                     sdx = na1->stridx[i+sd];
+                     if (SDX_IS_INDEX(sdx)) {
+                         pos[i+1] = pos[i] + SDX_GET_INDEX(sdx)[c[i]];
+                     } else {
+                         pos[i+1] = pos[i] + SDX_GET_STRIDE(sdx)*c[i];
+                     }
+                 }
+                 idx2[j++] = pos[i];
+                 for (;;) {
+                     if (i==0) goto loop_end;
+                     i--;
+                     c[i]++;
+                     if (c[i] < na1->base.shape[i+sd]) break;
+                     c[i] = 0;
+                 }
+             }
+         loop_end:
+             xfree(pos);
+             xfree(c);
+         }
+         break;
+     }
+     return view;
+ }
+
+ VALUE
+ na_flatten(VALUE self)
+ {
+     return na_flatten_dim(self,0);
+ }
+
+ //----------------------------------------------------------------------
+
+ #define MIN(a,b) (((a)<(b))?(a):(b))
+
+ /*
+   Returns a diagonal view of NArray
+   @overload diagonal([offset,axes])
+   @param [Integer] offset Diagonal offset from the main diagonal.
+     The default is 0. k>0 for diagonals above the main diagonal,
+     and k<0 for diagonals below the main diagonal.
+   @param [Array] axes Array of axes to be used as the 2-d sub-arrays
+     from which the diagonals should be taken. Defaults to last-two
+     axes ([-2,-1]).
+   @return [Cumo::NArray] diagonal view of NArray.
+   @example
+     a = Cumo::DFloat.new(4,5).seq
+     => Cumo::DFloat#shape=[4,5]
+     [[0, 1, 2, 3, 4],
+      [5, 6, 7, 8, 9],
+      [10, 11, 12, 13, 14],
+      [15, 16, 17, 18, 19]]
+     b = a.diagonal(1)
+     => Cumo::DFloat(view)#shape=[4]
+     [1, 7, 13, 19]
+     b.store(0)
+     a
+     => Cumo::DFloat#shape=[4,5]
+     [[0, 0, 2, 3, 4],
+      [5, 6, 0, 8, 9],
+      [10, 11, 12, 0, 14],
+      [15, 16, 17, 18, 0]]
+     b.store([1,2,3,4])
+     a
+     => Cumo::DFloat#shape=[4,5]
+     [[0, 1, 2, 3, 4],
+      [5, 6, 2, 8, 9],
+      [10, 11, 12, 3, 14],
+      [15, 16, 17, 18, 4]]
+ */
+ static VALUE
+ na_diagonal(int argc, VALUE *argv, VALUE self)
+ {
+     int i, k, nd;
+     size_t j;
+     size_t *idx0, *idx1, *diag_idx;
+     size_t *shape;
+     size_t diag_size;
+     ssize_t stride, stride0, stride1;
+     narray_t *na;
+     narray_view_t *na1, *na2;
+     VALUE view;
+     VALUE vofs=0, vaxes=0;
+     ssize_t kofs;
+     size_t k0, k1;
+     int ax[2];
+
+     // check arguments
+     if (argc>2) {
+         rb_raise(rb_eArgError,"too many arguments (%d for 0..2)",argc);
+     }
+
+     for (i=0; i<argc; i++) {
+         switch(TYPE(argv[i])) {
+         case T_FIXNUM:
+             if (vofs) {
+                 rb_raise(rb_eArgError,"offset is given twice");
+             }
+             vofs = argv[i];
+             break;
+         case T_ARRAY:
+             if (vaxes) {
+                 rb_raise(rb_eArgError,"axes-array is given twice");
+             }
+             vaxes = argv[i];
+             break;
+         }
+     }
+
+     if (vofs) {
+         kofs = NUM2SSIZET(vofs);
+     } else {
+         kofs = 0;
+     }
+
+     GetNArray(self,na);
+     nd = na->ndim;
+     if (nd < 2) {
+         rb_raise(nary_eDimensionError,"less than 2-d array");
+     }
+
+     if (vaxes) {
+         if (RARRAY_LEN(vaxes) != 2) {
+             rb_raise(rb_eArgError,"axes must be 2-element array");
+         }
+         ax[0] = NUM2INT(RARRAY_AREF(vaxes,0));
+         ax[1] = NUM2INT(RARRAY_AREF(vaxes,1));
+         if (ax[0]<-nd || ax[0]>=nd || ax[1]<-nd || ax[1]>=nd) {
+             rb_raise(rb_eArgError,"axis out of range:[%d,%d]",ax[0],ax[1]);
+         }
+         if (ax[0]<0) {ax[0] += nd;}
+         if (ax[1]<0) {ax[1] += nd;}
+         if (ax[0]==ax[1]) {
+             rb_raise(rb_eArgError,"same axes:[%d,%d]",ax[0],ax[1]);
+         }
+     } else {
+         ax[0] = nd-2;
+         ax[1] = nd-1;
+     }
+
+     // Diagonal offset from the main diagonal.
+     if (kofs >= 0) {
+         k0 = 0;
+         k1 = kofs;
+         if (k1 >= na->shape[ax[1]]) {
+             rb_raise(rb_eArgError,"invalid diagonal offset(%"SZF"d) for "
+                      "last dimension size(%"SZF"d)",kofs,na->shape[ax[1]]);
+         }
+     } else {
+         k0 = -kofs;
+         k1 = 0;
+         if (k0 >= na->shape[ax[0]]) {
+             rb_raise(rb_eArgError,"invalid diagonal offset(=%"SZF"d) for "
+                      "last-1 dimension size(%"SZF"d)",kofs,na->shape[ax[0]]);
+         }
+     }
+
+     diag_size = MIN(na->shape[ax[0]]-k0,na->shape[ax[1]]-k1);
+
+     // new shape
+     shape = ALLOCA_N(size_t,nd-1);
+     for (i=k=0; i<nd; i++) {
+         if (i != ax[0] && i != ax[1]) {
+             shape[k++] = na->shape[i];
+         }
+     }
+     shape[k] = diag_size;
+
+     // new object
+     view = na_s_allocate_view(CLASS_OF(self));
+     na_copy_flags(self, view);
+     GetNArrayView(view, na2);
+
+     // new stride
+     na_setup_shape((narray_t*)na2, nd-1, shape);
+     na2->stridx = ALLOC_N(stridx_t, nd-1);
+
+     switch(na->type) {
+     case NARRAY_DATA_T:
+     case NARRAY_FILEMAP_T:
+         na2->offset = 0;
+         na2->data = self;
+         stride = stride0 = stride1 = nary_element_stride(self);
+         for (i=nd,k=nd-2; i--; ) {
+             if (i==ax[1]) {
+                 stride1 = stride;
+                 if (kofs > 0) {
+                     na2->offset = kofs*stride;
+                 }
+             } else if (i==ax[0]) {
+                 stride0 = stride;
+                 if (kofs < 0) {
+                     na2->offset = (-kofs)*stride;
+                 }
+             } else {
+                 SDX_SET_STRIDE(na2->stridx[--k],stride);
+             }
+             stride *= na->shape[i];
+         }
+         SDX_SET_STRIDE(na2->stridx[nd-2],stride0+stride1);
+         break;
+
+     case NARRAY_VIEW_T:
+         GetNArrayView(self, na1);
+         na2->data = na1->data;
+         na2->offset = na1->offset;
+         for (i=k=0; i<nd; i++) {
+             if (i != ax[0] && i != ax[1]) {
+                 if (SDX_IS_INDEX(na1->stridx[i])) {
+                     idx0 = SDX_GET_INDEX(na1->stridx[i]);
+                     idx1 = ALLOC_N(size_t, na->shape[i]);
+                     for (j=0; j<na->shape[i]; j++) {
+                         idx1[j] = idx0[j];
+                     }
+                     SDX_SET_INDEX(na2->stridx[k],idx1);
+                 } else {
+                     na2->stridx[k] = na1->stridx[i];
+                 }
+                 k++;
+             }
+         }
+         if (SDX_IS_INDEX(na1->stridx[ax[0]])) {
+             idx0 = SDX_GET_INDEX(na1->stridx[ax[0]]);
+             diag_idx = ALLOC_N(size_t, diag_size);
+             if (SDX_IS_INDEX(na1->stridx[ax[1]])) {
+                 idx1 = SDX_GET_INDEX(na1->stridx[ax[1]]);
+                 for (j=0; j<diag_size; j++) {
+                     diag_idx[j] = idx0[j+k0] + idx1[j+k1];
+                 }
+             } else {
+                 stride1 = SDX_GET_STRIDE(na1->stridx[ax[1]]);
+                 for (j=0; j<diag_size; j++) {
+                     diag_idx[j] = idx0[j+k0] + stride1*(j+k1);
+                 }
+             }
+             SDX_SET_INDEX(na2->stridx[nd-2],diag_idx);
+         } else {
+             stride0 = SDX_GET_STRIDE(na1->stridx[ax[0]]);
+             if (SDX_IS_INDEX(na1->stridx[ax[1]])) {
+                 idx1 = SDX_GET_INDEX(na1->stridx[ax[1]]);
+                 diag_idx = ALLOC_N(size_t, diag_size);
+                 for (j=0; j<diag_size; j++) {
+                     diag_idx[j] = stride0*(j+k0) + idx1[j+k1];
+                 }
+                 SDX_SET_INDEX(na2->stridx[nd-2],diag_idx);
+             } else {
+                 stride1 = SDX_GET_STRIDE(na1->stridx[ax[1]]);
+                 na2->offset += stride0*k0 + stride1*k1;
+                 SDX_SET_STRIDE(na2->stridx[nd-2],stride0+stride1);
+             }
+         }
+         break;
+     }
+     return view;
+ }
+
+ //----------------------------------------------------------------------
+
+
+ #if 0
+ #ifdef SWAP
+ #undef SWAP
+ #endif
+ #define SWAP(a,b,t) {t=a;a=b;b=t;}
+
+ static VALUE
+ na_new_dimension_for_dot(VALUE self, int pos, int len, bool transpose)
+ {
+     int i, k, l, nd;
+     size_t j;
+     size_t *idx1, *idx2;
+     size_t *shape;
+     ssize_t stride;
+     narray_t *na;
+     narray_view_t *na1, *na2;
+     size_t shape_n;
+     stridx_t stridx_n;
+     volatile VALUE view;
+
+     GetNArray(self,na);
+     nd = na->ndim;
+
+     view = na_s_allocate_view(CLASS_OF(self));
+
+     na_copy_flags(self, view);
+     GetNArrayView(view, na2);
+
+     // new dimension
+     if (pos < 0) pos += nd;
+     if (pos > nd || pos < 0) {
+         rb_raise(rb_eRangeError,"new dimension is out of range");
+     }
+     nd += len;
+     shape = ALLOCA_N(size_t,nd);
+     na2->stridx = ALLOC_N(stridx_t,nd);
+
+     switch(na->type) {
+     case NARRAY_DATA_T:
+     case NARRAY_FILEMAP_T:
+         i = k = 0;
+         while (i < nd) {
+             if (i == pos && len > 0) {
+                 for (l=0; l<len; l++) {
+                     shape[i++] = 1;
+                 }
+             } else {
+                 shape[i++] = na->shape[k++];
+             }
+         }
+         na_setup_shape((narray_t*)na2, nd, shape);
+         stride = nary_element_stride(self);
+         for (i=nd; i--;) {
+             SDX_SET_STRIDE(na2->stridx[i], stride);
+             stride *= shape[i];
+         }
+         na2->offset = 0;
+         na2->data = self;
+         break;
+     case NARRAY_VIEW_T:
+         GetNArrayView(self, na1);
+         i = k = 0;
+         while (i < nd) {
+             if (i == pos && len > 0) {
+                 if (SDX_IS_INDEX(na1->stridx[k])) {
+                     stride = SDX_GET_INDEX(na1->stridx[k])[0];
+                 } else {
+                     stride = SDX_GET_STRIDE(na1->stridx[k]);
+                 }
+                 for (l=0; l<len; l++) {
+                     shape[i] = 1;
+                     SDX_SET_STRIDE(na2->stridx[i], stride);
+                     i++;
+                 }
+             } else {
+                 shape[i] = na1->base.shape[k];
+                 if (SDX_IS_INDEX(na1->stridx[k])) {
+                     idx1 = SDX_GET_INDEX(na1->stridx[k]);
+                     idx2 = ALLOC_N(size_t,na1->base.shape[k]);
+                     for (j=0; j<na1->base.shape[k]; j++) {
+                         idx2[j] = idx1[j];
+                     }
+                     SDX_SET_INDEX(na2->stridx[i], idx2);
+                 } else {
+                     na2->stridx[i] = na1->stridx[k];
+                 }
+                 i++; k++;
+             }
+         }
+         na_setup_shape((narray_t*)na2, nd, shape);
+         na2->offset = na1->offset;
+         na2->data = na1->data;
+         break;
+     }
+
+     if (transpose) {
+         SWAP(na2->base.shape[nd-1], na2->base.shape[nd-2], shape_n);
+         SWAP(na2->stridx[nd-1], na2->stridx[nd-2], stridx_n);
+     }
+
+     return view;
+ }
+
+
+ //----------------------------------------------------------------------
+
+ /*
+  * call-seq:
+  *   narray.dot(other) => narray
+  *
+  * Returns dot product.
+  *
+  */
+
+ static VALUE
+ cumo_na_dot(VALUE self, VALUE other)
+ {
+     VALUE test;
+     volatile VALUE a1=self, a2=other;
+     narray_t *na1, *na2;
+
+     test = rb_funcall(a1, id_respond_to_p, 1, sym_mulsum);
+     if (!RTEST(test)) {
+         rb_raise(rb_eNoMethodError,"requires mulsum method for dot method");
+     }
+     GetNArray(a1,na1);
+     GetNArray(a2,na2);
+     if (na1->ndim==0 || na2->ndim==0) {
+         rb_raise(nary_eDimensionError,"zero dimensional narray");
+     }
+     if (na2->ndim > 1) {
+         if (na1->shape[na1->ndim-1] != na2->shape[na2->ndim-2]) {
+             rb_raise(nary_eShapeError,"shape mismatch: self.shape[-1](=%"SZF"d) != other.shape[-2](=%"SZF"d)",
+                      na1->shape[na1->ndim-1], na2->shape[na2->ndim-2]);
+         }
+         // insert new axis [ ..., last-1-dim, newaxis*other.ndim, last-dim ]
+         a1 = na_new_dimension_for_dot(a1, na1->ndim-1, na2->ndim-1, 0);
+         // insert & transpose [ newaxis*self.ndim, ..., last-dim, last-1-dim ]
+         a2 = na_new_dimension_for_dot(a2, 0, na1->ndim-1, 1);
+     }
+     return rb_funcall(a1,id_mulsum,2,a2,INT2FIX(-1));
+ }
+ #endif
+
+ void
+ Init_cumo_nary_data()
+ {
+     rb_define_method(cNArray, "copy", na_copy, 0); // deprecated
+
+     rb_define_method(cNArray, "flatten", na_flatten, 0);
+     rb_define_method(cNArray, "swapaxes", na_swapaxes, 2);
+     rb_define_method(cNArray, "transpose", na_transpose, -1);
+
+     rb_define_method(cNArray, "reshape", na_reshape,-1);
+     rb_define_method(cNArray, "reshape!", na_reshape_bang,-1);
+     /*
+     rb_define_alias(cNArray, "shape=","reshape!");
+     */
+     rb_define_method(cNArray, "diagonal", na_diagonal,-1);
+
+     rb_define_method(cNArray, "swap_byte", nary_swap_byte, 0);
+ #ifdef DYNAMIC_ENDIAN
+ #else
+ #ifdef WORDS_BIGENDIAN
+ #else // LITTLE_ENDIAN
+     rb_define_alias(cNArray, "hton", "swap_byte");
+     rb_define_alias(cNArray, "network_order?", "byte_swapped?");
+     rb_define_alias(cNArray, "little_endian?", "host_order?");
+     rb_define_alias(cNArray, "vacs_order?", "host_order?");
+ #endif
+ #endif
+     rb_define_method(cNArray, "to_network", nary_to_network, 0);
+     rb_define_method(cNArray, "to_vacs", nary_to_vacs, 0);
+     rb_define_method(cNArray, "to_host", nary_to_host, 0);
+     rb_define_method(cNArray, "to_swapped", nary_to_swapped, 0);
+
+     //rb_define_method(cNArray, "dot", cumo_na_dot, 1);
+
+     id_mulsum = rb_intern("mulsum");
+     sym_mulsum = ID2SYM(id_mulsum);
+     id_respond_to_p = rb_intern("respond_to?");
+     id_store = rb_intern("store");
+     id_swap_byte = rb_intern("swap_byte");
+ }
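
A short usage sketch of the methods registered in Init_cumo_nary_data, assuming Cumo::NArray follows the Numo::NArray-style interface shown in the doc comments above:

    require "cumo/narray"

    x = Cumo::DFloat.new(3, 4).seq  # [[0,1,2,3],[4,5,6,7],[8,9,10,11]]
    x.transpose          # view with axes reversed, shape [4,3]
    x.swapaxes(0, 1)     # same result as transpose for a 2-d array
    x.flatten            # 1-d view of all 12 elements
    x.diagonal           # view of the main diagonal: [0, 5, 10]
    x.diagonal(1)        # offset diagonal: [1, 6, 11]
    x.to_network         # byte-swaps to network (big-endian) order unless already big-endian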