cumo 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (266) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +27 -0
  3. data/.travis.yml +5 -0
  4. data/3rd_party/mkmf-cu/.gitignore +36 -0
  5. data/3rd_party/mkmf-cu/Gemfile +3 -0
  6. data/3rd_party/mkmf-cu/LICENSE +21 -0
  7. data/3rd_party/mkmf-cu/README.md +36 -0
  8. data/3rd_party/mkmf-cu/Rakefile +11 -0
  9. data/3rd_party/mkmf-cu/bin/mkmf-cu-nvcc +4 -0
  10. data/3rd_party/mkmf-cu/lib/mkmf-cu.rb +32 -0
  11. data/3rd_party/mkmf-cu/lib/mkmf-cu/cli.rb +80 -0
  12. data/3rd_party/mkmf-cu/lib/mkmf-cu/nvcc.rb +157 -0
  13. data/3rd_party/mkmf-cu/mkmf-cu.gemspec +16 -0
  14. data/3rd_party/mkmf-cu/test/test_mkmf-cu.rb +67 -0
  15. data/CODE_OF_CONDUCT.md +46 -0
  16. data/Gemfile +8 -0
  17. data/LICENSE.txt +82 -0
  18. data/README.md +252 -0
  19. data/Rakefile +43 -0
  20. data/bench/broadcast_fp32.rb +138 -0
  21. data/bench/cumo_bench.rb +193 -0
  22. data/bench/numo_bench.rb +138 -0
  23. data/bench/reduction_fp32.rb +117 -0
  24. data/bin/console +14 -0
  25. data/bin/setup +8 -0
  26. data/cumo.gemspec +32 -0
  27. data/ext/cumo/cuda/cublas.c +278 -0
  28. data/ext/cumo/cuda/driver.c +421 -0
  29. data/ext/cumo/cuda/memory_pool.cpp +185 -0
  30. data/ext/cumo/cuda/memory_pool_impl.cpp +308 -0
  31. data/ext/cumo/cuda/memory_pool_impl.hpp +370 -0
  32. data/ext/cumo/cuda/memory_pool_impl_test.cpp +554 -0
  33. data/ext/cumo/cuda/nvrtc.c +207 -0
  34. data/ext/cumo/cuda/runtime.c +167 -0
  35. data/ext/cumo/cumo.c +148 -0
  36. data/ext/cumo/depend.erb +58 -0
  37. data/ext/cumo/extconf.rb +179 -0
  38. data/ext/cumo/include/cumo.h +25 -0
  39. data/ext/cumo/include/cumo/compat.h +23 -0
  40. data/ext/cumo/include/cumo/cuda/cublas.h +153 -0
  41. data/ext/cumo/include/cumo/cuda/cumo_thrust.hpp +187 -0
  42. data/ext/cumo/include/cumo/cuda/cumo_thrust_complex.hpp +79 -0
  43. data/ext/cumo/include/cumo/cuda/driver.h +22 -0
  44. data/ext/cumo/include/cumo/cuda/memory_pool.h +28 -0
  45. data/ext/cumo/include/cumo/cuda/nvrtc.h +22 -0
  46. data/ext/cumo/include/cumo/cuda/runtime.h +40 -0
  47. data/ext/cumo/include/cumo/indexer.h +238 -0
  48. data/ext/cumo/include/cumo/intern.h +142 -0
  49. data/ext/cumo/include/cumo/intern_fwd.h +38 -0
  50. data/ext/cumo/include/cumo/intern_kernel.h +6 -0
  51. data/ext/cumo/include/cumo/narray.h +429 -0
  52. data/ext/cumo/include/cumo/narray_kernel.h +149 -0
  53. data/ext/cumo/include/cumo/ndloop.h +95 -0
  54. data/ext/cumo/include/cumo/reduce_kernel.h +126 -0
  55. data/ext/cumo/include/cumo/template.h +158 -0
  56. data/ext/cumo/include/cumo/template_kernel.h +77 -0
  57. data/ext/cumo/include/cumo/types/bit.h +40 -0
  58. data/ext/cumo/include/cumo/types/bit_kernel.h +34 -0
  59. data/ext/cumo/include/cumo/types/complex.h +402 -0
  60. data/ext/cumo/include/cumo/types/complex_kernel.h +414 -0
  61. data/ext/cumo/include/cumo/types/complex_macro.h +382 -0
  62. data/ext/cumo/include/cumo/types/complex_macro_kernel.h +186 -0
  63. data/ext/cumo/include/cumo/types/dcomplex.h +46 -0
  64. data/ext/cumo/include/cumo/types/dcomplex_kernel.h +13 -0
  65. data/ext/cumo/include/cumo/types/dfloat.h +47 -0
  66. data/ext/cumo/include/cumo/types/dfloat_kernel.h +14 -0
  67. data/ext/cumo/include/cumo/types/float_def.h +34 -0
  68. data/ext/cumo/include/cumo/types/float_def_kernel.h +39 -0
  69. data/ext/cumo/include/cumo/types/float_macro.h +191 -0
  70. data/ext/cumo/include/cumo/types/float_macro_kernel.h +158 -0
  71. data/ext/cumo/include/cumo/types/int16.h +24 -0
  72. data/ext/cumo/include/cumo/types/int16_kernel.h +23 -0
  73. data/ext/cumo/include/cumo/types/int32.h +24 -0
  74. data/ext/cumo/include/cumo/types/int32_kernel.h +19 -0
  75. data/ext/cumo/include/cumo/types/int64.h +24 -0
  76. data/ext/cumo/include/cumo/types/int64_kernel.h +19 -0
  77. data/ext/cumo/include/cumo/types/int8.h +24 -0
  78. data/ext/cumo/include/cumo/types/int8_kernel.h +19 -0
  79. data/ext/cumo/include/cumo/types/int_macro.h +67 -0
  80. data/ext/cumo/include/cumo/types/int_macro_kernel.h +48 -0
  81. data/ext/cumo/include/cumo/types/real_accum.h +486 -0
  82. data/ext/cumo/include/cumo/types/real_accum_kernel.h +101 -0
  83. data/ext/cumo/include/cumo/types/robj_macro.h +80 -0
  84. data/ext/cumo/include/cumo/types/robj_macro_kernel.h +0 -0
  85. data/ext/cumo/include/cumo/types/robject.h +27 -0
  86. data/ext/cumo/include/cumo/types/robject_kernel.h +7 -0
  87. data/ext/cumo/include/cumo/types/scomplex.h +46 -0
  88. data/ext/cumo/include/cumo/types/scomplex_kernel.h +13 -0
  89. data/ext/cumo/include/cumo/types/sfloat.h +48 -0
  90. data/ext/cumo/include/cumo/types/sfloat_kernel.h +14 -0
  91. data/ext/cumo/include/cumo/types/uint16.h +25 -0
  92. data/ext/cumo/include/cumo/types/uint16_kernel.h +20 -0
  93. data/ext/cumo/include/cumo/types/uint32.h +25 -0
  94. data/ext/cumo/include/cumo/types/uint32_kernel.h +20 -0
  95. data/ext/cumo/include/cumo/types/uint64.h +25 -0
  96. data/ext/cumo/include/cumo/types/uint64_kernel.h +20 -0
  97. data/ext/cumo/include/cumo/types/uint8.h +25 -0
  98. data/ext/cumo/include/cumo/types/uint8_kernel.h +20 -0
  99. data/ext/cumo/include/cumo/types/uint_macro.h +58 -0
  100. data/ext/cumo/include/cumo/types/uint_macro_kernel.h +38 -0
  101. data/ext/cumo/include/cumo/types/xint_macro.h +169 -0
  102. data/ext/cumo/include/cumo/types/xint_macro_kernel.h +88 -0
  103. data/ext/cumo/narray/SFMT-params.h +97 -0
  104. data/ext/cumo/narray/SFMT-params19937.h +46 -0
  105. data/ext/cumo/narray/SFMT.c +620 -0
  106. data/ext/cumo/narray/SFMT.h +167 -0
  107. data/ext/cumo/narray/array.c +638 -0
  108. data/ext/cumo/narray/data.c +961 -0
  109. data/ext/cumo/narray/gen/cogen.rb +56 -0
  110. data/ext/cumo/narray/gen/cogen_kernel.rb +58 -0
  111. data/ext/cumo/narray/gen/def/bit.rb +37 -0
  112. data/ext/cumo/narray/gen/def/dcomplex.rb +39 -0
  113. data/ext/cumo/narray/gen/def/dfloat.rb +37 -0
  114. data/ext/cumo/narray/gen/def/int16.rb +36 -0
  115. data/ext/cumo/narray/gen/def/int32.rb +36 -0
  116. data/ext/cumo/narray/gen/def/int64.rb +36 -0
  117. data/ext/cumo/narray/gen/def/int8.rb +36 -0
  118. data/ext/cumo/narray/gen/def/robject.rb +37 -0
  119. data/ext/cumo/narray/gen/def/scomplex.rb +39 -0
  120. data/ext/cumo/narray/gen/def/sfloat.rb +37 -0
  121. data/ext/cumo/narray/gen/def/uint16.rb +36 -0
  122. data/ext/cumo/narray/gen/def/uint32.rb +36 -0
  123. data/ext/cumo/narray/gen/def/uint64.rb +36 -0
  124. data/ext/cumo/narray/gen/def/uint8.rb +36 -0
  125. data/ext/cumo/narray/gen/erbpp2.rb +346 -0
  126. data/ext/cumo/narray/gen/narray_def.rb +268 -0
  127. data/ext/cumo/narray/gen/spec.rb +425 -0
  128. data/ext/cumo/narray/gen/tmpl/accum.c +86 -0
  129. data/ext/cumo/narray/gen/tmpl/accum_binary.c +121 -0
  130. data/ext/cumo/narray/gen/tmpl/accum_binary_kernel.cu +61 -0
  131. data/ext/cumo/narray/gen/tmpl/accum_index.c +119 -0
  132. data/ext/cumo/narray/gen/tmpl/accum_index_kernel.cu +66 -0
  133. data/ext/cumo/narray/gen/tmpl/accum_kernel.cu +12 -0
  134. data/ext/cumo/narray/gen/tmpl/alloc_func.c +107 -0
  135. data/ext/cumo/narray/gen/tmpl/allocate.c +37 -0
  136. data/ext/cumo/narray/gen/tmpl/aref.c +66 -0
  137. data/ext/cumo/narray/gen/tmpl/aref_cpu.c +50 -0
  138. data/ext/cumo/narray/gen/tmpl/aset.c +56 -0
  139. data/ext/cumo/narray/gen/tmpl/binary.c +162 -0
  140. data/ext/cumo/narray/gen/tmpl/binary2.c +70 -0
  141. data/ext/cumo/narray/gen/tmpl/binary2_kernel.cu +15 -0
  142. data/ext/cumo/narray/gen/tmpl/binary_kernel.cu +31 -0
  143. data/ext/cumo/narray/gen/tmpl/binary_s.c +45 -0
  144. data/ext/cumo/narray/gen/tmpl/binary_s_kernel.cu +15 -0
  145. data/ext/cumo/narray/gen/tmpl/bincount.c +181 -0
  146. data/ext/cumo/narray/gen/tmpl/cast.c +44 -0
  147. data/ext/cumo/narray/gen/tmpl/cast_array.c +13 -0
  148. data/ext/cumo/narray/gen/tmpl/class.c +9 -0
  149. data/ext/cumo/narray/gen/tmpl/class_kernel.cu +6 -0
  150. data/ext/cumo/narray/gen/tmpl/clip.c +121 -0
  151. data/ext/cumo/narray/gen/tmpl/coerce_cast.c +10 -0
  152. data/ext/cumo/narray/gen/tmpl/complex_accum_kernel.cu +129 -0
  153. data/ext/cumo/narray/gen/tmpl/cond_binary.c +68 -0
  154. data/ext/cumo/narray/gen/tmpl/cond_binary_kernel.cu +18 -0
  155. data/ext/cumo/narray/gen/tmpl/cond_unary.c +46 -0
  156. data/ext/cumo/narray/gen/tmpl/cum.c +50 -0
  157. data/ext/cumo/narray/gen/tmpl/each.c +47 -0
  158. data/ext/cumo/narray/gen/tmpl/each_with_index.c +70 -0
  159. data/ext/cumo/narray/gen/tmpl/ewcomp.c +79 -0
  160. data/ext/cumo/narray/gen/tmpl/ewcomp_kernel.cu +19 -0
  161. data/ext/cumo/narray/gen/tmpl/extract.c +22 -0
  162. data/ext/cumo/narray/gen/tmpl/extract_cpu.c +26 -0
  163. data/ext/cumo/narray/gen/tmpl/extract_data.c +53 -0
  164. data/ext/cumo/narray/gen/tmpl/eye.c +105 -0
  165. data/ext/cumo/narray/gen/tmpl/eye_kernel.cu +19 -0
  166. data/ext/cumo/narray/gen/tmpl/fill.c +52 -0
  167. data/ext/cumo/narray/gen/tmpl/fill_kernel.cu +29 -0
  168. data/ext/cumo/narray/gen/tmpl/float_accum_kernel.cu +106 -0
  169. data/ext/cumo/narray/gen/tmpl/format.c +62 -0
  170. data/ext/cumo/narray/gen/tmpl/format_to_a.c +49 -0
  171. data/ext/cumo/narray/gen/tmpl/frexp.c +38 -0
  172. data/ext/cumo/narray/gen/tmpl/gemm.c +203 -0
  173. data/ext/cumo/narray/gen/tmpl/init_class.c +20 -0
  174. data/ext/cumo/narray/gen/tmpl/init_module.c +12 -0
  175. data/ext/cumo/narray/gen/tmpl/inspect.c +21 -0
  176. data/ext/cumo/narray/gen/tmpl/lib.c +50 -0
  177. data/ext/cumo/narray/gen/tmpl/lib_kernel.cu +24 -0
  178. data/ext/cumo/narray/gen/tmpl/logseq.c +102 -0
  179. data/ext/cumo/narray/gen/tmpl/logseq_kernel.cu +31 -0
  180. data/ext/cumo/narray/gen/tmpl/map_with_index.c +98 -0
  181. data/ext/cumo/narray/gen/tmpl/median.c +66 -0
  182. data/ext/cumo/narray/gen/tmpl/minmax.c +47 -0
  183. data/ext/cumo/narray/gen/tmpl/module.c +9 -0
  184. data/ext/cumo/narray/gen/tmpl/module_kernel.cu +1 -0
  185. data/ext/cumo/narray/gen/tmpl/new_dim0.c +15 -0
  186. data/ext/cumo/narray/gen/tmpl/new_dim0_kernel.cu +8 -0
  187. data/ext/cumo/narray/gen/tmpl/poly.c +50 -0
  188. data/ext/cumo/narray/gen/tmpl/pow.c +97 -0
  189. data/ext/cumo/narray/gen/tmpl/pow_kernel.cu +29 -0
  190. data/ext/cumo/narray/gen/tmpl/powint.c +17 -0
  191. data/ext/cumo/narray/gen/tmpl/qsort.c +212 -0
  192. data/ext/cumo/narray/gen/tmpl/rand.c +168 -0
  193. data/ext/cumo/narray/gen/tmpl/rand_norm.c +121 -0
  194. data/ext/cumo/narray/gen/tmpl/real_accum_kernel.cu +75 -0
  195. data/ext/cumo/narray/gen/tmpl/seq.c +112 -0
  196. data/ext/cumo/narray/gen/tmpl/seq_kernel.cu +43 -0
  197. data/ext/cumo/narray/gen/tmpl/set2.c +57 -0
  198. data/ext/cumo/narray/gen/tmpl/sort.c +48 -0
  199. data/ext/cumo/narray/gen/tmpl/sort_index.c +111 -0
  200. data/ext/cumo/narray/gen/tmpl/store.c +41 -0
  201. data/ext/cumo/narray/gen/tmpl/store_array.c +187 -0
  202. data/ext/cumo/narray/gen/tmpl/store_array_kernel.cu +58 -0
  203. data/ext/cumo/narray/gen/tmpl/store_bit.c +86 -0
  204. data/ext/cumo/narray/gen/tmpl/store_bit_kernel.cu +66 -0
  205. data/ext/cumo/narray/gen/tmpl/store_from.c +81 -0
  206. data/ext/cumo/narray/gen/tmpl/store_from_kernel.cu +58 -0
  207. data/ext/cumo/narray/gen/tmpl/store_kernel.cu +3 -0
  208. data/ext/cumo/narray/gen/tmpl/store_numeric.c +9 -0
  209. data/ext/cumo/narray/gen/tmpl/to_a.c +43 -0
  210. data/ext/cumo/narray/gen/tmpl/unary.c +132 -0
  211. data/ext/cumo/narray/gen/tmpl/unary2.c +60 -0
  212. data/ext/cumo/narray/gen/tmpl/unary_kernel.cu +72 -0
  213. data/ext/cumo/narray/gen/tmpl/unary_ret2.c +34 -0
  214. data/ext/cumo/narray/gen/tmpl/unary_s.c +86 -0
  215. data/ext/cumo/narray/gen/tmpl/unary_s_kernel.cu +58 -0
  216. data/ext/cumo/narray/gen/tmpl_bit/allocate.c +24 -0
  217. data/ext/cumo/narray/gen/tmpl_bit/aref.c +54 -0
  218. data/ext/cumo/narray/gen/tmpl_bit/aref_cpu.c +57 -0
  219. data/ext/cumo/narray/gen/tmpl_bit/aset.c +56 -0
  220. data/ext/cumo/narray/gen/tmpl_bit/binary.c +98 -0
  221. data/ext/cumo/narray/gen/tmpl_bit/bit_count.c +64 -0
  222. data/ext/cumo/narray/gen/tmpl_bit/bit_count_cpu.c +88 -0
  223. data/ext/cumo/narray/gen/tmpl_bit/bit_count_kernel.cu +76 -0
  224. data/ext/cumo/narray/gen/tmpl_bit/bit_reduce.c +133 -0
  225. data/ext/cumo/narray/gen/tmpl_bit/each.c +48 -0
  226. data/ext/cumo/narray/gen/tmpl_bit/each_with_index.c +70 -0
  227. data/ext/cumo/narray/gen/tmpl_bit/extract.c +30 -0
  228. data/ext/cumo/narray/gen/tmpl_bit/extract_cpu.c +29 -0
  229. data/ext/cumo/narray/gen/tmpl_bit/fill.c +69 -0
  230. data/ext/cumo/narray/gen/tmpl_bit/format.c +64 -0
  231. data/ext/cumo/narray/gen/tmpl_bit/format_to_a.c +51 -0
  232. data/ext/cumo/narray/gen/tmpl_bit/inspect.c +21 -0
  233. data/ext/cumo/narray/gen/tmpl_bit/mask.c +136 -0
  234. data/ext/cumo/narray/gen/tmpl_bit/none_p.c +14 -0
  235. data/ext/cumo/narray/gen/tmpl_bit/store_array.c +108 -0
  236. data/ext/cumo/narray/gen/tmpl_bit/store_bit.c +70 -0
  237. data/ext/cumo/narray/gen/tmpl_bit/store_from.c +60 -0
  238. data/ext/cumo/narray/gen/tmpl_bit/to_a.c +47 -0
  239. data/ext/cumo/narray/gen/tmpl_bit/unary.c +81 -0
  240. data/ext/cumo/narray/gen/tmpl_bit/where.c +90 -0
  241. data/ext/cumo/narray/gen/tmpl_bit/where2.c +95 -0
  242. data/ext/cumo/narray/index.c +880 -0
  243. data/ext/cumo/narray/kwargs.c +153 -0
  244. data/ext/cumo/narray/math.c +142 -0
  245. data/ext/cumo/narray/narray.c +1948 -0
  246. data/ext/cumo/narray/ndloop.c +2105 -0
  247. data/ext/cumo/narray/rand.c +45 -0
  248. data/ext/cumo/narray/step.c +474 -0
  249. data/ext/cumo/narray/struct.c +886 -0
  250. data/lib/cumo.rb +3 -0
  251. data/lib/cumo/cuda.rb +11 -0
  252. data/lib/cumo/cuda/compile_error.rb +36 -0
  253. data/lib/cumo/cuda/compiler.rb +161 -0
  254. data/lib/cumo/cuda/device.rb +47 -0
  255. data/lib/cumo/cuda/link_state.rb +31 -0
  256. data/lib/cumo/cuda/module.rb +40 -0
  257. data/lib/cumo/cuda/nvrtc_program.rb +27 -0
  258. data/lib/cumo/linalg.rb +12 -0
  259. data/lib/cumo/narray.rb +2 -0
  260. data/lib/cumo/narray/extra.rb +1278 -0
  261. data/lib/erbpp.rb +294 -0
  262. data/lib/erbpp/line_number.rb +137 -0
  263. data/lib/erbpp/narray_def.rb +381 -0
  264. data/numo-narray-version +1 -0
  265. data/run.gdb +7 -0
  266. metadata +353 -0
@@ -0,0 +1,38 @@
1
+ #ifndef CUMO_INTERN_NARRAY_H
2
+ #define CUMO_INTERN_NARRAY_H
3
+
4
+ /* Add cumo_ prefix to avoid C symbol collisions with Numo without modifying C implementations */
5
+
6
+ #define na_debug_flag cumo_na_debug_flag
7
+ extern int cumo_na_debug_flag;
8
+
9
+ #define mCumo rb_mCumo
10
+ extern VALUE rb_mCumo;
11
+ #define cNArray cumo_cNArray
12
+ extern VALUE cumo_cNArray;
13
+ #define nary_eCastError cumo_nary_eCastError
14
+ extern VALUE cumo_nary_eCastError;
15
+ #define nary_eShapeError cumo_nary_eShapeError
16
+ extern VALUE cumo_nary_eShapeError;
17
+ #define nary_eOperationError cumo_nary_eOperationError
18
+ extern VALUE cumo_nary_eOperationError;
19
+ #define nary_eDimensionError cumo_nary_eDimensionError
20
+ extern VALUE cumo_nary_eDimensionError;
21
+ #define nary_eValueError cumo_nary_eValueError
22
+ extern VALUE cumo_nary_eValueError;
23
+ #define na_data_type cumo_na_data_type
24
+ extern const rb_data_type_t cumo_na_data_type;
25
+
26
+ #define na_cStep cumo_na_cStep
27
+ extern VALUE cumo_na_cStep;
28
+
29
+ #define sym_reduce cumo_sym_reduce
30
+ extern VALUE cumo_sym_reduce;
31
+ #define sym_option cumo_sym_option
32
+ extern VALUE cumo_sym_option;
33
+ #define sym_loop_opt cumo_sym_loop_opt
34
+ extern VALUE cumo_sym_loop_opt;
35
+ #define sym_init cumo_sym_init
36
+ extern VALUE cumo_sym_init;
37
+
38
+ #endif /* CUMO_INTERN_NARRAY_H */
@@ -0,0 +1,6 @@
1
+ #ifndef CUMO_INTERN_KERNEL_H
2
+ #define CUMO_INTERN_KERNEL_H
3
+
4
+ void cumo_debug_breakpoint(void);
5
+
6
+ #endif /* ifndef CUMO_INTERN_KERNEL_H */
@@ -0,0 +1,429 @@
1
+ #ifndef CUMO_NARRAY_H
2
+ #define CUMO_NARRAY_H
3
+
4
+ #if defined(__cplusplus)
5
+ extern "C" {
6
+ #if 0
7
+ } /* satisfy cc-mode */
8
+ #endif
9
+ #endif
10
+
11
+ #include <math.h>
12
+ #include "cumo/compat.h"
13
+ #include "cumo/template.h"
14
+ #include "cumo/extconf.h"
15
+ #include "cumo/intern_fwd.h"
16
+
17
+ #ifdef HAVE_STDBOOL_H
18
+ # include <stdbool.h>
19
+ #endif
20
+
21
+ #ifdef HAVE_STDINT_H
22
+ # include <stdint.h>
23
+ #endif
24
+
25
+ #ifdef HAVE_SYS_TYPES_H
26
+ # include <sys/types.h>
27
+ #endif
28
+
29
+ #ifndef HAVE_U_INT8_T
30
+ # ifdef HAVE_UINT8_T
31
+ typedef uint8_t u_int8_t;
32
+ # endif
33
+ #endif
34
+
35
+ #ifndef HAVE_U_INT16_T
36
+ # ifdef HAVE_UINT16_T
37
+ typedef uint16_t u_int16_t;
38
+ # endif
39
+ #endif
40
+
41
+ #ifndef HAVE_U_INT32_T
42
+ # ifdef HAVE_UINT32_T
43
+ typedef uint32_t u_int32_t;
44
+ # endif
45
+ #endif
46
+
47
+ #ifndef HAVE_U_INT64_T
48
+ # ifdef HAVE_UINT64_T
49
+ typedef uint64_t u_int64_t;
50
+ # endif
51
+ #endif
52
+
53
+ #define SZF PRI_SIZE_PREFIX // defined in ruby.h
54
+
55
+ #if SIZEOF_LONG==8
56
+ # define NUM2INT64(x) NUM2LONG(x)
57
+ # define INT642NUM(x) LONG2NUM(x)
58
+ # define NUM2UINT64(x) NUM2ULONG(x)
59
+ # define UINT642NUM(x) ULONG2NUM(x)
60
+ # ifndef PRId64
61
+ # define PRId64 "ld"
62
+ # endif
63
+ # ifndef PRIu64
64
+ # define PRIu64 "lu"
65
+ # endif
66
+ #elif SIZEOF_LONG_LONG==8
67
+ # define NUM2INT64(x) NUM2LL(x)
68
+ # define INT642NUM(x) LL2NUM(x)
69
+ # define NUM2UINT64(x) NUM2ULL(x)
70
+ # define UINT642NUM(x) ULL2NUM(x)
71
+ # ifndef PRId64
72
+ # define PRId64 "lld"
73
+ # endif
74
+ # ifndef PRIu64
75
+ # define PRIu64 "llu"
76
+ # endif
77
+ #endif
78
+
79
+ #if SIZEOF_LONG==4
80
+ # define NUM2INT32(x) NUM2LONG(x)
81
+ # define INT322NUM(x) LONG2NUM(x)
82
+ # define NUM2UINT32(x) NUM2ULONG(x)
83
+ # define UINT322NUM(x) ULONG2NUM(x)
84
+ # ifndef PRId32
85
+ # define PRId32 "ld"
86
+ # endif
87
+ # ifndef PRIu32
88
+ # define PRIu32 "lu"
89
+ # endif
90
+ #elif SIZEOF_INT==4
91
+ # define NUM2INT32(x) NUM2INT(x)
92
+ # define INT322NUM(x) INT2NUM(x)
93
+ # define NUM2UINT32(x) NUM2UINT(x)
94
+ # define UINT322NUM(x) UINT2NUM(x)
95
+ # ifndef PRId32
96
+ # define PRId32 "d"
97
+ # endif
98
+ # ifndef PRIu32
99
+ # define PRIu32 "u"
100
+ # endif
101
+ #endif
102
+
103
+ #if SIZEOF_VALUE > 4
104
+ # undef INT322NUM
105
+ # undef UINT322NUM
106
+ # define INT322NUM(x) INT2FIX(x)
107
+ # define UINT322NUM(x) INT2FIX(x)
108
+ #endif
109
+
110
+ #ifndef HAVE_TYPE_BOOL
111
+ typedef int bool;
112
+ #endif
113
+ #ifndef FALSE /* in case these macros already exist */
114
+ # define FALSE 0 /* values of bool */
115
+ #endif
116
+ #ifndef TRUE
117
+ # define TRUE 1
118
+ #endif
119
+
120
+ typedef struct { float dat[2]; } scomplex;
121
+ typedef struct { double dat[2]; } dcomplex;
122
+ typedef int fortran_integer;
123
+
124
+ #define REAL(x) ((x).dat[0])
125
+ #define IMAG(x) ((x).dat[1])
126
+
127
+ extern int na_debug_flag;
128
+
129
+ #ifndef CUMO_NARRAY_C
130
+ extern VALUE cumo_cNArray;
131
+ extern VALUE rb_mCumo;
132
+ extern VALUE nary_eCastError;
133
+ extern VALUE nary_eShapeError;
134
+ extern VALUE nary_eOperationError;
135
+ extern VALUE nary_eDimensionError;
136
+ extern VALUE nary_eValueError;
137
+ extern const rb_data_type_t na_data_type;
138
+
139
+ //EXTERN const int na_sizeof[NA_NTYPES+1];
140
+ #endif
141
+
142
+ //#define na_upcast(x,y) cumo_na_upcast(x,y)
143
+
144
+ /* global variables within this module */
145
+ extern VALUE cumo_cBit;
146
+ extern VALUE cumo_cDFloat;
147
+ extern VALUE cumo_cSFloat;
148
+ extern VALUE cumo_cDComplex;
149
+ extern VALUE cumo_cSComplex;
150
+ extern VALUE cumo_cInt64;
151
+ extern VALUE cumo_cInt32;
152
+ extern VALUE cumo_cInt16;
153
+ extern VALUE cumo_cInt8;
154
+ extern VALUE cumo_cUInt64;
155
+ extern VALUE cumo_cUInt32;
156
+ extern VALUE cumo_cUInt16;
157
+ extern VALUE cumo_cUInt8;
158
+ extern VALUE cumo_cRObject;
159
+ extern VALUE na_cStep;
160
+ #ifndef HAVE_RB_CCOMPLEX
161
+ extern VALUE rb_cComplex;
162
+ #endif
163
+
164
+ extern VALUE sym_reduce;
165
+ extern VALUE sym_option;
166
+ extern VALUE sym_loop_opt;
167
+ extern VALUE sym_init;
168
+
169
+ #define NARRAY_DATA_T 0x1
170
+ #define NARRAY_VIEW_T 0x2
171
+ #define NARRAY_FILEMAP_T 0x3
172
+
173
+ typedef struct RNArray { // common header; embedded as `base` by the data, view and filemap structs
174
+ unsigned char ndim; // # of dimensions
175
+ unsigned char type; // compared against NARRAY_DATA_T / NARRAY_VIEW_T / NARRAY_FILEMAP_T by _na_get_narray_t
176
+ unsigned char flag[2]; // flags: [0] is accessed with the NA_FL0_* macros, [1] with the NA_FL1_* macros
177
+ unsigned short elmsz; // element size
178
+ size_t size; // # of total elements
179
+ size_t *shape; // # of elements for each dimension
180
+ VALUE reduce; // NOTE(review): meaning is not visible in this header — confirm against the implementation
181
+ } narray_t;
182
+
183
+
184
+ typedef struct RNArrayData {
185
+ narray_t base;
186
+ char *ptr;
187
+ } narray_data_t;
188
+
189
+
190
+ typedef union {
191
+ ssize_t stride;
192
+ size_t *index;
193
+ } stridx_t;
194
+
195
+ typedef struct RNArrayView {
196
+ narray_t base;
197
+ VALUE data; // data object
198
+ size_t offset; // offset of start point from data pointer
199
+ // :in units of elm.unit_bits
200
+ // address_unit pointer_unit access_unit data_unit
201
+ // elm.step_unit = elm.bit_size / elm.access_unit
202
+ // elm.step_unit = elm.size_bits / elm.unit_bits
203
+ stridx_t *stridx; // stride or indices of data pointer for each dimension
204
+ } narray_view_t;
205
+
206
+
207
+ // filemap is unimplemented
208
+ typedef struct RNArrayFileMap {
209
+ narray_t base;
210
+ char *ptr;
211
+ #ifdef WIN32
212
+ HANDLE hFile;
213
+ HANDLE hMap;
214
+ #else // POSIX mmap
215
+ int prot;
216
+ int flag;
217
+ #endif
218
+ } narray_filemap_t;
219
+
220
+
221
+ // this will be revised in future.
222
+ typedef struct {
223
+ unsigned int element_bits;
224
+ unsigned int element_bytes;
225
+ unsigned int element_stride;
226
+ } narray_type_info_t;
227
+
228
+
229
+ static inline narray_t * // returns obj's data pointer viewed as a narray_t
230
+ na_get_narray_t(VALUE obj)
231
+ {
232
+ narray_t *na;
233
+
234
+ Check_TypedStruct(obj,&na_data_type); // type-check obj against na_data_type before DATA_PTR is read
235
+ na = (narray_t*)DATA_PTR(obj);
236
+ return na;
237
+ }
238
+
239
+ static inline narray_t * // like na_get_narray_t, but also requires na->type to equal na_type
240
+ _na_get_narray_t(VALUE obj, unsigned char na_type)
241
+ {
242
+ narray_t *na;
243
+
244
+ Check_TypedStruct(obj,&na_data_type); // type-check obj against na_data_type before DATA_PTR is read
245
+ na = (narray_t*)DATA_PTR(obj);
246
+ if (na->type != na_type) { // stored kind differs from the kind the caller requested
247
+ rb_bug("unknown type 0x%x (0x%x given)", na_type, na->type); // first value is the requested type, second the stored one
248
+ }
249
+ return na; // callers cast this to the concrete struct via the na_get_narray_*_t macros
250
+ }
251
+
252
+ #define na_get_narray_data_t(obj) ((narray_data_t*)_na_get_narray_t((obj),NARRAY_DATA_T)) // whole expansion parenthesized so a following -> applies to the cast result
253
+ #define na_get_narray_view_t(obj) ((narray_view_t*)_na_get_narray_t((obj),NARRAY_VIEW_T)) // same, for view arrays
254
+ #define na_get_narray_filemap_t(obj) ((narray_filemap_t*)_na_get_narray_t((obj),NARRAY_FILEMAP_T)) // same, for filemap arrays
255
+
256
+ #define GetNArray(obj,var) TypedData_Get_Struct(obj, narray_t, &na_data_type, var)
257
+ #define GetNArrayView(obj,var) TypedData_Get_Struct(obj, narray_view_t, &na_data_type, var)
258
+ #define GetNArrayData(obj,var) TypedData_Get_Struct(obj, narray_data_t, &na_data_type, var)
259
+
260
+ #define SDX_IS_STRIDE(x) ((x).stride&0x1) // low bit set => the stridx_t holds a stride
261
+ #define SDX_IS_INDEX(x) (!SDX_IS_STRIDE(x)) // low bit clear => treated as an index pointer
262
+ #define SDX_GET_STRIDE(x) ((x).stride>>1) // undoes the shift applied by SDX_SET_STRIDE
263
+ #define SDX_GET_INDEX(x) ((x).index)
264
+
265
+ #define SDX_SET_STRIDE(x,s) ((x).stride=((s)<<1)|0x1) // stores s shifted left by one with the low tag bit set
266
+ #define SDX_SET_INDEX(x,idx) ((x).index=idx) // assumes index pointers have a clear low bit — TODO confirm alignment guarantee
267
+
268
+ #define RNARRAY(val) ((narray_t*)DATA_PTR(val))
269
+ #define RNARRAY_DATA(val) ((narray_data_t*)DATA_PTR(val))
270
+ #define RNARRAY_VIEW(val) ((narray_view_t*)DATA_PTR(val))
271
+ #define RNARRAY_FILEMAP(val) ((narray_filemap_t*)DATA_PTR(val))
272
+
273
+ #define RNARRAY_NDIM(val) (RNARRAY(val)->ndim)
274
+ #define RNARRAY_TYPE(val) (RNARRAY(val)->type)
275
+ #define RNARRAY_FLAG(val) (RNARRAY(val)->flag)
276
+ #define RNARRAY_SIZE(val) (RNARRAY(val)->size)
277
+ #define RNARRAY_SHAPE(val) (RNARRAY(val)->shape)
278
+ #define RNARRAY_REDUCE(val) (RNARRAY(val)->reduce)
279
+
280
+ #define RNARRAY_DATA_PTR(val) (RNARRAY_DATA(val)->ptr)
281
+ #define RNARRAY_VIEW_DATA(val) (RNARRAY_VIEW(val)->data)
282
+ #define RNARRAY_VIEW_OFFSET(val) (RNARRAY_VIEW(val)->offset)
283
+ #define RNARRAY_VIEW_STRIDX(val) (RNARRAY_VIEW(val)->stridx)
284
+
285
+ #define NA_NDIM(na) (((narray_t*)(na))->ndim) // field accessors on a narray_t pointer; argument parenthesized so the cast covers the whole expression
286
+ #define NA_TYPE(na) (((narray_t*)(na))->type)
287
+ #define NA_SIZE(na) (((narray_t*)(na))->size)
288
+ #define NA_SHAPE(na) (((narray_t*)(na))->shape)
289
+ #define NA_REDUCE(na) (((narray_t*)(na))->reduce)
290
+
291
+ #define NA_FLAG(obj) (na_get_narray_t(obj)->flag)
292
+ #define NA_FLAG0(obj) (NA_FLAG(obj)[0])
293
+ #define NA_FLAG1(obj) (NA_FLAG(obj)[1])
294
+
295
+ #define NA_DATA(na) ((narray_data_t*)(na))
296
+ #define NA_VIEW(na) ((narray_view_t*)(na))
297
+ #define NA_DATA_PTR(na) (NA_DATA(na)->ptr)
298
+ #define NA_VIEW_DATA(na) (NA_VIEW(na)->data)
299
+ #define NA_VIEW_OFFSET(na) (NA_VIEW(na)->offset)
300
+ #define NA_VIEW_STRIDX(na) (NA_VIEW(na)->stridx)
301
+
302
+ #define NA_IS_INDEX_AT(na,i) (SDX_IS_INDEX(NA_VIEW_STRIDX(na)[i]))
303
+ #define NA_IS_STRIDE_AT(na,i) (SDX_IS_STRIDE(NA_VIEW_STRIDX(na)[i]))
304
+ #define NA_INDEX_AT(na,i) (SDX_GET_INDEX(NA_VIEW_STRIDX(na)[i]))
305
+ #define NA_STRIDE_AT(na,i) (SDX_GET_STRIDE(NA_VIEW_STRIDX(na)[i]))
306
+
307
+ #define NA_FILEMAP_PTR(na) (((narray_filemap_t*)(na))->ptr) // ptr field of a filemap array; argument parenthesized so the cast covers the whole expression
308
+
309
+
310
+ #define NA_FL0_TEST(x,f) (NA_FLAG0(x)&(f))
311
+ #define NA_FL1_TEST(x,f) (NA_FLAG1(x)&(f))
312
+
313
+ #define NA_FL0_SET(x,f) do {NA_FLAG0(x) |= (f);} while(0)
314
+ #define NA_FL1_SET(x,f) do {NA_FLAG1(x) |= (f);} while(0)
315
+
316
+ #define NA_FL0_UNSET(x,f) do {NA_FLAG0(x) &= ~(f);} while(0)
317
+ #define NA_FL1_UNSET(x,f) do {NA_FLAG1(x) &= ~(f);} while(0)
318
+
319
+ #define NA_FL0_REVERSE(x,f) do {NA_FLAG0(x) ^= (f);} while(0)
320
+ #define NA_FL1_REVERSE(x,f) do {NA_FLAG1(x) ^= (f);} while(0)
321
+
322
+
323
+ /* FLAGS
324
+ - row-major / column-major
325
+ - Overwrite or not
326
+ - byte-swapped or not
327
+ - Extensible?
328
+ - matrix or not
329
+ */
330
+
331
+ #define NA_FL0_BIG_ENDIAN (0x1<<0)
332
+ #define NA_FL0_COLUMN_MAJOR (0x1<<1)
333
+ #define NA_FL1_LOCK (0x1<<0)
334
+ #define NA_FL1_INPLACE (0x1<<1)
335
+
336
+ #define TEST_COLUMN_MAJOR(x) NA_FL0_TEST(x,NA_FL0_COLUMN_MAJOR)
337
+ #define SET_COLUMN_MAJOR(x) NA_FL0_SET(x,NA_FL0_COLUMN_MAJOR)
338
+ #define UNSET_COLUMN_MAJOR(x) NA_FL0_UNSET(x,NA_FL0_COLUMN_MAJOR)
339
+
340
+ #define TEST_ROW_MAJOR(x) (!TEST_COLUMN_MAJOR(x))
341
+ #define SET_ROW_MAJOR(x) UNSET_COLUMN_MAJOR(x)
342
+ #define UNSET_ROW_MAJOR(x) SET_COLUMN_MAJOR(x)
343
+
344
+ #define TEST_BIG_ENDIAN(x) NA_FL0_TEST(x,NA_FL0_BIG_ENDIAN)
345
+ #define SET_BIG_ENDIAN(x) NA_FL0_SET(x,NA_FL0_BIG_ENDIAN)
346
+ #define UNSET_BIG_ENDIAN(x) NA_FL0_UNSET(x,NA_FL0_BIG_ENDIAN)
347
+
348
+ #define TEST_LITTLE_ENDIAN(x) (!TEST_BIG_ENDIAN(x))
349
+ #define SET_LITTLE_ENDIAN(x) UNSET_BIG_ENDIAN(x)
350
+ #define UNSET_LITTLE_ENDIAN(x) SET_BIG_ENDIAN(x)
351
+
352
+ #define REVERSE_ENDIAN(x) NA_FL0_REVERSE((x),NA_FL0_BIG_ENDIAN)
353
+
354
+ #define TEST_LOCK(x) NA_FL1_TEST(x,NA_FL1_LOCK)
355
+ #define SET_LOCK(x) NA_FL1_SET(x,NA_FL1_LOCK)
356
+ #define UNSET_LOCK(x) NA_FL1_UNSET(x,NA_FL1_LOCK)
357
+
358
+ #define TEST_INPLACE(x) NA_FL1_TEST(x,NA_FL1_INPLACE)
359
+ #define SET_INPLACE(x) NA_FL1_SET(x,NA_FL1_INPLACE)
360
+ #define UNSET_INPLACE(x) NA_FL1_UNSET(x,NA_FL1_INPLACE)
361
+
362
+ #ifdef DYNAMIC_ENDIAN
363
+ // not supported
364
+ #else
365
+ #ifdef WORDS_BIGENDIAN
366
+ #define TEST_HOST_ORDER(x) TEST_BIG_ENDIAN(x)
367
+ #define SET_HOST_ORDER(x) SET_BIG_ENDIAN(x)
368
+ #define UNSET_HOST_ORDER(x) UNSET_BIG_ENDIAN(x)
369
+ #define TEST_BYTE_SWAPPED(x) TEST_LITTLE_ENDIAN(x)
370
+ #define SET_BYTE_SWAPPED(x) SET_LITTLE_ENDIAN(x)
371
+ #define UNSET_BYTE_SWAPPED(x) UNSET_LITTLE_ENDIAN(x)
372
+ #define NA_FL0_INIT NA_FL0_BIG_ENDIAN
373
+ #else // LITTLE ENDIAN
374
+ #define TEST_HOST_ORDER(x) TEST_LITTLE_ENDIAN(x)
375
+ #define SET_HOST_ORDER(x) SET_LITTLE_ENDIAN(x)
376
+ #define UNSET_HOST_ORDER(x) UNSET_LITTLE_ENDIAN(x)
377
+ #define TEST_BYTE_SWAPPED(x) TEST_BIG_ENDIAN(x)
378
+ #define SET_BYTE_SWAPPED(x) SET_BIG_ENDIAN(x)
379
+ #define UNSET_BYTE_SWAPPED(x) UNSET_BIG_ENDIAN(x)
380
+ #define NA_FL0_INIT 0
381
+ #endif
382
+ #endif
383
+ #define NA_FL1_INIT 0
384
+
385
+
386
+ #define IsNArray(obj) (rb_obj_is_kind_of(obj,cNArray)==Qtrue)
387
+
388
+ #define DEBUG_PRINT(v) puts(StringValueCStr(rb_funcall(v,rb_intern("inspect"),0)))
389
+
390
+ #define NA_IsNArray(obj) \
391
+ (rb_obj_is_kind_of(obj,cNArray)==Qtrue)
392
+ #define NA_IsArray(obj) \
393
+ (TYPE(obj)==T_ARRAY || rb_obj_is_kind_of(obj,cNArray)==Qtrue)
394
+
395
+ #define NUM2REAL(v) NUM2DBL( rb_funcall((v),na_id_real,0) )
396
+ #define NUM2IMAG(v) NUM2DBL( rb_funcall((v),na_id_imag,0) )
397
+
398
+ //#define NA_MAX_DIMENSION (int)(sizeof(VALUE)*8-2)
399
+ #define NA_MAX_DIMENSION 12
400
+ #define NA_MAX_ELMSZ 65535
401
+
402
+ typedef unsigned int BIT_DIGIT;
403
+ #define BYTE_BIT_DIGIT sizeof(BIT_DIGIT)
404
+ #define NB (sizeof(BIT_DIGIT)*8)
405
+ #define BALL (~(BIT_DIGIT)0)
406
+ #define SLB(n) (((n)==NB)?~(BIT_DIGIT)0:(~(~(BIT_DIGIT)0<<(n))))
407
+
408
+ #define ELEMENT_BIT_SIZE "ELEMENT_BIT_SIZE"
409
+ #define ELEMENT_BYTE_SIZE "ELEMENT_BYTE_SIZE"
410
+ #define CONTIGUOUS_STRIDE "CONTIGUOUS_STRIDE"
411
+
412
+
413
+ #ifdef RUBY_INTEGER_UNIFICATION
414
+ #define IS_INTEGER_CLASS(c) ((c)==rb_cInteger)
415
+ #else
416
+ #define IS_INTEGER_CLASS(c) ((c)==rb_cFixnum||(c)==rb_cBignum)
417
+ #endif
418
+
419
+ #include "cumo/ndloop.h"
420
+ #include "cumo/intern.h"
421
+
422
+ #if defined(__cplusplus)
423
+ #if 0
424
+ { /* satisfy cc-mode */
425
+ #endif
426
+ } /* extern "C" { */
427
+ #endif
428
+
429
+ #endif /* ifndef CUMO_NARRAY_H */