cumo 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (266) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +27 -0
  3. data/.travis.yml +5 -0
  4. data/3rd_party/mkmf-cu/.gitignore +36 -0
  5. data/3rd_party/mkmf-cu/Gemfile +3 -0
  6. data/3rd_party/mkmf-cu/LICENSE +21 -0
  7. data/3rd_party/mkmf-cu/README.md +36 -0
  8. data/3rd_party/mkmf-cu/Rakefile +11 -0
  9. data/3rd_party/mkmf-cu/bin/mkmf-cu-nvcc +4 -0
  10. data/3rd_party/mkmf-cu/lib/mkmf-cu.rb +32 -0
  11. data/3rd_party/mkmf-cu/lib/mkmf-cu/cli.rb +80 -0
  12. data/3rd_party/mkmf-cu/lib/mkmf-cu/nvcc.rb +157 -0
  13. data/3rd_party/mkmf-cu/mkmf-cu.gemspec +16 -0
  14. data/3rd_party/mkmf-cu/test/test_mkmf-cu.rb +67 -0
  15. data/CODE_OF_CONDUCT.md +46 -0
  16. data/Gemfile +8 -0
  17. data/LICENSE.txt +82 -0
  18. data/README.md +252 -0
  19. data/Rakefile +43 -0
  20. data/bench/broadcast_fp32.rb +138 -0
  21. data/bench/cumo_bench.rb +193 -0
  22. data/bench/numo_bench.rb +138 -0
  23. data/bench/reduction_fp32.rb +117 -0
  24. data/bin/console +14 -0
  25. data/bin/setup +8 -0
  26. data/cumo.gemspec +32 -0
  27. data/ext/cumo/cuda/cublas.c +278 -0
  28. data/ext/cumo/cuda/driver.c +421 -0
  29. data/ext/cumo/cuda/memory_pool.cpp +185 -0
  30. data/ext/cumo/cuda/memory_pool_impl.cpp +308 -0
  31. data/ext/cumo/cuda/memory_pool_impl.hpp +370 -0
  32. data/ext/cumo/cuda/memory_pool_impl_test.cpp +554 -0
  33. data/ext/cumo/cuda/nvrtc.c +207 -0
  34. data/ext/cumo/cuda/runtime.c +167 -0
  35. data/ext/cumo/cumo.c +148 -0
  36. data/ext/cumo/depend.erb +58 -0
  37. data/ext/cumo/extconf.rb +179 -0
  38. data/ext/cumo/include/cumo.h +25 -0
  39. data/ext/cumo/include/cumo/compat.h +23 -0
  40. data/ext/cumo/include/cumo/cuda/cublas.h +153 -0
  41. data/ext/cumo/include/cumo/cuda/cumo_thrust.hpp +187 -0
  42. data/ext/cumo/include/cumo/cuda/cumo_thrust_complex.hpp +79 -0
  43. data/ext/cumo/include/cumo/cuda/driver.h +22 -0
  44. data/ext/cumo/include/cumo/cuda/memory_pool.h +28 -0
  45. data/ext/cumo/include/cumo/cuda/nvrtc.h +22 -0
  46. data/ext/cumo/include/cumo/cuda/runtime.h +40 -0
  47. data/ext/cumo/include/cumo/indexer.h +238 -0
  48. data/ext/cumo/include/cumo/intern.h +142 -0
  49. data/ext/cumo/include/cumo/intern_fwd.h +38 -0
  50. data/ext/cumo/include/cumo/intern_kernel.h +6 -0
  51. data/ext/cumo/include/cumo/narray.h +429 -0
  52. data/ext/cumo/include/cumo/narray_kernel.h +149 -0
  53. data/ext/cumo/include/cumo/ndloop.h +95 -0
  54. data/ext/cumo/include/cumo/reduce_kernel.h +126 -0
  55. data/ext/cumo/include/cumo/template.h +158 -0
  56. data/ext/cumo/include/cumo/template_kernel.h +77 -0
  57. data/ext/cumo/include/cumo/types/bit.h +40 -0
  58. data/ext/cumo/include/cumo/types/bit_kernel.h +34 -0
  59. data/ext/cumo/include/cumo/types/complex.h +402 -0
  60. data/ext/cumo/include/cumo/types/complex_kernel.h +414 -0
  61. data/ext/cumo/include/cumo/types/complex_macro.h +382 -0
  62. data/ext/cumo/include/cumo/types/complex_macro_kernel.h +186 -0
  63. data/ext/cumo/include/cumo/types/dcomplex.h +46 -0
  64. data/ext/cumo/include/cumo/types/dcomplex_kernel.h +13 -0
  65. data/ext/cumo/include/cumo/types/dfloat.h +47 -0
  66. data/ext/cumo/include/cumo/types/dfloat_kernel.h +14 -0
  67. data/ext/cumo/include/cumo/types/float_def.h +34 -0
  68. data/ext/cumo/include/cumo/types/float_def_kernel.h +39 -0
  69. data/ext/cumo/include/cumo/types/float_macro.h +191 -0
  70. data/ext/cumo/include/cumo/types/float_macro_kernel.h +158 -0
  71. data/ext/cumo/include/cumo/types/int16.h +24 -0
  72. data/ext/cumo/include/cumo/types/int16_kernel.h +23 -0
  73. data/ext/cumo/include/cumo/types/int32.h +24 -0
  74. data/ext/cumo/include/cumo/types/int32_kernel.h +19 -0
  75. data/ext/cumo/include/cumo/types/int64.h +24 -0
  76. data/ext/cumo/include/cumo/types/int64_kernel.h +19 -0
  77. data/ext/cumo/include/cumo/types/int8.h +24 -0
  78. data/ext/cumo/include/cumo/types/int8_kernel.h +19 -0
  79. data/ext/cumo/include/cumo/types/int_macro.h +67 -0
  80. data/ext/cumo/include/cumo/types/int_macro_kernel.h +48 -0
  81. data/ext/cumo/include/cumo/types/real_accum.h +486 -0
  82. data/ext/cumo/include/cumo/types/real_accum_kernel.h +101 -0
  83. data/ext/cumo/include/cumo/types/robj_macro.h +80 -0
  84. data/ext/cumo/include/cumo/types/robj_macro_kernel.h +0 -0
  85. data/ext/cumo/include/cumo/types/robject.h +27 -0
  86. data/ext/cumo/include/cumo/types/robject_kernel.h +7 -0
  87. data/ext/cumo/include/cumo/types/scomplex.h +46 -0
  88. data/ext/cumo/include/cumo/types/scomplex_kernel.h +13 -0
  89. data/ext/cumo/include/cumo/types/sfloat.h +48 -0
  90. data/ext/cumo/include/cumo/types/sfloat_kernel.h +14 -0
  91. data/ext/cumo/include/cumo/types/uint16.h +25 -0
  92. data/ext/cumo/include/cumo/types/uint16_kernel.h +20 -0
  93. data/ext/cumo/include/cumo/types/uint32.h +25 -0
  94. data/ext/cumo/include/cumo/types/uint32_kernel.h +20 -0
  95. data/ext/cumo/include/cumo/types/uint64.h +25 -0
  96. data/ext/cumo/include/cumo/types/uint64_kernel.h +20 -0
  97. data/ext/cumo/include/cumo/types/uint8.h +25 -0
  98. data/ext/cumo/include/cumo/types/uint8_kernel.h +20 -0
  99. data/ext/cumo/include/cumo/types/uint_macro.h +58 -0
  100. data/ext/cumo/include/cumo/types/uint_macro_kernel.h +38 -0
  101. data/ext/cumo/include/cumo/types/xint_macro.h +169 -0
  102. data/ext/cumo/include/cumo/types/xint_macro_kernel.h +88 -0
  103. data/ext/cumo/narray/SFMT-params.h +97 -0
  104. data/ext/cumo/narray/SFMT-params19937.h +46 -0
  105. data/ext/cumo/narray/SFMT.c +620 -0
  106. data/ext/cumo/narray/SFMT.h +167 -0
  107. data/ext/cumo/narray/array.c +638 -0
  108. data/ext/cumo/narray/data.c +961 -0
  109. data/ext/cumo/narray/gen/cogen.rb +56 -0
  110. data/ext/cumo/narray/gen/cogen_kernel.rb +58 -0
  111. data/ext/cumo/narray/gen/def/bit.rb +37 -0
  112. data/ext/cumo/narray/gen/def/dcomplex.rb +39 -0
  113. data/ext/cumo/narray/gen/def/dfloat.rb +37 -0
  114. data/ext/cumo/narray/gen/def/int16.rb +36 -0
  115. data/ext/cumo/narray/gen/def/int32.rb +36 -0
  116. data/ext/cumo/narray/gen/def/int64.rb +36 -0
  117. data/ext/cumo/narray/gen/def/int8.rb +36 -0
  118. data/ext/cumo/narray/gen/def/robject.rb +37 -0
  119. data/ext/cumo/narray/gen/def/scomplex.rb +39 -0
  120. data/ext/cumo/narray/gen/def/sfloat.rb +37 -0
  121. data/ext/cumo/narray/gen/def/uint16.rb +36 -0
  122. data/ext/cumo/narray/gen/def/uint32.rb +36 -0
  123. data/ext/cumo/narray/gen/def/uint64.rb +36 -0
  124. data/ext/cumo/narray/gen/def/uint8.rb +36 -0
  125. data/ext/cumo/narray/gen/erbpp2.rb +346 -0
  126. data/ext/cumo/narray/gen/narray_def.rb +268 -0
  127. data/ext/cumo/narray/gen/spec.rb +425 -0
  128. data/ext/cumo/narray/gen/tmpl/accum.c +86 -0
  129. data/ext/cumo/narray/gen/tmpl/accum_binary.c +121 -0
  130. data/ext/cumo/narray/gen/tmpl/accum_binary_kernel.cu +61 -0
  131. data/ext/cumo/narray/gen/tmpl/accum_index.c +119 -0
  132. data/ext/cumo/narray/gen/tmpl/accum_index_kernel.cu +66 -0
  133. data/ext/cumo/narray/gen/tmpl/accum_kernel.cu +12 -0
  134. data/ext/cumo/narray/gen/tmpl/alloc_func.c +107 -0
  135. data/ext/cumo/narray/gen/tmpl/allocate.c +37 -0
  136. data/ext/cumo/narray/gen/tmpl/aref.c +66 -0
  137. data/ext/cumo/narray/gen/tmpl/aref_cpu.c +50 -0
  138. data/ext/cumo/narray/gen/tmpl/aset.c +56 -0
  139. data/ext/cumo/narray/gen/tmpl/binary.c +162 -0
  140. data/ext/cumo/narray/gen/tmpl/binary2.c +70 -0
  141. data/ext/cumo/narray/gen/tmpl/binary2_kernel.cu +15 -0
  142. data/ext/cumo/narray/gen/tmpl/binary_kernel.cu +31 -0
  143. data/ext/cumo/narray/gen/tmpl/binary_s.c +45 -0
  144. data/ext/cumo/narray/gen/tmpl/binary_s_kernel.cu +15 -0
  145. data/ext/cumo/narray/gen/tmpl/bincount.c +181 -0
  146. data/ext/cumo/narray/gen/tmpl/cast.c +44 -0
  147. data/ext/cumo/narray/gen/tmpl/cast_array.c +13 -0
  148. data/ext/cumo/narray/gen/tmpl/class.c +9 -0
  149. data/ext/cumo/narray/gen/tmpl/class_kernel.cu +6 -0
  150. data/ext/cumo/narray/gen/tmpl/clip.c +121 -0
  151. data/ext/cumo/narray/gen/tmpl/coerce_cast.c +10 -0
  152. data/ext/cumo/narray/gen/tmpl/complex_accum_kernel.cu +129 -0
  153. data/ext/cumo/narray/gen/tmpl/cond_binary.c +68 -0
  154. data/ext/cumo/narray/gen/tmpl/cond_binary_kernel.cu +18 -0
  155. data/ext/cumo/narray/gen/tmpl/cond_unary.c +46 -0
  156. data/ext/cumo/narray/gen/tmpl/cum.c +50 -0
  157. data/ext/cumo/narray/gen/tmpl/each.c +47 -0
  158. data/ext/cumo/narray/gen/tmpl/each_with_index.c +70 -0
  159. data/ext/cumo/narray/gen/tmpl/ewcomp.c +79 -0
  160. data/ext/cumo/narray/gen/tmpl/ewcomp_kernel.cu +19 -0
  161. data/ext/cumo/narray/gen/tmpl/extract.c +22 -0
  162. data/ext/cumo/narray/gen/tmpl/extract_cpu.c +26 -0
  163. data/ext/cumo/narray/gen/tmpl/extract_data.c +53 -0
  164. data/ext/cumo/narray/gen/tmpl/eye.c +105 -0
  165. data/ext/cumo/narray/gen/tmpl/eye_kernel.cu +19 -0
  166. data/ext/cumo/narray/gen/tmpl/fill.c +52 -0
  167. data/ext/cumo/narray/gen/tmpl/fill_kernel.cu +29 -0
  168. data/ext/cumo/narray/gen/tmpl/float_accum_kernel.cu +106 -0
  169. data/ext/cumo/narray/gen/tmpl/format.c +62 -0
  170. data/ext/cumo/narray/gen/tmpl/format_to_a.c +49 -0
  171. data/ext/cumo/narray/gen/tmpl/frexp.c +38 -0
  172. data/ext/cumo/narray/gen/tmpl/gemm.c +203 -0
  173. data/ext/cumo/narray/gen/tmpl/init_class.c +20 -0
  174. data/ext/cumo/narray/gen/tmpl/init_module.c +12 -0
  175. data/ext/cumo/narray/gen/tmpl/inspect.c +21 -0
  176. data/ext/cumo/narray/gen/tmpl/lib.c +50 -0
  177. data/ext/cumo/narray/gen/tmpl/lib_kernel.cu +24 -0
  178. data/ext/cumo/narray/gen/tmpl/logseq.c +102 -0
  179. data/ext/cumo/narray/gen/tmpl/logseq_kernel.cu +31 -0
  180. data/ext/cumo/narray/gen/tmpl/map_with_index.c +98 -0
  181. data/ext/cumo/narray/gen/tmpl/median.c +66 -0
  182. data/ext/cumo/narray/gen/tmpl/minmax.c +47 -0
  183. data/ext/cumo/narray/gen/tmpl/module.c +9 -0
  184. data/ext/cumo/narray/gen/tmpl/module_kernel.cu +1 -0
  185. data/ext/cumo/narray/gen/tmpl/new_dim0.c +15 -0
  186. data/ext/cumo/narray/gen/tmpl/new_dim0_kernel.cu +8 -0
  187. data/ext/cumo/narray/gen/tmpl/poly.c +50 -0
  188. data/ext/cumo/narray/gen/tmpl/pow.c +97 -0
  189. data/ext/cumo/narray/gen/tmpl/pow_kernel.cu +29 -0
  190. data/ext/cumo/narray/gen/tmpl/powint.c +17 -0
  191. data/ext/cumo/narray/gen/tmpl/qsort.c +212 -0
  192. data/ext/cumo/narray/gen/tmpl/rand.c +168 -0
  193. data/ext/cumo/narray/gen/tmpl/rand_norm.c +121 -0
  194. data/ext/cumo/narray/gen/tmpl/real_accum_kernel.cu +75 -0
  195. data/ext/cumo/narray/gen/tmpl/seq.c +112 -0
  196. data/ext/cumo/narray/gen/tmpl/seq_kernel.cu +43 -0
  197. data/ext/cumo/narray/gen/tmpl/set2.c +57 -0
  198. data/ext/cumo/narray/gen/tmpl/sort.c +48 -0
  199. data/ext/cumo/narray/gen/tmpl/sort_index.c +111 -0
  200. data/ext/cumo/narray/gen/tmpl/store.c +41 -0
  201. data/ext/cumo/narray/gen/tmpl/store_array.c +187 -0
  202. data/ext/cumo/narray/gen/tmpl/store_array_kernel.cu +58 -0
  203. data/ext/cumo/narray/gen/tmpl/store_bit.c +86 -0
  204. data/ext/cumo/narray/gen/tmpl/store_bit_kernel.cu +66 -0
  205. data/ext/cumo/narray/gen/tmpl/store_from.c +81 -0
  206. data/ext/cumo/narray/gen/tmpl/store_from_kernel.cu +58 -0
  207. data/ext/cumo/narray/gen/tmpl/store_kernel.cu +3 -0
  208. data/ext/cumo/narray/gen/tmpl/store_numeric.c +9 -0
  209. data/ext/cumo/narray/gen/tmpl/to_a.c +43 -0
  210. data/ext/cumo/narray/gen/tmpl/unary.c +132 -0
  211. data/ext/cumo/narray/gen/tmpl/unary2.c +60 -0
  212. data/ext/cumo/narray/gen/tmpl/unary_kernel.cu +72 -0
  213. data/ext/cumo/narray/gen/tmpl/unary_ret2.c +34 -0
  214. data/ext/cumo/narray/gen/tmpl/unary_s.c +86 -0
  215. data/ext/cumo/narray/gen/tmpl/unary_s_kernel.cu +58 -0
  216. data/ext/cumo/narray/gen/tmpl_bit/allocate.c +24 -0
  217. data/ext/cumo/narray/gen/tmpl_bit/aref.c +54 -0
  218. data/ext/cumo/narray/gen/tmpl_bit/aref_cpu.c +57 -0
  219. data/ext/cumo/narray/gen/tmpl_bit/aset.c +56 -0
  220. data/ext/cumo/narray/gen/tmpl_bit/binary.c +98 -0
  221. data/ext/cumo/narray/gen/tmpl_bit/bit_count.c +64 -0
  222. data/ext/cumo/narray/gen/tmpl_bit/bit_count_cpu.c +88 -0
  223. data/ext/cumo/narray/gen/tmpl_bit/bit_count_kernel.cu +76 -0
  224. data/ext/cumo/narray/gen/tmpl_bit/bit_reduce.c +133 -0
  225. data/ext/cumo/narray/gen/tmpl_bit/each.c +48 -0
  226. data/ext/cumo/narray/gen/tmpl_bit/each_with_index.c +70 -0
  227. data/ext/cumo/narray/gen/tmpl_bit/extract.c +30 -0
  228. data/ext/cumo/narray/gen/tmpl_bit/extract_cpu.c +29 -0
  229. data/ext/cumo/narray/gen/tmpl_bit/fill.c +69 -0
  230. data/ext/cumo/narray/gen/tmpl_bit/format.c +64 -0
  231. data/ext/cumo/narray/gen/tmpl_bit/format_to_a.c +51 -0
  232. data/ext/cumo/narray/gen/tmpl_bit/inspect.c +21 -0
  233. data/ext/cumo/narray/gen/tmpl_bit/mask.c +136 -0
  234. data/ext/cumo/narray/gen/tmpl_bit/none_p.c +14 -0
  235. data/ext/cumo/narray/gen/tmpl_bit/store_array.c +108 -0
  236. data/ext/cumo/narray/gen/tmpl_bit/store_bit.c +70 -0
  237. data/ext/cumo/narray/gen/tmpl_bit/store_from.c +60 -0
  238. data/ext/cumo/narray/gen/tmpl_bit/to_a.c +47 -0
  239. data/ext/cumo/narray/gen/tmpl_bit/unary.c +81 -0
  240. data/ext/cumo/narray/gen/tmpl_bit/where.c +90 -0
  241. data/ext/cumo/narray/gen/tmpl_bit/where2.c +95 -0
  242. data/ext/cumo/narray/index.c +880 -0
  243. data/ext/cumo/narray/kwargs.c +153 -0
  244. data/ext/cumo/narray/math.c +142 -0
  245. data/ext/cumo/narray/narray.c +1948 -0
  246. data/ext/cumo/narray/ndloop.c +2105 -0
  247. data/ext/cumo/narray/rand.c +45 -0
  248. data/ext/cumo/narray/step.c +474 -0
  249. data/ext/cumo/narray/struct.c +886 -0
  250. data/lib/cumo.rb +3 -0
  251. data/lib/cumo/cuda.rb +11 -0
  252. data/lib/cumo/cuda/compile_error.rb +36 -0
  253. data/lib/cumo/cuda/compiler.rb +161 -0
  254. data/lib/cumo/cuda/device.rb +47 -0
  255. data/lib/cumo/cuda/link_state.rb +31 -0
  256. data/lib/cumo/cuda/module.rb +40 -0
  257. data/lib/cumo/cuda/nvrtc_program.rb +27 -0
  258. data/lib/cumo/linalg.rb +12 -0
  259. data/lib/cumo/narray.rb +2 -0
  260. data/lib/cumo/narray/extra.rb +1278 -0
  261. data/lib/erbpp.rb +294 -0
  262. data/lib/erbpp/line_number.rb +137 -0
  263. data/lib/erbpp/narray_def.rb +381 -0
  264. data/numo-narray-version +1 -0
  265. data/run.gdb +7 -0
  266. metadata +353 -0
@@ -0,0 +1,77 @@
1
+ #ifndef CUMO_TEMPLATE_KERNEL_H
2
+ #define CUMO_TEMPLATE_KERNEL_H
3
+
4
/*
 * LOAD_BIT(adr, pos, val): read one bit from a BIT_DIGIT array.
 *   adr - base address, cast to BIT_DIGIT*.
 *   pos - bit offset; split into a word index (pos / NB) and a bit index
 *         (pos % NB). Note that pos is evaluated twice.
 *   val - lvalue that receives the selected bit (0 or 1).
 * Expands to a brace block that declares locals named dig and bit.
 */
+ #define LOAD_BIT( adr, pos, val ) \
5
+ { \
6
+ size_t dig = (size_t)(pos) / NB; \
7
+ int bit = (size_t)(pos) % NB; \
8
+ val = (((BIT_DIGIT*)(adr))[dig]>>(bit)) & 1u; \
9
+ }
10
+
11
/*
 * LOAD_BIT_STEP(adr, pos, step, idx, val): read one bit and advance.
 *   When idx is non-null the bit offset is pos + *idx and idx is incremented;
 *   otherwise the bit offset is pos and pos is advanced by step afterwards.
 *   The selected bit (0 or 1) is stored into val.
 * pos and idx must be modifiable lvalues; the expansion declares locals
 * named dig and bit.
 */
+ #define LOAD_BIT_STEP( adr, pos, step, idx, val ) \
12
+ { \
13
+ size_t dig; int bit; \
14
+ if (idx) { \
15
+ dig = (size_t)((pos) + *(idx)) / NB; \
16
+ bit = (size_t)((pos) + *(idx)) % NB; \
17
+ idx++; \
18
+ } else { \
19
+ dig = (size_t)(pos) / NB; \
20
+ bit = (size_t)(pos) % NB; \
21
+ pos += step; \
22
+ } \
23
+ val = (((BIT_DIGIT*)(adr))[dig]>>bit) & 1u; \
24
+ }
25
+
26
/*
 * STORE_BIT(adr, pos, val): write one bit of a BIT_DIGIT array.
 *   The word index is pos / NB and the bit index is pos % NB.
 *   A nonzero val ORs (val) << bit into the word via atomicOr; a zero val
 *   clears the bit via atomicAnd with ~(1u << bit).
 * NOTE(review): because val itself is shifted, a val other than 0 or 1 would
 * set bits other than the addressed one; callers presumably pass 0 or 1 only
 * (see the open question on the trailing comment) -- confirm.
 * val is evaluated twice on the set path.
 */
+ #define STORE_BIT(adr,pos,val) \
27
+ { \
28
+ size_t dig = (size_t)(pos) / NB; \
29
+ int bit = (size_t)(pos) % NB; \
30
+ if (val) { \
31
+ atomicOr((BIT_DIGIT*)(adr) + (dig), (val)<<(bit)); \
32
+ } else { \
33
+ atomicAnd((BIT_DIGIT*)(adr) + (dig), ~(1u<<(bit))); \
34
+ } \
35
+ }
36
+ // val -> val&1 ??
37
+
38
/*
 * STORE_BIT_STEP(adr, pos, step, idx, val): write one bit and advance.
 *   When idx is non-null the bit offset is pos + *idx and idx is incremented;
 *   otherwise the bit offset is pos and pos is advanced by step.
 *   A nonzero val ORs (val) << bit into the addressed word via atomicOr; a
 *   zero val clears the bit via atomicAnd.
 * NOTE(review): as with the non-stepping store macro, val is shifted as-is,
 * so it is presumably expected to be 0 or 1 -- confirm against callers.
 */
+ #define STORE_BIT_STEP( adr, pos, step, idx, val ) \
39
+ { \
40
+ size_t dig; int bit; \
41
+ if (idx) { \
42
+ dig = (size_t)((pos) + *(idx)) / NB; \
43
+ bit = (size_t)((pos) + *(idx)) % NB; \
44
+ idx++; \
45
+ } else { \
46
+ dig = (size_t)(pos) / NB; \
47
+ bit = (size_t)(pos) % NB; \
48
+ pos += step; \
49
+ } \
50
+ if (val) { \
51
+ atomicOr((BIT_DIGIT*)(adr) + (dig), (val)<<(bit)); \
52
+ } else { \
53
+ atomicAnd((BIT_DIGIT*)(adr) + (dig), ~((1u)<<(bit))); \
54
+ } \
55
+ }
56
+ // val -> val&1 ??
57
+
58
/* Upper limits applied by get_blockDim() and get_gridDim() below. */
#define MAX_BLOCK_DIM 128
#define MAX_GRID_DIM 2147483647 // ref. http://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#compute-capabilities

/*
 * Grid dimension for n elements: n / MAX_BLOCK_DIM + 1, capped at
 * MAX_GRID_DIM. The result is always at least 1, and is one larger than
 * strictly necessary when n is an exact multiple of MAX_BLOCK_DIM.
 */
static inline size_t
get_gridDim(size_t n)
{
    const size_t blocks = n / MAX_BLOCK_DIM + 1;

    return (blocks > MAX_GRID_DIM) ? MAX_GRID_DIM : blocks;
}

/*
 * Block dimension for n elements: n itself, capped at MAX_BLOCK_DIM.
 * Returns 0 when n is 0.
 */
static inline size_t
get_blockDim(size_t n)
{
    if (n > MAX_BLOCK_DIM) {
        return MAX_BLOCK_DIM;
    }
    return n;
}
75
+
76
+
77
+ #endif /* ifndef CUMO_TEMPLATE_KERNEL_H */
@@ -0,0 +1,40 @@
1
/*
 * Element-type definitions for the bit array class (cT = cumo_cBit).
 * Both dtype and rtype are BIT_DIGIT.
 *   m_zero / m_one        - the constants 0 and 1.
 *   m_sign, m_from_*      - map any nonzero input to 1 and zero to 0.
 *   m_data_to_num         - wraps the value with INT2FIX.
 *   m_sprintf             - formats the value with "%1d".
 *   m_not/m_and/m_or/m_xor - bitwise operators applied to the whole operand.
 *   m_eq                  - bitwise complement of XOR.
 *   m_count_true/false    - 1 when the operand is nonzero / zero; the _cpu
 *                           variants are defined identically here.
 */
+ typedef BIT_DIGIT dtype;
2
+ typedef BIT_DIGIT rtype;
3
+ #define cT cumo_cBit
4
+ #define cRT cT
5
+
6
+ #define m_zero 0
7
+ #define m_one 1
8
+
9
+ #define m_abs(x) (x)
10
+ #define m_sign(x) (((x)==0) ? 0:1)
11
+
12
+ #define m_from_double(x) (((x)==0) ? 0 : 1)
13
+ #define m_from_real(x) (((x)==0) ? 0 : 1)
14
+ #define m_from_sint(x) (((x)==0) ? 0 : 1)
15
+ #define m_from_int32(x) (((x)==0) ? 0 : 1)
16
+ #define m_from_int64(x) (((x)==0) ? 0 : 1)
17
+ #define m_from_uint32(x) (((x)==0) ? 0 : 1)
18
+ #define m_from_uint64(x) (((x)==0) ? 0 : 1)
19
+ #define m_data_to_num(x) INT2FIX(x)
20
+ #define m_sprintf(s,x) sprintf(s,"%1d",(int)(x))
21
+
22
+ #define m_copy(x) (x)
23
+ #define m_not(x) (~(x))
24
+ #define m_and(x,y) ((x)&(y))
25
+ #define m_or(x,y) ((x)|(y))
26
+ #define m_xor(x,y) ((x)^(y))
27
+ #define m_eq(x,y) (~((x)^(y)))
28
+ #define m_count_true(x) ((x)!=0)
29
+ #define m_count_true_cpu(x) ((x)!=0)
30
+ #define m_count_false(x) ((x)==0)
31
+ #define m_count_false_cpu(x) ((x)==0)
32
+
33
+ static inline BIT_DIGIT m_num_to_data(VALUE num) {
34
+ if (RTEST(num)) {
35
+ if (!RTEST(rb_equal(num,INT2FIX(0)))) {
36
+ return 1;
37
+ }
38
+ }
39
+ return 0;
40
+ }
@@ -0,0 +1,34 @@
1
+ #ifndef CUMO_BIT_KERNEL_H
2
+ #define CUMO_BIT_KERNEL_H
3
+
4
/*
 * Kernel-side element-type definitions for the bit type.
 * Both dtype and rtype are BIT_DIGIT.
 *   m_sign, m_from_*       - map any nonzero input to 1 and zero to 0.
 *   m_not/m_and/m_or/m_xor - bitwise operators applied to the whole operand.
 *   m_eq                   - bitwise complement of XOR.
 *   m_count_true/false     - 1 when the operand is nonzero / zero; the _cpu
 *                            variants forward to the plain macros.
 */
+ typedef BIT_DIGIT dtype;
5
+ typedef BIT_DIGIT rtype;
6
+
7
+ #define m_zero 0
8
+ #define m_one 1
9
+
10
+ #define m_abs(x) (x)
11
+ #define m_sign(x) (((x)==0) ? 0:1)
12
+
13
+ #define m_from_double(x) (((x)==0) ? 0 : 1)
14
+ #define m_from_real(x) (((x)==0) ? 0 : 1)
15
+ #define m_from_sint(x) (((x)==0) ? 0 : 1)
16
+ #define m_from_int32(x) (((x)==0) ? 0 : 1)
17
+ #define m_from_int64(x) (((x)==0) ? 0 : 1)
18
+ #define m_from_uint32(x) (((x)==0) ? 0 : 1)
19
+ #define m_from_uint64(x) (((x)==0) ? 0 : 1)
20
+ #define m_data_to_num(x) INT2FIX(x)
21
+ #define m_sprintf(s,x) sprintf(s,"%1d",(int)(x))
22
+
23
+ #define m_copy(x) (x)
24
+ #define m_not(x) (~(x))
25
+ #define m_and(x,y) ((x)&(y))
26
+ #define m_or(x,y) ((x)|(y))
27
+ #define m_xor(x,y) ((x)^(y))
28
+ #define m_eq(x,y) (~((x)^(y)))
29
+ #define m_count_true(x) ((x)!=0)
30
+ #define m_count_true_cpu(x) m_count_true(x)
31
+ #define m_count_false(x) ((x)==0)
32
+ #define m_count_false_cpu(x) m_count_false(x)
33
+
34
+ #endif // CUMO_BIT_KERNEL_H
@@ -0,0 +1,402 @@
1
+ static inline dtype c_new(rtype r, rtype i) {
2
+ dtype z;
3
+ REAL(z) = r;
4
+ IMAG(z) = i;
5
+ return z;
6
+ }
7
+
8
+ static inline dtype c_set_real(dtype x, rtype r) {
9
+ REAL(x)=r;
10
+ return x;
11
+ }
12
+
13
+ static inline dtype c_set_imag(dtype x, rtype i) {
14
+ IMAG(x)=i;
15
+ return x;
16
+ }
17
+
18
+ static inline VALUE COMP2NUM(dtype x) {
19
+ VALUE v;
20
+ v = rb_funcall(rb_intern("Kernel"), rb_intern("Complex"), 2,
21
+ rb_float_new(REAL(x)), rb_float_new(IMAG(x)));
22
+ return v;
23
+ }
24
+
25
+ static inline dtype NUM2COMP(VALUE v) {
26
+ dtype z;
27
+ REAL(z) = NUM2DBL(rb_funcall(v,id_real,0));
28
+ IMAG(z) = NUM2DBL(rb_funcall(v,id_imag,0));
29
+ return z;
30
+ }
31
+
32
/*
 * Predicates on a complex value x, built from its REAL and IMAG parts.
 *   c_is_zero   - both parts equal 0.
 *   c_eq / c_ne - both parts equal / at least one part differs.
 *   c_isnan     - either part is NaN.
 *   c_isinf     - either part is infinite.
 *   c_isposinf  - either part is infinite with its sign bit clear.
 *   c_isneginf  - either part is infinite with its sign bit set.
 *   c_isfinite  - both parts are finite.
 * Each macro evaluates its argument(s) more than once.
 */
+ #define c_is_zero(x) (REAL(x)==0 && IMAG(x)==0)
33
+ #define c_eq(x,y) (REAL(x)==REAL(y) && IMAG(x)==IMAG(y))
34
+ #define c_ne(x,y) (REAL(x)!=REAL(y) || IMAG(x)!=IMAG(y))
35
+ #define c_isnan(x) (isnan(REAL(x)) || isnan(IMAG(x)))
36
+ #define c_isinf(x) (isinf(REAL(x)) || isinf(IMAG(x)))
37
+ #define c_isposinf(x) ((isinf(REAL(x)) && signbit(REAL(x))==0) || \
38
+ (isinf(IMAG(x)) && signbit(IMAG(x))==0))
39
+ #define c_isneginf(x) ((isinf(REAL(x)) && signbit(REAL(x))) || \
40
+ (isinf(IMAG(x)) && signbit(IMAG(x))))
41
+ #define c_isfinite(x) (isfinite(REAL(x)) && isfinite(IMAG(x)))
42
+
43
+ static inline dtype c_zero() {
44
+ dtype z;
45
+ REAL(z) = 0;
46
+ IMAG(z) = 0;
47
+ return z;
48
+ }
49
+
50
+ static inline dtype c_one() {
51
+ dtype z;
52
+ REAL(z) = 1;
53
+ IMAG(z) = 0;
54
+ return z;
55
+ }
56
+
57
+ static inline dtype c_minus(dtype x) {
58
+ dtype z;
59
+ REAL(z) = -REAL(x);
60
+ IMAG(z) = -IMAG(x);
61
+ return z;
62
+ }
63
+
64
+ static inline dtype c_im(dtype x) {
65
+ dtype z;
66
+ REAL(z) = -IMAG(x);
67
+ IMAG(z) = REAL(x);
68
+ return z;
69
+ }
70
+
71
+ static inline dtype c_add(dtype x, dtype y) {
72
+ dtype z;
73
+ REAL(z) = REAL(x)+REAL(y);
74
+ IMAG(z) = IMAG(x)+IMAG(y);
75
+ return z;
76
+ }
77
+
78
+ static inline dtype c_sub(dtype x, dtype y) {
79
+ dtype z;
80
+ REAL(z) = REAL(x)-REAL(y);
81
+ IMAG(z) = IMAG(x)-IMAG(y);
82
+ return z;
83
+ }
84
+
85
+
86
+ static inline dtype c_mul(dtype x, dtype y) {
87
+ dtype z;
88
+ REAL(z) = REAL(x)*REAL(y)-IMAG(x)*IMAG(y);
89
+ IMAG(z) = REAL(x)*IMAG(y)+IMAG(x)*REAL(y);
90
+ return z;
91
+ }
92
+
93
+ static inline dtype c_mul_r(dtype x, rtype y) {
94
+ dtype z;
95
+ REAL(z) = REAL(x)*y;
96
+ IMAG(z) = IMAG(x)*y;
97
+ return z;
98
+ }
99
+
100
+ static inline dtype c_div(dtype x, dtype y) {
101
+ dtype z;
102
+ rtype s,yr,yi;
103
+ s = r_hypot(REAL(y),IMAG(y));
104
+ yr = REAL(y)/s;
105
+ yi = IMAG(y)/s;
106
+ REAL(z) = (REAL(x)*yr+IMAG(x)*yi)/s;
107
+ IMAG(z) = (IMAG(x)*yr-REAL(x)*yi)/s;
108
+ return z;
109
+ }
110
+
111
+ static inline dtype c_div_r(dtype x, rtype y) {
112
+ dtype z;
113
+ REAL(z) = REAL(x)/y;
114
+ IMAG(z) = IMAG(x)/y;
115
+ return z;
116
+ }
117
+
118
+ static inline dtype c_reciprocal(dtype x) {
119
+ dtype z;
120
+ if ( r_abs(REAL(x)) > r_abs(IMAG(x)) ) {
121
+ IMAG(z) = IMAG(x)/REAL(x);
122
+ REAL(z) = (1+IMAG(z)*IMAG(z))*REAL(x);
123
+ IMAG(z) /= -REAL(z);
124
+ REAL(z) = 1/REAL(z);
125
+ } else {
126
+ REAL(z) = REAL(x)/IMAG(x);
127
+ IMAG(z) = (1+REAL(z)*REAL(z))*IMAG(x);
128
+ REAL(z) /= IMAG(z);
129
+ IMAG(z) = -1/IMAG(z);
130
+ }
131
+ return z;
132
+ }
133
+
134
+ static inline dtype c_square(dtype x) {
135
+ dtype z;
136
+ REAL(z) = REAL(x)*REAL(x)-IMAG(x)*IMAG(x);
137
+ IMAG(z) = 2*REAL(x)*IMAG(x);
138
+ return z;
139
+ }
140
+
141
+ static inline dtype c_sqrt(dtype x) {
142
+ dtype z;
143
+ rtype xr, xi, r;
144
+ xr = REAL(x)/2;
145
+ xi = IMAG(x)/2;
146
+ r = r_hypot(xr,xi);
147
+ if (xr>0) {
148
+ REAL(z) = sqrt(r+xr);
149
+ IMAG(z) = xi/REAL(z);
150
+ } else if ( (r-=xr)!=0 ) {
151
+ IMAG(z) = (xi>=0) ? sqrt(r):-sqrt(r);
152
+ REAL(z) = xi/IMAG(z);
153
+ } else {
154
+ REAL(z) = IMAG(z) = 0;
155
+ }
156
+ return z;
157
+ }
158
+
159
+ static inline dtype c_log(dtype x) {
160
+ dtype z;
161
+ REAL(z) = r_log(r_hypot(REAL(x),IMAG(x)));
162
+ IMAG(z) = r_atan2(IMAG(x),REAL(x));
163
+ return z;
164
+ }
165
+
166
+ static inline dtype c_log2(dtype x) {
167
+ dtype z;
168
+ z = c_log(x);
169
+ z = c_mul_r(x,M_LOG2E);
170
+ return z;
171
+ }
172
+
173
+ static inline dtype c_log10(dtype x) {
174
+ dtype z;
175
+ z = c_log(x);
176
+ z = c_mul_r(x,M_LOG10E);
177
+ return z;
178
+ }
179
+
180
+ static inline dtype c_exp(dtype x) {
181
+ dtype z;
182
+ rtype a = r_exp(REAL(x));
183
+ REAL(z) = a*r_cos(IMAG(x));
184
+ IMAG(z) = a*r_sin(IMAG(x));
185
+ return z;
186
+ }
187
+
188
+ static inline dtype c_exp2(dtype x) {
189
+ dtype z;
190
+ rtype a = r_exp(REAL(x)*M_LN2);
191
+ REAL(z) = a*r_cos(IMAG(x));
192
+ IMAG(z) = a*r_sin(IMAG(x));
193
+ return z;
194
+ }
195
+
196
+ static inline dtype c_exp10(dtype x) {
197
+ dtype z;
198
+ rtype a = r_exp(REAL(x)*M_LN10);
199
+ REAL(z) = a*r_cos(IMAG(x));
200
+ IMAG(z) = a*r_sin(IMAG(x));
201
+ return z;
202
+ }
203
+
204
+ static inline dtype c_sin(dtype x) {
205
+ dtype z;
206
+ REAL(z) = r_sin(REAL(x))*r_cosh(IMAG(x));
207
+ IMAG(z) = r_cos(REAL(x))*r_sinh(IMAG(x));
208
+ return z;
209
+ }
210
+
211
+ static inline dtype c_sinh(dtype x) {
212
+ dtype z;
213
+ REAL(z) = r_sinh(REAL(x))*r_cos(IMAG(x));
214
+ IMAG(z) = r_cosh(REAL(x))*r_sin(IMAG(x));
215
+ return z;
216
+ }
217
+
218
+ static inline dtype c_cos(dtype x) {
219
+ dtype z;
220
+ REAL(z) = r_cos(REAL(x))*r_cosh(IMAG(x));
221
+ IMAG(z) = -r_sin(REAL(x))*r_sinh(IMAG(x));
222
+ return z;
223
+ }
224
+
225
+ static inline dtype c_cosh(dtype x) {
226
+ dtype z;
227
+ REAL(z) = r_cosh(REAL(x))*r_cos(IMAG(x));
228
+ IMAG(z) = r_sinh(REAL(x))*r_sin(IMAG(x));
229
+ return z;
230
+ }
231
+
232
+ static inline dtype c_tan(dtype x) {
233
+ dtype z;
234
+ rtype c, d;
235
+ if (r_abs(IMAG(x))<1) {
236
+ c = r_cos(REAL(x));
237
+ d = r_sinh(IMAG(x));
238
+ d = c*c + d*d;
239
+ REAL(z) = 0.5*r_sin(2*REAL(x))/d;
240
+ IMAG(z) = 0.5*r_sinh(2*IMAG(x))/d;
241
+ } else {
242
+ d = r_exp(-IMAG(x));
243
+ c = 2*d/(1-d*d);
244
+ c = c*c;
245
+ d = r_cos(REAL(x));
246
+ d = 1.0 + d*d*c;
247
+ REAL(z) = 0.5*r_sin(2*REAL(x))*c/d;
248
+ IMAG(z) = 1/r_tanh(IMAG(x))/d;
249
+ }
250
+ return z;
251
+ }
252
+
253
+ static inline dtype c_tanh(dtype x) {
254
+ dtype z;
255
+ rtype c, d, s;
256
+ c = r_cos(IMAG(x));
257
+ s = r_sinh(REAL(x));
258
+ d = c*c + s*s;
259
+ if (r_abs(REAL(x))<1) {
260
+ REAL(z) = s*r_cosh(REAL(x))/d;
261
+ IMAG(z) = 0.5*r_sin(2*IMAG(x))/d;
262
+ } else {
263
+ c = c / s;
264
+ c = 1 + c*c;
265
+ REAL(z) = 1/(r_tanh(REAL(x))*c);
266
+ IMAG(z) = 0.5*r_sin(2*IMAG(x))/d;
267
+ }
268
+ return z;
269
+ }
270
+
271
+ static inline dtype c_asin(dtype x) {
272
+ dtype z, y;
273
+ y = c_square(x);
274
+ REAL(y) = 1-REAL(y);
275
+ IMAG(y) = -IMAG(y);
276
+ y = c_sqrt(y);
277
+ REAL(y) -= IMAG(x);
278
+ IMAG(y) += REAL(x);
279
+ y = c_log(y);
280
+ REAL(z) = IMAG(y);
281
+ IMAG(z) = -REAL(y);
282
+ return z;
283
+ }
284
+
285
+ static inline dtype c_asinh(dtype x) {
286
+ dtype z, y;
287
+ y = c_square(x);
288
+ REAL(y) += 1;
289
+ y = c_sqrt(y);
290
+ REAL(y) += REAL(x);
291
+ IMAG(y) += IMAG(x);
292
+ z = c_log(y);
293
+ return z;
294
+ }
295
+
296
+ static inline dtype c_acos(dtype x) {
297
+ dtype z, y;
298
+ y = c_square(x);
299
+ REAL(y) = 1-REAL(y);
300
+ IMAG(y) = -IMAG(y);
301
+ y = c_sqrt(y);
302
+ REAL(z) = REAL(x)-IMAG(y);
303
+ IMAG(z) = IMAG(x)+REAL(y);
304
+ y = c_log(z);
305
+ REAL(z) = IMAG(y);
306
+ IMAG(z) = -REAL(y);
307
+ return z;
308
+ }
309
+
310
+ static inline dtype c_acosh(dtype x) {
311
+ dtype z, y;
312
+ y = c_square(x);
313
+ REAL(y) -= 1;
314
+ y = c_sqrt(y);
315
+ REAL(y) += REAL(x);
316
+ IMAG(y) += IMAG(x);
317
+ z = c_log(y);
318
+ return z;
319
+ }
320
+
321
+ static inline dtype c_atan(dtype x) {
322
+ dtype z, y;
323
+ REAL(y) = -REAL(x);
324
+ IMAG(y) = 1-IMAG(x);
325
+ REAL(z) = REAL(x);
326
+ IMAG(z) = 1+IMAG(x);
327
+ y = c_div(z,y);
328
+ y = c_log(y);
329
+ REAL(z) = -IMAG(y)/2;
330
+ IMAG(z) = REAL(y)/2;
331
+ return z;
332
+ }
333
+
334
+ static inline dtype c_atanh(dtype x) {
335
+ dtype z, y;
336
+ REAL(y) = 1-REAL(x);
337
+ IMAG(y) = -IMAG(x);
338
+ REAL(z) = 1+REAL(x);
339
+ IMAG(z) = IMAG(x);
340
+ y = c_div(z,y);
341
+ y = c_log(y);
342
+ REAL(z) = REAL(y)/2;
343
+ IMAG(z) = IMAG(y)/2;
344
+ return z;
345
+ }
346
+
347
+ static inline dtype c_pow(dtype x, dtype y)
348
+ {
349
+ dtype z;
350
+ if (c_is_zero(y)) {
351
+ z = c_one();
352
+ } else if (c_is_zero(x) && REAL(y)>0 && IMAG(y)==0) {
353
+ z = c_zero();
354
+ } else {
355
+ z = c_log(x);
356
+ z = c_mul(y,z);
357
+ z = c_exp(z);
358
+ }
359
+ return z;
360
+ }
361
+
362
+ static inline dtype c_pow_int(dtype x, int p)
363
+ {
364
+ dtype z = c_one();
365
+ if (p<0) {
366
+ x = c_pow_int(x,-p);
367
+ return c_reciprocal(x);
368
+ }
369
+ if (p==2) {return c_square(x);}
370
+ if (p&1) {z = x;}
371
+ p >>= 1;
372
+ while (p) {
373
+ x = c_square(x);
374
+ if (p&1) z = c_mul(z,x);
375
+ p >>= 1;
376
+ }
377
+ return z;
378
+ }
379
+
380
+ static inline dtype c_cbrt(dtype x) {
381
+ dtype z;
382
+ z = c_log(x);
383
+ z = c_div_r(z,3);
384
+ z = c_exp(z);
385
+ return z;
386
+ }
387
+
388
+ static inline rtype c_abs(dtype x) {
389
+ return r_hypot(REAL(x),IMAG(x));
390
+ }
391
+
392
+ static inline rtype c_abs_square(dtype x) {
393
+ return REAL(x)*REAL(x)+IMAG(x)*IMAG(x);
394
+ }
395
+
396
+
397
+
398
+ /*
399
+ static inline rtype c_hypot(dtype x, dtype y) {
400
+ return r_hypot(c_abs(x),c_abs(y));
401
+ }
402
+ */