cumo 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (266) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +27 -0
  3. data/.travis.yml +5 -0
  4. data/3rd_party/mkmf-cu/.gitignore +36 -0
  5. data/3rd_party/mkmf-cu/Gemfile +3 -0
  6. data/3rd_party/mkmf-cu/LICENSE +21 -0
  7. data/3rd_party/mkmf-cu/README.md +36 -0
  8. data/3rd_party/mkmf-cu/Rakefile +11 -0
  9. data/3rd_party/mkmf-cu/bin/mkmf-cu-nvcc +4 -0
  10. data/3rd_party/mkmf-cu/lib/mkmf-cu.rb +32 -0
  11. data/3rd_party/mkmf-cu/lib/mkmf-cu/cli.rb +80 -0
  12. data/3rd_party/mkmf-cu/lib/mkmf-cu/nvcc.rb +157 -0
  13. data/3rd_party/mkmf-cu/mkmf-cu.gemspec +16 -0
  14. data/3rd_party/mkmf-cu/test/test_mkmf-cu.rb +67 -0
  15. data/CODE_OF_CONDUCT.md +46 -0
  16. data/Gemfile +8 -0
  17. data/LICENSE.txt +82 -0
  18. data/README.md +252 -0
  19. data/Rakefile +43 -0
  20. data/bench/broadcast_fp32.rb +138 -0
  21. data/bench/cumo_bench.rb +193 -0
  22. data/bench/numo_bench.rb +138 -0
  23. data/bench/reduction_fp32.rb +117 -0
  24. data/bin/console +14 -0
  25. data/bin/setup +8 -0
  26. data/cumo.gemspec +32 -0
  27. data/ext/cumo/cuda/cublas.c +278 -0
  28. data/ext/cumo/cuda/driver.c +421 -0
  29. data/ext/cumo/cuda/memory_pool.cpp +185 -0
  30. data/ext/cumo/cuda/memory_pool_impl.cpp +308 -0
  31. data/ext/cumo/cuda/memory_pool_impl.hpp +370 -0
  32. data/ext/cumo/cuda/memory_pool_impl_test.cpp +554 -0
  33. data/ext/cumo/cuda/nvrtc.c +207 -0
  34. data/ext/cumo/cuda/runtime.c +167 -0
  35. data/ext/cumo/cumo.c +148 -0
  36. data/ext/cumo/depend.erb +58 -0
  37. data/ext/cumo/extconf.rb +179 -0
  38. data/ext/cumo/include/cumo.h +25 -0
  39. data/ext/cumo/include/cumo/compat.h +23 -0
  40. data/ext/cumo/include/cumo/cuda/cublas.h +153 -0
  41. data/ext/cumo/include/cumo/cuda/cumo_thrust.hpp +187 -0
  42. data/ext/cumo/include/cumo/cuda/cumo_thrust_complex.hpp +79 -0
  43. data/ext/cumo/include/cumo/cuda/driver.h +22 -0
  44. data/ext/cumo/include/cumo/cuda/memory_pool.h +28 -0
  45. data/ext/cumo/include/cumo/cuda/nvrtc.h +22 -0
  46. data/ext/cumo/include/cumo/cuda/runtime.h +40 -0
  47. data/ext/cumo/include/cumo/indexer.h +238 -0
  48. data/ext/cumo/include/cumo/intern.h +142 -0
  49. data/ext/cumo/include/cumo/intern_fwd.h +38 -0
  50. data/ext/cumo/include/cumo/intern_kernel.h +6 -0
  51. data/ext/cumo/include/cumo/narray.h +429 -0
  52. data/ext/cumo/include/cumo/narray_kernel.h +149 -0
  53. data/ext/cumo/include/cumo/ndloop.h +95 -0
  54. data/ext/cumo/include/cumo/reduce_kernel.h +126 -0
  55. data/ext/cumo/include/cumo/template.h +158 -0
  56. data/ext/cumo/include/cumo/template_kernel.h +77 -0
  57. data/ext/cumo/include/cumo/types/bit.h +40 -0
  58. data/ext/cumo/include/cumo/types/bit_kernel.h +34 -0
  59. data/ext/cumo/include/cumo/types/complex.h +402 -0
  60. data/ext/cumo/include/cumo/types/complex_kernel.h +414 -0
  61. data/ext/cumo/include/cumo/types/complex_macro.h +382 -0
  62. data/ext/cumo/include/cumo/types/complex_macro_kernel.h +186 -0
  63. data/ext/cumo/include/cumo/types/dcomplex.h +46 -0
  64. data/ext/cumo/include/cumo/types/dcomplex_kernel.h +13 -0
  65. data/ext/cumo/include/cumo/types/dfloat.h +47 -0
  66. data/ext/cumo/include/cumo/types/dfloat_kernel.h +14 -0
  67. data/ext/cumo/include/cumo/types/float_def.h +34 -0
  68. data/ext/cumo/include/cumo/types/float_def_kernel.h +39 -0
  69. data/ext/cumo/include/cumo/types/float_macro.h +191 -0
  70. data/ext/cumo/include/cumo/types/float_macro_kernel.h +158 -0
  71. data/ext/cumo/include/cumo/types/int16.h +24 -0
  72. data/ext/cumo/include/cumo/types/int16_kernel.h +23 -0
  73. data/ext/cumo/include/cumo/types/int32.h +24 -0
  74. data/ext/cumo/include/cumo/types/int32_kernel.h +19 -0
  75. data/ext/cumo/include/cumo/types/int64.h +24 -0
  76. data/ext/cumo/include/cumo/types/int64_kernel.h +19 -0
  77. data/ext/cumo/include/cumo/types/int8.h +24 -0
  78. data/ext/cumo/include/cumo/types/int8_kernel.h +19 -0
  79. data/ext/cumo/include/cumo/types/int_macro.h +67 -0
  80. data/ext/cumo/include/cumo/types/int_macro_kernel.h +48 -0
  81. data/ext/cumo/include/cumo/types/real_accum.h +486 -0
  82. data/ext/cumo/include/cumo/types/real_accum_kernel.h +101 -0
  83. data/ext/cumo/include/cumo/types/robj_macro.h +80 -0
  84. data/ext/cumo/include/cumo/types/robj_macro_kernel.h +0 -0
  85. data/ext/cumo/include/cumo/types/robject.h +27 -0
  86. data/ext/cumo/include/cumo/types/robject_kernel.h +7 -0
  87. data/ext/cumo/include/cumo/types/scomplex.h +46 -0
  88. data/ext/cumo/include/cumo/types/scomplex_kernel.h +13 -0
  89. data/ext/cumo/include/cumo/types/sfloat.h +48 -0
  90. data/ext/cumo/include/cumo/types/sfloat_kernel.h +14 -0
  91. data/ext/cumo/include/cumo/types/uint16.h +25 -0
  92. data/ext/cumo/include/cumo/types/uint16_kernel.h +20 -0
  93. data/ext/cumo/include/cumo/types/uint32.h +25 -0
  94. data/ext/cumo/include/cumo/types/uint32_kernel.h +20 -0
  95. data/ext/cumo/include/cumo/types/uint64.h +25 -0
  96. data/ext/cumo/include/cumo/types/uint64_kernel.h +20 -0
  97. data/ext/cumo/include/cumo/types/uint8.h +25 -0
  98. data/ext/cumo/include/cumo/types/uint8_kernel.h +20 -0
  99. data/ext/cumo/include/cumo/types/uint_macro.h +58 -0
  100. data/ext/cumo/include/cumo/types/uint_macro_kernel.h +38 -0
  101. data/ext/cumo/include/cumo/types/xint_macro.h +169 -0
  102. data/ext/cumo/include/cumo/types/xint_macro_kernel.h +88 -0
  103. data/ext/cumo/narray/SFMT-params.h +97 -0
  104. data/ext/cumo/narray/SFMT-params19937.h +46 -0
  105. data/ext/cumo/narray/SFMT.c +620 -0
  106. data/ext/cumo/narray/SFMT.h +167 -0
  107. data/ext/cumo/narray/array.c +638 -0
  108. data/ext/cumo/narray/data.c +961 -0
  109. data/ext/cumo/narray/gen/cogen.rb +56 -0
  110. data/ext/cumo/narray/gen/cogen_kernel.rb +58 -0
  111. data/ext/cumo/narray/gen/def/bit.rb +37 -0
  112. data/ext/cumo/narray/gen/def/dcomplex.rb +39 -0
  113. data/ext/cumo/narray/gen/def/dfloat.rb +37 -0
  114. data/ext/cumo/narray/gen/def/int16.rb +36 -0
  115. data/ext/cumo/narray/gen/def/int32.rb +36 -0
  116. data/ext/cumo/narray/gen/def/int64.rb +36 -0
  117. data/ext/cumo/narray/gen/def/int8.rb +36 -0
  118. data/ext/cumo/narray/gen/def/robject.rb +37 -0
  119. data/ext/cumo/narray/gen/def/scomplex.rb +39 -0
  120. data/ext/cumo/narray/gen/def/sfloat.rb +37 -0
  121. data/ext/cumo/narray/gen/def/uint16.rb +36 -0
  122. data/ext/cumo/narray/gen/def/uint32.rb +36 -0
  123. data/ext/cumo/narray/gen/def/uint64.rb +36 -0
  124. data/ext/cumo/narray/gen/def/uint8.rb +36 -0
  125. data/ext/cumo/narray/gen/erbpp2.rb +346 -0
  126. data/ext/cumo/narray/gen/narray_def.rb +268 -0
  127. data/ext/cumo/narray/gen/spec.rb +425 -0
  128. data/ext/cumo/narray/gen/tmpl/accum.c +86 -0
  129. data/ext/cumo/narray/gen/tmpl/accum_binary.c +121 -0
  130. data/ext/cumo/narray/gen/tmpl/accum_binary_kernel.cu +61 -0
  131. data/ext/cumo/narray/gen/tmpl/accum_index.c +119 -0
  132. data/ext/cumo/narray/gen/tmpl/accum_index_kernel.cu +66 -0
  133. data/ext/cumo/narray/gen/tmpl/accum_kernel.cu +12 -0
  134. data/ext/cumo/narray/gen/tmpl/alloc_func.c +107 -0
  135. data/ext/cumo/narray/gen/tmpl/allocate.c +37 -0
  136. data/ext/cumo/narray/gen/tmpl/aref.c +66 -0
  137. data/ext/cumo/narray/gen/tmpl/aref_cpu.c +50 -0
  138. data/ext/cumo/narray/gen/tmpl/aset.c +56 -0
  139. data/ext/cumo/narray/gen/tmpl/binary.c +162 -0
  140. data/ext/cumo/narray/gen/tmpl/binary2.c +70 -0
  141. data/ext/cumo/narray/gen/tmpl/binary2_kernel.cu +15 -0
  142. data/ext/cumo/narray/gen/tmpl/binary_kernel.cu +31 -0
  143. data/ext/cumo/narray/gen/tmpl/binary_s.c +45 -0
  144. data/ext/cumo/narray/gen/tmpl/binary_s_kernel.cu +15 -0
  145. data/ext/cumo/narray/gen/tmpl/bincount.c +181 -0
  146. data/ext/cumo/narray/gen/tmpl/cast.c +44 -0
  147. data/ext/cumo/narray/gen/tmpl/cast_array.c +13 -0
  148. data/ext/cumo/narray/gen/tmpl/class.c +9 -0
  149. data/ext/cumo/narray/gen/tmpl/class_kernel.cu +6 -0
  150. data/ext/cumo/narray/gen/tmpl/clip.c +121 -0
  151. data/ext/cumo/narray/gen/tmpl/coerce_cast.c +10 -0
  152. data/ext/cumo/narray/gen/tmpl/complex_accum_kernel.cu +129 -0
  153. data/ext/cumo/narray/gen/tmpl/cond_binary.c +68 -0
  154. data/ext/cumo/narray/gen/tmpl/cond_binary_kernel.cu +18 -0
  155. data/ext/cumo/narray/gen/tmpl/cond_unary.c +46 -0
  156. data/ext/cumo/narray/gen/tmpl/cum.c +50 -0
  157. data/ext/cumo/narray/gen/tmpl/each.c +47 -0
  158. data/ext/cumo/narray/gen/tmpl/each_with_index.c +70 -0
  159. data/ext/cumo/narray/gen/tmpl/ewcomp.c +79 -0
  160. data/ext/cumo/narray/gen/tmpl/ewcomp_kernel.cu +19 -0
  161. data/ext/cumo/narray/gen/tmpl/extract.c +22 -0
  162. data/ext/cumo/narray/gen/tmpl/extract_cpu.c +26 -0
  163. data/ext/cumo/narray/gen/tmpl/extract_data.c +53 -0
  164. data/ext/cumo/narray/gen/tmpl/eye.c +105 -0
  165. data/ext/cumo/narray/gen/tmpl/eye_kernel.cu +19 -0
  166. data/ext/cumo/narray/gen/tmpl/fill.c +52 -0
  167. data/ext/cumo/narray/gen/tmpl/fill_kernel.cu +29 -0
  168. data/ext/cumo/narray/gen/tmpl/float_accum_kernel.cu +106 -0
  169. data/ext/cumo/narray/gen/tmpl/format.c +62 -0
  170. data/ext/cumo/narray/gen/tmpl/format_to_a.c +49 -0
  171. data/ext/cumo/narray/gen/tmpl/frexp.c +38 -0
  172. data/ext/cumo/narray/gen/tmpl/gemm.c +203 -0
  173. data/ext/cumo/narray/gen/tmpl/init_class.c +20 -0
  174. data/ext/cumo/narray/gen/tmpl/init_module.c +12 -0
  175. data/ext/cumo/narray/gen/tmpl/inspect.c +21 -0
  176. data/ext/cumo/narray/gen/tmpl/lib.c +50 -0
  177. data/ext/cumo/narray/gen/tmpl/lib_kernel.cu +24 -0
  178. data/ext/cumo/narray/gen/tmpl/logseq.c +102 -0
  179. data/ext/cumo/narray/gen/tmpl/logseq_kernel.cu +31 -0
  180. data/ext/cumo/narray/gen/tmpl/map_with_index.c +98 -0
  181. data/ext/cumo/narray/gen/tmpl/median.c +66 -0
  182. data/ext/cumo/narray/gen/tmpl/minmax.c +47 -0
  183. data/ext/cumo/narray/gen/tmpl/module.c +9 -0
  184. data/ext/cumo/narray/gen/tmpl/module_kernel.cu +1 -0
  185. data/ext/cumo/narray/gen/tmpl/new_dim0.c +15 -0
  186. data/ext/cumo/narray/gen/tmpl/new_dim0_kernel.cu +8 -0
  187. data/ext/cumo/narray/gen/tmpl/poly.c +50 -0
  188. data/ext/cumo/narray/gen/tmpl/pow.c +97 -0
  189. data/ext/cumo/narray/gen/tmpl/pow_kernel.cu +29 -0
  190. data/ext/cumo/narray/gen/tmpl/powint.c +17 -0
  191. data/ext/cumo/narray/gen/tmpl/qsort.c +212 -0
  192. data/ext/cumo/narray/gen/tmpl/rand.c +168 -0
  193. data/ext/cumo/narray/gen/tmpl/rand_norm.c +121 -0
  194. data/ext/cumo/narray/gen/tmpl/real_accum_kernel.cu +75 -0
  195. data/ext/cumo/narray/gen/tmpl/seq.c +112 -0
  196. data/ext/cumo/narray/gen/tmpl/seq_kernel.cu +43 -0
  197. data/ext/cumo/narray/gen/tmpl/set2.c +57 -0
  198. data/ext/cumo/narray/gen/tmpl/sort.c +48 -0
  199. data/ext/cumo/narray/gen/tmpl/sort_index.c +111 -0
  200. data/ext/cumo/narray/gen/tmpl/store.c +41 -0
  201. data/ext/cumo/narray/gen/tmpl/store_array.c +187 -0
  202. data/ext/cumo/narray/gen/tmpl/store_array_kernel.cu +58 -0
  203. data/ext/cumo/narray/gen/tmpl/store_bit.c +86 -0
  204. data/ext/cumo/narray/gen/tmpl/store_bit_kernel.cu +66 -0
  205. data/ext/cumo/narray/gen/tmpl/store_from.c +81 -0
  206. data/ext/cumo/narray/gen/tmpl/store_from_kernel.cu +58 -0
  207. data/ext/cumo/narray/gen/tmpl/store_kernel.cu +3 -0
  208. data/ext/cumo/narray/gen/tmpl/store_numeric.c +9 -0
  209. data/ext/cumo/narray/gen/tmpl/to_a.c +43 -0
  210. data/ext/cumo/narray/gen/tmpl/unary.c +132 -0
  211. data/ext/cumo/narray/gen/tmpl/unary2.c +60 -0
  212. data/ext/cumo/narray/gen/tmpl/unary_kernel.cu +72 -0
  213. data/ext/cumo/narray/gen/tmpl/unary_ret2.c +34 -0
  214. data/ext/cumo/narray/gen/tmpl/unary_s.c +86 -0
  215. data/ext/cumo/narray/gen/tmpl/unary_s_kernel.cu +58 -0
  216. data/ext/cumo/narray/gen/tmpl_bit/allocate.c +24 -0
  217. data/ext/cumo/narray/gen/tmpl_bit/aref.c +54 -0
  218. data/ext/cumo/narray/gen/tmpl_bit/aref_cpu.c +57 -0
  219. data/ext/cumo/narray/gen/tmpl_bit/aset.c +56 -0
  220. data/ext/cumo/narray/gen/tmpl_bit/binary.c +98 -0
  221. data/ext/cumo/narray/gen/tmpl_bit/bit_count.c +64 -0
  222. data/ext/cumo/narray/gen/tmpl_bit/bit_count_cpu.c +88 -0
  223. data/ext/cumo/narray/gen/tmpl_bit/bit_count_kernel.cu +76 -0
  224. data/ext/cumo/narray/gen/tmpl_bit/bit_reduce.c +133 -0
  225. data/ext/cumo/narray/gen/tmpl_bit/each.c +48 -0
  226. data/ext/cumo/narray/gen/tmpl_bit/each_with_index.c +70 -0
  227. data/ext/cumo/narray/gen/tmpl_bit/extract.c +30 -0
  228. data/ext/cumo/narray/gen/tmpl_bit/extract_cpu.c +29 -0
  229. data/ext/cumo/narray/gen/tmpl_bit/fill.c +69 -0
  230. data/ext/cumo/narray/gen/tmpl_bit/format.c +64 -0
  231. data/ext/cumo/narray/gen/tmpl_bit/format_to_a.c +51 -0
  232. data/ext/cumo/narray/gen/tmpl_bit/inspect.c +21 -0
  233. data/ext/cumo/narray/gen/tmpl_bit/mask.c +136 -0
  234. data/ext/cumo/narray/gen/tmpl_bit/none_p.c +14 -0
  235. data/ext/cumo/narray/gen/tmpl_bit/store_array.c +108 -0
  236. data/ext/cumo/narray/gen/tmpl_bit/store_bit.c +70 -0
  237. data/ext/cumo/narray/gen/tmpl_bit/store_from.c +60 -0
  238. data/ext/cumo/narray/gen/tmpl_bit/to_a.c +47 -0
  239. data/ext/cumo/narray/gen/tmpl_bit/unary.c +81 -0
  240. data/ext/cumo/narray/gen/tmpl_bit/where.c +90 -0
  241. data/ext/cumo/narray/gen/tmpl_bit/where2.c +95 -0
  242. data/ext/cumo/narray/index.c +880 -0
  243. data/ext/cumo/narray/kwargs.c +153 -0
  244. data/ext/cumo/narray/math.c +142 -0
  245. data/ext/cumo/narray/narray.c +1948 -0
  246. data/ext/cumo/narray/ndloop.c +2105 -0
  247. data/ext/cumo/narray/rand.c +45 -0
  248. data/ext/cumo/narray/step.c +474 -0
  249. data/ext/cumo/narray/struct.c +886 -0
  250. data/lib/cumo.rb +3 -0
  251. data/lib/cumo/cuda.rb +11 -0
  252. data/lib/cumo/cuda/compile_error.rb +36 -0
  253. data/lib/cumo/cuda/compiler.rb +161 -0
  254. data/lib/cumo/cuda/device.rb +47 -0
  255. data/lib/cumo/cuda/link_state.rb +31 -0
  256. data/lib/cumo/cuda/module.rb +40 -0
  257. data/lib/cumo/cuda/nvrtc_program.rb +27 -0
  258. data/lib/cumo/linalg.rb +12 -0
  259. data/lib/cumo/narray.rb +2 -0
  260. data/lib/cumo/narray/extra.rb +1278 -0
  261. data/lib/erbpp.rb +294 -0
  262. data/lib/erbpp/line_number.rb +137 -0
  263. data/lib/erbpp/narray_def.rb +381 -0
  264. data/numo-narray-version +1 -0
  265. data/run.gdb +7 -0
  266. metadata +353 -0
@@ -0,0 +1,207 @@
1
+ #include <ruby.h>
2
+ #include <ruby/thread.h>
3
+ #include <assert.h>
4
+ #include <nvrtc.h>
5
+ #include "cumo/cuda/nvrtc.h"
6
+
7
+ VALUE cumo_cuda_eNVRTCError;
8
+ VALUE cumo_cuda_mNVRTC;
9
+ #define eNVRTCError cumo_cuda_eNVRTCError
10
+ #define mNVRTC cumo_cuda_mNVRTC
11
+
12
+ static void
13
+ check_status(nvrtcResult status)
14
+ {
15
+ if (status != 0) {
16
+ rb_raise(cumo_cuda_eNVRTCError, "%s (error=%d)", nvrtcGetErrorString(status), status);
17
+ }
18
+ }
19
+
20
+ static VALUE
21
+ rb_nvrtcVersion(VALUE self)
22
+ {
23
+ int _major, _minor;
24
+ nvrtcResult status;
25
+ VALUE major, minor;
26
+
27
+ status = nvrtcVersion(&_major, &_minor);
28
+
29
+ check_status(status);
30
+ major = INT2NUM(_major);
31
+ minor = INT2NUM(_minor);
32
+ return rb_ary_new3(2, major, minor);
33
+ }
34
+
35
+ struct nvrtcCreateProgramParam {
36
+ nvrtcProgram *prog;
37
+ const char* src;
38
+ const char *name;
39
+ int numHeaders;
40
+ const char** headers;
41
+ const char** includeNames;
42
+ };
43
+
44
+ static void*
45
+ nvrtcCreateProgram_without_gvl_cb(void *param)
46
+ {
47
+ struct nvrtcCreateProgramParam *p = param;
48
+ nvrtcResult status;
49
+ status = nvrtcCreateProgram(p->prog, p->src, p->name, p->numHeaders, p->headers, p->includeNames);
50
+ return (void *)status;
51
+ }
52
+
53
+ static VALUE
54
+ rb_nvrtcCreateProgram(
55
+ VALUE self,
56
+ VALUE src,
57
+ VALUE name,
58
+ VALUE headers,
59
+ VALUE includeNames)
60
+ {
61
+ nvrtcResult status;
62
+ nvrtcProgram _prog;
63
+ const char* _src = StringValueCStr(src);
64
+ const char* _name = StringValueCStr(name);
65
+ int _numHeaders = RARRAY_LEN(headers);
66
+ const char** _headers = (const char **)malloc(_numHeaders * sizeof(char *));
67
+ const char** _includeNames = (const char **)malloc(_numHeaders * sizeof(char *));
68
+ int i;
69
+ for (i = 0; i < _numHeaders; i++) {
70
+ VALUE header = RARRAY_PTR(headers)[i];
71
+ _headers[i] = StringValueCStr(header);
72
+ }
73
+ for (i = 0; i < _numHeaders; i++) {
74
+ VALUE include_name = RARRAY_PTR(includeNames)[i];
75
+ _includeNames[i] = StringValueCStr(include_name);
76
+ }
77
+
78
+ {
79
+ struct nvrtcCreateProgramParam param = {&_prog, _src, _name, _numHeaders, _headers, _includeNames};
80
+ status = (nvrtcResult)rb_thread_call_without_gvl(nvrtcCreateProgram_without_gvl_cb, &param, NULL, NULL);
81
+ }
82
+
83
+ free(_headers);
84
+ free(_includeNames);
85
+ check_status(status);
86
+ return SIZET2NUM((size_t)_prog);
87
+ }
88
+
89
+ struct nvrtcDestroyProgramParam {
90
+ nvrtcProgram *prog;
91
+ };
92
+
93
+ static void*
94
+ nvrtcDestroyProgram_without_gvl_cb(void *param)
95
+ {
96
+ struct nvrtcDestroyProgramParam *p = param;
97
+ nvrtcResult status;
98
+ status = nvrtcDestroyProgram(p->prog);
99
+ return (void *)status;
100
+ }
101
+
102
+ static VALUE
103
+ rb_nvrtcDestroyProgram(VALUE self, VALUE prog)
104
+ {
105
+ nvrtcResult status;
106
+ nvrtcProgram _prog = (nvrtcProgram)NUM2SIZET(prog);
107
+
108
+ struct nvrtcDestroyProgramParam param = {&_prog};
109
+ status = (nvrtcResult)rb_thread_call_without_gvl(nvrtcDestroyProgram_without_gvl_cb, &param, NULL, NULL);
110
+
111
+ check_status(status);
112
+ return Qnil;
113
+ }
114
+
115
+ struct nvrtcCompileProgramParam {
116
+ nvrtcProgram prog;
117
+ int numOptions;
118
+ const char** options;
119
+ };
120
+
121
+ static void*
122
+ nvrtcCompileProgram_without_gvl_cb(void *param)
123
+ {
124
+ struct nvrtcCompileProgramParam *p = param;
125
+ nvrtcResult status;
126
+ status = nvrtcCompileProgram(p->prog, p->numOptions, p->options);
127
+ return (void *)status;
128
+ }
129
+
130
+ static VALUE
131
+ rb_nvrtcCompileProgram(VALUE self, VALUE prog, VALUE options)
132
+ {
133
+ nvrtcResult status;
134
+ nvrtcProgram _prog = (nvrtcProgram)NUM2SIZET(prog);
135
+ int _numOptions = RARRAY_LEN(options);
136
+ const char** _options = (const char **)malloc(_numOptions * sizeof(char *));
137
+ int i;
138
+ for (i = 0; i < _numOptions; i++) {
139
+ VALUE option = RARRAY_PTR(options)[i];
140
+ _options[i] = StringValueCStr(option);
141
+ }
142
+
143
+ {
144
+ struct nvrtcCompileProgramParam param = {_prog, _numOptions, _options};
145
+ status = (nvrtcResult)rb_thread_call_without_gvl(nvrtcCompileProgram_without_gvl_cb, &param, NULL, NULL);
146
+ }
147
+
148
+ free(_options);
149
+ check_status(status);
150
+ return Qnil;
151
+ }
152
+
153
+ static VALUE
154
+ rb_nvrtcGetPTX(VALUE self, VALUE prog)
155
+ {
156
+ nvrtcResult status;
157
+ nvrtcProgram _prog = (nvrtcProgram)NUM2SIZET(prog);
158
+ size_t _ptxSizeRet;
159
+ char *_ptx;
160
+ VALUE ptx;
161
+
162
+ status = nvrtcGetPTXSize(_prog, &_ptxSizeRet);
163
+ check_status(status);
164
+
165
+ ptx = rb_str_new(NULL, _ptxSizeRet);
166
+ _ptx = RSTRING_PTR(ptx);
167
+ status = nvrtcGetPTX(_prog, _ptx);
168
+ check_status(status);
169
+
170
+ return ptx;
171
+ }
172
+
173
+ static VALUE
174
+ rb_nvrtcGetProgramLog(VALUE self, VALUE prog)
175
+ {
176
+ nvrtcResult status;
177
+ nvrtcProgram _prog = (nvrtcProgram)NUM2SIZET(prog);
178
+ size_t _logSizeRet;
179
+ char *_log;
180
+ VALUE log;
181
+
182
+ status = nvrtcGetProgramLogSize(_prog, &_logSizeRet);
183
+ check_status(status);
184
+
185
+ log = rb_str_new(NULL, _logSizeRet);
186
+ _log = RSTRING_PTR(log);
187
+ status = nvrtcGetProgramLog(_prog, _log);
188
+ check_status(status);
189
+
190
+ return log;
191
+ }
192
+
193
+ void
194
+ Init_cumo_cuda_nvrtc()
195
+ {
196
+ VALUE mCumo = rb_define_module("Cumo");
197
+ VALUE mCUDA = rb_define_module_under(mCumo, "CUDA");
198
+ mNVRTC = rb_define_module_under(mCUDA, "NVRTC");
199
+ eNVRTCError = rb_define_class_under(mCUDA, "NVRTCError", rb_eStandardError);
200
+
201
+ rb_define_singleton_method(mNVRTC, "nvrtcVersion", rb_nvrtcVersion, 0);
202
+ rb_define_singleton_method(mNVRTC, "nvrtcCreateProgram", rb_nvrtcCreateProgram, 4);
203
+ rb_define_singleton_method(mNVRTC, "nvrtcDestroyProgram", rb_nvrtcDestroyProgram, 1);
204
+ rb_define_singleton_method(mNVRTC, "nvrtcCompileProgram", rb_nvrtcCompileProgram, 2);
205
+ rb_define_singleton_method(mNVRTC, "nvrtcGetPTX", rb_nvrtcGetPTX, 1);
206
+ rb_define_singleton_method(mNVRTC, "nvrtcGetProgramLog", rb_nvrtcGetProgramLog, 1);
207
+ }
@@ -0,0 +1,167 @@
1
+ #include <ruby.h>
2
+ #include <assert.h>
3
+ #include <cuda_runtime.h>
4
+ #include "cumo/cuda/runtime.h"
5
+
6
+ VALUE cumo_cuda_eRuntimeError;
7
+ VALUE cumo_cuda_mRuntime;
8
+ #define eRuntimeError cumo_cuda_eRuntimeError
9
+ #define mRuntime cumo_cuda_mRuntime
10
+
11
+ #define check_status(status) (cumo_cuda_runtime_check_status((status)))
12
+
13
+ ///////////////////////////////////////////
14
+ // Version Management
15
+ ///////////////////////////////////////////
16
+
17
+ /*
18
+ Returns the CUDA driver version.
19
+
20
+ @return [Integer] Returns the CUDA driver version.
21
+ @see http://docs.nvidia.com/cuda/cuda-runtime-api/group__CUDART____VERSION.html#group__CUDART____VERSION_1g8a06ee14a0551606b7c780084d5564ab
22
+ */
23
+ static VALUE
24
+ rb_cudaDriverGetVersion(VALUE self)
25
+ {
26
+ int _version;
27
+ cudaError_t status;
28
+
29
+ status = cudaDriverGetVersion(&_version);
30
+
31
+ check_status(status);
32
+ return INT2NUM(_version);
33
+ }
34
+
35
+ /*
36
+ Returns the CUDA Runtime version.
37
+
38
+ @return [Integer] Returns the CUDA Runtime version.
39
+ @see http://docs.nvidia.com/cuda/cuda-runtime-api/group__CUDART____VERSION.html#group__CUDART____VERSION_1g0e3952c7802fd730432180f1f4a6cdc6
40
+ */
41
+ static VALUE
42
+ rb_cudaRuntimeGetVersion(VALUE self)
43
+ {
44
+ int _version;
45
+ cudaError_t status;
46
+
47
+ status = cudaRuntimeGetVersion(&_version);
48
+
49
+ check_status(status);
50
+ return INT2NUM(_version);
51
+ }
52
+
53
+ /////////////////////////////////////////
54
+ // Device and context operations
55
+ /////////////////////////////////////////
56
+
57
+ /*
58
+ Returns which device is currently being used.
59
+
60
+ @return [Integer] Returns the device on which the active host thread executes the device code.
61
+ @raise [Cumo::CUDA::RuntimeError]
62
+ @see http://docs.nvidia.com/cuda/cuda-runtime-api/group__CUDART__DEVICE.html#group__CUDART__DEVICE_1g80861db2ce7c29b6e8055af8ae01bc78
63
+ */
64
+ static VALUE
65
+ rb_cudaGetDevice(VALUE self)
66
+ {
67
+ int _device;
68
+ cudaError_t status;
69
+
70
+ status = cudaGetDevice(&_device);
71
+
72
+ check_status(status);
73
+ return INT2NUM(_device);
74
+ }
75
+
76
+ /*
77
+ Returns information about the device.
78
+
79
+ @param [Integer] attrib Device attribute to query
80
+ @param [Integer] device Device number to query
81
+ @return [Integer] Returned device attribute value
82
+ @raise [Cumo::CUDA::RuntimeError]
83
+ @see http://docs.nvidia.com/cuda/cuda-runtime-api/group__CUDART__DEVICE.html#group__CUDART__DEVICE_1gb22e8256592b836df9a9cc36c9db7151
84
+ */
85
+ static VALUE
86
+ rb_cudaDeviceGetAttributes(VALUE self, VALUE attrib, VALUE device)
87
+ {
88
+ int _attrib = NUM2INT(attrib);
89
+ int _device = NUM2INT(device);
90
+ int _ret;
91
+ cudaError_t status;
92
+
93
+ status = cudaDeviceGetAttribute(&_ret, _attrib, _device);
94
+
95
+ check_status(status);
96
+ return INT2NUM(_ret);
97
+ }
98
+
99
+ /*
100
+ Returns the number of compute-capable devices.
101
+
102
+ @return [Integer] Returns the number of devices with compute capability greater or equal to 2.0
103
+ @raise [Cumo::CUDA::RuntimeError]
104
+ @see http://docs.nvidia.com/cuda/cuda-runtime-api/group__CUDART__DEVICE.html#group__CUDART__DEVICE_1g18808e54893cfcaafefeab31a73cc55f
105
+ */
106
+ static VALUE
107
+ rb_cudaGetDeviceCount(VALUE self)
108
+ {
109
+ int _count;
110
+ cudaError_t status;
111
+
112
+ status = cudaGetDeviceCount(&_count);
113
+
114
+ check_status(status);
115
+ return INT2NUM(_count);
116
+ }
117
+
118
+ /*
119
+ Set device to be used for GPU executions.
120
+
121
+ @param [Integer] device Device on which the active host thread should execute the device code.
122
+ @raise [Cumo::CUDA::RuntimeError]
123
+ @see http://docs.nvidia.com/cuda/cuda-runtime-api/group__CUDART__DEVICE.html#group__CUDART__DEVICE_1g159587909ffa0791bbe4b40187a4c6bb
124
+ */
125
+ static VALUE
126
+ rb_cudaSetDevice(VALUE self, VALUE device)
127
+ {
128
+ int _device = NUM2INT(device);
129
+ cudaError_t status;
130
+
131
+ status = cudaSetDevice(_device);
132
+
133
+ check_status(status);
134
+ return Qnil;
135
+ }
136
+
137
+ /*
138
+ Wait for compute device to finish.
139
+
140
+ @raise [Cumo::CUDA::RuntimeError]
141
+ @see http://docs.nvidia.com/cuda/cuda-runtime-api/group__CUDART__DEVICE.html#group__CUDART__DEVICE_1g10e20b05a95f638a4071a655503df25d
142
+ */
143
+ static VALUE
144
+ rb_cudaDeviceSynchronize(VALUE self)
145
+ {
146
+ cudaError_t status;
147
+ status = cudaDeviceSynchronize();
148
+ check_status(status);
149
+ return Qnil;
150
+ }
151
+
152
+ void
153
+ Init_cumo_cuda_runtime()
154
+ {
155
+ VALUE mCumo = rb_define_module("Cumo");
156
+ VALUE mCUDA = rb_define_module_under(mCumo, "CUDA");
157
+ mRuntime = rb_define_module_under(mCUDA, "Runtime");
158
+ eRuntimeError = rb_define_class_under(mCUDA, "RuntimeError", rb_eStandardError);
159
+
160
+ rb_define_singleton_method(mRuntime, "cudaDriverGetVersion", rb_cudaDriverGetVersion, 0);
161
+ rb_define_singleton_method(mRuntime, "cudaRuntimeGetVersion", rb_cudaRuntimeGetVersion, 0);
162
+ rb_define_singleton_method(mRuntime, "cudaGetDevice", rb_cudaGetDevice, 0);
163
+ rb_define_singleton_method(mRuntime, "cudaDeviceGetAttributes", rb_cudaDeviceGetAttributes, 2);
164
+ rb_define_singleton_method(mRuntime, "cudaGetDeviceCount", rb_cudaGetDeviceCount, 0);
165
+ rb_define_singleton_method(mRuntime, "cudaSetDevice", rb_cudaSetDevice, 1);
166
+ rb_define_singleton_method(mRuntime, "cudaDeviceSynchronize", rb_cudaDeviceSynchronize, 0);
167
+ }
@@ -0,0 +1,148 @@
1
+ #define CUMO_C
2
+ #include <ruby.h>
3
+ #include <assert.h>
4
+ #include <stdlib.h>
5
+ #include "cumo.h"
6
+ #include "cumo/narray.h"
7
+
8
/* Entry point defined in this file. */
void Init_cumo();

/* NArray core initializers. */
void Init_cumo_narray();
void Init_cumo_nary_data();
void Init_cumo_nary_ndloop();
void Init_cumo_nary_step();
void Init_cumo_nary_index();

/* Per-element-type initializers. */
void Init_cumo_bit();
void Init_cumo_int8();
void Init_cumo_int16();
void Init_cumo_int32();
void Init_cumo_int64();
void Init_cumo_uint8();
void Init_cumo_uint16();
void Init_cumo_uint32();
void Init_cumo_uint64();
void Init_cumo_sfloat();
void Init_cumo_scomplex();
void Init_cumo_dfloat();
void Init_cumo_dcomplex();
void Init_cumo_robject();

/* Additional NArray feature initializers. */
void Init_cumo_nary_math();
void Init_cumo_nary_rand();
void Init_cumo_nary_array();
void Init_cumo_nary_struct();

/* CUDA binding initializers. */
void Init_cumo_cuda_driver();
void Init_cumo_cuda_memory_pool();
void Init_cumo_cuda_runtime();
void Init_cumo_cuda_nvrtc();
36
+
37
/*
  Intentionally empty function.
  NOTE(review): presumably exists as a stable symbol to set a debugger
  breakpoint on -- confirm against the project's debugging scripts.
*/
void
cumo_debug_breakpoint(void)
{
    /* nothing to do */
}
42
+
43
/* Process-wide flag: true while Numo NArray compatible mode is on. */
static bool cumo_compatible_mode_enabled = false;

/* C-level accessor: reports the current value of the compatible-mode flag. */
bool cumo_compatible_mode_enabled_p()
{
    return cumo_compatible_mode_enabled;
}
49
+
50
+ /*
51
+ Enable Numo NArray compatible mode.
52
+
53
+ Cumo returns 0-dimensional NArray instead of ruby numeric object
54
+ for some methods such as `extract`, and `[]` not to synchronize
55
+ between CPU and GPU for performance as default.
56
+
57
+ Enabling the compatible mode makes Cumo behave as Numo. But, please
58
+ note that it makes Cumo slow.
59
+
60
+ @return [Boolean] Returns previous state (true if enabled)
61
+ */
62
+ static VALUE
63
+ rb_enable_compatible_mode(VALUE self)
64
+ {
65
+ VALUE ret = (cumo_compatible_mode_enabled ? Qtrue : Qfalse);
66
+ cumo_compatible_mode_enabled = true;
67
+ return ret;
68
+ }
69
+
70
+ /*
71
+ Disable Numo NArray compatible mode.
72
+
73
+ @return [Boolean] Returns previous state (true if enabled)
74
+ */
75
+ static VALUE
76
+ rb_disable_compatible_mode(VALUE self)
77
+ {
78
+ VALUE ret = (cumo_compatible_mode_enabled ? Qtrue : Qfalse);
79
+ cumo_compatible_mode_enabled = false;
80
+ return ret;
81
+ }
82
+
83
+ /*
84
+ Returns whether Numo NArray compatible mode is enabled or not.
85
+
86
+ @return [Boolean] Returns the state (true if enabled)
87
+ */
88
+ static VALUE
89
+ rb_compatible_mode_enabled_p(VALUE self)
90
+ {
91
+ return (cumo_compatible_mode_enabled ? Qtrue : Qfalse);
92
+ }
93
+
94
+ /* initialization of Cumo Module */
95
+ void
96
+ Init_cumo()
97
+ {
98
+ const char* env;
99
+ VALUE mCumo;
100
+
101
+ mCumo = rb_define_module("Cumo");
102
+
103
+ rb_define_const(mCumo, "VERSION", rb_str_new2(CUMO_VERSION));
104
+
105
+ rb_define_singleton_method(mCumo, "enable_compatible_mode", RUBY_METHOD_FUNC(rb_enable_compatible_mode), 0);
106
+ rb_define_singleton_method(mCumo, "disable_compatible_mode", RUBY_METHOD_FUNC(rb_disable_compatible_mode), 0);
107
+ rb_define_singleton_method(mCumo, "compatible_mode_enabled?", RUBY_METHOD_FUNC(rb_compatible_mode_enabled_p), 0);
108
+
109
+ // default is false
110
+ env = getenv("CUMO_COMPATIBLE_MODE");
111
+ cumo_compatible_mode_enabled = (env != NULL && strcmp(env, "OFF") != 0 && strcmp(env, "0") != 0 && strcmp(env, "NO") != 0);
112
+
113
+ Init_cumo_narray();
114
+
115
+ Init_cumo_nary_step();
116
+ Init_cumo_nary_index();
117
+
118
+ Init_cumo_nary_data();
119
+ Init_cumo_nary_ndloop();
120
+
121
+ Init_cumo_dcomplex();
122
+ Init_cumo_dfloat();
123
+ Init_cumo_scomplex();
124
+ Init_cumo_sfloat();
125
+
126
+ Init_cumo_int64();
127
+ Init_cumo_uint64();
128
+ Init_cumo_int32();
129
+ Init_cumo_uint32();
130
+ Init_cumo_int16();
131
+ Init_cumo_uint16();
132
+ Init_cumo_int8();
133
+ Init_cumo_uint8();
134
+
135
+ Init_cumo_bit();
136
+ Init_cumo_robject();
137
+
138
+ Init_cumo_nary_math();
139
+
140
+ Init_cumo_nary_rand();
141
+ Init_cumo_nary_array();
142
+ Init_cumo_nary_struct();
143
+
144
+ Init_cumo_cuda_driver();
145
+ Init_cumo_cuda_memory_pool();
146
+ Init_cumo_cuda_runtime();
147
+ Init_cumo_cuda_nvrtc();
148
+ }