cumo 0.1.0 → 0.1.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (158) hide show
  1. checksums.yaml +5 -5
  2. data/.gitignore +1 -0
  3. data/3rd_party/LICENSE.txt +60 -0
  4. data/3rd_party/mkmf-cu/lib/mkmf-cu/cli.rb +13 -1
  5. data/LICENSE.txt +1 -62
  6. data/README.md +33 -29
  7. data/bench/cumo_bench.rb +47 -25
  8. data/bench/numo_bench.rb +27 -25
  9. data/docs/src-tree.md +16 -0
  10. data/ext/cumo/cuda/cublas.c +69 -219
  11. data/ext/cumo/cuda/memory_pool_impl.hpp +1 -0
  12. data/ext/cumo/cuda/runtime.c +2 -14
  13. data/ext/cumo/cumo.c +16 -16
  14. data/ext/cumo/include/cumo.h +2 -2
  15. data/ext/cumo/include/cumo/cuda/cublas.h +6 -129
  16. data/ext/cumo/include/cumo/cuda/runtime.h +16 -0
  17. data/ext/cumo/include/cumo/indexer.h +46 -63
  18. data/ext/cumo/include/cumo/intern.h +58 -112
  19. data/ext/cumo/include/cumo/narray.h +214 -185
  20. data/ext/cumo/include/cumo/narray_kernel.h +66 -37
  21. data/ext/cumo/include/cumo/ndloop.h +42 -42
  22. data/ext/cumo/include/cumo/reduce_kernel.h +55 -71
  23. data/ext/cumo/include/cumo/template.h +56 -51
  24. data/ext/cumo/include/cumo/template_kernel.h +31 -31
  25. data/ext/cumo/include/cumo/types/bit.h +3 -3
  26. data/ext/cumo/include/cumo/types/bit_kernel.h +2 -2
  27. data/ext/cumo/include/cumo/types/complex.h +126 -126
  28. data/ext/cumo/include/cumo/types/complex_kernel.h +126 -126
  29. data/ext/cumo/include/cumo/types/complex_macro.h +28 -28
  30. data/ext/cumo/include/cumo/types/complex_macro_kernel.h +20 -20
  31. data/ext/cumo/include/cumo/types/dcomplex.h +5 -5
  32. data/ext/cumo/include/cumo/types/dcomplex_kernel.h +1 -1
  33. data/ext/cumo/include/cumo/types/int_macro.h +1 -1
  34. data/ext/cumo/include/cumo/types/int_macro_kernel.h +1 -1
  35. data/ext/cumo/include/cumo/types/robj_macro.h +30 -30
  36. data/ext/cumo/include/cumo/types/scomplex.h +5 -5
  37. data/ext/cumo/include/cumo/types/scomplex_kernel.h +1 -1
  38. data/ext/cumo/narray/array.c +143 -143
  39. data/ext/cumo/narray/data.c +184 -184
  40. data/ext/cumo/narray/gen/cogen.rb +5 -2
  41. data/ext/cumo/narray/gen/cogen_kernel.rb +5 -2
  42. data/ext/cumo/narray/gen/def/dcomplex.rb +1 -1
  43. data/ext/cumo/narray/gen/def/scomplex.rb +1 -1
  44. data/ext/cumo/narray/gen/erbln.rb +132 -0
  45. data/ext/cumo/narray/gen/erbpp2.rb +18 -13
  46. data/ext/cumo/narray/gen/narray_def.rb +3 -3
  47. data/ext/cumo/narray/gen/spec.rb +2 -2
  48. data/ext/cumo/narray/gen/tmpl/accum.c +15 -15
  49. data/ext/cumo/narray/gen/tmpl/accum_binary.c +22 -22
  50. data/ext/cumo/narray/gen/tmpl/accum_binary_kernel.cu +3 -3
  51. data/ext/cumo/narray/gen/tmpl/accum_index.c +30 -30
  52. data/ext/cumo/narray/gen/tmpl/accum_index_kernel.cu +2 -2
  53. data/ext/cumo/narray/gen/tmpl/accum_kernel.cu +3 -3
  54. data/ext/cumo/narray/gen/tmpl/alloc_func.c +14 -14
  55. data/ext/cumo/narray/gen/tmpl/allocate.c +11 -11
  56. data/ext/cumo/narray/gen/tmpl/aref.c +2 -2
  57. data/ext/cumo/narray/gen/tmpl/aref_cpu.c +4 -4
  58. data/ext/cumo/narray/gen/tmpl/aset.c +2 -2
  59. data/ext/cumo/narray/gen/tmpl/binary.c +28 -28
  60. data/ext/cumo/narray/gen/tmpl/binary2.c +18 -18
  61. data/ext/cumo/narray/gen/tmpl/binary2_kernel.cu +3 -3
  62. data/ext/cumo/narray/gen/tmpl/binary_kernel.cu +6 -6
  63. data/ext/cumo/narray/gen/tmpl/binary_s.c +13 -13
  64. data/ext/cumo/narray/gen/tmpl/binary_s_kernel.cu +3 -3
  65. data/ext/cumo/narray/gen/tmpl/bincount.c +23 -23
  66. data/ext/cumo/narray/gen/tmpl/cast.c +7 -7
  67. data/ext/cumo/narray/gen/tmpl/cast_array.c +3 -3
  68. data/ext/cumo/narray/gen/tmpl/clip.c +38 -38
  69. data/ext/cumo/narray/gen/tmpl/complex_accum_kernel.cu +2 -2
  70. data/ext/cumo/narray/gen/tmpl/cond_binary.c +19 -19
  71. data/ext/cumo/narray/gen/tmpl/cond_binary_kernel.cu +7 -7
  72. data/ext/cumo/narray/gen/tmpl/cond_unary.c +15 -15
  73. data/ext/cumo/narray/gen/tmpl/cum.c +15 -15
  74. data/ext/cumo/narray/gen/tmpl/each.c +9 -9
  75. data/ext/cumo/narray/gen/tmpl/each_with_index.c +9 -9
  76. data/ext/cumo/narray/gen/tmpl/ewcomp.c +15 -15
  77. data/ext/cumo/narray/gen/tmpl/ewcomp_kernel.cu +3 -3
  78. data/ext/cumo/narray/gen/tmpl/extract_cpu.c +5 -5
  79. data/ext/cumo/narray/gen/tmpl/extract_data.c +12 -12
  80. data/ext/cumo/narray/gen/tmpl/eye.c +9 -9
  81. data/ext/cumo/narray/gen/tmpl/eye_kernel.cu +3 -3
  82. data/ext/cumo/narray/gen/tmpl/fill.c +9 -9
  83. data/ext/cumo/narray/gen/tmpl/fill_kernel.cu +6 -6
  84. data/ext/cumo/narray/gen/tmpl/float_accum_kernel.cu +1 -1
  85. data/ext/cumo/narray/gen/tmpl/format.c +11 -11
  86. data/ext/cumo/narray/gen/tmpl/format_to_a.c +8 -8
  87. data/ext/cumo/narray/gen/tmpl/frexp.c +13 -13
  88. data/ext/cumo/narray/gen/tmpl/gemm.c +252 -108
  89. data/ext/cumo/narray/gen/tmpl/inspect.c +1 -1
  90. data/ext/cumo/narray/gen/tmpl/lib.c +2 -2
  91. data/ext/cumo/narray/gen/tmpl/logseq.c +7 -7
  92. data/ext/cumo/narray/gen/tmpl/logseq_kernel.cu +6 -6
  93. data/ext/cumo/narray/gen/tmpl/map_with_index.c +17 -17
  94. data/ext/cumo/narray/gen/tmpl/median.c +10 -10
  95. data/ext/cumo/narray/gen/tmpl/minmax.c +10 -10
  96. data/ext/cumo/narray/gen/tmpl/new_dim0.c +3 -3
  97. data/ext/cumo/narray/gen/tmpl/poly.c +6 -6
  98. data/ext/cumo/narray/gen/tmpl/pow.c +28 -28
  99. data/ext/cumo/narray/gen/tmpl/pow_kernel.cu +6 -6
  100. data/ext/cumo/narray/gen/tmpl/rand.c +10 -10
  101. data/ext/cumo/narray/gen/tmpl/rand_norm.c +7 -7
  102. data/ext/cumo/narray/gen/tmpl/real_accum_kernel.cu +6 -6
  103. data/ext/cumo/narray/gen/tmpl/seq.c +7 -7
  104. data/ext/cumo/narray/gen/tmpl/seq_kernel.cu +6 -6
  105. data/ext/cumo/narray/gen/tmpl/set2.c +20 -20
  106. data/ext/cumo/narray/gen/tmpl/sort.c +11 -11
  107. data/ext/cumo/narray/gen/tmpl/sort_index.c +18 -18
  108. data/ext/cumo/narray/gen/tmpl/store.c +6 -6
  109. data/ext/cumo/narray/gen/tmpl/store_array.c +19 -19
  110. data/ext/cumo/narray/gen/tmpl/store_array_kernel.cu +12 -12
  111. data/ext/cumo/narray/gen/tmpl/store_bit.c +23 -23
  112. data/ext/cumo/narray/gen/tmpl/store_bit_kernel.cu +28 -28
  113. data/ext/cumo/narray/gen/tmpl/store_from.c +16 -16
  114. data/ext/cumo/narray/gen/tmpl/store_from_kernel.cu +12 -12
  115. data/ext/cumo/narray/gen/tmpl/to_a.c +10 -10
  116. data/ext/cumo/narray/gen/tmpl/unary.c +25 -25
  117. data/ext/cumo/narray/gen/tmpl/unary2.c +17 -17
  118. data/ext/cumo/narray/gen/tmpl/unary_kernel.cu +15 -15
  119. data/ext/cumo/narray/gen/tmpl/unary_ret2.c +13 -13
  120. data/ext/cumo/narray/gen/tmpl/unary_s.c +17 -17
  121. data/ext/cumo/narray/gen/tmpl/unary_s_kernel.cu +12 -12
  122. data/ext/cumo/narray/gen/tmpl_bit/allocate.c +9 -9
  123. data/ext/cumo/narray/gen/tmpl_bit/aref.c +2 -2
  124. data/ext/cumo/narray/gen/tmpl_bit/aref_cpu.c +5 -5
  125. data/ext/cumo/narray/gen/tmpl_bit/aset.c +2 -2
  126. data/ext/cumo/narray/gen/tmpl_bit/binary.c +29 -29
  127. data/ext/cumo/narray/gen/tmpl_bit/bit_count.c +14 -14
  128. data/ext/cumo/narray/gen/tmpl_bit/bit_count_cpu.c +21 -21
  129. data/ext/cumo/narray/gen/tmpl_bit/bit_count_kernel.cu +28 -28
  130. data/ext/cumo/narray/gen/tmpl_bit/bit_reduce.c +29 -29
  131. data/ext/cumo/narray/gen/tmpl_bit/each.c +10 -10
  132. data/ext/cumo/narray/gen/tmpl_bit/each_with_index.c +10 -10
  133. data/ext/cumo/narray/gen/tmpl_bit/extract.c +8 -8
  134. data/ext/cumo/narray/gen/tmpl_bit/extract_cpu.c +8 -8
  135. data/ext/cumo/narray/gen/tmpl_bit/fill.c +17 -17
  136. data/ext/cumo/narray/gen/tmpl_bit/format.c +14 -14
  137. data/ext/cumo/narray/gen/tmpl_bit/format_to_a.c +11 -11
  138. data/ext/cumo/narray/gen/tmpl_bit/inspect.c +3 -3
  139. data/ext/cumo/narray/gen/tmpl_bit/mask.c +33 -33
  140. data/ext/cumo/narray/gen/tmpl_bit/store_array.c +19 -19
  141. data/ext/cumo/narray/gen/tmpl_bit/store_bit.c +22 -22
  142. data/ext/cumo/narray/gen/tmpl_bit/store_from.c +18 -18
  143. data/ext/cumo/narray/gen/tmpl_bit/to_a.c +12 -12
  144. data/ext/cumo/narray/gen/tmpl_bit/unary.c +24 -24
  145. data/ext/cumo/narray/gen/tmpl_bit/where.c +16 -16
  146. data/ext/cumo/narray/gen/tmpl_bit/where2.c +20 -20
  147. data/ext/cumo/narray/index.c +213 -213
  148. data/ext/cumo/narray/math.c +27 -27
  149. data/ext/cumo/narray/narray.c +484 -484
  150. data/ext/cumo/narray/ndloop.c +259 -258
  151. data/ext/cumo/narray/rand.c +3 -3
  152. data/ext/cumo/narray/step.c +70 -70
  153. data/ext/cumo/narray/struct.c +139 -139
  154. metadata +6 -7
  155. data/ext/cumo/include/cumo/intern_fwd.h +0 -38
  156. data/lib/erbpp.rb +0 -294
  157. data/lib/erbpp/line_number.rb +0 -137
  158. data/lib/erbpp/narray_def.rb +0 -381
@@ -8,8 +8,9 @@ a = Numo::Float32.new(10).seq(1)
8
8
  b = Numo::Float32.new(10).seq(10,10)
9
9
  c = a + b
10
10
 
11
- def elementwise
12
- puts 'element-wise'
11
+ def elementwise(num = nil)
12
+ num ||= NUM
13
+ puts "elementwise(#{num})"
13
14
  Benchmark.bm do |r|
14
15
  a = Numo::Float32.new(10000).seq(1)
15
16
  b = Numo::Float32.new(10000).seq(10,10)
@@ -43,8 +44,9 @@ def elementwise
43
44
  end
44
45
  end
45
46
 
46
- def reduction
47
- puts 'reduction'
47
+ def reduction(num = nil)
48
+ num ||= NUM
49
+ puts "reduction(#{num})"
48
50
  Benchmark.bm do |r|
49
51
  a = Numo::Float32.new(10000).seq(1)
50
52
  r.report('10**4') do
@@ -73,9 +75,9 @@ def reduction
73
75
  end
74
76
  end
75
77
 
76
- def dot
77
- num = 3
78
- puts 'dot'
78
+ def dot(num = nil)
79
+ num ||= 1
80
+ puts "dot(#{num})"
79
81
  Benchmark.bm do |r|
80
82
  a = Numo::Float32.new(100,100).seq(1)
81
83
  b = Numo::Float32.new(100,100).seq(10,10)
@@ -115,24 +117,24 @@ dot
115
117
 
116
118
  # Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz
117
119
  #
118
- # element-wise
120
+ # elementwise(100)
119
121
  # user system total real
120
- # 10**4 0.010000 0.000000 0.010000 ( 0.002212)
121
- # 10**5 0.000000 0.020000 0.020000 ( 0.021604)
122
- # 10**6 0.060000 0.060000 0.120000 ( 0.120241)
123
- # 10**7 0.980000 0.890000 1.870000 ( 1.874592)
124
- # 10**8 9.530000 8.520000 18.050000 ( 18.054087)
125
- # reduction
122
+ # 10**4 0.010000 0.000000 0.010000 ( 0.002368)
123
+ # 10**5 0.000000 0.020000 0.020000 ( 0.024129)
124
+ # 10**6 0.080000 0.050000 0.130000 ( 0.139918)
125
+ # 10**7 1.230000 1.020000 2.250000 ( 2.251331)
126
+ # 10**8 10.090000 8.560000 18.650000 ( 18.646369)
127
+ # reduction(100)
126
128
  # user system total real
127
- # 10**4 0.000000 0.000000 0.000000 ( 0.001313)
128
- # 10**5 0.010000 0.000000 0.010000 ( 0.011400)
129
- # 10**6 0.110000 0.000000 0.110000 ( 0.111674)
130
- # 10**7 1.120000 0.000000 1.120000 ( 1.127018)
131
- # 10**8 11.770000 0.010000 11.780000 ( 11.770858)
132
- # dot
129
+ # 10**4 0.000000 0.000000 0.000000 ( 0.001360)
130
+ # 10**5 0.020000 0.000000 0.020000 ( 0.011455)
131
+ # 10**6 0.110000 0.000000 0.110000 ( 0.111708)
132
+ # 10**7 1.130000 0.000000 1.130000 ( 1.137357)
133
+ # 10**8 11.830000 0.000000 11.830000 ( 11.832832)
134
+ # dot(1)
133
135
  # user system total real
134
- # 10**4 0.000000 0.000000 0.000000 ( 0.003935)
135
- # 10**5 0.040000 0.000000 0.040000 ( 0.037682)
136
- # 10**6 0.380000 0.000000 0.380000 ( 0.377312)
137
- # 10**7 3.790000 0.000000 3.790000 ( 3.792297)
138
- # 10**8 38.820000 0.000000 38.820000 ( 38.816987)
136
+ # 10**4 0.010000 0.000000 0.010000 ( 0.001390)
137
+ # 10**5 0.010000 0.000000 0.010000 ( 0.012563)
138
+ # 10**6 0.120000 0.010000 0.130000 ( 0.125406)
139
+ # 10**7 1.270000 0.000000 1.270000 ( 1.272804)
140
+ # 10**8 13.000000 0.000000 13.000000 ( 12.990586)
@@ -0,0 +1,16 @@
1
+ # Source code organization
2
+
3
+ * `*_kernel.{h,cuh,cu}` files are for device (CUDA kernels).
4
+ * .cu files are compiled via nvcc.
5
+ * .cu files define C wrapper functions that launch CUDA kernels, so that the kernels can be invoked from .c files.
6
+ * Technically, it is not possible to use CRuby API such as `VALUE` in .cu files.
7
+ * CRuby API functions are not callable from CUDA kernels because they do not have the `__device__` modifier.
8
+ * nvcc does not support `#include RUBY_EXTCONF_H`, so `ruby.h` cannot be included.
9
+ * (RULE) C++14 code is allowed in .cu files.
10
+ * Rest of `*.{h,c}` files are for host (CPU).
11
+ * Call C wrapper functions defined in .cu files.
12
+ * It can use CRuby API.
13
+ * (RULE) C++ code is not allowed in host files.
14
+
15
+ Ruby's `mkmf` (or `extconf.rb`) does not support specifying an additional compiler, such as NVCC, for files with a different extension such as `.cu`.
16
+ Therefore, cumo specifies a wrapper command `bin/mkmf-cu-nvcc` as the compiler, and the wrapper changes its behavior depending on the extension of the file being compiled.
@@ -4,12 +4,66 @@
4
4
  #include <ruby.h>
5
5
  #include "cumo/narray.h"
6
6
  #include "cumo/template.h"
7
+ #include "cumo/cuda/runtime.h"
7
8
 
8
- //static void *blas_handle = 0;
9
- //static char *blas_prefix = 0;
9
+ VALUE cumo_cuda_eCublasError;
10
+ VALUE cumo_cuda_mCublas;
11
+ #define eCublasError cumo_cuda_eCublasError
12
+ #define mCublas cumo_cuda_mCublas
13
+
14
+ static char*
15
+ get_cublas_error_msg(cublasStatus_t error) {
16
+ switch (error) {
17
+ #define RETURN_MSG(msg) \
18
+ case msg: \
19
+ return #msg
20
+
21
+ RETURN_MSG(CUBLAS_STATUS_SUCCESS);
22
+ RETURN_MSG(CUBLAS_STATUS_NOT_INITIALIZED);
23
+ RETURN_MSG(CUBLAS_STATUS_ALLOC_FAILED);
24
+ RETURN_MSG(CUBLAS_STATUS_INVALID_VALUE);
25
+ RETURN_MSG(CUBLAS_STATUS_ARCH_MISMATCH);
26
+ RETURN_MSG(CUBLAS_STATUS_MAPPING_ERROR);
27
+ RETURN_MSG(CUBLAS_STATUS_EXECUTION_FAILED);
28
+ RETURN_MSG(CUBLAS_STATUS_INTERNAL_ERROR);
29
+ RETURN_MSG(CUBLAS_STATUS_NOT_SUPPORTED);
30
+ RETURN_MSG(CUBLAS_STATUS_LICENSE_ERROR);
31
+
32
+ #undef RETURN_MSG
33
+ }
34
+ abort(); // never reach
35
+ }
36
+
37
+ void
38
+ cumo_cuda_cublas_check_status(cublasStatus_t status)
39
+ {
40
+ if (status != 0) {
41
+ rb_raise(cumo_cuda_eCublasError, "%s (error=%d)", get_cublas_error_msg(status), status);
42
+ }
43
+ }
44
+
45
+ // Lazily initialize cublas handle, and cache it
46
+ cublasHandle_t
47
+ cumo_cuda_cublas_handle()
48
+ {
49
+ static cublasHandle_t *handles = 0; // handle is never destroyed
50
+ if (handles == 0) {
51
+ int i;
52
+ int device_count = cumo_cuda_runtime_get_device_count();
53
+ handles = malloc(sizeof(cublasHandle_t) * device_count);
54
+ for (i = 0; i < device_count; ++i) {
55
+ handles[i] = 0;
56
+ }
57
+ }
58
+ int device = cumo_cuda_runtime_get_device();
59
+ if (handles[device] == 0) {
60
+ cublasCreate(&handles[device]);
61
+ }
62
+ return handles[device];
63
+ }
10
64
 
11
65
  VALUE
12
- cumo_cublas_option_value(VALUE value, VALUE default_value)
66
+ cumo_cuda_cublas_option_value(VALUE value, VALUE default_value)
13
67
  {
14
68
  switch(TYPE(value)) {
15
69
  case T_NIL:
@@ -19,45 +73,9 @@ cumo_cublas_option_value(VALUE value, VALUE default_value)
19
73
  return value;
20
74
  }
21
75
 
22
- //enum CBLAS_ORDER
23
- //cumo_cublas_option_order(VALUE order)
24
- //{
25
- // int opt;
26
- // char *ptr;
27
- //
28
- // switch(TYPE(order)) {
29
- // case T_NIL:
30
- // case T_UNDEF:
31
- // case T_FALSE:
32
- // return CblasRowMajor;
33
- // case T_TRUE:
34
- // return CblasColMajor;
35
- // case T_FIXNUM:
36
- // opt = FIX2INT(order);
37
- // if (opt >= CblasRowMajor && opt <= CblasColMajor) {
38
- // return opt;
39
- // }
40
- // break;
41
- // case T_SYMBOL:
42
- // order = rb_sym2str(order);
43
- // case T_STRING:
44
- // ptr = RSTRING_PTR(order);
45
- // if (RSTRING_LEN(order) > 0) {
46
- // switch(ptr[0]){
47
- // case 'R': case 'r':
48
- // return CblasRowMajor;
49
- // case 'C': case 'c':
50
- // return CblasColMajor;
51
- // }
52
- // }
53
- // break;
54
- // }
55
- // rb_raise(rb_eArgError,"invalid value for CBLAS_ORDER");
56
- // return 0;
57
- //}
58
-
76
+ #if 0
59
77
  cublasOperation_t
60
- cumo_cublas_option_trans(VALUE trans)
78
+ cumo_cuda_cublas_option_trans(VALUE trans)
61
79
  {
62
80
  int opt;
63
81
  char *ptr;
@@ -94,185 +112,17 @@ cumo_cublas_option_trans(VALUE trans)
94
112
  rb_raise(rb_eArgError, "invalid value for cublasOperation_t");
95
113
  return 0;
96
114
  }
115
+ #endif
97
116
 
98
- cublasFillMode_t
99
- cumo_cublas_option_uplo(VALUE uplo)
100
- {
101
- int opt;
102
- char *ptr;
103
-
104
- switch(TYPE(uplo)) {
105
- case T_NIL:
106
- case T_UNDEF:
107
- case T_FALSE:
108
- return CUBLAS_FILL_MODE_UPPER;
109
- case T_TRUE:
110
- return CUBLAS_FILL_MODE_LOWER;
111
- case T_FIXNUM:
112
- opt = FIX2INT(uplo);
113
- switch(opt){
114
- case CUBLAS_FILL_MODE_UPPER:
115
- case CUBLAS_FILL_MODE_LOWER:
116
- return opt;
117
- }
118
- break;
119
- case T_SYMBOL:
120
- uplo = rb_sym2str(uplo);
121
- case T_STRING:
122
- ptr = RSTRING_PTR(uplo);
123
- if (RSTRING_LEN(uplo) > 0) {
124
- switch(ptr[0]){
125
- case 'U': case 'u':
126
- return CUBLAS_FILL_MODE_UPPER;
127
- case 'L': case 'l':
128
- return CUBLAS_FILL_MODE_LOWER;
129
- }
130
- }
131
- break;
132
- }
133
- rb_raise(rb_eArgError, "invalid value for cublasFillMode_t");
134
- return 0;
135
- }
136
-
137
- cublasDiagType_t
138
- cumo_cublas_option_diag(VALUE diag)
139
- {
140
- int opt;
141
- char *ptr;
142
-
143
- switch(TYPE(diag)) {
144
- case T_NIL:
145
- case T_UNDEF:
146
- case T_FALSE:
147
- return CUBLAS_DIAG_NON_UNIT;
148
- case T_TRUE:
149
- return CUBLAS_DIAG_UNIT;
150
- case T_FIXNUM:
151
- opt = FIX2INT(diag);
152
- switch(opt){
153
- case CUBLAS_DIAG_NON_UNIT:
154
- case CUBLAS_DIAG_UNIT:
155
- return opt;
156
- }
157
- break;
158
- case T_SYMBOL:
159
- diag = rb_sym2str(diag);
160
- case T_STRING:
161
- ptr = RSTRING_PTR(diag);
162
- if (RSTRING_LEN(diag) > 0) {
163
- switch(ptr[0]){
164
- case 'N': case 'n':
165
- return CUBLAS_DIAG_NON_UNIT;
166
- case 'U': case 'u':
167
- return CUBLAS_DIAG_UNIT;
168
- }
169
- }
170
- break;
171
- }
172
- rb_raise(rb_eArgError, "invalid value for cublasDiagType_t");
173
- return 0;
174
- }
175
-
176
- cublasSideMode_t
177
- cumo_cublas_option_side(VALUE side)
117
+ void
118
+ Init_cumo_cuda_cublas(void)
178
119
  {
179
- int opt;
180
- char *ptr;
120
+ VALUE mCumo = rb_define_module("Cumo");
121
+ VALUE mCUDA = rb_define_module_under(mCumo, "CUDA");
181
122
 
182
- switch(TYPE(side)) {
183
- case T_NIL:
184
- case T_UNDEF:
185
- case T_FALSE:
186
- return CUBLAS_SIDE_LEFT;
187
- case T_TRUE:
188
- return CUBLAS_SIDE_RIGHT;
189
- case T_FIXNUM:
190
- opt = FIX2INT(side);
191
- switch(opt){
192
- case CUBLAS_SIDE_LEFT:
193
- case CUBLAS_SIDE_RIGHT:
194
- return opt;
195
- }
196
- break;
197
- case T_SYMBOL:
198
- side = rb_sym2str(side);
199
- case T_STRING:
200
- ptr = RSTRING_PTR(side);
201
- if (RSTRING_LEN(side) > 0) {
202
- switch(ptr[0]){
203
- case 'L': case 'l':
204
- return CUBLAS_SIDE_LEFT;
205
- case 'R': case 'r':
206
- return CUBLAS_SIDE_RIGHT;
207
- }
208
- }
209
- break;
210
- }
211
- rb_raise(rb_eArgError, "invalid value for cublasSideMode_t");
212
- return 0;
123
+ /*
124
+ Document-module: Cumo::CUDA::Cublas
125
+ */
126
+ mCublas = rb_define_module_under(mCUDA, "Cublas");
127
+ eCublasError = rb_define_class_under(mCUDA, "CublasError", rb_eStandardError);
213
128
  }
214
-
215
- //void
216
- //cumo_cublas_check_func(void **func, const char *name)
217
- //{
218
- // char *s, *error;
219
- //
220
- // if (*func==0) {
221
- // if (blas_handle==0) {
222
- // rb_raise(rb_eRuntimeError,"BLAS library is not loaded");
223
- // }
224
- // if (blas_prefix==0) {
225
- // rb_raise(rb_eRuntimeError,"CBLAS prefix is not set");
226
- // }
227
- // s = alloca(strlen(blas_prefix)+strlen(name)+1);
228
- // strcpy(s,blas_prefix);
229
- // strcat(s,name);
230
- // dlerror();
231
- // *func = dlsym(blas_handle, s);
232
- // error = dlerror();
233
- // if (error != NULL) {
234
- // rb_raise(rb_eRuntimeError, "%s", error);
235
- // }
236
- // }
237
- //}
238
-
239
- //static VALUE
240
- //blas_s_prefix_set(VALUE mod, VALUE prefix)
241
- //{
242
- // long len;
243
- //
244
- // if (TYPE(prefix) != T_STRING) {
245
- // rb_raise(rb_eTypeError,"argument must be string");
246
- // }
247
- // if (blas_prefix) {
248
- // free(blas_prefix);
249
- // }
250
- // len = RSTRING_LEN(prefix);
251
- // blas_prefix = malloc(len+1);
252
- // strcpy(blas_prefix, StringValueCStr(prefix));
253
- // return prefix;
254
- //}
255
-
256
- //void
257
- //Init_blas(void)
258
- //{
259
- // VALUE mN;
260
- //
261
- // mN = rb_define_module("Numo");
262
- // /*
263
- // Document-module: Numo::Linalg
264
- // */
265
- // mLinalg = rb_define_module_under(mN, "Linalg");
266
- // mBlas = rb_define_module_under(mLinalg, "Blas");
267
- //
268
- // rb_define_module_function(mBlas, "dlopen", blas_s_dlopen, -1);
269
- // rb_define_module_function(mBlas, "prefix=", blas_s_prefix_set, 1);
270
- //
271
- // blas_prefix = malloc(strlen("cublas_")+1); // default prefix
272
- // strcpy(blas_prefix,"cublas_");
273
- //
274
- // Init_cumo_linalg_blas_s();
275
- // Init_cumo_linalg_blas_d();
276
- // Init_cumo_linalg_blas_c();
277
- // Init_cumo_linalg_blas_z();
278
- //}
@@ -6,6 +6,7 @@
6
6
  #include <memory>
7
7
  #include <mutex>
8
8
  #include <stdexcept>
9
+ #include <string>
9
10
  #include <unordered_map>
10
11
  #include <vector>
11
12
 
@@ -64,13 +64,7 @@ rb_cudaRuntimeGetVersion(VALUE self)
64
64
  static VALUE
65
65
  rb_cudaGetDevice(VALUE self)
66
66
  {
67
- int _device;
68
- cudaError_t status;
69
-
70
- status = cudaGetDevice(&_device);
71
-
72
- check_status(status);
73
- return INT2NUM(_device);
67
+ return INT2NUM(cumo_cuda_runtime_get_device());
74
68
  }
75
69
 
76
70
  /*
@@ -106,13 +100,7 @@ rb_cudaDeviceGetAttributes(VALUE self, VALUE attrib, VALUE device)
106
100
  static VALUE
107
101
  rb_cudaGetDeviceCount(VALUE self)
108
102
  {
109
- int _count;
110
- cudaError_t status;
111
-
112
- status = cudaGetDeviceCount(&_count);
113
-
114
- check_status(status);
115
- return INT2NUM(_count);
103
+ return INT2NUM(cumo_cuda_runtime_get_device_count());
116
104
  }
117
105
 
118
106
  /*
@@ -7,10 +7,10 @@
7
7
 
8
8
  void Init_cumo();
9
9
  void Init_cumo_narray();
10
- void Init_cumo_nary_data();
11
- void Init_cumo_nary_ndloop();
12
- void Init_cumo_nary_step();
13
- void Init_cumo_nary_index();
10
+ void Init_cumo_na_data();
11
+ void Init_cumo_na_ndloop();
12
+ void Init_cumo_na_step();
13
+ void Init_cumo_na_index();
14
14
  void Init_cumo_bit();
15
15
  void Init_cumo_int8();
16
16
  void Init_cumo_int16();
@@ -25,10 +25,10 @@ void Init_cumo_scomplex();
25
25
  void Init_cumo_dfloat();
26
26
  void Init_cumo_dcomplex();
27
27
  void Init_cumo_robject();
28
- void Init_cumo_nary_math();
29
- void Init_cumo_nary_rand();
30
- void Init_cumo_nary_array();
31
- void Init_cumo_nary_struct();
28
+ void Init_cumo_na_math();
29
+ void Init_cumo_na_rand();
30
+ void Init_cumo_na_array();
31
+ void Init_cumo_na_struct();
32
32
  void Init_cumo_cuda_driver();
33
33
  void Init_cumo_cuda_memory_pool();
34
34
  void Init_cumo_cuda_runtime();
@@ -112,11 +112,11 @@ Init_cumo()
112
112
 
113
113
  Init_cumo_narray();
114
114
 
115
- Init_cumo_nary_step();
116
- Init_cumo_nary_index();
115
+ Init_cumo_na_step();
116
+ Init_cumo_na_index();
117
117
 
118
- Init_cumo_nary_data();
119
- Init_cumo_nary_ndloop();
118
+ Init_cumo_na_data();
119
+ Init_cumo_na_ndloop();
120
120
 
121
121
  Init_cumo_dcomplex();
122
122
  Init_cumo_dfloat();
@@ -135,11 +135,11 @@ Init_cumo()
135
135
  Init_cumo_bit();
136
136
  Init_cumo_robject();
137
137
 
138
- Init_cumo_nary_math();
138
+ Init_cumo_na_math();
139
139
 
140
- Init_cumo_nary_rand();
141
- Init_cumo_nary_array();
142
- Init_cumo_nary_struct();
140
+ Init_cumo_na_rand();
141
+ Init_cumo_na_array();
142
+ Init_cumo_na_struct();
143
143
 
144
144
  Init_cumo_cuda_driver();
145
145
  Init_cumo_cuda_memory_pool();