cumo 0.1.0 → 0.1.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +5 -5
- data/.gitignore +1 -0
- data/3rd_party/LICENSE.txt +60 -0
- data/3rd_party/mkmf-cu/lib/mkmf-cu/cli.rb +13 -1
- data/LICENSE.txt +1 -62
- data/README.md +33 -29
- data/bench/cumo_bench.rb +47 -25
- data/bench/numo_bench.rb +27 -25
- data/docs/src-tree.md +16 -0
- data/ext/cumo/cuda/cublas.c +69 -219
- data/ext/cumo/cuda/memory_pool_impl.hpp +1 -0
- data/ext/cumo/cuda/runtime.c +2 -14
- data/ext/cumo/cumo.c +16 -16
- data/ext/cumo/include/cumo.h +2 -2
- data/ext/cumo/include/cumo/cuda/cublas.h +6 -129
- data/ext/cumo/include/cumo/cuda/runtime.h +16 -0
- data/ext/cumo/include/cumo/indexer.h +46 -63
- data/ext/cumo/include/cumo/intern.h +58 -112
- data/ext/cumo/include/cumo/narray.h +214 -185
- data/ext/cumo/include/cumo/narray_kernel.h +66 -37
- data/ext/cumo/include/cumo/ndloop.h +42 -42
- data/ext/cumo/include/cumo/reduce_kernel.h +55 -71
- data/ext/cumo/include/cumo/template.h +56 -51
- data/ext/cumo/include/cumo/template_kernel.h +31 -31
- data/ext/cumo/include/cumo/types/bit.h +3 -3
- data/ext/cumo/include/cumo/types/bit_kernel.h +2 -2
- data/ext/cumo/include/cumo/types/complex.h +126 -126
- data/ext/cumo/include/cumo/types/complex_kernel.h +126 -126
- data/ext/cumo/include/cumo/types/complex_macro.h +28 -28
- data/ext/cumo/include/cumo/types/complex_macro_kernel.h +20 -20
- data/ext/cumo/include/cumo/types/dcomplex.h +5 -5
- data/ext/cumo/include/cumo/types/dcomplex_kernel.h +1 -1
- data/ext/cumo/include/cumo/types/int_macro.h +1 -1
- data/ext/cumo/include/cumo/types/int_macro_kernel.h +1 -1
- data/ext/cumo/include/cumo/types/robj_macro.h +30 -30
- data/ext/cumo/include/cumo/types/scomplex.h +5 -5
- data/ext/cumo/include/cumo/types/scomplex_kernel.h +1 -1
- data/ext/cumo/narray/array.c +143 -143
- data/ext/cumo/narray/data.c +184 -184
- data/ext/cumo/narray/gen/cogen.rb +5 -2
- data/ext/cumo/narray/gen/cogen_kernel.rb +5 -2
- data/ext/cumo/narray/gen/def/dcomplex.rb +1 -1
- data/ext/cumo/narray/gen/def/scomplex.rb +1 -1
- data/ext/cumo/narray/gen/erbln.rb +132 -0
- data/ext/cumo/narray/gen/erbpp2.rb +18 -13
- data/ext/cumo/narray/gen/narray_def.rb +3 -3
- data/ext/cumo/narray/gen/spec.rb +2 -2
- data/ext/cumo/narray/gen/tmpl/accum.c +15 -15
- data/ext/cumo/narray/gen/tmpl/accum_binary.c +22 -22
- data/ext/cumo/narray/gen/tmpl/accum_binary_kernel.cu +3 -3
- data/ext/cumo/narray/gen/tmpl/accum_index.c +30 -30
- data/ext/cumo/narray/gen/tmpl/accum_index_kernel.cu +2 -2
- data/ext/cumo/narray/gen/tmpl/accum_kernel.cu +3 -3
- data/ext/cumo/narray/gen/tmpl/alloc_func.c +14 -14
- data/ext/cumo/narray/gen/tmpl/allocate.c +11 -11
- data/ext/cumo/narray/gen/tmpl/aref.c +2 -2
- data/ext/cumo/narray/gen/tmpl/aref_cpu.c +4 -4
- data/ext/cumo/narray/gen/tmpl/aset.c +2 -2
- data/ext/cumo/narray/gen/tmpl/binary.c +28 -28
- data/ext/cumo/narray/gen/tmpl/binary2.c +18 -18
- data/ext/cumo/narray/gen/tmpl/binary2_kernel.cu +3 -3
- data/ext/cumo/narray/gen/tmpl/binary_kernel.cu +6 -6
- data/ext/cumo/narray/gen/tmpl/binary_s.c +13 -13
- data/ext/cumo/narray/gen/tmpl/binary_s_kernel.cu +3 -3
- data/ext/cumo/narray/gen/tmpl/bincount.c +23 -23
- data/ext/cumo/narray/gen/tmpl/cast.c +7 -7
- data/ext/cumo/narray/gen/tmpl/cast_array.c +3 -3
- data/ext/cumo/narray/gen/tmpl/clip.c +38 -38
- data/ext/cumo/narray/gen/tmpl/complex_accum_kernel.cu +2 -2
- data/ext/cumo/narray/gen/tmpl/cond_binary.c +19 -19
- data/ext/cumo/narray/gen/tmpl/cond_binary_kernel.cu +7 -7
- data/ext/cumo/narray/gen/tmpl/cond_unary.c +15 -15
- data/ext/cumo/narray/gen/tmpl/cum.c +15 -15
- data/ext/cumo/narray/gen/tmpl/each.c +9 -9
- data/ext/cumo/narray/gen/tmpl/each_with_index.c +9 -9
- data/ext/cumo/narray/gen/tmpl/ewcomp.c +15 -15
- data/ext/cumo/narray/gen/tmpl/ewcomp_kernel.cu +3 -3
- data/ext/cumo/narray/gen/tmpl/extract_cpu.c +5 -5
- data/ext/cumo/narray/gen/tmpl/extract_data.c +12 -12
- data/ext/cumo/narray/gen/tmpl/eye.c +9 -9
- data/ext/cumo/narray/gen/tmpl/eye_kernel.cu +3 -3
- data/ext/cumo/narray/gen/tmpl/fill.c +9 -9
- data/ext/cumo/narray/gen/tmpl/fill_kernel.cu +6 -6
- data/ext/cumo/narray/gen/tmpl/float_accum_kernel.cu +1 -1
- data/ext/cumo/narray/gen/tmpl/format.c +11 -11
- data/ext/cumo/narray/gen/tmpl/format_to_a.c +8 -8
- data/ext/cumo/narray/gen/tmpl/frexp.c +13 -13
- data/ext/cumo/narray/gen/tmpl/gemm.c +252 -108
- data/ext/cumo/narray/gen/tmpl/inspect.c +1 -1
- data/ext/cumo/narray/gen/tmpl/lib.c +2 -2
- data/ext/cumo/narray/gen/tmpl/logseq.c +7 -7
- data/ext/cumo/narray/gen/tmpl/logseq_kernel.cu +6 -6
- data/ext/cumo/narray/gen/tmpl/map_with_index.c +17 -17
- data/ext/cumo/narray/gen/tmpl/median.c +10 -10
- data/ext/cumo/narray/gen/tmpl/minmax.c +10 -10
- data/ext/cumo/narray/gen/tmpl/new_dim0.c +3 -3
- data/ext/cumo/narray/gen/tmpl/poly.c +6 -6
- data/ext/cumo/narray/gen/tmpl/pow.c +28 -28
- data/ext/cumo/narray/gen/tmpl/pow_kernel.cu +6 -6
- data/ext/cumo/narray/gen/tmpl/rand.c +10 -10
- data/ext/cumo/narray/gen/tmpl/rand_norm.c +7 -7
- data/ext/cumo/narray/gen/tmpl/real_accum_kernel.cu +6 -6
- data/ext/cumo/narray/gen/tmpl/seq.c +7 -7
- data/ext/cumo/narray/gen/tmpl/seq_kernel.cu +6 -6
- data/ext/cumo/narray/gen/tmpl/set2.c +20 -20
- data/ext/cumo/narray/gen/tmpl/sort.c +11 -11
- data/ext/cumo/narray/gen/tmpl/sort_index.c +18 -18
- data/ext/cumo/narray/gen/tmpl/store.c +6 -6
- data/ext/cumo/narray/gen/tmpl/store_array.c +19 -19
- data/ext/cumo/narray/gen/tmpl/store_array_kernel.cu +12 -12
- data/ext/cumo/narray/gen/tmpl/store_bit.c +23 -23
- data/ext/cumo/narray/gen/tmpl/store_bit_kernel.cu +28 -28
- data/ext/cumo/narray/gen/tmpl/store_from.c +16 -16
- data/ext/cumo/narray/gen/tmpl/store_from_kernel.cu +12 -12
- data/ext/cumo/narray/gen/tmpl/to_a.c +10 -10
- data/ext/cumo/narray/gen/tmpl/unary.c +25 -25
- data/ext/cumo/narray/gen/tmpl/unary2.c +17 -17
- data/ext/cumo/narray/gen/tmpl/unary_kernel.cu +15 -15
- data/ext/cumo/narray/gen/tmpl/unary_ret2.c +13 -13
- data/ext/cumo/narray/gen/tmpl/unary_s.c +17 -17
- data/ext/cumo/narray/gen/tmpl/unary_s_kernel.cu +12 -12
- data/ext/cumo/narray/gen/tmpl_bit/allocate.c +9 -9
- data/ext/cumo/narray/gen/tmpl_bit/aref.c +2 -2
- data/ext/cumo/narray/gen/tmpl_bit/aref_cpu.c +5 -5
- data/ext/cumo/narray/gen/tmpl_bit/aset.c +2 -2
- data/ext/cumo/narray/gen/tmpl_bit/binary.c +29 -29
- data/ext/cumo/narray/gen/tmpl_bit/bit_count.c +14 -14
- data/ext/cumo/narray/gen/tmpl_bit/bit_count_cpu.c +21 -21
- data/ext/cumo/narray/gen/tmpl_bit/bit_count_kernel.cu +28 -28
- data/ext/cumo/narray/gen/tmpl_bit/bit_reduce.c +29 -29
- data/ext/cumo/narray/gen/tmpl_bit/each.c +10 -10
- data/ext/cumo/narray/gen/tmpl_bit/each_with_index.c +10 -10
- data/ext/cumo/narray/gen/tmpl_bit/extract.c +8 -8
- data/ext/cumo/narray/gen/tmpl_bit/extract_cpu.c +8 -8
- data/ext/cumo/narray/gen/tmpl_bit/fill.c +17 -17
- data/ext/cumo/narray/gen/tmpl_bit/format.c +14 -14
- data/ext/cumo/narray/gen/tmpl_bit/format_to_a.c +11 -11
- data/ext/cumo/narray/gen/tmpl_bit/inspect.c +3 -3
- data/ext/cumo/narray/gen/tmpl_bit/mask.c +33 -33
- data/ext/cumo/narray/gen/tmpl_bit/store_array.c +19 -19
- data/ext/cumo/narray/gen/tmpl_bit/store_bit.c +22 -22
- data/ext/cumo/narray/gen/tmpl_bit/store_from.c +18 -18
- data/ext/cumo/narray/gen/tmpl_bit/to_a.c +12 -12
- data/ext/cumo/narray/gen/tmpl_bit/unary.c +24 -24
- data/ext/cumo/narray/gen/tmpl_bit/where.c +16 -16
- data/ext/cumo/narray/gen/tmpl_bit/where2.c +20 -20
- data/ext/cumo/narray/index.c +213 -213
- data/ext/cumo/narray/math.c +27 -27
- data/ext/cumo/narray/narray.c +484 -484
- data/ext/cumo/narray/ndloop.c +259 -258
- data/ext/cumo/narray/rand.c +3 -3
- data/ext/cumo/narray/step.c +70 -70
- data/ext/cumo/narray/struct.c +139 -139
- metadata +6 -7
- data/ext/cumo/include/cumo/intern_fwd.h +0 -38
- data/lib/erbpp.rb +0 -294
- data/lib/erbpp/line_number.rb +0 -137
- data/lib/erbpp/narray_def.rb +0 -381
data/bench/numo_bench.rb
CHANGED
@@ -8,8 +8,9 @@ a = Numo::Float32.new(10).seq(1)
|
|
8
8
|
b = Numo::Float32.new(10).seq(10,10)
|
9
9
|
c = a + b
|
10
10
|
|
11
|
-
def elementwise
|
12
|
-
|
11
|
+
def elementwise(num = nil)
|
12
|
+
num ||= NUM
|
13
|
+
puts "elementwise(#{num})"
|
13
14
|
Benchmark.bm do |r|
|
14
15
|
a = Numo::Float32.new(10000).seq(1)
|
15
16
|
b = Numo::Float32.new(10000).seq(10,10)
|
@@ -43,8 +44,9 @@ def elementwise
|
|
43
44
|
end
|
44
45
|
end
|
45
46
|
|
46
|
-
def reduction
|
47
|
-
|
47
|
+
def reduction(num = nil)
|
48
|
+
num ||= NUM
|
49
|
+
puts "reduction(#{num})"
|
48
50
|
Benchmark.bm do |r|
|
49
51
|
a = Numo::Float32.new(10000).seq(1)
|
50
52
|
r.report('10**4') do
|
@@ -73,9 +75,9 @@ def reduction
|
|
73
75
|
end
|
74
76
|
end
|
75
77
|
|
76
|
-
def dot
|
77
|
-
num
|
78
|
-
puts
|
78
|
+
def dot(num = nil)
|
79
|
+
num ||= 1
|
80
|
+
puts "dot(#{num})"
|
79
81
|
Benchmark.bm do |r|
|
80
82
|
a = Numo::Float32.new(100,100).seq(1)
|
81
83
|
b = Numo::Float32.new(100,100).seq(10,10)
|
@@ -115,24 +117,24 @@ dot
|
|
115
117
|
|
116
118
|
# Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz
|
117
119
|
#
|
118
|
-
#
|
120
|
+
# elementwise(100)
|
119
121
|
# user system total real
|
120
|
-
# 10**4 0.010000 0.000000 0.010000 ( 0.
|
121
|
-
# 10**5 0.000000 0.020000 0.020000 ( 0.
|
122
|
-
# 10**6 0.
|
123
|
-
# 10**7
|
124
|
-
# 10**8
|
125
|
-
# reduction
|
122
|
+
# 10**4 0.010000 0.000000 0.010000 ( 0.002368)
|
123
|
+
# 10**5 0.000000 0.020000 0.020000 ( 0.024129)
|
124
|
+
# 10**6 0.080000 0.050000 0.130000 ( 0.139918)
|
125
|
+
# 10**7 1.230000 1.020000 2.250000 ( 2.251331)
|
126
|
+
# 10**8 10.090000 8.560000 18.650000 ( 18.646369)
|
127
|
+
# reduction(100)
|
126
128
|
# user system total real
|
127
|
-
# 10**4 0.000000 0.000000 0.000000 ( 0.
|
128
|
-
# 10**5 0.
|
129
|
-
# 10**6 0.110000 0.000000 0.110000 ( 0.
|
130
|
-
# 10**7 1.
|
131
|
-
# 10**8 11.
|
132
|
-
# dot
|
129
|
+
# 10**4 0.000000 0.000000 0.000000 ( 0.001360)
|
130
|
+
# 10**5 0.020000 0.000000 0.020000 ( 0.011455)
|
131
|
+
# 10**6 0.110000 0.000000 0.110000 ( 0.111708)
|
132
|
+
# 10**7 1.130000 0.000000 1.130000 ( 1.137357)
|
133
|
+
# 10**8 11.830000 0.000000 11.830000 ( 11.832832)
|
134
|
+
# dot(1)
|
133
135
|
# user system total real
|
134
|
-
# 10**4 0.
|
135
|
-
# 10**5 0.
|
136
|
-
# 10**6 0.
|
137
|
-
# 10**7
|
138
|
-
# 10**8
|
136
|
+
# 10**4 0.010000 0.000000 0.010000 ( 0.001390)
|
137
|
+
# 10**5 0.010000 0.000000 0.010000 ( 0.012563)
|
138
|
+
# 10**6 0.120000 0.010000 0.130000 ( 0.125406)
|
139
|
+
# 10**7 1.270000 0.000000 1.270000 ( 1.272804)
|
140
|
+
# 10**8 13.000000 0.000000 13.000000 ( 12.990586)
|
data/docs/src-tree.md
ADDED
@@ -0,0 +1,16 @@
|
|
1
|
+
# Source code organization
|
2
|
+
|
3
|
+
* `*_kernel.{h,cuh,cu}` files are for device (CUDA kernels).
|
4
|
+
* .cu files are compiled via nvcc.
|
5
|
+
* .cu files define C wrapper functions that launch CUDA kernels, so that the kernels can be called from .c files.
|
6
|
+
* Technically, it is not possible to use CRuby API such as `VALUE` in .cu files.
|
7
|
+
* CRuby API functions are not callable from CUDA kernels because they do not have the `__device__` modifier.
|
8
|
+
* nvcc does not support `#include RUBY_EXTCONF_H`, so `ruby.h` cannot be included.
|
9
|
+
* (RULE) C++14 code may be used in .cu files.
|
10
|
+
* Rest of `*.{h,c}` files are for host (CPU).
|
11
|
+
* Call C wrapper functions defined in .cu files.
|
12
|
+
* It can use CRuby API.
|
13
|
+
* (RULE) C++ code must not be used in host files.
|
14
|
+
|
15
|
+
Ruby's `mkmf` (or `extconf.rb`) does not support specifying a third-party compiler such as NVCC for files with other extensions such as `.cu`.
|
16
|
+
Therefore, cumo specifies a wrapper command `bin/mkmf-cu-nvcc` as the compiler, and the wrapper changes its behavior depending on the extension of the file being compiled.
|
data/ext/cumo/cuda/cublas.c
CHANGED
@@ -4,12 +4,66 @@
|
|
4
4
|
#include <ruby.h>
|
5
5
|
#include "cumo/narray.h"
|
6
6
|
#include "cumo/template.h"
|
7
|
+
#include "cumo/cuda/runtime.h"
|
7
8
|
|
8
|
-
|
9
|
-
|
9
|
+
VALUE cumo_cuda_eCublasError;
|
10
|
+
VALUE cumo_cuda_mCublas;
|
11
|
+
#define eCublasError cumo_cuda_eCublasError
|
12
|
+
#define mCublas cumo_cuda_mCublas
|
13
|
+
|
14
|
+
static char*
|
15
|
+
get_cublas_error_msg(cublasStatus_t error) {
|
16
|
+
switch (error) {
|
17
|
+
#define RETURN_MSG(msg) \
|
18
|
+
case msg: \
|
19
|
+
return #msg
|
20
|
+
|
21
|
+
RETURN_MSG(CUBLAS_STATUS_SUCCESS);
|
22
|
+
RETURN_MSG(CUBLAS_STATUS_NOT_INITIALIZED);
|
23
|
+
RETURN_MSG(CUBLAS_STATUS_ALLOC_FAILED);
|
24
|
+
RETURN_MSG(CUBLAS_STATUS_INVALID_VALUE);
|
25
|
+
RETURN_MSG(CUBLAS_STATUS_ARCH_MISMATCH);
|
26
|
+
RETURN_MSG(CUBLAS_STATUS_MAPPING_ERROR);
|
27
|
+
RETURN_MSG(CUBLAS_STATUS_EXECUTION_FAILED);
|
28
|
+
RETURN_MSG(CUBLAS_STATUS_INTERNAL_ERROR);
|
29
|
+
RETURN_MSG(CUBLAS_STATUS_NOT_SUPPORTED);
|
30
|
+
RETURN_MSG(CUBLAS_STATUS_LICENSE_ERROR);
|
31
|
+
|
32
|
+
#undef RETURN_MSG
|
33
|
+
}
|
34
|
+
abort(); // never reach
|
35
|
+
}
|
36
|
+
|
37
|
+
void
|
38
|
+
cumo_cuda_cublas_check_status(cublasStatus_t status)
|
39
|
+
{
|
40
|
+
if (status != 0) {
|
41
|
+
rb_raise(cumo_cuda_eCublasError, "%s (error=%d)", get_cublas_error_msg(status), status);
|
42
|
+
}
|
43
|
+
}
|
44
|
+
|
45
|
+
// Lazily initialize cublas handle, and cache it
|
46
|
+
cublasHandle_t
|
47
|
+
cumo_cuda_cublas_handle()
|
48
|
+
{
|
49
|
+
static cublasHandle_t *handles = 0; // handle is never destroyed
|
50
|
+
if (handles == 0) {
|
51
|
+
int i;
|
52
|
+
int device_count = cumo_cuda_runtime_get_device_count();
|
53
|
+
handles = malloc(sizeof(cublasHandle_t) * device_count);
|
54
|
+
for (i = 0; i < device_count; ++i) {
|
55
|
+
handles[i] = 0;
|
56
|
+
}
|
57
|
+
}
|
58
|
+
int device = cumo_cuda_runtime_get_device();
|
59
|
+
if (handles[device] == 0) {
|
60
|
+
cublasCreate(&handles[device]);
|
61
|
+
}
|
62
|
+
return handles[device];
|
63
|
+
}
|
10
64
|
|
11
65
|
VALUE
|
12
|
-
|
66
|
+
cumo_cuda_cublas_option_value(VALUE value, VALUE default_value)
|
13
67
|
{
|
14
68
|
switch(TYPE(value)) {
|
15
69
|
case T_NIL:
|
@@ -19,45 +73,9 @@ cumo_cublas_option_value(VALUE value, VALUE default_value)
|
|
19
73
|
return value;
|
20
74
|
}
|
21
75
|
|
22
|
-
|
23
|
-
//cumo_cublas_option_order(VALUE order)
|
24
|
-
//{
|
25
|
-
// int opt;
|
26
|
-
// char *ptr;
|
27
|
-
//
|
28
|
-
// switch(TYPE(order)) {
|
29
|
-
// case T_NIL:
|
30
|
-
// case T_UNDEF:
|
31
|
-
// case T_FALSE:
|
32
|
-
// return CblasRowMajor;
|
33
|
-
// case T_TRUE:
|
34
|
-
// return CblasColMajor;
|
35
|
-
// case T_FIXNUM:
|
36
|
-
// opt = FIX2INT(order);
|
37
|
-
// if (opt >= CblasRowMajor && opt <= CblasColMajor) {
|
38
|
-
// return opt;
|
39
|
-
// }
|
40
|
-
// break;
|
41
|
-
// case T_SYMBOL:
|
42
|
-
// order = rb_sym2str(order);
|
43
|
-
// case T_STRING:
|
44
|
-
// ptr = RSTRING_PTR(order);
|
45
|
-
// if (RSTRING_LEN(order) > 0) {
|
46
|
-
// switch(ptr[0]){
|
47
|
-
// case 'R': case 'r':
|
48
|
-
// return CblasRowMajor;
|
49
|
-
// case 'C': case 'c':
|
50
|
-
// return CblasColMajor;
|
51
|
-
// }
|
52
|
-
// }
|
53
|
-
// break;
|
54
|
-
// }
|
55
|
-
// rb_raise(rb_eArgError,"invalid value for CBLAS_ORDER");
|
56
|
-
// return 0;
|
57
|
-
//}
|
58
|
-
|
76
|
+
#if 0
|
59
77
|
cublasOperation_t
|
60
|
-
|
78
|
+
cumo_cuda_cublas_option_trans(VALUE trans)
|
61
79
|
{
|
62
80
|
int opt;
|
63
81
|
char *ptr;
|
@@ -94,185 +112,17 @@ cumo_cublas_option_trans(VALUE trans)
|
|
94
112
|
rb_raise(rb_eArgError, "invalid value for cublasOperation_t");
|
95
113
|
return 0;
|
96
114
|
}
|
115
|
+
#endif
|
97
116
|
|
98
|
-
|
99
|
-
|
100
|
-
{
|
101
|
-
int opt;
|
102
|
-
char *ptr;
|
103
|
-
|
104
|
-
switch(TYPE(uplo)) {
|
105
|
-
case T_NIL:
|
106
|
-
case T_UNDEF:
|
107
|
-
case T_FALSE:
|
108
|
-
return CUBLAS_FILL_MODE_UPPER;
|
109
|
-
case T_TRUE:
|
110
|
-
return CUBLAS_FILL_MODE_LOWER;
|
111
|
-
case T_FIXNUM:
|
112
|
-
opt = FIX2INT(uplo);
|
113
|
-
switch(opt){
|
114
|
-
case CUBLAS_FILL_MODE_UPPER:
|
115
|
-
case CUBLAS_FILL_MODE_LOWER:
|
116
|
-
return opt;
|
117
|
-
}
|
118
|
-
break;
|
119
|
-
case T_SYMBOL:
|
120
|
-
uplo = rb_sym2str(uplo);
|
121
|
-
case T_STRING:
|
122
|
-
ptr = RSTRING_PTR(uplo);
|
123
|
-
if (RSTRING_LEN(uplo) > 0) {
|
124
|
-
switch(ptr[0]){
|
125
|
-
case 'U': case 'u':
|
126
|
-
return CUBLAS_FILL_MODE_UPPER;
|
127
|
-
case 'L': case 'l':
|
128
|
-
return CUBLAS_FILL_MODE_LOWER;
|
129
|
-
}
|
130
|
-
}
|
131
|
-
break;
|
132
|
-
}
|
133
|
-
rb_raise(rb_eArgError, "invalid value for cublasFillMode_t");
|
134
|
-
return 0;
|
135
|
-
}
|
136
|
-
|
137
|
-
cublasDiagType_t
|
138
|
-
cumo_cublas_option_diag(VALUE diag)
|
139
|
-
{
|
140
|
-
int opt;
|
141
|
-
char *ptr;
|
142
|
-
|
143
|
-
switch(TYPE(diag)) {
|
144
|
-
case T_NIL:
|
145
|
-
case T_UNDEF:
|
146
|
-
case T_FALSE:
|
147
|
-
return CUBLAS_DIAG_NON_UNIT;
|
148
|
-
case T_TRUE:
|
149
|
-
return CUBLAS_DIAG_UNIT;
|
150
|
-
case T_FIXNUM:
|
151
|
-
opt = FIX2INT(diag);
|
152
|
-
switch(opt){
|
153
|
-
case CUBLAS_DIAG_NON_UNIT:
|
154
|
-
case CUBLAS_DIAG_UNIT:
|
155
|
-
return opt;
|
156
|
-
}
|
157
|
-
break;
|
158
|
-
case T_SYMBOL:
|
159
|
-
diag = rb_sym2str(diag);
|
160
|
-
case T_STRING:
|
161
|
-
ptr = RSTRING_PTR(diag);
|
162
|
-
if (RSTRING_LEN(diag) > 0) {
|
163
|
-
switch(ptr[0]){
|
164
|
-
case 'N': case 'n':
|
165
|
-
return CUBLAS_DIAG_NON_UNIT;
|
166
|
-
case 'U': case 'u':
|
167
|
-
return CUBLAS_DIAG_UNIT;
|
168
|
-
}
|
169
|
-
}
|
170
|
-
break;
|
171
|
-
}
|
172
|
-
rb_raise(rb_eArgError, "invalid value for cublasDiagType_t");
|
173
|
-
return 0;
|
174
|
-
}
|
175
|
-
|
176
|
-
cublasSideMode_t
|
177
|
-
cumo_cublas_option_side(VALUE side)
|
117
|
+
void
|
118
|
+
Init_cumo_cuda_cublas(void)
|
178
119
|
{
|
179
|
-
|
180
|
-
|
120
|
+
VALUE mCumo = rb_define_module("Cumo");
|
121
|
+
VALUE mCUDA = rb_define_module_under(mCumo, "CUDA");
|
181
122
|
|
182
|
-
|
183
|
-
|
184
|
-
|
185
|
-
|
186
|
-
|
187
|
-
case T_TRUE:
|
188
|
-
return CUBLAS_SIDE_RIGHT;
|
189
|
-
case T_FIXNUM:
|
190
|
-
opt = FIX2INT(side);
|
191
|
-
switch(opt){
|
192
|
-
case CUBLAS_SIDE_LEFT:
|
193
|
-
case CUBLAS_SIDE_RIGHT:
|
194
|
-
return opt;
|
195
|
-
}
|
196
|
-
break;
|
197
|
-
case T_SYMBOL:
|
198
|
-
side = rb_sym2str(side);
|
199
|
-
case T_STRING:
|
200
|
-
ptr = RSTRING_PTR(side);
|
201
|
-
if (RSTRING_LEN(side) > 0) {
|
202
|
-
switch(ptr[0]){
|
203
|
-
case 'L': case 'l':
|
204
|
-
return CUBLAS_SIDE_LEFT;
|
205
|
-
case 'R': case 'r':
|
206
|
-
return CUBLAS_SIDE_RIGHT;
|
207
|
-
}
|
208
|
-
}
|
209
|
-
break;
|
210
|
-
}
|
211
|
-
rb_raise(rb_eArgError, "invalid value for cublasSideMode_t");
|
212
|
-
return 0;
|
123
|
+
/*
|
124
|
+
Document-module: Cumo::CUDA::Cublas
|
125
|
+
*/
|
126
|
+
mCublas = rb_define_module_under(mCUDA, "Cublas");
|
127
|
+
eCublasError = rb_define_class_under(mCUDA, "CublasError", rb_eStandardError);
|
213
128
|
}
|
214
|
-
|
215
|
-
//void
|
216
|
-
//cumo_cublas_check_func(void **func, const char *name)
|
217
|
-
//{
|
218
|
-
// char *s, *error;
|
219
|
-
//
|
220
|
-
// if (*func==0) {
|
221
|
-
// if (blas_handle==0) {
|
222
|
-
// rb_raise(rb_eRuntimeError,"BLAS library is not loaded");
|
223
|
-
// }
|
224
|
-
// if (blas_prefix==0) {
|
225
|
-
// rb_raise(rb_eRuntimeError,"CBLAS prefix is not set");
|
226
|
-
// }
|
227
|
-
// s = alloca(strlen(blas_prefix)+strlen(name)+1);
|
228
|
-
// strcpy(s,blas_prefix);
|
229
|
-
// strcat(s,name);
|
230
|
-
// dlerror();
|
231
|
-
// *func = dlsym(blas_handle, s);
|
232
|
-
// error = dlerror();
|
233
|
-
// if (error != NULL) {
|
234
|
-
// rb_raise(rb_eRuntimeError, "%s", error);
|
235
|
-
// }
|
236
|
-
// }
|
237
|
-
//}
|
238
|
-
|
239
|
-
//static VALUE
|
240
|
-
//blas_s_prefix_set(VALUE mod, VALUE prefix)
|
241
|
-
//{
|
242
|
-
// long len;
|
243
|
-
//
|
244
|
-
// if (TYPE(prefix) != T_STRING) {
|
245
|
-
// rb_raise(rb_eTypeError,"argument must be string");
|
246
|
-
// }
|
247
|
-
// if (blas_prefix) {
|
248
|
-
// free(blas_prefix);
|
249
|
-
// }
|
250
|
-
// len = RSTRING_LEN(prefix);
|
251
|
-
// blas_prefix = malloc(len+1);
|
252
|
-
// strcpy(blas_prefix, StringValueCStr(prefix));
|
253
|
-
// return prefix;
|
254
|
-
//}
|
255
|
-
|
256
|
-
//void
|
257
|
-
//Init_blas(void)
|
258
|
-
//{
|
259
|
-
// VALUE mN;
|
260
|
-
//
|
261
|
-
// mN = rb_define_module("Numo");
|
262
|
-
// /*
|
263
|
-
// Document-module: Numo::Linalg
|
264
|
-
// */
|
265
|
-
// mLinalg = rb_define_module_under(mN, "Linalg");
|
266
|
-
// mBlas = rb_define_module_under(mLinalg, "Blas");
|
267
|
-
//
|
268
|
-
// rb_define_module_function(mBlas, "dlopen", blas_s_dlopen, -1);
|
269
|
-
// rb_define_module_function(mBlas, "prefix=", blas_s_prefix_set, 1);
|
270
|
-
//
|
271
|
-
// blas_prefix = malloc(strlen("cublas_")+1); // default prefix
|
272
|
-
// strcpy(blas_prefix,"cublas_");
|
273
|
-
//
|
274
|
-
// Init_cumo_linalg_blas_s();
|
275
|
-
// Init_cumo_linalg_blas_d();
|
276
|
-
// Init_cumo_linalg_blas_c();
|
277
|
-
// Init_cumo_linalg_blas_z();
|
278
|
-
//}
|
data/ext/cumo/cuda/runtime.c
CHANGED
@@ -64,13 +64,7 @@ rb_cudaRuntimeGetVersion(VALUE self)
|
|
64
64
|
static VALUE
|
65
65
|
rb_cudaGetDevice(VALUE self)
|
66
66
|
{
|
67
|
-
|
68
|
-
cudaError_t status;
|
69
|
-
|
70
|
-
status = cudaGetDevice(&_device);
|
71
|
-
|
72
|
-
check_status(status);
|
73
|
-
return INT2NUM(_device);
|
67
|
+
return INT2NUM(cumo_cuda_runtime_get_device());
|
74
68
|
}
|
75
69
|
|
76
70
|
/*
|
@@ -106,13 +100,7 @@ rb_cudaDeviceGetAttributes(VALUE self, VALUE attrib, VALUE device)
|
|
106
100
|
static VALUE
|
107
101
|
rb_cudaGetDeviceCount(VALUE self)
|
108
102
|
{
|
109
|
-
|
110
|
-
cudaError_t status;
|
111
|
-
|
112
|
-
status = cudaGetDeviceCount(&_count);
|
113
|
-
|
114
|
-
check_status(status);
|
115
|
-
return INT2NUM(_count);
|
103
|
+
return INT2NUM(cumo_cuda_runtime_get_device_count());
|
116
104
|
}
|
117
105
|
|
118
106
|
/*
|
data/ext/cumo/cumo.c
CHANGED
@@ -7,10 +7,10 @@
|
|
7
7
|
|
8
8
|
void Init_cumo();
|
9
9
|
void Init_cumo_narray();
|
10
|
-
void
|
11
|
-
void
|
12
|
-
void
|
13
|
-
void
|
10
|
+
void Init_cumo_na_data();
|
11
|
+
void Init_cumo_na_ndloop();
|
12
|
+
void Init_cumo_na_step();
|
13
|
+
void Init_cumo_na_index();
|
14
14
|
void Init_cumo_bit();
|
15
15
|
void Init_cumo_int8();
|
16
16
|
void Init_cumo_int16();
|
@@ -25,10 +25,10 @@ void Init_cumo_scomplex();
|
|
25
25
|
void Init_cumo_dfloat();
|
26
26
|
void Init_cumo_dcomplex();
|
27
27
|
void Init_cumo_robject();
|
28
|
-
void
|
29
|
-
void
|
30
|
-
void
|
31
|
-
void
|
28
|
+
void Init_cumo_na_math();
|
29
|
+
void Init_cumo_na_rand();
|
30
|
+
void Init_cumo_na_array();
|
31
|
+
void Init_cumo_na_struct();
|
32
32
|
void Init_cumo_cuda_driver();
|
33
33
|
void Init_cumo_cuda_memory_pool();
|
34
34
|
void Init_cumo_cuda_runtime();
|
@@ -112,11 +112,11 @@ Init_cumo()
|
|
112
112
|
|
113
113
|
Init_cumo_narray();
|
114
114
|
|
115
|
-
|
116
|
-
|
115
|
+
Init_cumo_na_step();
|
116
|
+
Init_cumo_na_index();
|
117
117
|
|
118
|
-
|
119
|
-
|
118
|
+
Init_cumo_na_data();
|
119
|
+
Init_cumo_na_ndloop();
|
120
120
|
|
121
121
|
Init_cumo_dcomplex();
|
122
122
|
Init_cumo_dfloat();
|
@@ -135,11 +135,11 @@ Init_cumo()
|
|
135
135
|
Init_cumo_bit();
|
136
136
|
Init_cumo_robject();
|
137
137
|
|
138
|
-
|
138
|
+
Init_cumo_na_math();
|
139
139
|
|
140
|
-
|
141
|
-
|
142
|
-
|
140
|
+
Init_cumo_na_rand();
|
141
|
+
Init_cumo_na_array();
|
142
|
+
Init_cumo_na_struct();
|
143
143
|
|
144
144
|
Init_cumo_cuda_driver();
|
145
145
|
Init_cumo_cuda_memory_pool();
|