cumo 0.1.0 → 0.1.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +5 -5
- data/.gitignore +1 -0
- data/3rd_party/LICENSE.txt +60 -0
- data/3rd_party/mkmf-cu/lib/mkmf-cu/cli.rb +13 -1
- data/LICENSE.txt +1 -62
- data/README.md +33 -29
- data/bench/cumo_bench.rb +47 -25
- data/bench/numo_bench.rb +27 -25
- data/docs/src-tree.md +16 -0
- data/ext/cumo/cuda/cublas.c +69 -219
- data/ext/cumo/cuda/memory_pool_impl.hpp +1 -0
- data/ext/cumo/cuda/runtime.c +2 -14
- data/ext/cumo/cumo.c +16 -16
- data/ext/cumo/include/cumo.h +2 -2
- data/ext/cumo/include/cumo/cuda/cublas.h +6 -129
- data/ext/cumo/include/cumo/cuda/runtime.h +16 -0
- data/ext/cumo/include/cumo/indexer.h +46 -63
- data/ext/cumo/include/cumo/intern.h +58 -112
- data/ext/cumo/include/cumo/narray.h +214 -185
- data/ext/cumo/include/cumo/narray_kernel.h +66 -37
- data/ext/cumo/include/cumo/ndloop.h +42 -42
- data/ext/cumo/include/cumo/reduce_kernel.h +55 -71
- data/ext/cumo/include/cumo/template.h +56 -51
- data/ext/cumo/include/cumo/template_kernel.h +31 -31
- data/ext/cumo/include/cumo/types/bit.h +3 -3
- data/ext/cumo/include/cumo/types/bit_kernel.h +2 -2
- data/ext/cumo/include/cumo/types/complex.h +126 -126
- data/ext/cumo/include/cumo/types/complex_kernel.h +126 -126
- data/ext/cumo/include/cumo/types/complex_macro.h +28 -28
- data/ext/cumo/include/cumo/types/complex_macro_kernel.h +20 -20
- data/ext/cumo/include/cumo/types/dcomplex.h +5 -5
- data/ext/cumo/include/cumo/types/dcomplex_kernel.h +1 -1
- data/ext/cumo/include/cumo/types/int_macro.h +1 -1
- data/ext/cumo/include/cumo/types/int_macro_kernel.h +1 -1
- data/ext/cumo/include/cumo/types/robj_macro.h +30 -30
- data/ext/cumo/include/cumo/types/scomplex.h +5 -5
- data/ext/cumo/include/cumo/types/scomplex_kernel.h +1 -1
- data/ext/cumo/narray/array.c +143 -143
- data/ext/cumo/narray/data.c +184 -184
- data/ext/cumo/narray/gen/cogen.rb +5 -2
- data/ext/cumo/narray/gen/cogen_kernel.rb +5 -2
- data/ext/cumo/narray/gen/def/dcomplex.rb +1 -1
- data/ext/cumo/narray/gen/def/scomplex.rb +1 -1
- data/ext/cumo/narray/gen/erbln.rb +132 -0
- data/ext/cumo/narray/gen/erbpp2.rb +18 -13
- data/ext/cumo/narray/gen/narray_def.rb +3 -3
- data/ext/cumo/narray/gen/spec.rb +2 -2
- data/ext/cumo/narray/gen/tmpl/accum.c +15 -15
- data/ext/cumo/narray/gen/tmpl/accum_binary.c +22 -22
- data/ext/cumo/narray/gen/tmpl/accum_binary_kernel.cu +3 -3
- data/ext/cumo/narray/gen/tmpl/accum_index.c +30 -30
- data/ext/cumo/narray/gen/tmpl/accum_index_kernel.cu +2 -2
- data/ext/cumo/narray/gen/tmpl/accum_kernel.cu +3 -3
- data/ext/cumo/narray/gen/tmpl/alloc_func.c +14 -14
- data/ext/cumo/narray/gen/tmpl/allocate.c +11 -11
- data/ext/cumo/narray/gen/tmpl/aref.c +2 -2
- data/ext/cumo/narray/gen/tmpl/aref_cpu.c +4 -4
- data/ext/cumo/narray/gen/tmpl/aset.c +2 -2
- data/ext/cumo/narray/gen/tmpl/binary.c +28 -28
- data/ext/cumo/narray/gen/tmpl/binary2.c +18 -18
- data/ext/cumo/narray/gen/tmpl/binary2_kernel.cu +3 -3
- data/ext/cumo/narray/gen/tmpl/binary_kernel.cu +6 -6
- data/ext/cumo/narray/gen/tmpl/binary_s.c +13 -13
- data/ext/cumo/narray/gen/tmpl/binary_s_kernel.cu +3 -3
- data/ext/cumo/narray/gen/tmpl/bincount.c +23 -23
- data/ext/cumo/narray/gen/tmpl/cast.c +7 -7
- data/ext/cumo/narray/gen/tmpl/cast_array.c +3 -3
- data/ext/cumo/narray/gen/tmpl/clip.c +38 -38
- data/ext/cumo/narray/gen/tmpl/complex_accum_kernel.cu +2 -2
- data/ext/cumo/narray/gen/tmpl/cond_binary.c +19 -19
- data/ext/cumo/narray/gen/tmpl/cond_binary_kernel.cu +7 -7
- data/ext/cumo/narray/gen/tmpl/cond_unary.c +15 -15
- data/ext/cumo/narray/gen/tmpl/cum.c +15 -15
- data/ext/cumo/narray/gen/tmpl/each.c +9 -9
- data/ext/cumo/narray/gen/tmpl/each_with_index.c +9 -9
- data/ext/cumo/narray/gen/tmpl/ewcomp.c +15 -15
- data/ext/cumo/narray/gen/tmpl/ewcomp_kernel.cu +3 -3
- data/ext/cumo/narray/gen/tmpl/extract_cpu.c +5 -5
- data/ext/cumo/narray/gen/tmpl/extract_data.c +12 -12
- data/ext/cumo/narray/gen/tmpl/eye.c +9 -9
- data/ext/cumo/narray/gen/tmpl/eye_kernel.cu +3 -3
- data/ext/cumo/narray/gen/tmpl/fill.c +9 -9
- data/ext/cumo/narray/gen/tmpl/fill_kernel.cu +6 -6
- data/ext/cumo/narray/gen/tmpl/float_accum_kernel.cu +1 -1
- data/ext/cumo/narray/gen/tmpl/format.c +11 -11
- data/ext/cumo/narray/gen/tmpl/format_to_a.c +8 -8
- data/ext/cumo/narray/gen/tmpl/frexp.c +13 -13
- data/ext/cumo/narray/gen/tmpl/gemm.c +252 -108
- data/ext/cumo/narray/gen/tmpl/inspect.c +1 -1
- data/ext/cumo/narray/gen/tmpl/lib.c +2 -2
- data/ext/cumo/narray/gen/tmpl/logseq.c +7 -7
- data/ext/cumo/narray/gen/tmpl/logseq_kernel.cu +6 -6
- data/ext/cumo/narray/gen/tmpl/map_with_index.c +17 -17
- data/ext/cumo/narray/gen/tmpl/median.c +10 -10
- data/ext/cumo/narray/gen/tmpl/minmax.c +10 -10
- data/ext/cumo/narray/gen/tmpl/new_dim0.c +3 -3
- data/ext/cumo/narray/gen/tmpl/poly.c +6 -6
- data/ext/cumo/narray/gen/tmpl/pow.c +28 -28
- data/ext/cumo/narray/gen/tmpl/pow_kernel.cu +6 -6
- data/ext/cumo/narray/gen/tmpl/rand.c +10 -10
- data/ext/cumo/narray/gen/tmpl/rand_norm.c +7 -7
- data/ext/cumo/narray/gen/tmpl/real_accum_kernel.cu +6 -6
- data/ext/cumo/narray/gen/tmpl/seq.c +7 -7
- data/ext/cumo/narray/gen/tmpl/seq_kernel.cu +6 -6
- data/ext/cumo/narray/gen/tmpl/set2.c +20 -20
- data/ext/cumo/narray/gen/tmpl/sort.c +11 -11
- data/ext/cumo/narray/gen/tmpl/sort_index.c +18 -18
- data/ext/cumo/narray/gen/tmpl/store.c +6 -6
- data/ext/cumo/narray/gen/tmpl/store_array.c +19 -19
- data/ext/cumo/narray/gen/tmpl/store_array_kernel.cu +12 -12
- data/ext/cumo/narray/gen/tmpl/store_bit.c +23 -23
- data/ext/cumo/narray/gen/tmpl/store_bit_kernel.cu +28 -28
- data/ext/cumo/narray/gen/tmpl/store_from.c +16 -16
- data/ext/cumo/narray/gen/tmpl/store_from_kernel.cu +12 -12
- data/ext/cumo/narray/gen/tmpl/to_a.c +10 -10
- data/ext/cumo/narray/gen/tmpl/unary.c +25 -25
- data/ext/cumo/narray/gen/tmpl/unary2.c +17 -17
- data/ext/cumo/narray/gen/tmpl/unary_kernel.cu +15 -15
- data/ext/cumo/narray/gen/tmpl/unary_ret2.c +13 -13
- data/ext/cumo/narray/gen/tmpl/unary_s.c +17 -17
- data/ext/cumo/narray/gen/tmpl/unary_s_kernel.cu +12 -12
- data/ext/cumo/narray/gen/tmpl_bit/allocate.c +9 -9
- data/ext/cumo/narray/gen/tmpl_bit/aref.c +2 -2
- data/ext/cumo/narray/gen/tmpl_bit/aref_cpu.c +5 -5
- data/ext/cumo/narray/gen/tmpl_bit/aset.c +2 -2
- data/ext/cumo/narray/gen/tmpl_bit/binary.c +29 -29
- data/ext/cumo/narray/gen/tmpl_bit/bit_count.c +14 -14
- data/ext/cumo/narray/gen/tmpl_bit/bit_count_cpu.c +21 -21
- data/ext/cumo/narray/gen/tmpl_bit/bit_count_kernel.cu +28 -28
- data/ext/cumo/narray/gen/tmpl_bit/bit_reduce.c +29 -29
- data/ext/cumo/narray/gen/tmpl_bit/each.c +10 -10
- data/ext/cumo/narray/gen/tmpl_bit/each_with_index.c +10 -10
- data/ext/cumo/narray/gen/tmpl_bit/extract.c +8 -8
- data/ext/cumo/narray/gen/tmpl_bit/extract_cpu.c +8 -8
- data/ext/cumo/narray/gen/tmpl_bit/fill.c +17 -17
- data/ext/cumo/narray/gen/tmpl_bit/format.c +14 -14
- data/ext/cumo/narray/gen/tmpl_bit/format_to_a.c +11 -11
- data/ext/cumo/narray/gen/tmpl_bit/inspect.c +3 -3
- data/ext/cumo/narray/gen/tmpl_bit/mask.c +33 -33
- data/ext/cumo/narray/gen/tmpl_bit/store_array.c +19 -19
- data/ext/cumo/narray/gen/tmpl_bit/store_bit.c +22 -22
- data/ext/cumo/narray/gen/tmpl_bit/store_from.c +18 -18
- data/ext/cumo/narray/gen/tmpl_bit/to_a.c +12 -12
- data/ext/cumo/narray/gen/tmpl_bit/unary.c +24 -24
- data/ext/cumo/narray/gen/tmpl_bit/where.c +16 -16
- data/ext/cumo/narray/gen/tmpl_bit/where2.c +20 -20
- data/ext/cumo/narray/index.c +213 -213
- data/ext/cumo/narray/math.c +27 -27
- data/ext/cumo/narray/narray.c +484 -484
- data/ext/cumo/narray/ndloop.c +259 -258
- data/ext/cumo/narray/rand.c +3 -3
- data/ext/cumo/narray/step.c +70 -70
- data/ext/cumo/narray/struct.c +139 -139
- metadata +6 -7
- data/ext/cumo/include/cumo/intern_fwd.h +0 -38
- data/lib/erbpp.rb +0 -294
- data/lib/erbpp/line_number.rb +0 -137
- data/lib/erbpp/narray_def.rb +0 -381
@@ -52,9 +52,9 @@ void <%="cumo_#{type_name}_mulsum#{nan}_reduce_kernel_launch"%>(char *p1, char *
|
|
52
52
|
|
53
53
|
void <%="cumo_#{type_name}_mulsum#{nan}_kernel_launch"%>(char *p1, char *p2, char *p3, ssize_t s1, ssize_t s2, ssize_t s3, uint64_t n)
|
54
54
|
{
|
55
|
-
size_t
|
56
|
-
size_t
|
57
|
-
<%="cumo_#{type_name}_mulsum#{nan}_kernel"%><<<
|
55
|
+
size_t grid_dim = cumo_get_grid_dim(n);
|
56
|
+
size_t block_dim = cumo_get_block_dim(n);
|
57
|
+
<%="cumo_#{type_name}_mulsum#{nan}_kernel"%><<<grid_dim, block_dim>>>(p1,p2,p3,s1,s2,s3,n);
|
58
58
|
}
|
59
59
|
//<% end %>
|
60
60
|
<% end %>
|
@@ -2,12 +2,12 @@
|
|
2
2
|
|
3
3
|
<% [64,32].each do |i| %>
|
4
4
|
<% unless type_name == 'robject' %>
|
5
|
-
void cumo_<%=type_name%>_<%=name%><%=nan%>_int<%=i%>_kernel_launch(
|
5
|
+
void cumo_<%=type_name%>_<%=name%><%=nan%>_int<%=i%>_kernel_launch(cumo_na_reduction_arg_t* arg);
|
6
6
|
<% end %>
|
7
7
|
|
8
8
|
#define idx_t int<%=i%>_t
|
9
9
|
static void
|
10
|
-
<%=c_iter%>_index<%=i%><%=nan%>(
|
10
|
+
<%=c_iter%>_index<%=i%><%=nan%>(cumo_na_loop_t *const lp)
|
11
11
|
{
|
12
12
|
// TODO(sonots): Support nan in CUDA
|
13
13
|
<% if type_name == 'robject' || nan == '_nan' %>
|
@@ -16,18 +16,18 @@ static void
|
|
16
16
|
char *d_ptr, *i_ptr, *o_ptr;
|
17
17
|
ssize_t d_step, i_step;
|
18
18
|
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
o_ptr =
|
19
|
+
CUMO_INIT_COUNTER(lp, n);
|
20
|
+
CUMO_INIT_PTR(lp, 0, d_ptr, d_step);
|
21
|
+
CUMO_INIT_PTR(lp, 1, i_ptr, i_step);
|
22
|
+
o_ptr = CUMO_NDL_PTR(lp,2);
|
23
23
|
|
24
|
-
|
24
|
+
CUMO_SHOW_SYNCHRONIZE_FIXME_WARNING_ONCE("<%=name%><%=nan%>", "<%=type_name%>");
|
25
25
|
idx = f_<%=name%><%=nan%>(n,d_ptr,d_step);
|
26
26
|
*(idx_t*)o_ptr = *(idx_t*)(i_ptr + i_step * idx);
|
27
27
|
}
|
28
28
|
<% else %>
|
29
29
|
{
|
30
|
-
|
30
|
+
cumo_na_reduction_arg_t arg = cumo_na_make_reduction_arg(lp);
|
31
31
|
cumo_<%=type_name%>_<%=name%><%=nan%>_int<%=i%>_kernel_launch(&arg);
|
32
32
|
}
|
33
33
|
<% end %>
|
@@ -53,67 +53,67 @@ static VALUE
|
|
53
53
|
{
|
54
54
|
//<% if type_name == 'robject' %>
|
55
55
|
{
|
56
|
-
|
56
|
+
cumo_narray_t *na;
|
57
57
|
VALUE idx, reduce;
|
58
|
-
|
59
|
-
|
60
|
-
|
58
|
+
cumo_ndfunc_arg_in_t ain[3] = {{Qnil,0},{Qnil,0},{cumo_sym_reduce,0}};
|
59
|
+
cumo_ndfunc_arg_out_t aout[1] = {{0,0,0}};
|
60
|
+
cumo_ndfunc_t ndf = {0, CUMO_STRIDE_LOOP_NIP|CUMO_NDF_FLAT_REDUCE|CUMO_NDF_EXTRACT, 3,1, ain,aout};
|
61
61
|
|
62
|
-
|
62
|
+
CumoGetNArray(self,na);
|
63
63
|
if (na->ndim==0) {
|
64
64
|
return INT2FIX(0);
|
65
65
|
}
|
66
66
|
if (na->size > (~(u_int32_t)0)) {
|
67
67
|
aout[0].type = cumo_cInt64;
|
68
|
-
idx =
|
68
|
+
idx = cumo_na_new(cumo_cInt64, na->ndim, na->shape);
|
69
69
|
ndf.func = <%=c_iter%>_index64;
|
70
70
|
<% if is_float %>
|
71
|
-
reduce =
|
71
|
+
reduce = cumo_na_reduce_dimension(argc, argv, 1, &self, &ndf, <%=c_iter%>_index64_nan);
|
72
72
|
<% else %>
|
73
|
-
reduce =
|
73
|
+
reduce = cumo_na_reduce_dimension(argc, argv, 1, &self, &ndf, 0);
|
74
74
|
<% end %>
|
75
75
|
} else {
|
76
76
|
aout[0].type = cumo_cInt32;
|
77
|
-
idx =
|
77
|
+
idx = cumo_na_new(cumo_cInt32, na->ndim, na->shape);
|
78
78
|
ndf.func = <%=c_iter%>_index32;
|
79
79
|
<% if is_float %>
|
80
|
-
reduce =
|
80
|
+
reduce = cumo_na_reduce_dimension(argc, argv, 1, &self, &ndf, <%=c_iter%>_index32_nan);
|
81
81
|
<% else %>
|
82
|
-
reduce =
|
82
|
+
reduce = cumo_na_reduce_dimension(argc, argv, 1, &self, &ndf, 0);
|
83
83
|
<% end %>
|
84
84
|
}
|
85
85
|
rb_funcall(idx, rb_intern("seq"), 0);
|
86
86
|
|
87
|
-
return
|
87
|
+
return cumo_na_ndloop(&ndf, 3, self, idx, reduce);
|
88
88
|
}
|
89
89
|
<% else %>
|
90
90
|
{
|
91
|
-
|
91
|
+
cumo_narray_t *na;
|
92
92
|
VALUE reduce;
|
93
|
-
|
94
|
-
|
95
|
-
|
93
|
+
cumo_ndfunc_arg_in_t ain[2] = {{Qnil,0},{cumo_sym_reduce,0}};
|
94
|
+
cumo_ndfunc_arg_out_t aout[1] = {{0,0,0}};
|
95
|
+
cumo_ndfunc_t ndf = {0, CUMO_STRIDE_LOOP_NIP|CUMO_NDF_FLAT_REDUCE|CUMO_NDF_EXTRACT|CUMO_NDF_INDEXER_LOOP, 2,1, ain,aout};
|
96
96
|
|
97
|
-
|
97
|
+
CumoGetNArray(self,na);
|
98
98
|
if (na->size > (~(u_int32_t)0)) {
|
99
99
|
aout[0].type = cumo_cInt64;
|
100
100
|
ndf.func = <%=c_iter%>_index64;
|
101
101
|
<% if is_float %>
|
102
|
-
reduce =
|
102
|
+
reduce = cumo_na_reduce_dimension(argc, argv, 1, &self, &ndf, <%=c_iter%>_index64_nan);
|
103
103
|
<% else %>
|
104
|
-
reduce =
|
104
|
+
reduce = cumo_na_reduce_dimension(argc, argv, 1, &self, &ndf, 0);
|
105
105
|
<% end %>
|
106
106
|
} else {
|
107
107
|
aout[0].type = cumo_cInt32;
|
108
108
|
ndf.func = <%=c_iter%>_index32;
|
109
109
|
<% if is_float %>
|
110
|
-
reduce =
|
110
|
+
reduce = cumo_na_reduce_dimension(argc, argv, 1, &self, &ndf, <%=c_iter%>_index32_nan);
|
111
111
|
<% else %>
|
112
|
-
reduce =
|
112
|
+
reduce = cumo_na_reduce_dimension(argc, argv, 1, &self, &ndf, 0);
|
113
113
|
<% end %>
|
114
114
|
}
|
115
115
|
|
116
|
-
return
|
116
|
+
return cumo_na_ndloop(&ndf, 2, self, reduce);
|
117
117
|
}
|
118
118
|
<% end %>
|
119
119
|
}
|
@@ -49,12 +49,12 @@ extern "C" {
|
|
49
49
|
#endif
|
50
50
|
#endif
|
51
51
|
|
52
|
-
void cumo_<%=type_name%>_min_index_int<%=i%>_kernel_launch(
|
52
|
+
void cumo_<%=type_name%>_min_index_int<%=i%>_kernel_launch(cumo_na_reduction_arg_t* arg)
|
53
53
|
{
|
54
54
|
cumo_reduce<dtype, idx_t, cumo_<%=type_name%>_min_index_int<%=i%>_impl>(*arg, cumo_<%=type_name%>_min_index_int<%=i%>_impl{});
|
55
55
|
}
|
56
56
|
|
57
|
-
void cumo_<%=type_name%>_max_index_int<%=i%>_kernel_launch(
|
57
|
+
void cumo_<%=type_name%>_max_index_int<%=i%>_kernel_launch(cumo_na_reduction_arg_t* arg)
|
58
58
|
{
|
59
59
|
cumo_reduce<dtype, idx_t, cumo_<%=type_name%>_max_index_int<%=i%>_impl>(*arg, cumo_<%=type_name%>_max_index_int<%=i%>_impl{});
|
60
60
|
}
|
@@ -2,11 +2,11 @@
|
|
2
2
|
<% $cumo_narray_gen_tmpl_accum_kernel_included = 1 %>
|
3
3
|
|
4
4
|
<% if type_name.include?('int') %>
|
5
|
-
|
5
|
+
<%= load_erb("real_accum").result(binding) %>
|
6
6
|
<% elsif type_name.include?('float') %>
|
7
|
-
<%=
|
7
|
+
<%= load_erb("float_accum").result(binding) %>
|
8
8
|
<% elsif type_name.include?('complex') %>
|
9
|
-
<%=
|
9
|
+
<%= load_erb("complex_accum").result(binding) %>
|
10
10
|
<% end %>
|
11
11
|
|
12
12
|
<% end %>
|
@@ -1,14 +1,14 @@
|
|
1
1
|
static size_t
|
2
2
|
<%=type_name%>_memsize(const void* ptr)
|
3
3
|
{
|
4
|
-
size_t size = sizeof(
|
5
|
-
const
|
4
|
+
size_t size = sizeof(cumo_narray_data_t);
|
5
|
+
const cumo_narray_data_t *na = (const cumo_narray_data_t*)ptr;
|
6
6
|
|
7
|
-
assert(na->base.type ==
|
7
|
+
assert(na->base.type == CUMO_NARRAY_DATA_T);
|
8
8
|
|
9
9
|
if (na->ptr != NULL) {
|
10
10
|
<% if is_bit %>
|
11
|
-
size += ((na->base.size-1)/8/sizeof(
|
11
|
+
size += ((na->base.size-1)/8/sizeof(CUMO_BIT_DIGIT)+1)*sizeof(CUMO_BIT_DIGIT);
|
12
12
|
<% else %>
|
13
13
|
size += na->base.size * sizeof(dtype);
|
14
14
|
<% end %>
|
@@ -24,9 +24,9 @@ static size_t
|
|
24
24
|
static void
|
25
25
|
<%=type_name%>_free(void* ptr)
|
26
26
|
{
|
27
|
-
|
27
|
+
cumo_narray_data_t *na = (cumo_narray_data_t*)ptr;
|
28
28
|
|
29
|
-
assert(na->base.type ==
|
29
|
+
assert(na->base.type == CUMO_NARRAY_DATA_T);
|
30
30
|
|
31
31
|
if (na->ptr != NULL) {
|
32
32
|
cumo_cuda_runtime_free(na->ptr);
|
@@ -41,7 +41,7 @@ static void
|
|
41
41
|
xfree(na);
|
42
42
|
}
|
43
43
|
|
44
|
-
static
|
44
|
+
static cumo_narray_type_info_t <%=type_name%>_info = {
|
45
45
|
<% if is_bit %>
|
46
46
|
1, // element_bits
|
47
47
|
0, // element_bytes
|
@@ -59,7 +59,7 @@ static void
|
|
59
59
|
{
|
60
60
|
size_t n, i;
|
61
61
|
VALUE *a;
|
62
|
-
|
62
|
+
cumo_narray_data_t *na = ptr;
|
63
63
|
|
64
64
|
if (na->ptr) {
|
65
65
|
a = (VALUE*)(na->ptr);
|
@@ -73,7 +73,7 @@ static void
|
|
73
73
|
static const rb_data_type_t <%=type_name%>_data_type = {
|
74
74
|
"<%=full_class_name%>",
|
75
75
|
{<%=type_name%>_gc_mark, <%=type_name%>_free, <%=type_name%>_memsize,},
|
76
|
-
&
|
76
|
+
&cumo_na_data_type,
|
77
77
|
&<%=type_name%>_info,
|
78
78
|
0, // flags
|
79
79
|
};
|
@@ -83,7 +83,7 @@ static const rb_data_type_t <%=type_name%>_data_type = {
|
|
83
83
|
static const rb_data_type_t <%=type_name%>_data_type = {
|
84
84
|
"<%=full_class_name%>",
|
85
85
|
{0, <%=type_name%>_free, <%=type_name%>_memsize,},
|
86
|
-
&
|
86
|
+
&cumo_na_data_type,
|
87
87
|
&<%=type_name%>_info,
|
88
88
|
0, // flags
|
89
89
|
};
|
@@ -93,12 +93,12 @@ static const rb_data_type_t <%=type_name%>_data_type = {
|
|
93
93
|
static VALUE
|
94
94
|
<%=c_func(0)%>(VALUE klass)
|
95
95
|
{
|
96
|
-
|
96
|
+
cumo_narray_data_t *na = ALLOC(cumo_narray_data_t);
|
97
97
|
|
98
98
|
na->base.ndim = 0;
|
99
|
-
na->base.type =
|
100
|
-
na->base.flag[0] =
|
101
|
-
na->base.flag[1] =
|
99
|
+
na->base.type = CUMO_NARRAY_DATA_T;
|
100
|
+
na->base.flag[0] = CUMO_NA_FL0_INIT;
|
101
|
+
na->base.flag[1] = CUMO_NA_FL1_INIT;
|
102
102
|
na->base.size = 0;
|
103
103
|
na->base.shape = NULL;
|
104
104
|
na->base.reduce = INT2FIX(0);
|
@@ -1,14 +1,14 @@
|
|
1
1
|
static VALUE
|
2
2
|
<%=c_func(0)%>(VALUE self)
|
3
3
|
{
|
4
|
-
|
4
|
+
cumo_narray_t *na;
|
5
5
|
void *ptr;
|
6
6
|
|
7
|
-
|
7
|
+
CumoGetNArray(self,na);
|
8
8
|
|
9
|
-
switch(
|
10
|
-
case
|
11
|
-
ptr =
|
9
|
+
switch(CUMO_NA_TYPE(na)) {
|
10
|
+
case CUMO_NARRAY_DATA_T:
|
11
|
+
ptr = CUMO_NA_DATA_PTR(na);
|
12
12
|
if (na->size > 0 && ptr == NULL) {
|
13
13
|
<% if is_object %>
|
14
14
|
ptr = xmalloc(sizeof(dtype) * na->size);
|
@@ -21,17 +21,17 @@ static VALUE
|
|
21
21
|
<% else %>
|
22
22
|
ptr = cumo_cuda_runtime_malloc(sizeof(dtype) * na->size);
|
23
23
|
<% end %>
|
24
|
-
|
24
|
+
CUMO_NA_DATA_PTR(na) = ptr;
|
25
25
|
}
|
26
26
|
break;
|
27
|
-
case
|
28
|
-
rb_funcall(
|
27
|
+
case CUMO_NARRAY_VIEW_T:
|
28
|
+
rb_funcall(CUMO_NA_VIEW_DATA(na), rb_intern("allocate"), 0);
|
29
29
|
break;
|
30
|
-
case
|
31
|
-
//ptr = ((
|
30
|
+
case CUMO_NARRAY_FILEMAP_T:
|
31
|
+
//ptr = ((cumo_narray_filemap_t*)na)->ptr;
|
32
32
|
// to be implemented
|
33
33
|
default:
|
34
|
-
rb_bug("invalid narray type : %d",
|
34
|
+
rb_bug("invalid narray type : %d",CUMO_NA_TYPE(na));
|
35
35
|
}
|
36
36
|
return self;
|
37
37
|
}
|
@@ -60,7 +60,7 @@ static VALUE
|
|
60
60
|
int result_nd;
|
61
61
|
size_t pos;
|
62
62
|
|
63
|
-
result_nd =
|
64
|
-
return
|
63
|
+
result_nd = cumo_na_get_result_dimension(self, argc, argv, sizeof(dtype), &pos);
|
64
|
+
return cumo_na_aref_main(argc, argv, self, 0, result_nd, pos);
|
65
65
|
}
|
66
66
|
}
|
@@ -38,12 +38,12 @@ static VALUE
|
|
38
38
|
size_t pos;
|
39
39
|
char *ptr;
|
40
40
|
|
41
|
-
result_nd =
|
41
|
+
result_nd = cumo_na_get_result_dimension(self, argc, argv, sizeof(dtype), &pos);
|
42
42
|
if (result_nd) {
|
43
|
-
return
|
43
|
+
return cumo_na_aref_main(argc, argv, self, 0, result_nd, pos);
|
44
44
|
} else {
|
45
|
-
ptr =
|
46
|
-
|
45
|
+
ptr = cumo_na_get_pointer_for_read(self) + pos;
|
46
|
+
CUMO_SHOW_SYNCHRONIZE_WARNING_ONCE("<%=name%>", "<%=type_name%>");
|
47
47
|
cumo_cuda_runtime_check_status(cudaDeviceSynchronize());
|
48
48
|
return m_extract(ptr);
|
49
49
|
}
|
@@ -48,8 +48,8 @@ static VALUE
|
|
48
48
|
if (argc==0) {
|
49
49
|
<%=c_func.sub(/_aset/,"_store")%>(self, argv[argc]);
|
50
50
|
} else {
|
51
|
-
nd =
|
52
|
-
a =
|
51
|
+
nd = cumo_na_get_result_dimension(self, argc, argv, sizeof(dtype), &pos);
|
52
|
+
a = cumo_na_aref_main(argc, argv, self, 0, nd, pos);
|
53
53
|
<%=c_func.sub(/_aset/,"_store")%>(a, argv[argc]);
|
54
54
|
}
|
55
55
|
return argv[argc];
|
@@ -12,11 +12,11 @@
|
|
12
12
|
//<% end %>
|
13
13
|
|
14
14
|
<% unless type_name == 'robject' %>
|
15
|
-
void <%="cumo_#{c_iter}_kernel_launch"%>(
|
15
|
+
void <%="cumo_#{c_iter}_kernel_launch"%>(cumo_na_iarray_t* a1, cumo_na_iarray_t* a2, cumo_na_iarray_t* a3, cumo_na_indexer_t* indexer);
|
16
16
|
<% end %>
|
17
17
|
|
18
18
|
static void
|
19
|
-
<%=c_iter%>(
|
19
|
+
<%=c_iter%>(cumo_na_loop_t *const lp)
|
20
20
|
{
|
21
21
|
<% if type_name == 'robject' %>
|
22
22
|
{
|
@@ -24,16 +24,16 @@ static void
|
|
24
24
|
char *p1, *p2, *p3;
|
25
25
|
ssize_t s1, s2, s3;
|
26
26
|
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
27
|
+
CUMO_INIT_COUNTER(lp, n);
|
28
|
+
CUMO_INIT_PTR(lp, 0, p1, s1);
|
29
|
+
CUMO_INIT_PTR(lp, 1, p2, s2);
|
30
|
+
CUMO_INIT_PTR(lp, 2, p3, s3);
|
31
31
|
|
32
|
-
|
32
|
+
CUMO_SHOW_SYNCHRONIZE_FIXME_WARNING_ONCE("<%=name%>", "<%=type_name%>");
|
33
33
|
//<% if need_align %>
|
34
|
-
if (
|
35
|
-
|
36
|
-
|
34
|
+
if (cumo_is_aligned(p1,sizeof(dtype)) &&
|
35
|
+
cumo_is_aligned(p2,sizeof(dtype)) &&
|
36
|
+
cumo_is_aligned(p3,sizeof(dtype)) ) {
|
37
37
|
|
38
38
|
if (s1 == sizeof(dtype) &&
|
39
39
|
s2 == sizeof(dtype) &&
|
@@ -52,9 +52,9 @@ static void
|
|
52
52
|
}
|
53
53
|
return;
|
54
54
|
}
|
55
|
-
if (
|
56
|
-
|
57
|
-
|
55
|
+
if (cumo_is_aligned_step(s1,sizeof(dtype)) &&
|
56
|
+
cumo_is_aligned_step(s2,sizeof(dtype)) &&
|
57
|
+
cumo_is_aligned_step(s3,sizeof(dtype)) ) {
|
58
58
|
//<% end %>
|
59
59
|
|
60
60
|
if (s2 == 0){ // Broadcasting from scalar value.
|
@@ -102,20 +102,20 @@ static void
|
|
102
102
|
}
|
103
103
|
for (i=0; i<n; i++) {
|
104
104
|
dtype x, y, z;
|
105
|
-
|
106
|
-
|
105
|
+
CUMO_GET_DATA_STRIDE(p1,s1,dtype,x);
|
106
|
+
CUMO_GET_DATA_STRIDE(p2,s2,dtype,y);
|
107
107
|
check_intdivzero(y);
|
108
108
|
z = m_<%=name%>(x,y);
|
109
|
-
|
109
|
+
CUMO_SET_DATA_STRIDE(p3,s3,dtype,z);
|
110
110
|
}
|
111
111
|
//<% end %>
|
112
112
|
}
|
113
113
|
<% else %>
|
114
114
|
{
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
115
|
+
cumo_na_iarray_t a1 = cumo_na_make_iarray(&lp->args[0]);
|
116
|
+
cumo_na_iarray_t a2 = cumo_na_make_iarray(&lp->args[1]);
|
117
|
+
cumo_na_iarray_t a3 = cumo_na_make_iarray(&lp->args[2]);
|
118
|
+
cumo_na_indexer_t indexer = cumo_na_make_indexer(&lp->args[0]);
|
119
119
|
|
120
120
|
<%="cumo_#{c_iter}_kernel_launch"%>(&a1,&a2,&a3,&indexer);
|
121
121
|
}
|
@@ -126,15 +126,15 @@ static void
|
|
126
126
|
static VALUE
|
127
127
|
<%=c_func%>_self(VALUE self, VALUE other)
|
128
128
|
{
|
129
|
-
|
130
|
-
|
129
|
+
cumo_ndfunc_arg_in_t ain[2] = {{cT,0},{cT,0}};
|
130
|
+
cumo_ndfunc_arg_out_t aout[1] = {{cT,0}};
|
131
131
|
<% if type_name == 'robject' %>
|
132
|
-
|
132
|
+
cumo_ndfunc_t ndf = { <%=c_iter%>, CUMO_STRIDE_LOOP, 2, 1, ain, aout };
|
133
133
|
<% else %>
|
134
|
-
|
134
|
+
cumo_ndfunc_t ndf = { <%=c_iter%>, CUMO_STRIDE_LOOP|CUMO_NDF_INDEXER_LOOP, 2, 1, ain, aout };
|
135
135
|
<% end %>
|
136
136
|
|
137
|
-
return
|
137
|
+
return cumo_na_ndloop(&ndf, 2, self, other);
|
138
138
|
}
|
139
139
|
|
140
140
|
/*
|
@@ -151,12 +151,12 @@ static VALUE
|
|
151
151
|
<% else %>
|
152
152
|
VALUE klass, v;
|
153
153
|
|
154
|
-
klass =
|
154
|
+
klass = cumo_na_upcast(rb_obj_class(self),rb_obj_class(other));
|
155
155
|
if (klass==cT) {
|
156
156
|
return <%=c_func%>_self(self, other);
|
157
157
|
} else {
|
158
|
-
v = rb_funcall(klass,
|
159
|
-
return rb_funcall(v, <%=
|
158
|
+
v = rb_funcall(klass, cumo_id_cast, 1, self);
|
159
|
+
return rb_funcall(v, <%=cumo_id_op%>, 1, other);
|
160
160
|
}
|
161
161
|
<% end %>
|
162
162
|
}
|