cumo 0.2.4 → 0.2.5
- checksums.yaml +4 -4
- data/CHANGELOG.md +6 -0
- data/README.md +23 -24
- data/bench/cumo_bench.rb +1 -0
- data/ext/cumo/cuda/memory_pool.cpp +9 -1
- data/ext/cumo/cuda/memory_pool_impl.cpp +2 -13
- data/ext/cumo/cumo.c +4 -4
- data/ext/cumo/depend.erb +1 -1
- data/ext/cumo/extconf.rb +2 -0
- data/ext/cumo/include/cumo.h +4 -4
- data/ext/cumo/include/cumo/indexer.h +50 -0
- data/ext/cumo/include/cumo/intern.h +1 -0
- data/ext/cumo/include/cumo/narray.h +20 -1
- data/ext/cumo/include/cumo/narray_kernel.h +10 -0
- data/ext/cumo/include/cumo/ndloop.h +1 -1
- data/ext/cumo/narray/array.c +8 -2
- data/ext/cumo/narray/gen/tmpl/store_array.c +15 -3
- data/ext/cumo/narray/gen/tmpl_bit/store_array.c +10 -2
- data/ext/cumo/narray/index.c +77 -43
- data/ext/cumo/narray/narray.c +11 -2
- data/ext/cumo/narray/ndloop.c +49 -1
- data/ext/cumo/narray/ndloop_kernel.cu +97 -0
- data/ext/cumo/narray/step.c +56 -250
- data/lib/cumo/narray/extra.rb +50 -1
- metadata +4 -4
data/ext/cumo/narray/array.c
CHANGED
@@ -117,10 +117,12 @@ static int cumo_na_mdai_object_type(int type, VALUE v)
     if (rb_obj_is_kind_of(v, rb_cRange)) {
         MDAI_ATTR_TYPE(type,v,begin);
         MDAI_ATTR_TYPE(type,v,end);
-
+#ifdef HAVE_RB_ARITHMETIC_SEQUENCE_EXTRACT
+    } else if (rb_obj_is_kind_of(v, rb_cArithSeq)) {
         MDAI_ATTR_TYPE(type,v,begin);
         MDAI_ATTR_TYPE(type,v,end);
         MDAI_ATTR_TYPE(type,v,step);
+#endif
     } else {
         type = cumo_na_object_type(type,v);
     }
@@ -205,7 +207,11 @@ cumo_na_mdai_investigate(cumo_na_mdai_t *mdai, int ndim)
         }
     }
     else
-
+#ifdef HAVE_RB_ARITHMETIC_SEQUENCE_EXTRACT
+    if (rb_obj_is_kind_of(v, rb_cRange) || rb_obj_is_kind_of(v, rb_cArithSeq)) {
+#else
+    if (rb_obj_is_kind_of(v, rb_cRange) || rb_obj_is_kind_of(v, rb_cEnumerator)) {
+#endif
         cumo_na_step_sequence(v,&length,&dbeg,&dstep);
         len += length-1;
         mdai->type = cumo_na_mdai_object_type(mdai->type, v);
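Taken together, the two array.c hunks above make the array-construction helpers (`cumo_na_mdai_object_type` / `cumo_na_mdai_investigate`) expand an `Enumerator::ArithmeticSequence` element the same way a `Range` element is already expanded, falling back to plain `Enumerator` on Rubies without `rb_arithmetic_sequence_extract` (pre-2.6). A rough usage sketch of what this enables; the exact results are assumptions based on how `Range` elements already behave:

```ruby
require "cumo/narray"

Cumo::Int32[1..4]            # Range elements were already expanded: [1, 2, 3, 4]
Cumo::Int32[(1..9).step(2)]  # on Ruby >= 2.6, Range#step returns an ArithmeticSequence;
                             # presumably expands to [1, 3, 5, 7, 9]
```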
data/ext/cumo/narray/gen/tmpl/store_array.c
CHANGED
@@ -65,7 +65,11 @@ static void
     if (idx1) {
         for (i=i1=0; i1<n1 && i<n; i++,i1++) {
             x = ptr[i1];
-
+#ifdef HAVE_RB_ARITHMETIC_SEQUENCE_EXTRACT
+            if (rb_obj_is_kind_of(x, rb_cRange) || rb_obj_is_kind_of(x, rb_cArithSeq)) {
+#else
+            if (rb_obj_is_kind_of(x, rb_cRange) || rb_obj_is_kind_of(x, rb_cEnumerator)) {
+#endif
                 cumo_na_step_sequence(x,&len,&beg,&step);
                 for (c=0; c<len && i<n; c++,i++) {
                     y = beg + step * c;
@@ -81,7 +85,11 @@ static void
     } else {
         for (i=i1=0; i1<n1 && i<n; i++,i1++) {
             x = ptr[i1];
-
+#ifdef HAVE_RB_ARITHMETIC_SEQUENCE_EXTRACT
+            if (rb_obj_is_kind_of(x, rb_cRange) || rb_obj_is_kind_of(x, rb_cArithSeq)) {
+#else
+            if (rb_obj_is_kind_of(x, rb_cRange) || rb_obj_is_kind_of(x, rb_cEnumerator)) {
+#endif
                 cumo_na_step_sequence(x,&len,&beg,&step);
                 for (c=0; c<len && i<n; c++,i++) {
                     y = beg + step * c;
@@ -110,7 +118,11 @@ static void
     dtype* host_z = ALLOC_N(dtype, n);
     for (i=i1=0; i1<n1 && i<n; i1++) {
         x = ptr[i1];
-
+#ifdef HAVE_RB_ARITHMETIC_SEQUENCE_EXTRACT
+        if (rb_obj_is_kind_of(x, rb_cRange) || rb_obj_is_kind_of(x, rb_cArithSeq)) {
+#else
+        if (rb_obj_is_kind_of(x, rb_cRange) || rb_obj_is_kind_of(x, rb_cEnumerator)) {
+#endif
             cumo_na_step_sequence(x,&len,&beg,&step);
             for (c=0; c<len && i<n; c++,i++) {
                 y = beg + step * c;
data/ext/cumo/narray/gen/tmpl_bit/store_array.c
CHANGED
@@ -52,7 +52,11 @@ static void
     if (idx1) {
         for (i=i1=0; i1<n1 && i<n; i++,i1++) {
             x = ptr[i1];
-
+#ifdef HAVE_RB_ARITHMETIC_SEQUENCE_EXTRACT
+            if (rb_obj_is_kind_of(x, rb_cRange) || rb_obj_is_kind_of(x, rb_cArithSeq)) {
+#else
+            if (rb_obj_is_kind_of(x, rb_cRange) || rb_obj_is_kind_of(x, rb_cEnumerator)) {
+#endif
                 cumo_na_step_sequence(x,&len,&beg,&step);
                 for (c=0; c<len && i<n; c++,i++) {
                     y = beg + step * c;
@@ -69,7 +73,11 @@ static void
     } else {
         for (i=i1=0; i1<n1 && i<n; i++,i1++) {
             x = ptr[i1];
-
+#ifdef HAVE_RB_ARITHMETIC_SEQUENCE_EXTRACT
+            if (rb_obj_is_kind_of(x, rb_cRange) || rb_obj_is_kind_of(x, rb_cArithSeq)) {
+#else
+            if (rb_obj_is_kind_of(x, rb_cRange) || rb_obj_is_kind_of(x, rb_cEnumerator)) {
+#endif
                 cumo_na_step_sequence(x,&len,&beg,&step);
                 for (c=0; c<len && i<n; c++,i++) {
                     y = beg + step * c;
data/ext/cumo/narray/index.c
CHANGED
@@ -12,23 +12,6 @@
 #define cIndex cumo_cInt32
 #endif

-// from ruby/enumerator.c
-struct enumerator {
-    VALUE obj;
-    ID meth;
-    VALUE args;
-    // use only above in this source
-    VALUE fib;
-    VALUE dst;
-    VALUE lookahead;
-    VALUE feedvalue;
-    VALUE stop_exc;
-    VALUE size;
-    // incompatible below depending on ruby version
-    //VALUE procs; // ruby 2.4
-    //rb_enumerator_size_func *size_fn; // ruby 2.1-2.4
-    //VALUE (*size_fn)(ANYARGS); // ruby 2.0
-};

 // note: the memory refed by this pointer is not freed and causes memroy leak.
 //
@@ -204,6 +187,42 @@ cumo_na_parse_range(VALUE range, ssize_t step, int orig_dim, ssize_t size, cumo_
     ssize_t beg, end, beg_orig, end_orig;
     const char *dot = "..", *edot = "...";

+#ifdef HAVE_RB_ARITHMETIC_SEQUENCE_EXTRACT
+    rb_arithmetic_sequence_components_t x;
+    rb_arithmetic_sequence_extract(range, &x);
+    step = NUM2SSIZET(x.step);
+
+    beg = beg_orig = NUM2SSIZET(x.begin);
+    if (beg < 0) {
+        beg += size;
+    }
+    if (T_NIL == TYPE(x.end)) { // endless range
+        end = size -1;
+        if (RTEST(x.exclude_end)) {
+            dot = edot;
+        }
+    } else {
+        end = end_orig = NUM2SSIZET(x.end);
+        if (end < 0) {
+            end += size;
+        }
+        if (RTEST(x.exclude_end)) {
+            end--;
+            dot = edot;
+        }
+    }
+    if (beg < 0 || beg >= size || end < 0 || end >= size) {
+        if (T_NIL == TYPE(x.end)) { // endless range
+            rb_raise(rb_eRangeError,
+                     "%"SZF"d%s is out of range for size=%"SZF"d",
+                     beg_orig, dot, size);
+        } else {
+            rb_raise(rb_eRangeError,
+                     "%"SZF"d%s%"SZF"d is out of range for size=%"SZF"d",
+                     beg_orig, dot, end_orig, size);
+        }
+    }
+#else
     beg = beg_orig = NUM2SSIZET(rb_funcall(range,cumo_id_beg,0));
     if (beg < 0) {
         beg += size;
@@ -222,44 +241,59 @@ cumo_na_parse_range(VALUE range, ssize_t step, int orig_dim, ssize_t size, cumo_
                  "%"SZF"d%s%"SZF"d is out of range for size=%"SZF"d",
                  beg_orig, dot, end_orig, size);
     }
+#endif
     n = (end-beg)/step+1;
     if (n<0) n=0;
     cumo_na_index_set_step(q,orig_dim,n,beg,step);

 }

-
-
+void
+cumo_na_parse_enumerator_step(VALUE enum_obj, VALUE *pstep)
 {
     int len;
-
-
+    VALUE step;
+    cumo_enumerator_t *e;

     if (!RB_TYPE_P(enum_obj, T_DATA)) {
         rb_raise(rb_eTypeError,"wrong argument type (not T_DATA)");
     }
-    e = (
+    e = (cumo_enumerator_t *)DATA_PTR(enum_obj);
+
+    if (!rb_obj_is_kind_of(e->obj, rb_cRange)) {
+        rb_raise(rb_eTypeError,"not Range object");
+    }

-    if (
-
-
+    if (e->meth == cumo_id_each) {
+        step = INT2NUM(1);
+    }
+    else if (e->meth == cumo_id_step) {
+        if (TYPE(e->args) != T_ARRAY) {
+            rb_raise(rb_eArgError,"no argument for step");
         }
-
-
-
-    }
-    len = RARRAY_LEN(e->args);
-    if (len != 1) {
-        rb_raise(rb_eArgError,"invalid number of step argument (1 for %d)",len);
-    }
-    step = NUM2SSIZET(RARRAY_AREF(e->args,0));
-    cumo_na_parse_range(e->obj, step, orig_dim, size, q);
-    } else {
-        rb_raise(rb_eTypeError,"unknown Range method: %s",rb_id2name(e->meth));
+        len = RARRAY_LEN(e->args);
+        if (len != 1) {
+            rb_raise(rb_eArgError,"invalid number of step argument (1 for %d)",len);
     }
+        step = RARRAY_AREF(e->args,0);
     } else {
-        rb_raise(rb_eTypeError,"
+        rb_raise(rb_eTypeError,"unknown Range method: %s",rb_id2name(e->meth));
     }
+    if (pstep) *pstep = step;
+}
+
+static void
+cumo_na_parse_enumerator(VALUE enum_obj, int orig_dim, ssize_t size, cumo_na_index_arg_t *q)
+{
+    VALUE step;
+    cumo_enumerator_t *e;
+
+    if (!RB_TYPE_P(enum_obj, T_DATA)) {
+        rb_raise(rb_eTypeError,"wrong argument type (not T_DATA)");
+    }
+    cumo_na_parse_enumerator_step(enum_obj, &step);
+    e = (cumo_enumerator_t *)DATA_PTR(enum_obj);
+    cumo_na_parse_range(e->obj, NUM2SSIZET(step), orig_dim, size, q); // e->obj : Range Object
 }

 // Analyze *a* which is *i*-th index object and store the information to q
@@ -316,14 +350,14 @@ cumo_na_index_parse_each(volatile VALUE a, ssize_t size, int i, cumo_na_index_ar
     if (rb_obj_is_kind_of(a, rb_cRange)) {
         cumo_na_parse_range(a, 1, i, size, q);
     }
+#ifdef HAVE_RB_ARITHMETIC_SEQUENCE_EXTRACT
+    else if (rb_obj_is_kind_of(a, rb_cArithSeq)) {
+        cumo_na_parse_range(a, 1, i, size, q);
+    }
+#endif
     else if (rb_obj_is_kind_of(a, rb_cEnumerator)) {
         cumo_na_parse_enumerator(a, i, size, q);
     }
-    else if (rb_obj_is_kind_of(a, cumo_na_cStep)) {
-        ssize_t beg, step, n;
-        cumo_na_step_array_index(a, size, (size_t*)(&n), &beg, &step);
-        cumo_na_index_set_step(q,i,n,beg,step);
-    }
     // NArray index
     else if (CUMO_NA_CumoIsNArray(a)) {
         cumo_na_parse_narray_index(a, i, size, q);
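The index.c changes drop the local copy of Ruby's private `struct enumerator` (and the old `cumo_na_cStep` branch) and instead parse an `Enumerator::ArithmeticSequence` through `cumo_na_parse_range`, or an ordinary `Enumerator` through the new `cumo_na_parse_enumerator_step`, which treats `each` as step 1 and reads the argument of `step`. A hedged sketch of the indexing this supports; the selected values are assumptions based on the parsing logic above:

```ruby
a = Cumo::Int32.new(10).seq   # presumably [0, 1, 2, ..., 9]

a[2..6]              # Range index (already supported)
a[(1..9).step(2)]    # ArithmeticSequence on Ruby >= 2.6, a plain Enumerator on
                     # older Rubies; presumably selects [1, 3, 5, 7, 9]
a[(1..9).each]       # an `each' Enumerator over a Range is treated as step 1
```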
data/ext/cumo/narray/narray.c
CHANGED
@@ -40,10 +40,12 @@ VALUE cumo_sym_option;
 VALUE cumo_sym_loop_opt;
 VALUE cumo_sym_init;

-VALUE cumo_na_cStep;
 #ifndef HAVE_RB_CCOMPLEX
 VALUE rb_cComplex;
 #endif
+#ifdef HAVE_RB_ARITHMETIC_SEQUENCE_EXTRACT
+VALUE rb_cArithSeq;
+#endif

 int cumo_na_inspect_rows_=20;
 int cumo_na_inspect_cols_=80;
@@ -1512,7 +1514,11 @@ cumo_na_get_reduce_flag_from_axes(VALUE cumo_na_obj, VALUE axes)
         step = 0;
         //printf("beg=%d step=%d len=%d\n",beg,step,len);
     } else if (rb_obj_is_kind_of(v,rb_cRange) ||
-
+#ifdef HAVE_RB_ARITHMETIC_SEQUENCE_EXTRACT
+               rb_obj_is_kind_of(v,rb_cArithSeq)) {
+#else
+               rb_obj_is_kind_of(v,rb_cEnumerator)) {
+#endif
         cumo_na_step_array_index( v, ndim, &len, &beg, &step );
     } else {
         rb_raise(cumo_na_eDimensionError, "invalid dimension argument %s",
@@ -1849,6 +1855,9 @@ Init_cumo_narray()
     rb_require("complex");
     rb_cComplex = rb_const_get(rb_cObject, rb_intern("Complex"));
 #endif
+#ifdef HAVE_RB_ARITHMETIC_SEQUENCE_EXTRACT
+    rb_cArithSeq = rb_path2class("Enumerator::ArithmeticSequence");
+#endif

     rb_define_const(cNArray, "VERSION", rb_str_new2(CUMO_VERSION));

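All of the `HAVE_RB_ARITHMETIC_SEQUENCE_EXTRACT` guards above, including the `rb_cArithSeq` lookup in `Init_cumo_narray`, hinge on a compile-time feature check; the two-line extconf.rb change in this release is not shown here, but a check of this kind is what mkmf would need (hypothetical sketch, not the actual diff):

```ruby
# extconf.rb (hypothetical): mkmf's have_func defines
# HAVE_RB_ARITHMETIC_SEQUENCE_EXTRACT when the C API exists (Ruby >= 2.6),
# which selects the ArithmeticSequence code paths above.
require "mkmf"
have_func("rb_arithmetic_sequence_extract")
```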
data/ext/cumo/narray/ndloop.c
CHANGED
@@ -1,5 +1,6 @@
 #include <ruby.h>
 #include "cumo.h"
+#include "cumo/indexer.h"
 #include "cumo/narray.h"
 #include "cumo/cuda/memory_pool.h"
 #include "cumo/cuda/runtime.h"
@@ -1164,11 +1165,48 @@ cumo_ndfunc_set_bufcp(cumo_ndfunc_t *nf, cumo_na_md_loop_t *lp)
     }
 }

+static cumo_na_iarray_stridx_t
+cumo_na_make_iarray_buffer_copy(cumo_na_buffer_copy_t* lp)
+{
+    cumo_na_iarray_stridx_t iarray;
+    int i;
+    int ndim = lp->ndim;
+    iarray.ptr = lp->src_ptr + lp->src_iter[0].pos;
+    for (i = 0; i < ndim; ++i) {
+        if (LITER_SRC(lp,i).idx) {
+            CUMO_SDX_SET_INDEX(iarray.stridx[i], LITER_SRC(lp,i).idx);
+        } else {
+            CUMO_SDX_SET_STRIDE(iarray.stridx[i], LITER_SRC(lp,i).step);
+        }
+    }
+    return iarray;
+}
+
+static cumo_na_indexer_t
+cumo_na_make_indexer_buffer_copy(cumo_na_buffer_copy_t* lp)
+{
+    cumo_na_indexer_t indexer;
+    int i;
+    indexer.ndim = lp->ndim;
+    indexer.total_size = 1;
+    for (i = 0; i< lp->ndim; ++i) {
+        indexer.shape[i] = lp->n[i];
+        indexer.total_size *= lp->n[i];
+    }
+    return indexer;
+}
+
+void cumo_ndloop_copy_to_buffer_kernel_launch(cumo_na_iarray_stridx_t *a, cumo_na_indexer_t* indexer, char *buf, size_t elmsz);

 // Make contiguous memory for ops not supporting index or stride (step) loop
 static void
 ndloop_copy_to_buffer(cumo_na_buffer_copy_t *lp)
 {
+    cumo_na_iarray_stridx_t a = cumo_na_make_iarray_buffer_copy(lp);
+    cumo_na_indexer_t indexer = cumo_na_make_indexer_buffer_copy(lp);
+    cumo_ndloop_copy_to_buffer_kernel_launch(&a, &indexer, lp->buf_ptr, lp->elmsz);
+
+#if 0
     size_t *c;
     char *src, *buf;
     int i;
@@ -1230,11 +1268,19 @@ ndloop_copy_to_buffer(cumo_na_buffer_copy_t *lp)
 loop_end:
     ;
     DBG(printf("]\n"));
+#endif
 }

+void cumo_ndloop_copy_from_buffer_kernel_launch(cumo_na_iarray_stridx_t *a, cumo_na_indexer_t* indexer, char *buf, size_t elmsz);
+
 static void
 ndloop_copy_from_buffer(cumo_na_buffer_copy_t *lp)
 {
+    cumo_na_iarray_stridx_t a = cumo_na_make_iarray_buffer_copy(lp);
+    cumo_na_indexer_t indexer = cumo_na_make_indexer_buffer_copy(lp);
+    cumo_ndloop_copy_from_buffer_kernel_launch(&a, &indexer, lp->buf_ptr, lp->elmsz);
+
+#if 0
     size_t *c;
     char *src, *buf;
     int i;
@@ -1291,12 +1337,14 @@ ndloop_copy_from_buffer(cumo_na_buffer_copy_t *lp)
     for (;;) {
         if (i<=0) goto loop_end;
         i--;
-
+        ++c[i];
+        if (c[i] < lp->n[i]) break;
         c[i] = 0;
     }
 }
 loop_end:
     DBG(printf("]\n"));
+#endif
 }


data/ext/cumo/narray/ndloop_kernel.cu
ADDED
@@ -0,0 +1,97 @@
+#include "cumo/narray_kernel.h"
+#include "cumo/indexer.h"
+
+#if defined(__cplusplus)
+extern "C" {
+#if 0
+} /* satisfy cc-mode */
+#endif
+#endif
+
+#define CUMO_NDLOOP_COPY_FROM_BUFFER_KERNEL(NDIM) \
+__global__ void cumo_ndloop_copy_from_buffer_kernel_dim##NDIM( \
+        cumo_na_iarray_stridx_t a, cumo_na_indexer_t indexer, char *buf, size_t elmsz) { \
+    for (uint64_t i = blockIdx.x * blockDim.x + threadIdx.x; i < indexer.total_size; i += blockDim.x * gridDim.x) { \
+        cumo_na_indexer_set_dim##NDIM(&indexer, i); \
+        char* p = cumo_na_iarray_stridx_at_dim##NDIM(&a, &indexer); \
+        memcpy(p, buf + i * elmsz, elmsz); \
+    } \
+}
+
+#define CUMO_NDLOOP_COPY_TO_BUFFER_KERNEL(NDIM) \
+__global__ void cumo_ndloop_copy_to_buffer_kernel_dim##NDIM( \
+        cumo_na_iarray_stridx_t a, cumo_na_indexer_t indexer, char *buf, size_t elmsz) { \
+    for (uint64_t i = blockIdx.x * blockDim.x + threadIdx.x; i < indexer.total_size; i += blockDim.x * gridDim.x) { \
+        cumo_na_indexer_set_dim##NDIM(&indexer, i); \
+        char* p = cumo_na_iarray_stridx_at_dim##NDIM(&a, &indexer); \
+        memcpy(buf + i * elmsz, p, elmsz); \
+    } \
+}
+
+CUMO_NDLOOP_COPY_FROM_BUFFER_KERNEL(1)
+CUMO_NDLOOP_COPY_FROM_BUFFER_KERNEL(2)
+CUMO_NDLOOP_COPY_FROM_BUFFER_KERNEL(3)
+CUMO_NDLOOP_COPY_FROM_BUFFER_KERNEL(4)
+CUMO_NDLOOP_COPY_FROM_BUFFER_KERNEL()
+
+CUMO_NDLOOP_COPY_TO_BUFFER_KERNEL(1)
+CUMO_NDLOOP_COPY_TO_BUFFER_KERNEL(2)
+CUMO_NDLOOP_COPY_TO_BUFFER_KERNEL(3)
+CUMO_NDLOOP_COPY_TO_BUFFER_KERNEL(4)
+CUMO_NDLOOP_COPY_TO_BUFFER_KERNEL()
+
+#undef CUMO_NDLOOP_COPY_FROM_BUFFER_KERNEL
+#undef CUMO_NDLOOP_COPY_TO_BUFFER_KERNEL
+
+void cumo_ndloop_copy_from_buffer_kernel_launch(cumo_na_iarray_stridx_t *a, cumo_na_indexer_t* indexer, char *buf, size_t elmsz)
+{
+    size_t grid_dim = cumo_get_grid_dim(indexer->total_size);
+    size_t block_dim = cumo_get_block_dim(indexer->total_size);
+    switch (indexer->ndim) {
+    case 1:
+        cumo_ndloop_copy_from_buffer_kernel_dim1<<<grid_dim, block_dim>>>(*a,*indexer,buf,elmsz);
+        break;
+    case 2:
+        cumo_ndloop_copy_from_buffer_kernel_dim2<<<grid_dim, block_dim>>>(*a,*indexer,buf,elmsz);
+        break;
+    case 3:
+        cumo_ndloop_copy_from_buffer_kernel_dim3<<<grid_dim, block_dim>>>(*a,*indexer,buf,elmsz);
+        break;
+    case 4:
+        cumo_ndloop_copy_from_buffer_kernel_dim4<<<grid_dim, block_dim>>>(*a,*indexer,buf,elmsz);
+        break;
+    default:
+        cumo_ndloop_copy_from_buffer_kernel_dim<<<grid_dim, block_dim>>>(*a,*indexer,buf,elmsz);
+        break;
+    }
+}
+
+void cumo_ndloop_copy_to_buffer_kernel_launch(cumo_na_iarray_stridx_t *a, cumo_na_indexer_t* indexer, char *buf, size_t elmsz)
+{
+    size_t grid_dim = cumo_get_grid_dim(indexer->total_size);
+    size_t block_dim = cumo_get_block_dim(indexer->total_size);
+    switch (indexer->ndim) {
+    case 1:
+        cumo_ndloop_copy_to_buffer_kernel_dim1<<<grid_dim, block_dim>>>(*a,*indexer,buf,elmsz);
+        break;
+    case 2:
+        cumo_ndloop_copy_to_buffer_kernel_dim2<<<grid_dim, block_dim>>>(*a,*indexer,buf,elmsz);
+        break;
+    case 3:
+        cumo_ndloop_copy_to_buffer_kernel_dim3<<<grid_dim, block_dim>>>(*a,*indexer,buf,elmsz);
+        break;
+    case 4:
+        cumo_ndloop_copy_to_buffer_kernel_dim4<<<grid_dim, block_dim>>>(*a,*indexer,buf,elmsz);
+        break;
+    default:
+        cumo_ndloop_copy_to_buffer_kernel_dim<<<grid_dim, block_dim>>>(*a,*indexer,buf,elmsz);
+        break;
+    }
+}
+
+#if defined(__cplusplus)
+#if 0
+{ /* satisfy cc-mode */
+#endif
+} /* extern "C" { */
+#endif