cumo 0.2.4 → 0.2.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +6 -0
- data/README.md +23 -24
- data/bench/cumo_bench.rb +1 -0
- data/ext/cumo/cuda/memory_pool.cpp +9 -1
- data/ext/cumo/cuda/memory_pool_impl.cpp +2 -13
- data/ext/cumo/cumo.c +4 -4
- data/ext/cumo/depend.erb +1 -1
- data/ext/cumo/extconf.rb +2 -0
- data/ext/cumo/include/cumo.h +4 -4
- data/ext/cumo/include/cumo/indexer.h +50 -0
- data/ext/cumo/include/cumo/intern.h +1 -0
- data/ext/cumo/include/cumo/narray.h +20 -1
- data/ext/cumo/include/cumo/narray_kernel.h +10 -0
- data/ext/cumo/include/cumo/ndloop.h +1 -1
- data/ext/cumo/narray/array.c +8 -2
- data/ext/cumo/narray/gen/tmpl/store_array.c +15 -3
- data/ext/cumo/narray/gen/tmpl_bit/store_array.c +10 -2
- data/ext/cumo/narray/index.c +77 -43
- data/ext/cumo/narray/narray.c +11 -2
- data/ext/cumo/narray/ndloop.c +49 -1
- data/ext/cumo/narray/ndloop_kernel.cu +97 -0
- data/ext/cumo/narray/step.c +56 -250
- data/lib/cumo/narray/extra.rb +50 -1
- metadata +4 -4
data/ext/cumo/narray/array.c
CHANGED
@@ -117,10 +117,12 @@ static int cumo_na_mdai_object_type(int type, VALUE v)
|
|
117
117
|
if (rb_obj_is_kind_of(v, rb_cRange)) {
|
118
118
|
MDAI_ATTR_TYPE(type,v,begin);
|
119
119
|
MDAI_ATTR_TYPE(type,v,end);
|
120
|
-
|
120
|
+
#ifdef HAVE_RB_ARITHMETIC_SEQUENCE_EXTRACT
|
121
|
+
} else if (rb_obj_is_kind_of(v, rb_cArithSeq)) {
|
121
122
|
MDAI_ATTR_TYPE(type,v,begin);
|
122
123
|
MDAI_ATTR_TYPE(type,v,end);
|
123
124
|
MDAI_ATTR_TYPE(type,v,step);
|
125
|
+
#endif
|
124
126
|
} else {
|
125
127
|
type = cumo_na_object_type(type,v);
|
126
128
|
}
|
@@ -205,7 +207,11 @@ cumo_na_mdai_investigate(cumo_na_mdai_t *mdai, int ndim)
|
|
205
207
|
}
|
206
208
|
}
|
207
209
|
else
|
208
|
-
|
210
|
+
#ifdef HAVE_RB_ARITHMETIC_SEQUENCE_EXTRACT
|
211
|
+
if (rb_obj_is_kind_of(v, rb_cRange) || rb_obj_is_kind_of(v, rb_cArithSeq)) {
|
212
|
+
#else
|
213
|
+
if (rb_obj_is_kind_of(v, rb_cRange) || rb_obj_is_kind_of(v, rb_cEnumerator)) {
|
214
|
+
#endif
|
209
215
|
cumo_na_step_sequence(v,&length,&dbeg,&dstep);
|
210
216
|
len += length-1;
|
211
217
|
mdai->type = cumo_na_mdai_object_type(mdai->type, v);
|
@@ -65,7 +65,11 @@ static void
|
|
65
65
|
if (idx1) {
|
66
66
|
for (i=i1=0; i1<n1 && i<n; i++,i1++) {
|
67
67
|
x = ptr[i1];
|
68
|
-
|
68
|
+
#ifdef HAVE_RB_ARITHMETIC_SEQUENCE_EXTRACT
|
69
|
+
if (rb_obj_is_kind_of(x, rb_cRange) || rb_obj_is_kind_of(x, rb_cArithSeq)) {
|
70
|
+
#else
|
71
|
+
if (rb_obj_is_kind_of(x, rb_cRange) || rb_obj_is_kind_of(x, rb_cEnumerator)) {
|
72
|
+
#endif
|
69
73
|
cumo_na_step_sequence(x,&len,&beg,&step);
|
70
74
|
for (c=0; c<len && i<n; c++,i++) {
|
71
75
|
y = beg + step * c;
|
@@ -81,7 +85,11 @@ static void
|
|
81
85
|
} else {
|
82
86
|
for (i=i1=0; i1<n1 && i<n; i++,i1++) {
|
83
87
|
x = ptr[i1];
|
84
|
-
|
88
|
+
#ifdef HAVE_RB_ARITHMETIC_SEQUENCE_EXTRACT
|
89
|
+
if (rb_obj_is_kind_of(x, rb_cRange) || rb_obj_is_kind_of(x, rb_cArithSeq)) {
|
90
|
+
#else
|
91
|
+
if (rb_obj_is_kind_of(x, rb_cRange) || rb_obj_is_kind_of(x, rb_cEnumerator)) {
|
92
|
+
#endif
|
85
93
|
cumo_na_step_sequence(x,&len,&beg,&step);
|
86
94
|
for (c=0; c<len && i<n; c++,i++) {
|
87
95
|
y = beg + step * c;
|
@@ -110,7 +118,11 @@ static void
|
|
110
118
|
dtype* host_z = ALLOC_N(dtype, n);
|
111
119
|
for (i=i1=0; i1<n1 && i<n; i1++) {
|
112
120
|
x = ptr[i1];
|
113
|
-
|
121
|
+
#ifdef HAVE_RB_ARITHMETIC_SEQUENCE_EXTRACT
|
122
|
+
if (rb_obj_is_kind_of(x, rb_cRange) || rb_obj_is_kind_of(x, rb_cArithSeq)) {
|
123
|
+
#else
|
124
|
+
if (rb_obj_is_kind_of(x, rb_cRange) || rb_obj_is_kind_of(x, rb_cEnumerator)) {
|
125
|
+
#endif
|
114
126
|
cumo_na_step_sequence(x,&len,&beg,&step);
|
115
127
|
for (c=0; c<len && i<n; c++,i++) {
|
116
128
|
y = beg + step * c;
|
@@ -52,7 +52,11 @@ static void
|
|
52
52
|
if (idx1) {
|
53
53
|
for (i=i1=0; i1<n1 && i<n; i++,i1++) {
|
54
54
|
x = ptr[i1];
|
55
|
-
|
55
|
+
#ifdef HAVE_RB_ARITHMETIC_SEQUENCE_EXTRACT
|
56
|
+
if (rb_obj_is_kind_of(x, rb_cRange) || rb_obj_is_kind_of(x, rb_cArithSeq)) {
|
57
|
+
#else
|
58
|
+
if (rb_obj_is_kind_of(x, rb_cRange) || rb_obj_is_kind_of(x, rb_cEnumerator)) {
|
59
|
+
#endif
|
56
60
|
cumo_na_step_sequence(x,&len,&beg,&step);
|
57
61
|
for (c=0; c<len && i<n; c++,i++) {
|
58
62
|
y = beg + step * c;
|
@@ -69,7 +73,11 @@ static void
|
|
69
73
|
} else {
|
70
74
|
for (i=i1=0; i1<n1 && i<n; i++,i1++) {
|
71
75
|
x = ptr[i1];
|
72
|
-
|
76
|
+
#ifdef HAVE_RB_ARITHMETIC_SEQUENCE_EXTRACT
|
77
|
+
if (rb_obj_is_kind_of(x, rb_cRange) || rb_obj_is_kind_of(x, rb_cArithSeq)) {
|
78
|
+
#else
|
79
|
+
if (rb_obj_is_kind_of(x, rb_cRange) || rb_obj_is_kind_of(x, rb_cEnumerator)) {
|
80
|
+
#endif
|
73
81
|
cumo_na_step_sequence(x,&len,&beg,&step);
|
74
82
|
for (c=0; c<len && i<n; c++,i++) {
|
75
83
|
y = beg + step * c;
|
data/ext/cumo/narray/index.c
CHANGED
@@ -12,23 +12,6 @@
|
|
12
12
|
#define cIndex cumo_cInt32
|
13
13
|
#endif
|
14
14
|
|
15
|
-
// from ruby/enumerator.c
|
16
|
-
struct enumerator {
|
17
|
-
VALUE obj;
|
18
|
-
ID meth;
|
19
|
-
VALUE args;
|
20
|
-
// use only above in this source
|
21
|
-
VALUE fib;
|
22
|
-
VALUE dst;
|
23
|
-
VALUE lookahead;
|
24
|
-
VALUE feedvalue;
|
25
|
-
VALUE stop_exc;
|
26
|
-
VALUE size;
|
27
|
-
// incompatible below depending on ruby version
|
28
|
-
//VALUE procs; // ruby 2.4
|
29
|
-
//rb_enumerator_size_func *size_fn; // ruby 2.1-2.4
|
30
|
-
//VALUE (*size_fn)(ANYARGS); // ruby 2.0
|
31
|
-
};
|
32
15
|
|
33
16
|
// note: the memory referenced by this pointer is not freed and causes a memory leak.
|
34
17
|
//
|
@@ -204,6 +187,42 @@ cumo_na_parse_range(VALUE range, ssize_t step, int orig_dim, ssize_t size, cumo_
|
|
204
187
|
ssize_t beg, end, beg_orig, end_orig;
|
205
188
|
const char *dot = "..", *edot = "...";
|
206
189
|
|
190
|
+
#ifdef HAVE_RB_ARITHMETIC_SEQUENCE_EXTRACT
|
191
|
+
rb_arithmetic_sequence_components_t x;
|
192
|
+
rb_arithmetic_sequence_extract(range, &x);
|
193
|
+
step = NUM2SSIZET(x.step);
|
194
|
+
|
195
|
+
beg = beg_orig = NUM2SSIZET(x.begin);
|
196
|
+
if (beg < 0) {
|
197
|
+
beg += size;
|
198
|
+
}
|
199
|
+
if (T_NIL == TYPE(x.end)) { // endless range
|
200
|
+
end = size -1;
|
201
|
+
if (RTEST(x.exclude_end)) {
|
202
|
+
dot = edot;
|
203
|
+
}
|
204
|
+
} else {
|
205
|
+
end = end_orig = NUM2SSIZET(x.end);
|
206
|
+
if (end < 0) {
|
207
|
+
end += size;
|
208
|
+
}
|
209
|
+
if (RTEST(x.exclude_end)) {
|
210
|
+
end--;
|
211
|
+
dot = edot;
|
212
|
+
}
|
213
|
+
}
|
214
|
+
if (beg < 0 || beg >= size || end < 0 || end >= size) {
|
215
|
+
if (T_NIL == TYPE(x.end)) { // endless range
|
216
|
+
rb_raise(rb_eRangeError,
|
217
|
+
"%"SZF"d%s is out of range for size=%"SZF"d",
|
218
|
+
beg_orig, dot, size);
|
219
|
+
} else {
|
220
|
+
rb_raise(rb_eRangeError,
|
221
|
+
"%"SZF"d%s%"SZF"d is out of range for size=%"SZF"d",
|
222
|
+
beg_orig, dot, end_orig, size);
|
223
|
+
}
|
224
|
+
}
|
225
|
+
#else
|
207
226
|
beg = beg_orig = NUM2SSIZET(rb_funcall(range,cumo_id_beg,0));
|
208
227
|
if (beg < 0) {
|
209
228
|
beg += size;
|
@@ -222,44 +241,59 @@ cumo_na_parse_range(VALUE range, ssize_t step, int orig_dim, ssize_t size, cumo_
|
|
222
241
|
"%"SZF"d%s%"SZF"d is out of range for size=%"SZF"d",
|
223
242
|
beg_orig, dot, end_orig, size);
|
224
243
|
}
|
244
|
+
#endif
|
225
245
|
n = (end-beg)/step+1;
|
226
246
|
if (n<0) n=0;
|
227
247
|
cumo_na_index_set_step(q,orig_dim,n,beg,step);
|
228
248
|
|
229
249
|
}
|
230
250
|
|
231
|
-
|
232
|
-
|
251
|
+
void
|
252
|
+
cumo_na_parse_enumerator_step(VALUE enum_obj, VALUE *pstep)
|
233
253
|
{
|
234
254
|
int len;
|
235
|
-
|
236
|
-
|
255
|
+
VALUE step;
|
256
|
+
cumo_enumerator_t *e;
|
237
257
|
|
238
258
|
if (!RB_TYPE_P(enum_obj, T_DATA)) {
|
239
259
|
rb_raise(rb_eTypeError,"wrong argument type (not T_DATA)");
|
240
260
|
}
|
241
|
-
e = (
|
261
|
+
e = (cumo_enumerator_t *)DATA_PTR(enum_obj);
|
262
|
+
|
263
|
+
if (!rb_obj_is_kind_of(e->obj, rb_cRange)) {
|
264
|
+
rb_raise(rb_eTypeError,"not Range object");
|
265
|
+
}
|
242
266
|
|
243
|
-
if (
|
244
|
-
|
245
|
-
|
267
|
+
if (e->meth == cumo_id_each) {
|
268
|
+
step = INT2NUM(1);
|
269
|
+
}
|
270
|
+
else if (e->meth == cumo_id_step) {
|
271
|
+
if (TYPE(e->args) != T_ARRAY) {
|
272
|
+
rb_raise(rb_eArgError,"no argument for step");
|
246
273
|
}
|
247
|
-
|
248
|
-
|
249
|
-
|
250
|
-
}
|
251
|
-
len = RARRAY_LEN(e->args);
|
252
|
-
if (len != 1) {
|
253
|
-
rb_raise(rb_eArgError,"invalid number of step argument (1 for %d)",len);
|
254
|
-
}
|
255
|
-
step = NUM2SSIZET(RARRAY_AREF(e->args,0));
|
256
|
-
cumo_na_parse_range(e->obj, step, orig_dim, size, q);
|
257
|
-
} else {
|
258
|
-
rb_raise(rb_eTypeError,"unknown Range method: %s",rb_id2name(e->meth));
|
274
|
+
len = RARRAY_LEN(e->args);
|
275
|
+
if (len != 1) {
|
276
|
+
rb_raise(rb_eArgError,"invalid number of step argument (1 for %d)",len);
|
259
277
|
}
|
278
|
+
step = RARRAY_AREF(e->args,0);
|
260
279
|
} else {
|
261
|
-
rb_raise(rb_eTypeError,"
|
280
|
+
rb_raise(rb_eTypeError,"unknown Range method: %s",rb_id2name(e->meth));
|
262
281
|
}
|
282
|
+
if (pstep) *pstep = step;
|
283
|
+
}
|
284
|
+
|
285
|
+
static void
|
286
|
+
cumo_na_parse_enumerator(VALUE enum_obj, int orig_dim, ssize_t size, cumo_na_index_arg_t *q)
|
287
|
+
{
|
288
|
+
VALUE step;
|
289
|
+
cumo_enumerator_t *e;
|
290
|
+
|
291
|
+
if (!RB_TYPE_P(enum_obj, T_DATA)) {
|
292
|
+
rb_raise(rb_eTypeError,"wrong argument type (not T_DATA)");
|
293
|
+
}
|
294
|
+
cumo_na_parse_enumerator_step(enum_obj, &step);
|
295
|
+
e = (cumo_enumerator_t *)DATA_PTR(enum_obj);
|
296
|
+
cumo_na_parse_range(e->obj, NUM2SSIZET(step), orig_dim, size, q); // e->obj : Range Object
|
263
297
|
}
|
264
298
|
|
265
299
|
// Analyze *a* which is *i*-th index object and store the information to q
|
@@ -316,14 +350,14 @@ cumo_na_index_parse_each(volatile VALUE a, ssize_t size, int i, cumo_na_index_ar
|
|
316
350
|
if (rb_obj_is_kind_of(a, rb_cRange)) {
|
317
351
|
cumo_na_parse_range(a, 1, i, size, q);
|
318
352
|
}
|
353
|
+
#ifdef HAVE_RB_ARITHMETIC_SEQUENCE_EXTRACT
|
354
|
+
else if (rb_obj_is_kind_of(a, rb_cArithSeq)) {
|
355
|
+
cumo_na_parse_range(a, 1, i, size, q);
|
356
|
+
}
|
357
|
+
#endif
|
319
358
|
else if (rb_obj_is_kind_of(a, rb_cEnumerator)) {
|
320
359
|
cumo_na_parse_enumerator(a, i, size, q);
|
321
360
|
}
|
322
|
-
else if (rb_obj_is_kind_of(a, cumo_na_cStep)) {
|
323
|
-
ssize_t beg, step, n;
|
324
|
-
cumo_na_step_array_index(a, size, (size_t*)(&n), &beg, &step);
|
325
|
-
cumo_na_index_set_step(q,i,n,beg,step);
|
326
|
-
}
|
327
361
|
// NArray index
|
328
362
|
else if (CUMO_NA_CumoIsNArray(a)) {
|
329
363
|
cumo_na_parse_narray_index(a, i, size, q);
|
data/ext/cumo/narray/narray.c
CHANGED
@@ -40,10 +40,12 @@ VALUE cumo_sym_option;
|
|
40
40
|
VALUE cumo_sym_loop_opt;
|
41
41
|
VALUE cumo_sym_init;
|
42
42
|
|
43
|
-
VALUE cumo_na_cStep;
|
44
43
|
#ifndef HAVE_RB_CCOMPLEX
|
45
44
|
VALUE rb_cComplex;
|
46
45
|
#endif
|
46
|
+
#ifdef HAVE_RB_ARITHMETIC_SEQUENCE_EXTRACT
|
47
|
+
VALUE rb_cArithSeq;
|
48
|
+
#endif
|
47
49
|
|
48
50
|
int cumo_na_inspect_rows_=20;
|
49
51
|
int cumo_na_inspect_cols_=80;
|
@@ -1512,7 +1514,11 @@ cumo_na_get_reduce_flag_from_axes(VALUE cumo_na_obj, VALUE axes)
|
|
1512
1514
|
step = 0;
|
1513
1515
|
//printf("beg=%d step=%d len=%d\n",beg,step,len);
|
1514
1516
|
} else if (rb_obj_is_kind_of(v,rb_cRange) ||
|
1515
|
-
|
1517
|
+
#ifdef HAVE_RB_ARITHMETIC_SEQUENCE_EXTRACT
|
1518
|
+
rb_obj_is_kind_of(v,rb_cArithSeq)) {
|
1519
|
+
#else
|
1520
|
+
rb_obj_is_kind_of(v,rb_cEnumerator)) {
|
1521
|
+
#endif
|
1516
1522
|
cumo_na_step_array_index( v, ndim, &len, &beg, &step );
|
1517
1523
|
} else {
|
1518
1524
|
rb_raise(cumo_na_eDimensionError, "invalid dimension argument %s",
|
@@ -1849,6 +1855,9 @@ Init_cumo_narray()
|
|
1849
1855
|
rb_require("complex");
|
1850
1856
|
rb_cComplex = rb_const_get(rb_cObject, rb_intern("Complex"));
|
1851
1857
|
#endif
|
1858
|
+
#ifdef HAVE_RB_ARITHMETIC_SEQUENCE_EXTRACT
|
1859
|
+
rb_cArithSeq = rb_path2class("Enumerator::ArithmeticSequence");
|
1860
|
+
#endif
|
1852
1861
|
|
1853
1862
|
rb_define_const(cNArray, "VERSION", rb_str_new2(CUMO_VERSION));
|
1854
1863
|
|
data/ext/cumo/narray/ndloop.c
CHANGED
@@ -1,5 +1,6 @@
|
|
1
1
|
#include <ruby.h>
|
2
2
|
#include "cumo.h"
|
3
|
+
#include "cumo/indexer.h"
|
3
4
|
#include "cumo/narray.h"
|
4
5
|
#include "cumo/cuda/memory_pool.h"
|
5
6
|
#include "cumo/cuda/runtime.h"
|
@@ -1164,11 +1165,48 @@ cumo_ndfunc_set_bufcp(cumo_ndfunc_t *nf, cumo_na_md_loop_t *lp)
|
|
1164
1165
|
}
|
1165
1166
|
}
|
1166
1167
|
|
1168
|
+
static cumo_na_iarray_stridx_t
|
1169
|
+
cumo_na_make_iarray_buffer_copy(cumo_na_buffer_copy_t* lp)
|
1170
|
+
{
|
1171
|
+
cumo_na_iarray_stridx_t iarray;
|
1172
|
+
int i;
|
1173
|
+
int ndim = lp->ndim;
|
1174
|
+
iarray.ptr = lp->src_ptr + lp->src_iter[0].pos;
|
1175
|
+
for (i = 0; i < ndim; ++i) {
|
1176
|
+
if (LITER_SRC(lp,i).idx) {
|
1177
|
+
CUMO_SDX_SET_INDEX(iarray.stridx[i], LITER_SRC(lp,i).idx);
|
1178
|
+
} else {
|
1179
|
+
CUMO_SDX_SET_STRIDE(iarray.stridx[i], LITER_SRC(lp,i).step);
|
1180
|
+
}
|
1181
|
+
}
|
1182
|
+
return iarray;
|
1183
|
+
}
|
1184
|
+
|
1185
|
+
static cumo_na_indexer_t
|
1186
|
+
cumo_na_make_indexer_buffer_copy(cumo_na_buffer_copy_t* lp)
|
1187
|
+
{
|
1188
|
+
cumo_na_indexer_t indexer;
|
1189
|
+
int i;
|
1190
|
+
indexer.ndim = lp->ndim;
|
1191
|
+
indexer.total_size = 1;
|
1192
|
+
for (i = 0; i< lp->ndim; ++i) {
|
1193
|
+
indexer.shape[i] = lp->n[i];
|
1194
|
+
indexer.total_size *= lp->n[i];
|
1195
|
+
}
|
1196
|
+
return indexer;
|
1197
|
+
}
|
1198
|
+
|
1199
|
+
void cumo_ndloop_copy_to_buffer_kernel_launch(cumo_na_iarray_stridx_t *a, cumo_na_indexer_t* indexer, char *buf, size_t elmsz);
|
1167
1200
|
|
1168
1201
|
// Make contiguous memory for ops not supporting index or stride (step) loop
|
1169
1202
|
static void
|
1170
1203
|
ndloop_copy_to_buffer(cumo_na_buffer_copy_t *lp)
|
1171
1204
|
{
|
1205
|
+
cumo_na_iarray_stridx_t a = cumo_na_make_iarray_buffer_copy(lp);
|
1206
|
+
cumo_na_indexer_t indexer = cumo_na_make_indexer_buffer_copy(lp);
|
1207
|
+
cumo_ndloop_copy_to_buffer_kernel_launch(&a, &indexer, lp->buf_ptr, lp->elmsz);
|
1208
|
+
|
1209
|
+
#if 0
|
1172
1210
|
size_t *c;
|
1173
1211
|
char *src, *buf;
|
1174
1212
|
int i;
|
@@ -1230,11 +1268,19 @@ ndloop_copy_to_buffer(cumo_na_buffer_copy_t *lp)
|
|
1230
1268
|
loop_end:
|
1231
1269
|
;
|
1232
1270
|
DBG(printf("]\n"));
|
1271
|
+
#endif
|
1233
1272
|
}
|
1234
1273
|
|
1274
|
+
void cumo_ndloop_copy_from_buffer_kernel_launch(cumo_na_iarray_stridx_t *a, cumo_na_indexer_t* indexer, char *buf, size_t elmsz);
|
1275
|
+
|
1235
1276
|
static void
|
1236
1277
|
ndloop_copy_from_buffer(cumo_na_buffer_copy_t *lp)
|
1237
1278
|
{
|
1279
|
+
cumo_na_iarray_stridx_t a = cumo_na_make_iarray_buffer_copy(lp);
|
1280
|
+
cumo_na_indexer_t indexer = cumo_na_make_indexer_buffer_copy(lp);
|
1281
|
+
cumo_ndloop_copy_from_buffer_kernel_launch(&a, &indexer, lp->buf_ptr, lp->elmsz);
|
1282
|
+
|
1283
|
+
#if 0
|
1238
1284
|
size_t *c;
|
1239
1285
|
char *src, *buf;
|
1240
1286
|
int i;
|
@@ -1291,12 +1337,14 @@ ndloop_copy_from_buffer(cumo_na_buffer_copy_t *lp)
|
|
1291
1337
|
for (;;) {
|
1292
1338
|
if (i<=0) goto loop_end;
|
1293
1339
|
i--;
|
1294
|
-
|
1340
|
+
++c[i];
|
1341
|
+
if (c[i] < lp->n[i]) break;
|
1295
1342
|
c[i] = 0;
|
1296
1343
|
}
|
1297
1344
|
}
|
1298
1345
|
loop_end:
|
1299
1346
|
DBG(printf("]\n"));
|
1347
|
+
#endif
|
1300
1348
|
}
|
1301
1349
|
|
1302
1350
|
|
@@ -0,0 +1,97 @@
|
|
1
|
+
#include "cumo/narray_kernel.h"
|
2
|
+
#include "cumo/indexer.h"
|
3
|
+
|
4
|
+
#if defined(__cplusplus)
|
5
|
+
extern "C" {
|
6
|
+
#if 0
|
7
|
+
} /* satisfy cc-mode */
|
8
|
+
#endif
|
9
|
+
#endif
|
10
|
+
|
11
|
+
#define CUMO_NDLOOP_COPY_FROM_BUFFER_KERNEL(NDIM) \
|
12
|
+
__global__ void cumo_ndloop_copy_from_buffer_kernel_dim##NDIM( \
|
13
|
+
cumo_na_iarray_stridx_t a, cumo_na_indexer_t indexer, char *buf, size_t elmsz) { \
|
14
|
+
for (uint64_t i = blockIdx.x * blockDim.x + threadIdx.x; i < indexer.total_size; i += blockDim.x * gridDim.x) { \
|
15
|
+
cumo_na_indexer_set_dim##NDIM(&indexer, i); \
|
16
|
+
char* p = cumo_na_iarray_stridx_at_dim##NDIM(&a, &indexer); \
|
17
|
+
memcpy(p, buf + i * elmsz, elmsz); \
|
18
|
+
} \
|
19
|
+
}
|
20
|
+
|
21
|
+
#define CUMO_NDLOOP_COPY_TO_BUFFER_KERNEL(NDIM) \
|
22
|
+
__global__ void cumo_ndloop_copy_to_buffer_kernel_dim##NDIM( \
|
23
|
+
cumo_na_iarray_stridx_t a, cumo_na_indexer_t indexer, char *buf, size_t elmsz) { \
|
24
|
+
for (uint64_t i = blockIdx.x * blockDim.x + threadIdx.x; i < indexer.total_size; i += blockDim.x * gridDim.x) { \
|
25
|
+
cumo_na_indexer_set_dim##NDIM(&indexer, i); \
|
26
|
+
char* p = cumo_na_iarray_stridx_at_dim##NDIM(&a, &indexer); \
|
27
|
+
memcpy(buf + i * elmsz, p, elmsz); \
|
28
|
+
} \
|
29
|
+
}
|
30
|
+
|
31
|
+
CUMO_NDLOOP_COPY_FROM_BUFFER_KERNEL(1)
|
32
|
+
CUMO_NDLOOP_COPY_FROM_BUFFER_KERNEL(2)
|
33
|
+
CUMO_NDLOOP_COPY_FROM_BUFFER_KERNEL(3)
|
34
|
+
CUMO_NDLOOP_COPY_FROM_BUFFER_KERNEL(4)
|
35
|
+
CUMO_NDLOOP_COPY_FROM_BUFFER_KERNEL()
|
36
|
+
|
37
|
+
CUMO_NDLOOP_COPY_TO_BUFFER_KERNEL(1)
|
38
|
+
CUMO_NDLOOP_COPY_TO_BUFFER_KERNEL(2)
|
39
|
+
CUMO_NDLOOP_COPY_TO_BUFFER_KERNEL(3)
|
40
|
+
CUMO_NDLOOP_COPY_TO_BUFFER_KERNEL(4)
|
41
|
+
CUMO_NDLOOP_COPY_TO_BUFFER_KERNEL()
|
42
|
+
|
43
|
+
#undef CUMO_NDLOOP_COPY_FROM_BUFFER_KERNEL
|
44
|
+
#undef CUMO_NDLOOP_COPY_TO_BUFFER_KERNEL
|
45
|
+
|
46
|
+
void cumo_ndloop_copy_from_buffer_kernel_launch(cumo_na_iarray_stridx_t *a, cumo_na_indexer_t* indexer, char *buf, size_t elmsz)
|
47
|
+
{
|
48
|
+
size_t grid_dim = cumo_get_grid_dim(indexer->total_size);
|
49
|
+
size_t block_dim = cumo_get_block_dim(indexer->total_size);
|
50
|
+
switch (indexer->ndim) {
|
51
|
+
case 1:
|
52
|
+
cumo_ndloop_copy_from_buffer_kernel_dim1<<<grid_dim, block_dim>>>(*a,*indexer,buf,elmsz);
|
53
|
+
break;
|
54
|
+
case 2:
|
55
|
+
cumo_ndloop_copy_from_buffer_kernel_dim2<<<grid_dim, block_dim>>>(*a,*indexer,buf,elmsz);
|
56
|
+
break;
|
57
|
+
case 3:
|
58
|
+
cumo_ndloop_copy_from_buffer_kernel_dim3<<<grid_dim, block_dim>>>(*a,*indexer,buf,elmsz);
|
59
|
+
break;
|
60
|
+
case 4:
|
61
|
+
cumo_ndloop_copy_from_buffer_kernel_dim4<<<grid_dim, block_dim>>>(*a,*indexer,buf,elmsz);
|
62
|
+
break;
|
63
|
+
default:
|
64
|
+
cumo_ndloop_copy_from_buffer_kernel_dim<<<grid_dim, block_dim>>>(*a,*indexer,buf,elmsz);
|
65
|
+
break;
|
66
|
+
}
|
67
|
+
}
|
68
|
+
|
69
|
+
void cumo_ndloop_copy_to_buffer_kernel_launch(cumo_na_iarray_stridx_t *a, cumo_na_indexer_t* indexer, char *buf, size_t elmsz)
|
70
|
+
{
|
71
|
+
size_t grid_dim = cumo_get_grid_dim(indexer->total_size);
|
72
|
+
size_t block_dim = cumo_get_block_dim(indexer->total_size);
|
73
|
+
switch (indexer->ndim) {
|
74
|
+
case 1:
|
75
|
+
cumo_ndloop_copy_to_buffer_kernel_dim1<<<grid_dim, block_dim>>>(*a,*indexer,buf,elmsz);
|
76
|
+
break;
|
77
|
+
case 2:
|
78
|
+
cumo_ndloop_copy_to_buffer_kernel_dim2<<<grid_dim, block_dim>>>(*a,*indexer,buf,elmsz);
|
79
|
+
break;
|
80
|
+
case 3:
|
81
|
+
cumo_ndloop_copy_to_buffer_kernel_dim3<<<grid_dim, block_dim>>>(*a,*indexer,buf,elmsz);
|
82
|
+
break;
|
83
|
+
case 4:
|
84
|
+
cumo_ndloop_copy_to_buffer_kernel_dim4<<<grid_dim, block_dim>>>(*a,*indexer,buf,elmsz);
|
85
|
+
break;
|
86
|
+
default:
|
87
|
+
cumo_ndloop_copy_to_buffer_kernel_dim<<<grid_dim, block_dim>>>(*a,*indexer,buf,elmsz);
|
88
|
+
break;
|
89
|
+
}
|
90
|
+
}
|
91
|
+
|
92
|
+
#if defined(__cplusplus)
|
93
|
+
#if 0
|
94
|
+
{ /* satisfy cc-mode */
|
95
|
+
#endif
|
96
|
+
} /* extern "C" { */
|
97
|
+
#endif
|