cumo 0.2.4 → 0.2.5

Sign up to get free protection for your applications and to get access to all the features.
@@ -117,10 +117,12 @@ static int cumo_na_mdai_object_type(int type, VALUE v)
117
117
  if (rb_obj_is_kind_of(v, rb_cRange)) {
118
118
  MDAI_ATTR_TYPE(type,v,begin);
119
119
  MDAI_ATTR_TYPE(type,v,end);
120
- } else if (rb_obj_is_kind_of(v, cumo_na_cStep)) {
120
+ #ifdef HAVE_RB_ARITHMETIC_SEQUENCE_EXTRACT
121
+ } else if (rb_obj_is_kind_of(v, rb_cArithSeq)) {
121
122
  MDAI_ATTR_TYPE(type,v,begin);
122
123
  MDAI_ATTR_TYPE(type,v,end);
123
124
  MDAI_ATTR_TYPE(type,v,step);
125
+ #endif
124
126
  } else {
125
127
  type = cumo_na_object_type(type,v);
126
128
  }
@@ -205,7 +207,11 @@ cumo_na_mdai_investigate(cumo_na_mdai_t *mdai, int ndim)
205
207
  }
206
208
  }
207
209
  else
208
- if (rb_obj_is_kind_of(v, rb_cRange) || rb_obj_is_kind_of(v, cumo_na_cStep)) {
210
+ #ifdef HAVE_RB_ARITHMETIC_SEQUENCE_EXTRACT
211
+ if (rb_obj_is_kind_of(v, rb_cRange) || rb_obj_is_kind_of(v, rb_cArithSeq)) {
212
+ #else
213
+ if (rb_obj_is_kind_of(v, rb_cRange) || rb_obj_is_kind_of(v, rb_cEnumerator)) {
214
+ #endif
209
215
  cumo_na_step_sequence(v,&length,&dbeg,&dstep);
210
216
  len += length-1;
211
217
  mdai->type = cumo_na_mdai_object_type(mdai->type, v);
@@ -65,7 +65,11 @@ static void
65
65
  if (idx1) {
66
66
  for (i=i1=0; i1<n1 && i<n; i++,i1++) {
67
67
  x = ptr[i1];
68
- if (rb_obj_is_kind_of(x, rb_cRange) || rb_obj_is_kind_of(x, cumo_na_cStep)) {
68
+ #ifdef HAVE_RB_ARITHMETIC_SEQUENCE_EXTRACT
69
+ if (rb_obj_is_kind_of(x, rb_cRange) || rb_obj_is_kind_of(x, rb_cArithSeq)) {
70
+ #else
71
+ if (rb_obj_is_kind_of(x, rb_cRange) || rb_obj_is_kind_of(x, rb_cEnumerator)) {
72
+ #endif
69
73
  cumo_na_step_sequence(x,&len,&beg,&step);
70
74
  for (c=0; c<len && i<n; c++,i++) {
71
75
  y = beg + step * c;
@@ -81,7 +85,11 @@ static void
81
85
  } else {
82
86
  for (i=i1=0; i1<n1 && i<n; i++,i1++) {
83
87
  x = ptr[i1];
84
- if (rb_obj_is_kind_of(x, rb_cRange) || rb_obj_is_kind_of(x, cumo_na_cStep)) {
88
+ #ifdef HAVE_RB_ARITHMETIC_SEQUENCE_EXTRACT
89
+ if (rb_obj_is_kind_of(x, rb_cRange) || rb_obj_is_kind_of(x, rb_cArithSeq)) {
90
+ #else
91
+ if (rb_obj_is_kind_of(x, rb_cRange) || rb_obj_is_kind_of(x, rb_cEnumerator)) {
92
+ #endif
85
93
  cumo_na_step_sequence(x,&len,&beg,&step);
86
94
  for (c=0; c<len && i<n; c++,i++) {
87
95
  y = beg + step * c;
@@ -110,7 +118,11 @@ static void
110
118
  dtype* host_z = ALLOC_N(dtype, n);
111
119
  for (i=i1=0; i1<n1 && i<n; i1++) {
112
120
  x = ptr[i1];
113
- if (rb_obj_is_kind_of(x, rb_cRange) || rb_obj_is_kind_of(x, cumo_na_cStep)) {
121
+ #ifdef HAVE_RB_ARITHMETIC_SEQUENCE_EXTRACT
122
+ if (rb_obj_is_kind_of(x, rb_cRange) || rb_obj_is_kind_of(x, rb_cArithSeq)) {
123
+ #else
124
+ if (rb_obj_is_kind_of(x, rb_cRange) || rb_obj_is_kind_of(x, rb_cEnumerator)) {
125
+ #endif
114
126
  cumo_na_step_sequence(x,&len,&beg,&step);
115
127
  for (c=0; c<len && i<n; c++,i++) {
116
128
  y = beg + step * c;
@@ -52,7 +52,11 @@ static void
52
52
  if (idx1) {
53
53
  for (i=i1=0; i1<n1 && i<n; i++,i1++) {
54
54
  x = ptr[i1];
55
- if (rb_obj_is_kind_of(x, rb_cRange) || rb_obj_is_kind_of(x, cumo_na_cStep)) {
55
+ #ifdef HAVE_RB_ARITHMETIC_SEQUENCE_EXTRACT
56
+ if (rb_obj_is_kind_of(x, rb_cRange) || rb_obj_is_kind_of(x, rb_cArithSeq)) {
57
+ #else
58
+ if (rb_obj_is_kind_of(x, rb_cRange) || rb_obj_is_kind_of(x, rb_cEnumerator)) {
59
+ #endif
56
60
  cumo_na_step_sequence(x,&len,&beg,&step);
57
61
  for (c=0; c<len && i<n; c++,i++) {
58
62
  y = beg + step * c;
@@ -69,7 +73,11 @@ static void
69
73
  } else {
70
74
  for (i=i1=0; i1<n1 && i<n; i++,i1++) {
71
75
  x = ptr[i1];
72
- if (rb_obj_is_kind_of(x, rb_cRange) || rb_obj_is_kind_of(x, cumo_na_cStep)) {
76
+ #ifdef HAVE_RB_ARITHMETIC_SEQUENCE_EXTRACT
77
+ if (rb_obj_is_kind_of(x, rb_cRange) || rb_obj_is_kind_of(x, rb_cArithSeq)) {
78
+ #else
79
+ if (rb_obj_is_kind_of(x, rb_cRange) || rb_obj_is_kind_of(x, rb_cEnumerator)) {
80
+ #endif
73
81
  cumo_na_step_sequence(x,&len,&beg,&step);
74
82
  for (c=0; c<len && i<n; c++,i++) {
75
83
  y = beg + step * c;
@@ -12,23 +12,6 @@
12
12
  #define cIndex cumo_cInt32
13
13
  #endif
14
14
 
15
- // from ruby/enumerator.c
16
- struct enumerator {
17
- VALUE obj;
18
- ID meth;
19
- VALUE args;
20
- // use only above in this source
21
- VALUE fib;
22
- VALUE dst;
23
- VALUE lookahead;
24
- VALUE feedvalue;
25
- VALUE stop_exc;
26
- VALUE size;
27
- // incompatible below depending on ruby version
28
- //VALUE procs; // ruby 2.4
29
- //rb_enumerator_size_func *size_fn; // ruby 2.1-2.4
30
- //VALUE (*size_fn)(ANYARGS); // ruby 2.0
31
- };
32
15
 
33
16
  // note: the memory referenced by this pointer is not freed and causes a memory leak.
34
17
  //
@@ -204,6 +187,42 @@ cumo_na_parse_range(VALUE range, ssize_t step, int orig_dim, ssize_t size, cumo_
204
187
  ssize_t beg, end, beg_orig, end_orig;
205
188
  const char *dot = "..", *edot = "...";
206
189
 
190
+ #ifdef HAVE_RB_ARITHMETIC_SEQUENCE_EXTRACT
191
+ rb_arithmetic_sequence_components_t x;
192
+ rb_arithmetic_sequence_extract(range, &x);
193
+ step = NUM2SSIZET(x.step);
194
+
195
+ beg = beg_orig = NUM2SSIZET(x.begin);
196
+ if (beg < 0) {
197
+ beg += size;
198
+ }
199
+ if (T_NIL == TYPE(x.end)) { // endless range
200
+ end = size -1;
201
+ if (RTEST(x.exclude_end)) {
202
+ dot = edot;
203
+ }
204
+ } else {
205
+ end = end_orig = NUM2SSIZET(x.end);
206
+ if (end < 0) {
207
+ end += size;
208
+ }
209
+ if (RTEST(x.exclude_end)) {
210
+ end--;
211
+ dot = edot;
212
+ }
213
+ }
214
+ if (beg < 0 || beg >= size || end < 0 || end >= size) {
215
+ if (T_NIL == TYPE(x.end)) { // endless range
216
+ rb_raise(rb_eRangeError,
217
+ "%"SZF"d%s is out of range for size=%"SZF"d",
218
+ beg_orig, dot, size);
219
+ } else {
220
+ rb_raise(rb_eRangeError,
221
+ "%"SZF"d%s%"SZF"d is out of range for size=%"SZF"d",
222
+ beg_orig, dot, end_orig, size);
223
+ }
224
+ }
225
+ #else
207
226
  beg = beg_orig = NUM2SSIZET(rb_funcall(range,cumo_id_beg,0));
208
227
  if (beg < 0) {
209
228
  beg += size;
@@ -222,44 +241,59 @@ cumo_na_parse_range(VALUE range, ssize_t step, int orig_dim, ssize_t size, cumo_
222
241
  "%"SZF"d%s%"SZF"d is out of range for size=%"SZF"d",
223
242
  beg_orig, dot, end_orig, size);
224
243
  }
244
+ #endif
225
245
  n = (end-beg)/step+1;
226
246
  if (n<0) n=0;
227
247
  cumo_na_index_set_step(q,orig_dim,n,beg,step);
228
248
 
229
249
  }
230
250
 
// cumo_na_parse_enumerator_step (new in this hunk; the "-" rows are the old
// body of cumo_na_parse_enumerator that it was carved out of).
//
// Extracts the step of an Enumerator that was created from a Range and stores
// it, as a Ruby VALUE, into *pstep when pstep is non-NULL.
//   - enum_obj must be a T_DATA object, otherwise TypeError is raised.
//   - The enumerator's receiver (e->obj) must be a Range, otherwise TypeError.
//   - Method `each`  -> step is INT2NUM(1).
//   - Method `step`  -> e->args must be an Array holding exactly one element,
//                       which is returned unconverted; otherwise ArgumentError.
//   - Any other method raises TypeError ("unknown Range method").
// The function is no longer `static`, so it is visible to other translation
// units.
// NOTE(review): DATA_PTR is reinterpreted as cumo_enumerator_t, whose
// definition is not visible here; presumably it mirrors the layout of Ruby's
// internal enumerator struct, as the removed local struct did -- confirm.
231
- static void
232
- cumo_na_parse_enumerator(VALUE enum_obj, int orig_dim, ssize_t size, cumo_na_index_arg_t *q)
251
+ void
252
+ cumo_na_parse_enumerator_step(VALUE enum_obj, VALUE *pstep)
233
253
  {
234
254
  int len;
235
- ssize_t step;
236
- struct enumerator *e;
255
+ VALUE step;
256
+ cumo_enumerator_t *e;
237
257
 
238
258
  if (!RB_TYPE_P(enum_obj, T_DATA)) {
239
259
  rb_raise(rb_eTypeError,"wrong argument type (not T_DATA)");
240
260
  }
241
- e = (struct enumerator *)DATA_PTR(enum_obj);
261
+ e = (cumo_enumerator_t *)DATA_PTR(enum_obj);
262
+
// Guard clause: the Range check now comes first instead of wrapping the body.
263
+ if (!rb_obj_is_kind_of(e->obj, rb_cRange)) {
264
+ rb_raise(rb_eTypeError,"not Range object");
265
+ }
242
266
 
243
- if (rb_obj_is_kind_of(e->obj, rb_cRange)) {
244
- if (e->meth == cumo_id_each) {
245
- cumo_na_parse_range(e->obj, 1, orig_dim, size, q);
267
+ if (e->meth == cumo_id_each) {
268
+ step = INT2NUM(1);
269
+ }
270
+ else if (e->meth == cumo_id_step) {
271
+ if (TYPE(e->args) != T_ARRAY) {
272
+ rb_raise(rb_eArgError,"no argument for step");
246
273
  }
247
- else if (e->meth == cumo_id_step) {
248
- if (TYPE(e->args) != T_ARRAY) {
249
- rb_raise(rb_eArgError,"no argument for step");
250
- }
251
- len = RARRAY_LEN(e->args);
252
- if (len != 1) {
253
- rb_raise(rb_eArgError,"invalid number of step argument (1 for %d)",len);
254
- }
255
- step = NUM2SSIZET(RARRAY_AREF(e->args,0));
256
- cumo_na_parse_range(e->obj, step, orig_dim, size, q);
257
- } else {
258
- rb_raise(rb_eTypeError,"unknown Range method: %s",rb_id2name(e->meth));
// NOTE(review): RARRAY_LEN is assigned to an `int`; harmless for the expected
// single-element array, but it is a narrowing assignment.
274
+ len = RARRAY_LEN(e->args);
275
+ if (len != 1) {
276
+ rb_raise(rb_eArgError,"invalid number of step argument (1 for %d)",len);
259
277
  }
// The step stays a Ruby VALUE here; numeric conversion is left to the caller.
278
+ step = RARRAY_AREF(e->args,0);
260
279
  } else {
261
- rb_raise(rb_eTypeError,"not Range object");
280
+ rb_raise(rb_eTypeError,"unknown Range method: %s",rb_id2name(e->meth));
262
281
  }
// pstep is optional: a NULL pointer means "validate only".
282
+ if (pstep) *pstep = step;
283
+ }
284
+
// cumo_na_parse_enumerator (re-added as a thin wrapper).
//
// Parses an Enumerator used as an index: obtains the step through
// cumo_na_parse_enumerator_step(), then forwards the enumerator's receiver
// (e->obj) to cumo_na_parse_range() with the step converted by NUM2SSIZET.
// Raises TypeError when enum_obj is not T_DATA; the callee performs the same
// check again, so the test here is redundant but harmless.
// NOTE(review): when HAVE_RB_ARITHMETIC_SEQUENCE_EXTRACT is defined,
// cumo_na_parse_range() reassigns its `step` parameter from
// rb_arithmetic_sequence_extract(), so the step passed from here appears to be
// overwritten on that build -- confirm this is intended.
285
+ static void
286
+ cumo_na_parse_enumerator(VALUE enum_obj, int orig_dim, ssize_t size, cumo_na_index_arg_t *q)
287
+ {
288
+ VALUE step;
289
+ cumo_enumerator_t *e;
290
+
291
+ if (!RB_TYPE_P(enum_obj, T_DATA)) {
292
+ rb_raise(rb_eTypeError,"wrong argument type (not T_DATA)");
293
+ }
294
+ cumo_na_parse_enumerator_step(enum_obj, &step);
295
+ e = (cumo_enumerator_t *)DATA_PTR(enum_obj);
296
+ cumo_na_parse_range(e->obj, NUM2SSIZET(step), orig_dim, size, q); // e->obj : Range Object
263
297
  }
264
298
 
265
299
  // Analyze *a* which is *i*-th index object and store the information to q
@@ -316,14 +350,14 @@ cumo_na_index_parse_each(volatile VALUE a, ssize_t size, int i, cumo_na_index_ar
316
350
  if (rb_obj_is_kind_of(a, rb_cRange)) {
317
351
  cumo_na_parse_range(a, 1, i, size, q);
318
352
  }
353
+ #ifdef HAVE_RB_ARITHMETIC_SEQUENCE_EXTRACT
354
+ else if (rb_obj_is_kind_of(a, rb_cArithSeq)) {
355
+ cumo_na_parse_range(a, 1, i, size, q);
356
+ }
357
+ #endif
319
358
  else if (rb_obj_is_kind_of(a, rb_cEnumerator)) {
320
359
  cumo_na_parse_enumerator(a, i, size, q);
321
360
  }
322
- else if (rb_obj_is_kind_of(a, cumo_na_cStep)) {
323
- ssize_t beg, step, n;
324
- cumo_na_step_array_index(a, size, (size_t*)(&n), &beg, &step);
325
- cumo_na_index_set_step(q,i,n,beg,step);
326
- }
327
361
  // NArray index
328
362
  else if (CUMO_NA_CumoIsNArray(a)) {
329
363
  cumo_na_parse_narray_index(a, i, size, q);
@@ -40,10 +40,12 @@ VALUE cumo_sym_option;
40
40
  VALUE cumo_sym_loop_opt;
41
41
  VALUE cumo_sym_init;
42
42
 
43
- VALUE cumo_na_cStep;
44
43
  #ifndef HAVE_RB_CCOMPLEX
45
44
  VALUE rb_cComplex;
46
45
  #endif
46
+ #ifdef HAVE_RB_ARITHMETIC_SEQUENCE_EXTRACT
47
+ VALUE rb_cArithSeq;
48
+ #endif
47
49
 
48
50
  int cumo_na_inspect_rows_=20;
49
51
  int cumo_na_inspect_cols_=80;
@@ -1512,7 +1514,11 @@ cumo_na_get_reduce_flag_from_axes(VALUE cumo_na_obj, VALUE axes)
1512
1514
  step = 0;
1513
1515
  //printf("beg=%d step=%d len=%d\n",beg,step,len);
1514
1516
  } else if (rb_obj_is_kind_of(v,rb_cRange) ||
1515
- rb_obj_is_kind_of(v,cumo_na_cStep)) {
1517
+ #ifdef HAVE_RB_ARITHMETIC_SEQUENCE_EXTRACT
1518
+ rb_obj_is_kind_of(v,rb_cArithSeq)) {
1519
+ #else
1520
+ rb_obj_is_kind_of(v,rb_cEnumerator)) {
1521
+ #endif
1516
1522
  cumo_na_step_array_index( v, ndim, &len, &beg, &step );
1517
1523
  } else {
1518
1524
  rb_raise(cumo_na_eDimensionError, "invalid dimension argument %s",
@@ -1849,6 +1855,9 @@ Init_cumo_narray()
1849
1855
  rb_require("complex");
1850
1856
  rb_cComplex = rb_const_get(rb_cObject, rb_intern("Complex"));
1851
1857
  #endif
1858
+ #ifdef HAVE_RB_ARITHMETIC_SEQUENCE_EXTRACT
1859
+ rb_cArithSeq = rb_path2class("Enumerator::ArithmeticSequence");
1860
+ #endif
1852
1861
 
1853
1862
  rb_define_const(cNArray, "VERSION", rb_str_new2(CUMO_VERSION));
1854
1863
 
@@ -1,5 +1,6 @@
1
1
  #include <ruby.h>
2
2
  #include "cumo.h"
3
+ #include "cumo/indexer.h"
3
4
  #include "cumo/narray.h"
4
5
  #include "cumo/cuda/memory_pool.h"
5
6
  #include "cumo/cuda/runtime.h"
@@ -1164,11 +1165,48 @@ cumo_ndfunc_set_bufcp(cumo_ndfunc_t *nf, cumo_na_md_loop_t *lp)
1164
1165
  }
1165
1166
  }
1166
1167
 
// cumo_na_make_iarray_buffer_copy
//
// Builds, by value, a cumo_na_iarray_stridx_t describing the source side of a
// buffer copy:
//   - ptr is lp->src_ptr advanced by the position of the first source iterator;
//   - each of the lp->ndim dimensions is tagged either as index-addressed
//     (when the source iterator has a non-NULL idx) or as stride-addressed
//     (using the iterator's step).
// NOTE(review): ndim is not checked against the capacity of iarray.stridx,
// which is declared outside this view -- confirm the bound is guaranteed by
// the caller.
1168
+ static cumo_na_iarray_stridx_t
1169
+ cumo_na_make_iarray_buffer_copy(cumo_na_buffer_copy_t* lp)
1170
+ {
1171
+ cumo_na_iarray_stridx_t iarray;
1172
+ int i;
1173
+ int ndim = lp->ndim;
// Base address: source pointer plus the offset stored in the first iterator.
1174
+ iarray.ptr = lp->src_ptr + lp->src_iter[0].pos;
1175
+ for (i = 0; i < ndim; ++i) {
1176
+ if (LITER_SRC(lp,i).idx) {
1177
+ CUMO_SDX_SET_INDEX(iarray.stridx[i], LITER_SRC(lp,i).idx);
1178
+ } else {
1179
+ CUMO_SDX_SET_STRIDE(iarray.stridx[i], LITER_SRC(lp,i).step);
1180
+ }
1181
+ }
1182
+ return iarray;
1183
+ }
1184
+
// cumo_na_make_indexer_buffer_copy
//
// Builds, by value, a cumo_na_indexer_t for a buffer copy: ndim is taken from
// lp, shape[i] is copied from lp->n[i], and total_size is the product of all
// lp->n[i]. With ndim == 0 the loop does not run and total_size stays 1.
// NOTE(review): lp->ndim is not checked against the capacity of
// indexer.shape, which is declared outside this view -- confirm.
1185
+ static cumo_na_indexer_t
1186
+ cumo_na_make_indexer_buffer_copy(cumo_na_buffer_copy_t* lp)
1187
+ {
1188
+ cumo_na_indexer_t indexer;
1189
+ int i;
1190
+ indexer.ndim = lp->ndim;
1191
+ indexer.total_size = 1;
1192
+ for (i = 0; i< lp->ndim; ++i) {
1193
+ indexer.shape[i] = lp->n[i];
1194
+ indexer.total_size *= lp->n[i];
1195
+ }
1196
+ return indexer;
1197
+ }
1198
+
1199
+ void cumo_ndloop_copy_to_buffer_kernel_launch(cumo_na_iarray_stridx_t *a, cumo_na_indexer_t* indexer, char *buf, size_t elmsz);
1167
1200
 
1168
1201
  // Make contiguous memory for ops not supporting index or stride (step) loop
1169
1202
  static void
1170
1203
  ndloop_copy_to_buffer(cumo_na_buffer_copy_t *lp)
1171
1204
  {
1205
+ cumo_na_iarray_stridx_t a = cumo_na_make_iarray_buffer_copy(lp);
1206
+ cumo_na_indexer_t indexer = cumo_na_make_indexer_buffer_copy(lp);
1207
+ cumo_ndloop_copy_to_buffer_kernel_launch(&a, &indexer, lp->buf_ptr, lp->elmsz);
1208
+
1209
+ #if 0
1172
1210
  size_t *c;
1173
1211
  char *src, *buf;
1174
1212
  int i;
@@ -1230,11 +1268,19 @@ ndloop_copy_to_buffer(cumo_na_buffer_copy_t *lp)
1230
1268
  loop_end:
1231
1269
  ;
1232
1270
  DBG(printf("]\n"));
1271
+ #endif
1233
1272
  }
1234
1273
 
1274
+ void cumo_ndloop_copy_from_buffer_kernel_launch(cumo_na_iarray_stridx_t *a, cumo_na_indexer_t* indexer, char *buf, size_t elmsz);
1275
+
1235
1276
  static void
1236
1277
  ndloop_copy_from_buffer(cumo_na_buffer_copy_t *lp)
1237
1278
  {
1279
+ cumo_na_iarray_stridx_t a = cumo_na_make_iarray_buffer_copy(lp);
1280
+ cumo_na_indexer_t indexer = cumo_na_make_indexer_buffer_copy(lp);
1281
+ cumo_ndloop_copy_from_buffer_kernel_launch(&a, &indexer, lp->buf_ptr, lp->elmsz);
1282
+
1283
+ #if 0
1238
1284
  size_t *c;
1239
1285
  char *src, *buf;
1240
1286
  int i;
@@ -1291,12 +1337,14 @@ ndloop_copy_from_buffer(cumo_na_buffer_copy_t *lp)
1291
1337
  for (;;) {
1292
1338
  if (i<=0) goto loop_end;
1293
1339
  i--;
1294
- if (++c[i] < lp->n[i]) break;
1340
+ ++c[i];
1341
+ if (c[i] < lp->n[i]) break;
1295
1342
  c[i] = 0;
1296
1343
  }
1297
1344
  }
1298
1345
  loop_end:
1299
1346
  DBG(printf("]\n"));
1347
+ #endif
1300
1348
  }
1301
1349
 
1302
1350
 
@@ -0,0 +1,97 @@
1
+ #include "cumo/narray_kernel.h"
2
+ #include "cumo/indexer.h"
3
+
4
+ #if defined(__cplusplus)
5
+ extern "C" {
6
+ #if 0
7
+ } /* satisfy cc-mode */
8
+ #endif
9
+ #endif
10
+
// CUMO_NDLOOP_COPY_FROM_BUFFER_KERNEL(NDIM)
//
// Defines the __global__ kernel cumo_ndloop_copy_from_buffer_kernel_dim<NDIM>.
// Each thread walks flat element indices with a grid-stride loop (start at
// blockIdx.x * blockDim.x + threadIdx.x, advance by blockDim.x * gridDim.x)
// until indexer.total_size is reached. For every index i it positions the
// indexer with cumo_na_indexer_set_dim<NDIM>, resolves the element address in
// the array with cumo_na_iarray_stridx_at_dim<NDIM>, and copies elmsz bytes
// from buf + i * elmsz INTO the array element.
// NOTE(review): the start index and stride are formed from CUDA's built-in
// unsigned int values before being widened to uint64_t, so the products could
// wrap for extremely large launches -- confirm launch sizes keep them in range.
11
+ #define CUMO_NDLOOP_COPY_FROM_BUFFER_KERNEL(NDIM) \
12
+ __global__ void cumo_ndloop_copy_from_buffer_kernel_dim##NDIM( \
13
+ cumo_na_iarray_stridx_t a, cumo_na_indexer_t indexer, char *buf, size_t elmsz) { \
14
+ for (uint64_t i = blockIdx.x * blockDim.x + threadIdx.x; i < indexer.total_size; i += blockDim.x * gridDim.x) { \
15
+ cumo_na_indexer_set_dim##NDIM(&indexer, i); \
16
+ char* p = cumo_na_iarray_stridx_at_dim##NDIM(&a, &indexer); \
17
+ memcpy(p, buf + i * elmsz, elmsz); \
18
+ } \
19
+ }
20
+
// CUMO_NDLOOP_COPY_TO_BUFFER_KERNEL(NDIM)
//
// Defines the __global__ kernel cumo_ndloop_copy_to_buffer_kernel_dim<NDIM>.
// Same grid-stride traversal as the copy-from-buffer kernel, but the copy
// direction is reversed: for every flat index i, elmsz bytes are copied FROM
// the array element located by cumo_na_iarray_stridx_at_dim<NDIM> into
// buf + i * elmsz, i.e. the strided/indexed source is packed contiguously.
21
+ #define CUMO_NDLOOP_COPY_TO_BUFFER_KERNEL(NDIM) \
22
+ __global__ void cumo_ndloop_copy_to_buffer_kernel_dim##NDIM( \
23
+ cumo_na_iarray_stridx_t a, cumo_na_indexer_t indexer, char *buf, size_t elmsz) { \
24
+ for (uint64_t i = blockIdx.x * blockDim.x + threadIdx.x; i < indexer.total_size; i += blockDim.x * gridDim.x) { \
25
+ cumo_na_indexer_set_dim##NDIM(&indexer, i); \
26
+ char* p = cumo_na_iarray_stridx_at_dim##NDIM(&a, &indexer); \
27
+ memcpy(buf + i * elmsz, p, elmsz); \
28
+ } \
29
+ }
30
+
// Instantiate both kernel families for NDIM = 1..4, plus one variant with an
// empty NDIM argument. The empty argument yields the un-suffixed names
// (..._kernel_dim) that the launchers use in their `default:` case.
// The generator macros are #undef'd afterwards so they do not leak further.
31
+ CUMO_NDLOOP_COPY_FROM_BUFFER_KERNEL(1)
32
+ CUMO_NDLOOP_COPY_FROM_BUFFER_KERNEL(2)
33
+ CUMO_NDLOOP_COPY_FROM_BUFFER_KERNEL(3)
34
+ CUMO_NDLOOP_COPY_FROM_BUFFER_KERNEL(4)
35
+ CUMO_NDLOOP_COPY_FROM_BUFFER_KERNEL()
36
+
37
+ CUMO_NDLOOP_COPY_TO_BUFFER_KERNEL(1)
38
+ CUMO_NDLOOP_COPY_TO_BUFFER_KERNEL(2)
39
+ CUMO_NDLOOP_COPY_TO_BUFFER_KERNEL(3)
40
+ CUMO_NDLOOP_COPY_TO_BUFFER_KERNEL(4)
41
+ CUMO_NDLOOP_COPY_TO_BUFFER_KERNEL()
42
+
43
+ #undef CUMO_NDLOOP_COPY_FROM_BUFFER_KERNEL
44
+ #undef CUMO_NDLOOP_COPY_TO_BUFFER_KERNEL
45
+
// cumo_ndloop_copy_from_buffer_kernel_launch
//
// Host-side launcher for the copy-from-buffer kernels.
//   a       - array descriptor (base pointer plus per-dimension stride/index),
//             passed to the kernel by value
//   indexer - shape / ndim / total_size descriptor, passed by value
//   buf     - buffer that the kernel reads elements from
//   elmsz   - size in bytes of one element
// Grid and block sizes are derived from indexer->total_size. ndim 1..4 select
// a dimension-specialised kernel; every other ndim falls back to the generic
// un-suffixed kernel.
// NOTE(review): no launch error check is performed here, and behaviour for
// total_size == 0 depends on cumo_get_grid_dim/cumo_get_block_dim, which are
// not visible -- confirm.
46
+ void cumo_ndloop_copy_from_buffer_kernel_launch(cumo_na_iarray_stridx_t *a, cumo_na_indexer_t* indexer, char *buf, size_t elmsz)
47
+ {
48
+ size_t grid_dim = cumo_get_grid_dim(indexer->total_size);
49
+ size_t block_dim = cumo_get_block_dim(indexer->total_size);
50
+ switch (indexer->ndim) {
51
+ case 1:
52
+ cumo_ndloop_copy_from_buffer_kernel_dim1<<<grid_dim, block_dim>>>(*a,*indexer,buf,elmsz);
53
+ break;
54
+ case 2:
55
+ cumo_ndloop_copy_from_buffer_kernel_dim2<<<grid_dim, block_dim>>>(*a,*indexer,buf,elmsz);
56
+ break;
57
+ case 3:
58
+ cumo_ndloop_copy_from_buffer_kernel_dim3<<<grid_dim, block_dim>>>(*a,*indexer,buf,elmsz);
59
+ break;
60
+ case 4:
61
+ cumo_ndloop_copy_from_buffer_kernel_dim4<<<grid_dim, block_dim>>>(*a,*indexer,buf,elmsz);
62
+ break;
63
+ default:
64
+ cumo_ndloop_copy_from_buffer_kernel_dim<<<grid_dim, block_dim>>>(*a,*indexer,buf,elmsz);
65
+ break;
66
+ }
67
+ }
68
+
// cumo_ndloop_copy_to_buffer_kernel_launch
//
// Host-side launcher for the copy-to-buffer kernels.
//   a       - array descriptor (base pointer plus per-dimension stride/index),
//             passed to the kernel by value
//   indexer - shape / ndim / total_size descriptor, passed by value
//   buf     - buffer that the kernel writes elements into
//   elmsz   - size in bytes of one element
// Grid and block sizes are derived from indexer->total_size. ndim 1..4 select
// a dimension-specialised kernel; every other ndim falls back to the generic
// un-suffixed kernel.
// NOTE(review): no launch error check is performed here, and behaviour for
// total_size == 0 depends on cumo_get_grid_dim/cumo_get_block_dim, which are
// not visible -- confirm.
69
+ void cumo_ndloop_copy_to_buffer_kernel_launch(cumo_na_iarray_stridx_t *a, cumo_na_indexer_t* indexer, char *buf, size_t elmsz)
70
+ {
71
+ size_t grid_dim = cumo_get_grid_dim(indexer->total_size);
72
+ size_t block_dim = cumo_get_block_dim(indexer->total_size);
73
+ switch (indexer->ndim) {
74
+ case 1:
75
+ cumo_ndloop_copy_to_buffer_kernel_dim1<<<grid_dim, block_dim>>>(*a,*indexer,buf,elmsz);
76
+ break;
77
+ case 2:
78
+ cumo_ndloop_copy_to_buffer_kernel_dim2<<<grid_dim, block_dim>>>(*a,*indexer,buf,elmsz);
79
+ break;
80
+ case 3:
81
+ cumo_ndloop_copy_to_buffer_kernel_dim3<<<grid_dim, block_dim>>>(*a,*indexer,buf,elmsz);
82
+ break;
83
+ case 4:
84
+ cumo_ndloop_copy_to_buffer_kernel_dim4<<<grid_dim, block_dim>>>(*a,*indexer,buf,elmsz);
85
+ break;
86
+ default:
87
+ cumo_ndloop_copy_to_buffer_kernel_dim<<<grid_dim, block_dim>>>(*a,*indexer,buf,elmsz);
88
+ break;
89
+ }
90
+ }
91
+
92
+ #if defined(__cplusplus)
93
+ #if 0
94
+ { /* satisfy cc-mode */
95
+ #endif
96
+ } /* extern "C" { */
97
+ #endif