cumo 0.2.4 → 0.2.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -117,10 +117,12 @@ static int cumo_na_mdai_object_type(int type, VALUE v)
117
117
  if (rb_obj_is_kind_of(v, rb_cRange)) {
118
118
  MDAI_ATTR_TYPE(type,v,begin);
119
119
  MDAI_ATTR_TYPE(type,v,end);
120
- } else if (rb_obj_is_kind_of(v, cumo_na_cStep)) {
120
+ #ifdef HAVE_RB_ARITHMETIC_SEQUENCE_EXTRACT
121
+ } else if (rb_obj_is_kind_of(v, rb_cArithSeq)) {
121
122
  MDAI_ATTR_TYPE(type,v,begin);
122
123
  MDAI_ATTR_TYPE(type,v,end);
123
124
  MDAI_ATTR_TYPE(type,v,step);
125
+ #endif
124
126
  } else {
125
127
  type = cumo_na_object_type(type,v);
126
128
  }
@@ -205,7 +207,11 @@ cumo_na_mdai_investigate(cumo_na_mdai_t *mdai, int ndim)
205
207
  }
206
208
  }
207
209
  else
208
- if (rb_obj_is_kind_of(v, rb_cRange) || rb_obj_is_kind_of(v, cumo_na_cStep)) {
210
+ #ifdef HAVE_RB_ARITHMETIC_SEQUENCE_EXTRACT
211
+ if (rb_obj_is_kind_of(v, rb_cRange) || rb_obj_is_kind_of(v, rb_cArithSeq)) {
212
+ #else
213
+ if (rb_obj_is_kind_of(v, rb_cRange) || rb_obj_is_kind_of(v, rb_cEnumerator)) {
214
+ #endif
209
215
  cumo_na_step_sequence(v,&length,&dbeg,&dstep);
210
216
  len += length-1;
211
217
  mdai->type = cumo_na_mdai_object_type(mdai->type, v);
@@ -65,7 +65,11 @@ static void
65
65
  if (idx1) {
66
66
  for (i=i1=0; i1<n1 && i<n; i++,i1++) {
67
67
  x = ptr[i1];
68
- if (rb_obj_is_kind_of(x, rb_cRange) || rb_obj_is_kind_of(x, cumo_na_cStep)) {
68
+ #ifdef HAVE_RB_ARITHMETIC_SEQUENCE_EXTRACT
69
+ if (rb_obj_is_kind_of(x, rb_cRange) || rb_obj_is_kind_of(x, rb_cArithSeq)) {
70
+ #else
71
+ if (rb_obj_is_kind_of(x, rb_cRange) || rb_obj_is_kind_of(x, rb_cEnumerator)) {
72
+ #endif
69
73
  cumo_na_step_sequence(x,&len,&beg,&step);
70
74
  for (c=0; c<len && i<n; c++,i++) {
71
75
  y = beg + step * c;
@@ -81,7 +85,11 @@ static void
81
85
  } else {
82
86
  for (i=i1=0; i1<n1 && i<n; i++,i1++) {
83
87
  x = ptr[i1];
84
- if (rb_obj_is_kind_of(x, rb_cRange) || rb_obj_is_kind_of(x, cumo_na_cStep)) {
88
+ #ifdef HAVE_RB_ARITHMETIC_SEQUENCE_EXTRACT
89
+ if (rb_obj_is_kind_of(x, rb_cRange) || rb_obj_is_kind_of(x, rb_cArithSeq)) {
90
+ #else
91
+ if (rb_obj_is_kind_of(x, rb_cRange) || rb_obj_is_kind_of(x, rb_cEnumerator)) {
92
+ #endif
85
93
  cumo_na_step_sequence(x,&len,&beg,&step);
86
94
  for (c=0; c<len && i<n; c++,i++) {
87
95
  y = beg + step * c;
@@ -110,7 +118,11 @@ static void
110
118
  dtype* host_z = ALLOC_N(dtype, n);
111
119
  for (i=i1=0; i1<n1 && i<n; i1++) {
112
120
  x = ptr[i1];
113
- if (rb_obj_is_kind_of(x, rb_cRange) || rb_obj_is_kind_of(x, cumo_na_cStep)) {
121
+ #ifdef HAVE_RB_ARITHMETIC_SEQUENCE_EXTRACT
122
+ if (rb_obj_is_kind_of(x, rb_cRange) || rb_obj_is_kind_of(x, rb_cArithSeq)) {
123
+ #else
124
+ if (rb_obj_is_kind_of(x, rb_cRange) || rb_obj_is_kind_of(x, rb_cEnumerator)) {
125
+ #endif
114
126
  cumo_na_step_sequence(x,&len,&beg,&step);
115
127
  for (c=0; c<len && i<n; c++,i++) {
116
128
  y = beg + step * c;
@@ -52,7 +52,11 @@ static void
52
52
  if (idx1) {
53
53
  for (i=i1=0; i1<n1 && i<n; i++,i1++) {
54
54
  x = ptr[i1];
55
- if (rb_obj_is_kind_of(x, rb_cRange) || rb_obj_is_kind_of(x, cumo_na_cStep)) {
55
+ #ifdef HAVE_RB_ARITHMETIC_SEQUENCE_EXTRACT
56
+ if (rb_obj_is_kind_of(x, rb_cRange) || rb_obj_is_kind_of(x, rb_cArithSeq)) {
57
+ #else
58
+ if (rb_obj_is_kind_of(x, rb_cRange) || rb_obj_is_kind_of(x, rb_cEnumerator)) {
59
+ #endif
56
60
  cumo_na_step_sequence(x,&len,&beg,&step);
57
61
  for (c=0; c<len && i<n; c++,i++) {
58
62
  y = beg + step * c;
@@ -69,7 +73,11 @@ static void
69
73
  } else {
70
74
  for (i=i1=0; i1<n1 && i<n; i++,i1++) {
71
75
  x = ptr[i1];
72
- if (rb_obj_is_kind_of(x, rb_cRange) || rb_obj_is_kind_of(x, cumo_na_cStep)) {
76
+ #ifdef HAVE_RB_ARITHMETIC_SEQUENCE_EXTRACT
77
+ if (rb_obj_is_kind_of(x, rb_cRange) || rb_obj_is_kind_of(x, rb_cArithSeq)) {
78
+ #else
79
+ if (rb_obj_is_kind_of(x, rb_cRange) || rb_obj_is_kind_of(x, rb_cEnumerator)) {
80
+ #endif
73
81
  cumo_na_step_sequence(x,&len,&beg,&step);
74
82
  for (c=0; c<len && i<n; c++,i++) {
75
83
  y = beg + step * c;
@@ -12,23 +12,6 @@
12
12
  #define cIndex cumo_cInt32
13
13
  #endif
14
14
 
15
- // from ruby/enumerator.c
16
- struct enumerator {
17
- VALUE obj;
18
- ID meth;
19
- VALUE args;
20
- // use only above in this source
21
- VALUE fib;
22
- VALUE dst;
23
- VALUE lookahead;
24
- VALUE feedvalue;
25
- VALUE stop_exc;
26
- VALUE size;
27
- // incompatible below depending on ruby version
28
- //VALUE procs; // ruby 2.4
29
- //rb_enumerator_size_func *size_fn; // ruby 2.1-2.4
30
- //VALUE (*size_fn)(ANYARGS); // ruby 2.0
31
- };
32
15
 
33
16
  // note: the memory referenced by this pointer is not freed and causes a memory leak.
34
17
  //
@@ -204,6 +187,42 @@ cumo_na_parse_range(VALUE range, ssize_t step, int orig_dim, ssize_t size, cumo_
204
187
  ssize_t beg, end, beg_orig, end_orig;
205
188
  const char *dot = "..", *edot = "...";
206
189
 
190
+ #ifdef HAVE_RB_ARITHMETIC_SEQUENCE_EXTRACT
191
+ rb_arithmetic_sequence_components_t x;
192
+ rb_arithmetic_sequence_extract(range, &x);
193
+ step = NUM2SSIZET(x.step);
194
+
195
+ beg = beg_orig = NUM2SSIZET(x.begin);
196
+ if (beg < 0) {
197
+ beg += size;
198
+ }
199
+ if (T_NIL == TYPE(x.end)) { // endless range
200
+ end = size -1;
201
+ if (RTEST(x.exclude_end)) {
202
+ dot = edot;
203
+ }
204
+ } else {
205
+ end = end_orig = NUM2SSIZET(x.end);
206
+ if (end < 0) {
207
+ end += size;
208
+ }
209
+ if (RTEST(x.exclude_end)) {
210
+ end--;
211
+ dot = edot;
212
+ }
213
+ }
214
+ if (beg < 0 || beg >= size || end < 0 || end >= size) {
215
+ if (T_NIL == TYPE(x.end)) { // endless range
216
+ rb_raise(rb_eRangeError,
217
+ "%"SZF"d%s is out of range for size=%"SZF"d",
218
+ beg_orig, dot, size);
219
+ } else {
220
+ rb_raise(rb_eRangeError,
221
+ "%"SZF"d%s%"SZF"d is out of range for size=%"SZF"d",
222
+ beg_orig, dot, end_orig, size);
223
+ }
224
+ }
225
+ #else
207
226
  beg = beg_orig = NUM2SSIZET(rb_funcall(range,cumo_id_beg,0));
208
227
  if (beg < 0) {
209
228
  beg += size;
@@ -222,44 +241,59 @@ cumo_na_parse_range(VALUE range, ssize_t step, int orig_dim, ssize_t size, cumo_
222
241
  "%"SZF"d%s%"SZF"d is out of range for size=%"SZF"d",
223
242
  beg_orig, dot, end_orig, size);
224
243
  }
244
+ #endif
225
245
  n = (end-beg)/step+1;
226
246
  if (n<0) n=0;
227
247
  cumo_na_index_set_step(q,orig_dim,n,beg,step);
228
248
 
229
249
  }
230
250
 
231
- static void
232
- cumo_na_parse_enumerator(VALUE enum_obj, int orig_dim, ssize_t size, cumo_na_index_arg_t *q)
251
+ void
252
+ cumo_na_parse_enumerator_step(VALUE enum_obj, VALUE *pstep)
233
253
  {
234
254
  int len;
235
- ssize_t step;
236
- struct enumerator *e;
255
+ VALUE step;
256
+ cumo_enumerator_t *e;
237
257
 
238
258
  if (!RB_TYPE_P(enum_obj, T_DATA)) {
239
259
  rb_raise(rb_eTypeError,"wrong argument type (not T_DATA)");
240
260
  }
241
- e = (struct enumerator *)DATA_PTR(enum_obj);
261
+ e = (cumo_enumerator_t *)DATA_PTR(enum_obj);
262
+
263
+ if (!rb_obj_is_kind_of(e->obj, rb_cRange)) {
264
+ rb_raise(rb_eTypeError,"not Range object");
265
+ }
242
266
 
243
- if (rb_obj_is_kind_of(e->obj, rb_cRange)) {
244
- if (e->meth == cumo_id_each) {
245
- cumo_na_parse_range(e->obj, 1, orig_dim, size, q);
267
+ if (e->meth == cumo_id_each) {
268
+ step = INT2NUM(1);
269
+ }
270
+ else if (e->meth == cumo_id_step) {
271
+ if (TYPE(e->args) != T_ARRAY) {
272
+ rb_raise(rb_eArgError,"no argument for step");
246
273
  }
247
- else if (e->meth == cumo_id_step) {
248
- if (TYPE(e->args) != T_ARRAY) {
249
- rb_raise(rb_eArgError,"no argument for step");
250
- }
251
- len = RARRAY_LEN(e->args);
252
- if (len != 1) {
253
- rb_raise(rb_eArgError,"invalid number of step argument (1 for %d)",len);
254
- }
255
- step = NUM2SSIZET(RARRAY_AREF(e->args,0));
256
- cumo_na_parse_range(e->obj, step, orig_dim, size, q);
257
- } else {
258
- rb_raise(rb_eTypeError,"unknown Range method: %s",rb_id2name(e->meth));
274
+ len = RARRAY_LEN(e->args);
275
+ if (len != 1) {
276
+ rb_raise(rb_eArgError,"invalid number of step argument (1 for %d)",len);
259
277
  }
278
+ step = RARRAY_AREF(e->args,0);
260
279
  } else {
261
- rb_raise(rb_eTypeError,"not Range object");
280
+ rb_raise(rb_eTypeError,"unknown Range method: %s",rb_id2name(e->meth));
262
281
  }
282
+ if (pstep) *pstep = step;
283
+ }
284
+
285
+ static void
286
+ cumo_na_parse_enumerator(VALUE enum_obj, int orig_dim, ssize_t size, cumo_na_index_arg_t *q)
287
+ {
288
+ VALUE step;
289
+ cumo_enumerator_t *e;
290
+
291
+ if (!RB_TYPE_P(enum_obj, T_DATA)) {
292
+ rb_raise(rb_eTypeError,"wrong argument type (not T_DATA)");
293
+ }
294
+ cumo_na_parse_enumerator_step(enum_obj, &step);
295
+ e = (cumo_enumerator_t *)DATA_PTR(enum_obj);
296
+ cumo_na_parse_range(e->obj, NUM2SSIZET(step), orig_dim, size, q); // e->obj : Range Object
263
297
  }
264
298
 
265
299
  // Analyze *a* which is *i*-th index object and store the information to q
@@ -316,14 +350,14 @@ cumo_na_index_parse_each(volatile VALUE a, ssize_t size, int i, cumo_na_index_ar
316
350
  if (rb_obj_is_kind_of(a, rb_cRange)) {
317
351
  cumo_na_parse_range(a, 1, i, size, q);
318
352
  }
353
+ #ifdef HAVE_RB_ARITHMETIC_SEQUENCE_EXTRACT
354
+ else if (rb_obj_is_kind_of(a, rb_cArithSeq)) {
355
+ cumo_na_parse_range(a, 1, i, size, q);
356
+ }
357
+ #endif
319
358
  else if (rb_obj_is_kind_of(a, rb_cEnumerator)) {
320
359
  cumo_na_parse_enumerator(a, i, size, q);
321
360
  }
322
- else if (rb_obj_is_kind_of(a, cumo_na_cStep)) {
323
- ssize_t beg, step, n;
324
- cumo_na_step_array_index(a, size, (size_t*)(&n), &beg, &step);
325
- cumo_na_index_set_step(q,i,n,beg,step);
326
- }
327
361
  // NArray index
328
362
  else if (CUMO_NA_CumoIsNArray(a)) {
329
363
  cumo_na_parse_narray_index(a, i, size, q);
@@ -40,10 +40,12 @@ VALUE cumo_sym_option;
40
40
  VALUE cumo_sym_loop_opt;
41
41
  VALUE cumo_sym_init;
42
42
 
43
- VALUE cumo_na_cStep;
44
43
  #ifndef HAVE_RB_CCOMPLEX
45
44
  VALUE rb_cComplex;
46
45
  #endif
46
+ #ifdef HAVE_RB_ARITHMETIC_SEQUENCE_EXTRACT
47
+ VALUE rb_cArithSeq;
48
+ #endif
47
49
 
48
50
  int cumo_na_inspect_rows_=20;
49
51
  int cumo_na_inspect_cols_=80;
@@ -1512,7 +1514,11 @@ cumo_na_get_reduce_flag_from_axes(VALUE cumo_na_obj, VALUE axes)
1512
1514
  step = 0;
1513
1515
  //printf("beg=%d step=%d len=%d\n",beg,step,len);
1514
1516
  } else if (rb_obj_is_kind_of(v,rb_cRange) ||
1515
- rb_obj_is_kind_of(v,cumo_na_cStep)) {
1517
+ #ifdef HAVE_RB_ARITHMETIC_SEQUENCE_EXTRACT
1518
+ rb_obj_is_kind_of(v,rb_cArithSeq)) {
1519
+ #else
1520
+ rb_obj_is_kind_of(v,rb_cEnumerator)) {
1521
+ #endif
1516
1522
  cumo_na_step_array_index( v, ndim, &len, &beg, &step );
1517
1523
  } else {
1518
1524
  rb_raise(cumo_na_eDimensionError, "invalid dimension argument %s",
@@ -1849,6 +1855,9 @@ Init_cumo_narray()
1849
1855
  rb_require("complex");
1850
1856
  rb_cComplex = rb_const_get(rb_cObject, rb_intern("Complex"));
1851
1857
  #endif
1858
+ #ifdef HAVE_RB_ARITHMETIC_SEQUENCE_EXTRACT
1859
+ rb_cArithSeq = rb_path2class("Enumerator::ArithmeticSequence");
1860
+ #endif
1852
1861
 
1853
1862
  rb_define_const(cNArray, "VERSION", rb_str_new2(CUMO_VERSION));
1854
1863
 
@@ -1,5 +1,6 @@
1
1
  #include <ruby.h>
2
2
  #include "cumo.h"
3
+ #include "cumo/indexer.h"
3
4
  #include "cumo/narray.h"
4
5
  #include "cumo/cuda/memory_pool.h"
5
6
  #include "cumo/cuda/runtime.h"
@@ -1164,11 +1165,48 @@ cumo_ndfunc_set_bufcp(cumo_ndfunc_t *nf, cumo_na_md_loop_t *lp)
1164
1165
  }
1165
1166
  }
1166
1167
 
1168
+ static cumo_na_iarray_stridx_t
1169
+ cumo_na_make_iarray_buffer_copy(cumo_na_buffer_copy_t* lp)
1170
+ {
1171
+ cumo_na_iarray_stridx_t iarray;
1172
+ int i;
1173
+ int ndim = lp->ndim;
1174
+ iarray.ptr = lp->src_ptr + lp->src_iter[0].pos;
1175
+ for (i = 0; i < ndim; ++i) {
1176
+ if (LITER_SRC(lp,i).idx) {
1177
+ CUMO_SDX_SET_INDEX(iarray.stridx[i], LITER_SRC(lp,i).idx);
1178
+ } else {
1179
+ CUMO_SDX_SET_STRIDE(iarray.stridx[i], LITER_SRC(lp,i).step);
1180
+ }
1181
+ }
1182
+ return iarray;
1183
+ }
1184
+
1185
+ static cumo_na_indexer_t
1186
+ cumo_na_make_indexer_buffer_copy(cumo_na_buffer_copy_t* lp)
1187
+ {
1188
+ cumo_na_indexer_t indexer;
1189
+ int i;
1190
+ indexer.ndim = lp->ndim;
1191
+ indexer.total_size = 1;
1192
+ for (i = 0; i< lp->ndim; ++i) {
1193
+ indexer.shape[i] = lp->n[i];
1194
+ indexer.total_size *= lp->n[i];
1195
+ }
1196
+ return indexer;
1197
+ }
1198
+
1199
+ void cumo_ndloop_copy_to_buffer_kernel_launch(cumo_na_iarray_stridx_t *a, cumo_na_indexer_t* indexer, char *buf, size_t elmsz);
1167
1200
 
1168
1201
  // Make contiguous memory for ops not supporting index or stride (step) loop
1169
1202
  static void
1170
1203
  ndloop_copy_to_buffer(cumo_na_buffer_copy_t *lp)
1171
1204
  {
1205
+ cumo_na_iarray_stridx_t a = cumo_na_make_iarray_buffer_copy(lp);
1206
+ cumo_na_indexer_t indexer = cumo_na_make_indexer_buffer_copy(lp);
1207
+ cumo_ndloop_copy_to_buffer_kernel_launch(&a, &indexer, lp->buf_ptr, lp->elmsz);
1208
+
1209
+ #if 0
1172
1210
  size_t *c;
1173
1211
  char *src, *buf;
1174
1212
  int i;
@@ -1230,11 +1268,19 @@ ndloop_copy_to_buffer(cumo_na_buffer_copy_t *lp)
1230
1268
  loop_end:
1231
1269
  ;
1232
1270
  DBG(printf("]\n"));
1271
+ #endif
1233
1272
  }
1234
1273
 
1274
+ void cumo_ndloop_copy_from_buffer_kernel_launch(cumo_na_iarray_stridx_t *a, cumo_na_indexer_t* indexer, char *buf, size_t elmsz);
1275
+
1235
1276
  static void
1236
1277
  ndloop_copy_from_buffer(cumo_na_buffer_copy_t *lp)
1237
1278
  {
1279
+ cumo_na_iarray_stridx_t a = cumo_na_make_iarray_buffer_copy(lp);
1280
+ cumo_na_indexer_t indexer = cumo_na_make_indexer_buffer_copy(lp);
1281
+ cumo_ndloop_copy_from_buffer_kernel_launch(&a, &indexer, lp->buf_ptr, lp->elmsz);
1282
+
1283
+ #if 0
1238
1284
  size_t *c;
1239
1285
  char *src, *buf;
1240
1286
  int i;
@@ -1291,12 +1337,14 @@ ndloop_copy_from_buffer(cumo_na_buffer_copy_t *lp)
1291
1337
  for (;;) {
1292
1338
  if (i<=0) goto loop_end;
1293
1339
  i--;
1294
- if (++c[i] < lp->n[i]) break;
1340
+ ++c[i];
1341
+ if (c[i] < lp->n[i]) break;
1295
1342
  c[i] = 0;
1296
1343
  }
1297
1344
  }
1298
1345
  loop_end:
1299
1346
  DBG(printf("]\n"));
1347
+ #endif
1300
1348
  }
1301
1349
 
1302
1350
 
@@ -0,0 +1,97 @@
1
+ #include "cumo/narray_kernel.h"
2
+ #include "cumo/indexer.h"
3
+
4
+ #if defined(__cplusplus)
5
+ extern "C" {
6
+ #if 0
7
+ } /* satisfy cc-mode */
8
+ #endif
9
+ #endif
10
+
11
+ #define CUMO_NDLOOP_COPY_FROM_BUFFER_KERNEL(NDIM) \
12
+ __global__ void cumo_ndloop_copy_from_buffer_kernel_dim##NDIM( \
13
+ cumo_na_iarray_stridx_t a, cumo_na_indexer_t indexer, char *buf, size_t elmsz) { \
14
+ for (uint64_t i = blockIdx.x * blockDim.x + threadIdx.x; i < indexer.total_size; i += blockDim.x * gridDim.x) { \
15
+ cumo_na_indexer_set_dim##NDIM(&indexer, i); \
16
+ char* p = cumo_na_iarray_stridx_at_dim##NDIM(&a, &indexer); \
17
+ memcpy(p, buf + i * elmsz, elmsz); \
18
+ } \
19
+ }
20
+
21
+ #define CUMO_NDLOOP_COPY_TO_BUFFER_KERNEL(NDIM) \
22
+ __global__ void cumo_ndloop_copy_to_buffer_kernel_dim##NDIM( \
23
+ cumo_na_iarray_stridx_t a, cumo_na_indexer_t indexer, char *buf, size_t elmsz) { \
24
+ for (uint64_t i = blockIdx.x * blockDim.x + threadIdx.x; i < indexer.total_size; i += blockDim.x * gridDim.x) { \
25
+ cumo_na_indexer_set_dim##NDIM(&indexer, i); \
26
+ char* p = cumo_na_iarray_stridx_at_dim##NDIM(&a, &indexer); \
27
+ memcpy(buf + i * elmsz, p, elmsz); \
28
+ } \
29
+ }
30
+
31
+ CUMO_NDLOOP_COPY_FROM_BUFFER_KERNEL(1)
32
+ CUMO_NDLOOP_COPY_FROM_BUFFER_KERNEL(2)
33
+ CUMO_NDLOOP_COPY_FROM_BUFFER_KERNEL(3)
34
+ CUMO_NDLOOP_COPY_FROM_BUFFER_KERNEL(4)
35
+ CUMO_NDLOOP_COPY_FROM_BUFFER_KERNEL()
36
+
37
+ CUMO_NDLOOP_COPY_TO_BUFFER_KERNEL(1)
38
+ CUMO_NDLOOP_COPY_TO_BUFFER_KERNEL(2)
39
+ CUMO_NDLOOP_COPY_TO_BUFFER_KERNEL(3)
40
+ CUMO_NDLOOP_COPY_TO_BUFFER_KERNEL(4)
41
+ CUMO_NDLOOP_COPY_TO_BUFFER_KERNEL()
42
+
43
+ #undef CUMO_NDLOOP_COPY_FROM_BUFFER_KERNEL
44
+ #undef CUMO_NDLOOP_COPY_TO_BUFFER_KERNEL
45
+
46
+ void cumo_ndloop_copy_from_buffer_kernel_launch(cumo_na_iarray_stridx_t *a, cumo_na_indexer_t* indexer, char *buf, size_t elmsz)
47
+ {
48
+ size_t grid_dim = cumo_get_grid_dim(indexer->total_size);
49
+ size_t block_dim = cumo_get_block_dim(indexer->total_size);
50
+ switch (indexer->ndim) {
51
+ case 1:
52
+ cumo_ndloop_copy_from_buffer_kernel_dim1<<<grid_dim, block_dim>>>(*a,*indexer,buf,elmsz);
53
+ break;
54
+ case 2:
55
+ cumo_ndloop_copy_from_buffer_kernel_dim2<<<grid_dim, block_dim>>>(*a,*indexer,buf,elmsz);
56
+ break;
57
+ case 3:
58
+ cumo_ndloop_copy_from_buffer_kernel_dim3<<<grid_dim, block_dim>>>(*a,*indexer,buf,elmsz);
59
+ break;
60
+ case 4:
61
+ cumo_ndloop_copy_from_buffer_kernel_dim4<<<grid_dim, block_dim>>>(*a,*indexer,buf,elmsz);
62
+ break;
63
+ default:
64
+ cumo_ndloop_copy_from_buffer_kernel_dim<<<grid_dim, block_dim>>>(*a,*indexer,buf,elmsz);
65
+ break;
66
+ }
67
+ }
68
+
69
+ void cumo_ndloop_copy_to_buffer_kernel_launch(cumo_na_iarray_stridx_t *a, cumo_na_indexer_t* indexer, char *buf, size_t elmsz)
70
+ {
71
+ size_t grid_dim = cumo_get_grid_dim(indexer->total_size);
72
+ size_t block_dim = cumo_get_block_dim(indexer->total_size);
73
+ switch (indexer->ndim) {
74
+ case 1:
75
+ cumo_ndloop_copy_to_buffer_kernel_dim1<<<grid_dim, block_dim>>>(*a,*indexer,buf,elmsz);
76
+ break;
77
+ case 2:
78
+ cumo_ndloop_copy_to_buffer_kernel_dim2<<<grid_dim, block_dim>>>(*a,*indexer,buf,elmsz);
79
+ break;
80
+ case 3:
81
+ cumo_ndloop_copy_to_buffer_kernel_dim3<<<grid_dim, block_dim>>>(*a,*indexer,buf,elmsz);
82
+ break;
83
+ case 4:
84
+ cumo_ndloop_copy_to_buffer_kernel_dim4<<<grid_dim, block_dim>>>(*a,*indexer,buf,elmsz);
85
+ break;
86
+ default:
87
+ cumo_ndloop_copy_to_buffer_kernel_dim<<<grid_dim, block_dim>>>(*a,*indexer,buf,elmsz);
88
+ break;
89
+ }
90
+ }
91
+
92
+ #if defined(__cplusplus)
93
+ #if 0
94
+ { /* satisfy cc-mode */
95
+ #endif
96
+ } /* extern "C" { */
97
+ #endif