cumo 0.1.0 → 0.1.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (158) hide show
  1. checksums.yaml +5 -5
  2. data/.gitignore +1 -0
  3. data/3rd_party/LICENSE.txt +60 -0
  4. data/3rd_party/mkmf-cu/lib/mkmf-cu/cli.rb +13 -1
  5. data/LICENSE.txt +1 -62
  6. data/README.md +33 -29
  7. data/bench/cumo_bench.rb +47 -25
  8. data/bench/numo_bench.rb +27 -25
  9. data/docs/src-tree.md +16 -0
  10. data/ext/cumo/cuda/cublas.c +69 -219
  11. data/ext/cumo/cuda/memory_pool_impl.hpp +1 -0
  12. data/ext/cumo/cuda/runtime.c +2 -14
  13. data/ext/cumo/cumo.c +16 -16
  14. data/ext/cumo/include/cumo.h +2 -2
  15. data/ext/cumo/include/cumo/cuda/cublas.h +6 -129
  16. data/ext/cumo/include/cumo/cuda/runtime.h +16 -0
  17. data/ext/cumo/include/cumo/indexer.h +46 -63
  18. data/ext/cumo/include/cumo/intern.h +58 -112
  19. data/ext/cumo/include/cumo/narray.h +214 -185
  20. data/ext/cumo/include/cumo/narray_kernel.h +66 -37
  21. data/ext/cumo/include/cumo/ndloop.h +42 -42
  22. data/ext/cumo/include/cumo/reduce_kernel.h +55 -71
  23. data/ext/cumo/include/cumo/template.h +56 -51
  24. data/ext/cumo/include/cumo/template_kernel.h +31 -31
  25. data/ext/cumo/include/cumo/types/bit.h +3 -3
  26. data/ext/cumo/include/cumo/types/bit_kernel.h +2 -2
  27. data/ext/cumo/include/cumo/types/complex.h +126 -126
  28. data/ext/cumo/include/cumo/types/complex_kernel.h +126 -126
  29. data/ext/cumo/include/cumo/types/complex_macro.h +28 -28
  30. data/ext/cumo/include/cumo/types/complex_macro_kernel.h +20 -20
  31. data/ext/cumo/include/cumo/types/dcomplex.h +5 -5
  32. data/ext/cumo/include/cumo/types/dcomplex_kernel.h +1 -1
  33. data/ext/cumo/include/cumo/types/int_macro.h +1 -1
  34. data/ext/cumo/include/cumo/types/int_macro_kernel.h +1 -1
  35. data/ext/cumo/include/cumo/types/robj_macro.h +30 -30
  36. data/ext/cumo/include/cumo/types/scomplex.h +5 -5
  37. data/ext/cumo/include/cumo/types/scomplex_kernel.h +1 -1
  38. data/ext/cumo/narray/array.c +143 -143
  39. data/ext/cumo/narray/data.c +184 -184
  40. data/ext/cumo/narray/gen/cogen.rb +5 -2
  41. data/ext/cumo/narray/gen/cogen_kernel.rb +5 -2
  42. data/ext/cumo/narray/gen/def/dcomplex.rb +1 -1
  43. data/ext/cumo/narray/gen/def/scomplex.rb +1 -1
  44. data/ext/cumo/narray/gen/erbln.rb +132 -0
  45. data/ext/cumo/narray/gen/erbpp2.rb +18 -13
  46. data/ext/cumo/narray/gen/narray_def.rb +3 -3
  47. data/ext/cumo/narray/gen/spec.rb +2 -2
  48. data/ext/cumo/narray/gen/tmpl/accum.c +15 -15
  49. data/ext/cumo/narray/gen/tmpl/accum_binary.c +22 -22
  50. data/ext/cumo/narray/gen/tmpl/accum_binary_kernel.cu +3 -3
  51. data/ext/cumo/narray/gen/tmpl/accum_index.c +30 -30
  52. data/ext/cumo/narray/gen/tmpl/accum_index_kernel.cu +2 -2
  53. data/ext/cumo/narray/gen/tmpl/accum_kernel.cu +3 -3
  54. data/ext/cumo/narray/gen/tmpl/alloc_func.c +14 -14
  55. data/ext/cumo/narray/gen/tmpl/allocate.c +11 -11
  56. data/ext/cumo/narray/gen/tmpl/aref.c +2 -2
  57. data/ext/cumo/narray/gen/tmpl/aref_cpu.c +4 -4
  58. data/ext/cumo/narray/gen/tmpl/aset.c +2 -2
  59. data/ext/cumo/narray/gen/tmpl/binary.c +28 -28
  60. data/ext/cumo/narray/gen/tmpl/binary2.c +18 -18
  61. data/ext/cumo/narray/gen/tmpl/binary2_kernel.cu +3 -3
  62. data/ext/cumo/narray/gen/tmpl/binary_kernel.cu +6 -6
  63. data/ext/cumo/narray/gen/tmpl/binary_s.c +13 -13
  64. data/ext/cumo/narray/gen/tmpl/binary_s_kernel.cu +3 -3
  65. data/ext/cumo/narray/gen/tmpl/bincount.c +23 -23
  66. data/ext/cumo/narray/gen/tmpl/cast.c +7 -7
  67. data/ext/cumo/narray/gen/tmpl/cast_array.c +3 -3
  68. data/ext/cumo/narray/gen/tmpl/clip.c +38 -38
  69. data/ext/cumo/narray/gen/tmpl/complex_accum_kernel.cu +2 -2
  70. data/ext/cumo/narray/gen/tmpl/cond_binary.c +19 -19
  71. data/ext/cumo/narray/gen/tmpl/cond_binary_kernel.cu +7 -7
  72. data/ext/cumo/narray/gen/tmpl/cond_unary.c +15 -15
  73. data/ext/cumo/narray/gen/tmpl/cum.c +15 -15
  74. data/ext/cumo/narray/gen/tmpl/each.c +9 -9
  75. data/ext/cumo/narray/gen/tmpl/each_with_index.c +9 -9
  76. data/ext/cumo/narray/gen/tmpl/ewcomp.c +15 -15
  77. data/ext/cumo/narray/gen/tmpl/ewcomp_kernel.cu +3 -3
  78. data/ext/cumo/narray/gen/tmpl/extract_cpu.c +5 -5
  79. data/ext/cumo/narray/gen/tmpl/extract_data.c +12 -12
  80. data/ext/cumo/narray/gen/tmpl/eye.c +9 -9
  81. data/ext/cumo/narray/gen/tmpl/eye_kernel.cu +3 -3
  82. data/ext/cumo/narray/gen/tmpl/fill.c +9 -9
  83. data/ext/cumo/narray/gen/tmpl/fill_kernel.cu +6 -6
  84. data/ext/cumo/narray/gen/tmpl/float_accum_kernel.cu +1 -1
  85. data/ext/cumo/narray/gen/tmpl/format.c +11 -11
  86. data/ext/cumo/narray/gen/tmpl/format_to_a.c +8 -8
  87. data/ext/cumo/narray/gen/tmpl/frexp.c +13 -13
  88. data/ext/cumo/narray/gen/tmpl/gemm.c +252 -108
  89. data/ext/cumo/narray/gen/tmpl/inspect.c +1 -1
  90. data/ext/cumo/narray/gen/tmpl/lib.c +2 -2
  91. data/ext/cumo/narray/gen/tmpl/logseq.c +7 -7
  92. data/ext/cumo/narray/gen/tmpl/logseq_kernel.cu +6 -6
  93. data/ext/cumo/narray/gen/tmpl/map_with_index.c +17 -17
  94. data/ext/cumo/narray/gen/tmpl/median.c +10 -10
  95. data/ext/cumo/narray/gen/tmpl/minmax.c +10 -10
  96. data/ext/cumo/narray/gen/tmpl/new_dim0.c +3 -3
  97. data/ext/cumo/narray/gen/tmpl/poly.c +6 -6
  98. data/ext/cumo/narray/gen/tmpl/pow.c +28 -28
  99. data/ext/cumo/narray/gen/tmpl/pow_kernel.cu +6 -6
  100. data/ext/cumo/narray/gen/tmpl/rand.c +10 -10
  101. data/ext/cumo/narray/gen/tmpl/rand_norm.c +7 -7
  102. data/ext/cumo/narray/gen/tmpl/real_accum_kernel.cu +6 -6
  103. data/ext/cumo/narray/gen/tmpl/seq.c +7 -7
  104. data/ext/cumo/narray/gen/tmpl/seq_kernel.cu +6 -6
  105. data/ext/cumo/narray/gen/tmpl/set2.c +20 -20
  106. data/ext/cumo/narray/gen/tmpl/sort.c +11 -11
  107. data/ext/cumo/narray/gen/tmpl/sort_index.c +18 -18
  108. data/ext/cumo/narray/gen/tmpl/store.c +6 -6
  109. data/ext/cumo/narray/gen/tmpl/store_array.c +19 -19
  110. data/ext/cumo/narray/gen/tmpl/store_array_kernel.cu +12 -12
  111. data/ext/cumo/narray/gen/tmpl/store_bit.c +23 -23
  112. data/ext/cumo/narray/gen/tmpl/store_bit_kernel.cu +28 -28
  113. data/ext/cumo/narray/gen/tmpl/store_from.c +16 -16
  114. data/ext/cumo/narray/gen/tmpl/store_from_kernel.cu +12 -12
  115. data/ext/cumo/narray/gen/tmpl/to_a.c +10 -10
  116. data/ext/cumo/narray/gen/tmpl/unary.c +25 -25
  117. data/ext/cumo/narray/gen/tmpl/unary2.c +17 -17
  118. data/ext/cumo/narray/gen/tmpl/unary_kernel.cu +15 -15
  119. data/ext/cumo/narray/gen/tmpl/unary_ret2.c +13 -13
  120. data/ext/cumo/narray/gen/tmpl/unary_s.c +17 -17
  121. data/ext/cumo/narray/gen/tmpl/unary_s_kernel.cu +12 -12
  122. data/ext/cumo/narray/gen/tmpl_bit/allocate.c +9 -9
  123. data/ext/cumo/narray/gen/tmpl_bit/aref.c +2 -2
  124. data/ext/cumo/narray/gen/tmpl_bit/aref_cpu.c +5 -5
  125. data/ext/cumo/narray/gen/tmpl_bit/aset.c +2 -2
  126. data/ext/cumo/narray/gen/tmpl_bit/binary.c +29 -29
  127. data/ext/cumo/narray/gen/tmpl_bit/bit_count.c +14 -14
  128. data/ext/cumo/narray/gen/tmpl_bit/bit_count_cpu.c +21 -21
  129. data/ext/cumo/narray/gen/tmpl_bit/bit_count_kernel.cu +28 -28
  130. data/ext/cumo/narray/gen/tmpl_bit/bit_reduce.c +29 -29
  131. data/ext/cumo/narray/gen/tmpl_bit/each.c +10 -10
  132. data/ext/cumo/narray/gen/tmpl_bit/each_with_index.c +10 -10
  133. data/ext/cumo/narray/gen/tmpl_bit/extract.c +8 -8
  134. data/ext/cumo/narray/gen/tmpl_bit/extract_cpu.c +8 -8
  135. data/ext/cumo/narray/gen/tmpl_bit/fill.c +17 -17
  136. data/ext/cumo/narray/gen/tmpl_bit/format.c +14 -14
  137. data/ext/cumo/narray/gen/tmpl_bit/format_to_a.c +11 -11
  138. data/ext/cumo/narray/gen/tmpl_bit/inspect.c +3 -3
  139. data/ext/cumo/narray/gen/tmpl_bit/mask.c +33 -33
  140. data/ext/cumo/narray/gen/tmpl_bit/store_array.c +19 -19
  141. data/ext/cumo/narray/gen/tmpl_bit/store_bit.c +22 -22
  142. data/ext/cumo/narray/gen/tmpl_bit/store_from.c +18 -18
  143. data/ext/cumo/narray/gen/tmpl_bit/to_a.c +12 -12
  144. data/ext/cumo/narray/gen/tmpl_bit/unary.c +24 -24
  145. data/ext/cumo/narray/gen/tmpl_bit/where.c +16 -16
  146. data/ext/cumo/narray/gen/tmpl_bit/where2.c +20 -20
  147. data/ext/cumo/narray/index.c +213 -213
  148. data/ext/cumo/narray/math.c +27 -27
  149. data/ext/cumo/narray/narray.c +484 -484
  150. data/ext/cumo/narray/ndloop.c +259 -258
  151. data/ext/cumo/narray/rand.c +3 -3
  152. data/ext/cumo/narray/step.c +70 -70
  153. data/ext/cumo/narray/struct.c +139 -139
  154. metadata +6 -7
  155. data/ext/cumo/include/cumo/intern_fwd.h +0 -38
  156. data/lib/erbpp.rb +0 -294
  157. data/lib/erbpp/line_number.rb +0 -137
  158. data/lib/erbpp/narray_def.rb +0 -381
@@ -17,33 +17,33 @@
17
17
  #define va_init_list(a,b) va_start(a)
18
18
  #endif
19
19
 
20
- typedef struct NA_BUFFER_COPY {
20
+ typedef struct CUMO_NA_BUFFER_COPY {
21
21
  int ndim;
22
22
  size_t elmsz;
23
23
  size_t *n;
24
24
  char *src_ptr;
25
25
  char *buf_ptr;
26
- na_loop_iter_t *src_iter;
27
- na_loop_iter_t *buf_iter;
28
- } na_buffer_copy_t;
29
-
30
- typedef struct NA_LOOP_XARGS {
31
- na_loop_iter_t *iter; // moved from na_loop_t
32
- na_buffer_copy_t *bufcp; // copy data to buffer
33
- int flag; // NDL_READ NDL_WRITE
26
+ cumo_na_loop_iter_t *src_iter;
27
+ cumo_na_loop_iter_t *buf_iter;
28
+ } cumo_na_buffer_copy_t;
29
+
30
+ typedef struct CUMO_NA_LOOP_XARGS {
31
+ cumo_na_loop_iter_t *iter; // moved from cumo_na_loop_t
32
+ cumo_na_buffer_copy_t *bufcp; // copy data to buffer
33
+ int flag; // CUMO_NDL_READ CUMO_NDL_WRITE
34
34
  bool free_user_iter; // alloc LARG(lp,j).iter=lp->xargs[j].iter
35
- } na_loop_xargs_t;
35
+ } cumo_na_loop_xargs_t;
36
36
 
37
- typedef struct NA_MD_LOOP {
37
+ typedef struct CUMO_NA_MD_LOOP {
38
38
  int narg;
39
39
  int nin;
40
40
  int ndim; // n of total dimensions looped at loop_narray. NOTE: lp->ndim + lp->user.ndim is the total dimension.
41
41
  unsigned int copy_flag; // set i-th bit if i-th arg is cast
42
42
  void *ptr; // memory for n
43
- na_loop_iter_t *iter_ptr; // memory for iter
43
+ cumo_na_loop_iter_t *iter_ptr; // memory for iter
44
44
  size_t *n; // n of elements for each dim (shape)
45
- na_loop_t user; // loop in user function
46
- na_loop_xargs_t *xargs; // extra data for each arg
45
+ cumo_na_loop_t user; // loop in user function
46
+ cumo_na_loop_xargs_t *xargs; // extra data for each arg
47
47
  int writeback; // write back result to i-th arg
48
48
  int init_aidx; // index of initializer argument
49
49
  int reduce_dim; // number of dimensions to reduce in reduction kernel, e.g., for an array of shape: [2,3,4],
@@ -53,35 +53,35 @@ typedef struct NA_MD_LOOP {
53
53
  VALUE reduce; // dimension indices to reduce in reduction kernel (in bits), e.g., for an array of shape:
54
54
  // [2,3,4], 111b for sum(), 010b for sum(axis: 1), 110b for sum(axis: [1,2])
55
55
  VALUE loop_opt;
56
- ndfunc_t *ndfunc;
56
+ cumo_ndfunc_t *ndfunc;
57
57
  void (*loop_func)();
58
- } na_md_loop_t;
58
+ } cumo_na_md_loop_t;
59
59
 
60
60
  #define LARG(lp,iarg) ((lp)->user.args[iarg])
61
61
  #define LITER(lp,idim,iarg) ((lp)->xargs[iarg].iter[idim])
62
62
  #define LITER_SRC(lp,idim) ((lp)->src_iter[idim])
63
63
  #define LBUFCP(lp,j) ((lp)->xargs[j].bufcp)
64
64
 
65
- #define CASTABLE(t) (RTEST(t) && (t)!=OVERWRITE)
65
+ #define CASTABLE(t) (RTEST(t) && (t)!=CUMO_OVERWRITE)
66
66
 
67
- #define NDL_READ 1
68
- #define NDL_WRITE 2
69
- #define NDL_READ_WRITE (NDL_READ|NDL_WRITE)
67
+ #define CUMO_NDL_READ 1
68
+ #define CUMO_NDL_WRITE 2
69
+ #define CUMO_NDL_READ_WRITE (CUMO_NDL_READ|CUMO_NDL_WRITE)
70
70
 
71
- static ID id_cast;
72
- static ID id_extract;
71
+ static ID cumo_id_cast;
72
+ static ID cumo_id_extract;
73
73
 
74
74
  static inline VALUE
75
- nary_type_s_cast(VALUE type, VALUE obj)
75
+ cumo_na_type_s_cast(VALUE type, VALUE obj)
76
76
  {
77
- return rb_funcall(type,id_cast,1,obj);
77
+ return rb_funcall(type,cumo_id_cast,1,obj);
78
78
  }
79
79
 
80
80
  static void
81
- print_ndfunc(ndfunc_t *nf) {
81
+ print_ndfunc(cumo_ndfunc_t *nf) {
82
82
  volatile VALUE t;
83
83
  int i, k;
84
- printf("ndfunc_t = 0x%"SZF"x {\n",(size_t)nf);
84
+ printf("cumo_ndfunc_t = 0x%"SZF"x {\n",(size_t)nf);
85
85
  printf(" func = 0x%"SZF"x\n", (size_t)nf->func);
86
86
  printf(" flag = 0x%"SZF"x\n", (size_t)nf->flag);
87
87
  printf(" nin = %d\n", nf->nin);
@@ -106,9 +106,9 @@ print_ndfunc(ndfunc_t *nf) {
106
106
 
107
107
 
108
108
  static void
109
- print_ndloop(na_md_loop_t *lp) {
109
+ print_ndloop(cumo_na_md_loop_t *lp) {
110
110
  int i,j,nd;
111
- printf("na_md_loop_t = 0x%"SZF"x {\n",(size_t)lp);
111
+ printf("cumo_na_md_loop_t = 0x%"SZF"x {\n",(size_t)lp);
112
112
  printf(" narg = %d\n", lp->narg);
113
113
  printf(" nin = %d\n", lp->nin);
114
114
  printf(" ndim = %d\n", lp->ndim);
@@ -190,18 +190,18 @@ print_ndloop(na_md_loop_t *lp) {
190
190
  }
191
191
 
192
192
 
193
- // returns 0x01 if NDF_HAS_LOOP, but not supporting NDF_STRIDE_LOOP
194
- // returns 0x02 if NDF_HAS_LOOP, but not supporting NDF_INDEX_LOOP
193
+ // returns 0x01 if CUMO_NDF_HAS_LOOP, but not supporting CUMO_NDF_STRIDE_LOOP
194
+ // returns 0x02 if CUMO_NDF_HAS_LOOP, but not supporting CUMO_NDF_INDEX_LOOP
195
195
  static unsigned int
196
- ndloop_func_loop_spec(ndfunc_t *nf, int user_ndim)
196
+ ndloop_func_loop_spec(cumo_ndfunc_t *nf, int user_ndim)
197
197
  {
198
198
  unsigned int f=0;
199
199
  // If user function supports LOOP
200
- if (user_ndim > 0 || NDF_TEST(nf,NDF_HAS_LOOP)) {
201
- if (!NDF_TEST(nf,NDF_STRIDE_LOOP)) {
200
+ if (user_ndim > 0 || CUMO_NDF_TEST(nf,CUMO_NDF_HAS_LOOP)) {
201
+ if (!CUMO_NDF_TEST(nf,CUMO_NDF_STRIDE_LOOP)) {
202
202
  f |= 1;
203
203
  }
204
- if (!NDF_TEST(nf,NDF_INDEX_LOOP)) {
204
+ if (!CUMO_NDF_TEST(nf,CUMO_NDF_INDEX_LOOP)) {
205
205
  f |= 2;
206
206
  }
207
207
  }
@@ -214,7 +214,7 @@ ndloop_func_loop_spec(ndfunc_t *nf, int user_ndim)
214
214
  static int
215
215
  ndloop_cast_required(VALUE type, VALUE value)
216
216
  {
217
- return CASTABLE(type) && type != CLASS_OF(value);
217
+ return CASTABLE(type) && type != rb_obj_class(value);
218
218
  }
219
219
 
220
220
  static int
@@ -237,7 +237,7 @@ ndloop_cast_error(VALUE type, VALUE value)
237
237
  // to type specified by nf->args[j].type
238
238
  // returns copy_flag where nth-bit is set if nth argument is converted.
239
239
  static unsigned int
240
- ndloop_cast_args(ndfunc_t *nf, VALUE args)
240
+ ndloop_cast_args(cumo_ndfunc_t *nf, VALUE args)
241
241
  {
242
242
  int j;
243
243
  unsigned int copy_flag=0;
@@ -253,7 +253,7 @@ ndloop_cast_args(ndfunc_t *nf, VALUE args)
253
253
  continue;
254
254
 
255
255
  if (ndloop_castable_type(type)) {
256
- RARRAY_ASET(args,j,nary_type_s_cast(type, value));
256
+ RARRAY_ASET(args,j,cumo_na_type_s_cast(type, value));
257
257
  copy_flag |= 1<<j;
258
258
  } else {
259
259
  ndloop_cast_error(type, value);
@@ -266,18 +266,18 @@ ndloop_cast_args(ndfunc_t *nf, VALUE args)
266
266
 
267
267
 
268
268
  static void
269
- ndloop_handle_symbol_in_ain(VALUE type, VALUE value, int at, na_md_loop_t *lp)
269
+ ndloop_handle_symbol_in_ain(VALUE type, VALUE value, int at, cumo_na_md_loop_t *lp)
270
270
  {
271
- if (type==sym_reduce) {
271
+ if (type==cumo_sym_reduce) {
272
272
  lp->reduce = value;
273
273
  }
274
- else if (type==sym_option) {
274
+ else if (type==cumo_sym_option) {
275
275
  lp->user.option = value;
276
276
  }
277
- else if (type==sym_loop_opt) {
277
+ else if (type==cumo_sym_loop_opt) {
278
278
  lp->loop_opt = value;
279
279
  }
280
- else if (type==sym_init) {
280
+ else if (type==cumo_sym_init) {
281
281
  lp->init_aidx = at;
282
282
  }
283
283
  else {
@@ -292,7 +292,7 @@ max2(int x, int y)
292
292
  }
293
293
 
294
294
  static void
295
- ndloop_find_max_dimension(na_md_loop_t *lp, ndfunc_t *nf, VALUE args)
295
+ ndloop_find_max_dimension(cumo_na_md_loop_t *lp, cumo_ndfunc_t *nf, VALUE args)
296
296
  {
297
297
  int j;
298
298
  int nin=0; // number of input objects (except for symbols)
@@ -307,8 +307,8 @@ ndloop_find_max_dimension(na_md_loop_t *lp, ndfunc_t *nf, VALUE args)
307
307
  } else {
308
308
  nin++;
309
309
  user_nd = max2(user_nd, nf->ain[j].dim);
310
- if (IsNArray(v))
311
- loop_nd = max2(loop_nd, RNARRAY_NDIM(v) - nf->ain[j].dim);
310
+ if (CumoIsNArray(v))
311
+ loop_nd = max2(loop_nd, CUMO_RNARRAY_NDIM(v) - nf->ain[j].dim);
312
312
  }
313
313
  }
314
314
 
@@ -329,9 +329,9 @@ ndloop_find_max_dimension(na_md_loop_t *lp, ndfunc_t *nf, VALUE args)
329
329
  */
330
330
 
331
331
  static void
332
- ndloop_alloc(na_md_loop_t *lp, ndfunc_t *nf, VALUE args,
332
+ ndloop_alloc(cumo_na_md_loop_t *lp, cumo_ndfunc_t *nf, VALUE args,
333
333
  void *opt_ptr, unsigned int copy_flag,
334
- void (*loop_func)(ndfunc_t*, na_md_loop_t*))
334
+ void (*loop_func)(cumo_ndfunc_t*, cumo_na_md_loop_t*))
335
335
  {
336
336
  int i,j;
337
337
  int narg;
@@ -342,7 +342,7 @@ ndloop_alloc(na_md_loop_t *lp, ndfunc_t *nf, VALUE args,
342
342
 
343
343
  long args_len;
344
344
 
345
- na_loop_iter_t *iter;
345
+ cumo_na_loop_iter_t *iter;
346
346
 
347
347
  int trans_dim;
348
348
  unsigned int f;
@@ -375,19 +375,19 @@ ndloop_alloc(na_md_loop_t *lp, ndfunc_t *nf, VALUE args,
375
375
  max_nd = lp->ndim + lp->user.ndim;
376
376
 
377
377
  n1 = sizeof(size_t)*(max_nd+1);
378
- n2 = sizeof(na_loop_xargs_t)*narg;
378
+ n2 = sizeof(cumo_na_loop_xargs_t)*narg;
379
379
  n2 = ((n2-1)/8+1)*8;
380
- n3 = sizeof(na_loop_args_t)*narg;
380
+ n3 = sizeof(cumo_na_loop_args_t)*narg;
381
381
  n3 = ((n3-1)/8+1)*8;
382
- n4 = sizeof(na_loop_iter_t)*narg*(max_nd+1);
382
+ n4 = sizeof(cumo_na_loop_iter_t)*narg*(max_nd+1);
383
383
  n4 = ((n4-1)/8+1)*8;
384
384
  n5 = sizeof(int)*(max_nd+1);
385
385
 
386
386
  lp->ptr = buf = (char*)xmalloc(n1+n2+n3+n4+n5);
387
387
  lp->n = (size_t*)buf; buf+=n1;
388
- lp->xargs = (na_loop_xargs_t*)buf; buf+=n2;
389
- lp->user.args = (na_loop_args_t*)buf; buf+=n3;
390
- lp->iter_ptr = iter = (na_loop_iter_t*)buf; buf+=n4;
388
+ lp->xargs = (cumo_na_loop_xargs_t*)buf; buf+=n2;
389
+ lp->user.args = (cumo_na_loop_args_t*)buf; buf+=n3;
390
+ lp->iter_ptr = iter = (cumo_na_loop_iter_t*)buf; buf+=n4;
391
391
  lp->trans_map = (int*)buf;
392
392
 
393
393
  for (j=0; j<narg; j++) {
@@ -397,7 +397,7 @@ ndloop_alloc(na_md_loop_t *lp, ndfunc_t *nf, VALUE args,
397
397
  LARG(lp,j).ndim = 0;
398
398
  lp->xargs[j].iter = &(iter[(max_nd+1)*j]);
399
399
  lp->xargs[j].bufcp = NULL;
400
- lp->xargs[j].flag = (j<lp->nin) ? NDL_READ : NDL_WRITE;
400
+ lp->xargs[j].flag = (j<lp->nin) ? CUMO_NDL_READ : CUMO_NDL_WRITE;
401
401
  lp->xargs[j].free_user_iter = 0;
402
402
  }
403
403
 
@@ -414,10 +414,10 @@ ndloop_alloc(na_md_loop_t *lp, ndfunc_t *nf, VALUE args,
414
414
  // array loop
415
415
  // [*,+,*,+,*] => [*,*,*,+,+]
416
416
  // trans_map=[0,3,1,4,2] <= [0,1,2,3,4]
417
- if (NDF_TEST(nf,NDF_FLAT_REDUCE) && RTEST(lp->reduce)) {
417
+ if (CUMO_NDF_TEST(nf,CUMO_NDF_FLAT_REDUCE) && RTEST(lp->reduce)) {
418
418
  trans_dim = 0;
419
419
  for (i=0; i<max_nd; i++) {
420
- if (na_test_reduce(lp->reduce, i)) {
420
+ if (cumo_na_test_reduce(lp->reduce, i)) {
421
421
  lp->trans_map[i] = -1;
422
422
  } else {
423
423
  lp->trans_map[i] = trans_dim++;
@@ -449,12 +449,12 @@ ndloop_release(VALUE vlp)
449
449
  {
450
450
  int j;
451
451
  VALUE v;
452
- na_md_loop_t *lp = (na_md_loop_t*)(vlp);
452
+ cumo_na_md_loop_t *lp = (cumo_na_md_loop_t*)(vlp);
453
453
 
454
454
  for (j=0; j < lp->narg; j++) {
455
455
  v = LARG(lp,j).value;
456
- if (IsNArray(v)) {
457
- na_release_lock(v);
456
+ if (CumoIsNArray(v)) {
457
+ cumo_na_release_lock(v);
458
458
  }
459
459
  }
460
460
  for (j=0; j<lp->narg; j++) {
@@ -483,7 +483,7 @@ ndloop_release(VALUE vlp)
483
483
  set lp->n[i] (shape of n-d iteration) here
484
484
  */
485
485
  static void
486
- ndloop_check_shape(na_md_loop_t *lp, int nf_dim, narray_t *na)
486
+ ndloop_check_shape(cumo_na_md_loop_t *lp, int nf_dim, cumo_narray_t *na)
487
487
  {
488
488
  int i, k;
489
489
  size_t n;
@@ -500,7 +500,7 @@ ndloop_check_shape(na_md_loop_t *lp, int nf_dim, narray_t *na)
500
500
  lp->n[i] = n;
501
501
  } else if (lp->n[i] != n) {
502
502
  // inconsistent array shape
503
- rb_raise(nary_eShapeError,"shape1[%d](=%"SZF"u) != shape2[%d](=%"SZF"u)",
503
+ rb_raise(cumo_na_eShapeError,"shape1[%d](=%"SZF"u) != shape2[%d](=%"SZF"u)",
504
504
  i, lp->n[i], k, n);
505
505
  }
506
506
  }
@@ -512,37 +512,37 @@ ndloop_check_shape(na_md_loop_t *lp, int nf_dim, narray_t *na)
512
512
  na->shape[i] == lp->n[ dim_map[i] ]
513
513
  */
514
514
  static void
515
- ndloop_set_stepidx(na_md_loop_t *lp, int j, VALUE vna, int *dim_map, int rwflag)
515
+ ndloop_set_stepidx(cumo_na_md_loop_t *lp, int j, VALUE vna, int *dim_map, int rwflag)
516
516
  {
517
517
  size_t n, s;
518
518
  int i, k, nd;
519
- stridx_t sdx;
520
- narray_t *na;
519
+ cumo_stridx_t sdx;
520
+ cumo_narray_t *na;
521
521
 
522
522
  LARG(lp,j).value = vna;
523
- LARG(lp,j).elmsz = nary_element_stride(vna);
524
- if (rwflag == NDL_READ) {
525
- LARG(lp,j).ptr = na_get_pointer_for_read(vna);
523
+ LARG(lp,j).elmsz = cumo_na_element_stride(vna);
524
+ if (rwflag == CUMO_NDL_READ) {
525
+ LARG(lp,j).ptr = cumo_na_get_pointer_for_read(vna);
526
526
  } else
527
- if (rwflag == NDL_WRITE) {
528
- LARG(lp,j).ptr = na_get_pointer_for_write(vna);
527
+ if (rwflag == CUMO_NDL_WRITE) {
528
+ LARG(lp,j).ptr = cumo_na_get_pointer_for_write(vna);
529
529
  } else
530
- if (rwflag == NDL_READ_WRITE) {
531
- LARG(lp,j).ptr = na_get_pointer_for_read_write(vna);
530
+ if (rwflag == CUMO_NDL_READ_WRITE) {
531
+ LARG(lp,j).ptr = cumo_na_get_pointer_for_read_write(vna);
532
532
  } else {
533
533
  rb_bug("invalid value for read-write flag");
534
534
  }
535
- GetNArray(vna,na);
535
+ CumoGetNArray(vna,na);
536
536
  nd = LARG(lp,j).ndim;
537
537
 
538
- switch(NA_TYPE(na)) {
539
- case NARRAY_DATA_T:
540
- if (NA_DATA_PTR(na)==NULL && NA_SIZE(na)>0) {
538
+ switch(CUMO_NA_TYPE(na)) {
539
+ case CUMO_NARRAY_DATA_T:
540
+ if (CUMO_NA_DATA_PTR(na)==NULL && CUMO_NA_SIZE(na)>0) {
541
541
  rb_bug("cannot read no-data NArray");
542
542
  rb_raise(rb_eRuntimeError,"cannot read no-data NArray");
543
543
  }
544
544
  // fall through
545
- case NARRAY_FILEMAP_T:
545
+ case CUMO_NARRAY_FILEMAP_T:
546
546
  s = LARG(lp,j).elmsz;
547
547
  for (k=na->ndim; k--;) {
548
548
  n = na->shape[k];
@@ -557,25 +557,25 @@ ndloop_set_stepidx(na_md_loop_t *lp, int j, VALUE vna, int *dim_map, int rwflag)
557
557
  }
558
558
  LITER(lp,0,j).pos = 0;
559
559
  break;
560
- case NARRAY_VIEW_T:
561
- LITER(lp,0,j).pos = NA_VIEW_OFFSET(na);
560
+ case CUMO_NARRAY_VIEW_T:
561
+ LITER(lp,0,j).pos = CUMO_NA_VIEW_OFFSET(na);
562
562
  for (k=0; k<na->ndim; k++) {
563
563
  n = na->shape[k];
564
- sdx = NA_VIEW_STRIDX(na)[k];
564
+ sdx = CUMO_NA_VIEW_STRIDX(na)[k];
565
565
  if (n > 1 || nd > 0) {
566
566
  i = dim_map[k];
567
- if (SDX_IS_INDEX(sdx)) {
567
+ if (CUMO_SDX_IS_INDEX(sdx)) {
568
568
  LITER(lp,i,j).step = 0;
569
- LITER(lp,i,j).idx = SDX_GET_INDEX(sdx);
569
+ LITER(lp,i,j).idx = CUMO_SDX_GET_INDEX(sdx);
570
570
  } else {
571
- LITER(lp,i,j).step = SDX_GET_STRIDE(sdx);
571
+ LITER(lp,i,j).step = CUMO_SDX_GET_STRIDE(sdx);
572
572
  //LITER(lp,i,j).idx = NULL;
573
573
  }
574
574
  } else if (n==1) {
575
- if (SDX_IS_INDEX(sdx)) {
576
- SHOW_SYNCHRONIZE_FIXME_WARNING_ONCE("ndloop_set_stepidx", "any");
575
+ if (CUMO_SDX_IS_INDEX(sdx)) {
576
+ CUMO_SHOW_SYNCHRONIZE_FIXME_WARNING_ONCE("ndloop_set_stepidx", "any");
577
577
  cumo_cuda_runtime_check_status(cudaDeviceSynchronize());
578
- LITER(lp,0,j).pos += SDX_GET_INDEX(sdx)[0];
578
+ LITER(lp,0,j).pos += CUMO_SDX_GET_INDEX(sdx)[0];
579
579
  }
580
580
  }
581
581
  nd--;
@@ -589,11 +589,11 @@ ndloop_set_stepidx(na_md_loop_t *lp, int j, VALUE vna, int *dim_map, int rwflag)
589
589
 
590
590
 
591
591
  static void
592
- ndloop_init_args(ndfunc_t *nf, na_md_loop_t *lp, VALUE args)
592
+ ndloop_init_args(cumo_ndfunc_t *nf, cumo_na_md_loop_t *lp, VALUE args)
593
593
  {
594
594
  int i, j;
595
595
  VALUE v;
596
- narray_t *na;
596
+ cumo_narray_t *na;
597
597
  int nf_dim;
598
598
  int dim_beg;
599
599
  int *dim_map;
@@ -612,12 +612,12 @@ na->shape[i] == lp->n[ dim_map[i] ]
612
612
  continue;
613
613
  }
614
614
  v = RARRAY_AREF(args,j);
615
- if (IsNArray(v)) {
615
+ if (CumoIsNArray(v)) {
616
616
  // set LARG(lp,j) with v
617
- GetNArray(v,na);
617
+ CumoGetNArray(v,na);
618
618
  nf_dim = nf->ain[j].dim;
619
619
  if (nf_dim > na->ndim) {
620
- rb_raise(nary_eDimensionError,"requires >= %d-dimensioal array "
620
+ rb_raise(cumo_na_eDimensionError,"requires >= %d-dimensioal array "
621
621
  "while %d-dimensional array is given",nf_dim,na->ndim);
622
622
  }
623
623
  ndloop_check_shape(lp, nf_dim, na);
@@ -626,10 +626,10 @@ na->shape[i] == lp->n[ dim_map[i] ]
626
626
  dim_map[i] = lp->trans_map[i+dim_beg];
627
627
  //printf("dim_map[%d]=%d na->shape[%d]=%d\n",i,dim_map[i],i,na->shape[i]);
628
628
  }
629
- if (nf->ain[j].type==OVERWRITE) {
630
- lp->xargs[j].flag = flag = NDL_WRITE;
629
+ if (nf->ain[j].type==CUMO_OVERWRITE) {
630
+ lp->xargs[j].flag = flag = CUMO_NDL_WRITE;
631
631
  } else {
632
- lp->xargs[j].flag = flag = NDL_READ;
632
+ lp->xargs[j].flag = flag = CUMO_NDL_READ;
633
633
  }
634
634
  LARG(lp,j).ndim = nf_dim;
635
635
  ndloop_set_stepidx(lp, j, v, dim_map, flag);
@@ -658,22 +658,22 @@ na->shape[i] == lp->n[ dim_map[i] ]
658
658
 
659
659
 
660
660
  static int
661
- ndloop_check_inplace(VALUE type, int na_ndim, size_t *na_shape, VALUE v)
661
+ ndloop_check_inplace(VALUE type, int cumo_na_ndim, size_t *cumo_na_shape, VALUE v)
662
662
  {
663
663
  int i;
664
- narray_t *na;
664
+ cumo_narray_t *na;
665
665
 
666
666
  // type check
667
- if (type != CLASS_OF(v)) {
667
+ if (type != rb_obj_class(v)) {
668
668
  return 0;
669
669
  }
670
- GetNArray(v,na);
670
+ CumoGetNArray(v,na);
671
671
  // shape check
672
- if (na->ndim != na_ndim) {
672
+ if (na->ndim != cumo_na_ndim) {
673
673
  return 0;
674
674
  }
675
- for (i=0; i<na_ndim; i++) {
676
- if (na_shape[i] != na->shape[i]) {
675
+ for (i=0; i<cumo_na_ndim; i++) {
676
+ if (cumo_na_shape[i] != na->shape[i]) {
677
677
  return 0;
678
678
  }
679
679
  }
@@ -682,8 +682,8 @@ ndloop_check_inplace(VALUE type, int na_ndim, size_t *na_shape, VALUE v)
682
682
  }
683
683
 
684
684
  static VALUE
685
- ndloop_find_inplace(ndfunc_t *nf, na_md_loop_t *lp, VALUE type,
686
- int na_ndim, size_t *na_shape, VALUE args)
685
+ ndloop_find_inplace(cumo_ndfunc_t *nf, cumo_na_md_loop_t *lp, VALUE type,
686
+ int cumo_na_ndim, size_t *cumo_na_shape, VALUE args)
687
687
  {
688
688
  int j;
689
689
  VALUE v;
@@ -691,9 +691,9 @@ ndloop_find_inplace(ndfunc_t *nf, na_md_loop_t *lp, VALUE type,
691
691
  // find inplace
692
692
  for (j=0; j<nf->nin; j++) {
693
693
  v = RARRAY_AREF(args,j);
694
- if (IsNArray(v)) {
695
- if (TEST_INPLACE(v)) {
696
- if (ndloop_check_inplace(type,na_ndim,na_shape,v)) {
694
+ if (CumoIsNArray(v)) {
695
+ if (CUMO_TEST_INPLACE(v)) {
696
+ if (ndloop_check_inplace(type,cumo_na_ndim,cumo_na_shape,v)) {
697
697
  // if already copied, create outary and write-back
698
698
  if (lp->copy_flag & (1<<j)) {
699
699
  lp->writeback = j;
@@ -707,7 +707,7 @@ ndloop_find_inplace(ndfunc_t *nf, na_md_loop_t *lp, VALUE type,
707
707
  for (j=0; j<nf->nin; j++) {
708
708
  if (lp->copy_flag & (1<<j)) {
709
709
  v = RARRAY_AREF(args,j);
710
- if (ndloop_check_inplace(type,na_ndim,na_shape,v)) {
710
+ if (ndloop_check_inplace(type,cumo_na_ndim,cumo_na_shape,v)) {
711
711
  return v;
712
712
  }
713
713
  }
@@ -718,7 +718,7 @@ ndloop_find_inplace(ndfunc_t *nf, na_md_loop_t *lp, VALUE type,
718
718
 
719
719
 
720
720
  static VALUE
721
- ndloop_get_arg_type(ndfunc_t *nf, VALUE args, VALUE t)
721
+ ndloop_get_arg_type(cumo_ndfunc_t *nf, VALUE args, VALUE t)
722
722
  {
723
723
  int i;
724
724
 
@@ -731,7 +731,7 @@ ndloop_get_arg_type(ndfunc_t *nf, VALUE args, VALUE t)
731
731
  t = nf->ain[i].type;
732
732
  // if i-th type is Qnil, get the type of i-th input value
733
733
  if (!CASTABLE(t)) {
734
- t = CLASS_OF(RARRAY_AREF(args,i));
734
+ t = rb_obj_class(RARRAY_AREF(args,i));
735
735
  }
736
736
  }
737
737
  return t;
@@ -739,61 +739,61 @@ ndloop_get_arg_type(ndfunc_t *nf, VALUE args, VALUE t)
739
739
 
740
740
 
741
741
  static VALUE
742
- ndloop_set_output_narray(ndfunc_t *nf, na_md_loop_t *lp, int k,
742
+ ndloop_set_output_narray(cumo_ndfunc_t *nf, cumo_na_md_loop_t *lp, int k,
743
743
  VALUE type, VALUE args)
744
744
  {
745
745
  int i, j;
746
- int na_ndim;
746
+ int cumo_na_ndim;
747
747
  int lp_dim;
748
748
  volatile VALUE v=Qnil;
749
- size_t *na_shape;
749
+ size_t *cumo_na_shape;
750
750
  int *dim_map;
751
- int flag = NDL_READ_WRITE;
751
+ int flag = CUMO_NDL_READ_WRITE;
752
752
  int nd;
753
753
  int max_nd = lp->ndim + nf->aout[k].dim;
754
754
 
755
- na_shape = ALLOCA_N(size_t, max_nd);
755
+ cumo_na_shape = ALLOCA_N(size_t, max_nd);
756
756
  dim_map = ALLOCA_N(int, max_nd);
757
757
 
758
758
  //printf("max_nd=%d lp->ndim=%d\n",max_nd,lp->ndim);
759
759
 
760
760
  // md-loop shape
761
- na_ndim = 0;
761
+ cumo_na_ndim = 0;
762
762
  for (i=0; i<lp->ndim; i++) {
763
- // na_shape[i] == lp->n[lp->trans_map[i]]
763
+ // cumo_na_shape[i] == lp->n[lp->trans_map[i]]
764
764
  lp_dim = lp->trans_map[i];
765
765
  //printf("i=%d lp_dim=%d\n",i,lp_dim);
766
- if (NDF_TEST(nf,NDF_CUM)) { // cumulate with shape kept
767
- na_shape[na_ndim] = lp->n[lp_dim];
766
+ if (CUMO_NDF_TEST(nf,CUMO_NDF_CUM)) { // cumulate with shape kept
767
+ cumo_na_shape[cumo_na_ndim] = lp->n[lp_dim];
768
768
  } else
769
- if (na_test_reduce(lp->reduce,lp_dim)) { // accumulate dimension
770
- if (NDF_TEST(nf,NDF_KEEP_DIM)) {
771
- na_shape[na_ndim] = 1; // leave it
769
+ if (cumo_na_test_reduce(lp->reduce,lp_dim)) { // accumulate dimension
770
+ if (CUMO_NDF_TEST(nf,CUMO_NDF_KEEP_DIM)) {
771
+ cumo_na_shape[cumo_na_ndim] = 1; // leave it
772
772
  } else {
773
773
  continue; // delete dimension
774
774
  }
775
775
  } else {
776
- na_shape[na_ndim] = lp->n[lp_dim];
776
+ cumo_na_shape[cumo_na_ndim] = lp->n[lp_dim];
777
777
  }
778
- //printf("i=%d lp_dim=%d na_shape[%d]=%ld\n",i,lp_dim,i,na_shape[i]);
779
- dim_map[na_ndim++] = lp_dim;
780
- //dim_map[lp_dim] = na_ndim++;
778
+ //printf("i=%d lp_dim=%d cumo_na_shape[%d]=%ld\n",i,lp_dim,i,cumo_na_shape[i]);
779
+ dim_map[cumo_na_ndim++] = lp_dim;
780
+ //dim_map[lp_dim] = cumo_na_ndim++;
781
781
  }
782
782
 
783
783
  // user-specified shape
784
784
  for (i=0; i<nf->aout[k].dim; i++) {
785
- na_shape[na_ndim] = nf->aout[k].shape[i];
786
- dim_map[na_ndim++] = i + lp->ndim;
785
+ cumo_na_shape[cumo_na_ndim] = nf->aout[k].shape[i];
786
+ dim_map[cumo_na_ndim++] = i + lp->ndim;
787
787
  }
788
788
 
789
789
  // find inplace from input arrays
790
- if (k==0 && NDF_TEST(nf,NDF_INPLACE)) {
791
- v = ndloop_find_inplace(nf,lp,type,na_ndim,na_shape,args);
790
+ if (k==0 && CUMO_NDF_TEST(nf,CUMO_NDF_INPLACE)) {
791
+ v = ndloop_find_inplace(nf,lp,type,cumo_na_ndim,cumo_na_shape,args);
792
792
  }
793
793
  if (!RTEST(v)) {
794
794
  // new object
795
- v = nary_new(type, na_ndim, na_shape);
796
- flag = NDL_WRITE;
795
+ v = cumo_na_new(type, cumo_na_ndim, cumo_na_shape);
796
+ flag = CUMO_NDL_WRITE;
797
797
  }
798
798
 
799
799
  j = lp->nin + k;
@@ -807,7 +807,7 @@ ndloop_set_output_narray(ndfunc_t *nf, na_md_loop_t *lp, int k,
807
807
  }
808
808
 
809
809
  static VALUE
810
- ndloop_set_output(ndfunc_t *nf, na_md_loop_t *lp, VALUE args)
810
+ ndloop_set_output(cumo_ndfunc_t *nf, cumo_na_md_loop_t *lp, VALUE args)
811
811
  {
812
812
  int i, j, k, idx;
813
813
  volatile VALUE v, t, results;
@@ -848,7 +848,7 @@ ndloop_set_output(ndfunc_t *nf, na_md_loop_t *lp, VALUE args)
848
848
  idx = nf->ain[k].dim;
849
849
  v = RARRAY_AREF(results,idx);
850
850
  init = RARRAY_AREF(args,k);
851
- na_store(v,init);
851
+ cumo_na_store(v,init);
852
852
  }
853
853
 
854
854
  return results;
@@ -860,12 +860,12 @@ ndloop_set_output(ndfunc_t *nf, na_md_loop_t *lp, VALUE args)
860
860
  // For example, compressing [2,3] shape into [6] so that we can process
861
861
  // all elements with one user loop.
862
862
  static void
863
- ndfunc_contract_loop(na_md_loop_t *lp)
863
+ cumo_ndfunc_contract_loop(cumo_na_md_loop_t *lp)
864
864
  {
865
865
  int i,j,k,success,cnt=0;
866
866
  int red0, redi;
867
867
 
868
- redi = na_test_reduce(lp->reduce,0);
868
+ redi = cumo_na_test_reduce(lp->reduce,0);
869
869
 
870
870
  //for (i=0; i<lp->ndim; i++) {
871
871
  // printf("lp->n[%d]=%lu\n",i,lp->n[i]);
@@ -873,7 +873,7 @@ ndfunc_contract_loop(na_md_loop_t *lp)
873
873
 
874
874
  for (i=1; i<lp->ndim; i++) {
875
875
  red0 = redi;
876
- redi = na_test_reduce(lp->reduce,i);
876
+ redi = cumo_na_test_reduce(lp->reduce,i);
877
877
  //printf("contract i=%d reduce_cond=%d %d\n",i,red0,redi);
878
878
  if (red0 != redi) {
879
879
  continue;
@@ -932,7 +932,7 @@ ndfunc_contract_loop(na_md_loop_t *lp)
932
932
  //
933
933
  // For example, for element-wise function, lp->user.ndim is 1, and lp->ndim -= 1.
934
934
  static void
935
- ndfunc_set_user_loop(ndfunc_t *nf, na_md_loop_t *lp)
935
+ cumo_ndfunc_set_user_loop(cumo_ndfunc_t *nf, cumo_na_md_loop_t *lp)
936
936
  {
937
937
  int j, ud=0;
938
938
 
@@ -940,7 +940,7 @@ ndfunc_set_user_loop(ndfunc_t *nf, na_md_loop_t *lp)
940
940
  // Increase user.ndim by number of dimensions to reduce for reduction function.
941
941
  ud = lp->reduce_dim;
942
942
  }
943
- else if (lp->ndim > 0 && NDF_TEST(nf,NDF_HAS_LOOP)) {
943
+ else if (lp->ndim > 0 && CUMO_NDF_TEST(nf,CUMO_NDF_HAS_LOOP)) {
944
944
  // Set user.ndim to 1 (default is 0) for element-wise function.
945
945
  ud = 1;
946
946
  }
@@ -964,29 +964,29 @@ ndfunc_set_user_loop(ndfunc_t *nf, na_md_loop_t *lp)
964
964
  //printf("lp->reduce_dim=%d lp->user.ndim=%d lp->ndim=%d\n",lp->reduce_dim,lp->user.ndim,lp->ndim);
965
965
 
966
966
  skip_ud:
967
- // user function shape is the latter part of na_md_loop shape.
967
+ // user function shape is the latter part of cumo_na_md_loop shape.
968
968
  lp->user.n = &(lp->n[lp->ndim]);
969
969
  for (j=0; j<lp->narg; j++) {
970
970
  LARG(lp,j).iter = &LITER(lp,lp->ndim,j);
971
- //printf("in ndfunc_set_user_loop: lp->user.args[%d].iter=%lx\n",j,(size_t)(LARG(lp,j).iter));
971
+ //printf("in cumo_ndfunc_set_user_loop: lp->user.args[%d].iter=%lx\n",j,(size_t)(LARG(lp,j).iter));
972
972
  }
973
973
  }
974
974
 
975
975
 
976
976
  // Initialize lp->user for indexer loop.
977
977
  static void
978
- ndfunc_set_user_indexer_loop(ndfunc_t *nf, na_md_loop_t *lp)
978
+ cumo_ndfunc_set_user_indexer_loop(cumo_ndfunc_t *nf, cumo_na_md_loop_t *lp)
979
979
  {
980
980
  int j;
981
981
 
982
982
  lp->user.ndim = lp->ndim;
983
983
  lp->ndim = 0;
984
984
 
985
- if (NDF_TEST(nf,NDF_FLAT_REDUCE)) {
985
+ if (CUMO_NDF_TEST(nf,CUMO_NDF_FLAT_REDUCE)) {
986
986
  // in
987
987
  LARG(lp,0).ndim = lp->user.ndim;
988
988
  LARG(lp,0).shape = &(lp->n[lp->ndim]);
989
- // out is constructed at na_make_reduction_arg from in and lp->reduce
989
+ // out is constructed at cumo_na_make_reduction_arg from in and lp->reduce
990
990
 
991
991
  lp->user.n = &(lp->n[lp->ndim]);
992
992
  for (j=0; j<lp->narg; j++) {
@@ -1015,10 +1015,10 @@ ndfunc_set_user_indexer_loop(ndfunc_t *nf, na_md_loop_t *lp)
1015
1015
  // Judge whether a (contiguous) buffer copy is required or not, and malloc if it is required.
1016
1016
  //
1017
1017
  // CASES TO REQUIRE A BUFFER COPY:
1018
- // 1) ndloop has `idx` but does not support NDF_INDEX_LOOP.
1019
- // 2) ndloop has non-contiguous arrays but does not support NDF_STRIDE_LOOP.
1018
+ // 1) ndloop has `idx` but does not support CUMO_NDF_INDEX_LOOP.
1019
+ // 2) ndloop has non-contiguous arrays but does not support CUMO_NDF_STRIDE_LOOP.
1020
1020
  static void
1021
- ndfunc_set_bufcp(ndfunc_t *nf, na_md_loop_t *lp)
1021
+ cumo_ndfunc_set_bufcp(cumo_ndfunc_t *nf, cumo_na_md_loop_t *lp)
1022
1022
  {
1023
1023
  unsigned int f;
1024
1024
  int i, j;
@@ -1026,7 +1026,7 @@ ndfunc_set_bufcp(ndfunc_t *nf, na_md_loop_t *lp)
1026
1026
  bool zero_step;
1027
1027
  ssize_t n, sz, elmsz, stride, n_total; //, last_step;
1028
1028
  size_t *buf_shape;
1029
- na_loop_iter_t *buf_iter=NULL, *src_iter;
1029
+ cumo_na_loop_iter_t *buf_iter=NULL, *src_iter;
1030
1030
 
1031
1031
  unsigned int loop_spec = ndloop_func_loop_spec(nf, lp->user.ndim);
1032
1032
  //if (loop_spec==0) return;
@@ -1087,7 +1087,7 @@ ndfunc_set_bufcp(ndfunc_t *nf, na_md_loop_t *lp)
1087
1087
  // over loop_spec or reduce_loop is not contiguous
1088
1088
  if (f & loop_spec || (lp->reduce_dim > 1 && ndim > 0)) {
1089
1089
  //printf("(buf,nd=%d)",nd);
1090
- buf_iter = ALLOC_N(na_loop_iter_t,nd+3);
1090
+ buf_iter = ALLOC_N(cumo_na_loop_iter_t,nd+3);
1091
1091
  buf_shape = ALLOC_N(size_t,nd);
1092
1092
  buf_iter[nd].pos = 0;
1093
1093
  buf_iter[nd].step = 0;
@@ -1104,14 +1104,14 @@ ndfunc_set_bufcp(ndfunc_t *nf, na_md_loop_t *lp)
1104
1104
  buf_shape[i] = n;
1105
1105
  sz *= n;
1106
1106
  }
1107
- LBUFCP(lp,j) = ALLOC(na_buffer_copy_t);
1107
+ LBUFCP(lp,j) = ALLOC(cumo_na_buffer_copy_t);
1108
1108
  LBUFCP(lp,j)->ndim = ndim;
1109
1109
  LBUFCP(lp,j)->elmsz = elmsz;
1110
1110
  LBUFCP(lp,j)->n = buf_shape;
1111
1111
  LBUFCP(lp,j)->src_iter = src_iter;
1112
1112
  LBUFCP(lp,j)->buf_iter = buf_iter;
1113
1113
  LARG(lp,j).iter = buf_iter;
1114
- //printf("in ndfunc_set_bufcp(1): lp->user.args[%d].iter=%lx\n",j,(size_t)(LARG(lp,j).iter));
1114
+ //printf("in cumo_ndfunc_set_bufcp(1): lp->user.args[%d].iter=%lx\n",j,(size_t)(LARG(lp,j).iter));
1115
1115
  LBUFCP(lp,j)->src_ptr = LARG(lp,j).ptr;
1116
1116
  if (cumo_cuda_runtime_is_device_memory(LARG(lp,j).ptr)) {
1117
1117
  LARG(lp,j).ptr = LBUFCP(lp,j)->buf_ptr = cumo_cuda_runtime_malloc(sz);
@@ -1130,7 +1130,7 @@ ndfunc_set_bufcp(ndfunc_t *nf, na_md_loop_t *lp)
1130
1130
  last_step = src_iter[ndim-1].step;
1131
1131
  if (lp->reduce_dim>1) {
1132
1132
  //printf("(reduce_dim=%d,ndim=%d,nd=%d,n=%ld,lst=%ld)\n",lp->reduce_dim,ndim,nd,n_total,last_step);
1133
- buf_iter = ALLOC_N(na_loop_iter_t,2);
1133
+ buf_iter = ALLOC_N(cumo_na_loop_iter_t,2);
1134
1134
  buf_iter[0].pos = LARG(lp,j).iter[0].pos;
1135
1135
  buf_iter[0].step = last_step;
1136
1136
  buf_iter[0].idx = NULL;
@@ -1138,7 +1138,7 @@ ndfunc_set_bufcp(ndfunc_t *nf, na_md_loop_t *lp)
1138
1138
  buf_iter[1].step = 0;
1139
1139
  buf_iter[1].idx = NULL;
1140
1140
  LARG(lp,j).iter = buf_iter;
1141
- //printf("in ndfunc_set_bufcp(2): lp->user.args[%d].iter=%lx\n",j,(size_t)(LARG(lp,j).iter));
1141
+ //printf("in cumo_ndfunc_set_bufcp(2): lp->user.args[%d].iter=%lx\n",j,(size_t)(LARG(lp,j).iter));
1142
1142
  lp->xargs[j].free_user_iter = 1;
1143
1143
  }
1144
1144
  }
@@ -1161,7 +1161,7 @@ ndfunc_set_bufcp(ndfunc_t *nf, na_md_loop_t *lp)
1161
1161
 
1162
1162
  // Make contiguous memory for ops not supporting index or stride (step) loop
1163
1163
  static void
1164
- ndloop_copy_to_buffer(na_buffer_copy_t *lp)
1164
+ ndloop_copy_to_buffer(cumo_na_buffer_copy_t *lp)
1165
1165
  {
1166
1166
  size_t *c;
1167
1167
  char *src, *buf;
@@ -1195,7 +1195,7 @@ ndloop_copy_to_buffer(na_buffer_copy_t *lp)
1195
1195
  // i-th dimension
1196
1196
  for (; i<nd; i++) {
1197
1197
  if (LITER_SRC(lp,i).idx) {
1198
- SHOW_SYNCHRONIZE_FIXME_WARNING_ONCE("ndloop_copy_to_buffer", "any");
1198
+ CUMO_SHOW_SYNCHRONIZE_FIXME_WARNING_ONCE("ndloop_copy_to_buffer", "any");
1199
1199
  cumo_cuda_runtime_check_status(cudaDeviceSynchronize());
1200
1200
  LITER_SRC(lp,i+1).pos = LITER_SRC(lp,i).pos + LITER_SRC(lp,i).idx[c[i]];
1201
1201
  } else {
@@ -1227,7 +1227,7 @@ ndloop_copy_to_buffer(na_buffer_copy_t *lp)
1227
1227
  }
1228
1228
 
1229
1229
  static void
1230
- ndloop_copy_from_buffer(na_buffer_copy_t *lp)
1230
+ ndloop_copy_from_buffer(cumo_na_buffer_copy_t *lp)
1231
1231
  {
1232
1232
  size_t *c;
1233
1233
  char *src, *buf;
@@ -1291,25 +1291,25 @@ ndloop_copy_from_buffer(na_buffer_copy_t *lp)
1291
1291
 
1292
1292
 
1293
1293
  static void
1294
- ndfunc_write_back(ndfunc_t *nf, na_md_loop_t *lp, VALUE orig_args, VALUE results)
1294
+ cumo_ndfunc_write_back(cumo_ndfunc_t *nf, cumo_na_md_loop_t *lp, VALUE orig_args, VALUE results)
1295
1295
  {
1296
1296
  VALUE src, dst;
1297
1297
 
1298
1298
  if (lp->writeback >= 0) {
1299
1299
  dst = RARRAY_AREF(orig_args,lp->writeback);
1300
1300
  src = RARRAY_AREF(results,0);
1301
- na_store(dst,src);
1301
+ cumo_na_store(dst,src);
1302
1302
  RARRAY_ASET(results,0,dst);
1303
1303
  }
1304
1304
  }
1305
1305
 
1306
1306
 
1307
1307
  static VALUE
1308
- ndloop_extract(VALUE results, ndfunc_t *nf)
1308
+ ndloop_extract(VALUE results, cumo_ndfunc_t *nf)
1309
1309
  {
1310
1310
  // long n, i;
1311
1311
  // VALUE x, y;
1312
- // narray_t *na;
1312
+ // cumo_narray_t *na;
1313
1313
 
1314
1314
  // extract result objects
1315
1315
  switch(nf->nout) {
@@ -1318,24 +1318,24 @@ ndloop_extract(VALUE results, ndfunc_t *nf)
1318
1318
  case 1:
1319
1319
  return RARRAY_AREF(results,0);
1320
1320
  // x = RARRAY_AREF(results,0);
1321
- // if (NDF_TEST(nf,NDF_EXTRACT)) {
1322
- // if (IsNArray(x)){
1323
- // GetNArray(x,na);
1324
- // if (NA_NDIM(na)==0) {
1325
- // x = rb_funcall(x, id_extract, 0);
1321
+ // if (CUMO_NDF_TEST(nf,CUMO_NDF_EXTRACT)) {
1322
+ // if (CumoIsNArray(x)){
1323
+ // CumoGetNArray(x,na);
1324
+ // if (CUMO_NA_NDIM(na)==0) {
1325
+ // x = rb_funcall(x, cumo_id_extract, 0);
1326
1326
  // }
1327
1327
  // }
1328
1328
  // }
1329
1329
  // return x;
1330
1330
  }
1331
- // if (NDF_TEST(nf,NDF_EXTRACT)) {
1331
+ // if (CUMO_NDF_TEST(nf,CUMO_NDF_EXTRACT)) {
1332
1332
  // n = RARRAY_LEN(results);
1333
1333
  // for (i=0; i<n; i++) {
1334
1334
  // x = RARRAY_AREF(results,i);
1335
- // if (IsNArray(x)){
1336
- // GetNArray(x,na);
1337
- // if (NA_NDIM(na)==0) {
1338
- // y = rb_funcall(x, id_extract, 0);
1335
+ // if (CumoIsNArray(x)){
1336
+ // CumoGetNArray(x,na);
1337
+ // if (CUMO_NA_NDIM(na)==0) {
1338
+ // y = rb_funcall(x, cumo_id_extract, 0);
1339
1339
  // RARRAY_ASET(results,i,y);
1340
1340
  // }
1341
1341
  // }
@@ -1345,7 +1345,7 @@ ndloop_extract(VALUE results, ndfunc_t *nf)
1345
1345
  }
1346
1346
 
1347
1347
  static bool
1348
- loop_is_using_idx(na_md_loop_t *lp)
1348
+ loop_is_using_idx(cumo_na_md_loop_t *lp)
1349
1349
  {
1350
1350
  int i, j;
1351
1351
  int nd = lp->ndim;
@@ -1367,14 +1367,14 @@ loop_is_using_idx(na_md_loop_t *lp)
1367
1367
  }
1368
1368
 
1369
1369
  static void
1370
- loop_narray(ndfunc_t *nf, na_md_loop_t *lp);
1370
+ loop_narray(cumo_ndfunc_t *nf, cumo_na_md_loop_t *lp);
1371
1371
 
1372
1372
  static VALUE
1373
1373
  ndloop_run(VALUE vlp)
1374
1374
  {
1375
1375
  volatile VALUE args, orig_args, results;
1376
- na_md_loop_t *lp = (na_md_loop_t*)(vlp);
1377
- ndfunc_t *nf;
1376
+ cumo_na_md_loop_t *lp = (cumo_na_md_loop_t*)(vlp);
1377
+ cumo_ndfunc_t *nf;
1378
1378
 
1379
1379
  orig_args = lp->vargs;
1380
1380
  nf = lp->ndfunc;
@@ -1384,49 +1384,49 @@ ndloop_run(VALUE vlp)
1384
1384
  // setup ndloop iterator with arguments
1385
1385
  ndloop_init_args(nf, lp, args);
1386
1386
  results = ndloop_set_output(nf, lp, args);
1387
- //if (na_debug_flag) {
1387
+ //if (cumo_na_debug_flag) {
1388
1388
  // printf("-- ndloop_set_output --\n");
1389
1389
  // print_ndloop(lp);
1390
1390
  //}
1391
1391
 
1392
1392
  // contract loop (compact dimessions)
1393
- if (NDF_TEST(nf,NDF_INDEXER_LOOP) && NDF_TEST(nf,NDF_FLAT_REDUCE)) {
1393
+ if (CUMO_NDF_TEST(nf,CUMO_NDF_INDEXER_LOOP) && CUMO_NDF_TEST(nf,CUMO_NDF_FLAT_REDUCE)) {
1394
1394
  // do nothing
1395
1395
  // TODO(sonots): support compacting dimensions in reduction indexer loop if it allows speed up.
1396
1396
  } else {
1397
1397
  if (lp->loop_func == loop_narray) {
1398
- ndfunc_contract_loop(lp);
1399
- if (na_debug_flag) {
1400
- printf("-- ndfunc_contract_loop --\n");
1398
+ cumo_ndfunc_contract_loop(lp);
1399
+ if (cumo_na_debug_flag) {
1400
+ printf("-- cumo_ndfunc_contract_loop --\n");
1401
1401
  print_ndloop(lp);
1402
1402
  }
1403
1403
  }
1404
1404
  }
1405
1405
 
1406
1406
  // setup lp->user
1407
- if (NDF_TEST(nf,NDF_INDEXER_LOOP)) {
1408
- ndfunc_set_user_indexer_loop(nf, lp);
1409
- if (na_debug_flag) {
1410
- printf("-- ndfunc_set_user_indexer_loop --\n");
1407
+ if (CUMO_NDF_TEST(nf,CUMO_NDF_INDEXER_LOOP)) {
1408
+ cumo_ndfunc_set_user_indexer_loop(nf, lp);
1409
+ if (cumo_na_debug_flag) {
1410
+ printf("-- cumo_ndfunc_set_user_indexer_loop --\n");
1411
1411
  print_ndloop(lp);
1412
1412
  }
1413
1413
  } else {
1414
- ndfunc_set_user_loop(nf, lp);
1415
- if (na_debug_flag) {
1416
- printf("-- ndfunc_set_user_loop --\n");
1414
+ cumo_ndfunc_set_user_loop(nf, lp);
1415
+ if (cumo_na_debug_flag) {
1416
+ printf("-- cumo_ndfunc_set_user_loop --\n");
1417
1417
  print_ndloop(lp);
1418
1418
  }
1419
1419
  }
1420
1420
 
1421
1421
  // setup buffering during loop
1422
- if (NDF_TEST(nf,NDF_INDEXER_LOOP) && NDF_TEST(nf,NDF_FLAT_REDUCE) && !loop_is_using_idx(lp)) {
1422
+ if (CUMO_NDF_TEST(nf,CUMO_NDF_INDEXER_LOOP) && CUMO_NDF_TEST(nf,CUMO_NDF_FLAT_REDUCE) && !loop_is_using_idx(lp)) {
1423
1423
  // do nothing
1424
1424
  } else {
1425
1425
  if (lp->loop_func == loop_narray) {
1426
- ndfunc_set_bufcp(nf, lp);
1426
+ cumo_ndfunc_set_bufcp(nf, lp);
1427
1427
  }
1428
- if (na_debug_flag) {
1429
- printf("-- ndfunc_set_bufcp --\n");
1428
+ if (cumo_na_debug_flag) {
1429
+ printf("-- cumo_ndfunc_set_bufcp --\n");
1430
1430
  print_ndloop(lp);
1431
1431
  }
1432
1432
  }
@@ -1439,7 +1439,7 @@ ndloop_run(VALUE vlp)
1439
1439
  }
1440
1440
 
1441
1441
  // write-back will be placed here
1442
- ndfunc_write_back(nf, lp, orig_args, results);
1442
+ cumo_ndfunc_write_back(nf, lp, orig_args, results);
1443
1443
 
1444
1444
  // extract result objects
1445
1445
  return ndloop_extract(results, nf);
@@ -1449,7 +1449,7 @@ ndloop_run(VALUE vlp)
1449
1449
  // ---------------------------------------------------------------------------
1450
1450
 
1451
1451
  static void
1452
- loop_narray(ndfunc_t *nf, na_md_loop_t *lp)
1452
+ loop_narray(cumo_ndfunc_t *nf, cumo_na_md_loop_t *lp)
1453
1453
  {
1454
1454
  size_t *c;
1455
1455
  int i, j;
@@ -1459,7 +1459,7 @@ loop_narray(ndfunc_t *nf, na_md_loop_t *lp)
1459
1459
  rb_bug("bug? lp->ndim = %d\n", lp->ndim);
1460
1460
  }
1461
1461
 
1462
- if (nd==0 || NDF_TEST(nf,NDF_INDEXER_LOOP)) {
1462
+ if (nd==0 || CUMO_NDF_TEST(nf,CUMO_NDF_INDEXER_LOOP)) {
1463
1463
  for (j=0; j<lp->nin; j++) {
1464
1464
  if (lp->xargs[j].bufcp) {
1465
1465
  //printf("copy_to_buffer j=%d\n",j);
@@ -1468,7 +1468,7 @@ loop_narray(ndfunc_t *nf, na_md_loop_t *lp)
1468
1468
  }
1469
1469
  (*(nf->func))(&(lp->user));
1470
1470
  for (j=0; j<lp->narg; j++) {
1471
- if (lp->xargs[j].bufcp && (lp->xargs[j].flag & NDL_WRITE)) {
1471
+ if (lp->xargs[j].bufcp && (lp->xargs[j].flag & CUMO_NDL_WRITE)) {
1472
1472
  //printf("copy_from_buffer j=%d\n",j);
1473
1473
  // copy data to work buffer
1474
1474
  ndloop_copy_from_buffer(lp->xargs[j].bufcp);
@@ -1505,7 +1505,7 @@ loop_narray(ndfunc_t *nf, na_md_loop_t *lp)
1505
1505
  }
1506
1506
  (*(nf->func))(&(lp->user));
1507
1507
  for (j=0; j<lp->narg; j++) {
1508
- if (lp->xargs[j].bufcp && (lp->xargs[j].flag & NDL_WRITE)) {
1508
+ if (lp->xargs[j].bufcp && (lp->xargs[j].flag & CUMO_NDL_WRITE)) {
1509
1509
  // copy data to work buffer
1510
1510
  //printf("copy_from_buffer j=%d\n",j);
1511
1511
  ndloop_copy_from_buffer(lp->xargs[j].bufcp);
@@ -1526,12 +1526,12 @@ loop_narray(ndfunc_t *nf, na_md_loop_t *lp)
1526
1526
 
1527
1527
 
1528
1528
  static VALUE
1529
- na_ndloop_main(ndfunc_t *nf, VALUE args, void *opt_ptr)
1529
+ cumo_na_ndloop_main(cumo_ndfunc_t *nf, VALUE args, void *opt_ptr)
1530
1530
  {
1531
1531
  unsigned int copy_flag;
1532
- na_md_loop_t lp;
1532
+ cumo_na_md_loop_t lp;
1533
1533
 
1534
- if (na_debug_flag) print_ndfunc(nf);
1534
+ if (cumo_na_debug_flag) print_ndfunc(nf);
1535
1535
 
1536
1536
  // cast arguments to NArray
1537
1537
  copy_flag = ndloop_cast_args(nf, args);
@@ -1545,10 +1545,10 @@ na_ndloop_main(ndfunc_t *nf, VALUE args, void *opt_ptr)
1545
1545
 
1546
1546
  VALUE
1547
1547
  #ifdef HAVE_STDARG_PROTOTYPES
1548
- na_ndloop(ndfunc_t *nf, int argc, ...)
1548
+ cumo_na_ndloop(cumo_ndfunc_t *nf, int argc, ...)
1549
1549
  #else
1550
- na_ndloop(nf, argc, va_alist)
1551
- ndfunc_t *nf;
1550
+ cumo_na_ndloop(nf, argc, va_alist)
1551
+ cumo_ndfunc_t *nf;
1552
1552
  int argc;
1553
1553
  va_dcl
1554
1554
  #endif
@@ -1569,22 +1569,22 @@ na_ndloop(nf, argc, va_alist)
1569
1569
 
1570
1570
  args = rb_ary_new4(argc, argv);
1571
1571
 
1572
- return na_ndloop_main(nf, args, NULL);
1572
+ return cumo_na_ndloop_main(nf, args, NULL);
1573
1573
  }
1574
1574
 
1575
1575
 
1576
1576
  VALUE
1577
- na_ndloop2(ndfunc_t *nf, VALUE args)
1577
+ cumo_na_ndloop2(cumo_ndfunc_t *nf, VALUE args)
1578
1578
  {
1579
- return na_ndloop_main(nf, args, NULL);
1579
+ return cumo_na_ndloop_main(nf, args, NULL);
1580
1580
  }
1581
1581
 
1582
1582
  VALUE
1583
1583
  #ifdef HAVE_STDARG_PROTOTYPES
1584
- na_ndloop3(ndfunc_t *nf, void *ptr, int argc, ...)
1584
+ cumo_na_ndloop3(cumo_ndfunc_t *nf, void *ptr, int argc, ...)
1585
1585
  #else
1586
- na_ndloop3(nf, ptr, argc, va_alist)
1587
- ndfunc_t *nf;
1586
+ cumo_na_ndloop3(nf, ptr, argc, va_alist)
1587
+ cumo_ndfunc_t *nf;
1588
1588
  void *ptr;
1589
1589
  int argc;
1590
1590
  va_dcl
@@ -1606,30 +1606,30 @@ na_ndloop3(nf, ptr, argc, va_alist)
1606
1606
 
1607
1607
  args = rb_ary_new4(argc, argv);
1608
1608
 
1609
- return na_ndloop_main(nf, args, ptr);
1609
+ return cumo_na_ndloop_main(nf, args, ptr);
1610
1610
  }
1611
1611
 
1612
1612
  VALUE
1613
- na_ndloop4(ndfunc_t *nf, void *ptr, VALUE args)
1613
+ cumo_na_ndloop4(cumo_ndfunc_t *nf, void *ptr, VALUE args)
1614
1614
  {
1615
- return na_ndloop_main(nf, args, ptr);
1615
+ return cumo_na_ndloop_main(nf, args, ptr);
1616
1616
  }
1617
1617
 
1618
1618
  //----------------------------------------------------------------------
1619
1619
 
1620
1620
  VALUE
1621
- na_info_str(VALUE ary)
1621
+ cumo_na_info_str(VALUE ary)
1622
1622
  {
1623
1623
  int nd, i;
1624
1624
  char tmp[32];
1625
1625
  VALUE buf;
1626
- narray_t *na;
1626
+ cumo_narray_t *na;
1627
1627
 
1628
- GetNArray(ary,na);
1628
+ CumoGetNArray(ary,na);
1629
1629
  nd = na->ndim;
1630
1630
 
1631
- buf = rb_str_new2(rb_class2name(CLASS_OF(ary)));
1632
- if (NA_TYPE(na) == NARRAY_VIEW_T) {
1631
+ buf = rb_str_new2(rb_class2name(rb_obj_class(ary)));
1632
+ if (CUMO_NA_TYPE(na) == CUMO_NARRAY_VIEW_T) {
1633
1633
  rb_str_cat(buf,"(view)",6);
1634
1634
  }
1635
1635
  rb_str_cat(buf,"#shape=[",8);
@@ -1648,19 +1648,20 @@ na_info_str(VALUE ary)
1648
1648
 
1649
1649
  //----------------------------------------------------------------------
1650
1650
 
1651
- #define ncol cumo_na_inspect_cols
1652
- #define nrow cumo_na_inspect_rows
1653
- extern int ncol, nrow;
1651
+ extern int cumo_na_inspect_cols_;
1652
+ extern int cumo_na_inspect_rows_;
1653
+ #define ncol cumo_na_inspect_cols_
1654
+ #define nrow cumo_na_inspect_rows_
1654
1655
 
1655
1656
  static void
1656
- loop_inspect(ndfunc_t *nf, na_md_loop_t *lp)
1657
+ loop_inspect(cumo_ndfunc_t *nf, cumo_na_md_loop_t *lp)
1657
1658
  {
1658
1659
  int nd, i, ii;
1659
1660
  size_t *c;
1660
1661
  int col=0, row=0;
1661
1662
  long len;
1662
1663
  VALUE str;
1663
- na_text_func_t func = (na_text_func_t)(nf->func);
1664
+ cumo_na_text_func_t func = (cumo_na_text_func_t)(nf->func);
1664
1665
  VALUE buf, opt;
1665
1666
 
1666
1667
  nd = lp->ndim;
@@ -1734,23 +1735,23 @@ loop_inspect(ndfunc_t *nf, na_md_loop_t *lp)
1734
1735
 
1735
1736
 
1736
1737
  VALUE
1737
- na_ndloop_inspect(VALUE nary, na_text_func_t func, VALUE opt)
1738
+ cumo_na_ndloop_inspect(VALUE nary, cumo_na_text_func_t func, VALUE opt)
1738
1739
  {
1739
1740
  volatile VALUE args;
1740
- na_md_loop_t lp;
1741
+ cumo_na_md_loop_t lp;
1741
1742
  VALUE buf;
1742
- ndfunc_arg_in_t ain[3] = {{Qnil,0},{sym_loop_opt},{sym_option}};
1743
- ndfunc_t nf = { (na_iter_func_t)func, NO_LOOP, 3, 0, ain, 0 };
1744
- //nf = ndfunc_alloc(NULL, NO_LOOP, 1, 0, Qnil);
1743
+ cumo_ndfunc_arg_in_t ain[3] = {{Qnil,0},{cumo_sym_loop_opt},{cumo_sym_option}};
1744
+ cumo_ndfunc_t nf = { (cumo_na_iter_func_t)func, CUMO_NO_LOOP, 3, 0, ain, 0 };
1745
+ //nf = cumo_ndfunc_alloc(NULL, CUMO_NO_LOOP, 1, 0, Qnil);
1745
1746
 
1746
- buf = na_info_str(nary);
1747
+ buf = cumo_na_info_str(nary);
1747
1748
 
1748
- if (na_get_pointer(nary)==NULL) {
1749
+ if (cumo_na_get_pointer(nary)==NULL) {
1749
1750
  return rb_str_cat(buf,"(empty)",7);
1750
1751
  }
1751
1752
 
1752
1753
  //rb_p(args);
1753
- //if (na_debug_flag) print_ndfunc(&nf);
1754
+ //if (cumo_na_debug_flag) print_ndfunc(&nf);
1754
1755
 
1755
1756
  args = rb_ary_new3(3,nary,buf,opt);
1756
1757
 
@@ -1769,21 +1770,21 @@ na_ndloop_inspect(VALUE nary, na_text_func_t func, VALUE opt)
1769
1770
  //----------------------------------------------------------------------
1770
1771
 
1771
1772
  static void
1772
- loop_store_subnarray(ndfunc_t *nf, na_md_loop_t *lp, int i0, size_t *c, VALUE a)
1773
+ loop_store_subnarray(cumo_ndfunc_t *nf, cumo_na_md_loop_t *lp, int i0, size_t *c, VALUE a)
1773
1774
  {
1774
1775
  int nd = lp->ndim;
1775
1776
  int i, j;
1776
- narray_t *na;
1777
+ cumo_narray_t *na;
1777
1778
  int *dim_map;
1778
1779
  VALUE a_type;
1779
1780
 
1780
- a_type = CLASS_OF(LARG(lp,0).value);
1781
- if (CLASS_OF(a) != a_type) {
1782
- a = rb_funcall(a_type, id_cast, 1, a);
1781
+ a_type = rb_obj_class(LARG(lp,0).value);
1782
+ if (rb_obj_class(a) != a_type) {
1783
+ a = rb_funcall(a_type, cumo_id_cast, 1, a);
1783
1784
  }
1784
- GetNArray(a,na);
1785
+ CumoGetNArray(a,na);
1785
1786
  if (na->ndim != nd-i0+1) {
1786
- rb_raise(nary_eShapeError, "mismatched dimension of sub-narray: "
1787
+ rb_raise(cumo_na_eShapeError, "mismatched dimension of sub-narray: "
1787
1788
  "nd_src=%d, nd_dst=%d", na->ndim, nd-i0+1);
1788
1789
  }
1789
1790
  dim_map = ALLOCA_N(int, na->ndim);
@@ -1791,7 +1792,7 @@ loop_store_subnarray(ndfunc_t *nf, na_md_loop_t *lp, int i0, size_t *c, VALUE a)
1791
1792
  dim_map[i] = lp->trans_map[i+i0];
1792
1793
  //printf("dim_map[i=%d] = %d, i0=%d\n", i, dim_map[i], i0);
1793
1794
  }
1794
- ndloop_set_stepidx(lp, 1, a, dim_map, NDL_READ);
1795
+ ndloop_set_stepidx(lp, 1, a, dim_map, CUMO_NDL_READ);
1795
1796
  LARG(lp,1).shape = &(na->shape[na->ndim-1]);
1796
1797
 
1797
1798
  // loop body
@@ -1825,7 +1826,7 @@ loop_store_subnarray(ndfunc_t *nf, na_md_loop_t *lp, int i0, size_t *c, VALUE a)
1825
1826
 
1826
1827
 
1827
1828
  static void
1828
- loop_store_rarray(ndfunc_t *nf, na_md_loop_t *lp)
1829
+ loop_store_rarray(cumo_ndfunc_t *nf, cumo_na_md_loop_t *lp)
1829
1830
  {
1830
1831
  size_t *c;
1831
1832
  int i;
@@ -1856,7 +1857,7 @@ loop_store_rarray(ndfunc_t *nf, na_md_loop_t *lp)
1856
1857
  } else {
1857
1858
  a[i+1] = Qnil;
1858
1859
  }
1859
- } else if (IsNArray(a[i])) {
1860
+ } else if (CumoIsNArray(a[i])) {
1860
1861
  //printf("a[i=%d]=0x%lx\n",i,a[i]);
1861
1862
  loop_store_subnarray(nf,lp,i,c,a[i]);
1862
1863
  goto loop_next;
@@ -1871,7 +1872,7 @@ loop_store_rarray(ndfunc_t *nf, na_md_loop_t *lp)
1871
1872
  }
1872
1873
 
1873
1874
  //printf("a[i=%d]=0x%lx\n",i,a[i]);
1874
- if (IsNArray(a[i])) {
1875
+ if (CumoIsNArray(a[i])) {
1875
1876
  loop_store_subnarray(nf,lp,i,c,a[i]);
1876
1877
  } else {
1877
1878
  LARG(lp,1).value = a[i];
@@ -1891,13 +1892,13 @@ loop_store_rarray(ndfunc_t *nf, na_md_loop_t *lp)
1891
1892
  }
1892
1893
 
1893
1894
  VALUE
1894
- na_ndloop_store_rarray(ndfunc_t *nf, VALUE nary, VALUE rary)
1895
+ cumo_na_ndloop_store_rarray(cumo_ndfunc_t *nf, VALUE nary, VALUE rary)
1895
1896
  {
1896
- na_md_loop_t lp;
1897
+ cumo_na_md_loop_t lp;
1897
1898
  VALUE args;
1898
1899
 
1899
1900
  //rb_p(args);
1900
- if (na_debug_flag) print_ndfunc(nf);
1901
+ if (cumo_na_debug_flag) print_ndfunc(nf);
1901
1902
 
1902
1903
  args = rb_assoc_new(nary,rary);
1903
1904
 
@@ -1912,13 +1913,13 @@ na_ndloop_store_rarray(ndfunc_t *nf, VALUE nary, VALUE rary)
1912
1913
 
1913
1914
 
1914
1915
  VALUE
1915
- na_ndloop_store_rarray2(ndfunc_t *nf, VALUE nary, VALUE rary, VALUE opt)
1916
+ cumo_na_ndloop_store_rarray2(cumo_ndfunc_t *nf, VALUE nary, VALUE rary, VALUE opt)
1916
1917
  {
1917
- na_md_loop_t lp;
1918
+ cumo_na_md_loop_t lp;
1918
1919
  VALUE args;
1919
1920
 
1920
1921
  //rb_p(args);
1921
- if (na_debug_flag) print_ndfunc(nf);
1922
+ if (cumo_na_debug_flag) print_ndfunc(nf);
1922
1923
 
1923
1924
  //args = rb_assoc_new(rary,nary);
1924
1925
  args = rb_ary_new3(3,nary,rary,opt);
@@ -1936,7 +1937,7 @@ na_ndloop_store_rarray2(ndfunc_t *nf, VALUE nary, VALUE rary, VALUE opt)
1936
1937
  //----------------------------------------------------------------------
1937
1938
 
1938
1939
  static void
1939
- loop_narray_to_rarray(ndfunc_t *nf, na_md_loop_t *lp)
1940
+ loop_narray_to_rarray(cumo_ndfunc_t *nf, cumo_na_md_loop_t *lp)
1940
1941
  {
1941
1942
  size_t *c;
1942
1943
  int i;
@@ -1984,13 +1985,13 @@ loop_narray_to_rarray(ndfunc_t *nf, na_md_loop_t *lp)
1984
1985
  }
1985
1986
 
1986
1987
  VALUE
1987
- na_ndloop_cast_narray_to_rarray(ndfunc_t *nf, VALUE nary, VALUE fmt)
1988
+ cumo_na_ndloop_cast_narray_to_rarray(cumo_ndfunc_t *nf, VALUE nary, VALUE fmt)
1988
1989
  {
1989
- na_md_loop_t lp;
1990
+ cumo_na_md_loop_t lp;
1990
1991
  VALUE args, a0;
1991
1992
 
1992
1993
  //rb_p(args);
1993
- if (na_debug_flag) print_ndfunc(nf);
1994
+ if (cumo_na_debug_flag) print_ndfunc(nf);
1994
1995
 
1995
1996
  a0 = rb_ary_new();
1996
1997
  args = rb_ary_new3(3,nary,a0,fmt);
@@ -2009,7 +2010,7 @@ na_ndloop_cast_narray_to_rarray(ndfunc_t *nf, VALUE nary, VALUE fmt)
2009
2010
  //----------------------------------------------------------------------
2010
2011
 
2011
2012
  static void
2012
- loop_narray_with_index(ndfunc_t *nf, na_md_loop_t *lp)
2013
+ loop_narray_with_index(cumo_ndfunc_t *nf, cumo_na_md_loop_t *lp)
2013
2014
  {
2014
2015
  size_t *c;
2015
2016
  int i,j;
@@ -2059,10 +2060,10 @@ loop_narray_with_index(ndfunc_t *nf, na_md_loop_t *lp)
2059
2060
 
2060
2061
  VALUE
2061
2062
  #ifdef HAVE_STDARG_PROTOTYPES
2062
- na_ndloop_with_index(ndfunc_t *nf, int argc, ...)
2063
+ cumo_na_ndloop_with_index(cumo_ndfunc_t *nf, int argc, ...)
2063
2064
  #else
2064
- na_ndloop_with_index(nf, argc, va_alist)
2065
- ndfunc_t *nf;
2065
+ cumo_na_ndloop_with_index(nf, argc, va_alist)
2066
+ cumo_ndfunc_t *nf;
2066
2067
  int argc;
2067
2068
  va_dcl
2068
2069
  #endif
@@ -2072,7 +2073,7 @@ na_ndloop_with_index(nf, argc, va_alist)
2072
2073
  int i;
2073
2074
  VALUE *argv;
2074
2075
  volatile VALUE args;
2075
- na_md_loop_t lp;
2076
+ cumo_na_md_loop_t lp;
2076
2077
 
2077
2078
  argv = ALLOCA_N(VALUE,argc);
2078
2079
 
@@ -2084,8 +2085,8 @@ na_ndloop_with_index(nf, argc, va_alist)
2084
2085
 
2085
2086
  args = rb_ary_new4(argc, argv);
2086
2087
 
2087
- //return na_ndloop_main(nf, args, NULL);
2088
- if (na_debug_flag) print_ndfunc(nf);
2088
+ //return cumo_na_ndloop_main(nf, args, NULL);
2089
+ if (cumo_na_debug_flag) print_ndfunc(nf);
2089
2090
 
2090
2091
  // cast arguments to NArray
2091
2092
  //copy_flag = ndloop_cast_args(nf, args);
@@ -2098,8 +2099,8 @@ na_ndloop_with_index(nf, argc, va_alist)
2098
2099
 
2099
2100
 
2100
2101
  void
2101
- Init_cumo_nary_ndloop()
2102
+ Init_cumo_na_ndloop()
2102
2103
  {
2103
- id_cast = rb_intern("cast");
2104
- id_extract = rb_intern("extract");
2104
+ cumo_id_cast = rb_intern("cast");
2105
+ cumo_id_extract = rb_intern("extract");
2105
2106
  }