cumo 0.1.0 → 0.1.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +5 -5
- data/.gitignore +1 -0
- data/3rd_party/LICENSE.txt +60 -0
- data/3rd_party/mkmf-cu/lib/mkmf-cu/cli.rb +13 -1
- data/LICENSE.txt +1 -62
- data/README.md +33 -29
- data/bench/cumo_bench.rb +47 -25
- data/bench/numo_bench.rb +27 -25
- data/docs/src-tree.md +16 -0
- data/ext/cumo/cuda/cublas.c +69 -219
- data/ext/cumo/cuda/memory_pool_impl.hpp +1 -0
- data/ext/cumo/cuda/runtime.c +2 -14
- data/ext/cumo/cumo.c +16 -16
- data/ext/cumo/include/cumo.h +2 -2
- data/ext/cumo/include/cumo/cuda/cublas.h +6 -129
- data/ext/cumo/include/cumo/cuda/runtime.h +16 -0
- data/ext/cumo/include/cumo/indexer.h +46 -63
- data/ext/cumo/include/cumo/intern.h +58 -112
- data/ext/cumo/include/cumo/narray.h +214 -185
- data/ext/cumo/include/cumo/narray_kernel.h +66 -37
- data/ext/cumo/include/cumo/ndloop.h +42 -42
- data/ext/cumo/include/cumo/reduce_kernel.h +55 -71
- data/ext/cumo/include/cumo/template.h +56 -51
- data/ext/cumo/include/cumo/template_kernel.h +31 -31
- data/ext/cumo/include/cumo/types/bit.h +3 -3
- data/ext/cumo/include/cumo/types/bit_kernel.h +2 -2
- data/ext/cumo/include/cumo/types/complex.h +126 -126
- data/ext/cumo/include/cumo/types/complex_kernel.h +126 -126
- data/ext/cumo/include/cumo/types/complex_macro.h +28 -28
- data/ext/cumo/include/cumo/types/complex_macro_kernel.h +20 -20
- data/ext/cumo/include/cumo/types/dcomplex.h +5 -5
- data/ext/cumo/include/cumo/types/dcomplex_kernel.h +1 -1
- data/ext/cumo/include/cumo/types/int_macro.h +1 -1
- data/ext/cumo/include/cumo/types/int_macro_kernel.h +1 -1
- data/ext/cumo/include/cumo/types/robj_macro.h +30 -30
- data/ext/cumo/include/cumo/types/scomplex.h +5 -5
- data/ext/cumo/include/cumo/types/scomplex_kernel.h +1 -1
- data/ext/cumo/narray/array.c +143 -143
- data/ext/cumo/narray/data.c +184 -184
- data/ext/cumo/narray/gen/cogen.rb +5 -2
- data/ext/cumo/narray/gen/cogen_kernel.rb +5 -2
- data/ext/cumo/narray/gen/def/dcomplex.rb +1 -1
- data/ext/cumo/narray/gen/def/scomplex.rb +1 -1
- data/ext/cumo/narray/gen/erbln.rb +132 -0
- data/ext/cumo/narray/gen/erbpp2.rb +18 -13
- data/ext/cumo/narray/gen/narray_def.rb +3 -3
- data/ext/cumo/narray/gen/spec.rb +2 -2
- data/ext/cumo/narray/gen/tmpl/accum.c +15 -15
- data/ext/cumo/narray/gen/tmpl/accum_binary.c +22 -22
- data/ext/cumo/narray/gen/tmpl/accum_binary_kernel.cu +3 -3
- data/ext/cumo/narray/gen/tmpl/accum_index.c +30 -30
- data/ext/cumo/narray/gen/tmpl/accum_index_kernel.cu +2 -2
- data/ext/cumo/narray/gen/tmpl/accum_kernel.cu +3 -3
- data/ext/cumo/narray/gen/tmpl/alloc_func.c +14 -14
- data/ext/cumo/narray/gen/tmpl/allocate.c +11 -11
- data/ext/cumo/narray/gen/tmpl/aref.c +2 -2
- data/ext/cumo/narray/gen/tmpl/aref_cpu.c +4 -4
- data/ext/cumo/narray/gen/tmpl/aset.c +2 -2
- data/ext/cumo/narray/gen/tmpl/binary.c +28 -28
- data/ext/cumo/narray/gen/tmpl/binary2.c +18 -18
- data/ext/cumo/narray/gen/tmpl/binary2_kernel.cu +3 -3
- data/ext/cumo/narray/gen/tmpl/binary_kernel.cu +6 -6
- data/ext/cumo/narray/gen/tmpl/binary_s.c +13 -13
- data/ext/cumo/narray/gen/tmpl/binary_s_kernel.cu +3 -3
- data/ext/cumo/narray/gen/tmpl/bincount.c +23 -23
- data/ext/cumo/narray/gen/tmpl/cast.c +7 -7
- data/ext/cumo/narray/gen/tmpl/cast_array.c +3 -3
- data/ext/cumo/narray/gen/tmpl/clip.c +38 -38
- data/ext/cumo/narray/gen/tmpl/complex_accum_kernel.cu +2 -2
- data/ext/cumo/narray/gen/tmpl/cond_binary.c +19 -19
- data/ext/cumo/narray/gen/tmpl/cond_binary_kernel.cu +7 -7
- data/ext/cumo/narray/gen/tmpl/cond_unary.c +15 -15
- data/ext/cumo/narray/gen/tmpl/cum.c +15 -15
- data/ext/cumo/narray/gen/tmpl/each.c +9 -9
- data/ext/cumo/narray/gen/tmpl/each_with_index.c +9 -9
- data/ext/cumo/narray/gen/tmpl/ewcomp.c +15 -15
- data/ext/cumo/narray/gen/tmpl/ewcomp_kernel.cu +3 -3
- data/ext/cumo/narray/gen/tmpl/extract_cpu.c +5 -5
- data/ext/cumo/narray/gen/tmpl/extract_data.c +12 -12
- data/ext/cumo/narray/gen/tmpl/eye.c +9 -9
- data/ext/cumo/narray/gen/tmpl/eye_kernel.cu +3 -3
- data/ext/cumo/narray/gen/tmpl/fill.c +9 -9
- data/ext/cumo/narray/gen/tmpl/fill_kernel.cu +6 -6
- data/ext/cumo/narray/gen/tmpl/float_accum_kernel.cu +1 -1
- data/ext/cumo/narray/gen/tmpl/format.c +11 -11
- data/ext/cumo/narray/gen/tmpl/format_to_a.c +8 -8
- data/ext/cumo/narray/gen/tmpl/frexp.c +13 -13
- data/ext/cumo/narray/gen/tmpl/gemm.c +252 -108
- data/ext/cumo/narray/gen/tmpl/inspect.c +1 -1
- data/ext/cumo/narray/gen/tmpl/lib.c +2 -2
- data/ext/cumo/narray/gen/tmpl/logseq.c +7 -7
- data/ext/cumo/narray/gen/tmpl/logseq_kernel.cu +6 -6
- data/ext/cumo/narray/gen/tmpl/map_with_index.c +17 -17
- data/ext/cumo/narray/gen/tmpl/median.c +10 -10
- data/ext/cumo/narray/gen/tmpl/minmax.c +10 -10
- data/ext/cumo/narray/gen/tmpl/new_dim0.c +3 -3
- data/ext/cumo/narray/gen/tmpl/poly.c +6 -6
- data/ext/cumo/narray/gen/tmpl/pow.c +28 -28
- data/ext/cumo/narray/gen/tmpl/pow_kernel.cu +6 -6
- data/ext/cumo/narray/gen/tmpl/rand.c +10 -10
- data/ext/cumo/narray/gen/tmpl/rand_norm.c +7 -7
- data/ext/cumo/narray/gen/tmpl/real_accum_kernel.cu +6 -6
- data/ext/cumo/narray/gen/tmpl/seq.c +7 -7
- data/ext/cumo/narray/gen/tmpl/seq_kernel.cu +6 -6
- data/ext/cumo/narray/gen/tmpl/set2.c +20 -20
- data/ext/cumo/narray/gen/tmpl/sort.c +11 -11
- data/ext/cumo/narray/gen/tmpl/sort_index.c +18 -18
- data/ext/cumo/narray/gen/tmpl/store.c +6 -6
- data/ext/cumo/narray/gen/tmpl/store_array.c +19 -19
- data/ext/cumo/narray/gen/tmpl/store_array_kernel.cu +12 -12
- data/ext/cumo/narray/gen/tmpl/store_bit.c +23 -23
- data/ext/cumo/narray/gen/tmpl/store_bit_kernel.cu +28 -28
- data/ext/cumo/narray/gen/tmpl/store_from.c +16 -16
- data/ext/cumo/narray/gen/tmpl/store_from_kernel.cu +12 -12
- data/ext/cumo/narray/gen/tmpl/to_a.c +10 -10
- data/ext/cumo/narray/gen/tmpl/unary.c +25 -25
- data/ext/cumo/narray/gen/tmpl/unary2.c +17 -17
- data/ext/cumo/narray/gen/tmpl/unary_kernel.cu +15 -15
- data/ext/cumo/narray/gen/tmpl/unary_ret2.c +13 -13
- data/ext/cumo/narray/gen/tmpl/unary_s.c +17 -17
- data/ext/cumo/narray/gen/tmpl/unary_s_kernel.cu +12 -12
- data/ext/cumo/narray/gen/tmpl_bit/allocate.c +9 -9
- data/ext/cumo/narray/gen/tmpl_bit/aref.c +2 -2
- data/ext/cumo/narray/gen/tmpl_bit/aref_cpu.c +5 -5
- data/ext/cumo/narray/gen/tmpl_bit/aset.c +2 -2
- data/ext/cumo/narray/gen/tmpl_bit/binary.c +29 -29
- data/ext/cumo/narray/gen/tmpl_bit/bit_count.c +14 -14
- data/ext/cumo/narray/gen/tmpl_bit/bit_count_cpu.c +21 -21
- data/ext/cumo/narray/gen/tmpl_bit/bit_count_kernel.cu +28 -28
- data/ext/cumo/narray/gen/tmpl_bit/bit_reduce.c +29 -29
- data/ext/cumo/narray/gen/tmpl_bit/each.c +10 -10
- data/ext/cumo/narray/gen/tmpl_bit/each_with_index.c +10 -10
- data/ext/cumo/narray/gen/tmpl_bit/extract.c +8 -8
- data/ext/cumo/narray/gen/tmpl_bit/extract_cpu.c +8 -8
- data/ext/cumo/narray/gen/tmpl_bit/fill.c +17 -17
- data/ext/cumo/narray/gen/tmpl_bit/format.c +14 -14
- data/ext/cumo/narray/gen/tmpl_bit/format_to_a.c +11 -11
- data/ext/cumo/narray/gen/tmpl_bit/inspect.c +3 -3
- data/ext/cumo/narray/gen/tmpl_bit/mask.c +33 -33
- data/ext/cumo/narray/gen/tmpl_bit/store_array.c +19 -19
- data/ext/cumo/narray/gen/tmpl_bit/store_bit.c +22 -22
- data/ext/cumo/narray/gen/tmpl_bit/store_from.c +18 -18
- data/ext/cumo/narray/gen/tmpl_bit/to_a.c +12 -12
- data/ext/cumo/narray/gen/tmpl_bit/unary.c +24 -24
- data/ext/cumo/narray/gen/tmpl_bit/where.c +16 -16
- data/ext/cumo/narray/gen/tmpl_bit/where2.c +20 -20
- data/ext/cumo/narray/index.c +213 -213
- data/ext/cumo/narray/math.c +27 -27
- data/ext/cumo/narray/narray.c +484 -484
- data/ext/cumo/narray/ndloop.c +259 -258
- data/ext/cumo/narray/rand.c +3 -3
- data/ext/cumo/narray/step.c +70 -70
- data/ext/cumo/narray/struct.c +139 -139
- metadata +6 -7
- data/ext/cumo/include/cumo/intern_fwd.h +0 -38
- data/lib/erbpp.rb +0 -294
- data/lib/erbpp/line_number.rb +0 -137
- data/lib/erbpp/narray_def.rb +0 -381
data/ext/cumo/narray/ndloop.c
CHANGED
@@ -17,33 +17,33 @@
|
|
17
17
|
#define va_init_list(a,b) va_start(a)
|
18
18
|
#endif
|
19
19
|
|
20
|
-
typedef struct
|
20
|
+
typedef struct CUMO_NA_BUFFER_COPY {
|
21
21
|
int ndim;
|
22
22
|
size_t elmsz;
|
23
23
|
size_t *n;
|
24
24
|
char *src_ptr;
|
25
25
|
char *buf_ptr;
|
26
|
-
|
27
|
-
|
28
|
-
}
|
29
|
-
|
30
|
-
typedef struct
|
31
|
-
|
32
|
-
|
33
|
-
int flag; //
|
26
|
+
cumo_na_loop_iter_t *src_iter;
|
27
|
+
cumo_na_loop_iter_t *buf_iter;
|
28
|
+
} cumo_na_buffer_copy_t;
|
29
|
+
|
30
|
+
typedef struct CUMO_NA_LOOP_XARGS {
|
31
|
+
cumo_na_loop_iter_t *iter; // moved from cumo_na_loop_t
|
32
|
+
cumo_na_buffer_copy_t *bufcp; // copy data to buffer
|
33
|
+
int flag; // CUMO_NDL_READ CUMO_NDL_WRITE
|
34
34
|
bool free_user_iter; // alloc LARG(lp,j).iter=lp->xargs[j].iter
|
35
|
-
}
|
35
|
+
} cumo_na_loop_xargs_t;
|
36
36
|
|
37
|
-
typedef struct
|
37
|
+
typedef struct CUMO_NA_MD_LOOP {
|
38
38
|
int narg;
|
39
39
|
int nin;
|
40
40
|
int ndim; // n of total dimensions looped at loop_narray. NOTE: lp->ndim + lp->user.ndim is the total dimension.
|
41
41
|
unsigned int copy_flag; // set i-th bit if i-th arg is cast
|
42
42
|
void *ptr; // memory for n
|
43
|
-
|
43
|
+
cumo_na_loop_iter_t *iter_ptr; // memory for iter
|
44
44
|
size_t *n; // n of elements for each dim (shape)
|
45
|
-
|
46
|
-
|
45
|
+
cumo_na_loop_t user; // loop in user function
|
46
|
+
cumo_na_loop_xargs_t *xargs; // extra data for each arg
|
47
47
|
int writeback; // write back result to i-th arg
|
48
48
|
int init_aidx; // index of initializer argument
|
49
49
|
int reduce_dim; // number of dimensions to reduce in reduction kernel, e.g., for an array of shape: [2,3,4],
|
@@ -53,35 +53,35 @@ typedef struct NA_MD_LOOP {
|
|
53
53
|
VALUE reduce; // dimension indices to reduce in reduction kernel (in bits), e.g., for an array of shape:
|
54
54
|
// [2,3,4], 111b for sum(), 010b for sum(axis: 1), 110b for sum(axis: [1,2])
|
55
55
|
VALUE loop_opt;
|
56
|
-
|
56
|
+
cumo_ndfunc_t *ndfunc;
|
57
57
|
void (*loop_func)();
|
58
|
-
}
|
58
|
+
} cumo_na_md_loop_t;
|
59
59
|
|
60
60
|
#define LARG(lp,iarg) ((lp)->user.args[iarg])
|
61
61
|
#define LITER(lp,idim,iarg) ((lp)->xargs[iarg].iter[idim])
|
62
62
|
#define LITER_SRC(lp,idim) ((lp)->src_iter[idim])
|
63
63
|
#define LBUFCP(lp,j) ((lp)->xargs[j].bufcp)
|
64
64
|
|
65
|
-
#define CASTABLE(t) (RTEST(t) && (t)!=
|
65
|
+
#define CASTABLE(t) (RTEST(t) && (t)!=CUMO_OVERWRITE)
|
66
66
|
|
67
|
-
#define
|
68
|
-
#define
|
69
|
-
#define
|
67
|
+
#define CUMO_NDL_READ 1
|
68
|
+
#define CUMO_NDL_WRITE 2
|
69
|
+
#define CUMO_NDL_READ_WRITE (CUMO_NDL_READ|CUMO_NDL_WRITE)
|
70
70
|
|
71
|
-
static ID
|
72
|
-
static ID
|
71
|
+
static ID cumo_id_cast;
|
72
|
+
static ID cumo_id_extract;
|
73
73
|
|
74
74
|
static inline VALUE
|
75
|
-
|
75
|
+
cumo_na_type_s_cast(VALUE type, VALUE obj)
|
76
76
|
{
|
77
|
-
return rb_funcall(type,
|
77
|
+
return rb_funcall(type,cumo_id_cast,1,obj);
|
78
78
|
}
|
79
79
|
|
80
80
|
static void
|
81
|
-
print_ndfunc(
|
81
|
+
print_ndfunc(cumo_ndfunc_t *nf) {
|
82
82
|
volatile VALUE t;
|
83
83
|
int i, k;
|
84
|
-
printf("
|
84
|
+
printf("cumo_ndfunc_t = 0x%"SZF"x {\n",(size_t)nf);
|
85
85
|
printf(" func = 0x%"SZF"x\n", (size_t)nf->func);
|
86
86
|
printf(" flag = 0x%"SZF"x\n", (size_t)nf->flag);
|
87
87
|
printf(" nin = %d\n", nf->nin);
|
@@ -106,9 +106,9 @@ print_ndfunc(ndfunc_t *nf) {
|
|
106
106
|
|
107
107
|
|
108
108
|
static void
|
109
|
-
print_ndloop(
|
109
|
+
print_ndloop(cumo_na_md_loop_t *lp) {
|
110
110
|
int i,j,nd;
|
111
|
-
printf("
|
111
|
+
printf("cumo_na_md_loop_t = 0x%"SZF"x {\n",(size_t)lp);
|
112
112
|
printf(" narg = %d\n", lp->narg);
|
113
113
|
printf(" nin = %d\n", lp->nin);
|
114
114
|
printf(" ndim = %d\n", lp->ndim);
|
@@ -190,18 +190,18 @@ print_ndloop(na_md_loop_t *lp) {
|
|
190
190
|
}
|
191
191
|
|
192
192
|
|
193
|
-
// returns 0x01 if
|
194
|
-
// returns 0x02 if
|
193
|
+
// returns 0x01 if CUMO_NDF_HAS_LOOP, but not supporting CUMO_NDF_STRIDE_LOOP
|
194
|
+
// returns 0x02 if CUMO_NDF_HAS_LOOP, but not supporting CUMO_NDF_INDEX_LOOP
|
195
195
|
static unsigned int
|
196
|
-
ndloop_func_loop_spec(
|
196
|
+
ndloop_func_loop_spec(cumo_ndfunc_t *nf, int user_ndim)
|
197
197
|
{
|
198
198
|
unsigned int f=0;
|
199
199
|
// If user function supports LOOP
|
200
|
-
if (user_ndim > 0 ||
|
201
|
-
if (!
|
200
|
+
if (user_ndim > 0 || CUMO_NDF_TEST(nf,CUMO_NDF_HAS_LOOP)) {
|
201
|
+
if (!CUMO_NDF_TEST(nf,CUMO_NDF_STRIDE_LOOP)) {
|
202
202
|
f |= 1;
|
203
203
|
}
|
204
|
-
if (!
|
204
|
+
if (!CUMO_NDF_TEST(nf,CUMO_NDF_INDEX_LOOP)) {
|
205
205
|
f |= 2;
|
206
206
|
}
|
207
207
|
}
|
@@ -214,7 +214,7 @@ ndloop_func_loop_spec(ndfunc_t *nf, int user_ndim)
|
|
214
214
|
static int
|
215
215
|
ndloop_cast_required(VALUE type, VALUE value)
|
216
216
|
{
|
217
|
-
return CASTABLE(type) && type !=
|
217
|
+
return CASTABLE(type) && type != rb_obj_class(value);
|
218
218
|
}
|
219
219
|
|
220
220
|
static int
|
@@ -237,7 +237,7 @@ ndloop_cast_error(VALUE type, VALUE value)
|
|
237
237
|
// to type specified by nf->args[j].type
|
238
238
|
// returns copy_flag where nth-bit is set if nth argument is converted.
|
239
239
|
static unsigned int
|
240
|
-
ndloop_cast_args(
|
240
|
+
ndloop_cast_args(cumo_ndfunc_t *nf, VALUE args)
|
241
241
|
{
|
242
242
|
int j;
|
243
243
|
unsigned int copy_flag=0;
|
@@ -253,7 +253,7 @@ ndloop_cast_args(ndfunc_t *nf, VALUE args)
|
|
253
253
|
continue;
|
254
254
|
|
255
255
|
if (ndloop_castable_type(type)) {
|
256
|
-
RARRAY_ASET(args,j,
|
256
|
+
RARRAY_ASET(args,j,cumo_na_type_s_cast(type, value));
|
257
257
|
copy_flag |= 1<<j;
|
258
258
|
} else {
|
259
259
|
ndloop_cast_error(type, value);
|
@@ -266,18 +266,18 @@ ndloop_cast_args(ndfunc_t *nf, VALUE args)
|
|
266
266
|
|
267
267
|
|
268
268
|
static void
|
269
|
-
ndloop_handle_symbol_in_ain(VALUE type, VALUE value, int at,
|
269
|
+
ndloop_handle_symbol_in_ain(VALUE type, VALUE value, int at, cumo_na_md_loop_t *lp)
|
270
270
|
{
|
271
|
-
if (type==
|
271
|
+
if (type==cumo_sym_reduce) {
|
272
272
|
lp->reduce = value;
|
273
273
|
}
|
274
|
-
else if (type==
|
274
|
+
else if (type==cumo_sym_option) {
|
275
275
|
lp->user.option = value;
|
276
276
|
}
|
277
|
-
else if (type==
|
277
|
+
else if (type==cumo_sym_loop_opt) {
|
278
278
|
lp->loop_opt = value;
|
279
279
|
}
|
280
|
-
else if (type==
|
280
|
+
else if (type==cumo_sym_init) {
|
281
281
|
lp->init_aidx = at;
|
282
282
|
}
|
283
283
|
else {
|
@@ -292,7 +292,7 @@ max2(int x, int y)
|
|
292
292
|
}
|
293
293
|
|
294
294
|
static void
|
295
|
-
ndloop_find_max_dimension(
|
295
|
+
ndloop_find_max_dimension(cumo_na_md_loop_t *lp, cumo_ndfunc_t *nf, VALUE args)
|
296
296
|
{
|
297
297
|
int j;
|
298
298
|
int nin=0; // number of input objects (except for symbols)
|
@@ -307,8 +307,8 @@ ndloop_find_max_dimension(na_md_loop_t *lp, ndfunc_t *nf, VALUE args)
|
|
307
307
|
} else {
|
308
308
|
nin++;
|
309
309
|
user_nd = max2(user_nd, nf->ain[j].dim);
|
310
|
-
if (
|
311
|
-
loop_nd = max2(loop_nd,
|
310
|
+
if (CumoIsNArray(v))
|
311
|
+
loop_nd = max2(loop_nd, CUMO_RNARRAY_NDIM(v) - nf->ain[j].dim);
|
312
312
|
}
|
313
313
|
}
|
314
314
|
|
@@ -329,9 +329,9 @@ ndloop_find_max_dimension(na_md_loop_t *lp, ndfunc_t *nf, VALUE args)
|
|
329
329
|
*/
|
330
330
|
|
331
331
|
static void
|
332
|
-
ndloop_alloc(
|
332
|
+
ndloop_alloc(cumo_na_md_loop_t *lp, cumo_ndfunc_t *nf, VALUE args,
|
333
333
|
void *opt_ptr, unsigned int copy_flag,
|
334
|
-
void (*loop_func)(
|
334
|
+
void (*loop_func)(cumo_ndfunc_t*, cumo_na_md_loop_t*))
|
335
335
|
{
|
336
336
|
int i,j;
|
337
337
|
int narg;
|
@@ -342,7 +342,7 @@ ndloop_alloc(na_md_loop_t *lp, ndfunc_t *nf, VALUE args,
|
|
342
342
|
|
343
343
|
long args_len;
|
344
344
|
|
345
|
-
|
345
|
+
cumo_na_loop_iter_t *iter;
|
346
346
|
|
347
347
|
int trans_dim;
|
348
348
|
unsigned int f;
|
@@ -375,19 +375,19 @@ ndloop_alloc(na_md_loop_t *lp, ndfunc_t *nf, VALUE args,
|
|
375
375
|
max_nd = lp->ndim + lp->user.ndim;
|
376
376
|
|
377
377
|
n1 = sizeof(size_t)*(max_nd+1);
|
378
|
-
n2 = sizeof(
|
378
|
+
n2 = sizeof(cumo_na_loop_xargs_t)*narg;
|
379
379
|
n2 = ((n2-1)/8+1)*8;
|
380
|
-
n3 = sizeof(
|
380
|
+
n3 = sizeof(cumo_na_loop_args_t)*narg;
|
381
381
|
n3 = ((n3-1)/8+1)*8;
|
382
|
-
n4 = sizeof(
|
382
|
+
n4 = sizeof(cumo_na_loop_iter_t)*narg*(max_nd+1);
|
383
383
|
n4 = ((n4-1)/8+1)*8;
|
384
384
|
n5 = sizeof(int)*(max_nd+1);
|
385
385
|
|
386
386
|
lp->ptr = buf = (char*)xmalloc(n1+n2+n3+n4+n5);
|
387
387
|
lp->n = (size_t*)buf; buf+=n1;
|
388
|
-
lp->xargs = (
|
389
|
-
lp->user.args = (
|
390
|
-
lp->iter_ptr = iter = (
|
388
|
+
lp->xargs = (cumo_na_loop_xargs_t*)buf; buf+=n2;
|
389
|
+
lp->user.args = (cumo_na_loop_args_t*)buf; buf+=n3;
|
390
|
+
lp->iter_ptr = iter = (cumo_na_loop_iter_t*)buf; buf+=n4;
|
391
391
|
lp->trans_map = (int*)buf;
|
392
392
|
|
393
393
|
for (j=0; j<narg; j++) {
|
@@ -397,7 +397,7 @@ ndloop_alloc(na_md_loop_t *lp, ndfunc_t *nf, VALUE args,
|
|
397
397
|
LARG(lp,j).ndim = 0;
|
398
398
|
lp->xargs[j].iter = &(iter[(max_nd+1)*j]);
|
399
399
|
lp->xargs[j].bufcp = NULL;
|
400
|
-
lp->xargs[j].flag = (j<lp->nin) ?
|
400
|
+
lp->xargs[j].flag = (j<lp->nin) ? CUMO_NDL_READ : CUMO_NDL_WRITE;
|
401
401
|
lp->xargs[j].free_user_iter = 0;
|
402
402
|
}
|
403
403
|
|
@@ -414,10 +414,10 @@ ndloop_alloc(na_md_loop_t *lp, ndfunc_t *nf, VALUE args,
|
|
414
414
|
// array loop
|
415
415
|
// [*,+,*,+,*] => [*,*,*,+,+]
|
416
416
|
// trans_map=[0,3,1,4,2] <= [0,1,2,3,4]
|
417
|
-
if (
|
417
|
+
if (CUMO_NDF_TEST(nf,CUMO_NDF_FLAT_REDUCE) && RTEST(lp->reduce)) {
|
418
418
|
trans_dim = 0;
|
419
419
|
for (i=0; i<max_nd; i++) {
|
420
|
-
if (
|
420
|
+
if (cumo_na_test_reduce(lp->reduce, i)) {
|
421
421
|
lp->trans_map[i] = -1;
|
422
422
|
} else {
|
423
423
|
lp->trans_map[i] = trans_dim++;
|
@@ -449,12 +449,12 @@ ndloop_release(VALUE vlp)
|
|
449
449
|
{
|
450
450
|
int j;
|
451
451
|
VALUE v;
|
452
|
-
|
452
|
+
cumo_na_md_loop_t *lp = (cumo_na_md_loop_t*)(vlp);
|
453
453
|
|
454
454
|
for (j=0; j < lp->narg; j++) {
|
455
455
|
v = LARG(lp,j).value;
|
456
|
-
if (
|
457
|
-
|
456
|
+
if (CumoIsNArray(v)) {
|
457
|
+
cumo_na_release_lock(v);
|
458
458
|
}
|
459
459
|
}
|
460
460
|
for (j=0; j<lp->narg; j++) {
|
@@ -483,7 +483,7 @@ ndloop_release(VALUE vlp)
|
|
483
483
|
set lp->n[i] (shape of n-d iteration) here
|
484
484
|
*/
|
485
485
|
static void
|
486
|
-
ndloop_check_shape(
|
486
|
+
ndloop_check_shape(cumo_na_md_loop_t *lp, int nf_dim, cumo_narray_t *na)
|
487
487
|
{
|
488
488
|
int i, k;
|
489
489
|
size_t n;
|
@@ -500,7 +500,7 @@ ndloop_check_shape(na_md_loop_t *lp, int nf_dim, narray_t *na)
|
|
500
500
|
lp->n[i] = n;
|
501
501
|
} else if (lp->n[i] != n) {
|
502
502
|
// inconsistent array shape
|
503
|
-
rb_raise(
|
503
|
+
rb_raise(cumo_na_eShapeError,"shape1[%d](=%"SZF"u) != shape2[%d](=%"SZF"u)",
|
504
504
|
i, lp->n[i], k, n);
|
505
505
|
}
|
506
506
|
}
|
@@ -512,37 +512,37 @@ ndloop_check_shape(na_md_loop_t *lp, int nf_dim, narray_t *na)
|
|
512
512
|
na->shape[i] == lp->n[ dim_map[i] ]
|
513
513
|
*/
|
514
514
|
static void
|
515
|
-
ndloop_set_stepidx(
|
515
|
+
ndloop_set_stepidx(cumo_na_md_loop_t *lp, int j, VALUE vna, int *dim_map, int rwflag)
|
516
516
|
{
|
517
517
|
size_t n, s;
|
518
518
|
int i, k, nd;
|
519
|
-
|
520
|
-
|
519
|
+
cumo_stridx_t sdx;
|
520
|
+
cumo_narray_t *na;
|
521
521
|
|
522
522
|
LARG(lp,j).value = vna;
|
523
|
-
LARG(lp,j).elmsz =
|
524
|
-
if (rwflag ==
|
525
|
-
LARG(lp,j).ptr =
|
523
|
+
LARG(lp,j).elmsz = cumo_na_element_stride(vna);
|
524
|
+
if (rwflag == CUMO_NDL_READ) {
|
525
|
+
LARG(lp,j).ptr = cumo_na_get_pointer_for_read(vna);
|
526
526
|
} else
|
527
|
-
if (rwflag ==
|
528
|
-
LARG(lp,j).ptr =
|
527
|
+
if (rwflag == CUMO_NDL_WRITE) {
|
528
|
+
LARG(lp,j).ptr = cumo_na_get_pointer_for_write(vna);
|
529
529
|
} else
|
530
|
-
if (rwflag ==
|
531
|
-
LARG(lp,j).ptr =
|
530
|
+
if (rwflag == CUMO_NDL_READ_WRITE) {
|
531
|
+
LARG(lp,j).ptr = cumo_na_get_pointer_for_read_write(vna);
|
532
532
|
} else {
|
533
533
|
rb_bug("invalid value for read-write flag");
|
534
534
|
}
|
535
|
-
|
535
|
+
CumoGetNArray(vna,na);
|
536
536
|
nd = LARG(lp,j).ndim;
|
537
537
|
|
538
|
-
switch(
|
539
|
-
case
|
540
|
-
if (
|
538
|
+
switch(CUMO_NA_TYPE(na)) {
|
539
|
+
case CUMO_NARRAY_DATA_T:
|
540
|
+
if (CUMO_NA_DATA_PTR(na)==NULL && CUMO_NA_SIZE(na)>0) {
|
541
541
|
rb_bug("cannot read no-data NArray");
|
542
542
|
rb_raise(rb_eRuntimeError,"cannot read no-data NArray");
|
543
543
|
}
|
544
544
|
// through
|
545
|
-
case
|
545
|
+
case CUMO_NARRAY_FILEMAP_T:
|
546
546
|
s = LARG(lp,j).elmsz;
|
547
547
|
for (k=na->ndim; k--;) {
|
548
548
|
n = na->shape[k];
|
@@ -557,25 +557,25 @@ ndloop_set_stepidx(na_md_loop_t *lp, int j, VALUE vna, int *dim_map, int rwflag)
|
|
557
557
|
}
|
558
558
|
LITER(lp,0,j).pos = 0;
|
559
559
|
break;
|
560
|
-
case
|
561
|
-
LITER(lp,0,j).pos =
|
560
|
+
case CUMO_NARRAY_VIEW_T:
|
561
|
+
LITER(lp,0,j).pos = CUMO_NA_VIEW_OFFSET(na);
|
562
562
|
for (k=0; k<na->ndim; k++) {
|
563
563
|
n = na->shape[k];
|
564
|
-
sdx =
|
564
|
+
sdx = CUMO_NA_VIEW_STRIDX(na)[k];
|
565
565
|
if (n > 1 || nd > 0) {
|
566
566
|
i = dim_map[k];
|
567
|
-
if (
|
567
|
+
if (CUMO_SDX_IS_INDEX(sdx)) {
|
568
568
|
LITER(lp,i,j).step = 0;
|
569
|
-
LITER(lp,i,j).idx =
|
569
|
+
LITER(lp,i,j).idx = CUMO_SDX_GET_INDEX(sdx);
|
570
570
|
} else {
|
571
|
-
LITER(lp,i,j).step =
|
571
|
+
LITER(lp,i,j).step = CUMO_SDX_GET_STRIDE(sdx);
|
572
572
|
//LITER(lp,i,j).idx = NULL;
|
573
573
|
}
|
574
574
|
} else if (n==1) {
|
575
|
-
if (
|
576
|
-
|
575
|
+
if (CUMO_SDX_IS_INDEX(sdx)) {
|
576
|
+
CUMO_SHOW_SYNCHRONIZE_FIXME_WARNING_ONCE("ndloop_set_stepidx", "any");
|
577
577
|
cumo_cuda_runtime_check_status(cudaDeviceSynchronize());
|
578
|
-
LITER(lp,0,j).pos +=
|
578
|
+
LITER(lp,0,j).pos += CUMO_SDX_GET_INDEX(sdx)[0];
|
579
579
|
}
|
580
580
|
}
|
581
581
|
nd--;
|
@@ -589,11 +589,11 @@ ndloop_set_stepidx(na_md_loop_t *lp, int j, VALUE vna, int *dim_map, int rwflag)
|
|
589
589
|
|
590
590
|
|
591
591
|
static void
|
592
|
-
ndloop_init_args(
|
592
|
+
ndloop_init_args(cumo_ndfunc_t *nf, cumo_na_md_loop_t *lp, VALUE args)
|
593
593
|
{
|
594
594
|
int i, j;
|
595
595
|
VALUE v;
|
596
|
-
|
596
|
+
cumo_narray_t *na;
|
597
597
|
int nf_dim;
|
598
598
|
int dim_beg;
|
599
599
|
int *dim_map;
|
@@ -612,12 +612,12 @@ na->shape[i] == lp->n[ dim_map[i] ]
|
|
612
612
|
continue;
|
613
613
|
}
|
614
614
|
v = RARRAY_AREF(args,j);
|
615
|
-
if (
|
615
|
+
if (CumoIsNArray(v)) {
|
616
616
|
// set LARG(lp,j) with v
|
617
|
-
|
617
|
+
CumoGetNArray(v,na);
|
618
618
|
nf_dim = nf->ain[j].dim;
|
619
619
|
if (nf_dim > na->ndim) {
|
620
|
-
rb_raise(
|
620
|
+
rb_raise(cumo_na_eDimensionError,"requires >= %d-dimensioal array "
|
621
621
|
"while %d-dimensional array is given",nf_dim,na->ndim);
|
622
622
|
}
|
623
623
|
ndloop_check_shape(lp, nf_dim, na);
|
@@ -626,10 +626,10 @@ na->shape[i] == lp->n[ dim_map[i] ]
|
|
626
626
|
dim_map[i] = lp->trans_map[i+dim_beg];
|
627
627
|
//printf("dim_map[%d]=%d na->shape[%d]=%d\n",i,dim_map[i],i,na->shape[i]);
|
628
628
|
}
|
629
|
-
if (nf->ain[j].type==
|
630
|
-
lp->xargs[j].flag = flag =
|
629
|
+
if (nf->ain[j].type==CUMO_OVERWRITE) {
|
630
|
+
lp->xargs[j].flag = flag = CUMO_NDL_WRITE;
|
631
631
|
} else {
|
632
|
-
lp->xargs[j].flag = flag =
|
632
|
+
lp->xargs[j].flag = flag = CUMO_NDL_READ;
|
633
633
|
}
|
634
634
|
LARG(lp,j).ndim = nf_dim;
|
635
635
|
ndloop_set_stepidx(lp, j, v, dim_map, flag);
|
@@ -658,22 +658,22 @@ na->shape[i] == lp->n[ dim_map[i] ]
|
|
658
658
|
|
659
659
|
|
660
660
|
static int
|
661
|
-
ndloop_check_inplace(VALUE type, int
|
661
|
+
ndloop_check_inplace(VALUE type, int cumo_na_ndim, size_t *cumo_na_shape, VALUE v)
|
662
662
|
{
|
663
663
|
int i;
|
664
|
-
|
664
|
+
cumo_narray_t *na;
|
665
665
|
|
666
666
|
// type check
|
667
|
-
if (type !=
|
667
|
+
if (type != rb_obj_class(v)) {
|
668
668
|
return 0;
|
669
669
|
}
|
670
|
-
|
670
|
+
CumoGetNArray(v,na);
|
671
671
|
// shape check
|
672
|
-
if (na->ndim !=
|
672
|
+
if (na->ndim != cumo_na_ndim) {
|
673
673
|
return 0;
|
674
674
|
}
|
675
|
-
for (i=0; i<
|
676
|
-
if (
|
675
|
+
for (i=0; i<cumo_na_ndim; i++) {
|
676
|
+
if (cumo_na_shape[i] != na->shape[i]) {
|
677
677
|
return 0;
|
678
678
|
}
|
679
679
|
}
|
@@ -682,8 +682,8 @@ ndloop_check_inplace(VALUE type, int na_ndim, size_t *na_shape, VALUE v)
|
|
682
682
|
}
|
683
683
|
|
684
684
|
static VALUE
|
685
|
-
ndloop_find_inplace(
|
686
|
-
int
|
685
|
+
ndloop_find_inplace(cumo_ndfunc_t *nf, cumo_na_md_loop_t *lp, VALUE type,
|
686
|
+
int cumo_na_ndim, size_t *cumo_na_shape, VALUE args)
|
687
687
|
{
|
688
688
|
int j;
|
689
689
|
VALUE v;
|
@@ -691,9 +691,9 @@ ndloop_find_inplace(ndfunc_t *nf, na_md_loop_t *lp, VALUE type,
|
|
691
691
|
// find inplace
|
692
692
|
for (j=0; j<nf->nin; j++) {
|
693
693
|
v = RARRAY_AREF(args,j);
|
694
|
-
if (
|
695
|
-
if (
|
696
|
-
if (ndloop_check_inplace(type,
|
694
|
+
if (CumoIsNArray(v)) {
|
695
|
+
if (CUMO_TEST_INPLACE(v)) {
|
696
|
+
if (ndloop_check_inplace(type,cumo_na_ndim,cumo_na_shape,v)) {
|
697
697
|
// if already copied, create outary and write-back
|
698
698
|
if (lp->copy_flag & (1<<j)) {
|
699
699
|
lp->writeback = j;
|
@@ -707,7 +707,7 @@ ndloop_find_inplace(ndfunc_t *nf, na_md_loop_t *lp, VALUE type,
|
|
707
707
|
for (j=0; j<nf->nin; j++) {
|
708
708
|
if (lp->copy_flag & (1<<j)) {
|
709
709
|
v = RARRAY_AREF(args,j);
|
710
|
-
if (ndloop_check_inplace(type,
|
710
|
+
if (ndloop_check_inplace(type,cumo_na_ndim,cumo_na_shape,v)) {
|
711
711
|
return v;
|
712
712
|
}
|
713
713
|
}
|
@@ -718,7 +718,7 @@ ndloop_find_inplace(ndfunc_t *nf, na_md_loop_t *lp, VALUE type,
|
|
718
718
|
|
719
719
|
|
720
720
|
static VALUE
|
721
|
-
ndloop_get_arg_type(
|
721
|
+
ndloop_get_arg_type(cumo_ndfunc_t *nf, VALUE args, VALUE t)
|
722
722
|
{
|
723
723
|
int i;
|
724
724
|
|
@@ -731,7 +731,7 @@ ndloop_get_arg_type(ndfunc_t *nf, VALUE args, VALUE t)
|
|
731
731
|
t = nf->ain[i].type;
|
732
732
|
// if i-th type is Qnil, get the type of i-th input value
|
733
733
|
if (!CASTABLE(t)) {
|
734
|
-
t =
|
734
|
+
t = rb_obj_class(RARRAY_AREF(args,i));
|
735
735
|
}
|
736
736
|
}
|
737
737
|
return t;
|
@@ -739,61 +739,61 @@ ndloop_get_arg_type(ndfunc_t *nf, VALUE args, VALUE t)
|
|
739
739
|
|
740
740
|
|
741
741
|
static VALUE
|
742
|
-
ndloop_set_output_narray(
|
742
|
+
ndloop_set_output_narray(cumo_ndfunc_t *nf, cumo_na_md_loop_t *lp, int k,
|
743
743
|
VALUE type, VALUE args)
|
744
744
|
{
|
745
745
|
int i, j;
|
746
|
-
int
|
746
|
+
int cumo_na_ndim;
|
747
747
|
int lp_dim;
|
748
748
|
volatile VALUE v=Qnil;
|
749
|
-
size_t *
|
749
|
+
size_t *cumo_na_shape;
|
750
750
|
int *dim_map;
|
751
|
-
int flag =
|
751
|
+
int flag = CUMO_NDL_READ_WRITE;
|
752
752
|
int nd;
|
753
753
|
int max_nd = lp->ndim + nf->aout[k].dim;
|
754
754
|
|
755
|
-
|
755
|
+
cumo_na_shape = ALLOCA_N(size_t, max_nd);
|
756
756
|
dim_map = ALLOCA_N(int, max_nd);
|
757
757
|
|
758
758
|
//printf("max_nd=%d lp->ndim=%d\n",max_nd,lp->ndim);
|
759
759
|
|
760
760
|
// md-loop shape
|
761
|
-
|
761
|
+
cumo_na_ndim = 0;
|
762
762
|
for (i=0; i<lp->ndim; i++) {
|
763
|
-
//
|
763
|
+
// cumo_na_shape[i] == lp->n[lp->trans_map[i]]
|
764
764
|
lp_dim = lp->trans_map[i];
|
765
765
|
//printf("i=%d lp_dim=%d\n",i,lp_dim);
|
766
|
-
if (
|
767
|
-
|
766
|
+
if (CUMO_NDF_TEST(nf,CUMO_NDF_CUM)) { // cumulate with shape kept
|
767
|
+
cumo_na_shape[cumo_na_ndim] = lp->n[lp_dim];
|
768
768
|
} else
|
769
|
-
if (
|
770
|
-
if (
|
771
|
-
|
769
|
+
if (cumo_na_test_reduce(lp->reduce,lp_dim)) { // accumulate dimension
|
770
|
+
if (CUMO_NDF_TEST(nf,CUMO_NDF_KEEP_DIM)) {
|
771
|
+
cumo_na_shape[cumo_na_ndim] = 1; // leave it
|
772
772
|
} else {
|
773
773
|
continue; // delete dimension
|
774
774
|
}
|
775
775
|
} else {
|
776
|
-
|
776
|
+
cumo_na_shape[cumo_na_ndim] = lp->n[lp_dim];
|
777
777
|
}
|
778
|
-
//printf("i=%d lp_dim=%d
|
779
|
-
dim_map[
|
780
|
-
//dim_map[lp_dim] =
|
778
|
+
//printf("i=%d lp_dim=%d cumo_na_shape[%d]=%ld\n",i,lp_dim,i,cumo_na_shape[i]);
|
779
|
+
dim_map[cumo_na_ndim++] = lp_dim;
|
780
|
+
//dim_map[lp_dim] = cumo_na_ndim++;
|
781
781
|
}
|
782
782
|
|
783
783
|
// user-specified shape
|
784
784
|
for (i=0; i<nf->aout[k].dim; i++) {
|
785
|
-
|
786
|
-
dim_map[
|
785
|
+
cumo_na_shape[cumo_na_ndim] = nf->aout[k].shape[i];
|
786
|
+
dim_map[cumo_na_ndim++] = i + lp->ndim;
|
787
787
|
}
|
788
788
|
|
789
789
|
// find inplace from input arrays
|
790
|
-
if (k==0 &&
|
791
|
-
v = ndloop_find_inplace(nf,lp,type,
|
790
|
+
if (k==0 && CUMO_NDF_TEST(nf,CUMO_NDF_INPLACE)) {
|
791
|
+
v = ndloop_find_inplace(nf,lp,type,cumo_na_ndim,cumo_na_shape,args);
|
792
792
|
}
|
793
793
|
if (!RTEST(v)) {
|
794
794
|
// new object
|
795
|
-
v =
|
796
|
-
flag =
|
795
|
+
v = cumo_na_new(type, cumo_na_ndim, cumo_na_shape);
|
796
|
+
flag = CUMO_NDL_WRITE;
|
797
797
|
}
|
798
798
|
|
799
799
|
j = lp->nin + k;
|
@@ -807,7 +807,7 @@ ndloop_set_output_narray(ndfunc_t *nf, na_md_loop_t *lp, int k,
|
|
807
807
|
}
|
808
808
|
|
809
809
|
static VALUE
|
810
|
-
ndloop_set_output(
|
810
|
+
ndloop_set_output(cumo_ndfunc_t *nf, cumo_na_md_loop_t *lp, VALUE args)
|
811
811
|
{
|
812
812
|
int i, j, k, idx;
|
813
813
|
volatile VALUE v, t, results;
|
@@ -848,7 +848,7 @@ ndloop_set_output(ndfunc_t *nf, na_md_loop_t *lp, VALUE args)
|
|
848
848
|
idx = nf->ain[k].dim;
|
849
849
|
v = RARRAY_AREF(results,idx);
|
850
850
|
init = RARRAY_AREF(args,k);
|
851
|
-
|
851
|
+
cumo_na_store(v,init);
|
852
852
|
}
|
853
853
|
|
854
854
|
return results;
|
@@ -860,12 +860,12 @@ ndloop_set_output(ndfunc_t *nf, na_md_loop_t *lp, VALUE args)
|
|
860
860
|
// For example, compressing [2,3] shape into [6] so that we can process
|
861
861
|
// all elements with one user loop.
|
862
862
|
static void
|
863
|
-
|
863
|
+
cumo_ndfunc_contract_loop(cumo_na_md_loop_t *lp)
|
864
864
|
{
|
865
865
|
int i,j,k,success,cnt=0;
|
866
866
|
int red0, redi;
|
867
867
|
|
868
|
-
redi =
|
868
|
+
redi = cumo_na_test_reduce(lp->reduce,0);
|
869
869
|
|
870
870
|
//for (i=0; i<lp->ndim; i++) {
|
871
871
|
// printf("lp->n[%d]=%lu\n",i,lp->n[i]);
|
@@ -873,7 +873,7 @@ ndfunc_contract_loop(na_md_loop_t *lp)
|
|
873
873
|
|
874
874
|
for (i=1; i<lp->ndim; i++) {
|
875
875
|
red0 = redi;
|
876
|
-
redi =
|
876
|
+
redi = cumo_na_test_reduce(lp->reduce,i);
|
877
877
|
//printf("contract i=%d reduce_cond=%d %d\n",i,red0,redi);
|
878
878
|
if (red0 != redi) {
|
879
879
|
continue;
|
@@ -932,7 +932,7 @@ ndfunc_contract_loop(na_md_loop_t *lp)
|
|
932
932
|
//
|
933
933
|
// For example, for element-wise function, lp->user.ndim is 1, and lp->ndim -= 1.
|
934
934
|
static void
|
935
|
-
|
935
|
+
cumo_ndfunc_set_user_loop(cumo_ndfunc_t *nf, cumo_na_md_loop_t *lp)
|
936
936
|
{
|
937
937
|
int j, ud=0;
|
938
938
|
|
@@ -940,7 +940,7 @@ ndfunc_set_user_loop(ndfunc_t *nf, na_md_loop_t *lp)
|
|
940
940
|
// Increase user.ndim by number of dimensions to reduce for reduction function.
|
941
941
|
ud = lp->reduce_dim;
|
942
942
|
}
|
943
|
-
else if (lp->ndim > 0 &&
|
943
|
+
else if (lp->ndim > 0 && CUMO_NDF_TEST(nf,CUMO_NDF_HAS_LOOP)) {
|
944
944
|
// Set user.ndim to 1 (default is 0) for element-wise function.
|
945
945
|
ud = 1;
|
946
946
|
}
|
@@ -964,29 +964,29 @@ ndfunc_set_user_loop(ndfunc_t *nf, na_md_loop_t *lp)
|
|
964
964
|
//printf("lp->reduce_dim=%d lp->user.ndim=%d lp->ndim=%d\n",lp->reduce_dim,lp->user.ndim,lp->ndim);
|
965
965
|
|
966
966
|
skip_ud:
|
967
|
-
// user function shape is the latter part of
|
967
|
+
// user function shape is the latter part of cumo_na_md_loop shape.
|
968
968
|
lp->user.n = &(lp->n[lp->ndim]);
|
969
969
|
for (j=0; j<lp->narg; j++) {
|
970
970
|
LARG(lp,j).iter = &LITER(lp,lp->ndim,j);
|
971
|
-
//printf("in
|
971
|
+
//printf("in cumo_ndfunc_set_user_loop: lp->user.args[%d].iter=%lx\n",j,(size_t)(LARG(lp,j).iter));
|
972
972
|
}
|
973
973
|
}
|
974
974
|
|
975
975
|
|
976
976
|
// Initialize lp->user for indexer loop.
|
977
977
|
static void
|
978
|
-
|
978
|
+
cumo_ndfunc_set_user_indexer_loop(cumo_ndfunc_t *nf, cumo_na_md_loop_t *lp)
|
979
979
|
{
|
980
980
|
int j;
|
981
981
|
|
982
982
|
lp->user.ndim = lp->ndim;
|
983
983
|
lp->ndim = 0;
|
984
984
|
|
985
|
-
if (
|
985
|
+
if (CUMO_NDF_TEST(nf,CUMO_NDF_FLAT_REDUCE)) {
|
986
986
|
// in
|
987
987
|
LARG(lp,0).ndim = lp->user.ndim;
|
988
988
|
LARG(lp,0).shape = &(lp->n[lp->ndim]);
|
989
|
-
// out is constructed at
|
989
|
+
// out is constructed at cumo_na_make_reduction_arg from in and lp->reduce
|
990
990
|
|
991
991
|
lp->user.n = &(lp->n[lp->ndim]);
|
992
992
|
for (j=0; j<lp->narg; j++) {
|
@@ -1015,10 +1015,10 @@ ndfunc_set_user_indexer_loop(ndfunc_t *nf, na_md_loop_t *lp)
|
|
1015
1015
|
// Judge whether a (contiguous) buffer copy is required or not, and malloc if it is required.
|
1016
1016
|
//
|
1017
1017
|
// CASES TO REQUIRE A BUFFER COPY:
|
1018
|
-
// 1) ndloop has `idx` but does not support
|
1019
|
-
// 2) ndloop has non-contiguous arrays but does not support
|
1018
|
+
// 1) ndloop has `idx` but does not support CUMO_NDF_INDEX_LOOP.
|
1019
|
+
// 2) ndloop has non-contiguous arrays but does not support CUMO_NDF_STRIDE_LOOP.
|
1020
1020
|
static void
|
1021
|
-
|
1021
|
+
cumo_ndfunc_set_bufcp(cumo_ndfunc_t *nf, cumo_na_md_loop_t *lp)
|
1022
1022
|
{
|
1023
1023
|
unsigned int f;
|
1024
1024
|
int i, j;
|
@@ -1026,7 +1026,7 @@ ndfunc_set_bufcp(ndfunc_t *nf, na_md_loop_t *lp)
|
|
1026
1026
|
bool zero_step;
|
1027
1027
|
ssize_t n, sz, elmsz, stride, n_total; //, last_step;
|
1028
1028
|
size_t *buf_shape;
|
1029
|
-
|
1029
|
+
cumo_na_loop_iter_t *buf_iter=NULL, *src_iter;
|
1030
1030
|
|
1031
1031
|
unsigned int loop_spec = ndloop_func_loop_spec(nf, lp->user.ndim);
|
1032
1032
|
//if (loop_spec==0) return;
|
@@ -1087,7 +1087,7 @@ ndfunc_set_bufcp(ndfunc_t *nf, na_md_loop_t *lp)
|
|
1087
1087
|
// over loop_spec or reduce_loop is not contiguous
|
1088
1088
|
if (f & loop_spec || (lp->reduce_dim > 1 && ndim > 0)) {
|
1089
1089
|
//printf("(buf,nd=%d)",nd);
|
1090
|
-
buf_iter = ALLOC_N(
|
1090
|
+
buf_iter = ALLOC_N(cumo_na_loop_iter_t,nd+3);
|
1091
1091
|
buf_shape = ALLOC_N(size_t,nd);
|
1092
1092
|
buf_iter[nd].pos = 0;
|
1093
1093
|
buf_iter[nd].step = 0;
|
@@ -1104,14 +1104,14 @@ ndfunc_set_bufcp(ndfunc_t *nf, na_md_loop_t *lp)
|
|
1104
1104
|
buf_shape[i] = n;
|
1105
1105
|
sz *= n;
|
1106
1106
|
}
|
1107
|
-
LBUFCP(lp,j) = ALLOC(
|
1107
|
+
LBUFCP(lp,j) = ALLOC(cumo_na_buffer_copy_t);
|
1108
1108
|
LBUFCP(lp,j)->ndim = ndim;
|
1109
1109
|
LBUFCP(lp,j)->elmsz = elmsz;
|
1110
1110
|
LBUFCP(lp,j)->n = buf_shape;
|
1111
1111
|
LBUFCP(lp,j)->src_iter = src_iter;
|
1112
1112
|
LBUFCP(lp,j)->buf_iter = buf_iter;
|
1113
1113
|
LARG(lp,j).iter = buf_iter;
|
1114
|
-
//printf("in
|
1114
|
+
//printf("in cumo_ndfunc_set_bufcp(1): lp->user.args[%d].iter=%lx\n",j,(size_t)(LARG(lp,j).iter));
|
1115
1115
|
LBUFCP(lp,j)->src_ptr = LARG(lp,j).ptr;
|
1116
1116
|
if (cumo_cuda_runtime_is_device_memory(LARG(lp,j).ptr)) {
|
1117
1117
|
LARG(lp,j).ptr = LBUFCP(lp,j)->buf_ptr = cumo_cuda_runtime_malloc(sz);
|
@@ -1130,7 +1130,7 @@ ndfunc_set_bufcp(ndfunc_t *nf, na_md_loop_t *lp)
|
|
1130
1130
|
last_step = src_iter[ndim-1].step;
|
1131
1131
|
if (lp->reduce_dim>1) {
|
1132
1132
|
//printf("(reduce_dim=%d,ndim=%d,nd=%d,n=%ld,lst=%ld)\n",lp->reduce_dim,ndim,nd,n_total,last_step);
|
1133
|
-
buf_iter = ALLOC_N(
|
1133
|
+
buf_iter = ALLOC_N(cumo_na_loop_iter_t,2);
|
1134
1134
|
buf_iter[0].pos = LARG(lp,j).iter[0].pos;
|
1135
1135
|
buf_iter[0].step = last_step;
|
1136
1136
|
buf_iter[0].idx = NULL;
|
@@ -1138,7 +1138,7 @@ ndfunc_set_bufcp(ndfunc_t *nf, na_md_loop_t *lp)
|
|
1138
1138
|
buf_iter[1].step = 0;
|
1139
1139
|
buf_iter[1].idx = NULL;
|
1140
1140
|
LARG(lp,j).iter = buf_iter;
|
1141
|
-
//printf("in
|
1141
|
+
//printf("in cumo_ndfunc_set_bufcp(2): lp->user.args[%d].iter=%lx\n",j,(size_t)(LARG(lp,j).iter));
|
1142
1142
|
lp->xargs[j].free_user_iter = 1;
|
1143
1143
|
}
|
1144
1144
|
}
|
@@ -1161,7 +1161,7 @@ ndfunc_set_bufcp(ndfunc_t *nf, na_md_loop_t *lp)
|
|
1161
1161
|
|
1162
1162
|
// Make contiguous memory for ops not supporting index or stride (step) loop
|
1163
1163
|
static void
|
1164
|
-
ndloop_copy_to_buffer(
|
1164
|
+
ndloop_copy_to_buffer(cumo_na_buffer_copy_t *lp)
|
1165
1165
|
{
|
1166
1166
|
size_t *c;
|
1167
1167
|
char *src, *buf;
|
@@ -1195,7 +1195,7 @@ ndloop_copy_to_buffer(na_buffer_copy_t *lp)
|
|
1195
1195
|
// i-th dimension
|
1196
1196
|
for (; i<nd; i++) {
|
1197
1197
|
if (LITER_SRC(lp,i).idx) {
|
1198
|
-
|
1198
|
+
CUMO_SHOW_SYNCHRONIZE_FIXME_WARNING_ONCE("ndloop_copy_to_buffer", "any");
|
1199
1199
|
cumo_cuda_runtime_check_status(cudaDeviceSynchronize());
|
1200
1200
|
LITER_SRC(lp,i+1).pos = LITER_SRC(lp,i).pos + LITER_SRC(lp,i).idx[c[i]];
|
1201
1201
|
} else {
|
@@ -1227,7 +1227,7 @@ ndloop_copy_to_buffer(na_buffer_copy_t *lp)
|
|
1227
1227
|
}
|
1228
1228
|
|
1229
1229
|
static void
|
1230
|
-
ndloop_copy_from_buffer(
|
1230
|
+
ndloop_copy_from_buffer(cumo_na_buffer_copy_t *lp)
|
1231
1231
|
{
|
1232
1232
|
size_t *c;
|
1233
1233
|
char *src, *buf;
|
@@ -1291,25 +1291,25 @@ ndloop_copy_from_buffer(na_buffer_copy_t *lp)
|
|
1291
1291
|
|
1292
1292
|
|
1293
1293
|
static void
|
1294
|
-
|
1294
|
+
cumo_ndfunc_write_back(cumo_ndfunc_t *nf, cumo_na_md_loop_t *lp, VALUE orig_args, VALUE results)
|
1295
1295
|
{
|
1296
1296
|
VALUE src, dst;
|
1297
1297
|
|
1298
1298
|
if (lp->writeback >= 0) {
|
1299
1299
|
dst = RARRAY_AREF(orig_args,lp->writeback);
|
1300
1300
|
src = RARRAY_AREF(results,0);
|
1301
|
-
|
1301
|
+
cumo_na_store(dst,src);
|
1302
1302
|
RARRAY_ASET(results,0,dst);
|
1303
1303
|
}
|
1304
1304
|
}
|
1305
1305
|
|
1306
1306
|
|
1307
1307
|
static VALUE
|
1308
|
-
ndloop_extract(VALUE results,
|
1308
|
+
ndloop_extract(VALUE results, cumo_ndfunc_t *nf)
|
1309
1309
|
{
|
1310
1310
|
// long n, i;
|
1311
1311
|
// VALUE x, y;
|
1312
|
-
//
|
1312
|
+
// cumo_narray_t *na;
|
1313
1313
|
|
1314
1314
|
// extract result objects
|
1315
1315
|
switch(nf->nout) {
|
@@ -1318,24 +1318,24 @@ ndloop_extract(VALUE results, ndfunc_t *nf)
|
|
1318
1318
|
case 1:
|
1319
1319
|
return RARRAY_AREF(results,0);
|
1320
1320
|
// x = RARRAY_AREF(results,0);
|
1321
|
-
// if (
|
1322
|
-
// if (
|
1323
|
-
//
|
1324
|
-
// if (
|
1325
|
-
// x = rb_funcall(x,
|
1321
|
+
// if (CUMO_NDF_TEST(nf,CUMO_NDF_EXTRACT)) {
|
1322
|
+
// if (CumoIsNArray(x)){
|
1323
|
+
// CumoGetNArray(x,na);
|
1324
|
+
// if (CUMO_NA_NDIM(na)==0) {
|
1325
|
+
// x = rb_funcall(x, cumo_id_extract, 0);
|
1326
1326
|
// }
|
1327
1327
|
// }
|
1328
1328
|
// }
|
1329
1329
|
// return x;
|
1330
1330
|
}
|
1331
|
-
// if (
|
1331
|
+
// if (CUMO_NDF_TEST(nf,CUMO_NDF_EXTRACT)) {
|
1332
1332
|
// n = RARRAY_LEN(results);
|
1333
1333
|
// for (i=0; i<n; i++) {
|
1334
1334
|
// x = RARRAY_AREF(results,i);
|
1335
|
-
// if (
|
1336
|
-
//
|
1337
|
-
// if (
|
1338
|
-
// y = rb_funcall(x,
|
1335
|
+
// if (CumoIsNArray(x)){
|
1336
|
+
// CumoGetNArray(x,na);
|
1337
|
+
// if (CUMO_NA_NDIM(na)==0) {
|
1338
|
+
// y = rb_funcall(x, cumo_id_extract, 0);
|
1339
1339
|
// RARRAY_ASET(results,i,y);
|
1340
1340
|
// }
|
1341
1341
|
// }
|
@@ -1345,7 +1345,7 @@ ndloop_extract(VALUE results, ndfunc_t *nf)
|
|
1345
1345
|
}
|
1346
1346
|
|
1347
1347
|
static bool
|
1348
|
-
loop_is_using_idx(
|
1348
|
+
loop_is_using_idx(cumo_na_md_loop_t *lp)
|
1349
1349
|
{
|
1350
1350
|
int i, j;
|
1351
1351
|
int nd = lp->ndim;
|
@@ -1367,14 +1367,14 @@ loop_is_using_idx(na_md_loop_t *lp)
|
|
1367
1367
|
}
|
1368
1368
|
|
1369
1369
|
static void
|
1370
|
-
loop_narray(
|
1370
|
+
loop_narray(cumo_ndfunc_t *nf, cumo_na_md_loop_t *lp);
|
1371
1371
|
|
1372
1372
|
static VALUE
|
1373
1373
|
ndloop_run(VALUE vlp)
|
1374
1374
|
{
|
1375
1375
|
volatile VALUE args, orig_args, results;
|
1376
|
-
|
1377
|
-
|
1376
|
+
cumo_na_md_loop_t *lp = (cumo_na_md_loop_t*)(vlp);
|
1377
|
+
cumo_ndfunc_t *nf;
|
1378
1378
|
|
1379
1379
|
orig_args = lp->vargs;
|
1380
1380
|
nf = lp->ndfunc;
|
@@ -1384,49 +1384,49 @@ ndloop_run(VALUE vlp)
|
|
1384
1384
|
// setup ndloop iterator with arguments
|
1385
1385
|
ndloop_init_args(nf, lp, args);
|
1386
1386
|
results = ndloop_set_output(nf, lp, args);
|
1387
|
-
//if (
|
1387
|
+
//if (cumo_na_debug_flag) {
|
1388
1388
|
// printf("-- ndloop_set_output --\n");
|
1389
1389
|
// print_ndloop(lp);
|
1390
1390
|
//}
|
1391
1391
|
|
1392
1392
|
// contract loop (compact dimessions)
|
1393
|
-
if (
|
1393
|
+
if (CUMO_NDF_TEST(nf,CUMO_NDF_INDEXER_LOOP) && CUMO_NDF_TEST(nf,CUMO_NDF_FLAT_REDUCE)) {
|
1394
1394
|
// do nothing
|
1395
1395
|
// TODO(sonots): support compacting dimensions in reduction indexer loop if it allows speed up.
|
1396
1396
|
} else {
|
1397
1397
|
if (lp->loop_func == loop_narray) {
|
1398
|
-
|
1399
|
-
if (
|
1400
|
-
printf("--
|
1398
|
+
cumo_ndfunc_contract_loop(lp);
|
1399
|
+
if (cumo_na_debug_flag) {
|
1400
|
+
printf("-- cumo_ndfunc_contract_loop --\n");
|
1401
1401
|
print_ndloop(lp);
|
1402
1402
|
}
|
1403
1403
|
}
|
1404
1404
|
}
|
1405
1405
|
|
1406
1406
|
// setup lp->user
|
1407
|
-
if (
|
1408
|
-
|
1409
|
-
if (
|
1410
|
-
printf("--
|
1407
|
+
if (CUMO_NDF_TEST(nf,CUMO_NDF_INDEXER_LOOP)) {
|
1408
|
+
cumo_ndfunc_set_user_indexer_loop(nf, lp);
|
1409
|
+
if (cumo_na_debug_flag) {
|
1410
|
+
printf("-- cumo_ndfunc_set_user_indexer_loop --\n");
|
1411
1411
|
print_ndloop(lp);
|
1412
1412
|
}
|
1413
1413
|
} else {
|
1414
|
-
|
1415
|
-
if (
|
1416
|
-
printf("--
|
1414
|
+
cumo_ndfunc_set_user_loop(nf, lp);
|
1415
|
+
if (cumo_na_debug_flag) {
|
1416
|
+
printf("-- cumo_ndfunc_set_user_loop --\n");
|
1417
1417
|
print_ndloop(lp);
|
1418
1418
|
}
|
1419
1419
|
}
|
1420
1420
|
|
1421
1421
|
// setup buffering during loop
|
1422
|
-
if (
|
1422
|
+
if (CUMO_NDF_TEST(nf,CUMO_NDF_INDEXER_LOOP) && CUMO_NDF_TEST(nf,CUMO_NDF_FLAT_REDUCE) && !loop_is_using_idx(lp)) {
|
1423
1423
|
// do nothing
|
1424
1424
|
} else {
|
1425
1425
|
if (lp->loop_func == loop_narray) {
|
1426
|
-
|
1426
|
+
cumo_ndfunc_set_bufcp(nf, lp);
|
1427
1427
|
}
|
1428
|
-
if (
|
1429
|
-
printf("--
|
1428
|
+
if (cumo_na_debug_flag) {
|
1429
|
+
printf("-- cumo_ndfunc_set_bufcp --\n");
|
1430
1430
|
print_ndloop(lp);
|
1431
1431
|
}
|
1432
1432
|
}
|
@@ -1439,7 +1439,7 @@ ndloop_run(VALUE vlp)
|
|
1439
1439
|
}
|
1440
1440
|
|
1441
1441
|
// write-back will be placed here
|
1442
|
-
|
1442
|
+
cumo_ndfunc_write_back(nf, lp, orig_args, results);
|
1443
1443
|
|
1444
1444
|
// extract result objects
|
1445
1445
|
return ndloop_extract(results, nf);
|
@@ -1449,7 +1449,7 @@ ndloop_run(VALUE vlp)
|
|
1449
1449
|
// ---------------------------------------------------------------------------
|
1450
1450
|
|
1451
1451
|
static void
|
1452
|
-
loop_narray(
|
1452
|
+
loop_narray(cumo_ndfunc_t *nf, cumo_na_md_loop_t *lp)
|
1453
1453
|
{
|
1454
1454
|
size_t *c;
|
1455
1455
|
int i, j;
|
@@ -1459,7 +1459,7 @@ loop_narray(ndfunc_t *nf, na_md_loop_t *lp)
|
|
1459
1459
|
rb_bug("bug? lp->ndim = %d\n", lp->ndim);
|
1460
1460
|
}
|
1461
1461
|
|
1462
|
-
if (nd==0 ||
|
1462
|
+
if (nd==0 || CUMO_NDF_TEST(nf,CUMO_NDF_INDEXER_LOOP)) {
|
1463
1463
|
for (j=0; j<lp->nin; j++) {
|
1464
1464
|
if (lp->xargs[j].bufcp) {
|
1465
1465
|
//printf("copy_to_buffer j=%d\n",j);
|
@@ -1468,7 +1468,7 @@ loop_narray(ndfunc_t *nf, na_md_loop_t *lp)
|
|
1468
1468
|
}
|
1469
1469
|
(*(nf->func))(&(lp->user));
|
1470
1470
|
for (j=0; j<lp->narg; j++) {
|
1471
|
-
if (lp->xargs[j].bufcp && (lp->xargs[j].flag &
|
1471
|
+
if (lp->xargs[j].bufcp && (lp->xargs[j].flag & CUMO_NDL_WRITE)) {
|
1472
1472
|
//printf("copy_from_buffer j=%d\n",j);
|
1473
1473
|
// copy data to work buffer
|
1474
1474
|
ndloop_copy_from_buffer(lp->xargs[j].bufcp);
|
@@ -1505,7 +1505,7 @@ loop_narray(ndfunc_t *nf, na_md_loop_t *lp)
|
|
1505
1505
|
}
|
1506
1506
|
(*(nf->func))(&(lp->user));
|
1507
1507
|
for (j=0; j<lp->narg; j++) {
|
1508
|
-
if (lp->xargs[j].bufcp && (lp->xargs[j].flag &
|
1508
|
+
if (lp->xargs[j].bufcp && (lp->xargs[j].flag & CUMO_NDL_WRITE)) {
|
1509
1509
|
// copy data to work buffer
|
1510
1510
|
//printf("copy_from_buffer j=%d\n",j);
|
1511
1511
|
ndloop_copy_from_buffer(lp->xargs[j].bufcp);
|
@@ -1526,12 +1526,12 @@ loop_narray(ndfunc_t *nf, na_md_loop_t *lp)
|
|
1526
1526
|
|
1527
1527
|
|
1528
1528
|
static VALUE
|
1529
|
-
|
1529
|
+
cumo_na_ndloop_main(cumo_ndfunc_t *nf, VALUE args, void *opt_ptr)
|
1530
1530
|
{
|
1531
1531
|
unsigned int copy_flag;
|
1532
|
-
|
1532
|
+
cumo_na_md_loop_t lp;
|
1533
1533
|
|
1534
|
-
if (
|
1534
|
+
if (cumo_na_debug_flag) print_ndfunc(nf);
|
1535
1535
|
|
1536
1536
|
// cast arguments to NArray
|
1537
1537
|
copy_flag = ndloop_cast_args(nf, args);
|
@@ -1545,10 +1545,10 @@ na_ndloop_main(ndfunc_t *nf, VALUE args, void *opt_ptr)
|
|
1545
1545
|
|
1546
1546
|
VALUE
|
1547
1547
|
#ifdef HAVE_STDARG_PROTOTYPES
|
1548
|
-
|
1548
|
+
cumo_na_ndloop(cumo_ndfunc_t *nf, int argc, ...)
|
1549
1549
|
#else
|
1550
|
-
|
1551
|
-
|
1550
|
+
cumo_na_ndloop(nf, argc, va_alist)
|
1551
|
+
cumo_ndfunc_t *nf;
|
1552
1552
|
int argc;
|
1553
1553
|
va_dcl
|
1554
1554
|
#endif
|
@@ -1569,22 +1569,22 @@ na_ndloop(nf, argc, va_alist)
|
|
1569
1569
|
|
1570
1570
|
args = rb_ary_new4(argc, argv);
|
1571
1571
|
|
1572
|
-
return
|
1572
|
+
return cumo_na_ndloop_main(nf, args, NULL);
|
1573
1573
|
}
|
1574
1574
|
|
1575
1575
|
|
1576
1576
|
VALUE
|
1577
|
-
|
1577
|
+
cumo_na_ndloop2(cumo_ndfunc_t *nf, VALUE args)
|
1578
1578
|
{
|
1579
|
-
return
|
1579
|
+
return cumo_na_ndloop_main(nf, args, NULL);
|
1580
1580
|
}
|
1581
1581
|
|
1582
1582
|
VALUE
|
1583
1583
|
#ifdef HAVE_STDARG_PROTOTYPES
|
1584
|
-
|
1584
|
+
cumo_na_ndloop3(cumo_ndfunc_t *nf, void *ptr, int argc, ...)
|
1585
1585
|
#else
|
1586
|
-
|
1587
|
-
|
1586
|
+
cumo_na_ndloop3(nf, ptr, argc, va_alist)
|
1587
|
+
cumo_ndfunc_t *nf;
|
1588
1588
|
void *ptr;
|
1589
1589
|
int argc;
|
1590
1590
|
va_dcl
|
@@ -1606,30 +1606,30 @@ na_ndloop3(nf, ptr, argc, va_alist)
|
|
1606
1606
|
|
1607
1607
|
args = rb_ary_new4(argc, argv);
|
1608
1608
|
|
1609
|
-
return
|
1609
|
+
return cumo_na_ndloop_main(nf, args, ptr);
|
1610
1610
|
}
|
1611
1611
|
|
1612
1612
|
VALUE
|
1613
|
-
|
1613
|
+
cumo_na_ndloop4(cumo_ndfunc_t *nf, void *ptr, VALUE args)
|
1614
1614
|
{
|
1615
|
-
return
|
1615
|
+
return cumo_na_ndloop_main(nf, args, ptr);
|
1616
1616
|
}
|
1617
1617
|
|
1618
1618
|
//----------------------------------------------------------------------
|
1619
1619
|
|
1620
1620
|
VALUE
|
1621
|
-
|
1621
|
+
cumo_na_info_str(VALUE ary)
|
1622
1622
|
{
|
1623
1623
|
int nd, i;
|
1624
1624
|
char tmp[32];
|
1625
1625
|
VALUE buf;
|
1626
|
-
|
1626
|
+
cumo_narray_t *na;
|
1627
1627
|
|
1628
|
-
|
1628
|
+
CumoGetNArray(ary,na);
|
1629
1629
|
nd = na->ndim;
|
1630
1630
|
|
1631
|
-
buf = rb_str_new2(rb_class2name(
|
1632
|
-
if (
|
1631
|
+
buf = rb_str_new2(rb_class2name(rb_obj_class(ary)));
|
1632
|
+
if (CUMO_NA_TYPE(na) == CUMO_NARRAY_VIEW_T) {
|
1633
1633
|
rb_str_cat(buf,"(view)",6);
|
1634
1634
|
}
|
1635
1635
|
rb_str_cat(buf,"#shape=[",8);
|
@@ -1648,19 +1648,20 @@ na_info_str(VALUE ary)
|
|
1648
1648
|
|
1649
1649
|
//----------------------------------------------------------------------
|
1650
1650
|
|
1651
|
-
|
1652
|
-
|
1653
|
-
|
1651
|
+
extern int cumo_na_inspect_cols_;
|
1652
|
+
extern int cumo_na_inspect_rows_;
|
1653
|
+
#define ncol cumo_na_inspect_cols_
|
1654
|
+
#define nrow cumo_na_inspect_rows_
|
1654
1655
|
|
1655
1656
|
static void
|
1656
|
-
loop_inspect(
|
1657
|
+
loop_inspect(cumo_ndfunc_t *nf, cumo_na_md_loop_t *lp)
|
1657
1658
|
{
|
1658
1659
|
int nd, i, ii;
|
1659
1660
|
size_t *c;
|
1660
1661
|
int col=0, row=0;
|
1661
1662
|
long len;
|
1662
1663
|
VALUE str;
|
1663
|
-
|
1664
|
+
cumo_na_text_func_t func = (cumo_na_text_func_t)(nf->func);
|
1664
1665
|
VALUE buf, opt;
|
1665
1666
|
|
1666
1667
|
nd = lp->ndim;
|
@@ -1734,23 +1735,23 @@ loop_inspect(ndfunc_t *nf, na_md_loop_t *lp)
|
|
1734
1735
|
|
1735
1736
|
|
1736
1737
|
VALUE
|
1737
|
-
|
1738
|
+
cumo_na_ndloop_inspect(VALUE nary, cumo_na_text_func_t func, VALUE opt)
|
1738
1739
|
{
|
1739
1740
|
volatile VALUE args;
|
1740
|
-
|
1741
|
+
cumo_na_md_loop_t lp;
|
1741
1742
|
VALUE buf;
|
1742
|
-
|
1743
|
-
|
1744
|
-
//nf =
|
1743
|
+
cumo_ndfunc_arg_in_t ain[3] = {{Qnil,0},{cumo_sym_loop_opt},{cumo_sym_option}};
|
1744
|
+
cumo_ndfunc_t nf = { (cumo_na_iter_func_t)func, CUMO_NO_LOOP, 3, 0, ain, 0 };
|
1745
|
+
//nf = cumo_ndfunc_alloc(NULL, CUMO_NO_LOOP, 1, 0, Qnil);
|
1745
1746
|
|
1746
|
-
buf =
|
1747
|
+
buf = cumo_na_info_str(nary);
|
1747
1748
|
|
1748
|
-
if (
|
1749
|
+
if (cumo_na_get_pointer(nary)==NULL) {
|
1749
1750
|
return rb_str_cat(buf,"(empty)",7);
|
1750
1751
|
}
|
1751
1752
|
|
1752
1753
|
//rb_p(args);
|
1753
|
-
//if (
|
1754
|
+
//if (cumo_na_debug_flag) print_ndfunc(&nf);
|
1754
1755
|
|
1755
1756
|
args = rb_ary_new3(3,nary,buf,opt);
|
1756
1757
|
|
@@ -1769,21 +1770,21 @@ na_ndloop_inspect(VALUE nary, na_text_func_t func, VALUE opt)
|
|
1769
1770
|
//----------------------------------------------------------------------
|
1770
1771
|
|
1771
1772
|
static void
|
1772
|
-
loop_store_subnarray(
|
1773
|
+
loop_store_subnarray(cumo_ndfunc_t *nf, cumo_na_md_loop_t *lp, int i0, size_t *c, VALUE a)
|
1773
1774
|
{
|
1774
1775
|
int nd = lp->ndim;
|
1775
1776
|
int i, j;
|
1776
|
-
|
1777
|
+
cumo_narray_t *na;
|
1777
1778
|
int *dim_map;
|
1778
1779
|
VALUE a_type;
|
1779
1780
|
|
1780
|
-
a_type =
|
1781
|
-
if (
|
1782
|
-
a = rb_funcall(a_type,
|
1781
|
+
a_type = rb_obj_class(LARG(lp,0).value);
|
1782
|
+
if (rb_obj_class(a) != a_type) {
|
1783
|
+
a = rb_funcall(a_type, cumo_id_cast, 1, a);
|
1783
1784
|
}
|
1784
|
-
|
1785
|
+
CumoGetNArray(a,na);
|
1785
1786
|
if (na->ndim != nd-i0+1) {
|
1786
|
-
rb_raise(
|
1787
|
+
rb_raise(cumo_na_eShapeError, "mismatched dimension of sub-narray: "
|
1787
1788
|
"nd_src=%d, nd_dst=%d", na->ndim, nd-i0+1);
|
1788
1789
|
}
|
1789
1790
|
dim_map = ALLOCA_N(int, na->ndim);
|
@@ -1791,7 +1792,7 @@ loop_store_subnarray(ndfunc_t *nf, na_md_loop_t *lp, int i0, size_t *c, VALUE a)
|
|
1791
1792
|
dim_map[i] = lp->trans_map[i+i0];
|
1792
1793
|
//printf("dim_map[i=%d] = %d, i0=%d\n", i, dim_map[i], i0);
|
1793
1794
|
}
|
1794
|
-
ndloop_set_stepidx(lp, 1, a, dim_map,
|
1795
|
+
ndloop_set_stepidx(lp, 1, a, dim_map, CUMO_NDL_READ);
|
1795
1796
|
LARG(lp,1).shape = &(na->shape[na->ndim-1]);
|
1796
1797
|
|
1797
1798
|
// loop body
|
@@ -1825,7 +1826,7 @@ loop_store_subnarray(ndfunc_t *nf, na_md_loop_t *lp, int i0, size_t *c, VALUE a)
|
|
1825
1826
|
|
1826
1827
|
|
1827
1828
|
static void
|
1828
|
-
loop_store_rarray(
|
1829
|
+
loop_store_rarray(cumo_ndfunc_t *nf, cumo_na_md_loop_t *lp)
|
1829
1830
|
{
|
1830
1831
|
size_t *c;
|
1831
1832
|
int i;
|
@@ -1856,7 +1857,7 @@ loop_store_rarray(ndfunc_t *nf, na_md_loop_t *lp)
|
|
1856
1857
|
} else {
|
1857
1858
|
a[i+1] = Qnil;
|
1858
1859
|
}
|
1859
|
-
} else if (
|
1860
|
+
} else if (CumoIsNArray(a[i])) {
|
1860
1861
|
//printf("a[i=%d]=0x%lx\n",i,a[i]);
|
1861
1862
|
loop_store_subnarray(nf,lp,i,c,a[i]);
|
1862
1863
|
goto loop_next;
|
@@ -1871,7 +1872,7 @@ loop_store_rarray(ndfunc_t *nf, na_md_loop_t *lp)
|
|
1871
1872
|
}
|
1872
1873
|
|
1873
1874
|
//printf("a[i=%d]=0x%lx\n",i,a[i]);
|
1874
|
-
if (
|
1875
|
+
if (CumoIsNArray(a[i])) {
|
1875
1876
|
loop_store_subnarray(nf,lp,i,c,a[i]);
|
1876
1877
|
} else {
|
1877
1878
|
LARG(lp,1).value = a[i];
|
@@ -1891,13 +1892,13 @@ loop_store_rarray(ndfunc_t *nf, na_md_loop_t *lp)
|
|
1891
1892
|
}
|
1892
1893
|
|
1893
1894
|
VALUE
|
1894
|
-
|
1895
|
+
cumo_na_ndloop_store_rarray(cumo_ndfunc_t *nf, VALUE nary, VALUE rary)
|
1895
1896
|
{
|
1896
|
-
|
1897
|
+
cumo_na_md_loop_t lp;
|
1897
1898
|
VALUE args;
|
1898
1899
|
|
1899
1900
|
//rb_p(args);
|
1900
|
-
if (
|
1901
|
+
if (cumo_na_debug_flag) print_ndfunc(nf);
|
1901
1902
|
|
1902
1903
|
args = rb_assoc_new(nary,rary);
|
1903
1904
|
|
@@ -1912,13 +1913,13 @@ na_ndloop_store_rarray(ndfunc_t *nf, VALUE nary, VALUE rary)
|
|
1912
1913
|
|
1913
1914
|
|
1914
1915
|
VALUE
|
1915
|
-
|
1916
|
+
cumo_na_ndloop_store_rarray2(cumo_ndfunc_t *nf, VALUE nary, VALUE rary, VALUE opt)
|
1916
1917
|
{
|
1917
|
-
|
1918
|
+
cumo_na_md_loop_t lp;
|
1918
1919
|
VALUE args;
|
1919
1920
|
|
1920
1921
|
//rb_p(args);
|
1921
|
-
if (
|
1922
|
+
if (cumo_na_debug_flag) print_ndfunc(nf);
|
1922
1923
|
|
1923
1924
|
//args = rb_assoc_new(rary,nary);
|
1924
1925
|
args = rb_ary_new3(3,nary,rary,opt);
|
@@ -1936,7 +1937,7 @@ na_ndloop_store_rarray2(ndfunc_t *nf, VALUE nary, VALUE rary, VALUE opt)
|
|
1936
1937
|
//----------------------------------------------------------------------
|
1937
1938
|
|
1938
1939
|
static void
|
1939
|
-
loop_narray_to_rarray(
|
1940
|
+
loop_narray_to_rarray(cumo_ndfunc_t *nf, cumo_na_md_loop_t *lp)
|
1940
1941
|
{
|
1941
1942
|
size_t *c;
|
1942
1943
|
int i;
|
@@ -1984,13 +1985,13 @@ loop_narray_to_rarray(ndfunc_t *nf, na_md_loop_t *lp)
|
|
1984
1985
|
}
|
1985
1986
|
|
1986
1987
|
VALUE
|
1987
|
-
|
1988
|
+
cumo_na_ndloop_cast_narray_to_rarray(cumo_ndfunc_t *nf, VALUE nary, VALUE fmt)
|
1988
1989
|
{
|
1989
|
-
|
1990
|
+
cumo_na_md_loop_t lp;
|
1990
1991
|
VALUE args, a0;
|
1991
1992
|
|
1992
1993
|
//rb_p(args);
|
1993
|
-
if (
|
1994
|
+
if (cumo_na_debug_flag) print_ndfunc(nf);
|
1994
1995
|
|
1995
1996
|
a0 = rb_ary_new();
|
1996
1997
|
args = rb_ary_new3(3,nary,a0,fmt);
|
@@ -2009,7 +2010,7 @@ na_ndloop_cast_narray_to_rarray(ndfunc_t *nf, VALUE nary, VALUE fmt)
|
|
2009
2010
|
//----------------------------------------------------------------------
|
2010
2011
|
|
2011
2012
|
static void
|
2012
|
-
loop_narray_with_index(
|
2013
|
+
loop_narray_with_index(cumo_ndfunc_t *nf, cumo_na_md_loop_t *lp)
|
2013
2014
|
{
|
2014
2015
|
size_t *c;
|
2015
2016
|
int i,j;
|
@@ -2059,10 +2060,10 @@ loop_narray_with_index(ndfunc_t *nf, na_md_loop_t *lp)
|
|
2059
2060
|
|
2060
2061
|
VALUE
|
2061
2062
|
#ifdef HAVE_STDARG_PROTOTYPES
|
2062
|
-
|
2063
|
+
cumo_na_ndloop_with_index(cumo_ndfunc_t *nf, int argc, ...)
|
2063
2064
|
#else
|
2064
|
-
|
2065
|
-
|
2065
|
+
cumo_na_ndloop_with_index(nf, argc, va_alist)
|
2066
|
+
cumo_ndfunc_t *nf;
|
2066
2067
|
int argc;
|
2067
2068
|
va_dcl
|
2068
2069
|
#endif
|
@@ -2072,7 +2073,7 @@ na_ndloop_with_index(nf, argc, va_alist)
|
|
2072
2073
|
int i;
|
2073
2074
|
VALUE *argv;
|
2074
2075
|
volatile VALUE args;
|
2075
|
-
|
2076
|
+
cumo_na_md_loop_t lp;
|
2076
2077
|
|
2077
2078
|
argv = ALLOCA_N(VALUE,argc);
|
2078
2079
|
|
@@ -2084,8 +2085,8 @@ na_ndloop_with_index(nf, argc, va_alist)
|
|
2084
2085
|
|
2085
2086
|
args = rb_ary_new4(argc, argv);
|
2086
2087
|
|
2087
|
-
//return
|
2088
|
-
if (
|
2088
|
+
//return cumo_na_ndloop_main(nf, args, NULL);
|
2089
|
+
if (cumo_na_debug_flag) print_ndfunc(nf);
|
2089
2090
|
|
2090
2091
|
// cast arguments to NArray
|
2091
2092
|
//copy_flag = ndloop_cast_args(nf, args);
|
@@ -2098,8 +2099,8 @@ na_ndloop_with_index(nf, argc, va_alist)
|
|
2098
2099
|
|
2099
2100
|
|
2100
2101
|
void
|
2101
|
-
|
2102
|
+
Init_cumo_na_ndloop()
|
2102
2103
|
{
|
2103
|
-
|
2104
|
-
|
2104
|
+
cumo_id_cast = rb_intern("cast");
|
2105
|
+
cumo_id_extract = rb_intern("extract");
|
2105
2106
|
}
|