numo-narray-alt 0.9.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/Gemfile +14 -0
- data/LICENSE +30 -0
- data/README.md +71 -0
- data/Rakefile +24 -0
- data/ext/numo/narray/SFMT-params.h +97 -0
- data/ext/numo/narray/SFMT-params19937.h +48 -0
- data/ext/numo/narray/SFMT.c +602 -0
- data/ext/numo/narray/SFMT.h +147 -0
- data/ext/numo/narray/array.c +575 -0
- data/ext/numo/narray/data.c +958 -0
- data/ext/numo/narray/extconf.rb +84 -0
- data/ext/numo/narray/index.c +1092 -0
- data/ext/numo/narray/kwargs.c +142 -0
- data/ext/numo/narray/math.c +133 -0
- data/ext/numo/narray/narray.c +1976 -0
- data/ext/numo/narray/narray.def +28 -0
- data/ext/numo/narray/ndloop.c +1840 -0
- data/ext/numo/narray/numo/compat.h +23 -0
- data/ext/numo/narray/numo/intern.h +115 -0
- data/ext/numo/narray/numo/narray.h +480 -0
- data/ext/numo/narray/numo/ndloop.h +93 -0
- data/ext/numo/narray/numo/template.h +149 -0
- data/ext/numo/narray/numo/types/bit.h +38 -0
- data/ext/numo/narray/numo/types/complex.h +404 -0
- data/ext/numo/narray/numo/types/complex_macro.h +384 -0
- data/ext/numo/narray/numo/types/dcomplex.h +42 -0
- data/ext/numo/narray/numo/types/dfloat.h +44 -0
- data/ext/numo/narray/numo/types/float_def.h +34 -0
- data/ext/numo/narray/numo/types/float_macro.h +202 -0
- data/ext/numo/narray/numo/types/int16.h +27 -0
- data/ext/numo/narray/numo/types/int32.h +23 -0
- data/ext/numo/narray/numo/types/int64.h +23 -0
- data/ext/numo/narray/numo/types/int8.h +23 -0
- data/ext/numo/narray/numo/types/int_macro.h +66 -0
- data/ext/numo/narray/numo/types/real_accum.h +481 -0
- data/ext/numo/narray/numo/types/robj_macro.h +78 -0
- data/ext/numo/narray/numo/types/robject.h +25 -0
- data/ext/numo/narray/numo/types/scomplex.h +42 -0
- data/ext/numo/narray/numo/types/sfloat.h +45 -0
- data/ext/numo/narray/numo/types/uint16.h +24 -0
- data/ext/numo/narray/numo/types/uint32.h +20 -0
- data/ext/numo/narray/numo/types/uint64.h +20 -0
- data/ext/numo/narray/numo/types/uint8.h +20 -0
- data/ext/numo/narray/numo/types/uint_macro.h +57 -0
- data/ext/numo/narray/numo/types/xint_macro.h +166 -0
- data/ext/numo/narray/rand.c +40 -0
- data/ext/numo/narray/src/t_bit.c +3236 -0
- data/ext/numo/narray/src/t_dcomplex.c +6776 -0
- data/ext/numo/narray/src/t_dfloat.c +9417 -0
- data/ext/numo/narray/src/t_int16.c +5757 -0
- data/ext/numo/narray/src/t_int32.c +5757 -0
- data/ext/numo/narray/src/t_int64.c +5759 -0
- data/ext/numo/narray/src/t_int8.c +5355 -0
- data/ext/numo/narray/src/t_robject.c +5567 -0
- data/ext/numo/narray/src/t_scomplex.c +6731 -0
- data/ext/numo/narray/src/t_sfloat.c +9374 -0
- data/ext/numo/narray/src/t_uint16.c +5753 -0
- data/ext/numo/narray/src/t_uint32.c +5753 -0
- data/ext/numo/narray/src/t_uint64.c +5755 -0
- data/ext/numo/narray/src/t_uint8.c +5351 -0
- data/ext/numo/narray/step.c +266 -0
- data/ext/numo/narray/struct.c +814 -0
- data/lib/numo/narray/extra.rb +1266 -0
- data/lib/numo/narray.rb +4 -0
- metadata +106 -0
@@ -0,0 +1,1840 @@
|
|
1
|
+
/*
|
2
|
+
ndloop.c
|
3
|
+
Ruby/Numo::NArray - Numerical Array class for Ruby
|
4
|
+
Copyright (C) 1999-2020 Masahiro TANAKA
|
5
|
+
*/
|
6
|
+
#include <ruby.h>
|
7
|
+
|
8
|
+
#include "numo/narray.h"
|
9
|
+
|
10
|
+
#if 0
|
11
|
+
#define DBG(x) x
|
12
|
+
#else
|
13
|
+
#define DBG(x)
|
14
|
+
#endif
|
15
|
+
|
16
|
+
#ifdef HAVE_STDARG_PROTOTYPES
|
17
|
+
#include <stdarg.h>
|
18
|
+
#define va_init_list(a, b) va_start(a, b)
|
19
|
+
#else
|
20
|
+
#include <varargs.h>
|
21
|
+
#define va_init_list(a, b) va_start(a)
|
22
|
+
#endif
|
23
|
+
|
24
|
+
typedef struct NA_BUFFER_COPY {
|
25
|
+
int ndim;
|
26
|
+
size_t elmsz;
|
27
|
+
size_t* n;
|
28
|
+
char* src_ptr;
|
29
|
+
char* buf_ptr;
|
30
|
+
na_loop_iter_t* src_iter;
|
31
|
+
na_loop_iter_t* buf_iter;
|
32
|
+
} na_buffer_copy_t;
|
33
|
+
|
34
|
+
typedef struct NA_LOOP_XARGS {
|
35
|
+
na_loop_iter_t* iter; // moved from na_loop_t
|
36
|
+
na_buffer_copy_t* bufcp; // copy data to buffer
|
37
|
+
int flag; // NDL_READ NDL_WRITE
|
38
|
+
bool free_user_iter; // alloc LARG(lp,j).iter=lp->xargs[j].iter
|
39
|
+
} na_loop_xargs_t;
|
40
|
+
|
41
|
+
typedef struct NA_MD_LOOP {
|
42
|
+
int narg;
|
43
|
+
int nin;
|
44
|
+
int ndim; // n of total dimension
|
45
|
+
unsigned int copy_flag; // set i-th bit if i-th arg is cast
|
46
|
+
void* ptr; // memory for n
|
47
|
+
na_loop_iter_t* iter_ptr; // memory for iter
|
48
|
+
size_t* n; // n of elements for each dim
|
49
|
+
na_loop_t user; // loop in user function
|
50
|
+
na_loop_xargs_t* xargs; // extra data for each arg
|
51
|
+
int writeback; // write back result to i-th arg
|
52
|
+
int init_aidx; // index of initializer argument
|
53
|
+
int reduce_dim;
|
54
|
+
int* trans_map;
|
55
|
+
VALUE vargs;
|
56
|
+
VALUE reduce;
|
57
|
+
VALUE loop_opt;
|
58
|
+
ndfunc_t* ndfunc;
|
59
|
+
void (*loop_func)(ndfunc_t*, struct NA_MD_LOOP*);
|
60
|
+
} na_md_loop_t;
|
61
|
+
|
62
|
+
#define LARG(lp, iarg) ((lp)->user.args[iarg])
|
63
|
+
#define LITER(lp, idim, iarg) ((lp)->xargs[iarg].iter[idim])
|
64
|
+
#define LITER_SRC(lp, idim) ((lp)->src_iter[idim])
|
65
|
+
#define LBUFCP(lp, j) ((lp)->xargs[j].bufcp)
|
66
|
+
|
67
|
+
#define CASTABLE(t) (RTEST(t) && (t) != OVERWRITE)
|
68
|
+
|
69
|
+
#define NDL_READ 1
|
70
|
+
#define NDL_WRITE 2
|
71
|
+
#define NDL_READ_WRITE (NDL_READ | NDL_WRITE)
|
72
|
+
|
73
|
+
static ID id_cast;
|
74
|
+
static ID id_extract;
|
75
|
+
|
76
|
+
static inline VALUE nary_type_s_cast(VALUE type, VALUE obj) {
|
77
|
+
return rb_funcall(type, id_cast, 1, obj);
|
78
|
+
}
|
79
|
+
|
80
|
+
static void print_ndfunc(ndfunc_t* nf) {
|
81
|
+
volatile VALUE t;
|
82
|
+
int i, k;
|
83
|
+
printf("ndfunc_t = 0x%" SZF "x {\n", (size_t)nf);
|
84
|
+
printf(" func = 0x%" SZF "x\n", (size_t)nf->func);
|
85
|
+
printf(" flag = 0x%" SZF "x\n", (size_t)nf->flag);
|
86
|
+
printf(" nin = %d\n", nf->nin);
|
87
|
+
printf(" nout = %d\n", nf->nout);
|
88
|
+
printf(" ain = 0x%" SZF "x\n", (size_t)nf->ain);
|
89
|
+
for (i = 0; i < nf->nin; i++) {
|
90
|
+
t = rb_inspect(nf->ain[i].type);
|
91
|
+
printf(" ain[%d].type = %s\n", i, StringValuePtr(t));
|
92
|
+
printf(" ain[%d].dim = %d\n", i, nf->ain[i].dim);
|
93
|
+
}
|
94
|
+
printf(" aout = 0x%" SZF "x\n", (size_t)nf->aout);
|
95
|
+
for (i = 0; i < nf->nout; i++) {
|
96
|
+
t = rb_inspect(nf->aout[i].type);
|
97
|
+
printf(" aout[%d].type = %s\n", i, StringValuePtr(t));
|
98
|
+
printf(" aout[%d].dim = %d\n", i, nf->aout[i].dim);
|
99
|
+
for (k = 0; k < nf->aout[i].dim; k++) {
|
100
|
+
printf(" aout[%d].shape[%d] = %" SZF "u\n", i, k, nf->aout[i].shape[k]);
|
101
|
+
}
|
102
|
+
}
|
103
|
+
printf("}\n");
|
104
|
+
}
|
105
|
+
|
106
|
+
static void print_ndloop(na_md_loop_t* lp) {
|
107
|
+
int i, j, nd;
|
108
|
+
printf("na_md_loop_t = 0x%" SZF "x {\n", (size_t)lp);
|
109
|
+
printf(" narg = %d\n", lp->narg);
|
110
|
+
printf(" nin = %d\n", lp->nin);
|
111
|
+
printf(" ndim = %d\n", lp->ndim);
|
112
|
+
printf(" copy_flag = %x\n", lp->copy_flag);
|
113
|
+
printf(" writeback = %d\n", lp->writeback);
|
114
|
+
printf(" init_aidx = %d\n", lp->init_aidx);
|
115
|
+
printf(" reduce_dim = %d\n", lp->reduce_dim);
|
116
|
+
printf(" trans_map = 0x%" SZF "x\n", (size_t)lp->trans_map);
|
117
|
+
nd = lp->ndim + lp->user.ndim;
|
118
|
+
for (i = 0; i < nd; i++) {
|
119
|
+
printf(" trans_map[%d] = %d\n", i, lp->trans_map[i]);
|
120
|
+
}
|
121
|
+
printf(" n = 0x%" SZF "x\n", (size_t)lp->n);
|
122
|
+
nd = lp->ndim + lp->user.ndim;
|
123
|
+
for (i = 0; i <= lp->ndim; i++) {
|
124
|
+
printf(" n[%d] = %" SZF "u\n", i, lp->n[i]);
|
125
|
+
}
|
126
|
+
printf(" user.n = 0x%" SZF "x\n", (size_t)lp->user.n);
|
127
|
+
if (lp->user.n) {
|
128
|
+
for (i = 0; i <= lp->user.ndim; i++) {
|
129
|
+
printf(" user.n[%d] = %" SZF "u\n", i, lp->user.n[i]);
|
130
|
+
}
|
131
|
+
}
|
132
|
+
printf(" xargs = 0x%" SZF "x\n", (size_t)lp->xargs);
|
133
|
+
printf(" iter_ptr = 0x%" SZF "x\n", (size_t)lp->iter_ptr);
|
134
|
+
printf(" user.narg = %d\n", lp->user.narg);
|
135
|
+
printf(" user.ndim = %d\n", lp->user.ndim);
|
136
|
+
printf(" user.args = 0x%" SZF "x\n", (size_t)lp->user.args);
|
137
|
+
for (j = 0; j < lp->narg; j++) {
|
138
|
+
}
|
139
|
+
printf(" user.opt_ptr = 0x%" SZF "x\n", (size_t)lp->user.opt_ptr);
|
140
|
+
if (lp->reduce == Qnil) {
|
141
|
+
printf(" reduce = nil\n");
|
142
|
+
} else {
|
143
|
+
printf(" reduce = 0x%x\n", NUM2INT(lp->reduce));
|
144
|
+
}
|
145
|
+
for (j = 0; j < lp->narg; j++) {
|
146
|
+
printf("--user.args[%d]--\n", j);
|
147
|
+
printf(" user.args[%d].ptr = 0x%" SZF "x\n", j, (size_t)LARG(lp, j).ptr);
|
148
|
+
printf(" user.args[%d].elmsz = %" SZF "d\n", j, LARG(lp, j).elmsz);
|
149
|
+
printf(" user.args[%d].value = 0x%" PRI_VALUE_PREFIX "x\n", j, LARG(lp, j).value);
|
150
|
+
printf(" user.args[%d].ndim = %d\n", j, LARG(lp, j).ndim);
|
151
|
+
printf(" user.args[%d].shape = 0x%" SZF "x\n", j, (size_t)LARG(lp, j).shape);
|
152
|
+
if (LARG(lp, j).shape) {
|
153
|
+
for (i = 0; i < LARG(lp, j).ndim; i++) {
|
154
|
+
printf(" user.args[%d].shape[%d] = %" SZF "d\n", j, i, LARG(lp, j).shape[i]);
|
155
|
+
}
|
156
|
+
}
|
157
|
+
printf(" user.args[%d].iter = 0x%" SZF "x\n", j, (size_t)lp->user.args[j].iter);
|
158
|
+
if (lp->user.args[j].iter) {
|
159
|
+
for (i = 0; i < lp->user.ndim; i++) {
|
160
|
+
printf(" &user.args[%d].iter[%d] = 0x%" SZF "x\n", j, i, (size_t)&lp->user.args[j].iter[i]);
|
161
|
+
printf(" user.args[%d].iter[%d].pos = %" SZF "u\n", j, i, lp->user.args[j].iter[i].pos);
|
162
|
+
printf(" user.args[%d].iter[%d].step = %" SZF "d\n", j, i, lp->user.args[j].iter[i].step);
|
163
|
+
printf(" user.args[%d].iter[%d].idx = 0x%" SZF "x\n", j, i, (size_t)lp->user.args[j].iter[i].idx);
|
164
|
+
}
|
165
|
+
}
|
166
|
+
//
|
167
|
+
printf(" xargs[%d].flag = %d\n", j, lp->xargs[j].flag);
|
168
|
+
printf(" xargs[%d].free_user_iter = %d\n", j, lp->xargs[j].free_user_iter);
|
169
|
+
for (i = 0; i <= nd; i++) {
|
170
|
+
printf(" &xargs[%d].iter[%d] = 0x%" SZF "x\n", j, i, (size_t)&LITER(lp, i, j));
|
171
|
+
printf(" xargs[%d].iter[%d].pos = %" SZF "u\n", j, i, LITER(lp, i, j).pos);
|
172
|
+
printf(" xargs[%d].iter[%d].step = %" SZF "d\n", j, i, LITER(lp, i, j).step);
|
173
|
+
printf(" xargs[%d].iter[%d].idx = 0x%" SZF "x\n", j, i, (size_t)LITER(lp, i, j).idx);
|
174
|
+
}
|
175
|
+
printf(" xargs[%d].bufcp = 0x%" SZF "x\n", j, (size_t)lp->xargs[j].bufcp);
|
176
|
+
if (lp->xargs[j].bufcp) {
|
177
|
+
printf(" xargs[%d].bufcp->ndim = %d\n", j, lp->xargs[j].bufcp->ndim);
|
178
|
+
printf(" xargs[%d].bufcp->elmsz = %" SZF "d\n", j, lp->xargs[j].bufcp->elmsz);
|
179
|
+
printf(" xargs[%d].bufcp->n = 0x%" SZF "x\n", j, (size_t)lp->xargs[j].bufcp->n);
|
180
|
+
printf(" xargs[%d].bufcp->src_ptr = 0x%" SZF "x\n", j, (size_t)lp->xargs[j].bufcp->src_ptr);
|
181
|
+
printf(" xargs[%d].bufcp->buf_ptr = 0x%" SZF "x\n", j, (size_t)lp->xargs[j].bufcp->buf_ptr);
|
182
|
+
printf(" xargs[%d].bufcp->src_iter = 0x%" SZF "x\n", j, (size_t)lp->xargs[j].bufcp->src_iter);
|
183
|
+
printf(" xargs[%d].bufcp->buf_iter = 0x%" SZF "x\n", j, (size_t)lp->xargs[j].bufcp->buf_iter);
|
184
|
+
}
|
185
|
+
}
|
186
|
+
printf("}\n");
|
187
|
+
}
|
188
|
+
|
189
|
+
static unsigned int ndloop_func_loop_spec(ndfunc_t* nf, int user_ndim) {
|
190
|
+
unsigned int f = 0;
|
191
|
+
// If user function supports LOOP
|
192
|
+
if (user_ndim > 0 || NDF_TEST(nf, NDF_HAS_LOOP)) {
|
193
|
+
if (!NDF_TEST(nf, NDF_STRIDE_LOOP)) {
|
194
|
+
f |= 1;
|
195
|
+
}
|
196
|
+
if (!NDF_TEST(nf, NDF_INDEX_LOOP)) {
|
197
|
+
f |= 2;
|
198
|
+
}
|
199
|
+
}
|
200
|
+
return f;
|
201
|
+
}
|
202
|
+
|
203
|
+
static int ndloop_cast_required(VALUE type, VALUE value) {
|
204
|
+
return CASTABLE(type) && type != rb_obj_class(value);
|
205
|
+
}
|
206
|
+
|
207
|
+
static int ndloop_castable_type(VALUE type) {
|
208
|
+
return rb_obj_is_kind_of(type, rb_cClass) && RTEST(rb_class_inherited_p(type, cNArray));
|
209
|
+
}
|
210
|
+
|
211
|
+
static void ndloop_cast_error(VALUE type, VALUE value) {
|
212
|
+
VALUE x = rb_inspect(type);
|
213
|
+
char* s = StringValueCStr(x);
|
214
|
+
rb_bug("fail cast from %s to %s", rb_obj_classname(value), s);
|
215
|
+
rb_raise(rb_eTypeError, "fail cast from %s to %s", rb_obj_classname(value), s);
|
216
|
+
}
|
217
|
+
|
218
|
+
// convert input argeuments given by RARRAY_PTR(args)[j]
|
219
|
+
// to type specified by nf->args[j].type
|
220
|
+
// returns copy_flag where nth-bit is set if nth argument is converted.
|
221
|
+
static unsigned int ndloop_cast_args(ndfunc_t* nf, VALUE args) {
|
222
|
+
int j;
|
223
|
+
unsigned int copy_flag = 0;
|
224
|
+
VALUE type, value;
|
225
|
+
|
226
|
+
for (j = 0; j < nf->nin; j++) {
|
227
|
+
|
228
|
+
type = nf->ain[j].type;
|
229
|
+
if (TYPE(type) == T_SYMBOL) continue;
|
230
|
+
value = RARRAY_AREF(args, j);
|
231
|
+
if (!ndloop_cast_required(type, value)) continue;
|
232
|
+
|
233
|
+
if (ndloop_castable_type(type)) {
|
234
|
+
RARRAY_ASET(args, j, nary_type_s_cast(type, value));
|
235
|
+
copy_flag |= 1 << j;
|
236
|
+
} else {
|
237
|
+
ndloop_cast_error(type, value);
|
238
|
+
}
|
239
|
+
}
|
240
|
+
|
241
|
+
RB_GC_GUARD(type);
|
242
|
+
RB_GC_GUARD(value);
|
243
|
+
return copy_flag;
|
244
|
+
}
|
245
|
+
|
246
|
+
static void ndloop_handle_symbol_in_ain(VALUE type, VALUE value, int at, na_md_loop_t* lp) {
|
247
|
+
if (type == sym_reduce) {
|
248
|
+
lp->reduce = value;
|
249
|
+
} else if (type == sym_option) {
|
250
|
+
lp->user.option = value;
|
251
|
+
} else if (type == sym_loop_opt) {
|
252
|
+
lp->loop_opt = value;
|
253
|
+
} else if (type == sym_init) {
|
254
|
+
lp->init_aidx = at;
|
255
|
+
} else {
|
256
|
+
rb_bug("ndloop parse_options: unknown type");
|
257
|
+
}
|
258
|
+
}
|
259
|
+
|
260
|
+
static inline int max2(int x, int y) {
|
261
|
+
return x > y ? x : y;
|
262
|
+
}
|
263
|
+
|
264
|
+
static void ndloop_find_max_dimension(na_md_loop_t* lp, ndfunc_t* nf, VALUE args) {
|
265
|
+
int j;
|
266
|
+
int nin = 0; // number of input objects (except for symbols)
|
267
|
+
int user_nd = 0; // max dimension of user function
|
268
|
+
int loop_nd = 0; // max dimension of md-loop
|
269
|
+
|
270
|
+
for (j = 0; j < RARRAY_LEN(args); j++) {
|
271
|
+
VALUE t = nf->ain[j].type;
|
272
|
+
VALUE v = RARRAY_AREF(args, j);
|
273
|
+
if (TYPE(t) == T_SYMBOL) {
|
274
|
+
ndloop_handle_symbol_in_ain(t, v, j, lp);
|
275
|
+
} else {
|
276
|
+
nin++;
|
277
|
+
user_nd = max2(user_nd, nf->ain[j].dim);
|
278
|
+
if (IsNArray(v)) loop_nd = max2(loop_nd, RNARRAY_NDIM(v) - nf->ain[j].dim);
|
279
|
+
}
|
280
|
+
}
|
281
|
+
|
282
|
+
lp->narg = lp->user.narg = nin + nf->nout;
|
283
|
+
lp->nin = nin;
|
284
|
+
lp->ndim = loop_nd;
|
285
|
+
lp->user.ndim = user_nd;
|
286
|
+
}
|
287
|
+
|
288
|
+
/*
|
289
|
+
user-dimension:
|
290
|
+
user_nd = MAX( nf->args[j].dim )
|
291
|
+
|
292
|
+
user-support dimension:
|
293
|
+
|
294
|
+
loop dimension:
|
295
|
+
loop_nd
|
296
|
+
*/
|
297
|
+
|
298
|
+
static void ndloop_alloc(na_md_loop_t* lp, ndfunc_t* nf, VALUE args, void* opt_ptr, unsigned int copy_flag,
|
299
|
+
void (*loop_func)(ndfunc_t*, na_md_loop_t*)) {
|
300
|
+
int i, j;
|
301
|
+
int narg;
|
302
|
+
int max_nd;
|
303
|
+
|
304
|
+
char* buf;
|
305
|
+
size_t n1, n2, n3, n4, n5;
|
306
|
+
|
307
|
+
long args_len;
|
308
|
+
|
309
|
+
na_loop_iter_t* iter;
|
310
|
+
|
311
|
+
int trans_dim;
|
312
|
+
unsigned int f;
|
313
|
+
|
314
|
+
args_len = RARRAY_LEN(args);
|
315
|
+
|
316
|
+
if (args_len != nf->nin) {
|
317
|
+
rb_bug("wrong number of arguments for ndfunc (%lu for %d)", args_len, nf->nin);
|
318
|
+
}
|
319
|
+
|
320
|
+
lp->vargs = args;
|
321
|
+
lp->ndfunc = nf;
|
322
|
+
lp->loop_func = loop_func;
|
323
|
+
lp->copy_flag = copy_flag;
|
324
|
+
|
325
|
+
lp->reduce = Qnil;
|
326
|
+
lp->user.option = Qnil;
|
327
|
+
lp->user.opt_ptr = opt_ptr;
|
328
|
+
lp->user.err_type = Qfalse;
|
329
|
+
lp->loop_opt = Qnil;
|
330
|
+
lp->writeback = -1;
|
331
|
+
lp->init_aidx = -1;
|
332
|
+
|
333
|
+
lp->ptr = NULL;
|
334
|
+
lp->user.n = NULL;
|
335
|
+
|
336
|
+
ndloop_find_max_dimension(lp, nf, args);
|
337
|
+
narg = lp->nin + nf->nout;
|
338
|
+
max_nd = lp->ndim + lp->user.ndim;
|
339
|
+
|
340
|
+
n1 = sizeof(size_t) * (max_nd + 1);
|
341
|
+
n2 = sizeof(na_loop_xargs_t) * narg;
|
342
|
+
n2 = ((n2 - 1) / 8 + 1) * 8;
|
343
|
+
n3 = sizeof(na_loop_args_t) * narg;
|
344
|
+
n3 = ((n3 - 1) / 8 + 1) * 8;
|
345
|
+
n4 = sizeof(na_loop_iter_t) * narg * (max_nd + 1);
|
346
|
+
n4 = ((n4 - 1) / 8 + 1) * 8;
|
347
|
+
n5 = sizeof(int) * (max_nd + 1);
|
348
|
+
|
349
|
+
lp->ptr = buf = (char*)xmalloc(n1 + n2 + n3 + n4 + n5);
|
350
|
+
lp->n = (size_t*)buf;
|
351
|
+
buf += n1;
|
352
|
+
lp->xargs = (na_loop_xargs_t*)buf;
|
353
|
+
buf += n2;
|
354
|
+
lp->user.args = (na_loop_args_t*)buf;
|
355
|
+
buf += n3;
|
356
|
+
lp->iter_ptr = iter = (na_loop_iter_t*)buf;
|
357
|
+
buf += n4;
|
358
|
+
lp->trans_map = (int*)buf;
|
359
|
+
|
360
|
+
for (j = 0; j < narg; j++) {
|
361
|
+
LARG(lp, j).value = Qnil;
|
362
|
+
LARG(lp, j).iter = NULL;
|
363
|
+
LARG(lp, j).shape = NULL;
|
364
|
+
LARG(lp, j).ndim = 0;
|
365
|
+
lp->xargs[j].iter = &(iter[(max_nd + 1) * j]);
|
366
|
+
lp->xargs[j].bufcp = NULL;
|
367
|
+
lp->xargs[j].flag = (j < lp->nin) ? NDL_READ : NDL_WRITE;
|
368
|
+
lp->xargs[j].free_user_iter = 0;
|
369
|
+
}
|
370
|
+
|
371
|
+
for (i = 0; i <= max_nd; i++) {
|
372
|
+
lp->n[i] = 1;
|
373
|
+
for (j = 0; j < narg; j++) {
|
374
|
+
LITER(lp, i, j).pos = 0;
|
375
|
+
LITER(lp, i, j).step = 0;
|
376
|
+
LITER(lp, i, j).idx = NULL;
|
377
|
+
}
|
378
|
+
}
|
379
|
+
|
380
|
+
// transpose reduce-dimensions to last dimensions
|
381
|
+
// array loop
|
382
|
+
// [*,+,*,+,*] => [*,*,*,+,+]
|
383
|
+
// trans_map=[0,3,1,4,2] <= [0,1,2,3,4]
|
384
|
+
if (NDF_TEST(nf, NDF_FLAT_REDUCE) && RTEST(lp->reduce)) {
|
385
|
+
trans_dim = 0;
|
386
|
+
for (i = 0; i < max_nd; i++) {
|
387
|
+
if (na_test_reduce(lp->reduce, i)) {
|
388
|
+
lp->trans_map[i] = -1;
|
389
|
+
} else {
|
390
|
+
lp->trans_map[i] = trans_dim++;
|
391
|
+
}
|
392
|
+
}
|
393
|
+
j = trans_dim;
|
394
|
+
for (i = 0; i < max_nd; i++) {
|
395
|
+
if (lp->trans_map[i] == -1) {
|
396
|
+
lp->trans_map[i] = j++;
|
397
|
+
}
|
398
|
+
}
|
399
|
+
lp->reduce_dim = max_nd - trans_dim;
|
400
|
+
f = 0;
|
401
|
+
for (i = trans_dim; i < max_nd; i++) {
|
402
|
+
f |= 1 << i;
|
403
|
+
}
|
404
|
+
lp->reduce = INT2FIX(f);
|
405
|
+
} else {
|
406
|
+
for (i = 0; i < max_nd; i++) {
|
407
|
+
lp->trans_map[i] = i;
|
408
|
+
}
|
409
|
+
lp->reduce_dim = 0;
|
410
|
+
}
|
411
|
+
}
|
412
|
+
|
413
|
+
static VALUE ndloop_release(VALUE vlp) {
|
414
|
+
int j;
|
415
|
+
VALUE v;
|
416
|
+
na_md_loop_t* lp = (na_md_loop_t*)(vlp);
|
417
|
+
|
418
|
+
for (j = 0; j < lp->narg; j++) {
|
419
|
+
v = LARG(lp, j).value;
|
420
|
+
if (IsNArray(v)) {
|
421
|
+
na_release_lock(v);
|
422
|
+
}
|
423
|
+
}
|
424
|
+
for (j = 0; j < lp->narg; j++) {
|
425
|
+
// printf("lp->xargs[%d].bufcp=%lx\n",j,(size_t)(lp->xargs[j].bufcp));
|
426
|
+
if (lp->xargs[j].bufcp) {
|
427
|
+
xfree(lp->xargs[j].bufcp->buf_iter);
|
428
|
+
xfree(lp->xargs[j].bufcp->buf_ptr);
|
429
|
+
xfree(lp->xargs[j].bufcp->n);
|
430
|
+
xfree(lp->xargs[j].bufcp);
|
431
|
+
if (lp->xargs[j].free_user_iter) {
|
432
|
+
xfree(LARG(lp, j).iter);
|
433
|
+
}
|
434
|
+
}
|
435
|
+
}
|
436
|
+
xfree(lp->ptr);
|
437
|
+
return Qnil;
|
438
|
+
}
|
439
|
+
|
440
|
+
/*
|
441
|
+
set lp->n[i] (shape of n-d iteration) here
|
442
|
+
*/
|
443
|
+
static void ndloop_check_shape(na_md_loop_t* lp, int nf_dim, narray_t* na) {
|
444
|
+
int i, k;
|
445
|
+
size_t n;
|
446
|
+
int dim_beg;
|
447
|
+
|
448
|
+
dim_beg = lp->ndim + nf_dim - na->ndim;
|
449
|
+
|
450
|
+
for (k = na->ndim - nf_dim - 1; k >= 0; k--) {
|
451
|
+
i = lp->trans_map[k + dim_beg];
|
452
|
+
n = na->shape[k];
|
453
|
+
// if n==1 then repeat this dimension
|
454
|
+
if (n != 1) {
|
455
|
+
if (lp->n[i] == 1) {
|
456
|
+
lp->n[i] = n;
|
457
|
+
} else if (lp->n[i] != n) {
|
458
|
+
// inconsistent array shape
|
459
|
+
rb_raise(nary_eShapeError, "shape1[%d](=%" SZF "u) != shape2[%d](=%" SZF "u)", i, lp->n[i], k, n);
|
460
|
+
}
|
461
|
+
}
|
462
|
+
}
|
463
|
+
}
|
464
|
+
|
465
|
+
/*
|
466
|
+
na->shape[i] == lp->n[ dim_map[i] ]
|
467
|
+
*/
|
468
|
+
static void ndloop_set_stepidx(na_md_loop_t* lp, int j, VALUE vna, int* dim_map, int rwflag) {
|
469
|
+
size_t n, s;
|
470
|
+
int i, k, nd;
|
471
|
+
stridx_t sdx;
|
472
|
+
narray_t* na;
|
473
|
+
|
474
|
+
LARG(lp, j).value = vna;
|
475
|
+
LARG(lp, j).elmsz = nary_element_stride(vna);
|
476
|
+
if (rwflag == NDL_READ) {
|
477
|
+
LARG(lp, j).ptr = na_get_pointer_for_read(vna);
|
478
|
+
} else if (rwflag == NDL_WRITE) {
|
479
|
+
LARG(lp, j).ptr = na_get_pointer_for_write(vna);
|
480
|
+
} else if (rwflag == NDL_READ_WRITE) {
|
481
|
+
LARG(lp, j).ptr = na_get_pointer_for_read_write(vna);
|
482
|
+
} else {
|
483
|
+
rb_bug("invalid value for read-write flag");
|
484
|
+
}
|
485
|
+
GetNArray(vna, na);
|
486
|
+
nd = LARG(lp, j).ndim;
|
487
|
+
|
488
|
+
switch (NA_TYPE(na)) {
|
489
|
+
case NARRAY_DATA_T:
|
490
|
+
if (NA_DATA_PTR(na) == NULL && NA_SIZE(na) > 0) {
|
491
|
+
rb_bug("cannot read no-data NArray");
|
492
|
+
rb_raise(rb_eRuntimeError, "cannot read no-data NArray");
|
493
|
+
}
|
494
|
+
// through
|
495
|
+
case NARRAY_FILEMAP_T:
|
496
|
+
s = LARG(lp, j).elmsz;
|
497
|
+
for (k = na->ndim; k--;) {
|
498
|
+
n = na->shape[k];
|
499
|
+
if (n > 1 || nd > 0) {
|
500
|
+
i = dim_map[k];
|
501
|
+
// printf("n=%d k=%d i=%d\n",n,k,i);
|
502
|
+
LITER(lp, i, j).step = s;
|
503
|
+
// LITER(lp,i,j).idx = NULL;
|
504
|
+
}
|
505
|
+
s *= n;
|
506
|
+
nd--;
|
507
|
+
}
|
508
|
+
LITER(lp, 0, j).pos = 0;
|
509
|
+
break;
|
510
|
+
case NARRAY_VIEW_T:
|
511
|
+
LITER(lp, 0, j).pos = NA_VIEW_OFFSET(na);
|
512
|
+
for (k = 0; k < na->ndim; k++) {
|
513
|
+
n = na->shape[k];
|
514
|
+
sdx = NA_VIEW_STRIDX(na)[k];
|
515
|
+
if (n > 1 || nd > 0) {
|
516
|
+
i = dim_map[k];
|
517
|
+
if (SDX_IS_INDEX(sdx)) {
|
518
|
+
LITER(lp, i, j).step = 0;
|
519
|
+
LITER(lp, i, j).idx = SDX_GET_INDEX(sdx);
|
520
|
+
} else {
|
521
|
+
LITER(lp, i, j).step = SDX_GET_STRIDE(sdx);
|
522
|
+
// LITER(lp,i,j).idx = NULL;
|
523
|
+
}
|
524
|
+
} else if (n == 1) {
|
525
|
+
if (SDX_IS_INDEX(sdx)) {
|
526
|
+
LITER(lp, 0, j).pos += SDX_GET_INDEX(sdx)[0];
|
527
|
+
}
|
528
|
+
}
|
529
|
+
nd--;
|
530
|
+
}
|
531
|
+
break;
|
532
|
+
default:
|
533
|
+
rb_bug("invalid narray internal type");
|
534
|
+
}
|
535
|
+
}
|
536
|
+
|
537
|
+
static void ndloop_init_args(ndfunc_t* nf, na_md_loop_t* lp, VALUE args) {
|
538
|
+
int i, j;
|
539
|
+
VALUE v;
|
540
|
+
narray_t* na;
|
541
|
+
int nf_dim;
|
542
|
+
int dim_beg;
|
543
|
+
int* dim_map;
|
544
|
+
int max_nd = lp->ndim + lp->user.ndim;
|
545
|
+
int flag;
|
546
|
+
size_t s;
|
547
|
+
|
548
|
+
/*
|
549
|
+
na->shape[i] == lp->n[ dim_map[i] ]
|
550
|
+
*/
|
551
|
+
dim_map = ALLOCA_N(int, max_nd);
|
552
|
+
|
553
|
+
// input arguments
|
554
|
+
for (j = 0; j < nf->nin; j++) {
|
555
|
+
if (TYPE(nf->ain[j].type) == T_SYMBOL) {
|
556
|
+
continue;
|
557
|
+
}
|
558
|
+
v = RARRAY_AREF(args, j);
|
559
|
+
if (IsNArray(v)) {
|
560
|
+
// set LARG(lp,j) with v
|
561
|
+
GetNArray(v, na);
|
562
|
+
nf_dim = nf->ain[j].dim;
|
563
|
+
if (nf_dim > na->ndim) {
|
564
|
+
rb_raise(nary_eDimensionError,
|
565
|
+
"requires >= %d-dimensioal array "
|
566
|
+
"while %d-dimensional array is given",
|
567
|
+
nf_dim, na->ndim);
|
568
|
+
}
|
569
|
+
ndloop_check_shape(lp, nf_dim, na);
|
570
|
+
dim_beg = lp->ndim + nf->ain[j].dim - na->ndim;
|
571
|
+
for (i = 0; i < na->ndim; i++) {
|
572
|
+
dim_map[i] = lp->trans_map[i + dim_beg];
|
573
|
+
// printf("dim_map[%d]=%d na->shape[%d]=%d\n",i,dim_map[i],i,na->shape[i]);
|
574
|
+
}
|
575
|
+
if (nf->ain[j].type == OVERWRITE) {
|
576
|
+
lp->xargs[j].flag = flag = NDL_WRITE;
|
577
|
+
} else {
|
578
|
+
lp->xargs[j].flag = flag = NDL_READ;
|
579
|
+
}
|
580
|
+
LARG(lp, j).ndim = nf_dim;
|
581
|
+
ndloop_set_stepidx(lp, j, v, dim_map, flag);
|
582
|
+
if (nf_dim > 0) {
|
583
|
+
LARG(lp, j).shape = na->shape + (na->ndim - nf_dim);
|
584
|
+
}
|
585
|
+
} else if (TYPE(v) == T_ARRAY) {
|
586
|
+
LARG(lp, j).value = v;
|
587
|
+
LARG(lp, j).elmsz = sizeof(VALUE);
|
588
|
+
LARG(lp, j).ptr = NULL;
|
589
|
+
for (i = 0; i <= max_nd; i++) {
|
590
|
+
LITER(lp, i, j).step = 1;
|
591
|
+
}
|
592
|
+
}
|
593
|
+
}
|
594
|
+
// check whether # of element is zero
|
595
|
+
for (s = 1, i = 0; i <= max_nd; i++) {
|
596
|
+
s *= lp->n[i];
|
597
|
+
}
|
598
|
+
if (s == 0) {
|
599
|
+
for (i = 0; i <= max_nd; i++) {
|
600
|
+
lp->n[i] = 0;
|
601
|
+
}
|
602
|
+
}
|
603
|
+
}
|
604
|
+
|
605
|
+
static int ndloop_check_inplace(VALUE type, int na_ndim, size_t* na_shape, VALUE v) {
|
606
|
+
int i;
|
607
|
+
narray_t* na;
|
608
|
+
|
609
|
+
// type check
|
610
|
+
if (type != rb_obj_class(v)) {
|
611
|
+
return 0;
|
612
|
+
}
|
613
|
+
GetNArray(v, na);
|
614
|
+
// shape check
|
615
|
+
if (na->ndim != na_ndim) {
|
616
|
+
return 0;
|
617
|
+
}
|
618
|
+
for (i = 0; i < na_ndim; i++) {
|
619
|
+
if (na_shape[i] != na->shape[i]) {
|
620
|
+
return 0;
|
621
|
+
}
|
622
|
+
}
|
623
|
+
// v is selected as output
|
624
|
+
return 1;
|
625
|
+
}
|
626
|
+
|
627
|
+
static VALUE ndloop_find_inplace(ndfunc_t* nf, na_md_loop_t* lp, VALUE type, int na_ndim, size_t* na_shape, VALUE args) {
|
628
|
+
int j;
|
629
|
+
VALUE v;
|
630
|
+
|
631
|
+
// find inplace
|
632
|
+
for (j = 0; j < nf->nin; j++) {
|
633
|
+
v = RARRAY_AREF(args, j);
|
634
|
+
if (IsNArray(v)) {
|
635
|
+
if (TEST_INPLACE(v)) {
|
636
|
+
if (ndloop_check_inplace(type, na_ndim, na_shape, v)) {
|
637
|
+
// if already copied, create outary and write-back
|
638
|
+
if (lp->copy_flag & (1 << j)) {
|
639
|
+
lp->writeback = j;
|
640
|
+
}
|
641
|
+
return v;
|
642
|
+
}
|
643
|
+
}
|
644
|
+
}
|
645
|
+
}
|
646
|
+
// find casted or copied input array
|
647
|
+
for (j = 0; j < nf->nin; j++) {
|
648
|
+
if (lp->copy_flag & (1 << j)) {
|
649
|
+
v = RARRAY_AREF(args, j);
|
650
|
+
if (ndloop_check_inplace(type, na_ndim, na_shape, v)) {
|
651
|
+
return v;
|
652
|
+
}
|
653
|
+
}
|
654
|
+
}
|
655
|
+
return Qnil;
|
656
|
+
}
|
657
|
+
|
658
|
+
static VALUE ndloop_get_arg_type(ndfunc_t* nf, VALUE args, VALUE t) {
|
659
|
+
int i;
|
660
|
+
|
661
|
+
// if type is FIXNUM, get the type of i-th argument
|
662
|
+
if (FIXNUM_P(t)) {
|
663
|
+
i = FIX2INT(t);
|
664
|
+
if (i < 0 || i >= nf->nin) {
|
665
|
+
rb_bug("invalid type: index (%d) out of # of args", i);
|
666
|
+
}
|
667
|
+
t = nf->ain[i].type;
|
668
|
+
// if i-th type is Qnil, get the type of i-th input value
|
669
|
+
if (!CASTABLE(t)) {
|
670
|
+
t = rb_obj_class(RARRAY_AREF(args, i));
|
671
|
+
}
|
672
|
+
}
|
673
|
+
return t;
|
674
|
+
}
|
675
|
+
|
676
|
+
static VALUE ndloop_set_output_narray(ndfunc_t* nf, na_md_loop_t* lp, int k, VALUE type, VALUE args) {
|
677
|
+
int i, j;
|
678
|
+
int na_ndim;
|
679
|
+
int lp_dim;
|
680
|
+
volatile VALUE v = Qnil;
|
681
|
+
size_t* na_shape;
|
682
|
+
int* dim_map;
|
683
|
+
int flag = NDL_READ_WRITE;
|
684
|
+
int nd;
|
685
|
+
int max_nd = lp->ndim + nf->aout[k].dim;
|
686
|
+
|
687
|
+
na_shape = ALLOCA_N(size_t, max_nd);
|
688
|
+
dim_map = ALLOCA_N(int, max_nd);
|
689
|
+
|
690
|
+
// printf("max_nd=%d lp->ndim=%d\n",max_nd,lp->ndim);
|
691
|
+
|
692
|
+
// md-loop shape
|
693
|
+
na_ndim = 0;
|
694
|
+
for (i = 0; i < lp->ndim; i++) {
|
695
|
+
// na_shape[i] == lp->n[lp->trans_map[i]]
|
696
|
+
lp_dim = lp->trans_map[i];
|
697
|
+
// printf("i=%d lp_dim=%d\n",i,lp_dim);
|
698
|
+
if (NDF_TEST(nf, NDF_CUM)) { // cumulate with shape kept
|
699
|
+
na_shape[na_ndim] = lp->n[lp_dim];
|
700
|
+
} else if (na_test_reduce(lp->reduce, lp_dim)) { // accumulate dimension
|
701
|
+
if (NDF_TEST(nf, NDF_KEEP_DIM)) {
|
702
|
+
na_shape[na_ndim] = 1; // leave it
|
703
|
+
} else {
|
704
|
+
continue; // delete dimension
|
705
|
+
}
|
706
|
+
} else {
|
707
|
+
na_shape[na_ndim] = lp->n[lp_dim];
|
708
|
+
}
|
709
|
+
// printf("i=%d lp_dim=%d na_shape[%d]=%ld\n",i,lp_dim,i,na_shape[i]);
|
710
|
+
dim_map[na_ndim++] = lp_dim;
|
711
|
+
// dim_map[lp_dim] = na_ndim++;
|
712
|
+
}
|
713
|
+
|
714
|
+
// user-specified shape
|
715
|
+
for (i = 0; i < nf->aout[k].dim; i++) {
|
716
|
+
na_shape[na_ndim] = nf->aout[k].shape[i];
|
717
|
+
dim_map[na_ndim++] = i + lp->ndim;
|
718
|
+
}
|
719
|
+
|
720
|
+
// find inplace from input arrays
|
721
|
+
if (k == 0 && NDF_TEST(nf, NDF_INPLACE)) {
|
722
|
+
v = ndloop_find_inplace(nf, lp, type, na_ndim, na_shape, args);
|
723
|
+
}
|
724
|
+
if (!RTEST(v)) {
|
725
|
+
// new object
|
726
|
+
v = nary_new(type, na_ndim, na_shape);
|
727
|
+
flag = NDL_WRITE;
|
728
|
+
}
|
729
|
+
|
730
|
+
j = lp->nin + k;
|
731
|
+
LARG(lp, j).ndim = nd = nf->aout[k].dim;
|
732
|
+
ndloop_set_stepidx(lp, j, v, dim_map, flag);
|
733
|
+
if (nd > 0) {
|
734
|
+
LARG(lp, j).shape = nf->aout[k].shape;
|
735
|
+
}
|
736
|
+
|
737
|
+
return v;
|
738
|
+
}
|
739
|
+
|
740
|
+
static VALUE ndloop_set_output(ndfunc_t* nf, na_md_loop_t* lp, VALUE args) {
|
741
|
+
int i, j, k, idx;
|
742
|
+
volatile VALUE v, t, results;
|
743
|
+
VALUE init;
|
744
|
+
|
745
|
+
int max_nd = lp->ndim + lp->user.ndim;
|
746
|
+
|
747
|
+
// output results
|
748
|
+
results = rb_ary_new2(nf->nout);
|
749
|
+
|
750
|
+
for (k = 0; k < nf->nout; k++) {
|
751
|
+
t = nf->aout[k].type;
|
752
|
+
t = ndloop_get_arg_type(nf, args, t);
|
753
|
+
|
754
|
+
if (rb_obj_is_kind_of(t, rb_cClass)) {
|
755
|
+
if (RTEST(rb_class_inherited_p(t, cNArray))) {
|
756
|
+
// NArray
|
757
|
+
v = ndloop_set_output_narray(nf, lp, k, t, args);
|
758
|
+
rb_ary_push(results, v);
|
759
|
+
} else if (RTEST(rb_class_inherited_p(t, rb_cArray))) {
|
760
|
+
// Ruby Array
|
761
|
+
j = lp->nin + k;
|
762
|
+
for (i = 0; i <= max_nd; i++) {
|
763
|
+
LITER(lp, i, j).step = sizeof(VALUE);
|
764
|
+
}
|
765
|
+
LARG(lp, j).value = t;
|
766
|
+
LARG(lp, j).elmsz = sizeof(VALUE);
|
767
|
+
} else {
|
768
|
+
rb_raise(rb_eRuntimeError, "ndloop_set_output: invalid for type");
|
769
|
+
}
|
770
|
+
}
|
771
|
+
}
|
772
|
+
|
773
|
+
// initialilzer
|
774
|
+
k = lp->init_aidx;
|
775
|
+
if (k > -1) {
|
776
|
+
idx = nf->ain[k].dim;
|
777
|
+
v = RARRAY_AREF(results, idx);
|
778
|
+
init = RARRAY_AREF(args, k);
|
779
|
+
na_store(v, init);
|
780
|
+
}
|
781
|
+
|
782
|
+
return results;
|
783
|
+
}
|
784
|
+
|
785
|
+
static void ndfunc_contract_loop(na_md_loop_t* lp) {
|
786
|
+
int i, j, k, success, cnt = 0;
|
787
|
+
int red0, redi;
|
788
|
+
|
789
|
+
redi = na_test_reduce(lp->reduce, 0);
|
790
|
+
|
791
|
+
// for (i=0; i<lp->ndim; i++) {
|
792
|
+
// printf("lp->n[%d]=%lu\n",i,lp->n[i]);
|
793
|
+
// }
|
794
|
+
|
795
|
+
for (i = 1; i < lp->ndim; i++) {
|
796
|
+
red0 = redi;
|
797
|
+
redi = na_test_reduce(lp->reduce, i);
|
798
|
+
// printf("contract i=%d reduce_cond=%d %d\n",i,red0,redi);
|
799
|
+
if (red0 != redi) {
|
800
|
+
continue;
|
801
|
+
}
|
802
|
+
success = 1;
|
803
|
+
for (j = 0; j < lp->narg; j++) {
|
804
|
+
if (!(LITER(lp, i, j).idx == NULL && LITER(lp, i - 1, j).idx == NULL &&
|
805
|
+
LITER(lp, i - 1, j).step == LITER(lp, i, j).step * (ssize_t)(lp->n[i]))) {
|
806
|
+
success = 0;
|
807
|
+
break;
|
808
|
+
}
|
809
|
+
}
|
810
|
+
if (success) {
|
811
|
+
// printf("contract i=%d-th and %d-th, lp->n[%d]=%"SZF"d, lp->n[%d]=%"SZF"d\n",
|
812
|
+
// i-1,i, i,lp->n[i], i-1,lp->n[i-1]);
|
813
|
+
// contract (i-1)-th and i-th dimension
|
814
|
+
lp->n[i] *= lp->n[i - 1];
|
815
|
+
// shift dimensions
|
816
|
+
for (k = i - 1; k > cnt; k--) {
|
817
|
+
lp->n[k] = lp->n[k - 1];
|
818
|
+
}
|
819
|
+
// printf("k=%d\n",k);
|
820
|
+
for (; k >= 0; k--) {
|
821
|
+
lp->n[k] = 1;
|
822
|
+
}
|
823
|
+
for (j = 0; j < lp->narg; j++) {
|
824
|
+
for (k = i - 1; k > cnt; k--) {
|
825
|
+
LITER(lp, k, j) = LITER(lp, k - 1, j);
|
826
|
+
}
|
827
|
+
}
|
828
|
+
if (redi) {
|
829
|
+
lp->reduce_dim--;
|
830
|
+
}
|
831
|
+
cnt++;
|
832
|
+
}
|
833
|
+
}
|
834
|
+
// printf("contract cnt=%d\n",cnt);
|
835
|
+
if (cnt > 0) {
|
836
|
+
for (j = 0; j < lp->narg; j++) {
|
837
|
+
LITER(lp, cnt, j).pos = LITER(lp, 0, j).pos;
|
838
|
+
lp->xargs[j].iter = &LITER(lp, cnt, j);
|
839
|
+
}
|
840
|
+
lp->n = &(lp->n[cnt]);
|
841
|
+
lp->ndim -= cnt;
|
842
|
+
// for (i=0; i<lp->ndim; i++) {printf("lp->n[%d]=%lu\n",i,lp->n[i]);}
|
843
|
+
}
|
844
|
+
}
|
845
|
+
|
846
|
+
static void ndfunc_set_user_loop(ndfunc_t* nf, na_md_loop_t* lp) {
|
847
|
+
int j, ud = 0;
|
848
|
+
|
849
|
+
if (lp->reduce_dim > 0) {
|
850
|
+
ud = lp->reduce_dim;
|
851
|
+
} else if (lp->ndim > 0 && NDF_TEST(nf, NDF_HAS_LOOP)) {
|
852
|
+
ud = 1;
|
853
|
+
} else {
|
854
|
+
goto skip_ud;
|
855
|
+
}
|
856
|
+
if (ud > lp->ndim) {
|
857
|
+
rb_bug("Reduce-dimension is larger than loop-dimension");
|
858
|
+
}
|
859
|
+
// increase user dimension
|
860
|
+
lp->user.ndim += ud;
|
861
|
+
lp->ndim -= ud;
|
862
|
+
for (j = 0; j < lp->narg; j++) {
|
863
|
+
if (LARG(lp, j).shape) {
|
864
|
+
rb_bug("HAS_LOOP or reduce-dimension=%d conflicts with user-dimension", lp->reduce_dim);
|
865
|
+
}
|
866
|
+
LARG(lp, j).ndim += ud;
|
867
|
+
LARG(lp, j).shape = &(lp->n[lp->ndim]);
|
868
|
+
// printf("LARG(lp,j).ndim=%d,LARG(lp,j).shape=%lx\n",LARG(lp,j).ndim,(size_t)LARG(lp,j).shape);
|
869
|
+
}
|
870
|
+
// printf("lp->reduce_dim=%d lp->user.ndim=%d lp->ndim=%d\n",lp->reduce_dim,lp->user.ndim,lp->ndim);
|
871
|
+
|
872
|
+
skip_ud:
|
873
|
+
lp->user.n = &(lp->n[lp->ndim]);
|
874
|
+
for (j = 0; j < lp->narg; j++) {
|
875
|
+
LARG(lp, j).iter = &LITER(lp, lp->ndim, j);
|
876
|
+
// printf("in ndfunc_set_user_loop: lp->user.args[%d].iter=%lx\n",j,(size_t)(LARG(lp,j).iter));
|
877
|
+
}
|
878
|
+
}
|
879
|
+
|
880
|
+
static void ndfunc_set_bufcp(na_md_loop_t* lp, unsigned int loop_spec) {
|
881
|
+
unsigned int f;
|
882
|
+
int i, j;
|
883
|
+
int nd, ndim;
|
884
|
+
bool zero_step;
|
885
|
+
ssize_t n, sz, elmsz, stride, n_total; //, last_step;
|
886
|
+
size_t* buf_shape;
|
887
|
+
na_loop_iter_t *buf_iter = NULL, *src_iter;
|
888
|
+
|
889
|
+
// if (loop_spec==0) return;
|
890
|
+
|
891
|
+
n_total = lp->user.n[0];
|
892
|
+
for (i = 1; i < lp->user.ndim; i++) {
|
893
|
+
n_total *= lp->user.n[i];
|
894
|
+
}
|
895
|
+
|
896
|
+
// for (j=0; j<lp->nin; j++) {
|
897
|
+
for (j = 0; j < lp->narg; j++) {
|
898
|
+
// ndim = nd = lp->user.ndim;
|
899
|
+
ndim = nd = LARG(lp, j).ndim;
|
900
|
+
sz = elmsz = LARG(lp, j).elmsz;
|
901
|
+
src_iter = LARG(lp, j).iter;
|
902
|
+
// last_step = src_iter[ndim-1].step;
|
903
|
+
f = 0;
|
904
|
+
zero_step = 1;
|
905
|
+
for (i = ndim; i > 0;) {
|
906
|
+
i--;
|
907
|
+
if (LARG(lp, j).shape) {
|
908
|
+
n = LARG(lp, j).shape[i];
|
909
|
+
} else {
|
910
|
+
// printf("shape is NULL\n");
|
911
|
+
n = lp->user.n[i];
|
912
|
+
}
|
913
|
+
stride = sz * n;
|
914
|
+
// printf("{j=%d,i=%d,ndim=%d,nd=%d,idx=%lx,step=%ld,n=%ld,sz=%ld,stride=%ld}\n",j,i,ndim,nd,(size_t)src_iter[i].idx,src_iter[i].step,n,sz,stride);
|
915
|
+
if (src_iter[i].idx) {
|
916
|
+
f |= 2; // INDEX LOOP
|
917
|
+
zero_step = 0;
|
918
|
+
} else {
|
919
|
+
if (src_iter[i].step != sz) {
|
920
|
+
f |= 1; // NON_CONTIGUOUS LOOP
|
921
|
+
} else {
|
922
|
+
// CONTIGUOUS LOOP
|
923
|
+
if (i == ndim - 1) { // contract if last dimension
|
924
|
+
ndim = i;
|
925
|
+
elmsz = stride;
|
926
|
+
}
|
927
|
+
}
|
928
|
+
if (src_iter[i].step != 0) {
|
929
|
+
zero_step = 0;
|
930
|
+
}
|
931
|
+
}
|
932
|
+
sz = stride;
|
933
|
+
}
|
934
|
+
// printf("[j=%d f=%d loop_spec=%d zero_step=%d]\n",j,f,loop_spec,zero_step);
|
935
|
+
|
936
|
+
if (zero_step) {
|
937
|
+
// no buffer needed
|
938
|
+
continue;
|
939
|
+
}
|
940
|
+
|
941
|
+
// should check flatten-able loop to avoid buffering
|
942
|
+
|
943
|
+
// over loop_spec or reduce_loop is not contiguous
|
944
|
+
if (f & loop_spec || (lp->reduce_dim > 1 && ndim > 0)) {
|
945
|
+
// printf("(buf,nd=%d)",nd);
|
946
|
+
buf_iter = ALLOC_N(na_loop_iter_t, nd + 3);
|
947
|
+
buf_shape = ALLOC_N(size_t, nd);
|
948
|
+
buf_iter[nd].pos = 0;
|
949
|
+
buf_iter[nd].step = 0;
|
950
|
+
buf_iter[nd].idx = NULL;
|
951
|
+
sz = LARG(lp, j).elmsz;
|
952
|
+
// last_step = sz;
|
953
|
+
for (i = nd; i > 0;) {
|
954
|
+
i--;
|
955
|
+
buf_iter[i].pos = 0;
|
956
|
+
buf_iter[i].step = sz;
|
957
|
+
buf_iter[i].idx = NULL;
|
958
|
+
// n = lp->user.n[i];
|
959
|
+
n = LARG(lp, j).shape[i];
|
960
|
+
buf_shape[i] = n;
|
961
|
+
sz *= n;
|
962
|
+
}
|
963
|
+
LBUFCP(lp, j) = ALLOC(na_buffer_copy_t);
|
964
|
+
LBUFCP(lp, j)->ndim = ndim;
|
965
|
+
LBUFCP(lp, j)->elmsz = elmsz;
|
966
|
+
LBUFCP(lp, j)->n = buf_shape;
|
967
|
+
LBUFCP(lp, j)->src_iter = src_iter;
|
968
|
+
LBUFCP(lp, j)->buf_iter = buf_iter;
|
969
|
+
LARG(lp, j).iter = buf_iter;
|
970
|
+
// printf("in ndfunc_set_bufcp(1): lp->user.args[%d].iter=%lx\n",j,(size_t)(LARG(lp,j).iter));
|
971
|
+
LBUFCP(lp, j)->src_ptr = LARG(lp, j).ptr;
|
972
|
+
LARG(lp, j).ptr = LBUFCP(lp, j)->buf_ptr = xmalloc(sz);
|
973
|
+
// printf("(LBUFCP(lp,%d)->buf_ptr=%lx)\n",j,(size_t)(LBUFCP(lp,j)->buf_ptr));
|
974
|
+
}
|
975
|
+
}
|
976
|
+
|
977
|
+
#if 0
|
978
|
+
for (j=0; j<lp->narg; j++) {
|
979
|
+
ndim = lp->user.ndim;
|
980
|
+
src_iter = LARG(lp,j).iter;
|
981
|
+
last_step = src_iter[ndim-1].step;
|
982
|
+
if (lp->reduce_dim>1) {
|
983
|
+
//printf("(reduce_dim=%d,ndim=%d,nd=%d,n=%ld,lst=%ld)\n",lp->reduce_dim,ndim,nd,n_total,last_step);
|
984
|
+
buf_iter = ALLOC_N(na_loop_iter_t,2);
|
985
|
+
buf_iter[0].pos = LARG(lp,j).iter[0].pos;
|
986
|
+
buf_iter[0].step = last_step;
|
987
|
+
buf_iter[0].idx = NULL;
|
988
|
+
buf_iter[1].pos = 0;
|
989
|
+
buf_iter[1].step = 0;
|
990
|
+
buf_iter[1].idx = NULL;
|
991
|
+
LARG(lp,j).iter = buf_iter;
|
992
|
+
//printf("in ndfunc_set_bufcp(2): lp->user.args[%d].iter=%lx\n",j,(size_t)(LARG(lp,j).iter));
|
993
|
+
lp->xargs[j].free_user_iter = 1;
|
994
|
+
}
|
995
|
+
}
|
996
|
+
#endif
|
997
|
+
|
998
|
+
// flatten reduce dimensions
|
999
|
+
if (lp->reduce_dim > 1) {
|
1000
|
+
#if 1
|
1001
|
+
for (j = 0; j < lp->narg; j++) {
|
1002
|
+
ndim = lp->user.ndim;
|
1003
|
+
LARG(lp, j).iter[0].step = LARG(lp, j).iter[ndim - 1].step;
|
1004
|
+
LARG(lp, j).iter[0].idx = NULL;
|
1005
|
+
}
|
1006
|
+
#endif
|
1007
|
+
lp->user.n[0] = n_total;
|
1008
|
+
lp->user.ndim = 1;
|
1009
|
+
}
|
1010
|
+
}
|
1011
|
+
|
1012
|
+
static void ndloop_copy_to_buffer(na_buffer_copy_t* lp) {
|
1013
|
+
size_t* c;
|
1014
|
+
char *src, *buf;
|
1015
|
+
int i;
|
1016
|
+
int nd = lp->ndim;
|
1017
|
+
size_t elmsz = lp->elmsz;
|
1018
|
+
size_t buf_pos = 0;
|
1019
|
+
DBG(size_t j);
|
1020
|
+
|
1021
|
+
// printf("\nto_buf nd=%d elmsz=%ld\n",nd,elmsz);
|
1022
|
+
DBG(printf("<to buf> ["));
|
1023
|
+
// zero-dimension
|
1024
|
+
if (nd == 0) {
|
1025
|
+
src = lp->src_ptr + LITER_SRC(lp, 0).pos;
|
1026
|
+
buf = lp->buf_ptr;
|
1027
|
+
memcpy(buf, src, elmsz);
|
1028
|
+
DBG(for (j = 0; j < elmsz / 8; j++) { printf("%g,", ((double*)(buf))[j]); });
|
1029
|
+
goto loop_end;
|
1030
|
+
}
|
1031
|
+
// initialize loop counter
|
1032
|
+
c = ALLOCA_N(size_t, nd + 1);
|
1033
|
+
for (i = 0; i <= nd; i++) c[i] = 0;
|
1034
|
+
// loop body
|
1035
|
+
for (i = 0;;) {
|
1036
|
+
// i-th dimension
|
1037
|
+
for (; i < nd; i++) {
|
1038
|
+
if (LITER_SRC(lp, i).idx) {
|
1039
|
+
LITER_SRC(lp, i + 1).pos = LITER_SRC(lp, i).pos + LITER_SRC(lp, i).idx[c[i]];
|
1040
|
+
} else {
|
1041
|
+
LITER_SRC(lp, i + 1).pos = LITER_SRC(lp, i).pos + LITER_SRC(lp, i).step * c[i];
|
1042
|
+
}
|
1043
|
+
}
|
1044
|
+
src = lp->src_ptr + LITER_SRC(lp, nd).pos;
|
1045
|
+
buf = lp->buf_ptr + buf_pos;
|
1046
|
+
memcpy(buf, src, elmsz);
|
1047
|
+
DBG(for (j = 0; j < elmsz / 8; j++) { printf("%g,", ((double*)(buf))[j]); });
|
1048
|
+
buf_pos += elmsz;
|
1049
|
+
// count up
|
1050
|
+
for (;;) {
|
1051
|
+
if (i <= 0) goto loop_end;
|
1052
|
+
i--;
|
1053
|
+
if (++c[i] < lp->n[i]) break;
|
1054
|
+
c[i] = 0;
|
1055
|
+
}
|
1056
|
+
}
|
1057
|
+
loop_end:;
|
1058
|
+
DBG(printf("]\n"));
|
1059
|
+
}
|
1060
|
+
|
1061
|
+
static void ndloop_copy_from_buffer(na_buffer_copy_t* lp) {
|
1062
|
+
size_t* c;
|
1063
|
+
char *src, *buf;
|
1064
|
+
int i;
|
1065
|
+
int nd = lp->ndim;
|
1066
|
+
size_t elmsz = lp->elmsz;
|
1067
|
+
size_t buf_pos = 0;
|
1068
|
+
DBG(size_t j);
|
1069
|
+
|
1070
|
+
// printf("\nfrom_buf nd=%d elmsz=%ld\n",nd,elmsz);
|
1071
|
+
DBG(printf("<from buf> ["));
|
1072
|
+
// zero-dimension
|
1073
|
+
if (nd == 0) {
|
1074
|
+
src = lp->src_ptr + LITER_SRC(lp, 0).pos;
|
1075
|
+
buf = lp->buf_ptr;
|
1076
|
+
memcpy(src, buf, elmsz);
|
1077
|
+
DBG(for (j = 0; j < elmsz / 8; j++) { printf("%g,", ((double*)(src))[j]); });
|
1078
|
+
goto loop_end;
|
1079
|
+
}
|
1080
|
+
// initialize loop counter
|
1081
|
+
c = ALLOCA_N(size_t, nd + 1);
|
1082
|
+
for (i = 0; i <= nd; i++) c[i] = 0;
|
1083
|
+
// loop body
|
1084
|
+
for (i = 0;;) {
|
1085
|
+
// i-th dimension
|
1086
|
+
for (; i < nd; i++) {
|
1087
|
+
if (LITER_SRC(lp, i).idx) {
|
1088
|
+
LITER_SRC(lp, i + 1).pos = LITER_SRC(lp, i).pos + LITER_SRC(lp, i).idx[c[i]];
|
1089
|
+
} else {
|
1090
|
+
LITER_SRC(lp, i + 1).pos = LITER_SRC(lp, i).pos + LITER_SRC(lp, i).step * c[i];
|
1091
|
+
}
|
1092
|
+
}
|
1093
|
+
src = lp->src_ptr + LITER_SRC(lp, nd).pos;
|
1094
|
+
buf = lp->buf_ptr + buf_pos;
|
1095
|
+
memcpy(src, buf, elmsz);
|
1096
|
+
DBG(for (j = 0; j < elmsz / 8; j++) { printf("%g,", ((double*)(src))[j]); });
|
1097
|
+
buf_pos += elmsz;
|
1098
|
+
// count up
|
1099
|
+
for (;;) {
|
1100
|
+
if (i <= 0) goto loop_end;
|
1101
|
+
i--;
|
1102
|
+
if (++c[i] < lp->n[i]) break;
|
1103
|
+
c[i] = 0;
|
1104
|
+
}
|
1105
|
+
}
|
1106
|
+
loop_end:
|
1107
|
+
DBG(printf("]\n"));
|
1108
|
+
}
|
1109
|
+
|
1110
|
+
static void ndfunc_write_back(ndfunc_t* nf, na_md_loop_t* lp, VALUE orig_args, VALUE results) {
|
1111
|
+
VALUE src, dst;
|
1112
|
+
|
1113
|
+
if (lp->writeback >= 0) {
|
1114
|
+
dst = RARRAY_AREF(orig_args, lp->writeback);
|
1115
|
+
src = RARRAY_AREF(results, 0);
|
1116
|
+
na_store(dst, src);
|
1117
|
+
RARRAY_ASET(results, 0, dst);
|
1118
|
+
}
|
1119
|
+
}
|
1120
|
+
|
1121
|
+
static VALUE ndloop_extract(VALUE results, ndfunc_t* nf) {
|
1122
|
+
long n, i;
|
1123
|
+
VALUE x, y;
|
1124
|
+
narray_t* na;
|
1125
|
+
|
1126
|
+
// extract result objects
|
1127
|
+
switch (nf->nout) {
|
1128
|
+
case 0:
|
1129
|
+
return Qnil;
|
1130
|
+
case 1:
|
1131
|
+
x = RARRAY_AREF(results, 0);
|
1132
|
+
if (NDF_TEST(nf, NDF_EXTRACT)) {
|
1133
|
+
if (IsNArray(x)) {
|
1134
|
+
GetNArray(x, na);
|
1135
|
+
if (NA_NDIM(na) == 0) {
|
1136
|
+
x = rb_funcall(x, id_extract, 0);
|
1137
|
+
}
|
1138
|
+
}
|
1139
|
+
}
|
1140
|
+
return x;
|
1141
|
+
}
|
1142
|
+
if (NDF_TEST(nf, NDF_EXTRACT)) {
|
1143
|
+
n = RARRAY_LEN(results);
|
1144
|
+
for (i = 0; i < n; i++) {
|
1145
|
+
x = RARRAY_AREF(results, i);
|
1146
|
+
if (IsNArray(x)) {
|
1147
|
+
GetNArray(x, na);
|
1148
|
+
if (NA_NDIM(na) == 0) {
|
1149
|
+
y = rb_funcall(x, id_extract, 0);
|
1150
|
+
RARRAY_ASET(results, i, y);
|
1151
|
+
}
|
1152
|
+
}
|
1153
|
+
}
|
1154
|
+
}
|
1155
|
+
return results;
|
1156
|
+
}
|
1157
|
+
|
1158
|
+
static void loop_narray(ndfunc_t* nf, na_md_loop_t* lp);
|
1159
|
+
|
1160
|
+
static VALUE ndloop_run(VALUE vlp) {
|
1161
|
+
unsigned int loop_spec;
|
1162
|
+
volatile VALUE args, orig_args, results;
|
1163
|
+
na_md_loop_t* lp = (na_md_loop_t*)(vlp);
|
1164
|
+
ndfunc_t* nf;
|
1165
|
+
|
1166
|
+
orig_args = lp->vargs;
|
1167
|
+
nf = lp->ndfunc;
|
1168
|
+
|
1169
|
+
args = rb_obj_dup(orig_args);
|
1170
|
+
|
1171
|
+
// setup ndloop iterator with arguments
|
1172
|
+
ndloop_init_args(nf, lp, args);
|
1173
|
+
results = ndloop_set_output(nf, lp, args);
|
1174
|
+
|
1175
|
+
// if (na_debug_flag) {
|
1176
|
+
// printf("-- ndloop_set_output --\n");
|
1177
|
+
// print_ndloop(lp);
|
1178
|
+
// }
|
1179
|
+
|
1180
|
+
// contract loop
|
1181
|
+
if (lp->loop_func == loop_narray) {
|
1182
|
+
ndfunc_contract_loop(lp);
|
1183
|
+
// if (na_debug_flag) {
|
1184
|
+
// printf("-- ndfunc_contract_loop --\n");
|
1185
|
+
// print_ndloop(lp);
|
1186
|
+
// }
|
1187
|
+
}
|
1188
|
+
|
1189
|
+
// setup objects in which results are stored
|
1190
|
+
ndfunc_set_user_loop(nf, lp);
|
1191
|
+
|
1192
|
+
// setup buffering during loop
|
1193
|
+
if (lp->loop_func == loop_narray) {
|
1194
|
+
loop_spec = ndloop_func_loop_spec(nf, lp->user.ndim);
|
1195
|
+
ndfunc_set_bufcp(lp, loop_spec);
|
1196
|
+
}
|
1197
|
+
if (na_debug_flag) {
|
1198
|
+
printf("-- ndfunc_set_bufcp --\n");
|
1199
|
+
print_ndloop(lp);
|
1200
|
+
}
|
1201
|
+
|
1202
|
+
// loop
|
1203
|
+
(*(lp->loop_func))(nf, lp);
|
1204
|
+
|
1205
|
+
// if (na_debug_flag) {
|
1206
|
+
// printf("-- after loop --\n");
|
1207
|
+
// print_ndloop(lp);
|
1208
|
+
// }
|
1209
|
+
|
1210
|
+
if (RTEST(lp->user.err_type)) {
|
1211
|
+
rb_raise(lp->user.err_type, "error in NArray operation");
|
1212
|
+
}
|
1213
|
+
|
1214
|
+
// write-back will be placed here
|
1215
|
+
ndfunc_write_back(nf, lp, orig_args, results);
|
1216
|
+
|
1217
|
+
// extract result objects
|
1218
|
+
return ndloop_extract(results, nf);
|
1219
|
+
}
|
1220
|
+
|
1221
|
+
// ---------------------------------------------------------------------------
|
1222
|
+
|
1223
|
+
static void loop_narray(ndfunc_t* nf, na_md_loop_t* lp) {
|
1224
|
+
size_t* c;
|
1225
|
+
int i, j;
|
1226
|
+
int nd = lp->ndim;
|
1227
|
+
|
1228
|
+
if (nd < 0) {
|
1229
|
+
rb_bug("bug? lp->ndim = %d\n", lp->ndim);
|
1230
|
+
}
|
1231
|
+
|
1232
|
+
if (nd == 0) {
|
1233
|
+
for (j = 0; j < lp->nin; j++) {
|
1234
|
+
if (lp->xargs[j].bufcp) {
|
1235
|
+
// printf("copy_to_buffer j=%d\n",j);
|
1236
|
+
ndloop_copy_to_buffer(lp->xargs[j].bufcp);
|
1237
|
+
}
|
1238
|
+
}
|
1239
|
+
(*(nf->func))(&(lp->user));
|
1240
|
+
for (j = 0; j < lp->narg; j++) {
|
1241
|
+
if (lp->xargs[j].bufcp && (lp->xargs[j].flag & NDL_WRITE)) {
|
1242
|
+
// printf("copy_from_buffer j=%d\n",j);
|
1243
|
+
// copy data to work buffer
|
1244
|
+
ndloop_copy_from_buffer(lp->xargs[j].bufcp);
|
1245
|
+
}
|
1246
|
+
}
|
1247
|
+
return;
|
1248
|
+
}
|
1249
|
+
|
1250
|
+
// initialize loop counter
|
1251
|
+
c = ALLOCA_N(size_t, nd + 1);
|
1252
|
+
for (i = 0; i <= nd; i++) c[i] = 0;
|
1253
|
+
|
1254
|
+
// loop body
|
1255
|
+
for (i = 0;;) {
|
1256
|
+
// i-th dimension
|
1257
|
+
for (; i < nd; i++) {
|
1258
|
+
// j-th argument
|
1259
|
+
for (j = 0; j < lp->narg; j++) {
|
1260
|
+
if (LITER(lp, i, j).idx) {
|
1261
|
+
LITER(lp, i + 1, j).pos = LITER(lp, i, j).pos + LITER(lp, i, j).idx[c[i]];
|
1262
|
+
} else {
|
1263
|
+
LITER(lp, i + 1, j).pos = LITER(lp, i, j).pos + LITER(lp, i, j).step * c[i];
|
1264
|
+
}
|
1265
|
+
// printf("j=%d c[i=%d]=%lu pos=%lu\n",j,i,c[i],LITER(lp,i+1,j).pos);
|
1266
|
+
}
|
1267
|
+
}
|
1268
|
+
for (j = 0; j < lp->nin; j++) {
|
1269
|
+
if (lp->xargs[j].bufcp) {
|
1270
|
+
// copy data to work buffer
|
1271
|
+
// cp lp->iter[j][nd..*] to lp->user.args[j].iter[0..*]
|
1272
|
+
// printf("copy_to_buffer j=%d\n",j);
|
1273
|
+
ndloop_copy_to_buffer(lp->xargs[j].bufcp);
|
1274
|
+
}
|
1275
|
+
}
|
1276
|
+
(*(nf->func))(&(lp->user));
|
1277
|
+
for (j = 0; j < lp->narg; j++) {
|
1278
|
+
if (lp->xargs[j].bufcp && (lp->xargs[j].flag & NDL_WRITE)) {
|
1279
|
+
// copy data to work buffer
|
1280
|
+
// printf("copy_from_buffer j=%d\n",j);
|
1281
|
+
ndloop_copy_from_buffer(lp->xargs[j].bufcp);
|
1282
|
+
}
|
1283
|
+
}
|
1284
|
+
if (RTEST(lp->user.err_type)) {
|
1285
|
+
return;
|
1286
|
+
}
|
1287
|
+
|
1288
|
+
for (;;) {
|
1289
|
+
if (i <= 0) goto loop_end;
|
1290
|
+
i--;
|
1291
|
+
if (++c[i] < lp->n[i]) break;
|
1292
|
+
c[i] = 0;
|
1293
|
+
}
|
1294
|
+
}
|
1295
|
+
loop_end:;
|
1296
|
+
}
|
1297
|
+
|
1298
|
+
static VALUE na_ndloop_main(ndfunc_t* nf, VALUE args, void* opt_ptr) {
|
1299
|
+
unsigned int copy_flag;
|
1300
|
+
na_md_loop_t lp;
|
1301
|
+
|
1302
|
+
if (na_debug_flag) print_ndfunc(nf);
|
1303
|
+
|
1304
|
+
// cast arguments to NArray
|
1305
|
+
copy_flag = ndloop_cast_args(nf, args);
|
1306
|
+
|
1307
|
+
// allocate ndloop struct
|
1308
|
+
ndloop_alloc(&lp, nf, args, opt_ptr, copy_flag, loop_narray);
|
1309
|
+
|
1310
|
+
return rb_ensure(ndloop_run, (VALUE)&lp, ndloop_release, (VALUE)&lp);
|
1311
|
+
}
|
1312
|
+
|
1313
|
+
VALUE
|
1314
|
+
#ifdef HAVE_STDARG_PROTOTYPES
|
1315
|
+
na_ndloop(ndfunc_t* nf, int argc, ...)
|
1316
|
+
#else
|
1317
|
+
na_ndloop(nf, argc, va_alist) ndfunc_t* nf;
|
1318
|
+
int argc;
|
1319
|
+
va_dcl
|
1320
|
+
#endif
|
1321
|
+
{
|
1322
|
+
va_list ar;
|
1323
|
+
|
1324
|
+
int i;
|
1325
|
+
VALUE* argv;
|
1326
|
+
volatile VALUE args;
|
1327
|
+
|
1328
|
+
argv = ALLOCA_N(VALUE, argc);
|
1329
|
+
|
1330
|
+
va_init_list(ar, argc);
|
1331
|
+
for (i = 0; i < argc; i++) {
|
1332
|
+
argv[i] = va_arg(ar, VALUE);
|
1333
|
+
}
|
1334
|
+
va_end(ar);
|
1335
|
+
|
1336
|
+
args = rb_ary_new4(argc, argv);
|
1337
|
+
|
1338
|
+
return na_ndloop_main(nf, args, NULL);
|
1339
|
+
}
|
1340
|
+
|
1341
|
+
VALUE
|
1342
|
+
na_ndloop2(ndfunc_t* nf, VALUE args) {
|
1343
|
+
return na_ndloop_main(nf, args, NULL);
|
1344
|
+
}
|
1345
|
+
|
1346
|
+
VALUE
|
1347
|
+
#ifdef HAVE_STDARG_PROTOTYPES
|
1348
|
+
na_ndloop3(ndfunc_t* nf, void* ptr, int argc, ...)
|
1349
|
+
#else
|
1350
|
+
na_ndloop3(nf, ptr, argc, va_alist) ndfunc_t* nf;
|
1351
|
+
void* ptr;
|
1352
|
+
int argc;
|
1353
|
+
va_dcl
|
1354
|
+
#endif
|
1355
|
+
{
|
1356
|
+
va_list ar;
|
1357
|
+
|
1358
|
+
int i;
|
1359
|
+
VALUE* argv;
|
1360
|
+
volatile VALUE args;
|
1361
|
+
|
1362
|
+
argv = ALLOCA_N(VALUE, argc);
|
1363
|
+
|
1364
|
+
va_init_list(ar, argc);
|
1365
|
+
for (i = 0; i < argc; i++) {
|
1366
|
+
argv[i] = va_arg(ar, VALUE);
|
1367
|
+
}
|
1368
|
+
va_end(ar);
|
1369
|
+
|
1370
|
+
args = rb_ary_new4(argc, argv);
|
1371
|
+
|
1372
|
+
return na_ndloop_main(nf, args, ptr);
|
1373
|
+
}
|
1374
|
+
|
1375
|
+
VALUE
|
1376
|
+
na_ndloop4(ndfunc_t* nf, void* ptr, VALUE args) {
|
1377
|
+
return na_ndloop_main(nf, args, ptr);
|
1378
|
+
}
|
1379
|
+
|
1380
|
+
//----------------------------------------------------------------------
|
1381
|
+
|
1382
|
+
VALUE
|
1383
|
+
na_info_str(VALUE ary) {
|
1384
|
+
int nd, i;
|
1385
|
+
char tmp[32];
|
1386
|
+
VALUE buf;
|
1387
|
+
narray_t* na;
|
1388
|
+
|
1389
|
+
GetNArray(ary, na);
|
1390
|
+
nd = na->ndim;
|
1391
|
+
|
1392
|
+
buf = rb_str_new2(rb_class2name(rb_obj_class(ary)));
|
1393
|
+
if (NA_TYPE(na) == NARRAY_VIEW_T) {
|
1394
|
+
rb_str_cat(buf, "(view)", 6);
|
1395
|
+
}
|
1396
|
+
rb_str_cat(buf, "#shape=[", 8);
|
1397
|
+
if (nd > 0) {
|
1398
|
+
for (i = 0;;) {
|
1399
|
+
sprintf(tmp, "%" SZF "u", na->shape[i]);
|
1400
|
+
rb_str_cat2(buf, tmp);
|
1401
|
+
if (++i == nd) break;
|
1402
|
+
rb_str_cat(buf, ",", 1);
|
1403
|
+
}
|
1404
|
+
}
|
1405
|
+
rb_str_cat(buf, "]", 1);
|
1406
|
+
return buf;
|
1407
|
+
}
|
1408
|
+
|
1409
|
+
//----------------------------------------------------------------------
|
1410
|
+
|
1411
|
+
#define ncol numo_na_inspect_cols
|
1412
|
+
#define nrow numo_na_inspect_rows
|
1413
|
+
extern int ncol, nrow;
|
1414
|
+
|
1415
|
+
static void loop_inspect(ndfunc_t* nf, na_md_loop_t* lp) {
|
1416
|
+
int nd, i, ii;
|
1417
|
+
size_t* c;
|
1418
|
+
int col = 0, row = 0;
|
1419
|
+
long len;
|
1420
|
+
VALUE str;
|
1421
|
+
na_text_func_t func = (na_text_func_t)(nf->func);
|
1422
|
+
VALUE buf, opt;
|
1423
|
+
|
1424
|
+
nd = lp->ndim;
|
1425
|
+
buf = lp->loop_opt;
|
1426
|
+
// opt = *(VALUE*)(lp->user.opt_ptr);
|
1427
|
+
opt = lp->user.option;
|
1428
|
+
|
1429
|
+
for (i = 0; i < nd; i++) {
|
1430
|
+
if (lp->n[i] == 0) {
|
1431
|
+
rb_str_cat(buf, "[]", 2);
|
1432
|
+
return;
|
1433
|
+
}
|
1434
|
+
}
|
1435
|
+
|
1436
|
+
rb_str_cat(buf, "\n", 1);
|
1437
|
+
|
1438
|
+
c = ALLOCA_N(size_t, nd + 1);
|
1439
|
+
for (i = 0; i <= nd; i++) c[i] = 0;
|
1440
|
+
|
1441
|
+
if (nd > 0) {
|
1442
|
+
rb_str_cat(buf, "[", 1);
|
1443
|
+
} else {
|
1444
|
+
rb_str_cat(buf, "", 0);
|
1445
|
+
}
|
1446
|
+
|
1447
|
+
col = nd * 2;
|
1448
|
+
for (i = 0;;) {
|
1449
|
+
if (i < nd - 1) {
|
1450
|
+
for (ii = 0; ii < i; ii++) rb_str_cat(buf, " ", 1);
|
1451
|
+
for (; ii < nd - 1; ii++) rb_str_cat(buf, "[", 1);
|
1452
|
+
}
|
1453
|
+
for (; i < nd; i++) {
|
1454
|
+
if (LITER(lp, i, 0).idx) {
|
1455
|
+
LITER(lp, i + 1, 0).pos = LITER(lp, i, 0).pos + LITER(lp, i, 0).idx[c[i]];
|
1456
|
+
} else {
|
1457
|
+
LITER(lp, i + 1, 0).pos = LITER(lp, i, 0).pos + LITER(lp, i, 0).step * c[i];
|
1458
|
+
}
|
1459
|
+
}
|
1460
|
+
str = (*func)(LARG(lp, 0).ptr, LITER(lp, i, 0).pos, opt);
|
1461
|
+
|
1462
|
+
len = RSTRING_LEN(str) + 2;
|
1463
|
+
if (ncol > 0 && col + len > ncol - 3) {
|
1464
|
+
rb_str_cat(buf, "...", 3);
|
1465
|
+
c[i - 1] = lp->n[i - 1];
|
1466
|
+
} else {
|
1467
|
+
rb_str_append(buf, str);
|
1468
|
+
col += len;
|
1469
|
+
}
|
1470
|
+
for (;;) {
|
1471
|
+
if (i == 0) goto loop_end;
|
1472
|
+
i--;
|
1473
|
+
if (++c[i] < lp->n[i]) break;
|
1474
|
+
rb_str_cat(buf, "]", 1);
|
1475
|
+
c[i] = 0;
|
1476
|
+
}
|
1477
|
+
// line_break:
|
1478
|
+
rb_str_cat(buf, ", ", 2);
|
1479
|
+
if (i < nd - 1) {
|
1480
|
+
rb_str_cat(buf, "\n ", 2);
|
1481
|
+
col = nd * 2;
|
1482
|
+
row++;
|
1483
|
+
if (row == nrow) {
|
1484
|
+
rb_str_cat(buf, "...", 3);
|
1485
|
+
goto loop_end;
|
1486
|
+
}
|
1487
|
+
}
|
1488
|
+
}
|
1489
|
+
loop_end:;
|
1490
|
+
}
|
1491
|
+
|
1492
|
+
VALUE
|
1493
|
+
na_ndloop_inspect(VALUE nary, na_text_func_t func, VALUE opt) {
|
1494
|
+
volatile VALUE args;
|
1495
|
+
na_md_loop_t lp;
|
1496
|
+
VALUE buf;
|
1497
|
+
ndfunc_arg_in_t ain[3] = {{Qnil, 0}, {sym_loop_opt}, {sym_option}};
|
1498
|
+
ndfunc_t nf = {(na_iter_func_t)func, NO_LOOP, 3, 0, ain, 0};
|
1499
|
+
// nf = ndfunc_alloc(NULL, NO_LOOP, 1, 0, Qnil);
|
1500
|
+
|
1501
|
+
buf = na_info_str(nary);
|
1502
|
+
|
1503
|
+
if (na_get_pointer(nary) == NULL) {
|
1504
|
+
return rb_str_cat(buf, "(empty)", 7);
|
1505
|
+
}
|
1506
|
+
|
1507
|
+
// rb_p(args);
|
1508
|
+
// if (na_debug_flag) print_ndfunc(&nf);
|
1509
|
+
|
1510
|
+
args = rb_ary_new3(3, nary, buf, opt);
|
1511
|
+
|
1512
|
+
// cast arguments to NArray
|
1513
|
+
// ndloop_cast_args(nf, args);
|
1514
|
+
|
1515
|
+
// allocate ndloop struct
|
1516
|
+
ndloop_alloc(&lp, &nf, args, NULL, 0, loop_inspect);
|
1517
|
+
|
1518
|
+
rb_ensure(ndloop_run, (VALUE)&lp, ndloop_release, (VALUE)&lp);
|
1519
|
+
|
1520
|
+
return buf;
|
1521
|
+
}
|
1522
|
+
|
1523
|
+
//----------------------------------------------------------------------
|
1524
|
+
|
1525
|
+
static void loop_store_subnarray(ndfunc_t* nf, na_md_loop_t* lp, int i0, size_t* c, VALUE a) {
|
1526
|
+
int nd = lp->ndim;
|
1527
|
+
int i, j;
|
1528
|
+
narray_t* na;
|
1529
|
+
int* dim_map;
|
1530
|
+
VALUE a_type;
|
1531
|
+
|
1532
|
+
a_type = rb_obj_class(LARG(lp, 0).value);
|
1533
|
+
if (rb_obj_class(a) != a_type) {
|
1534
|
+
a = rb_funcall(a_type, id_cast, 1, a);
|
1535
|
+
}
|
1536
|
+
GetNArray(a, na);
|
1537
|
+
if (na->ndim != nd - i0 + 1) {
|
1538
|
+
rb_raise(nary_eShapeError,
|
1539
|
+
"mismatched dimension of sub-narray: "
|
1540
|
+
"nd_src=%d, nd_dst=%d",
|
1541
|
+
na->ndim, nd - i0 + 1);
|
1542
|
+
}
|
1543
|
+
dim_map = ALLOCA_N(int, na->ndim);
|
1544
|
+
for (i = 0; i < na->ndim; i++) {
|
1545
|
+
dim_map[i] = lp->trans_map[i + i0];
|
1546
|
+
// printf("dim_map[i=%d] = %d, i0=%d\n", i, dim_map[i], i0);
|
1547
|
+
}
|
1548
|
+
ndloop_set_stepidx(lp, 1, a, dim_map, NDL_READ);
|
1549
|
+
LARG(lp, 1).shape = &(na->shape[na->ndim - 1]);
|
1550
|
+
|
1551
|
+
// loop body
|
1552
|
+
for (i = i0;;) {
|
1553
|
+
LARG(lp, 1).value = Qtrue;
|
1554
|
+
for (; i < nd; i++) {
|
1555
|
+
for (j = 0; j < 2; j++) {
|
1556
|
+
if (LITER(lp, i, j).idx) {
|
1557
|
+
LITER(lp, i + 1, j).pos = LITER(lp, i, j).pos + LITER(lp, i, j).idx[c[i]];
|
1558
|
+
} else {
|
1559
|
+
LITER(lp, i + 1, j).pos = LITER(lp, i, j).pos + LITER(lp, i, j).step * c[i];
|
1560
|
+
}
|
1561
|
+
}
|
1562
|
+
if (c[i] >= na->shape[i - i0]) {
|
1563
|
+
LARG(lp, 1).value = Qfalse;
|
1564
|
+
}
|
1565
|
+
}
|
1566
|
+
|
1567
|
+
(*(nf->func))(&(lp->user));
|
1568
|
+
|
1569
|
+
for (;;) {
|
1570
|
+
if (i <= i0) goto loop_end;
|
1571
|
+
i--;
|
1572
|
+
c[i]++;
|
1573
|
+
if (c[i] < lp->n[i]) break;
|
1574
|
+
c[i] = 0;
|
1575
|
+
}
|
1576
|
+
}
|
1577
|
+
loop_end:
|
1578
|
+
LARG(lp, 1).ptr = NULL;
|
1579
|
+
}
|
1580
|
+
|
1581
|
+
static void loop_store_rarray(ndfunc_t* nf, na_md_loop_t* lp) {
|
1582
|
+
size_t* c;
|
1583
|
+
int i;
|
1584
|
+
VALUE* a;
|
1585
|
+
int nd = lp->ndim;
|
1586
|
+
|
1587
|
+
// counter
|
1588
|
+
c = ALLOCA_N(size_t, nd + 1);
|
1589
|
+
for (i = 0; i <= nd; i++) c[i] = 0;
|
1590
|
+
|
1591
|
+
// array at each dimension
|
1592
|
+
a = ALLOCA_N(VALUE, nd + 1);
|
1593
|
+
a[0] = LARG(lp, 1).value;
|
1594
|
+
|
1595
|
+
// print_ndloop(lp);
|
1596
|
+
|
1597
|
+
// loop body
|
1598
|
+
for (i = 0;;) {
|
1599
|
+
for (; i < nd; i++) {
|
1600
|
+
if (LITER(lp, i, 0).idx) {
|
1601
|
+
LITER(lp, i + 1, 0).pos = LITER(lp, i, 0).pos + LITER(lp, i, 0).idx[c[i]];
|
1602
|
+
} else {
|
1603
|
+
LITER(lp, i + 1, 0).pos = LITER(lp, i, 0).pos + LITER(lp, i, 0).step * c[i];
|
1604
|
+
}
|
1605
|
+
if (TYPE(a[i]) == T_ARRAY) {
|
1606
|
+
if (c[i] < (size_t)RARRAY_LEN(a[i])) {
|
1607
|
+
a[i + 1] = RARRAY_AREF(a[i], c[i]);
|
1608
|
+
} else {
|
1609
|
+
a[i + 1] = Qnil;
|
1610
|
+
}
|
1611
|
+
} else if (IsNArray(a[i])) {
|
1612
|
+
// printf("a[i=%d]=0x%lx\n",i,a[i]);
|
1613
|
+
loop_store_subnarray(nf, lp, i, c, a[i]);
|
1614
|
+
goto loop_next;
|
1615
|
+
} else {
|
1616
|
+
if (c[i] == 0) {
|
1617
|
+
a[i + 1] = a[i];
|
1618
|
+
} else {
|
1619
|
+
a[i + 1] = Qnil;
|
1620
|
+
}
|
1621
|
+
}
|
1622
|
+
// printf("c[%d]=%lu\n",i,c[i]);
|
1623
|
+
}
|
1624
|
+
|
1625
|
+
// printf("a[i=%d]=0x%lx\n",i,a[i]);
|
1626
|
+
if (IsNArray(a[i])) {
|
1627
|
+
loop_store_subnarray(nf, lp, i, c, a[i]);
|
1628
|
+
} else {
|
1629
|
+
LARG(lp, 1).value = a[i];
|
1630
|
+
(*(nf->func))(&(lp->user));
|
1631
|
+
}
|
1632
|
+
|
1633
|
+
loop_next:
|
1634
|
+
for (;;) {
|
1635
|
+
if (i <= 0) goto loop_end;
|
1636
|
+
i--;
|
1637
|
+
c[i]++;
|
1638
|
+
if (c[i] < lp->n[i]) break;
|
1639
|
+
c[i] = 0;
|
1640
|
+
}
|
1641
|
+
}
|
1642
|
+
loop_end:;
|
1643
|
+
}
|
1644
|
+
|
1645
|
+
VALUE
|
1646
|
+
na_ndloop_store_rarray(ndfunc_t* nf, VALUE nary, VALUE rary) {
|
1647
|
+
na_md_loop_t lp;
|
1648
|
+
VALUE args;
|
1649
|
+
|
1650
|
+
// rb_p(args);
|
1651
|
+
if (na_debug_flag) print_ndfunc(nf);
|
1652
|
+
|
1653
|
+
args = rb_assoc_new(nary, rary);
|
1654
|
+
|
1655
|
+
// cast arguments to NArray
|
1656
|
+
// ndloop_cast_args(nf, args);
|
1657
|
+
|
1658
|
+
// allocate ndloop struct
|
1659
|
+
ndloop_alloc(&lp, nf, args, NULL, 0, loop_store_rarray);
|
1660
|
+
|
1661
|
+
return rb_ensure(ndloop_run, (VALUE)&lp, ndloop_release, (VALUE)&lp);
|
1662
|
+
}
|
1663
|
+
|
1664
|
+
VALUE
|
1665
|
+
na_ndloop_store_rarray2(ndfunc_t* nf, VALUE nary, VALUE rary, VALUE opt) {
|
1666
|
+
na_md_loop_t lp;
|
1667
|
+
VALUE args;
|
1668
|
+
|
1669
|
+
// rb_p(args);
|
1670
|
+
if (na_debug_flag) print_ndfunc(nf);
|
1671
|
+
|
1672
|
+
// args = rb_assoc_new(rary,nary);
|
1673
|
+
args = rb_ary_new3(3, nary, rary, opt);
|
1674
|
+
|
1675
|
+
// cast arguments to NArray
|
1676
|
+
// ndloop_cast_args(nf, args);
|
1677
|
+
|
1678
|
+
// allocate ndloop struct
|
1679
|
+
ndloop_alloc(&lp, nf, args, NULL, 0, loop_store_rarray);
|
1680
|
+
|
1681
|
+
return rb_ensure(ndloop_run, (VALUE)&lp, ndloop_release, (VALUE)&lp);
|
1682
|
+
}
|
1683
|
+
|
1684
|
+
//----------------------------------------------------------------------
|
1685
|
+
|
1686
|
+
static void loop_narray_to_rarray(ndfunc_t* nf, na_md_loop_t* lp) {
|
1687
|
+
size_t* c;
|
1688
|
+
int i;
|
1689
|
+
// int nargs = nf->narg + nf->nres;
|
1690
|
+
int nd = lp->ndim;
|
1691
|
+
VALUE* a;
|
1692
|
+
volatile VALUE a0;
|
1693
|
+
|
1694
|
+
// alloc counter
|
1695
|
+
c = ALLOCA_N(size_t, nd + 1);
|
1696
|
+
for (i = 0; i <= nd; i++) c[i] = 0;
|
1697
|
+
// c[i]=1; // for zero-dim
|
1698
|
+
// fprintf(stderr,"in loop_narray_to_rarray, nd=%d\n",nd);
|
1699
|
+
|
1700
|
+
a = ALLOCA_N(VALUE, nd + 1);
|
1701
|
+
a[0] = a0 = lp->loop_opt;
|
1702
|
+
|
1703
|
+
// loop body
|
1704
|
+
for (i = 0;;) {
|
1705
|
+
for (; i < nd; i++) {
|
1706
|
+
if (LITER(lp, i, 0).idx) {
|
1707
|
+
LITER(lp, i + 1, 0).pos = LITER(lp, i, 0).pos + LITER(lp, i, 0).idx[c[i]];
|
1708
|
+
} else {
|
1709
|
+
LITER(lp, i + 1, 0).pos = LITER(lp, i, 0).pos + LITER(lp, i, 0).step * c[i];
|
1710
|
+
}
|
1711
|
+
if (c[i] == 0) {
|
1712
|
+
a[i + 1] = rb_ary_new2(lp->n[i]);
|
1713
|
+
rb_ary_push(a[i], a[i + 1]);
|
1714
|
+
}
|
1715
|
+
}
|
1716
|
+
|
1717
|
+
// lp->user.info = a[i];
|
1718
|
+
LARG(lp, 1).value = a[i];
|
1719
|
+
(*(nf->func))(&(lp->user));
|
1720
|
+
|
1721
|
+
for (;;) {
|
1722
|
+
if (i <= 0) goto loop_end;
|
1723
|
+
i--;
|
1724
|
+
if (++c[i] < lp->n[i]) break;
|
1725
|
+
c[i] = 0;
|
1726
|
+
}
|
1727
|
+
}
|
1728
|
+
loop_end:;
|
1729
|
+
}
|
1730
|
+
|
1731
|
+
VALUE
|
1732
|
+
na_ndloop_cast_narray_to_rarray(ndfunc_t* nf, VALUE nary, VALUE fmt) {
|
1733
|
+
na_md_loop_t lp;
|
1734
|
+
VALUE args, a0;
|
1735
|
+
|
1736
|
+
// rb_p(args);
|
1737
|
+
if (na_debug_flag) print_ndfunc(nf);
|
1738
|
+
|
1739
|
+
a0 = rb_ary_new();
|
1740
|
+
args = rb_ary_new3(3, nary, a0, fmt);
|
1741
|
+
|
1742
|
+
// cast arguments to NArray
|
1743
|
+
// ndloop_cast_args(nf, args);
|
1744
|
+
|
1745
|
+
// allocate ndloop struct
|
1746
|
+
ndloop_alloc(&lp, nf, args, NULL, 0, loop_narray_to_rarray);
|
1747
|
+
|
1748
|
+
rb_ensure(ndloop_run, (VALUE)&lp, ndloop_release, (VALUE)&lp);
|
1749
|
+
return RARRAY_AREF(a0, 0);
|
1750
|
+
}
|
1751
|
+
|
1752
|
+
//----------------------------------------------------------------------
|
1753
|
+
|
1754
|
+
static void loop_narray_with_index(ndfunc_t* nf, na_md_loop_t* lp) {
|
1755
|
+
size_t* c;
|
1756
|
+
int i, j;
|
1757
|
+
int nd = lp->ndim;
|
1758
|
+
|
1759
|
+
if (nd < 0) {
|
1760
|
+
rb_bug("bug? lp->ndim = %d\n", lp->ndim);
|
1761
|
+
}
|
1762
|
+
if (lp->n[0] == 0) { // empty array
|
1763
|
+
return;
|
1764
|
+
}
|
1765
|
+
|
1766
|
+
// pass total ndim to iterator
|
1767
|
+
lp->user.ndim += nd;
|
1768
|
+
|
1769
|
+
// alloc counter
|
1770
|
+
lp->user.opt_ptr = c = ALLOCA_N(size_t, nd + 1);
|
1771
|
+
for (i = 0; i <= nd; i++) c[i] = 0;
|
1772
|
+
|
1773
|
+
// loop body
|
1774
|
+
for (i = 0;;) {
|
1775
|
+
for (; i < nd; i++) {
|
1776
|
+
// j-th argument
|
1777
|
+
for (j = 0; j < lp->narg; j++) {
|
1778
|
+
if (LITER(lp, i, j).idx) {
|
1779
|
+
LITER(lp, i + 1, j).pos = LITER(lp, i, j).pos + LITER(lp, i, j).idx[c[i]];
|
1780
|
+
} else {
|
1781
|
+
LITER(lp, i + 1, j).pos = LITER(lp, i, j).pos + LITER(lp, i, j).step * c[i];
|
1782
|
+
}
|
1783
|
+
// printf("j=%d c[i=%d]=%lu pos=%lu\n",j,i,c[i],LITER(lp,i+1,j).pos);
|
1784
|
+
}
|
1785
|
+
}
|
1786
|
+
|
1787
|
+
(*(nf->func))(&(lp->user));
|
1788
|
+
|
1789
|
+
for (;;) {
|
1790
|
+
if (i <= 0) goto loop_end;
|
1791
|
+
i--;
|
1792
|
+
if (++c[i] < lp->n[i]) break;
|
1793
|
+
c[i] = 0;
|
1794
|
+
}
|
1795
|
+
}
|
1796
|
+
loop_end:;
|
1797
|
+
}
|
1798
|
+
|
1799
|
+
VALUE
|
1800
|
+
#ifdef HAVE_STDARG_PROTOTYPES
|
1801
|
+
na_ndloop_with_index(ndfunc_t* nf, int argc, ...)
|
1802
|
+
#else
|
1803
|
+
na_ndloop_with_index(nf, argc, va_alist) ndfunc_t* nf;
|
1804
|
+
int argc;
|
1805
|
+
va_dcl
|
1806
|
+
#endif
|
1807
|
+
{
|
1808
|
+
va_list ar;
|
1809
|
+
|
1810
|
+
int i;
|
1811
|
+
VALUE* argv;
|
1812
|
+
volatile VALUE args;
|
1813
|
+
na_md_loop_t lp;
|
1814
|
+
|
1815
|
+
argv = ALLOCA_N(VALUE, argc);
|
1816
|
+
|
1817
|
+
va_init_list(ar, argc);
|
1818
|
+
for (i = 0; i < argc; i++) {
|
1819
|
+
argv[i] = va_arg(ar, VALUE);
|
1820
|
+
}
|
1821
|
+
va_end(ar);
|
1822
|
+
|
1823
|
+
args = rb_ary_new4(argc, argv);
|
1824
|
+
|
1825
|
+
// return na_ndloop_main(nf, args, NULL);
|
1826
|
+
if (na_debug_flag) print_ndfunc(nf);
|
1827
|
+
|
1828
|
+
// cast arguments to NArray
|
1829
|
+
// copy_flag = ndloop_cast_args(nf, args);
|
1830
|
+
|
1831
|
+
// allocate ndloop struct
|
1832
|
+
ndloop_alloc(&lp, nf, args, 0, 0, loop_narray_with_index);
|
1833
|
+
|
1834
|
+
return rb_ensure(ndloop_run, (VALUE)&lp, ndloop_release, (VALUE)&lp);
|
1835
|
+
}
|
1836
|
+
|
1837
|
+
void Init_nary_ndloop(void) {
|
1838
|
+
id_cast = rb_intern("cast");
|
1839
|
+
id_extract = rb_intern("extract");
|
1840
|
+
}
|