numo-narray-alt 0.9.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (66) hide show
  1. checksums.yaml +7 -0
  2. data/Gemfile +14 -0
  3. data/LICENSE +30 -0
  4. data/README.md +71 -0
  5. data/Rakefile +24 -0
  6. data/ext/numo/narray/SFMT-params.h +97 -0
  7. data/ext/numo/narray/SFMT-params19937.h +48 -0
  8. data/ext/numo/narray/SFMT.c +602 -0
  9. data/ext/numo/narray/SFMT.h +147 -0
  10. data/ext/numo/narray/array.c +575 -0
  11. data/ext/numo/narray/data.c +958 -0
  12. data/ext/numo/narray/extconf.rb +84 -0
  13. data/ext/numo/narray/index.c +1092 -0
  14. data/ext/numo/narray/kwargs.c +142 -0
  15. data/ext/numo/narray/math.c +133 -0
  16. data/ext/numo/narray/narray.c +1976 -0
  17. data/ext/numo/narray/narray.def +28 -0
  18. data/ext/numo/narray/ndloop.c +1840 -0
  19. data/ext/numo/narray/numo/compat.h +23 -0
  20. data/ext/numo/narray/numo/intern.h +115 -0
  21. data/ext/numo/narray/numo/narray.h +480 -0
  22. data/ext/numo/narray/numo/ndloop.h +93 -0
  23. data/ext/numo/narray/numo/template.h +149 -0
  24. data/ext/numo/narray/numo/types/bit.h +38 -0
  25. data/ext/numo/narray/numo/types/complex.h +404 -0
  26. data/ext/numo/narray/numo/types/complex_macro.h +384 -0
  27. data/ext/numo/narray/numo/types/dcomplex.h +42 -0
  28. data/ext/numo/narray/numo/types/dfloat.h +44 -0
  29. data/ext/numo/narray/numo/types/float_def.h +34 -0
  30. data/ext/numo/narray/numo/types/float_macro.h +202 -0
  31. data/ext/numo/narray/numo/types/int16.h +27 -0
  32. data/ext/numo/narray/numo/types/int32.h +23 -0
  33. data/ext/numo/narray/numo/types/int64.h +23 -0
  34. data/ext/numo/narray/numo/types/int8.h +23 -0
  35. data/ext/numo/narray/numo/types/int_macro.h +66 -0
  36. data/ext/numo/narray/numo/types/real_accum.h +481 -0
  37. data/ext/numo/narray/numo/types/robj_macro.h +78 -0
  38. data/ext/numo/narray/numo/types/robject.h +25 -0
  39. data/ext/numo/narray/numo/types/scomplex.h +42 -0
  40. data/ext/numo/narray/numo/types/sfloat.h +45 -0
  41. data/ext/numo/narray/numo/types/uint16.h +24 -0
  42. data/ext/numo/narray/numo/types/uint32.h +20 -0
  43. data/ext/numo/narray/numo/types/uint64.h +20 -0
  44. data/ext/numo/narray/numo/types/uint8.h +20 -0
  45. data/ext/numo/narray/numo/types/uint_macro.h +57 -0
  46. data/ext/numo/narray/numo/types/xint_macro.h +166 -0
  47. data/ext/numo/narray/rand.c +40 -0
  48. data/ext/numo/narray/src/t_bit.c +3236 -0
  49. data/ext/numo/narray/src/t_dcomplex.c +6776 -0
  50. data/ext/numo/narray/src/t_dfloat.c +9417 -0
  51. data/ext/numo/narray/src/t_int16.c +5757 -0
  52. data/ext/numo/narray/src/t_int32.c +5757 -0
  53. data/ext/numo/narray/src/t_int64.c +5759 -0
  54. data/ext/numo/narray/src/t_int8.c +5355 -0
  55. data/ext/numo/narray/src/t_robject.c +5567 -0
  56. data/ext/numo/narray/src/t_scomplex.c +6731 -0
  57. data/ext/numo/narray/src/t_sfloat.c +9374 -0
  58. data/ext/numo/narray/src/t_uint16.c +5753 -0
  59. data/ext/numo/narray/src/t_uint32.c +5753 -0
  60. data/ext/numo/narray/src/t_uint64.c +5755 -0
  61. data/ext/numo/narray/src/t_uint8.c +5351 -0
  62. data/ext/numo/narray/step.c +266 -0
  63. data/ext/numo/narray/struct.c +814 -0
  64. data/lib/numo/narray/extra.rb +1266 -0
  65. data/lib/numo/narray.rb +4 -0
  66. metadata +106 -0
@@ -0,0 +1,1840 @@
1
+ /*
2
+ ndloop.c
3
+ Ruby/Numo::NArray - Numerical Array class for Ruby
4
+ Copyright (C) 1999-2020 Masahiro TANAKA
5
+ */
6
+ #include <ruby.h>
7
+
8
+ #include "numo/narray.h"
9
+
10
+ #if 0
11
+ #define DBG(x) x
12
+ #else
13
+ #define DBG(x)
14
+ #endif
15
+
16
+ #ifdef HAVE_STDARG_PROTOTYPES
17
+ #include <stdarg.h>
18
+ #define va_init_list(a, b) va_start(a, b)
19
+ #else
20
+ #include <varargs.h>
21
+ #define va_init_list(a, b) va_start(a)
22
+ #endif
23
+
24
+ typedef struct NA_BUFFER_COPY {
25
+ int ndim;
26
+ size_t elmsz;
27
+ size_t* n;
28
+ char* src_ptr;
29
+ char* buf_ptr;
30
+ na_loop_iter_t* src_iter;
31
+ na_loop_iter_t* buf_iter;
32
+ } na_buffer_copy_t;
33
+
34
+ typedef struct NA_LOOP_XARGS {
35
+ na_loop_iter_t* iter; // moved from na_loop_t
36
+ na_buffer_copy_t* bufcp; // copy data to buffer
37
+ int flag; // NDL_READ NDL_WRITE
38
+ bool free_user_iter; // alloc LARG(lp,j).iter=lp->xargs[j].iter
39
+ } na_loop_xargs_t;
40
+
41
+ typedef struct NA_MD_LOOP {
42
+ int narg;
43
+ int nin;
44
+ int ndim; // n of total dimension
45
+ unsigned int copy_flag; // set i-th bit if i-th arg is cast
46
+ void* ptr; // memory for n
47
+ na_loop_iter_t* iter_ptr; // memory for iter
48
+ size_t* n; // n of elements for each dim
49
+ na_loop_t user; // loop in user function
50
+ na_loop_xargs_t* xargs; // extra data for each arg
51
+ int writeback; // write back result to i-th arg
52
+ int init_aidx; // index of initializer argument
53
+ int reduce_dim;
54
+ int* trans_map;
55
+ VALUE vargs;
56
+ VALUE reduce;
57
+ VALUE loop_opt;
58
+ ndfunc_t* ndfunc;
59
+ void (*loop_func)(ndfunc_t*, struct NA_MD_LOOP*);
60
+ } na_md_loop_t;
61
+
62
+ #define LARG(lp, iarg) ((lp)->user.args[iarg])
63
+ #define LITER(lp, idim, iarg) ((lp)->xargs[iarg].iter[idim])
64
+ #define LITER_SRC(lp, idim) ((lp)->src_iter[idim])
65
+ #define LBUFCP(lp, j) ((lp)->xargs[j].bufcp)
66
+
67
+ #define CASTABLE(t) (RTEST(t) && (t) != OVERWRITE)
68
+
69
+ #define NDL_READ 1
70
+ #define NDL_WRITE 2
71
+ #define NDL_READ_WRITE (NDL_READ | NDL_WRITE)
72
+
73
+ static ID id_cast;
74
+ static ID id_extract;
75
+
76
+ static inline VALUE nary_type_s_cast(VALUE type, VALUE obj) {
77
+ return rb_funcall(type, id_cast, 1, obj);
78
+ }
79
+
80
+ static void print_ndfunc(ndfunc_t* nf) {
81
+ volatile VALUE t;
82
+ int i, k;
83
+ printf("ndfunc_t = 0x%" SZF "x {\n", (size_t)nf);
84
+ printf(" func = 0x%" SZF "x\n", (size_t)nf->func);
85
+ printf(" flag = 0x%" SZF "x\n", (size_t)nf->flag);
86
+ printf(" nin = %d\n", nf->nin);
87
+ printf(" nout = %d\n", nf->nout);
88
+ printf(" ain = 0x%" SZF "x\n", (size_t)nf->ain);
89
+ for (i = 0; i < nf->nin; i++) {
90
+ t = rb_inspect(nf->ain[i].type);
91
+ printf(" ain[%d].type = %s\n", i, StringValuePtr(t));
92
+ printf(" ain[%d].dim = %d\n", i, nf->ain[i].dim);
93
+ }
94
+ printf(" aout = 0x%" SZF "x\n", (size_t)nf->aout);
95
+ for (i = 0; i < nf->nout; i++) {
96
+ t = rb_inspect(nf->aout[i].type);
97
+ printf(" aout[%d].type = %s\n", i, StringValuePtr(t));
98
+ printf(" aout[%d].dim = %d\n", i, nf->aout[i].dim);
99
+ for (k = 0; k < nf->aout[i].dim; k++) {
100
+ printf(" aout[%d].shape[%d] = %" SZF "u\n", i, k, nf->aout[i].shape[k]);
101
+ }
102
+ }
103
+ printf("}\n");
104
+ }
105
+
106
+ static void print_ndloop(na_md_loop_t* lp) {
107
+ int i, j, nd;
108
+ printf("na_md_loop_t = 0x%" SZF "x {\n", (size_t)lp);
109
+ printf(" narg = %d\n", lp->narg);
110
+ printf(" nin = %d\n", lp->nin);
111
+ printf(" ndim = %d\n", lp->ndim);
112
+ printf(" copy_flag = %x\n", lp->copy_flag);
113
+ printf(" writeback = %d\n", lp->writeback);
114
+ printf(" init_aidx = %d\n", lp->init_aidx);
115
+ printf(" reduce_dim = %d\n", lp->reduce_dim);
116
+ printf(" trans_map = 0x%" SZF "x\n", (size_t)lp->trans_map);
117
+ nd = lp->ndim + lp->user.ndim;
118
+ for (i = 0; i < nd; i++) {
119
+ printf(" trans_map[%d] = %d\n", i, lp->trans_map[i]);
120
+ }
121
+ printf(" n = 0x%" SZF "x\n", (size_t)lp->n);
122
+ nd = lp->ndim + lp->user.ndim;
123
+ for (i = 0; i <= lp->ndim; i++) {
124
+ printf(" n[%d] = %" SZF "u\n", i, lp->n[i]);
125
+ }
126
+ printf(" user.n = 0x%" SZF "x\n", (size_t)lp->user.n);
127
+ if (lp->user.n) {
128
+ for (i = 0; i <= lp->user.ndim; i++) {
129
+ printf(" user.n[%d] = %" SZF "u\n", i, lp->user.n[i]);
130
+ }
131
+ }
132
+ printf(" xargs = 0x%" SZF "x\n", (size_t)lp->xargs);
133
+ printf(" iter_ptr = 0x%" SZF "x\n", (size_t)lp->iter_ptr);
134
+ printf(" user.narg = %d\n", lp->user.narg);
135
+ printf(" user.ndim = %d\n", lp->user.ndim);
136
+ printf(" user.args = 0x%" SZF "x\n", (size_t)lp->user.args);
137
+ for (j = 0; j < lp->narg; j++) {
138
+ }
139
+ printf(" user.opt_ptr = 0x%" SZF "x\n", (size_t)lp->user.opt_ptr);
140
+ if (lp->reduce == Qnil) {
141
+ printf(" reduce = nil\n");
142
+ } else {
143
+ printf(" reduce = 0x%x\n", NUM2INT(lp->reduce));
144
+ }
145
+ for (j = 0; j < lp->narg; j++) {
146
+ printf("--user.args[%d]--\n", j);
147
+ printf(" user.args[%d].ptr = 0x%" SZF "x\n", j, (size_t)LARG(lp, j).ptr);
148
+ printf(" user.args[%d].elmsz = %" SZF "d\n", j, LARG(lp, j).elmsz);
149
+ printf(" user.args[%d].value = 0x%" PRI_VALUE_PREFIX "x\n", j, LARG(lp, j).value);
150
+ printf(" user.args[%d].ndim = %d\n", j, LARG(lp, j).ndim);
151
+ printf(" user.args[%d].shape = 0x%" SZF "x\n", j, (size_t)LARG(lp, j).shape);
152
+ if (LARG(lp, j).shape) {
153
+ for (i = 0; i < LARG(lp, j).ndim; i++) {
154
+ printf(" user.args[%d].shape[%d] = %" SZF "d\n", j, i, LARG(lp, j).shape[i]);
155
+ }
156
+ }
157
+ printf(" user.args[%d].iter = 0x%" SZF "x\n", j, (size_t)lp->user.args[j].iter);
158
+ if (lp->user.args[j].iter) {
159
+ for (i = 0; i < lp->user.ndim; i++) {
160
+ printf(" &user.args[%d].iter[%d] = 0x%" SZF "x\n", j, i, (size_t)&lp->user.args[j].iter[i]);
161
+ printf(" user.args[%d].iter[%d].pos = %" SZF "u\n", j, i, lp->user.args[j].iter[i].pos);
162
+ printf(" user.args[%d].iter[%d].step = %" SZF "d\n", j, i, lp->user.args[j].iter[i].step);
163
+ printf(" user.args[%d].iter[%d].idx = 0x%" SZF "x\n", j, i, (size_t)lp->user.args[j].iter[i].idx);
164
+ }
165
+ }
166
+ //
167
+ printf(" xargs[%d].flag = %d\n", j, lp->xargs[j].flag);
168
+ printf(" xargs[%d].free_user_iter = %d\n", j, lp->xargs[j].free_user_iter);
169
+ for (i = 0; i <= nd; i++) {
170
+ printf(" &xargs[%d].iter[%d] = 0x%" SZF "x\n", j, i, (size_t)&LITER(lp, i, j));
171
+ printf(" xargs[%d].iter[%d].pos = %" SZF "u\n", j, i, LITER(lp, i, j).pos);
172
+ printf(" xargs[%d].iter[%d].step = %" SZF "d\n", j, i, LITER(lp, i, j).step);
173
+ printf(" xargs[%d].iter[%d].idx = 0x%" SZF "x\n", j, i, (size_t)LITER(lp, i, j).idx);
174
+ }
175
+ printf(" xargs[%d].bufcp = 0x%" SZF "x\n", j, (size_t)lp->xargs[j].bufcp);
176
+ if (lp->xargs[j].bufcp) {
177
+ printf(" xargs[%d].bufcp->ndim = %d\n", j, lp->xargs[j].bufcp->ndim);
178
+ printf(" xargs[%d].bufcp->elmsz = %" SZF "d\n", j, lp->xargs[j].bufcp->elmsz);
179
+ printf(" xargs[%d].bufcp->n = 0x%" SZF "x\n", j, (size_t)lp->xargs[j].bufcp->n);
180
+ printf(" xargs[%d].bufcp->src_ptr = 0x%" SZF "x\n", j, (size_t)lp->xargs[j].bufcp->src_ptr);
181
+ printf(" xargs[%d].bufcp->buf_ptr = 0x%" SZF "x\n", j, (size_t)lp->xargs[j].bufcp->buf_ptr);
182
+ printf(" xargs[%d].bufcp->src_iter = 0x%" SZF "x\n", j, (size_t)lp->xargs[j].bufcp->src_iter);
183
+ printf(" xargs[%d].bufcp->buf_iter = 0x%" SZF "x\n", j, (size_t)lp->xargs[j].bufcp->buf_iter);
184
+ }
185
+ }
186
+ printf("}\n");
187
+ }
188
+
189
+ static unsigned int ndloop_func_loop_spec(ndfunc_t* nf, int user_ndim) {
190
+ unsigned int f = 0;
191
+ // If user function supports LOOP
192
+ if (user_ndim > 0 || NDF_TEST(nf, NDF_HAS_LOOP)) {
193
+ if (!NDF_TEST(nf, NDF_STRIDE_LOOP)) {
194
+ f |= 1;
195
+ }
196
+ if (!NDF_TEST(nf, NDF_INDEX_LOOP)) {
197
+ f |= 2;
198
+ }
199
+ }
200
+ return f;
201
+ }
202
+
203
+ static int ndloop_cast_required(VALUE type, VALUE value) {
204
+ return CASTABLE(type) && type != rb_obj_class(value);
205
+ }
206
+
207
+ static int ndloop_castable_type(VALUE type) {
208
+ return rb_obj_is_kind_of(type, rb_cClass) && RTEST(rb_class_inherited_p(type, cNArray));
209
+ }
210
+
211
+ static void ndloop_cast_error(VALUE type, VALUE value) {
212
+ VALUE x = rb_inspect(type);
213
+ char* s = StringValueCStr(x);
214
+ rb_bug("fail cast from %s to %s", rb_obj_classname(value), s);
215
+ rb_raise(rb_eTypeError, "fail cast from %s to %s", rb_obj_classname(value), s);
216
+ }
217
+
218
+ // convert input argeuments given by RARRAY_PTR(args)[j]
219
+ // to type specified by nf->args[j].type
220
+ // returns copy_flag where nth-bit is set if nth argument is converted.
221
+ static unsigned int ndloop_cast_args(ndfunc_t* nf, VALUE args) {
222
+ int j;
223
+ unsigned int copy_flag = 0;
224
+ VALUE type, value;
225
+
226
+ for (j = 0; j < nf->nin; j++) {
227
+
228
+ type = nf->ain[j].type;
229
+ if (TYPE(type) == T_SYMBOL) continue;
230
+ value = RARRAY_AREF(args, j);
231
+ if (!ndloop_cast_required(type, value)) continue;
232
+
233
+ if (ndloop_castable_type(type)) {
234
+ RARRAY_ASET(args, j, nary_type_s_cast(type, value));
235
+ copy_flag |= 1 << j;
236
+ } else {
237
+ ndloop_cast_error(type, value);
238
+ }
239
+ }
240
+
241
+ RB_GC_GUARD(type);
242
+ RB_GC_GUARD(value);
243
+ return copy_flag;
244
+ }
245
+
246
+ static void ndloop_handle_symbol_in_ain(VALUE type, VALUE value, int at, na_md_loop_t* lp) {
247
+ if (type == sym_reduce) {
248
+ lp->reduce = value;
249
+ } else if (type == sym_option) {
250
+ lp->user.option = value;
251
+ } else if (type == sym_loop_opt) {
252
+ lp->loop_opt = value;
253
+ } else if (type == sym_init) {
254
+ lp->init_aidx = at;
255
+ } else {
256
+ rb_bug("ndloop parse_options: unknown type");
257
+ }
258
+ }
259
+
260
+ static inline int max2(int x, int y) {
261
+ return x > y ? x : y;
262
+ }
263
+
264
+ static void ndloop_find_max_dimension(na_md_loop_t* lp, ndfunc_t* nf, VALUE args) {
265
+ int j;
266
+ int nin = 0; // number of input objects (except for symbols)
267
+ int user_nd = 0; // max dimension of user function
268
+ int loop_nd = 0; // max dimension of md-loop
269
+
270
+ for (j = 0; j < RARRAY_LEN(args); j++) {
271
+ VALUE t = nf->ain[j].type;
272
+ VALUE v = RARRAY_AREF(args, j);
273
+ if (TYPE(t) == T_SYMBOL) {
274
+ ndloop_handle_symbol_in_ain(t, v, j, lp);
275
+ } else {
276
+ nin++;
277
+ user_nd = max2(user_nd, nf->ain[j].dim);
278
+ if (IsNArray(v)) loop_nd = max2(loop_nd, RNARRAY_NDIM(v) - nf->ain[j].dim);
279
+ }
280
+ }
281
+
282
+ lp->narg = lp->user.narg = nin + nf->nout;
283
+ lp->nin = nin;
284
+ lp->ndim = loop_nd;
285
+ lp->user.ndim = user_nd;
286
+ }
287
+
288
+ /*
289
+ user-dimension:
290
+ user_nd = MAX( nf->args[j].dim )
291
+
292
+ user-support dimension:
293
+
294
+ loop dimension:
295
+ loop_nd
296
+ */
297
+
298
+ static void ndloop_alloc(na_md_loop_t* lp, ndfunc_t* nf, VALUE args, void* opt_ptr, unsigned int copy_flag,
299
+ void (*loop_func)(ndfunc_t*, na_md_loop_t*)) {
300
+ int i, j;
301
+ int narg;
302
+ int max_nd;
303
+
304
+ char* buf;
305
+ size_t n1, n2, n3, n4, n5;
306
+
307
+ long args_len;
308
+
309
+ na_loop_iter_t* iter;
310
+
311
+ int trans_dim;
312
+ unsigned int f;
313
+
314
+ args_len = RARRAY_LEN(args);
315
+
316
+ if (args_len != nf->nin) {
317
+ rb_bug("wrong number of arguments for ndfunc (%lu for %d)", args_len, nf->nin);
318
+ }
319
+
320
+ lp->vargs = args;
321
+ lp->ndfunc = nf;
322
+ lp->loop_func = loop_func;
323
+ lp->copy_flag = copy_flag;
324
+
325
+ lp->reduce = Qnil;
326
+ lp->user.option = Qnil;
327
+ lp->user.opt_ptr = opt_ptr;
328
+ lp->user.err_type = Qfalse;
329
+ lp->loop_opt = Qnil;
330
+ lp->writeback = -1;
331
+ lp->init_aidx = -1;
332
+
333
+ lp->ptr = NULL;
334
+ lp->user.n = NULL;
335
+
336
+ ndloop_find_max_dimension(lp, nf, args);
337
+ narg = lp->nin + nf->nout;
338
+ max_nd = lp->ndim + lp->user.ndim;
339
+
340
+ n1 = sizeof(size_t) * (max_nd + 1);
341
+ n2 = sizeof(na_loop_xargs_t) * narg;
342
+ n2 = ((n2 - 1) / 8 + 1) * 8;
343
+ n3 = sizeof(na_loop_args_t) * narg;
344
+ n3 = ((n3 - 1) / 8 + 1) * 8;
345
+ n4 = sizeof(na_loop_iter_t) * narg * (max_nd + 1);
346
+ n4 = ((n4 - 1) / 8 + 1) * 8;
347
+ n5 = sizeof(int) * (max_nd + 1);
348
+
349
+ lp->ptr = buf = (char*)xmalloc(n1 + n2 + n3 + n4 + n5);
350
+ lp->n = (size_t*)buf;
351
+ buf += n1;
352
+ lp->xargs = (na_loop_xargs_t*)buf;
353
+ buf += n2;
354
+ lp->user.args = (na_loop_args_t*)buf;
355
+ buf += n3;
356
+ lp->iter_ptr = iter = (na_loop_iter_t*)buf;
357
+ buf += n4;
358
+ lp->trans_map = (int*)buf;
359
+
360
+ for (j = 0; j < narg; j++) {
361
+ LARG(lp, j).value = Qnil;
362
+ LARG(lp, j).iter = NULL;
363
+ LARG(lp, j).shape = NULL;
364
+ LARG(lp, j).ndim = 0;
365
+ lp->xargs[j].iter = &(iter[(max_nd + 1) * j]);
366
+ lp->xargs[j].bufcp = NULL;
367
+ lp->xargs[j].flag = (j < lp->nin) ? NDL_READ : NDL_WRITE;
368
+ lp->xargs[j].free_user_iter = 0;
369
+ }
370
+
371
+ for (i = 0; i <= max_nd; i++) {
372
+ lp->n[i] = 1;
373
+ for (j = 0; j < narg; j++) {
374
+ LITER(lp, i, j).pos = 0;
375
+ LITER(lp, i, j).step = 0;
376
+ LITER(lp, i, j).idx = NULL;
377
+ }
378
+ }
379
+
380
+ // transpose reduce-dimensions to last dimensions
381
+ // array loop
382
+ // [*,+,*,+,*] => [*,*,*,+,+]
383
+ // trans_map=[0,3,1,4,2] <= [0,1,2,3,4]
384
+ if (NDF_TEST(nf, NDF_FLAT_REDUCE) && RTEST(lp->reduce)) {
385
+ trans_dim = 0;
386
+ for (i = 0; i < max_nd; i++) {
387
+ if (na_test_reduce(lp->reduce, i)) {
388
+ lp->trans_map[i] = -1;
389
+ } else {
390
+ lp->trans_map[i] = trans_dim++;
391
+ }
392
+ }
393
+ j = trans_dim;
394
+ for (i = 0; i < max_nd; i++) {
395
+ if (lp->trans_map[i] == -1) {
396
+ lp->trans_map[i] = j++;
397
+ }
398
+ }
399
+ lp->reduce_dim = max_nd - trans_dim;
400
+ f = 0;
401
+ for (i = trans_dim; i < max_nd; i++) {
402
+ f |= 1 << i;
403
+ }
404
+ lp->reduce = INT2FIX(f);
405
+ } else {
406
+ for (i = 0; i < max_nd; i++) {
407
+ lp->trans_map[i] = i;
408
+ }
409
+ lp->reduce_dim = 0;
410
+ }
411
+ }
412
+
413
+ static VALUE ndloop_release(VALUE vlp) {
414
+ int j;
415
+ VALUE v;
416
+ na_md_loop_t* lp = (na_md_loop_t*)(vlp);
417
+
418
+ for (j = 0; j < lp->narg; j++) {
419
+ v = LARG(lp, j).value;
420
+ if (IsNArray(v)) {
421
+ na_release_lock(v);
422
+ }
423
+ }
424
+ for (j = 0; j < lp->narg; j++) {
425
+ // printf("lp->xargs[%d].bufcp=%lx\n",j,(size_t)(lp->xargs[j].bufcp));
426
+ if (lp->xargs[j].bufcp) {
427
+ xfree(lp->xargs[j].bufcp->buf_iter);
428
+ xfree(lp->xargs[j].bufcp->buf_ptr);
429
+ xfree(lp->xargs[j].bufcp->n);
430
+ xfree(lp->xargs[j].bufcp);
431
+ if (lp->xargs[j].free_user_iter) {
432
+ xfree(LARG(lp, j).iter);
433
+ }
434
+ }
435
+ }
436
+ xfree(lp->ptr);
437
+ return Qnil;
438
+ }
439
+
440
+ /*
441
+ set lp->n[i] (shape of n-d iteration) here
442
+ */
443
+ static void ndloop_check_shape(na_md_loop_t* lp, int nf_dim, narray_t* na) {
444
+ int i, k;
445
+ size_t n;
446
+ int dim_beg;
447
+
448
+ dim_beg = lp->ndim + nf_dim - na->ndim;
449
+
450
+ for (k = na->ndim - nf_dim - 1; k >= 0; k--) {
451
+ i = lp->trans_map[k + dim_beg];
452
+ n = na->shape[k];
453
+ // if n==1 then repeat this dimension
454
+ if (n != 1) {
455
+ if (lp->n[i] == 1) {
456
+ lp->n[i] = n;
457
+ } else if (lp->n[i] != n) {
458
+ // inconsistent array shape
459
+ rb_raise(nary_eShapeError, "shape1[%d](=%" SZF "u) != shape2[%d](=%" SZF "u)", i, lp->n[i], k, n);
460
+ }
461
+ }
462
+ }
463
+ }
464
+
465
+ /*
466
+ na->shape[i] == lp->n[ dim_map[i] ]
467
+ */
468
+ static void ndloop_set_stepidx(na_md_loop_t* lp, int j, VALUE vna, int* dim_map, int rwflag) {
469
+ size_t n, s;
470
+ int i, k, nd;
471
+ stridx_t sdx;
472
+ narray_t* na;
473
+
474
+ LARG(lp, j).value = vna;
475
+ LARG(lp, j).elmsz = nary_element_stride(vna);
476
+ if (rwflag == NDL_READ) {
477
+ LARG(lp, j).ptr = na_get_pointer_for_read(vna);
478
+ } else if (rwflag == NDL_WRITE) {
479
+ LARG(lp, j).ptr = na_get_pointer_for_write(vna);
480
+ } else if (rwflag == NDL_READ_WRITE) {
481
+ LARG(lp, j).ptr = na_get_pointer_for_read_write(vna);
482
+ } else {
483
+ rb_bug("invalid value for read-write flag");
484
+ }
485
+ GetNArray(vna, na);
486
+ nd = LARG(lp, j).ndim;
487
+
488
+ switch (NA_TYPE(na)) {
489
+ case NARRAY_DATA_T:
490
+ if (NA_DATA_PTR(na) == NULL && NA_SIZE(na) > 0) {
491
+ rb_bug("cannot read no-data NArray");
492
+ rb_raise(rb_eRuntimeError, "cannot read no-data NArray");
493
+ }
494
+ // through
495
+ case NARRAY_FILEMAP_T:
496
+ s = LARG(lp, j).elmsz;
497
+ for (k = na->ndim; k--;) {
498
+ n = na->shape[k];
499
+ if (n > 1 || nd > 0) {
500
+ i = dim_map[k];
501
+ // printf("n=%d k=%d i=%d\n",n,k,i);
502
+ LITER(lp, i, j).step = s;
503
+ // LITER(lp,i,j).idx = NULL;
504
+ }
505
+ s *= n;
506
+ nd--;
507
+ }
508
+ LITER(lp, 0, j).pos = 0;
509
+ break;
510
+ case NARRAY_VIEW_T:
511
+ LITER(lp, 0, j).pos = NA_VIEW_OFFSET(na);
512
+ for (k = 0; k < na->ndim; k++) {
513
+ n = na->shape[k];
514
+ sdx = NA_VIEW_STRIDX(na)[k];
515
+ if (n > 1 || nd > 0) {
516
+ i = dim_map[k];
517
+ if (SDX_IS_INDEX(sdx)) {
518
+ LITER(lp, i, j).step = 0;
519
+ LITER(lp, i, j).idx = SDX_GET_INDEX(sdx);
520
+ } else {
521
+ LITER(lp, i, j).step = SDX_GET_STRIDE(sdx);
522
+ // LITER(lp,i,j).idx = NULL;
523
+ }
524
+ } else if (n == 1) {
525
+ if (SDX_IS_INDEX(sdx)) {
526
+ LITER(lp, 0, j).pos += SDX_GET_INDEX(sdx)[0];
527
+ }
528
+ }
529
+ nd--;
530
+ }
531
+ break;
532
+ default:
533
+ rb_bug("invalid narray internal type");
534
+ }
535
+ }
536
+
537
+ static void ndloop_init_args(ndfunc_t* nf, na_md_loop_t* lp, VALUE args) {
538
+ int i, j;
539
+ VALUE v;
540
+ narray_t* na;
541
+ int nf_dim;
542
+ int dim_beg;
543
+ int* dim_map;
544
+ int max_nd = lp->ndim + lp->user.ndim;
545
+ int flag;
546
+ size_t s;
547
+
548
+ /*
549
+ na->shape[i] == lp->n[ dim_map[i] ]
550
+ */
551
+ dim_map = ALLOCA_N(int, max_nd);
552
+
553
+ // input arguments
554
+ for (j = 0; j < nf->nin; j++) {
555
+ if (TYPE(nf->ain[j].type) == T_SYMBOL) {
556
+ continue;
557
+ }
558
+ v = RARRAY_AREF(args, j);
559
+ if (IsNArray(v)) {
560
+ // set LARG(lp,j) with v
561
+ GetNArray(v, na);
562
+ nf_dim = nf->ain[j].dim;
563
+ if (nf_dim > na->ndim) {
564
+ rb_raise(nary_eDimensionError,
565
+ "requires >= %d-dimensioal array "
566
+ "while %d-dimensional array is given",
567
+ nf_dim, na->ndim);
568
+ }
569
+ ndloop_check_shape(lp, nf_dim, na);
570
+ dim_beg = lp->ndim + nf->ain[j].dim - na->ndim;
571
+ for (i = 0; i < na->ndim; i++) {
572
+ dim_map[i] = lp->trans_map[i + dim_beg];
573
+ // printf("dim_map[%d]=%d na->shape[%d]=%d\n",i,dim_map[i],i,na->shape[i]);
574
+ }
575
+ if (nf->ain[j].type == OVERWRITE) {
576
+ lp->xargs[j].flag = flag = NDL_WRITE;
577
+ } else {
578
+ lp->xargs[j].flag = flag = NDL_READ;
579
+ }
580
+ LARG(lp, j).ndim = nf_dim;
581
+ ndloop_set_stepidx(lp, j, v, dim_map, flag);
582
+ if (nf_dim > 0) {
583
+ LARG(lp, j).shape = na->shape + (na->ndim - nf_dim);
584
+ }
585
+ } else if (TYPE(v) == T_ARRAY) {
586
+ LARG(lp, j).value = v;
587
+ LARG(lp, j).elmsz = sizeof(VALUE);
588
+ LARG(lp, j).ptr = NULL;
589
+ for (i = 0; i <= max_nd; i++) {
590
+ LITER(lp, i, j).step = 1;
591
+ }
592
+ }
593
+ }
594
+ // check whether # of element is zero
595
+ for (s = 1, i = 0; i <= max_nd; i++) {
596
+ s *= lp->n[i];
597
+ }
598
+ if (s == 0) {
599
+ for (i = 0; i <= max_nd; i++) {
600
+ lp->n[i] = 0;
601
+ }
602
+ }
603
+ }
604
+
605
+ static int ndloop_check_inplace(VALUE type, int na_ndim, size_t* na_shape, VALUE v) {
606
+ int i;
607
+ narray_t* na;
608
+
609
+ // type check
610
+ if (type != rb_obj_class(v)) {
611
+ return 0;
612
+ }
613
+ GetNArray(v, na);
614
+ // shape check
615
+ if (na->ndim != na_ndim) {
616
+ return 0;
617
+ }
618
+ for (i = 0; i < na_ndim; i++) {
619
+ if (na_shape[i] != na->shape[i]) {
620
+ return 0;
621
+ }
622
+ }
623
+ // v is selected as output
624
+ return 1;
625
+ }
626
+
627
+ static VALUE ndloop_find_inplace(ndfunc_t* nf, na_md_loop_t* lp, VALUE type, int na_ndim, size_t* na_shape, VALUE args) {
628
+ int j;
629
+ VALUE v;
630
+
631
+ // find inplace
632
+ for (j = 0; j < nf->nin; j++) {
633
+ v = RARRAY_AREF(args, j);
634
+ if (IsNArray(v)) {
635
+ if (TEST_INPLACE(v)) {
636
+ if (ndloop_check_inplace(type, na_ndim, na_shape, v)) {
637
+ // if already copied, create outary and write-back
638
+ if (lp->copy_flag & (1 << j)) {
639
+ lp->writeback = j;
640
+ }
641
+ return v;
642
+ }
643
+ }
644
+ }
645
+ }
646
+ // find casted or copied input array
647
+ for (j = 0; j < nf->nin; j++) {
648
+ if (lp->copy_flag & (1 << j)) {
649
+ v = RARRAY_AREF(args, j);
650
+ if (ndloop_check_inplace(type, na_ndim, na_shape, v)) {
651
+ return v;
652
+ }
653
+ }
654
+ }
655
+ return Qnil;
656
+ }
657
+
658
+ static VALUE ndloop_get_arg_type(ndfunc_t* nf, VALUE args, VALUE t) {
659
+ int i;
660
+
661
+ // if type is FIXNUM, get the type of i-th argument
662
+ if (FIXNUM_P(t)) {
663
+ i = FIX2INT(t);
664
+ if (i < 0 || i >= nf->nin) {
665
+ rb_bug("invalid type: index (%d) out of # of args", i);
666
+ }
667
+ t = nf->ain[i].type;
668
+ // if i-th type is Qnil, get the type of i-th input value
669
+ if (!CASTABLE(t)) {
670
+ t = rb_obj_class(RARRAY_AREF(args, i));
671
+ }
672
+ }
673
+ return t;
674
+ }
675
+
676
+ static VALUE ndloop_set_output_narray(ndfunc_t* nf, na_md_loop_t* lp, int k, VALUE type, VALUE args) {
677
+ int i, j;
678
+ int na_ndim;
679
+ int lp_dim;
680
+ volatile VALUE v = Qnil;
681
+ size_t* na_shape;
682
+ int* dim_map;
683
+ int flag = NDL_READ_WRITE;
684
+ int nd;
685
+ int max_nd = lp->ndim + nf->aout[k].dim;
686
+
687
+ na_shape = ALLOCA_N(size_t, max_nd);
688
+ dim_map = ALLOCA_N(int, max_nd);
689
+
690
+ // printf("max_nd=%d lp->ndim=%d\n",max_nd,lp->ndim);
691
+
692
+ // md-loop shape
693
+ na_ndim = 0;
694
+ for (i = 0; i < lp->ndim; i++) {
695
+ // na_shape[i] == lp->n[lp->trans_map[i]]
696
+ lp_dim = lp->trans_map[i];
697
+ // printf("i=%d lp_dim=%d\n",i,lp_dim);
698
+ if (NDF_TEST(nf, NDF_CUM)) { // cumulate with shape kept
699
+ na_shape[na_ndim] = lp->n[lp_dim];
700
+ } else if (na_test_reduce(lp->reduce, lp_dim)) { // accumulate dimension
701
+ if (NDF_TEST(nf, NDF_KEEP_DIM)) {
702
+ na_shape[na_ndim] = 1; // leave it
703
+ } else {
704
+ continue; // delete dimension
705
+ }
706
+ } else {
707
+ na_shape[na_ndim] = lp->n[lp_dim];
708
+ }
709
+ // printf("i=%d lp_dim=%d na_shape[%d]=%ld\n",i,lp_dim,i,na_shape[i]);
710
+ dim_map[na_ndim++] = lp_dim;
711
+ // dim_map[lp_dim] = na_ndim++;
712
+ }
713
+
714
+ // user-specified shape
715
+ for (i = 0; i < nf->aout[k].dim; i++) {
716
+ na_shape[na_ndim] = nf->aout[k].shape[i];
717
+ dim_map[na_ndim++] = i + lp->ndim;
718
+ }
719
+
720
+ // find inplace from input arrays
721
+ if (k == 0 && NDF_TEST(nf, NDF_INPLACE)) {
722
+ v = ndloop_find_inplace(nf, lp, type, na_ndim, na_shape, args);
723
+ }
724
+ if (!RTEST(v)) {
725
+ // new object
726
+ v = nary_new(type, na_ndim, na_shape);
727
+ flag = NDL_WRITE;
728
+ }
729
+
730
+ j = lp->nin + k;
731
+ LARG(lp, j).ndim = nd = nf->aout[k].dim;
732
+ ndloop_set_stepidx(lp, j, v, dim_map, flag);
733
+ if (nd > 0) {
734
+ LARG(lp, j).shape = nf->aout[k].shape;
735
+ }
736
+
737
+ return v;
738
+ }
739
+
740
+ static VALUE ndloop_set_output(ndfunc_t* nf, na_md_loop_t* lp, VALUE args) {
741
+ int i, j, k, idx;
742
+ volatile VALUE v, t, results;
743
+ VALUE init;
744
+
745
+ int max_nd = lp->ndim + lp->user.ndim;
746
+
747
+ // output results
748
+ results = rb_ary_new2(nf->nout);
749
+
750
+ for (k = 0; k < nf->nout; k++) {
751
+ t = nf->aout[k].type;
752
+ t = ndloop_get_arg_type(nf, args, t);
753
+
754
+ if (rb_obj_is_kind_of(t, rb_cClass)) {
755
+ if (RTEST(rb_class_inherited_p(t, cNArray))) {
756
+ // NArray
757
+ v = ndloop_set_output_narray(nf, lp, k, t, args);
758
+ rb_ary_push(results, v);
759
+ } else if (RTEST(rb_class_inherited_p(t, rb_cArray))) {
760
+ // Ruby Array
761
+ j = lp->nin + k;
762
+ for (i = 0; i <= max_nd; i++) {
763
+ LITER(lp, i, j).step = sizeof(VALUE);
764
+ }
765
+ LARG(lp, j).value = t;
766
+ LARG(lp, j).elmsz = sizeof(VALUE);
767
+ } else {
768
+ rb_raise(rb_eRuntimeError, "ndloop_set_output: invalid for type");
769
+ }
770
+ }
771
+ }
772
+
773
+ // initialilzer
774
+ k = lp->init_aidx;
775
+ if (k > -1) {
776
+ idx = nf->ain[k].dim;
777
+ v = RARRAY_AREF(results, idx);
778
+ init = RARRAY_AREF(args, k);
779
+ na_store(v, init);
780
+ }
781
+
782
+ return results;
783
+ }
784
+
785
+ static void ndfunc_contract_loop(na_md_loop_t* lp) {
786
+ int i, j, k, success, cnt = 0;
787
+ int red0, redi;
788
+
789
+ redi = na_test_reduce(lp->reduce, 0);
790
+
791
+ // for (i=0; i<lp->ndim; i++) {
792
+ // printf("lp->n[%d]=%lu\n",i,lp->n[i]);
793
+ // }
794
+
795
+ for (i = 1; i < lp->ndim; i++) {
796
+ red0 = redi;
797
+ redi = na_test_reduce(lp->reduce, i);
798
+ // printf("contract i=%d reduce_cond=%d %d\n",i,red0,redi);
799
+ if (red0 != redi) {
800
+ continue;
801
+ }
802
+ success = 1;
803
+ for (j = 0; j < lp->narg; j++) {
804
+ if (!(LITER(lp, i, j).idx == NULL && LITER(lp, i - 1, j).idx == NULL &&
805
+ LITER(lp, i - 1, j).step == LITER(lp, i, j).step * (ssize_t)(lp->n[i]))) {
806
+ success = 0;
807
+ break;
808
+ }
809
+ }
810
+ if (success) {
811
+ // printf("contract i=%d-th and %d-th, lp->n[%d]=%"SZF"d, lp->n[%d]=%"SZF"d\n",
812
+ // i-1,i, i,lp->n[i], i-1,lp->n[i-1]);
813
+ // contract (i-1)-th and i-th dimension
814
+ lp->n[i] *= lp->n[i - 1];
815
+ // shift dimensions
816
+ for (k = i - 1; k > cnt; k--) {
817
+ lp->n[k] = lp->n[k - 1];
818
+ }
819
+ // printf("k=%d\n",k);
820
+ for (; k >= 0; k--) {
821
+ lp->n[k] = 1;
822
+ }
823
+ for (j = 0; j < lp->narg; j++) {
824
+ for (k = i - 1; k > cnt; k--) {
825
+ LITER(lp, k, j) = LITER(lp, k - 1, j);
826
+ }
827
+ }
828
+ if (redi) {
829
+ lp->reduce_dim--;
830
+ }
831
+ cnt++;
832
+ }
833
+ }
834
+ // printf("contract cnt=%d\n",cnt);
835
+ if (cnt > 0) {
836
+ for (j = 0; j < lp->narg; j++) {
837
+ LITER(lp, cnt, j).pos = LITER(lp, 0, j).pos;
838
+ lp->xargs[j].iter = &LITER(lp, cnt, j);
839
+ }
840
+ lp->n = &(lp->n[cnt]);
841
+ lp->ndim -= cnt;
842
+ // for (i=0; i<lp->ndim; i++) {printf("lp->n[%d]=%lu\n",i,lp->n[i]);}
843
+ }
844
+ }
845
+
846
+ static void ndfunc_set_user_loop(ndfunc_t* nf, na_md_loop_t* lp) {
847
+ int j, ud = 0;
848
+
849
+ if (lp->reduce_dim > 0) {
850
+ ud = lp->reduce_dim;
851
+ } else if (lp->ndim > 0 && NDF_TEST(nf, NDF_HAS_LOOP)) {
852
+ ud = 1;
853
+ } else {
854
+ goto skip_ud;
855
+ }
856
+ if (ud > lp->ndim) {
857
+ rb_bug("Reduce-dimension is larger than loop-dimension");
858
+ }
859
+ // increase user dimension
860
+ lp->user.ndim += ud;
861
+ lp->ndim -= ud;
862
+ for (j = 0; j < lp->narg; j++) {
863
+ if (LARG(lp, j).shape) {
864
+ rb_bug("HAS_LOOP or reduce-dimension=%d conflicts with user-dimension", lp->reduce_dim);
865
+ }
866
+ LARG(lp, j).ndim += ud;
867
+ LARG(lp, j).shape = &(lp->n[lp->ndim]);
868
+ // printf("LARG(lp,j).ndim=%d,LARG(lp,j).shape=%lx\n",LARG(lp,j).ndim,(size_t)LARG(lp,j).shape);
869
+ }
870
+ // printf("lp->reduce_dim=%d lp->user.ndim=%d lp->ndim=%d\n",lp->reduce_dim,lp->user.ndim,lp->ndim);
871
+
872
+ skip_ud:
873
+ lp->user.n = &(lp->n[lp->ndim]);
874
+ for (j = 0; j < lp->narg; j++) {
875
+ LARG(lp, j).iter = &LITER(lp, lp->ndim, j);
876
+ // printf("in ndfunc_set_user_loop: lp->user.args[%d].iter=%lx\n",j,(size_t)(LARG(lp,j).iter));
877
+ }
878
+ }
879
+
880
+ static void ndfunc_set_bufcp(na_md_loop_t* lp, unsigned int loop_spec) {
881
+ unsigned int f;
882
+ int i, j;
883
+ int nd, ndim;
884
+ bool zero_step;
885
+ ssize_t n, sz, elmsz, stride, n_total; //, last_step;
886
+ size_t* buf_shape;
887
+ na_loop_iter_t *buf_iter = NULL, *src_iter;
888
+
889
+ // if (loop_spec==0) return;
890
+
891
+ n_total = lp->user.n[0];
892
+ for (i = 1; i < lp->user.ndim; i++) {
893
+ n_total *= lp->user.n[i];
894
+ }
895
+
896
+ // for (j=0; j<lp->nin; j++) {
897
+ for (j = 0; j < lp->narg; j++) {
898
+ // ndim = nd = lp->user.ndim;
899
+ ndim = nd = LARG(lp, j).ndim;
900
+ sz = elmsz = LARG(lp, j).elmsz;
901
+ src_iter = LARG(lp, j).iter;
902
+ // last_step = src_iter[ndim-1].step;
903
+ f = 0;
904
+ zero_step = 1;
905
+ for (i = ndim; i > 0;) {
906
+ i--;
907
+ if (LARG(lp, j).shape) {
908
+ n = LARG(lp, j).shape[i];
909
+ } else {
910
+ // printf("shape is NULL\n");
911
+ n = lp->user.n[i];
912
+ }
913
+ stride = sz * n;
914
+ // printf("{j=%d,i=%d,ndim=%d,nd=%d,idx=%lx,step=%ld,n=%ld,sz=%ld,stride=%ld}\n",j,i,ndim,nd,(size_t)src_iter[i].idx,src_iter[i].step,n,sz,stride);
915
+ if (src_iter[i].idx) {
916
+ f |= 2; // INDEX LOOP
917
+ zero_step = 0;
918
+ } else {
919
+ if (src_iter[i].step != sz) {
920
+ f |= 1; // NON_CONTIGUOUS LOOP
921
+ } else {
922
+ // CONTIGUOUS LOOP
923
+ if (i == ndim - 1) { // contract if last dimension
924
+ ndim = i;
925
+ elmsz = stride;
926
+ }
927
+ }
928
+ if (src_iter[i].step != 0) {
929
+ zero_step = 0;
930
+ }
931
+ }
932
+ sz = stride;
933
+ }
934
+ // printf("[j=%d f=%d loop_spec=%d zero_step=%d]\n",j,f,loop_spec,zero_step);
935
+
936
+ if (zero_step) {
937
+ // no buffer needed
938
+ continue;
939
+ }
940
+
941
+ // should check flatten-able loop to avoid buffering
942
+
943
+ // over loop_spec or reduce_loop is not contiguous
944
+ if (f & loop_spec || (lp->reduce_dim > 1 && ndim > 0)) {
945
+ // printf("(buf,nd=%d)",nd);
946
+ buf_iter = ALLOC_N(na_loop_iter_t, nd + 3);
947
+ buf_shape = ALLOC_N(size_t, nd);
948
+ buf_iter[nd].pos = 0;
949
+ buf_iter[nd].step = 0;
950
+ buf_iter[nd].idx = NULL;
951
+ sz = LARG(lp, j).elmsz;
952
+ // last_step = sz;
953
+ for (i = nd; i > 0;) {
954
+ i--;
955
+ buf_iter[i].pos = 0;
956
+ buf_iter[i].step = sz;
957
+ buf_iter[i].idx = NULL;
958
+ // n = lp->user.n[i];
959
+ n = LARG(lp, j).shape[i];
960
+ buf_shape[i] = n;
961
+ sz *= n;
962
+ }
963
+ LBUFCP(lp, j) = ALLOC(na_buffer_copy_t);
964
+ LBUFCP(lp, j)->ndim = ndim;
965
+ LBUFCP(lp, j)->elmsz = elmsz;
966
+ LBUFCP(lp, j)->n = buf_shape;
967
+ LBUFCP(lp, j)->src_iter = src_iter;
968
+ LBUFCP(lp, j)->buf_iter = buf_iter;
969
+ LARG(lp, j).iter = buf_iter;
970
+ // printf("in ndfunc_set_bufcp(1): lp->user.args[%d].iter=%lx\n",j,(size_t)(LARG(lp,j).iter));
971
+ LBUFCP(lp, j)->src_ptr = LARG(lp, j).ptr;
972
+ LARG(lp, j).ptr = LBUFCP(lp, j)->buf_ptr = xmalloc(sz);
973
+ // printf("(LBUFCP(lp,%d)->buf_ptr=%lx)\n",j,(size_t)(LBUFCP(lp,j)->buf_ptr));
974
+ }
975
+ }
976
+
977
+ #if 0
978
+ for (j=0; j<lp->narg; j++) {
979
+ ndim = lp->user.ndim;
980
+ src_iter = LARG(lp,j).iter;
981
+ last_step = src_iter[ndim-1].step;
982
+ if (lp->reduce_dim>1) {
983
+ //printf("(reduce_dim=%d,ndim=%d,nd=%d,n=%ld,lst=%ld)\n",lp->reduce_dim,ndim,nd,n_total,last_step);
984
+ buf_iter = ALLOC_N(na_loop_iter_t,2);
985
+ buf_iter[0].pos = LARG(lp,j).iter[0].pos;
986
+ buf_iter[0].step = last_step;
987
+ buf_iter[0].idx = NULL;
988
+ buf_iter[1].pos = 0;
989
+ buf_iter[1].step = 0;
990
+ buf_iter[1].idx = NULL;
991
+ LARG(lp,j).iter = buf_iter;
992
+ //printf("in ndfunc_set_bufcp(2): lp->user.args[%d].iter=%lx\n",j,(size_t)(LARG(lp,j).iter));
993
+ lp->xargs[j].free_user_iter = 1;
994
+ }
995
+ }
996
+ #endif
997
+
998
+ // flatten reduce dimensions
999
+ if (lp->reduce_dim > 1) {
1000
+ #if 1
1001
+ for (j = 0; j < lp->narg; j++) {
1002
+ ndim = lp->user.ndim;
1003
+ LARG(lp, j).iter[0].step = LARG(lp, j).iter[ndim - 1].step;
1004
+ LARG(lp, j).iter[0].idx = NULL;
1005
+ }
1006
+ #endif
1007
+ lp->user.n[0] = n_total;
1008
+ lp->user.ndim = 1;
1009
+ }
1010
+ }
1011
+
1012
+ static void ndloop_copy_to_buffer(na_buffer_copy_t* lp) {
1013
+ size_t* c;
1014
+ char *src, *buf;
1015
+ int i;
1016
+ int nd = lp->ndim;
1017
+ size_t elmsz = lp->elmsz;
1018
+ size_t buf_pos = 0;
1019
+ DBG(size_t j);
1020
+
1021
+ // printf("\nto_buf nd=%d elmsz=%ld\n",nd,elmsz);
1022
+ DBG(printf("<to buf> ["));
1023
+ // zero-dimension
1024
+ if (nd == 0) {
1025
+ src = lp->src_ptr + LITER_SRC(lp, 0).pos;
1026
+ buf = lp->buf_ptr;
1027
+ memcpy(buf, src, elmsz);
1028
+ DBG(for (j = 0; j < elmsz / 8; j++) { printf("%g,", ((double*)(buf))[j]); });
1029
+ goto loop_end;
1030
+ }
1031
+ // initialize loop counter
1032
+ c = ALLOCA_N(size_t, nd + 1);
1033
+ for (i = 0; i <= nd; i++) c[i] = 0;
1034
+ // loop body
1035
+ for (i = 0;;) {
1036
+ // i-th dimension
1037
+ for (; i < nd; i++) {
1038
+ if (LITER_SRC(lp, i).idx) {
1039
+ LITER_SRC(lp, i + 1).pos = LITER_SRC(lp, i).pos + LITER_SRC(lp, i).idx[c[i]];
1040
+ } else {
1041
+ LITER_SRC(lp, i + 1).pos = LITER_SRC(lp, i).pos + LITER_SRC(lp, i).step * c[i];
1042
+ }
1043
+ }
1044
+ src = lp->src_ptr + LITER_SRC(lp, nd).pos;
1045
+ buf = lp->buf_ptr + buf_pos;
1046
+ memcpy(buf, src, elmsz);
1047
+ DBG(for (j = 0; j < elmsz / 8; j++) { printf("%g,", ((double*)(buf))[j]); });
1048
+ buf_pos += elmsz;
1049
+ // count up
1050
+ for (;;) {
1051
+ if (i <= 0) goto loop_end;
1052
+ i--;
1053
+ if (++c[i] < lp->n[i]) break;
1054
+ c[i] = 0;
1055
+ }
1056
+ }
1057
+ loop_end:;
1058
+ DBG(printf("]\n"));
1059
+ }
1060
+
1061
+ static void ndloop_copy_from_buffer(na_buffer_copy_t* lp) {
1062
+ size_t* c;
1063
+ char *src, *buf;
1064
+ int i;
1065
+ int nd = lp->ndim;
1066
+ size_t elmsz = lp->elmsz;
1067
+ size_t buf_pos = 0;
1068
+ DBG(size_t j);
1069
+
1070
+ // printf("\nfrom_buf nd=%d elmsz=%ld\n",nd,elmsz);
1071
+ DBG(printf("<from buf> ["));
1072
+ // zero-dimension
1073
+ if (nd == 0) {
1074
+ src = lp->src_ptr + LITER_SRC(lp, 0).pos;
1075
+ buf = lp->buf_ptr;
1076
+ memcpy(src, buf, elmsz);
1077
+ DBG(for (j = 0; j < elmsz / 8; j++) { printf("%g,", ((double*)(src))[j]); });
1078
+ goto loop_end;
1079
+ }
1080
+ // initialize loop counter
1081
+ c = ALLOCA_N(size_t, nd + 1);
1082
+ for (i = 0; i <= nd; i++) c[i] = 0;
1083
+ // loop body
1084
+ for (i = 0;;) {
1085
+ // i-th dimension
1086
+ for (; i < nd; i++) {
1087
+ if (LITER_SRC(lp, i).idx) {
1088
+ LITER_SRC(lp, i + 1).pos = LITER_SRC(lp, i).pos + LITER_SRC(lp, i).idx[c[i]];
1089
+ } else {
1090
+ LITER_SRC(lp, i + 1).pos = LITER_SRC(lp, i).pos + LITER_SRC(lp, i).step * c[i];
1091
+ }
1092
+ }
1093
+ src = lp->src_ptr + LITER_SRC(lp, nd).pos;
1094
+ buf = lp->buf_ptr + buf_pos;
1095
+ memcpy(src, buf, elmsz);
1096
+ DBG(for (j = 0; j < elmsz / 8; j++) { printf("%g,", ((double*)(src))[j]); });
1097
+ buf_pos += elmsz;
1098
+ // count up
1099
+ for (;;) {
1100
+ if (i <= 0) goto loop_end;
1101
+ i--;
1102
+ if (++c[i] < lp->n[i]) break;
1103
+ c[i] = 0;
1104
+ }
1105
+ }
1106
+ loop_end:
1107
+ DBG(printf("]\n"));
1108
+ }
1109
+
1110
+ static void ndfunc_write_back(ndfunc_t* nf, na_md_loop_t* lp, VALUE orig_args, VALUE results) {
1111
+ VALUE src, dst;
1112
+
1113
+ if (lp->writeback >= 0) {
1114
+ dst = RARRAY_AREF(orig_args, lp->writeback);
1115
+ src = RARRAY_AREF(results, 0);
1116
+ na_store(dst, src);
1117
+ RARRAY_ASET(results, 0, dst);
1118
+ }
1119
+ }
1120
+
1121
+ static VALUE ndloop_extract(VALUE results, ndfunc_t* nf) {
1122
+ long n, i;
1123
+ VALUE x, y;
1124
+ narray_t* na;
1125
+
1126
+ // extract result objects
1127
+ switch (nf->nout) {
1128
+ case 0:
1129
+ return Qnil;
1130
+ case 1:
1131
+ x = RARRAY_AREF(results, 0);
1132
+ if (NDF_TEST(nf, NDF_EXTRACT)) {
1133
+ if (IsNArray(x)) {
1134
+ GetNArray(x, na);
1135
+ if (NA_NDIM(na) == 0) {
1136
+ x = rb_funcall(x, id_extract, 0);
1137
+ }
1138
+ }
1139
+ }
1140
+ return x;
1141
+ }
1142
+ if (NDF_TEST(nf, NDF_EXTRACT)) {
1143
+ n = RARRAY_LEN(results);
1144
+ for (i = 0; i < n; i++) {
1145
+ x = RARRAY_AREF(results, i);
1146
+ if (IsNArray(x)) {
1147
+ GetNArray(x, na);
1148
+ if (NA_NDIM(na) == 0) {
1149
+ y = rb_funcall(x, id_extract, 0);
1150
+ RARRAY_ASET(results, i, y);
1151
+ }
1152
+ }
1153
+ }
1154
+ }
1155
+ return results;
1156
+ }
1157
+
1158
+ static void loop_narray(ndfunc_t* nf, na_md_loop_t* lp);
1159
+
1160
+ static VALUE ndloop_run(VALUE vlp) {
1161
+ unsigned int loop_spec;
1162
+ volatile VALUE args, orig_args, results;
1163
+ na_md_loop_t* lp = (na_md_loop_t*)(vlp);
1164
+ ndfunc_t* nf;
1165
+
1166
+ orig_args = lp->vargs;
1167
+ nf = lp->ndfunc;
1168
+
1169
+ args = rb_obj_dup(orig_args);
1170
+
1171
+ // setup ndloop iterator with arguments
1172
+ ndloop_init_args(nf, lp, args);
1173
+ results = ndloop_set_output(nf, lp, args);
1174
+
1175
+ // if (na_debug_flag) {
1176
+ // printf("-- ndloop_set_output --\n");
1177
+ // print_ndloop(lp);
1178
+ // }
1179
+
1180
+ // contract loop
1181
+ if (lp->loop_func == loop_narray) {
1182
+ ndfunc_contract_loop(lp);
1183
+ // if (na_debug_flag) {
1184
+ // printf("-- ndfunc_contract_loop --\n");
1185
+ // print_ndloop(lp);
1186
+ // }
1187
+ }
1188
+
1189
+ // setup objects in which results are stored
1190
+ ndfunc_set_user_loop(nf, lp);
1191
+
1192
+ // setup buffering during loop
1193
+ if (lp->loop_func == loop_narray) {
1194
+ loop_spec = ndloop_func_loop_spec(nf, lp->user.ndim);
1195
+ ndfunc_set_bufcp(lp, loop_spec);
1196
+ }
1197
+ if (na_debug_flag) {
1198
+ printf("-- ndfunc_set_bufcp --\n");
1199
+ print_ndloop(lp);
1200
+ }
1201
+
1202
+ // loop
1203
+ (*(lp->loop_func))(nf, lp);
1204
+
1205
+ // if (na_debug_flag) {
1206
+ // printf("-- after loop --\n");
1207
+ // print_ndloop(lp);
1208
+ // }
1209
+
1210
+ if (RTEST(lp->user.err_type)) {
1211
+ rb_raise(lp->user.err_type, "error in NArray operation");
1212
+ }
1213
+
1214
+ // write-back will be placed here
1215
+ ndfunc_write_back(nf, lp, orig_args, results);
1216
+
1217
+ // extract result objects
1218
+ return ndloop_extract(results, nf);
1219
+ }
1220
+
1221
+ // ---------------------------------------------------------------------------
1222
+
1223
+ static void loop_narray(ndfunc_t* nf, na_md_loop_t* lp) {
1224
+ size_t* c;
1225
+ int i, j;
1226
+ int nd = lp->ndim;
1227
+
1228
+ if (nd < 0) {
1229
+ rb_bug("bug? lp->ndim = %d\n", lp->ndim);
1230
+ }
1231
+
1232
+ if (nd == 0) {
1233
+ for (j = 0; j < lp->nin; j++) {
1234
+ if (lp->xargs[j].bufcp) {
1235
+ // printf("copy_to_buffer j=%d\n",j);
1236
+ ndloop_copy_to_buffer(lp->xargs[j].bufcp);
1237
+ }
1238
+ }
1239
+ (*(nf->func))(&(lp->user));
1240
+ for (j = 0; j < lp->narg; j++) {
1241
+ if (lp->xargs[j].bufcp && (lp->xargs[j].flag & NDL_WRITE)) {
1242
+ // printf("copy_from_buffer j=%d\n",j);
1243
+ // copy data to work buffer
1244
+ ndloop_copy_from_buffer(lp->xargs[j].bufcp);
1245
+ }
1246
+ }
1247
+ return;
1248
+ }
1249
+
1250
+ // initialize loop counter
1251
+ c = ALLOCA_N(size_t, nd + 1);
1252
+ for (i = 0; i <= nd; i++) c[i] = 0;
1253
+
1254
+ // loop body
1255
+ for (i = 0;;) {
1256
+ // i-th dimension
1257
+ for (; i < nd; i++) {
1258
+ // j-th argument
1259
+ for (j = 0; j < lp->narg; j++) {
1260
+ if (LITER(lp, i, j).idx) {
1261
+ LITER(lp, i + 1, j).pos = LITER(lp, i, j).pos + LITER(lp, i, j).idx[c[i]];
1262
+ } else {
1263
+ LITER(lp, i + 1, j).pos = LITER(lp, i, j).pos + LITER(lp, i, j).step * c[i];
1264
+ }
1265
+ // printf("j=%d c[i=%d]=%lu pos=%lu\n",j,i,c[i],LITER(lp,i+1,j).pos);
1266
+ }
1267
+ }
1268
+ for (j = 0; j < lp->nin; j++) {
1269
+ if (lp->xargs[j].bufcp) {
1270
+ // copy data to work buffer
1271
+ // cp lp->iter[j][nd..*] to lp->user.args[j].iter[0..*]
1272
+ // printf("copy_to_buffer j=%d\n",j);
1273
+ ndloop_copy_to_buffer(lp->xargs[j].bufcp);
1274
+ }
1275
+ }
1276
+ (*(nf->func))(&(lp->user));
1277
+ for (j = 0; j < lp->narg; j++) {
1278
+ if (lp->xargs[j].bufcp && (lp->xargs[j].flag & NDL_WRITE)) {
1279
+ // copy data to work buffer
1280
+ // printf("copy_from_buffer j=%d\n",j);
1281
+ ndloop_copy_from_buffer(lp->xargs[j].bufcp);
1282
+ }
1283
+ }
1284
+ if (RTEST(lp->user.err_type)) {
1285
+ return;
1286
+ }
1287
+
1288
+ for (;;) {
1289
+ if (i <= 0) goto loop_end;
1290
+ i--;
1291
+ if (++c[i] < lp->n[i]) break;
1292
+ c[i] = 0;
1293
+ }
1294
+ }
1295
+ loop_end:;
1296
+ }
1297
+
1298
+ static VALUE na_ndloop_main(ndfunc_t* nf, VALUE args, void* opt_ptr) {
1299
+ unsigned int copy_flag;
1300
+ na_md_loop_t lp;
1301
+
1302
+ if (na_debug_flag) print_ndfunc(nf);
1303
+
1304
+ // cast arguments to NArray
1305
+ copy_flag = ndloop_cast_args(nf, args);
1306
+
1307
+ // allocate ndloop struct
1308
+ ndloop_alloc(&lp, nf, args, opt_ptr, copy_flag, loop_narray);
1309
+
1310
+ return rb_ensure(ndloop_run, (VALUE)&lp, ndloop_release, (VALUE)&lp);
1311
+ }
1312
+
1313
+ VALUE
1314
+ #ifdef HAVE_STDARG_PROTOTYPES
1315
+ na_ndloop(ndfunc_t* nf, int argc, ...)
1316
+ #else
1317
+ na_ndloop(nf, argc, va_alist) ndfunc_t* nf;
1318
+ int argc;
1319
+ va_dcl
1320
+ #endif
1321
+ {
1322
+ va_list ar;
1323
+
1324
+ int i;
1325
+ VALUE* argv;
1326
+ volatile VALUE args;
1327
+
1328
+ argv = ALLOCA_N(VALUE, argc);
1329
+
1330
+ va_init_list(ar, argc);
1331
+ for (i = 0; i < argc; i++) {
1332
+ argv[i] = va_arg(ar, VALUE);
1333
+ }
1334
+ va_end(ar);
1335
+
1336
+ args = rb_ary_new4(argc, argv);
1337
+
1338
+ return na_ndloop_main(nf, args, NULL);
1339
+ }
1340
+
1341
+ VALUE
1342
+ na_ndloop2(ndfunc_t* nf, VALUE args) {
1343
+ return na_ndloop_main(nf, args, NULL);
1344
+ }
1345
+
1346
+ VALUE
1347
+ #ifdef HAVE_STDARG_PROTOTYPES
1348
+ na_ndloop3(ndfunc_t* nf, void* ptr, int argc, ...)
1349
+ #else
1350
+ na_ndloop3(nf, ptr, argc, va_alist) ndfunc_t* nf;
1351
+ void* ptr;
1352
+ int argc;
1353
+ va_dcl
1354
+ #endif
1355
+ {
1356
+ va_list ar;
1357
+
1358
+ int i;
1359
+ VALUE* argv;
1360
+ volatile VALUE args;
1361
+
1362
+ argv = ALLOCA_N(VALUE, argc);
1363
+
1364
+ va_init_list(ar, argc);
1365
+ for (i = 0; i < argc; i++) {
1366
+ argv[i] = va_arg(ar, VALUE);
1367
+ }
1368
+ va_end(ar);
1369
+
1370
+ args = rb_ary_new4(argc, argv);
1371
+
1372
+ return na_ndloop_main(nf, args, ptr);
1373
+ }
1374
+
1375
+ VALUE
1376
+ na_ndloop4(ndfunc_t* nf, void* ptr, VALUE args) {
1377
+ return na_ndloop_main(nf, args, ptr);
1378
+ }
1379
+
1380
+ //----------------------------------------------------------------------
1381
+
1382
+ VALUE
1383
+ na_info_str(VALUE ary) {
1384
+ int nd, i;
1385
+ char tmp[32];
1386
+ VALUE buf;
1387
+ narray_t* na;
1388
+
1389
+ GetNArray(ary, na);
1390
+ nd = na->ndim;
1391
+
1392
+ buf = rb_str_new2(rb_class2name(rb_obj_class(ary)));
1393
+ if (NA_TYPE(na) == NARRAY_VIEW_T) {
1394
+ rb_str_cat(buf, "(view)", 6);
1395
+ }
1396
+ rb_str_cat(buf, "#shape=[", 8);
1397
+ if (nd > 0) {
1398
+ for (i = 0;;) {
1399
+ sprintf(tmp, "%" SZF "u", na->shape[i]);
1400
+ rb_str_cat2(buf, tmp);
1401
+ if (++i == nd) break;
1402
+ rb_str_cat(buf, ",", 1);
1403
+ }
1404
+ }
1405
+ rb_str_cat(buf, "]", 1);
1406
+ return buf;
1407
+ }
1408
+
1409
+ //----------------------------------------------------------------------
1410
+
1411
+ #define ncol numo_na_inspect_cols
1412
+ #define nrow numo_na_inspect_rows
1413
+ extern int ncol, nrow;
1414
+
1415
+ static void loop_inspect(ndfunc_t* nf, na_md_loop_t* lp) {
1416
+ int nd, i, ii;
1417
+ size_t* c;
1418
+ int col = 0, row = 0;
1419
+ long len;
1420
+ VALUE str;
1421
+ na_text_func_t func = (na_text_func_t)(nf->func);
1422
+ VALUE buf, opt;
1423
+
1424
+ nd = lp->ndim;
1425
+ buf = lp->loop_opt;
1426
+ // opt = *(VALUE*)(lp->user.opt_ptr);
1427
+ opt = lp->user.option;
1428
+
1429
+ for (i = 0; i < nd; i++) {
1430
+ if (lp->n[i] == 0) {
1431
+ rb_str_cat(buf, "[]", 2);
1432
+ return;
1433
+ }
1434
+ }
1435
+
1436
+ rb_str_cat(buf, "\n", 1);
1437
+
1438
+ c = ALLOCA_N(size_t, nd + 1);
1439
+ for (i = 0; i <= nd; i++) c[i] = 0;
1440
+
1441
+ if (nd > 0) {
1442
+ rb_str_cat(buf, "[", 1);
1443
+ } else {
1444
+ rb_str_cat(buf, "", 0);
1445
+ }
1446
+
1447
+ col = nd * 2;
1448
+ for (i = 0;;) {
1449
+ if (i < nd - 1) {
1450
+ for (ii = 0; ii < i; ii++) rb_str_cat(buf, " ", 1);
1451
+ for (; ii < nd - 1; ii++) rb_str_cat(buf, "[", 1);
1452
+ }
1453
+ for (; i < nd; i++) {
1454
+ if (LITER(lp, i, 0).idx) {
1455
+ LITER(lp, i + 1, 0).pos = LITER(lp, i, 0).pos + LITER(lp, i, 0).idx[c[i]];
1456
+ } else {
1457
+ LITER(lp, i + 1, 0).pos = LITER(lp, i, 0).pos + LITER(lp, i, 0).step * c[i];
1458
+ }
1459
+ }
1460
+ str = (*func)(LARG(lp, 0).ptr, LITER(lp, i, 0).pos, opt);
1461
+
1462
+ len = RSTRING_LEN(str) + 2;
1463
+ if (ncol > 0 && col + len > ncol - 3) {
1464
+ rb_str_cat(buf, "...", 3);
1465
+ c[i - 1] = lp->n[i - 1];
1466
+ } else {
1467
+ rb_str_append(buf, str);
1468
+ col += len;
1469
+ }
1470
+ for (;;) {
1471
+ if (i == 0) goto loop_end;
1472
+ i--;
1473
+ if (++c[i] < lp->n[i]) break;
1474
+ rb_str_cat(buf, "]", 1);
1475
+ c[i] = 0;
1476
+ }
1477
+ // line_break:
1478
+ rb_str_cat(buf, ", ", 2);
1479
+ if (i < nd - 1) {
1480
+ rb_str_cat(buf, "\n ", 2);
1481
+ col = nd * 2;
1482
+ row++;
1483
+ if (row == nrow) {
1484
+ rb_str_cat(buf, "...", 3);
1485
+ goto loop_end;
1486
+ }
1487
+ }
1488
+ }
1489
+ loop_end:;
1490
+ }
1491
+
1492
+ VALUE
1493
+ na_ndloop_inspect(VALUE nary, na_text_func_t func, VALUE opt) {
1494
+ volatile VALUE args;
1495
+ na_md_loop_t lp;
1496
+ VALUE buf;
1497
+ ndfunc_arg_in_t ain[3] = {{Qnil, 0}, {sym_loop_opt}, {sym_option}};
1498
+ ndfunc_t nf = {(na_iter_func_t)func, NO_LOOP, 3, 0, ain, 0};
1499
+ // nf = ndfunc_alloc(NULL, NO_LOOP, 1, 0, Qnil);
1500
+
1501
+ buf = na_info_str(nary);
1502
+
1503
+ if (na_get_pointer(nary) == NULL) {
1504
+ return rb_str_cat(buf, "(empty)", 7);
1505
+ }
1506
+
1507
+ // rb_p(args);
1508
+ // if (na_debug_flag) print_ndfunc(&nf);
1509
+
1510
+ args = rb_ary_new3(3, nary, buf, opt);
1511
+
1512
+ // cast arguments to NArray
1513
+ // ndloop_cast_args(nf, args);
1514
+
1515
+ // allocate ndloop struct
1516
+ ndloop_alloc(&lp, &nf, args, NULL, 0, loop_inspect);
1517
+
1518
+ rb_ensure(ndloop_run, (VALUE)&lp, ndloop_release, (VALUE)&lp);
1519
+
1520
+ return buf;
1521
+ }
1522
+
1523
+ //----------------------------------------------------------------------
1524
+
1525
+ static void loop_store_subnarray(ndfunc_t* nf, na_md_loop_t* lp, int i0, size_t* c, VALUE a) {
1526
+ int nd = lp->ndim;
1527
+ int i, j;
1528
+ narray_t* na;
1529
+ int* dim_map;
1530
+ VALUE a_type;
1531
+
1532
+ a_type = rb_obj_class(LARG(lp, 0).value);
1533
+ if (rb_obj_class(a) != a_type) {
1534
+ a = rb_funcall(a_type, id_cast, 1, a);
1535
+ }
1536
+ GetNArray(a, na);
1537
+ if (na->ndim != nd - i0 + 1) {
1538
+ rb_raise(nary_eShapeError,
1539
+ "mismatched dimension of sub-narray: "
1540
+ "nd_src=%d, nd_dst=%d",
1541
+ na->ndim, nd - i0 + 1);
1542
+ }
1543
+ dim_map = ALLOCA_N(int, na->ndim);
1544
+ for (i = 0; i < na->ndim; i++) {
1545
+ dim_map[i] = lp->trans_map[i + i0];
1546
+ // printf("dim_map[i=%d] = %d, i0=%d\n", i, dim_map[i], i0);
1547
+ }
1548
+ ndloop_set_stepidx(lp, 1, a, dim_map, NDL_READ);
1549
+ LARG(lp, 1).shape = &(na->shape[na->ndim - 1]);
1550
+
1551
+ // loop body
1552
+ for (i = i0;;) {
1553
+ LARG(lp, 1).value = Qtrue;
1554
+ for (; i < nd; i++) {
1555
+ for (j = 0; j < 2; j++) {
1556
+ if (LITER(lp, i, j).idx) {
1557
+ LITER(lp, i + 1, j).pos = LITER(lp, i, j).pos + LITER(lp, i, j).idx[c[i]];
1558
+ } else {
1559
+ LITER(lp, i + 1, j).pos = LITER(lp, i, j).pos + LITER(lp, i, j).step * c[i];
1560
+ }
1561
+ }
1562
+ if (c[i] >= na->shape[i - i0]) {
1563
+ LARG(lp, 1).value = Qfalse;
1564
+ }
1565
+ }
1566
+
1567
+ (*(nf->func))(&(lp->user));
1568
+
1569
+ for (;;) {
1570
+ if (i <= i0) goto loop_end;
1571
+ i--;
1572
+ c[i]++;
1573
+ if (c[i] < lp->n[i]) break;
1574
+ c[i] = 0;
1575
+ }
1576
+ }
1577
+ loop_end:
1578
+ LARG(lp, 1).ptr = NULL;
1579
+ }
1580
+
1581
+ static void loop_store_rarray(ndfunc_t* nf, na_md_loop_t* lp) {
1582
+ size_t* c;
1583
+ int i;
1584
+ VALUE* a;
1585
+ int nd = lp->ndim;
1586
+
1587
+ // counter
1588
+ c = ALLOCA_N(size_t, nd + 1);
1589
+ for (i = 0; i <= nd; i++) c[i] = 0;
1590
+
1591
+ // array at each dimension
1592
+ a = ALLOCA_N(VALUE, nd + 1);
1593
+ a[0] = LARG(lp, 1).value;
1594
+
1595
+ // print_ndloop(lp);
1596
+
1597
+ // loop body
1598
+ for (i = 0;;) {
1599
+ for (; i < nd; i++) {
1600
+ if (LITER(lp, i, 0).idx) {
1601
+ LITER(lp, i + 1, 0).pos = LITER(lp, i, 0).pos + LITER(lp, i, 0).idx[c[i]];
1602
+ } else {
1603
+ LITER(lp, i + 1, 0).pos = LITER(lp, i, 0).pos + LITER(lp, i, 0).step * c[i];
1604
+ }
1605
+ if (TYPE(a[i]) == T_ARRAY) {
1606
+ if (c[i] < (size_t)RARRAY_LEN(a[i])) {
1607
+ a[i + 1] = RARRAY_AREF(a[i], c[i]);
1608
+ } else {
1609
+ a[i + 1] = Qnil;
1610
+ }
1611
+ } else if (IsNArray(a[i])) {
1612
+ // printf("a[i=%d]=0x%lx\n",i,a[i]);
1613
+ loop_store_subnarray(nf, lp, i, c, a[i]);
1614
+ goto loop_next;
1615
+ } else {
1616
+ if (c[i] == 0) {
1617
+ a[i + 1] = a[i];
1618
+ } else {
1619
+ a[i + 1] = Qnil;
1620
+ }
1621
+ }
1622
+ // printf("c[%d]=%lu\n",i,c[i]);
1623
+ }
1624
+
1625
+ // printf("a[i=%d]=0x%lx\n",i,a[i]);
1626
+ if (IsNArray(a[i])) {
1627
+ loop_store_subnarray(nf, lp, i, c, a[i]);
1628
+ } else {
1629
+ LARG(lp, 1).value = a[i];
1630
+ (*(nf->func))(&(lp->user));
1631
+ }
1632
+
1633
+ loop_next:
1634
+ for (;;) {
1635
+ if (i <= 0) goto loop_end;
1636
+ i--;
1637
+ c[i]++;
1638
+ if (c[i] < lp->n[i]) break;
1639
+ c[i] = 0;
1640
+ }
1641
+ }
1642
+ loop_end:;
1643
+ }
1644
+
1645
+ VALUE
1646
+ na_ndloop_store_rarray(ndfunc_t* nf, VALUE nary, VALUE rary) {
1647
+ na_md_loop_t lp;
1648
+ VALUE args;
1649
+
1650
+ // rb_p(args);
1651
+ if (na_debug_flag) print_ndfunc(nf);
1652
+
1653
+ args = rb_assoc_new(nary, rary);
1654
+
1655
+ // cast arguments to NArray
1656
+ // ndloop_cast_args(nf, args);
1657
+
1658
+ // allocate ndloop struct
1659
+ ndloop_alloc(&lp, nf, args, NULL, 0, loop_store_rarray);
1660
+
1661
+ return rb_ensure(ndloop_run, (VALUE)&lp, ndloop_release, (VALUE)&lp);
1662
+ }
1663
+
1664
+ VALUE
1665
+ na_ndloop_store_rarray2(ndfunc_t* nf, VALUE nary, VALUE rary, VALUE opt) {
1666
+ na_md_loop_t lp;
1667
+ VALUE args;
1668
+
1669
+ // rb_p(args);
1670
+ if (na_debug_flag) print_ndfunc(nf);
1671
+
1672
+ // args = rb_assoc_new(rary,nary);
1673
+ args = rb_ary_new3(3, nary, rary, opt);
1674
+
1675
+ // cast arguments to NArray
1676
+ // ndloop_cast_args(nf, args);
1677
+
1678
+ // allocate ndloop struct
1679
+ ndloop_alloc(&lp, nf, args, NULL, 0, loop_store_rarray);
1680
+
1681
+ return rb_ensure(ndloop_run, (VALUE)&lp, ndloop_release, (VALUE)&lp);
1682
+ }
1683
+
1684
+ //----------------------------------------------------------------------
1685
+
1686
+ static void loop_narray_to_rarray(ndfunc_t* nf, na_md_loop_t* lp) {
1687
+ size_t* c;
1688
+ int i;
1689
+ // int nargs = nf->narg + nf->nres;
1690
+ int nd = lp->ndim;
1691
+ VALUE* a;
1692
+ volatile VALUE a0;
1693
+
1694
+ // alloc counter
1695
+ c = ALLOCA_N(size_t, nd + 1);
1696
+ for (i = 0; i <= nd; i++) c[i] = 0;
1697
+ // c[i]=1; // for zero-dim
1698
+ // fprintf(stderr,"in loop_narray_to_rarray, nd=%d\n",nd);
1699
+
1700
+ a = ALLOCA_N(VALUE, nd + 1);
1701
+ a[0] = a0 = lp->loop_opt;
1702
+
1703
+ // loop body
1704
+ for (i = 0;;) {
1705
+ for (; i < nd; i++) {
1706
+ if (LITER(lp, i, 0).idx) {
1707
+ LITER(lp, i + 1, 0).pos = LITER(lp, i, 0).pos + LITER(lp, i, 0).idx[c[i]];
1708
+ } else {
1709
+ LITER(lp, i + 1, 0).pos = LITER(lp, i, 0).pos + LITER(lp, i, 0).step * c[i];
1710
+ }
1711
+ if (c[i] == 0) {
1712
+ a[i + 1] = rb_ary_new2(lp->n[i]);
1713
+ rb_ary_push(a[i], a[i + 1]);
1714
+ }
1715
+ }
1716
+
1717
+ // lp->user.info = a[i];
1718
+ LARG(lp, 1).value = a[i];
1719
+ (*(nf->func))(&(lp->user));
1720
+
1721
+ for (;;) {
1722
+ if (i <= 0) goto loop_end;
1723
+ i--;
1724
+ if (++c[i] < lp->n[i]) break;
1725
+ c[i] = 0;
1726
+ }
1727
+ }
1728
+ loop_end:;
1729
+ }
1730
+
1731
+ VALUE
1732
+ na_ndloop_cast_narray_to_rarray(ndfunc_t* nf, VALUE nary, VALUE fmt) {
1733
+ na_md_loop_t lp;
1734
+ VALUE args, a0;
1735
+
1736
+ // rb_p(args);
1737
+ if (na_debug_flag) print_ndfunc(nf);
1738
+
1739
+ a0 = rb_ary_new();
1740
+ args = rb_ary_new3(3, nary, a0, fmt);
1741
+
1742
+ // cast arguments to NArray
1743
+ // ndloop_cast_args(nf, args);
1744
+
1745
+ // allocate ndloop struct
1746
+ ndloop_alloc(&lp, nf, args, NULL, 0, loop_narray_to_rarray);
1747
+
1748
+ rb_ensure(ndloop_run, (VALUE)&lp, ndloop_release, (VALUE)&lp);
1749
+ return RARRAY_AREF(a0, 0);
1750
+ }
1751
+
1752
+ //----------------------------------------------------------------------
1753
+
1754
+ static void loop_narray_with_index(ndfunc_t* nf, na_md_loop_t* lp) {
1755
+ size_t* c;
1756
+ int i, j;
1757
+ int nd = lp->ndim;
1758
+
1759
+ if (nd < 0) {
1760
+ rb_bug("bug? lp->ndim = %d\n", lp->ndim);
1761
+ }
1762
+ if (lp->n[0] == 0) { // empty array
1763
+ return;
1764
+ }
1765
+
1766
+ // pass total ndim to iterator
1767
+ lp->user.ndim += nd;
1768
+
1769
+ // alloc counter
1770
+ lp->user.opt_ptr = c = ALLOCA_N(size_t, nd + 1);
1771
+ for (i = 0; i <= nd; i++) c[i] = 0;
1772
+
1773
+ // loop body
1774
+ for (i = 0;;) {
1775
+ for (; i < nd; i++) {
1776
+ // j-th argument
1777
+ for (j = 0; j < lp->narg; j++) {
1778
+ if (LITER(lp, i, j).idx) {
1779
+ LITER(lp, i + 1, j).pos = LITER(lp, i, j).pos + LITER(lp, i, j).idx[c[i]];
1780
+ } else {
1781
+ LITER(lp, i + 1, j).pos = LITER(lp, i, j).pos + LITER(lp, i, j).step * c[i];
1782
+ }
1783
+ // printf("j=%d c[i=%d]=%lu pos=%lu\n",j,i,c[i],LITER(lp,i+1,j).pos);
1784
+ }
1785
+ }
1786
+
1787
+ (*(nf->func))(&(lp->user));
1788
+
1789
+ for (;;) {
1790
+ if (i <= 0) goto loop_end;
1791
+ i--;
1792
+ if (++c[i] < lp->n[i]) break;
1793
+ c[i] = 0;
1794
+ }
1795
+ }
1796
+ loop_end:;
1797
+ }
1798
+
1799
+ VALUE
1800
+ #ifdef HAVE_STDARG_PROTOTYPES
1801
+ na_ndloop_with_index(ndfunc_t* nf, int argc, ...)
1802
+ #else
1803
+ na_ndloop_with_index(nf, argc, va_alist) ndfunc_t* nf;
1804
+ int argc;
1805
+ va_dcl
1806
+ #endif
1807
+ {
1808
+ va_list ar;
1809
+
1810
+ int i;
1811
+ VALUE* argv;
1812
+ volatile VALUE args;
1813
+ na_md_loop_t lp;
1814
+
1815
+ argv = ALLOCA_N(VALUE, argc);
1816
+
1817
+ va_init_list(ar, argc);
1818
+ for (i = 0; i < argc; i++) {
1819
+ argv[i] = va_arg(ar, VALUE);
1820
+ }
1821
+ va_end(ar);
1822
+
1823
+ args = rb_ary_new4(argc, argv);
1824
+
1825
+ // return na_ndloop_main(nf, args, NULL);
1826
+ if (na_debug_flag) print_ndfunc(nf);
1827
+
1828
+ // cast arguments to NArray
1829
+ // copy_flag = ndloop_cast_args(nf, args);
1830
+
1831
+ // allocate ndloop struct
1832
+ ndloop_alloc(&lp, nf, args, 0, 0, loop_narray_with_index);
1833
+
1834
+ return rb_ensure(ndloop_run, (VALUE)&lp, ndloop_release, (VALUE)&lp);
1835
+ }
1836
+
1837
+ void Init_nary_ndloop(void) {
1838
+ id_cast = rb_intern("cast");
1839
+ id_extract = rb_intern("extract");
1840
+ }