numo-narray 0.9.0.1-x86-mingw32

Sign up to get free protection for your applications and to get access to all the features.
Files changed (124) hide show
  1. checksums.yaml +7 -0
  2. data/Gemfile +4 -0
  3. data/README.md +47 -0
  4. data/Rakefile +41 -0
  5. data/ext/numo/narray/SFMT-params.h +97 -0
  6. data/ext/numo/narray/SFMT-params19937.h +46 -0
  7. data/ext/numo/narray/SFMT.c +620 -0
  8. data/ext/numo/narray/SFMT.h +157 -0
  9. data/ext/numo/narray/array.c +525 -0
  10. data/ext/numo/narray/data.c +901 -0
  11. data/ext/numo/narray/depend.erb +33 -0
  12. data/ext/numo/narray/extconf.rb +117 -0
  13. data/ext/numo/narray/gen/bit.erb.c +811 -0
  14. data/ext/numo/narray/gen/cogen.rb +18 -0
  15. data/ext/numo/narray/gen/def/dcomplex.rb +32 -0
  16. data/ext/numo/narray/gen/def/dfloat.rb +30 -0
  17. data/ext/numo/narray/gen/def/int16.rb +29 -0
  18. data/ext/numo/narray/gen/def/int32.rb +29 -0
  19. data/ext/numo/narray/gen/def/int64.rb +29 -0
  20. data/ext/numo/narray/gen/def/int8.rb +29 -0
  21. data/ext/numo/narray/gen/def/robject.rb +30 -0
  22. data/ext/numo/narray/gen/def/scomplex.rb +32 -0
  23. data/ext/numo/narray/gen/def/sfloat.rb +30 -0
  24. data/ext/numo/narray/gen/def/uint16.rb +29 -0
  25. data/ext/numo/narray/gen/def/uint32.rb +29 -0
  26. data/ext/numo/narray/gen/def/uint64.rb +29 -0
  27. data/ext/numo/narray/gen/def/uint8.rb +29 -0
  28. data/ext/numo/narray/gen/dtype.erb.c +328 -0
  29. data/ext/numo/narray/gen/tmpl/accum.c +36 -0
  30. data/ext/numo/narray/gen/tmpl/accum_binary.c +75 -0
  31. data/ext/numo/narray/gen/tmpl/accum_index.c +58 -0
  32. data/ext/numo/narray/gen/tmpl/allocate.c +35 -0
  33. data/ext/numo/narray/gen/tmpl/aref.c +51 -0
  34. data/ext/numo/narray/gen/tmpl/aset.c +61 -0
  35. data/ext/numo/narray/gen/tmpl/binary.c +53 -0
  36. data/ext/numo/narray/gen/tmpl/binary2.c +55 -0
  37. data/ext/numo/narray/gen/tmpl/binary_s.c +34 -0
  38. data/ext/numo/narray/gen/tmpl/bit_binary.c +94 -0
  39. data/ext/numo/narray/gen/tmpl/bit_count.c +82 -0
  40. data/ext/numo/narray/gen/tmpl/bit_unary.c +77 -0
  41. data/ext/numo/narray/gen/tmpl/cast.c +37 -0
  42. data/ext/numo/narray/gen/tmpl/cast_array.c +79 -0
  43. data/ext/numo/narray/gen/tmpl/cast_numeric.c +22 -0
  44. data/ext/numo/narray/gen/tmpl/coerce_cast.c +8 -0
  45. data/ext/numo/narray/gen/tmpl/cond_binary.c +51 -0
  46. data/ext/numo/narray/gen/tmpl/cond_unary.c +45 -0
  47. data/ext/numo/narray/gen/tmpl/cum.c +42 -0
  48. data/ext/numo/narray/gen/tmpl/each.c +43 -0
  49. data/ext/numo/narray/gen/tmpl/each_with_index.c +64 -0
  50. data/ext/numo/narray/gen/tmpl/extract.c +23 -0
  51. data/ext/numo/narray/gen/tmpl/eye.c +91 -0
  52. data/ext/numo/narray/gen/tmpl/fill.c +38 -0
  53. data/ext/numo/narray/gen/tmpl/format.c +60 -0
  54. data/ext/numo/narray/gen/tmpl/format_to_a.c +47 -0
  55. data/ext/numo/narray/gen/tmpl/head.c +25 -0
  56. data/ext/numo/narray/gen/tmpl/inspect.c +16 -0
  57. data/ext/numo/narray/gen/tmpl/map_with_index.c +94 -0
  58. data/ext/numo/narray/gen/tmpl/median.c +44 -0
  59. data/ext/numo/narray/gen/tmpl/minmax.c +47 -0
  60. data/ext/numo/narray/gen/tmpl/poly.c +49 -0
  61. data/ext/numo/narray/gen/tmpl/pow.c +74 -0
  62. data/ext/numo/narray/gen/tmpl/powint.c +17 -0
  63. data/ext/numo/narray/gen/tmpl/qsort.c +149 -0
  64. data/ext/numo/narray/gen/tmpl/rand.c +33 -0
  65. data/ext/numo/narray/gen/tmpl/rand_norm.c +46 -0
  66. data/ext/numo/narray/gen/tmpl/robj_allocate.c +32 -0
  67. data/ext/numo/narray/gen/tmpl/seq.c +61 -0
  68. data/ext/numo/narray/gen/tmpl/set2.c +56 -0
  69. data/ext/numo/narray/gen/tmpl/sort.c +36 -0
  70. data/ext/numo/narray/gen/tmpl/sort_index.c +86 -0
  71. data/ext/numo/narray/gen/tmpl/store.c +31 -0
  72. data/ext/numo/narray/gen/tmpl/store_array.c +5 -0
  73. data/ext/numo/narray/gen/tmpl/store_from.c +53 -0
  74. data/ext/numo/narray/gen/tmpl/store_numeric.c +22 -0
  75. data/ext/numo/narray/gen/tmpl/to_a.c +41 -0
  76. data/ext/numo/narray/gen/tmpl/unary.c +58 -0
  77. data/ext/numo/narray/gen/tmpl/unary2.c +58 -0
  78. data/ext/numo/narray/gen/tmpl/unary_s.c +57 -0
  79. data/ext/numo/narray/index.c +822 -0
  80. data/ext/numo/narray/kwarg.c +79 -0
  81. data/ext/numo/narray/math.c +140 -0
  82. data/ext/numo/narray/narray.c +1539 -0
  83. data/ext/numo/narray/ndloop.c +1928 -0
  84. data/ext/numo/narray/numo/compat.h +23 -0
  85. data/ext/numo/narray/numo/intern.h +112 -0
  86. data/ext/numo/narray/numo/narray.h +411 -0
  87. data/ext/numo/narray/numo/ndloop.h +99 -0
  88. data/ext/numo/narray/numo/template.h +140 -0
  89. data/ext/numo/narray/numo/types/bit.h +19 -0
  90. data/ext/numo/narray/numo/types/complex.h +410 -0
  91. data/ext/numo/narray/numo/types/complex_macro.h +205 -0
  92. data/ext/numo/narray/numo/types/dcomplex.h +11 -0
  93. data/ext/numo/narray/numo/types/dfloat.h +12 -0
  94. data/ext/numo/narray/numo/types/float_def.h +34 -0
  95. data/ext/numo/narray/numo/types/float_macro.h +277 -0
  96. data/ext/numo/narray/numo/types/int16.h +12 -0
  97. data/ext/numo/narray/numo/types/int32.h +12 -0
  98. data/ext/numo/narray/numo/types/int64.h +12 -0
  99. data/ext/numo/narray/numo/types/int8.h +12 -0
  100. data/ext/numo/narray/numo/types/int_macro.h +34 -0
  101. data/ext/numo/narray/numo/types/robj_macro.h +218 -0
  102. data/ext/numo/narray/numo/types/robject.h +21 -0
  103. data/ext/numo/narray/numo/types/scomplex.h +11 -0
  104. data/ext/numo/narray/numo/types/sfloat.h +13 -0
  105. data/ext/numo/narray/numo/types/uint16.h +12 -0
  106. data/ext/numo/narray/numo/types/uint32.h +12 -0
  107. data/ext/numo/narray/numo/types/uint64.h +12 -0
  108. data/ext/numo/narray/numo/types/uint8.h +12 -0
  109. data/ext/numo/narray/numo/types/uint_macro.h +31 -0
  110. data/ext/numo/narray/numo/types/xint_macro.h +133 -0
  111. data/ext/numo/narray/rand.c +87 -0
  112. data/ext/numo/narray/step.c +506 -0
  113. data/ext/numo/narray/struct.c +872 -0
  114. data/lib/2.1/numo/narray.so +0 -0
  115. data/lib/2.2/numo/narray.so +0 -0
  116. data/lib/2.3/numo/narray.so +0 -0
  117. data/lib/erbpp/line_number.rb +126 -0
  118. data/lib/erbpp/narray_def.rb +338 -0
  119. data/lib/erbpp.rb +286 -0
  120. data/lib/numo/narray.rb +6 -0
  121. data/numo-narray.gemspec +35 -0
  122. data/spec/bit_spec.rb +93 -0
  123. data/spec/narray_spec.rb +249 -0
  124. metadata +238 -0
@@ -0,0 +1,1928 @@
1
+ /*
2
+ ndloop.c
3
+ Numerical Array Extension for Ruby
4
+ (C) Copyright 1999-2016 by Masahiro TANAKA
5
+
6
+ This program is free software.
7
+ You can distribute/modify this program
8
+ under the same terms as Ruby itself.
9
+ NO WARRANTY.
10
+ */
11
+
12
+ #include <ruby.h>
13
+ #include "numo/narray.h"
14
+
15
+ #if 0
16
+ #define DBG(x) x
17
+ #else
18
+ #define DBG(x)
19
+ #endif
20
+
21
+ #ifdef HAVE_STDARG_PROTOTYPES
22
+ #include <stdarg.h>
23
+ #define va_init_list(a,b) va_start(a,b)
24
+ #else
25
+ #include <varargs.h>
26
+ #define va_init_list(a,b) va_start(a)
27
+ #endif
28
+
29
+ typedef struct NA_BUFFER_COPY {
30
+ int ndim;
31
+ size_t elmsz;
32
+ size_t *n;
33
+ char *src_ptr;
34
+ char *buf_ptr;
35
+ na_loop_iter_t *src_iter;
36
+ na_loop_iter_t *buf_iter;
37
+ } na_buffer_copy_t;
38
+
39
+ typedef struct NA_LOOP_XARGS {
40
+ na_loop_iter_t *iter; // moved from na_loop_t
41
+ na_buffer_copy_t *bufcp; // copy data to buffer
42
+ int flag; // NDL_READ NDL_WRITE
43
+ bool free_user_iter; // alloc LARG(lp,j).iter=lp->xargs[j].iter
44
+ } na_loop_xargs_t;
45
+
46
+ typedef struct NA_MD_LOOP {
47
+ int narg;
48
+ int nin;
49
+ int ndim; // n of total dimention
50
+ unsigned int copy_flag; // set i-th bit if i-th arg is cast
51
+ size_t *n_ptr; // memory for n
52
+ na_loop_iter_t *iter_ptr; // memory for iter
53
+ size_t *n; // n of elements for each dim
54
+ na_loop_t user; // loop in user function
55
+ na_loop_xargs_t *xargs; // extra data for each arg
56
+ int writeback; // write back result to i-th arg
57
+ int init_aidx; // index of initializer argument
58
+ int reduce_dim;
59
+ int *trans_map;
60
+ VALUE vargs;
61
+ VALUE reduce;
62
+ VALUE loop_opt;
63
+ ndfunc_t *ndfunc;
64
+ void (*loop_func)();
65
+ } na_md_loop_t;
66
+
67
+ #define LARG(lp,iarg) ((lp)->user.args[iarg])
68
+ #define LITER(lp,idim,iarg) ((lp)->xargs[iarg].iter[idim])
69
+ #define LITER_SRC(lp,idim) ((lp)->src_iter[idim])
70
+ #define LBUFCP(lp,j) ((lp)->xargs[j].bufcp)
71
+
72
+ #define CASTABLE(t) (RTEST(t) && (t)!=OVERWRITE)
73
+
74
+ #define NDL_READ 1
75
+ #define NDL_WRITE 2
76
+ #define NDL_READ_WRITE 3
77
+
78
+ static inline VALUE
79
+ nary_type_s_cast(VALUE type, VALUE obj)
80
+ {
81
+ return rb_funcall(type,rb_intern("cast"),1,obj);
82
+ }
83
+
84
+ static void
85
+ print_ndfunc(ndfunc_t *nf) {
86
+ volatile VALUE t;
87
+ int i, k;
88
+ printf("ndfunc_t = 0x%"SZF"x {\n",(size_t)nf);
89
+ printf(" func = 0x%"SZF"x\n", (size_t)nf->func);
90
+ printf(" flag = 0x%"SZF"x\n", (size_t)nf->flag);
91
+ printf(" nin = %d\n", nf->nin);
92
+ printf(" nout = %d\n", nf->nout);
93
+ printf(" ain = 0x%"SZF"x\n", (size_t)nf->ain);
94
+ for (i=0; i<nf->nin; i++) {
95
+ t = rb_inspect(nf->ain[i].type);
96
+ printf(" ain[%d].type = %s\n", i, StringValuePtr(t));
97
+ printf(" ain[%d].dim = %d\n", i, nf->ain[i].dim);
98
+ }
99
+ printf(" aout = 0x%"SZF"x\n", (size_t)nf->aout);
100
+ for (i=0; i<nf->nout; i++) {
101
+ t = rb_inspect(nf->aout[i].type);
102
+ printf(" aout[%d].type = %s\n", i, StringValuePtr(t));
103
+ printf(" aout[%d].dim = %d\n", i, nf->aout[i].dim);
104
+ for (k=0; k<nf->aout[i].dim; k++) {
105
+ printf(" aout[%d].shape[%d] = %"SZF"u\n", i, k, nf->aout[i].shape[k]);
106
+ }
107
+ }
108
+ printf("}\n");
109
+ }
110
+
111
+
112
+ static void
113
+ print_ndloop(na_md_loop_t *lp) {
114
+ int i,j,nd;
115
+ printf("na_md_loop_t = 0x%"SZF"x {\n",(size_t)lp);
116
+ printf(" narg = %d\n", lp->narg);
117
+ printf(" nin = %d\n", lp->nin);
118
+ printf(" ndim = %d\n", lp->ndim);
119
+ printf(" copy_flag = %x\n", lp->copy_flag);
120
+ printf(" writeback = %d\n", lp->writeback);
121
+ printf(" init_aidx = %d\n", lp->init_aidx);
122
+ printf(" reduce_dim = %d\n", lp->reduce_dim);
123
+ printf(" trans_map = 0x%"SZF"x\n", (size_t)lp->trans_map);
124
+ nd = lp->ndim + lp->user.ndim;
125
+ for (i=0; i<nd; i++) {
126
+ printf(" trans_map[%d] = %d\n", i, lp->trans_map[i]);
127
+ }
128
+ printf(" n = 0x%"SZF"x\n", (size_t)lp->n);
129
+ nd = lp->ndim + lp->user.ndim;
130
+ for (i=0; i<=lp->ndim; i++) {
131
+ printf(" n[%d] = %"SZF"u\n", i, lp->n[i]);
132
+ }
133
+ printf(" user.n = 0x%"SZF"x\n", (size_t)lp->user.n);
134
+ if (lp->user.n) {
135
+ for (i=0; i<=lp->user.ndim; i++) {
136
+ printf(" user.n[%d] = %"SZF"u\n", i, lp->user.n[i]);
137
+ }
138
+ }
139
+ printf(" xargs = 0x%"SZF"x\n", (size_t)lp->xargs);
140
+ printf(" iter_ptr = 0x%"SZF"x\n", (size_t)lp->iter_ptr);
141
+ printf(" user.narg = %d\n", lp->user.narg);
142
+ printf(" user.ndim = %d\n", lp->user.ndim);
143
+ printf(" user.args = 0x%"SZF"x\n", (size_t)lp->user.args);
144
+ for (j=0; j<lp->narg; j++) {
145
+ }
146
+ printf(" user.opt_ptr = 0x%"SZF"x\n", (size_t)lp->user.opt_ptr);
147
+ if (lp->reduce==Qnil) {
148
+ printf(" reduce = nil\n");
149
+ } else {
150
+ printf(" reduce = 0x%x\n", NUM2INT(lp->reduce));
151
+ }
152
+ for (j=0; j<lp->narg; j++) {
153
+ printf("--user.args[%d]--\n", j);
154
+ printf(" user.args[%d].ptr = 0x%"SZF"x\n", j, (size_t)LARG(lp,j).ptr);
155
+ printf(" user.args[%d].elmsz = %"SZF"d\n", j, LARG(lp,j).elmsz);
156
+ printf(" user.args[%d].value = 0x%"SZF"x\n", j, LARG(lp,j).value);
157
+ printf(" user.args[%d].ndim = %d\n", j, LARG(lp,j).ndim);
158
+ printf(" user.args[%d].shape = 0x%"SZF"x\n", j, (size_t)LARG(lp,j).shape);
159
+ if (LARG(lp,j).shape) {
160
+ for (i=0; i<LARG(lp,j).ndim; i++) {
161
+ printf(" user.args[%d].shape[%d] = %"SZF"d\n", j, i, LARG(lp,j).shape[i]);
162
+ }
163
+ }
164
+ printf(" user.args[%d].iter = 0x%"SZF"x\n", j,(size_t)lp->user.args[j].iter);
165
+ if (lp->user.args[j].iter) {
166
+ for (i=0; i<lp->user.ndim; i++) {
167
+ printf(" &user.args[%d].iter[%d] = 0x%"SZF"x\n", j,i, (size_t)&lp->user.args[j].iter[i]);
168
+ printf(" user.args[%d].iter[%d].pos = %"SZF"u\n", j,i, lp->user.args[j].iter[i].pos);
169
+ printf(" user.args[%d].iter[%d].step = %"SZF"u\n", j,i, lp->user.args[j].iter[i].step);
170
+ printf(" user.args[%d].iter[%d].idx = 0x%"SZF"x\n", j,i, (size_t)lp->user.args[j].iter[i].idx);
171
+ }
172
+ }
173
+ //
174
+ printf(" xargs[%d].flag = %d\n", j, lp->xargs[j].flag);
175
+ printf(" xargs[%d].free_user_iter = %d\n", j, lp->xargs[j].free_user_iter);
176
+ for (i=0; i<=nd; i++) {
177
+ printf(" &xargs[%d].iter[%d] = 0x%"SZF"x\n", j,i, (size_t)&LITER(lp,i,j));
178
+ printf(" xargs[%d].iter[%d].pos = %"SZF"u\n", j,i, LITER(lp,i,j).pos);
179
+ printf(" xargs[%d].iter[%d].step = %"SZF"u\n", j,i, LITER(lp,i,j).step);
180
+ printf(" xargs[%d].iter[%d].idx = 0x%"SZF"x\n", j,i, (size_t)LITER(lp,i,j).idx);
181
+ }
182
+ printf(" xargs[%d].bufcp = 0x%"SZF"x\n", j, (size_t)lp->xargs[j].bufcp);
183
+ if (lp->xargs[j].bufcp) {
184
+ printf(" xargs[%d].bufcp->ndim = %d\n", j, lp->xargs[j].bufcp->ndim);
185
+ printf(" xargs[%d].bufcp->elmsz = %"SZF"d\n", j, lp->xargs[j].bufcp->elmsz);
186
+ printf(" xargs[%d].bufcp->n = 0x%"SZF"x\n", j, (size_t)lp->xargs[j].bufcp->n);
187
+ printf(" xargs[%d].bufcp->src_ptr = 0x%"SZF"x\n", j, (size_t)lp->xargs[j].bufcp->src_ptr);
188
+ printf(" xargs[%d].bufcp->buf_ptr = 0x%"SZF"x\n", j, (size_t)lp->xargs[j].bufcp->buf_ptr);
189
+ printf(" xargs[%d].bufcp->src_iter = 0x%"SZF"x\n", j, (size_t)lp->xargs[j].bufcp->src_iter);
190
+ printf(" xargs[%d].bufcp->buf_iter = 0x%"SZF"x\n", j, (size_t)lp->xargs[j].bufcp->buf_iter);
191
+ }
192
+ }
193
+ printf("}\n");
194
+ }
195
+
196
+
197
+ static unsigned int
198
+ ndloop_func_loop_spec(ndfunc_t *nf, int user_ndim)
199
+ {
200
+ unsigned int f=0;
201
+ // If user function supports LOOP
202
+ if (user_ndim > 0 || NDF_TEST(nf,NDF_HAS_LOOP)) {
203
+ if (!NDF_TEST(nf,NDF_STRIDE_LOOP)) {
204
+ f |= 1;
205
+ }
206
+ if (!NDF_TEST(nf,NDF_INDEX_LOOP)) {
207
+ f |= 2;
208
+ }
209
+ }
210
+ return f;
211
+ }
212
+
213
+
214
+
215
+
216
+ static int
217
+ ndloop_cast_required(VALUE type, VALUE value)
218
+ {
219
+ return CASTABLE(type) && type != CLASS_OF(value);
220
+ }
221
+
222
+ static int
223
+ ndloop_castable_type(VALUE type)
224
+ {
225
+ return rb_obj_is_kind_of(type, rb_cClass) && RTEST(rb_class_inherited_p(type, cNArray));
226
+ }
227
+
228
+ static void
229
+ ndloop_cast_error(VALUE type, VALUE value)
230
+ {
231
+ VALUE x = rb_inspect(type);
232
+ char* s = StringValueCStr(x);
233
+ rb_bug("fail cast from %s to %s", rb_obj_classname(value),s);
234
+ rb_raise(rb_eTypeError,"fail cast from %s to %s",
235
+ rb_obj_classname(value), s);
236
+ }
237
+
238
+ // convert input argeuments given by RARRAY_PTR(args)[j]
239
+ // to type specified by nf->args[j].type
240
+ // returns copy_flag where nth-bit is set if nth argument is converted.
241
+ static unsigned int
242
+ ndloop_cast_args(ndfunc_t *nf, VALUE args)
243
+ {
244
+ int j;
245
+ unsigned int copy_flag=0;
246
+ VALUE type, value;
247
+
248
+ for (j=0; j<nf->nin; j++) {
249
+
250
+ type = nf->ain[j].type;
251
+ if (TYPE(type)==T_SYMBOL)
252
+ continue;
253
+ value = RARRAY_AREF(args,j);
254
+ if (!ndloop_cast_required(type, value))
255
+ continue;
256
+
257
+ if (ndloop_castable_type(type)) {
258
+ RARRAY_ASET(args,j,nary_type_s_cast(type, value));
259
+ copy_flag |= 1<<j;
260
+ } else {
261
+ ndloop_cast_error(type, value);
262
+ }
263
+ }
264
+
265
+ RB_GC_GUARD(type); RB_GC_GUARD(value);
266
+ return copy_flag;
267
+ }
268
+
269
+
270
+ static void
271
+ ndloop_handle_symbol_in_ain(VALUE type, VALUE value, int at, na_md_loop_t *lp)
272
+ {
273
+ if (type==sym_reduce) {
274
+ lp->reduce = value;
275
+ }
276
+ else if (type==sym_option) {
277
+ lp->user.option = value;
278
+ }
279
+ else if (type==sym_loop_opt) {
280
+ lp->loop_opt = value;
281
+ }
282
+ else if (type==sym_init) {
283
+ lp->init_aidx = at;
284
+ }
285
+ else {
286
+ rb_bug("ndloop parse_options: unknown type");
287
+ }
288
+ }
289
+
290
+ static inline int
291
+ max2(int x, int y)
292
+ {
293
+ return x > y ? x : y;
294
+ }
295
+
296
+ static void
297
+ ndloop_find_max_dimension(na_md_loop_t *lp, ndfunc_t *nf, VALUE args)
298
+ {
299
+ int j;
300
+ int nin=0; // number of input objects (except for symbols)
301
+ int user_nd=0; // max dimension of user function
302
+ int loop_nd=0; // max dimension of md-loop
303
+
304
+ for (j=0; j<RARRAY_LEN(args); j++) {
305
+ VALUE t = nf->ain[j].type;
306
+ VALUE v = RARRAY_AREF(args,j);
307
+ if (TYPE(t)==T_SYMBOL) {
308
+ ndloop_handle_symbol_in_ain(t, v, j, lp);
309
+ } else {
310
+ nin++;
311
+ user_nd = max2(user_nd, nf->ain[j].dim);
312
+ if (IsNArray(v))
313
+ loop_nd = max2(loop_nd, RNARRAY_NDIM(v) - nf->ain[j].dim);
314
+ }
315
+ }
316
+
317
+ lp->narg = lp->user.narg = nin + nf->nout;
318
+ lp->nin = nin;
319
+ lp->ndim = loop_nd;
320
+ lp->user.ndim = user_nd;
321
+ }
322
+
323
+ /*
324
+ user-dimension:
325
+ user_nd = MAX( nf->args[j].dim )
326
+
327
+ user-support dimension:
328
+
329
+ loop dimension:
330
+ loop_nd
331
+ */
332
+
333
+ static void
334
+ ndloop_alloc(na_md_loop_t *lp, ndfunc_t *nf, VALUE args,
335
+ void *opt_ptr, unsigned int copy_flag,
336
+ void (*loop_func)(ndfunc_t*, na_md_loop_t*))
337
+ {
338
+ int i,j;
339
+ int narg;
340
+ int max_nd;
341
+
342
+ long args_len;
343
+
344
+ na_loop_iter_t *iter;
345
+
346
+ int trans_dim;
347
+ unsigned int f;
348
+
349
+ args_len = RARRAY_LEN(args);
350
+
351
+ if (args_len != nf->nin) {
352
+ rb_bug("wrong number of arguments for ndfunc (%lu for %d)",
353
+ args_len, nf->nin);
354
+ }
355
+
356
+ lp->vargs = args;
357
+ lp->ndfunc = nf;
358
+ lp->loop_func = loop_func;
359
+ lp->copy_flag = copy_flag;
360
+
361
+ lp->reduce = Qnil;
362
+ lp->user.option = Qnil;
363
+ lp->user.opt_ptr = opt_ptr;
364
+ lp->user.err_type = Qfalse;
365
+ lp->loop_opt = Qnil;
366
+ lp->writeback = -1;
367
+ lp->init_aidx = -1;
368
+
369
+ lp->n = NULL;
370
+ lp->n_ptr = NULL;
371
+ lp->xargs = NULL;
372
+ lp->user.args = NULL;
373
+ lp->user.n = NULL;
374
+ lp->iter_ptr = NULL;
375
+ lp->trans_map = NULL;
376
+
377
+ ndloop_find_max_dimension(lp, nf, args);
378
+ narg = lp->nin + nf->nout;
379
+ max_nd = lp->ndim + lp->user.ndim;
380
+
381
+ lp->n = lp->n_ptr = ALLOC_N(size_t, max_nd+1);
382
+ lp->xargs = ALLOC_N(na_loop_xargs_t, narg);
383
+ lp->user.args = ALLOC_N(na_loop_args_t, narg);
384
+ iter = ALLOC_N(na_loop_iter_t, narg*(max_nd+1));
385
+ lp->iter_ptr = iter;
386
+
387
+ for (j=0; j<narg; j++) {
388
+ LARG(lp,j).value = Qnil;
389
+ LARG(lp,j).iter = NULL;
390
+ LARG(lp,j).shape = NULL;
391
+ LARG(lp,j).ndim = 0;
392
+ lp->xargs[j].iter = &(iter[(max_nd+1)*j]);
393
+ lp->xargs[j].bufcp = NULL;
394
+ lp->xargs[j].flag = (j<nf->nin) ? NDL_READ : NDL_WRITE;
395
+ lp->xargs[j].free_user_iter = 0;
396
+ }
397
+
398
+ for (i=0; i<=max_nd; i++) {
399
+ lp->n[i] = 1;
400
+ for (j=0; j<narg; j++) {
401
+ LITER(lp,i,j).pos = 0;
402
+ LITER(lp,i,j).step = 0;
403
+ LITER(lp,i,j).idx = NULL;
404
+ }
405
+ }
406
+
407
+ // transpose reduce-dimensions to last dimensions
408
+ // array loop
409
+ // [*,+,*,+,*] => [*,*,*,+,+]
410
+ // trans_map=[0,3,1,4,2] <= [0,1,2,3,4]
411
+ lp->trans_map = ALLOC_N(int, max_nd+1);
412
+ if (NDF_TEST(nf,NDF_FLAT_REDUCE) && RTEST(lp->reduce)) {
413
+ trans_dim = 0;
414
+ for (i=0; i<max_nd; i++) {
415
+ if (na_test_reduce(lp->reduce, i)) {
416
+ lp->trans_map[i] = -1;
417
+ } else {
418
+ lp->trans_map[i] = trans_dim++;
419
+ }
420
+ }
421
+ j = trans_dim;
422
+ for (i=0; i<max_nd; i++) {
423
+ if (lp->trans_map[i] == -1) {
424
+ lp->trans_map[i] = j++;
425
+ }
426
+ }
427
+ lp->reduce_dim = max_nd - trans_dim;
428
+ f = 0;
429
+ for (i=trans_dim; i<max_nd; i++) {
430
+ f |= 1<<i;
431
+ }
432
+ lp->reduce = INT2FIX(f);
433
+ } else {
434
+ for (i=0; i<max_nd; i++) {
435
+ lp->trans_map[i] = i;
436
+ }
437
+ lp->reduce_dim = 0;
438
+ }
439
+ }
440
+
441
+
442
+ static VALUE
443
+ ndloop_release(VALUE vlp)
444
+ {
445
+ int j;
446
+ VALUE v;
447
+ na_md_loop_t *lp = (na_md_loop_t*)(vlp);
448
+
449
+ for (j=0; j < lp->narg; j++) {
450
+ v = LARG(lp,j).value;
451
+ if (IsNArray(v)) {
452
+ na_release_lock(v);
453
+ }
454
+ }
455
+ //xfree(lp);
456
+ for (j=0; j<lp->narg; j++) {
457
+ //printf("lp->xargs[%d].bufcp=%lx\n",j,(size_t)(lp->xargs[j].bufcp));
458
+ if (lp->xargs[j].bufcp) {
459
+ xfree(lp->xargs[j].bufcp->buf_iter);
460
+ xfree(lp->xargs[j].bufcp->buf_ptr);
461
+ xfree(lp->xargs[j].bufcp->n);
462
+ xfree(lp->xargs[j].bufcp);
463
+ if (lp->xargs[j].free_user_iter) {
464
+ xfree(LARG(lp,j).iter);
465
+ }
466
+ }
467
+ }
468
+ if (lp->trans_map) xfree(lp->trans_map);
469
+ xfree(lp->xargs);
470
+ xfree(lp->iter_ptr);
471
+ xfree(lp->user.args);
472
+ xfree(lp->n_ptr);
473
+ //rb_gc_force_recycle(vlp);
474
+ return Qnil;
475
+ }
476
+
477
+
478
+ /*
479
+ static void
480
+ ndloop_free(na_md_loop_t* lp)
481
+ {
482
+ int j;
483
+ VALUE v;
484
+
485
+ for (j=0; j<lp->narg; j++) {
486
+ v = LARG(lp,j).value;
487
+ if (IsNArray(v)) {
488
+ na_release_lock(v);
489
+ }
490
+ }
491
+ xfree(lp);
492
+ }
493
+ */
494
+
495
+
496
+ /*
497
+ set lp->n[i] (shape of n-d iteration) here
498
+ */
499
+ static void
500
+ ndloop_check_shape(na_md_loop_t *lp, int nf_dim, narray_t *na)
501
+ {
502
+ int i, k;
503
+ size_t n;
504
+ int dim_beg;
505
+
506
+ dim_beg = lp->ndim + nf_dim - na->ndim;
507
+
508
+ for (k = na->ndim - nf_dim - 1; k>=0; k--) {
509
+ i = lp->trans_map[k + dim_beg];
510
+ n = na->shape[k];
511
+ // if n==1 then repeat this dimension
512
+ if (n != 1) {
513
+ if (lp->n[i] == 1) {
514
+ lp->n[i] = n;
515
+ } else if (lp->n[i] != n) {
516
+ // inconsistent array shape
517
+ rb_raise(rb_eTypeError,"shape1[%d](=%"SZF"u) != shape2[%d](=%"SZF"u)",
518
+ i, lp->n[i], k, n);
519
+ }
520
+ }
521
+ }
522
+ }
523
+
524
+
525
+ /*
526
+ na->shape[i] == lp->n[ dim_map[i] ]
527
+ */
528
+ static void
529
+ ndloop_set_stepidx(na_md_loop_t *lp, int j, VALUE vna, int *dim_map, int rwflag)
530
+ {
531
+ size_t n, s;
532
+ int i, k;
533
+ stridx_t sdx;
534
+ narray_t *na;
535
+
536
+ LARG(lp,j).value = vna;
537
+ LARG(lp,j).elmsz = na_get_elmsz(vna);
538
+ if (rwflag & NDL_READ) {
539
+ LARG(lp,j).ptr = na_get_pointer_for_read(vna);
540
+ } else
541
+ if (rwflag & NDL_WRITE) {
542
+ LARG(lp,j).ptr = na_get_pointer_for_write(vna);
543
+ } else {
544
+ rb_bug("invalid value for read-write flag");
545
+ }
546
+ GetNArray(vna,na);
547
+
548
+ switch(NA_TYPE(na)) {
549
+ case NARRAY_DATA_T:
550
+ if (NA_DATA_PTR(na)==NULL && NA_SIZE(na)>0) {
551
+ rb_bug("cannot read no-data NArray");
552
+ rb_raise(rb_eRuntimeError,"cannot read no-data NArray");
553
+ }
554
+ // through
555
+ case NARRAY_FILEMAP_T:
556
+ s = LARG(lp,j).elmsz;
557
+ for (k=na->ndim; k--;) {
558
+ n = na->shape[k];
559
+ if (n > 1) {
560
+ i = dim_map[k];
561
+ //printf("n=%d k=%d i=%d\n",n,k,i);
562
+ LITER(lp,i,j).step = s;
563
+ LITER(lp,i,j).idx = NULL;
564
+ }
565
+ s *= n;
566
+ }
567
+ LITER(lp,0,j).pos = 0;
568
+ break;
569
+ case NARRAY_VIEW_T:
570
+ LITER(lp,0,j).pos = NA_VIEW_OFFSET(na);
571
+ for (k=0; k<na->ndim; k++) {
572
+ n = na->shape[k];
573
+ sdx = NA_VIEW_STRIDX(na)[k];
574
+ if (n > 1) {
575
+ i = dim_map[k];
576
+ if (SDX_IS_INDEX(sdx)) {
577
+ LITER(lp,i,j).step = 0;
578
+ LITER(lp,i,j).idx = SDX_GET_INDEX(sdx);
579
+ } else {
580
+ LITER(lp,i,j).step = SDX_GET_STRIDE(sdx);
581
+ LITER(lp,i,j).idx = NULL;
582
+ }
583
+ } else if (n==1) {
584
+ if (SDX_IS_INDEX(sdx)) {
585
+ LITER(lp,0,j).pos += SDX_GET_INDEX(sdx)[0];
586
+ }
587
+ }
588
+ }
589
+ break;
590
+ default:
591
+ rb_bug("invalid narray internal type");
592
+ }
593
+ }
594
+
595
+
596
+
597
+ static void
598
+ ndloop_init_args(ndfunc_t *nf, na_md_loop_t *lp, VALUE args)
599
+ {
600
+ int i, j;
601
+ VALUE v;
602
+ narray_t *na;
603
+ int nf_dim;
604
+ int dim_beg;
605
+ int *dim_map;
606
+ int max_nd = lp->ndim + lp->user.ndim;
607
+ int flag;
608
+ size_t s;
609
+
610
+ /*
611
+ na->shape[i] == lp->n[ dim_map[i] ]
612
+ */
613
+ dim_map = ALLOCA_N(int, max_nd);
614
+
615
+ // input arguments
616
+ for (j=0; j<nf->nin; j++) {
617
+ if (TYPE(nf->ain[j].type)==T_SYMBOL) {
618
+ continue;
619
+ }
620
+ v = RARRAY_AREF(args,j);
621
+ if (IsNArray(v)) {
622
+ // set LARG(lp,j) with v
623
+ GetNArray(v,na);
624
+ nf_dim = nf->ain[j].dim;
625
+ if (nf_dim > na->ndim) {
626
+ rb_raise(nary_eDimensionError,"requires >= %d-dimensioal array "
627
+ "while %d-dimensional array is given",nf_dim,na->ndim);
628
+ }
629
+ ndloop_check_shape(lp, nf_dim, na);
630
+ dim_beg = lp->ndim + nf->ain[j].dim - na->ndim;
631
+ for (i=0; i<na->ndim; i++) {
632
+ dim_map[i] = lp->trans_map[i+dim_beg];
633
+ //printf("dim_map[%d]=%d na->shape[%d]=%d\n",i,dim_map[i],i,na->shape[i]);
634
+ }
635
+ if (nf->ain[j].type==OVERWRITE) {
636
+ lp->xargs[j].flag = flag = NDL_WRITE;
637
+ } else {
638
+ lp->xargs[j].flag = flag = NDL_READ;
639
+ }
640
+ ndloop_set_stepidx(lp, j, v, dim_map, flag);
641
+ LARG(lp,j).ndim = nf_dim;
642
+ if (nf_dim > 0) {
643
+ LARG(lp,j).shape = na->shape + (na->ndim - nf_dim);
644
+ }
645
+ } else if (TYPE(v)==T_ARRAY) {
646
+ LARG(lp,j).value = v;
647
+ LARG(lp,j).elmsz = sizeof(VALUE);
648
+ LARG(lp,j).ptr = NULL;
649
+ for (i=0; i<=max_nd; i++) {
650
+ LITER(lp,i,j).step = 1;
651
+ }
652
+ }
653
+ }
654
+ // check whether # of element is zero
655
+ for (s=1,i=0; i<=max_nd; i++) {
656
+ s *= lp->n[i];
657
+ }
658
+ if (s==0) {
659
+ for (i=0; i<=max_nd; i++) {
660
+ lp->n[i] = 0;
661
+ }
662
+ }
663
+ }
664
+
665
+
666
+ static int
667
+ ndloop_check_inplace(VALUE type, int na_ndim, size_t *na_shape, VALUE v)
668
+ {
669
+ int i;
670
+ narray_t *na;
671
+
672
+ // type check
673
+ if (type != CLASS_OF(v)) {
674
+ return 0;
675
+ }
676
+ GetNArray(v,na);
677
+ // shape check
678
+ if (na->ndim != na_ndim) {
679
+ return 0;
680
+ }
681
+ for (i=0; i<na_ndim; i++) {
682
+ if (na_shape[i] != na->shape[i]) {
683
+ return 0;
684
+ }
685
+ }
686
+ // v is selected as output
687
+ return 1;
688
+ }
689
+
690
+ static VALUE
691
+ ndloop_find_inplace(ndfunc_t *nf, na_md_loop_t *lp, VALUE type,
692
+ int na_ndim, size_t *na_shape, VALUE args)
693
+ {
694
+ int j;
695
+ VALUE v;
696
+
697
+ // find inplace
698
+ for (j=0; j<nf->nin; j++) {
699
+ v = RARRAY_AREF(args,j);
700
+ if (IsNArray(v)) {
701
+ if (TEST_INPLACE(v)) {
702
+ if (ndloop_check_inplace(type,na_ndim,na_shape,v)) {
703
+ // if already copied, create outary and write-back
704
+ if (lp->copy_flag & (1<<j)) {
705
+ lp->writeback = j;
706
+ }
707
+ return v;
708
+ }
709
+ }
710
+ }
711
+ }
712
+ // find casted or copied input array
713
+ for (j=0; j<nf->nin; j++) {
714
+ if (lp->copy_flag & (1<<j)) {
715
+ v = RARRAY_AREF(args,j);
716
+ if (ndloop_check_inplace(type,na_ndim,na_shape,v)) {
717
+ return v;
718
+ }
719
+ }
720
+ }
721
+ return Qnil;
722
+ }
723
+
724
+
725
+
726
+ static VALUE
727
+ ndloop_get_arg_type(ndfunc_t *nf, VALUE args, VALUE t)
728
+ {
729
+ int i;
730
+
731
+ // if type is FIXNUM, get the type of i-th argument
732
+ if (FIXNUM_P(t)) {
733
+ i = FIX2INT(t);
734
+ if (i<0 || i>=nf->nin) {
735
+ rb_bug("invalid type: index (%d) out of # of args",i);
736
+ }
737
+ t = nf->ain[i].type;
738
+ // if i-th type is Qnil, get the type of i-th input value
739
+ if (!CASTABLE(t)) {
740
+ t = CLASS_OF(RARRAY_AREF(args,i));
741
+ }
742
+ }
743
+ return t;
744
+ }
745
+
746
+
747
+ static VALUE
748
+ ndloop_set_output_narray(ndfunc_t *nf, na_md_loop_t *lp, int k,
749
+ VALUE type, VALUE args)
750
+ {
751
+ int i, j;
752
+ int na_ndim;
753
+ int lp_dim;
754
+ volatile VALUE v=Qnil;
755
+ size_t *na_shape;
756
+ int *dim_map;
757
+ int flag = NDL_READ_WRITE;
758
+ int nd;
759
+ int max_nd = lp->ndim + nf->aout[k].dim;
760
+
761
+ na_shape = ALLOCA_N(size_t, max_nd);
762
+ dim_map = ALLOCA_N(int, max_nd);
763
+
764
+ //printf("max_nd=%d lp->ndim=%d\n",max_nd,lp->ndim);
765
+
766
+ // md-loop shape
767
+ na_ndim = 0;
768
+ for (i=0; i<lp->ndim; i++) {
769
+ // na_shape[i] == lp->n[lp->trans_map[i]]
770
+ lp_dim = lp->trans_map[i];
771
+ //printf("i=%d lp_dim=%d\n",i,lp_dim);
772
+ if (NDF_TEST(nf,NDF_CUM)) { // cumulate with shape kept
773
+ na_shape[na_ndim] = lp->n[lp_dim];
774
+ } else
775
+ if (na_test_reduce(lp->reduce,lp_dim)) { // accumulate dimension
776
+ if (NDF_TEST(nf,NDF_KEEP_DIM)) {
777
+ na_shape[na_ndim] = 1; // leave it
778
+ } else {
779
+ continue; // delete dimension
780
+ }
781
+ } else {
782
+ na_shape[na_ndim] = lp->n[lp_dim];
783
+ }
784
+ //printf("i=%d lp_dim=%d na_shape[%d]=%ld\n",i,lp_dim,i,na_shape[i]);
785
+ dim_map[na_ndim++] = lp_dim;
786
+ //dim_map[lp_dim] = na_ndim++;
787
+ }
788
+
789
+ // user-specified shape
790
+ for (i=0; i<nf->aout[k].dim; i++) {
791
+ na_shape[na_ndim] = nf->aout[k].shape[i];
792
+ dim_map[na_ndim++] = i + lp->ndim;
793
+ }
794
+
795
+ // find inplace from input arrays
796
+ if (k==0 && NDF_TEST(nf,NDF_INPLACE)) {
797
+ v = ndloop_find_inplace(nf,lp,type,na_ndim,na_shape,args);
798
+ }
799
+ if (!RTEST(v)) {
800
+ // new object
801
+ v = rb_narray_new(type, na_ndim, na_shape);
802
+ flag = NDL_WRITE;
803
+ }
804
+
805
+ j = lp->nin + k;
806
+ ndloop_set_stepidx(lp, j, v, dim_map, flag);
807
+ LARG(lp,j).ndim = nd = nf->aout[k].dim;
808
+ if (nd > 0) {
809
+ LARG(lp,j).shape = nf->aout[k].shape;
810
+ }
811
+
812
+ return v;
813
+ }
814
+
815
+ static VALUE
816
+ ndloop_set_output(ndfunc_t *nf, na_md_loop_t *lp, VALUE args)
817
+ {
818
+ int i, j, k, idx;
819
+ volatile VALUE v, t, results;
820
+ VALUE init;
821
+
822
+ int max_nd = lp->ndim + lp->user.ndim;
823
+
824
+ // output results
825
+ results = rb_ary_new2(nf->nout);
826
+
827
+ for (k=0; k<nf->nout; k++) {
828
+ t = nf->aout[k].type;
829
+ t = ndloop_get_arg_type(nf,args,t);
830
+
831
+ if (rb_obj_is_kind_of(t, rb_cClass)) {
832
+ if (RTEST(rb_class_inherited_p(t, cNArray))) {
833
+ // NArray
834
+ v = ndloop_set_output_narray(nf,lp,k,t,args);
835
+ rb_ary_push(results, v);
836
+ }
837
+ else if (RTEST(rb_class_inherited_p(t, rb_cArray))) {
838
+ // Ruby Array
839
+ j = lp->nin + k;
840
+ for (i=0; i<=max_nd; i++) {
841
+ LITER(lp,i,j).step = sizeof(VALUE);
842
+ }
843
+ LARG(lp,j).value = t;
844
+ LARG(lp,j).elmsz = sizeof(VALUE);
845
+ } else {
846
+ rb_raise(rb_eRuntimeError,"ndloop_set_output: invalid for type");
847
+ }
848
+ }
849
+ }
850
+
851
+ // initialilzer
852
+ k = lp->init_aidx;
853
+ if (k > -1) {
854
+ idx = nf->ain[k].dim;
855
+ v = RARRAY_AREF(results,idx);
856
+ init = RARRAY_AREF(args,k);
857
+ na_store(v,init);
858
+ }
859
+
860
+ return results;
861
+ }
862
+
863
+
864
+ static void
865
+ ndfunc_contract_loop(na_md_loop_t *lp)
866
+ {
867
+ int i,j,k,success,cnt=0;
868
+ int red0, redi;
869
+
870
+ redi = na_test_reduce(lp->reduce,0);
871
+
872
+ //for (i=0; i<lp->ndim; i++) {
873
+ // printf("lp->n[%d]=%lu\n",i,lp->n[i]);
874
+ //}
875
+
876
+ for (i=1; i<lp->ndim; i++) {
877
+ red0 = redi;
878
+ redi = na_test_reduce(lp->reduce,i);
879
+ //printf("contract i=%d reduce_cond=%d %d\n",i,red0,redi);
880
+ if (red0 != redi) {
881
+ continue;
882
+ }
883
+ success = 1;
884
+ for (j=0; j<lp->narg; j++) {
885
+ if (!(LITER(lp,i,j).idx == NULL &&
886
+ LITER(lp,i-1,j).idx == NULL &&
887
+ LITER(lp,i-1,j).step == LITER(lp,i,j).step*(ssize_t)(lp->n[i]))) {
888
+ success = 0;
889
+ break;
890
+ }
891
+ }
892
+ if (success) {
893
+ //printf("contract i=%d-th and %d-th\n",i-1,i);
894
+ // contract (i-1)-th and i-th dimension
895
+ lp->n[i] *= lp->n[i-1];
896
+ // shift dimensions
897
+ for (k=i-1; k>cnt; k--) {
898
+ lp->n[k] = lp->n[k-1];
899
+ }
900
+ //printf("k=%d\n",k);
901
+ for (; k>=0; k--) {
902
+ lp->n[k] = 1;
903
+ }
904
+ for (j=0; j<lp->narg; j++) {
905
+ for (k=i-1; k>cnt; k--) {
906
+ LITER(lp,k,j) = LITER(lp,k-1,j);
907
+ }
908
+ }
909
+ if (redi) {
910
+ lp->reduce_dim--;
911
+ }
912
+ cnt++;
913
+ }
914
+ }
915
+ //printf("contract cnt=%d\n",cnt);
916
+ if (cnt>0) {
917
+ for (j=0; j<lp->narg; j++) {
918
+ lp->xargs[j].iter = &LITER(lp,cnt,j);
919
+ }
920
+ lp->n = &(lp->n[cnt]);
921
+ lp->ndim -= cnt;
922
+ //for (i=0; i<lp->ndim; i++) {printf("lp->n[%d]=%lu\n",i,lp->n[i]);}
923
+ }
924
+ }
925
+
926
+
927
+ static void
928
+ ndfunc_set_user_loop(ndfunc_t *nf, na_md_loop_t *lp)
929
+ {
930
+ int j, ud=0;
931
+
932
+ if (lp->reduce_dim > 0) {
933
+ ud = lp->reduce_dim;
934
+ }
935
+ else if (lp->ndim > 0 && NDF_TEST(nf,NDF_HAS_LOOP)) {
936
+ ud = 1;
937
+ }
938
+ else {
939
+ goto skip_ud;
940
+ }
941
+ if (ud > lp->ndim) {
942
+ rb_bug("Reduce-dimension is larger than loop-dimension");
943
+ }
944
+ // increase user dimension
945
+ lp->user.ndim += ud;
946
+ lp->ndim -= ud;
947
+ for (j=0; j<lp->narg; j++) {
948
+ if (LARG(lp,j).shape) {
949
+ rb_bug("HAS_LOOP or reduce-dimension=%d conflicts with user-dimension",lp->reduce_dim);
950
+ }
951
+ LARG(lp,j).ndim += ud;
952
+ LARG(lp,j).shape = &(lp->n[lp->ndim]);
953
+ //printf("LARG(lp,j).ndim=%d,LARG(lp,j).shape=%lx\n",LARG(lp,j).ndim,(size_t)LARG(lp,j).shape);
954
+ }
955
+ //printf("lp->reduce_dim=%d lp->user.ndim=%d lp->ndim=%d\n",lp->reduce_dim,lp->user.ndim,lp->ndim);
956
+
957
+ skip_ud:
958
+ lp->user.n = &(lp->n[lp->ndim]);
959
+ for (j=0; j<lp->narg; j++) {
960
+ LARG(lp,j).iter = &LITER(lp,lp->ndim,j);
961
+ //printf("in ndfunc_set_user_loop: lp->user.args[%d].iter=%lx\n",j,(size_t)(LARG(lp,j).iter));
962
+ }
963
+ }
964
+
965
+
966
+ static void
967
+ ndfunc_set_bufcp(na_md_loop_t *lp, unsigned int loop_spec)
968
+ {
969
+ unsigned int f;
970
+ int i, j;
971
+ int nd, ndim;
972
+ bool zero_step;
973
+ ssize_t n, sz, elmsz, stride, n_total; //, last_step;
974
+ size_t *buf_shape;
975
+ na_loop_iter_t *buf_iter=NULL, *src_iter;
976
+
977
+ //if (loop_spec==0) return;
978
+
979
+ n_total = lp->user.n[0];
980
+ for (i=1; i<lp->user.ndim; i++) {
981
+ n_total *= lp->user.n[i];
982
+ }
983
+
984
+ //for (j=0; j<lp->nin; j++) {
985
+ for (j=0; j<lp->narg; j++) {
986
+ //ndim = nd = lp->user.ndim;
987
+ ndim = nd = LARG(lp,j).ndim;
988
+ sz = elmsz = LARG(lp,j).elmsz;
989
+ src_iter = LARG(lp,j).iter;
990
+ //last_step = src_iter[ndim-1].step;
991
+ f = 0;
992
+ zero_step = 1;
993
+ for (i=ndim; i>0; ) {
994
+ i--;
995
+ if (LARG(lp,j).shape) {
996
+ n = LARG(lp,j).shape[i];
997
+ } else {
998
+ printf("shape is NULL\n");
999
+ n = lp->user.n[i];
1000
+ }
1001
+ stride = sz * n;
1002
+ //printf("{j=%d,i=%d,ndim=%d,nd=%d,idx=%lx,step=%ld,n=%ld,sz=%ld,stride=%ld}\n",j,i,ndim,nd,(size_t)src_iter[i].idx,src_iter[i].step,n,sz,stride);
1003
+ if (src_iter[i].idx) {
1004
+ f |= 2; // INDEX LOOP
1005
+ zero_step = 0;
1006
+ } else {
1007
+ if (src_iter[i].step != sz) {
1008
+ f |= 1; // NON_CONTIGUOUS LOOP
1009
+ } else {
1010
+ // CONTIGUOUS LOOP
1011
+ if (i==ndim-1) { // contract if last dimension
1012
+ ndim = i;
1013
+ elmsz = stride;
1014
+ }
1015
+ }
1016
+ if (src_iter[i].step != 0) {
1017
+ zero_step = 0;
1018
+ }
1019
+ }
1020
+ sz = stride;
1021
+ }
1022
+ //printf("[j=%d f=%d loop_spec=%d zero_step=%d]\n",j,f,loop_spec,zero_step);
1023
+
1024
+ if (zero_step) {
1025
+ // no buffer needed
1026
+ continue;
1027
+ }
1028
+
1029
+ // should check flatten-able loop to avoid buffering
1030
+
1031
+
1032
+ // over loop_spec or reduce_loop is not contiguous
1033
+ if (f & loop_spec || (lp->reduce_dim > 1 && ndim > 0)) {
1034
+ //printf("(buf,nd=%d)",nd);
1035
+ buf_iter = ALLOC_N(na_loop_iter_t,nd+3);
1036
+ buf_shape = ALLOC_N(size_t,nd);
1037
+ buf_iter[nd].pos = 0;
1038
+ buf_iter[nd].step = 0;
1039
+ buf_iter[nd].idx = NULL;
1040
+ sz = LARG(lp,j).elmsz;
1041
+ //last_step = sz;
1042
+ for (i=nd; i>0; ) {
1043
+ i--;
1044
+ buf_iter[i].pos = 0;
1045
+ buf_iter[i].step = sz;
1046
+ buf_iter[i].idx = NULL;
1047
+ //n = lp->user.n[i];
1048
+ n = LARG(lp,j).shape[i];
1049
+ buf_shape[i] = n;
1050
+ sz *= n;
1051
+ }
1052
+ LBUFCP(lp,j) = ALLOC(na_buffer_copy_t);
1053
+ LBUFCP(lp,j)->ndim = ndim;
1054
+ LBUFCP(lp,j)->elmsz = elmsz;
1055
+ LBUFCP(lp,j)->n = buf_shape;
1056
+ LBUFCP(lp,j)->src_iter = src_iter;
1057
+ LBUFCP(lp,j)->buf_iter = buf_iter;
1058
+ LARG(lp,j).iter = buf_iter;
1059
+ //printf("in ndfunc_set_bufcp(1): lp->user.args[%d].iter=%lx\n",j,(size_t)(LARG(lp,j).iter));
1060
+ LBUFCP(lp,j)->src_ptr = LARG(lp,j).ptr;
1061
+ LARG(lp,j).ptr = LBUFCP(lp,j)->buf_ptr = xmalloc(sz);
1062
+ //printf("(LBUFCP(lp,%d)->buf_ptr=%lx)\n",j,(size_t)(LBUFCP(lp,j)->buf_ptr));
1063
+ }
1064
+ }
1065
+
1066
+ #if 0
1067
+ for (j=0; j<lp->narg; j++) {
1068
+ ndim = lp->user.ndim;
1069
+ src_iter = LARG(lp,j).iter;
1070
+ last_step = src_iter[ndim-1].step;
1071
+ if (lp->reduce_dim>1) {
1072
+ //printf("(reduce_dim=%d,ndim=%d,nd=%d,n=%ld,lst=%ld)\n",lp->reduce_dim,ndim,nd,n_total,last_step);
1073
+ buf_iter = ALLOC_N(na_loop_iter_t,2);
1074
+ buf_iter[0].pos = LARG(lp,j).iter[0].pos;
1075
+ buf_iter[0].step = last_step;
1076
+ buf_iter[0].idx = NULL;
1077
+ buf_iter[1].pos = 0;
1078
+ buf_iter[1].step = 0;
1079
+ buf_iter[1].idx = NULL;
1080
+ LARG(lp,j).iter = buf_iter;
1081
+ //printf("in ndfunc_set_bufcp(2): lp->user.args[%d].iter=%lx\n",j,(size_t)(LARG(lp,j).iter));
1082
+ lp->xargs[j].free_user_iter = 1;
1083
+ }
1084
+ }
1085
+ #endif
1086
+
1087
+ // flatten reduce dimensions
1088
+ if (lp->reduce_dim > 1) {
1089
+ #if 1
1090
+ for (j=0; j<lp->narg; j++) {
1091
+ ndim = lp->user.ndim;
1092
+ LARG(lp,j).iter[0].step = LARG(lp,j).iter[ndim-1].step;
1093
+ LARG(lp,j).iter[0].idx = NULL;
1094
+ }
1095
+ #endif
1096
+ lp->user.n[0] = n_total;
1097
+ lp->user.ndim = 1;
1098
+ }
1099
+ }
1100
+
1101
+
1102
+ static void
1103
+ ndloop_copy_to_buffer(na_buffer_copy_t *lp)
1104
+ {
1105
+ size_t *c;
1106
+ char *src, *buf;
1107
+ int i;
1108
+ int nd = lp->ndim;
1109
+ size_t elmsz = lp->elmsz;
1110
+ size_t buf_pos = 0;
1111
+ DBG(size_t j);
1112
+
1113
+ //printf("\nto_buf nd=%d elmsz=%ld\n",nd,elmsz);
1114
+ DBG(printf("<to buf> ["));
1115
+ // zero-dimension
1116
+ if (nd==0) {
1117
+ src = lp->src_ptr + LITER_SRC(lp,0).pos;
1118
+ buf = lp->buf_ptr;
1119
+ memcpy(buf,src,elmsz);
1120
+ DBG(for (j=0; j<elmsz/8; j++) {printf("%g,",((double*)(buf))[j]);});
1121
+ goto loop_end;
1122
+ }
1123
+ // initialize loop counter
1124
+ c = ALLOCA_N(size_t, nd+1);
1125
+ for (i=0; i<=nd; i++) c[i]=0;
1126
+ // loop body
1127
+ for (i=0;;) {
1128
+ // i-th dimension
1129
+ for (; i<nd; i++) {
1130
+ if (LITER_SRC(lp,i).idx) {
1131
+ LITER_SRC(lp,i+1).pos = LITER_SRC(lp,i).pos + LITER_SRC(lp,i).idx[c[i]];
1132
+ } else {
1133
+ LITER_SRC(lp,i+1).pos = LITER_SRC(lp,i).pos + LITER_SRC(lp,i).step*c[i];
1134
+ }
1135
+ }
1136
+ src = lp->src_ptr + LITER_SRC(lp,nd).pos;
1137
+ buf = lp->buf_ptr + buf_pos;
1138
+ memcpy(buf,src,elmsz);
1139
+ DBG(for (j=0; j<elmsz/8; j++) {printf("%g,",((double*)(buf))[j]);});
1140
+ buf_pos += elmsz;
1141
+ // count up
1142
+ for (;;) {
1143
+ if (i<=0) goto loop_end;
1144
+ i--;
1145
+ if (++c[i] < lp->n[i]) break;
1146
+ c[i] = 0;
1147
+ }
1148
+ }
1149
+ loop_end:
1150
+ ;
1151
+ DBG(printf("]\n"));
1152
+ }
1153
+
1154
+ static void
1155
+ ndloop_copy_from_buffer(na_buffer_copy_t *lp)
1156
+ {
1157
+ size_t *c;
1158
+ char *src, *buf;
1159
+ int i;
1160
+ int nd = lp->ndim;
1161
+ size_t elmsz = lp->elmsz;
1162
+ size_t buf_pos = 0;
1163
+ DBG(size_t j);
1164
+
1165
+ //printf("\nfrom_buf nd=%d elmsz=%ld\n",nd,elmsz);
1166
+ DBG(printf("<from buf> ["));
1167
+ // zero-dimension
1168
+ if (nd==0) {
1169
+ src = lp->src_ptr + LITER_SRC(lp,0).pos;
1170
+ buf = lp->buf_ptr;
1171
+ memcpy(src,buf,elmsz);
1172
+ DBG(for (j=0; j<elmsz/8; j++) {printf("%g,",((double*)(src))[j]);});
1173
+ goto loop_end;
1174
+ }
1175
+ // initialize loop counter
1176
+ c = ALLOCA_N(size_t, nd+1);
1177
+ for (i=0; i<=nd; i++) c[i]=0;
1178
+ // loop body
1179
+ for (i=0;;) {
1180
+ // i-th dimension
1181
+ for (; i<nd; i++) {
1182
+ if (LITER_SRC(lp,i).idx) {
1183
+ LITER_SRC(lp,i+1).pos = LITER_SRC(lp,i).pos + LITER_SRC(lp,i).idx[c[i]];
1184
+ } else {
1185
+ LITER_SRC(lp,i+1).pos = LITER_SRC(lp,i).pos + LITER_SRC(lp,i).step*c[i];
1186
+ }
1187
+ }
1188
+ src = lp->src_ptr + LITER_SRC(lp,nd).pos;
1189
+ buf = lp->buf_ptr + buf_pos;
1190
+ memcpy(src,buf,elmsz);
1191
+ DBG(for (j=0; j<elmsz/8; j++) {printf("%g,",((double*)(src))[j]);});
1192
+ buf_pos += elmsz;
1193
+ // count up
1194
+ for (;;) {
1195
+ if (i<=0) goto loop_end;
1196
+ i--;
1197
+ if (++c[i] < lp->n[i]) break;
1198
+ c[i] = 0;
1199
+ }
1200
+ }
1201
+ loop_end:
1202
+ DBG(printf("]\n"));
1203
+ }
1204
+
1205
+
1206
+ static void
1207
+ ndfunc_write_back(ndfunc_t *nf, na_md_loop_t *lp, VALUE orig_args, VALUE results)
1208
+ {
1209
+ VALUE src, dst;
1210
+
1211
+ if (lp->writeback >= 0) {
1212
+ dst = RARRAY_AREF(orig_args,lp->writeback);
1213
+ src = RARRAY_AREF(results,0);
1214
+ na_store(dst,src);
1215
+ RARRAY_ASET(results,0,dst);
1216
+ }
1217
+ }
1218
+
1219
+
1220
+ static VALUE
1221
+ ndloop_extract(VALUE results, ndfunc_t *nf)
1222
+ {
1223
+ static ID id_extract = 0;
1224
+ long n, i;
1225
+ VALUE x, y;
1226
+ narray_t *na;
1227
+
1228
+ if (id_extract==0) {
1229
+ id_extract = rb_intern("extract");
1230
+ }
1231
+
1232
+ // extract result objects
1233
+ switch(nf->nout) {
1234
+ case 0:
1235
+ return Qnil;
1236
+ case 1:
1237
+ x = RARRAY_AREF(results,0);
1238
+ if (NDF_TEST(nf,NDF_EXTRACT)) {
1239
+ if (IsNArray(x)){
1240
+ GetNArray(x,na);
1241
+ if (NA_NDIM(na)==0) {
1242
+ x = rb_funcall(x, id_extract, 0);
1243
+ }
1244
+ }
1245
+ }
1246
+ return x;
1247
+ }
1248
+ if (NDF_TEST(nf,NDF_EXTRACT)) {
1249
+ n = RARRAY_LEN(results);
1250
+ for (i=0; i<n; i++) {
1251
+ x = RARRAY_AREF(results,i);
1252
+ if (IsNArray(x)){
1253
+ GetNArray(x,na);
1254
+ if (NA_NDIM(na)==0) {
1255
+ y = rb_funcall(x, id_extract, 0);
1256
+ RARRAY_ASET(results,i,y);
1257
+ }
1258
+ }
1259
+ }
1260
+ }
1261
+ return results;
1262
+ }
1263
+
1264
+
1265
+ static void
1266
+ loop_narray(ndfunc_t *nf, na_md_loop_t *lp);
1267
+
1268
+ static VALUE
1269
+ ndloop_run(VALUE vlp)
1270
+ {
1271
+ unsigned int loop_spec;
1272
+ volatile VALUE args, orig_args, results;
1273
+ na_md_loop_t *lp = (na_md_loop_t*)(vlp);
1274
+ ndfunc_t *nf;
1275
+
1276
+ orig_args = lp->vargs;
1277
+ nf = lp->ndfunc;
1278
+
1279
+ args = rb_obj_dup(orig_args);
1280
+
1281
+ // setup ndloop iterator with arguments
1282
+ ndloop_init_args(nf, lp, args);
1283
+ results = ndloop_set_output(nf, lp, args);
1284
+
1285
+ //if (na_debug_flag) {
1286
+ // printf("-- ndloop_set_output --\n");
1287
+ // print_ndloop(lp);
1288
+ //}
1289
+
1290
+ // contract loop
1291
+ if (lp->loop_func == loop_narray) {
1292
+ ndfunc_contract_loop(lp);
1293
+ //if (na_debug_flag) {
1294
+ // printf("-- ndfunc_contract_loop --\n");
1295
+ // print_ndloop(lp);
1296
+ //}
1297
+ }
1298
+
1299
+ // setup objects in which resuts are stored
1300
+ ndfunc_set_user_loop(nf, lp);
1301
+
1302
+ // setup buffering during loop
1303
+ if (lp->loop_func == loop_narray) {
1304
+ loop_spec = ndloop_func_loop_spec(nf, lp->user.ndim);
1305
+ ndfunc_set_bufcp(lp, loop_spec);
1306
+ if (na_debug_flag) {
1307
+ printf("-- ndfunc_set_bufcp --\n");
1308
+ print_ndloop(lp);
1309
+ }
1310
+ }
1311
+
1312
+ // loop
1313
+ (*(lp->loop_func))(nf, lp);
1314
+
1315
+ if (na_debug_flag) {
1316
+ printf("-- after loop --\n");
1317
+ print_ndloop(lp);
1318
+ }
1319
+
1320
+ if (RTEST(lp->user.err_type)) {
1321
+ rb_raise(lp->user.err_type, "error in NArray operation");
1322
+ }
1323
+
1324
+ // write-back will be placed here
1325
+ ndfunc_write_back(nf, lp, orig_args, results);
1326
+
1327
+ // extract result objects
1328
+ return ndloop_extract(results, nf);
1329
+ }
1330
+
1331
+
1332
+ // ---------------------------------------------------------------------------
1333
+
1334
+ static void
1335
+ loop_narray(ndfunc_t *nf, na_md_loop_t *lp)
1336
+ {
1337
+ size_t *c;
1338
+ int i, j;
1339
+ int nd = lp->ndim;
1340
+
1341
+ if (nd<0) {
1342
+ rb_bug("bug? lp->ndim = %d\n", lp->ndim);
1343
+ }
1344
+
1345
+ if (nd==0) {
1346
+ for (j=0; j<lp->nin; j++) {
1347
+ if (lp->xargs[j].bufcp) {
1348
+ //printf("copy_to_buffer j=%d\n",j);
1349
+ ndloop_copy_to_buffer(lp->xargs[j].bufcp);
1350
+ }
1351
+ }
1352
+ (*(nf->func))(&(lp->user));
1353
+ for (j=0; j<lp->narg; j++) {
1354
+ if (lp->xargs[j].bufcp && (lp->xargs[j].flag & NDL_WRITE)) {
1355
+ //printf("copy_from_buffer j=%d\n",j);
1356
+ // copy data to work buffer
1357
+ ndloop_copy_from_buffer(lp->xargs[j].bufcp);
1358
+ }
1359
+ }
1360
+ return;
1361
+ }
1362
+
1363
+ // initialize loop counter
1364
+ c = ALLOCA_N(size_t, nd+1);
1365
+ for (i=0; i<=nd; i++) c[i]=0;
1366
+
1367
+ // loop body
1368
+ for (i=0;;) {
1369
+ // i-th dimension
1370
+ for (; i<nd; i++) {
1371
+ // j-th argument
1372
+ for (j=0; j<lp->narg; j++) {
1373
+ if (LITER(lp,i,j).idx) {
1374
+ LITER(lp,i+1,j).pos = LITER(lp,i,j).pos + LITER(lp,i,j).idx[c[i]];
1375
+ } else {
1376
+ LITER(lp,i+1,j).pos = LITER(lp,i,j).pos + LITER(lp,i,j).step*c[i];
1377
+ }
1378
+ //printf("j=%d c[i=%d]=%lu pos=%lu\n",j,i,c[i],LITER(lp,i+1,j).pos);
1379
+ }
1380
+ }
1381
+ for (j=0; j<lp->nin; j++) {
1382
+ if (lp->xargs[j].bufcp) {
1383
+ // copy data to work buffer
1384
+ // cp lp->iter[j][nd..*] to lp->user.args[j].iter[0..*]
1385
+ //printf("copy_to_buffer j=%d\n",j);
1386
+ ndloop_copy_to_buffer(lp->xargs[j].bufcp);
1387
+ }
1388
+ }
1389
+ (*(nf->func))(&(lp->user));
1390
+ for (j=0; j<lp->narg; j++) {
1391
+ if (lp->xargs[j].bufcp && (lp->xargs[j].flag & NDL_WRITE)) {
1392
+ // copy data to work buffer
1393
+ //printf("copy_from_buffer j=%d\n",j);
1394
+ ndloop_copy_from_buffer(lp->xargs[j].bufcp);
1395
+ }
1396
+ }
1397
+ if (RTEST(lp->user.err_type)) {return;}
1398
+
1399
+ for (;;) {
1400
+ if (i<=0) goto loop_end;
1401
+ i--;
1402
+ if (++c[i] < lp->n[i]) break;
1403
+ c[i] = 0;
1404
+ }
1405
+ }
1406
+ loop_end:
1407
+ ;
1408
+ }
1409
+
1410
+
1411
+ VALUE
1412
+ na_ndloop_main(ndfunc_t *nf, VALUE args, void *opt_ptr)
1413
+ {
1414
+ unsigned int copy_flag;
1415
+ na_md_loop_t lp;
1416
+
1417
+ if (na_debug_flag) print_ndfunc(nf);
1418
+
1419
+ // cast arguments to NArray
1420
+ copy_flag = ndloop_cast_args(nf, args);
1421
+
1422
+ // allocate ndloop struct
1423
+ ndloop_alloc(&lp, nf, args, opt_ptr, copy_flag, loop_narray);
1424
+
1425
+ return rb_ensure(ndloop_run, (VALUE)&lp, ndloop_release, (VALUE)&lp);
1426
+ }
1427
+
1428
+
1429
+ VALUE
1430
+ #ifdef HAVE_STDARG_PROTOTYPES
1431
+ na_ndloop(ndfunc_t *nf, int argc, ...)
1432
+ #else
1433
+ na_ndloop(nf, argc, va_alist)
1434
+ ndfunc_t *nf;
1435
+ int argc;
1436
+ va_dcl
1437
+ #endif
1438
+ {
1439
+ va_list ar;
1440
+
1441
+ int i;
1442
+ VALUE *argv;
1443
+ volatile VALUE args;
1444
+
1445
+ argv = ALLOCA_N(VALUE,argc);
1446
+
1447
+ va_init_list(ar, argc);
1448
+ for (i=0; i<argc; i++) {
1449
+ argv[i] = va_arg(ar, VALUE);
1450
+ }
1451
+ va_end(ar);
1452
+
1453
+ args = rb_ary_new4(argc, argv);
1454
+
1455
+ return na_ndloop_main(nf, args, NULL);
1456
+ }
1457
+
1458
+
1459
+ VALUE
1460
+ na_ndloop2(ndfunc_t *nf, VALUE args)
1461
+ {
1462
+ return na_ndloop_main(nf, args, NULL);
1463
+ }
1464
+
1465
+ VALUE
1466
+ #ifdef HAVE_STDARG_PROTOTYPES
1467
+ na_ndloop3(ndfunc_t *nf, void *ptr, int argc, ...)
1468
+ #else
1469
+ na_ndloop3(nf, ptr, argc, va_alist)
1470
+ ndfunc_t *nf;
1471
+ void *ptr;
1472
+ int argc;
1473
+ va_dcl
1474
+ #endif
1475
+ {
1476
+ va_list ar;
1477
+
1478
+ int i;
1479
+ VALUE *argv;
1480
+ volatile VALUE args;
1481
+
1482
+ argv = ALLOCA_N(VALUE,argc);
1483
+
1484
+ va_init_list(ar, argc);
1485
+ for (i=0; i<argc; i++) {
1486
+ argv[i] = va_arg(ar, VALUE);
1487
+ }
1488
+ va_end(ar);
1489
+
1490
+ args = rb_ary_new4(argc, argv);
1491
+
1492
+ return na_ndloop_main(nf, args, ptr);
1493
+ }
1494
+
1495
+ VALUE
1496
+ na_ndloop4(ndfunc_t *nf, void *ptr, VALUE args)
1497
+ {
1498
+ return na_ndloop_main(nf, args, ptr);
1499
+ }
1500
+
1501
+ //----------------------------------------------------------------------
1502
+
1503
+ VALUE
1504
+ na_info_str(VALUE ary)
1505
+ {
1506
+ int nd, i;
1507
+ char tmp[32];
1508
+ VALUE buf;
1509
+ narray_t *na;
1510
+
1511
+ GetNArray(ary,na);
1512
+ nd = na->ndim;
1513
+
1514
+ buf = rb_str_new2(rb_class2name(CLASS_OF(ary)));
1515
+ if (NA_TYPE(na) == NARRAY_VIEW_T) {
1516
+ rb_str_cat(buf,"(view)",6);
1517
+ }
1518
+ rb_str_cat(buf,"#shape=[",8);
1519
+ if (nd>0) {
1520
+ for (i=0;;) {
1521
+ sprintf(tmp,"%"SZF"u",na->shape[i]);
1522
+ rb_str_cat2(buf,tmp);
1523
+ if (++i==nd) break;
1524
+ rb_str_cat(buf,",",1);
1525
+ }
1526
+ }
1527
+ rb_str_cat(buf,"]",1);
1528
+ return buf;
1529
+ }
1530
+
1531
+
1532
+ //----------------------------------------------------------------------
1533
+
1534
+ static void
1535
+ ndloop_inspect_get_width(int *ncol, int *nrow)
1536
+ {
1537
+ VALUE cols, rows;
1538
+
1539
+ cols = rb_ivar_get(cNArray,rb_intern("inspect_cols"));
1540
+ if (RTEST(cols)) {
1541
+ *ncol = NUM2INT(cols);
1542
+ } else {
1543
+ *ncol = 80;
1544
+ }
1545
+
1546
+ rows = rb_ivar_get(cNArray,rb_intern("inspect_rows"));
1547
+ if (RTEST(rows)) {
1548
+ *nrow = NUM2INT(rows);
1549
+ } else {
1550
+ *nrow = 20;
1551
+ }
1552
+ }
1553
+
1554
+ static void
1555
+ loop_inspect(ndfunc_t *nf, na_md_loop_t *lp)
1556
+ {
1557
+ int nd, i, ii;
1558
+ size_t *c;
1559
+ int col=0, row=0;
1560
+ long len;
1561
+ VALUE str;
1562
+ int ncol;
1563
+ int nrow;
1564
+ na_text_func_t func = (na_text_func_t)(nf->func);
1565
+ VALUE buf, opt;
1566
+
1567
+ nd = lp->ndim;
1568
+ buf = lp->loop_opt;
1569
+ //opt = *(VALUE*)(lp->user.opt_ptr);
1570
+ opt = lp->user.option;
1571
+
1572
+ for (i=0; i<nd; i++) {
1573
+ if (lp->n[i] == 0) {
1574
+ rb_str_cat(buf,"[]",2);
1575
+ return;
1576
+ }
1577
+ }
1578
+
1579
+ rb_str_cat(buf,"\n",1);
1580
+
1581
+ c = ALLOCA_N(size_t, nd+1);
1582
+ for (i=0; i<=nd; i++) c[i]=0;
1583
+
1584
+ if (nd>0) {
1585
+ rb_str_cat(buf,"[",1);
1586
+ } else {
1587
+ rb_str_cat(buf,"",0);
1588
+ }
1589
+
1590
+ ndloop_inspect_get_width(&ncol,&nrow);
1591
+
1592
+ col = nd*2;
1593
+ for (i=0;;) {
1594
+ if (i<nd-1) {
1595
+ for (ii=0; ii<i; ii++) rb_str_cat(buf," ",1);
1596
+ for (; ii<nd-1; ii++) rb_str_cat(buf,"[",1);
1597
+ }
1598
+ for (; i<nd; i++) {
1599
+ if (LITER(lp,i,0).idx) {
1600
+ LITER(lp,i+1,0).pos = LITER(lp,i,0).pos + LITER(lp,i,0).idx[c[i]];
1601
+ } else {
1602
+ LITER(lp,i+1,0).pos = LITER(lp,i,0).pos + LITER(lp,i,0).step*c[i];
1603
+ }
1604
+ }
1605
+ str = (*func)(LARG(lp,0).ptr, LITER(lp,i,0).pos, opt);
1606
+
1607
+ len = RSTRING_LEN(str) + 2;
1608
+ if (ncol>0 && col+len > ncol-3) {
1609
+ rb_str_cat(buf,"...",3);
1610
+ c[i-1] = lp->n[i-1];
1611
+ } else {
1612
+ rb_str_append(buf, str);
1613
+ col += len;
1614
+ }
1615
+ for (;;) {
1616
+ if (i==0) goto loop_end;
1617
+ i--;
1618
+ if (++c[i] < lp->n[i]) break;
1619
+ rb_str_cat(buf,"]",1);
1620
+ c[i] = 0;
1621
+ }
1622
+ //line_break:
1623
+ rb_str_cat(buf,", ",2);
1624
+ if (i<nd-1) {
1625
+ rb_str_cat(buf,"\n ",2);
1626
+ col = nd*2;
1627
+ row++;
1628
+ if (row==nrow) {
1629
+ rb_str_cat(buf,"...",3);
1630
+ goto loop_end;
1631
+ }
1632
+ }
1633
+ }
1634
+ loop_end:
1635
+ ;
1636
+ }
1637
+
1638
+
1639
+ VALUE
1640
+ na_ndloop_inspect(VALUE nary, na_text_func_t func, VALUE opt)
1641
+ {
1642
+ volatile VALUE args;
1643
+ na_md_loop_t lp;
1644
+ VALUE buf;
1645
+ ndfunc_arg_in_t ain[3] = {{Qnil,0},{sym_loop_opt},{sym_option}};
1646
+ ndfunc_t nf = { (na_iter_func_t)func, NO_LOOP, 3, 0, ain, 0 };
1647
+ //nf = ndfunc_alloc(NULL, NO_LOOP, 1, 0, Qnil);
1648
+
1649
+ buf = na_info_str(nary);
1650
+
1651
+ if (na_get_pointer(nary)==NULL) {
1652
+ return rb_str_cat(buf,"(empty)",7);
1653
+ }
1654
+
1655
+ //rb_p(args);
1656
+ //if (na_debug_flag) print_ndfunc(&nf);
1657
+
1658
+ args = rb_ary_new3(3,nary,buf,opt);
1659
+
1660
+ // cast arguments to NArray
1661
+ //ndloop_cast_args(nf, args);
1662
+
1663
+ // allocate ndloop struct
1664
+ ndloop_alloc(&lp, &nf, args, NULL, 0, loop_inspect);
1665
+
1666
+ rb_ensure(ndloop_run, (VALUE)&lp, ndloop_release, (VALUE)&lp);
1667
+
1668
+ return buf;
1669
+ }
1670
+
1671
+
1672
+ //----------------------------------------------------------------------
1673
+
1674
+ static void
1675
+ loop_rarray_to_narray(ndfunc_t *nf, na_md_loop_t *lp)
1676
+ {
1677
+ size_t *c;
1678
+ int i;
1679
+ VALUE *a;
1680
+ int nd = lp->ndim;
1681
+
1682
+ // counter
1683
+ c = ALLOCA_N(size_t, nd+1);
1684
+ for (i=0; i<=nd; i++) c[i]=0;
1685
+
1686
+ // array at each dimension
1687
+ a = ALLOCA_N(VALUE, nd+1);
1688
+ a[0] = LARG(lp,0).value;
1689
+
1690
+ // loop body
1691
+ for (i=0;;) {
1692
+ for (; i<nd; i++) {
1693
+ if (LITER(lp,i,1).idx) {
1694
+ LITER(lp,i+1,1).pos = LITER(lp,i,1).pos + LITER(lp,i,1).idx[c[i]];
1695
+ } else {
1696
+ LITER(lp,i+1,1).pos = LITER(lp,i,1).pos + LITER(lp,i,1).step*c[i];
1697
+ }
1698
+ //LITER(lp,i+1,0).pos = LITER(lp,i,0).pos + c[i];
1699
+ if (TYPE(a[i])==T_ARRAY) {
1700
+ if (c[i] < (size_t)RARRAY_LEN(a[i])) {
1701
+ a[i+1] = RARRAY_AREF(a[i],c[i]);
1702
+ } else {
1703
+ a[i+1] = Qnil;
1704
+ }
1705
+ } else { // not Array -- what about narray?
1706
+ if (c[i]==0) {
1707
+ a[i+1] = a[i];
1708
+ } else {
1709
+ a[i+1] = Qnil;
1710
+ }
1711
+ }
1712
+ //printf("c[i]=%d, i=%d\n",c[i],i);
1713
+ }
1714
+
1715
+ //printf("a[i]=0x%x, i=%d\n",a[i],i);
1716
+ LARG(lp,0).value = a[i];
1717
+
1718
+ (*(nf->func))(&(lp->user));
1719
+
1720
+ for (;;) {
1721
+ if (i<=0) goto loop_end;
1722
+ i--; c[i]++;
1723
+ if (c[i] < lp->n[i]) break;
1724
+ c[i] = 0;
1725
+ }
1726
+ }
1727
+ loop_end:
1728
+ ;
1729
+ }
1730
+
1731
+ VALUE
1732
+ na_ndloop_cast_rarray_to_narray(ndfunc_t *nf, VALUE rary, VALUE nary)
1733
+ {
1734
+ na_md_loop_t lp;
1735
+ VALUE args;
1736
+
1737
+ //rb_p(args);
1738
+ if (na_debug_flag) print_ndfunc(nf);
1739
+
1740
+ args = rb_assoc_new(rary,nary);
1741
+
1742
+ // cast arguments to NArray
1743
+ //ndloop_cast_args(nf, args);
1744
+
1745
+ // allocate ndloop struct
1746
+ ndloop_alloc(&lp, nf, args, NULL, 0, loop_rarray_to_narray);
1747
+
1748
+ return rb_ensure(ndloop_run, (VALUE)&lp, ndloop_release, (VALUE)&lp);
1749
+ }
1750
+
1751
+
1752
+ VALUE
1753
+ na_ndloop_cast_rarray_to_narray2(ndfunc_t *nf, VALUE rary, VALUE nary, VALUE opt)
1754
+ {
1755
+ na_md_loop_t lp;
1756
+ VALUE args;
1757
+
1758
+ //rb_p(args);
1759
+ if (na_debug_flag) print_ndfunc(nf);
1760
+
1761
+ //args = rb_assoc_new(rary,nary);
1762
+ args = rb_ary_new3(3,rary,nary,opt);
1763
+
1764
+ // cast arguments to NArray
1765
+ //ndloop_cast_args(nf, args);
1766
+
1767
+ // allocate ndloop struct
1768
+ ndloop_alloc(&lp, nf, args, NULL, 0, loop_rarray_to_narray);
1769
+
1770
+ return rb_ensure(ndloop_run, (VALUE)&lp, ndloop_release, (VALUE)&lp);
1771
+ }
1772
+
1773
+
1774
+ //----------------------------------------------------------------------
1775
+
1776
+ static void
1777
+ loop_narray_to_rarray(ndfunc_t *nf, na_md_loop_t *lp)
1778
+ {
1779
+ size_t *c;
1780
+ int i;
1781
+ //int nargs = nf->narg + nf->nres;
1782
+ int nd = lp->ndim;
1783
+ VALUE *a;
1784
+ volatile VALUE a0;
1785
+
1786
+ // alloc counter
1787
+ c = ALLOCA_N(size_t, nd+1);
1788
+ for (i=0; i<=nd; i++) c[i]=0;
1789
+ //c[i]=1; // for zero-dim
1790
+ //fprintf(stderr,"in loop_narray_to_rarray, nd=%d\n",nd);
1791
+
1792
+ a = ALLOCA_N(VALUE, nd+1);
1793
+ a[0] = a0 = lp->loop_opt;
1794
+
1795
+ // loop body
1796
+ for (i=0;;) {
1797
+ for (; i<nd; i++) {
1798
+ if (LITER(lp,i,0).idx) {
1799
+ LITER(lp,i+1,0).pos = LITER(lp,i,0).pos + LITER(lp,i,0).idx[c[i]];
1800
+ } else {
1801
+ LITER(lp,i+1,0).pos = LITER(lp,i,0).pos + LITER(lp,i,0).step*c[i];
1802
+ }
1803
+ if (c[i]==0) {
1804
+ a[i+1] = rb_ary_new2(lp->n[i]);
1805
+ rb_ary_push(a[i],a[i+1]);
1806
+ }
1807
+ }
1808
+
1809
+ //lp->user.info = a[i];
1810
+ LARG(lp,1).value = a[i];
1811
+ (*(nf->func))(&(lp->user));
1812
+
1813
+ for (;;) {
1814
+ if (i<=0) goto loop_end;
1815
+ i--;
1816
+ if (++c[i] < lp->n[i]) break;
1817
+ c[i] = 0;
1818
+ }
1819
+ }
1820
+ loop_end:
1821
+ ;
1822
+ }
1823
+
1824
+ VALUE
1825
+ na_ndloop_cast_narray_to_rarray(ndfunc_t *nf, VALUE nary, VALUE fmt)
1826
+ {
1827
+ na_md_loop_t lp;
1828
+ VALUE args, a0;
1829
+
1830
+ //rb_p(args);
1831
+ if (na_debug_flag) print_ndfunc(nf);
1832
+
1833
+ a0 = rb_ary_new();
1834
+ args = rb_ary_new3(3,nary,a0,fmt);
1835
+
1836
+ // cast arguments to NArray
1837
+ //ndloop_cast_args(nf, args);
1838
+
1839
+ // allocate ndloop struct
1840
+ ndloop_alloc(&lp, nf, args, NULL, 0, loop_narray_to_rarray);
1841
+
1842
+ rb_ensure(ndloop_run, (VALUE)&lp, ndloop_release, (VALUE)&lp);
1843
+ return RARRAY_AREF(a0,0);
1844
+ }
1845
+
1846
+
1847
+ //----------------------------------------------------------------------
1848
+
1849
+ static void
1850
+ loop_narray_with_index(ndfunc_t *nf, na_md_loop_t *lp)
1851
+ {
1852
+ size_t *c;
1853
+ int i,j;
1854
+ int nd = lp->ndim;
1855
+
1856
+ // pass total ndim to iterator
1857
+ lp->user.ndim += nd;
1858
+
1859
+ // alloc counter
1860
+ lp->user.opt_ptr = c = ALLOCA_N(size_t, nd+1);
1861
+ for (i=0; i<=nd; i++) c[i]=0;
1862
+
1863
+ // loop body
1864
+ for (i=0;;) {
1865
+ for (; i<nd; i++) {
1866
+ // j-th argument
1867
+ for (j=0; j<lp->narg; j++) {
1868
+ if (LITER(lp,i,j).idx) {
1869
+ LITER(lp,i+1,j).pos = LITER(lp,i,j).pos + LITER(lp,i,j).idx[c[i]];
1870
+ } else {
1871
+ LITER(lp,i+1,j).pos = LITER(lp,i,j).pos + LITER(lp,i,j).step*c[i];
1872
+ }
1873
+ //printf("j=%d c[i=%d]=%lu pos=%lu\n",j,i,c[i],LITER(lp,i+1,j).pos);
1874
+ }
1875
+ }
1876
+
1877
+ (*(nf->func))(&(lp->user));
1878
+
1879
+ for (;;) {
1880
+ if (i<=0) goto loop_end;
1881
+ i--;
1882
+ if (++c[i] < lp->n[i]) break;
1883
+ c[i] = 0;
1884
+ }
1885
+ }
1886
+ loop_end:
1887
+ ;
1888
+ }
1889
+
1890
+
1891
+ VALUE
1892
+ #ifdef HAVE_STDARG_PROTOTYPES
1893
+ na_ndloop_with_index(ndfunc_t *nf, int argc, ...)
1894
+ #else
1895
+ na_ndloop(nf, argc, va_alist)
1896
+ ndfunc_t *nf;
1897
+ int argc;
1898
+ va_dcl
1899
+ #endif
1900
+ {
1901
+ va_list ar;
1902
+
1903
+ int i;
1904
+ VALUE *argv;
1905
+ volatile VALUE args;
1906
+ na_md_loop_t lp;
1907
+
1908
+ argv = ALLOCA_N(VALUE,argc);
1909
+
1910
+ va_init_list(ar, argc);
1911
+ for (i=0; i<argc; i++) {
1912
+ argv[i] = va_arg(ar, VALUE);
1913
+ }
1914
+ va_end(ar);
1915
+
1916
+ args = rb_ary_new4(argc, argv);
1917
+
1918
+ //return na_ndloop_main(nf, args, NULL);
1919
+ if (na_debug_flag) print_ndfunc(nf);
1920
+
1921
+ // cast arguments to NArray
1922
+ //copy_flag = ndloop_cast_args(nf, args);
1923
+
1924
+ // allocate ndloop struct
1925
+ ndloop_alloc(&lp, nf, args, 0, 0, loop_narray_with_index);
1926
+
1927
+ return rb_ensure(ndloop_run, (VALUE)&lp, ndloop_release, (VALUE)&lp);
1928
+ }