numo-narray 0.9.0.1-x64-mingw32

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (124) hide show
  1. checksums.yaml +7 -0
  2. data/Gemfile +4 -0
  3. data/README.md +47 -0
  4. data/Rakefile +41 -0
  5. data/ext/numo/narray/SFMT-params.h +97 -0
  6. data/ext/numo/narray/SFMT-params19937.h +46 -0
  7. data/ext/numo/narray/SFMT.c +620 -0
  8. data/ext/numo/narray/SFMT.h +157 -0
  9. data/ext/numo/narray/array.c +525 -0
  10. data/ext/numo/narray/data.c +901 -0
  11. data/ext/numo/narray/depend.erb +33 -0
  12. data/ext/numo/narray/extconf.rb +117 -0
  13. data/ext/numo/narray/gen/bit.erb.c +811 -0
  14. data/ext/numo/narray/gen/cogen.rb +18 -0
  15. data/ext/numo/narray/gen/def/dcomplex.rb +32 -0
  16. data/ext/numo/narray/gen/def/dfloat.rb +30 -0
  17. data/ext/numo/narray/gen/def/int16.rb +29 -0
  18. data/ext/numo/narray/gen/def/int32.rb +29 -0
  19. data/ext/numo/narray/gen/def/int64.rb +29 -0
  20. data/ext/numo/narray/gen/def/int8.rb +29 -0
  21. data/ext/numo/narray/gen/def/robject.rb +30 -0
  22. data/ext/numo/narray/gen/def/scomplex.rb +32 -0
  23. data/ext/numo/narray/gen/def/sfloat.rb +30 -0
  24. data/ext/numo/narray/gen/def/uint16.rb +29 -0
  25. data/ext/numo/narray/gen/def/uint32.rb +29 -0
  26. data/ext/numo/narray/gen/def/uint64.rb +29 -0
  27. data/ext/numo/narray/gen/def/uint8.rb +29 -0
  28. data/ext/numo/narray/gen/dtype.erb.c +328 -0
  29. data/ext/numo/narray/gen/tmpl/accum.c +36 -0
  30. data/ext/numo/narray/gen/tmpl/accum_binary.c +75 -0
  31. data/ext/numo/narray/gen/tmpl/accum_index.c +58 -0
  32. data/ext/numo/narray/gen/tmpl/allocate.c +35 -0
  33. data/ext/numo/narray/gen/tmpl/aref.c +51 -0
  34. data/ext/numo/narray/gen/tmpl/aset.c +61 -0
  35. data/ext/numo/narray/gen/tmpl/binary.c +53 -0
  36. data/ext/numo/narray/gen/tmpl/binary2.c +55 -0
  37. data/ext/numo/narray/gen/tmpl/binary_s.c +34 -0
  38. data/ext/numo/narray/gen/tmpl/bit_binary.c +94 -0
  39. data/ext/numo/narray/gen/tmpl/bit_count.c +82 -0
  40. data/ext/numo/narray/gen/tmpl/bit_unary.c +77 -0
  41. data/ext/numo/narray/gen/tmpl/cast.c +37 -0
  42. data/ext/numo/narray/gen/tmpl/cast_array.c +79 -0
  43. data/ext/numo/narray/gen/tmpl/cast_numeric.c +22 -0
  44. data/ext/numo/narray/gen/tmpl/coerce_cast.c +8 -0
  45. data/ext/numo/narray/gen/tmpl/cond_binary.c +51 -0
  46. data/ext/numo/narray/gen/tmpl/cond_unary.c +45 -0
  47. data/ext/numo/narray/gen/tmpl/cum.c +42 -0
  48. data/ext/numo/narray/gen/tmpl/each.c +43 -0
  49. data/ext/numo/narray/gen/tmpl/each_with_index.c +64 -0
  50. data/ext/numo/narray/gen/tmpl/extract.c +23 -0
  51. data/ext/numo/narray/gen/tmpl/eye.c +91 -0
  52. data/ext/numo/narray/gen/tmpl/fill.c +38 -0
  53. data/ext/numo/narray/gen/tmpl/format.c +60 -0
  54. data/ext/numo/narray/gen/tmpl/format_to_a.c +47 -0
  55. data/ext/numo/narray/gen/tmpl/head.c +25 -0
  56. data/ext/numo/narray/gen/tmpl/inspect.c +16 -0
  57. data/ext/numo/narray/gen/tmpl/map_with_index.c +94 -0
  58. data/ext/numo/narray/gen/tmpl/median.c +44 -0
  59. data/ext/numo/narray/gen/tmpl/minmax.c +47 -0
  60. data/ext/numo/narray/gen/tmpl/poly.c +49 -0
  61. data/ext/numo/narray/gen/tmpl/pow.c +74 -0
  62. data/ext/numo/narray/gen/tmpl/powint.c +17 -0
  63. data/ext/numo/narray/gen/tmpl/qsort.c +149 -0
  64. data/ext/numo/narray/gen/tmpl/rand.c +33 -0
  65. data/ext/numo/narray/gen/tmpl/rand_norm.c +46 -0
  66. data/ext/numo/narray/gen/tmpl/robj_allocate.c +32 -0
  67. data/ext/numo/narray/gen/tmpl/seq.c +61 -0
  68. data/ext/numo/narray/gen/tmpl/set2.c +56 -0
  69. data/ext/numo/narray/gen/tmpl/sort.c +36 -0
  70. data/ext/numo/narray/gen/tmpl/sort_index.c +86 -0
  71. data/ext/numo/narray/gen/tmpl/store.c +31 -0
  72. data/ext/numo/narray/gen/tmpl/store_array.c +5 -0
  73. data/ext/numo/narray/gen/tmpl/store_from.c +53 -0
  74. data/ext/numo/narray/gen/tmpl/store_numeric.c +22 -0
  75. data/ext/numo/narray/gen/tmpl/to_a.c +41 -0
  76. data/ext/numo/narray/gen/tmpl/unary.c +58 -0
  77. data/ext/numo/narray/gen/tmpl/unary2.c +58 -0
  78. data/ext/numo/narray/gen/tmpl/unary_s.c +57 -0
  79. data/ext/numo/narray/index.c +822 -0
  80. data/ext/numo/narray/kwarg.c +79 -0
  81. data/ext/numo/narray/math.c +140 -0
  82. data/ext/numo/narray/narray.c +1539 -0
  83. data/ext/numo/narray/ndloop.c +1928 -0
  84. data/ext/numo/narray/numo/compat.h +23 -0
  85. data/ext/numo/narray/numo/intern.h +112 -0
  86. data/ext/numo/narray/numo/narray.h +411 -0
  87. data/ext/numo/narray/numo/ndloop.h +99 -0
  88. data/ext/numo/narray/numo/template.h +140 -0
  89. data/ext/numo/narray/numo/types/bit.h +19 -0
  90. data/ext/numo/narray/numo/types/complex.h +410 -0
  91. data/ext/numo/narray/numo/types/complex_macro.h +205 -0
  92. data/ext/numo/narray/numo/types/dcomplex.h +11 -0
  93. data/ext/numo/narray/numo/types/dfloat.h +12 -0
  94. data/ext/numo/narray/numo/types/float_def.h +34 -0
  95. data/ext/numo/narray/numo/types/float_macro.h +277 -0
  96. data/ext/numo/narray/numo/types/int16.h +12 -0
  97. data/ext/numo/narray/numo/types/int32.h +12 -0
  98. data/ext/numo/narray/numo/types/int64.h +12 -0
  99. data/ext/numo/narray/numo/types/int8.h +12 -0
  100. data/ext/numo/narray/numo/types/int_macro.h +34 -0
  101. data/ext/numo/narray/numo/types/robj_macro.h +218 -0
  102. data/ext/numo/narray/numo/types/robject.h +21 -0
  103. data/ext/numo/narray/numo/types/scomplex.h +11 -0
  104. data/ext/numo/narray/numo/types/sfloat.h +13 -0
  105. data/ext/numo/narray/numo/types/uint16.h +12 -0
  106. data/ext/numo/narray/numo/types/uint32.h +12 -0
  107. data/ext/numo/narray/numo/types/uint64.h +12 -0
  108. data/ext/numo/narray/numo/types/uint8.h +12 -0
  109. data/ext/numo/narray/numo/types/uint_macro.h +31 -0
  110. data/ext/numo/narray/numo/types/xint_macro.h +133 -0
  111. data/ext/numo/narray/rand.c +87 -0
  112. data/ext/numo/narray/step.c +506 -0
  113. data/ext/numo/narray/struct.c +872 -0
  114. data/lib/2.1/numo/narray.so +0 -0
  115. data/lib/2.2/numo/narray.so +0 -0
  116. data/lib/2.3/numo/narray.so +0 -0
  117. data/lib/erbpp.rb +286 -0
  118. data/lib/erbpp/line_number.rb +126 -0
  119. data/lib/erbpp/narray_def.rb +338 -0
  120. data/lib/numo/narray.rb +6 -0
  121. data/numo-narray.gemspec +35 -0
  122. data/spec/bit_spec.rb +93 -0
  123. data/spec/narray_spec.rb +249 -0
  124. metadata +238 -0
@@ -0,0 +1,1928 @@
1
+ /*
2
+ ndloop.c
3
+ Numerical Array Extension for Ruby
4
+ (C) Copyright 1999-2016 by Masahiro TANAKA
5
+
6
+ This program is free software.
7
+ You can distribute/modify this program
8
+ under the same terms as Ruby itself.
9
+ NO WARRANTY.
10
+ */
11
+
12
+ #include <ruby.h>
13
+ #include "numo/narray.h"
14
+
15
+ #if 0
16
+ #define DBG(x) x
17
+ #else
18
+ #define DBG(x)
19
+ #endif
20
+
21
+ #ifdef HAVE_STDARG_PROTOTYPES
22
+ #include <stdarg.h>
23
+ #define va_init_list(a,b) va_start(a,b)
24
+ #else
25
+ #include <varargs.h>
26
+ #define va_init_list(a,b) va_start(a)
27
+ #endif
28
+
29
+ typedef struct NA_BUFFER_COPY {
30
+ int ndim;
31
+ size_t elmsz;
32
+ size_t *n;
33
+ char *src_ptr;
34
+ char *buf_ptr;
35
+ na_loop_iter_t *src_iter;
36
+ na_loop_iter_t *buf_iter;
37
+ } na_buffer_copy_t;
38
+
39
+ typedef struct NA_LOOP_XARGS {
40
+ na_loop_iter_t *iter; // moved from na_loop_t
41
+ na_buffer_copy_t *bufcp; // copy data to buffer
42
+ int flag; // NDL_READ NDL_WRITE
43
+ bool free_user_iter; // alloc LARG(lp,j).iter=lp->xargs[j].iter
44
+ } na_loop_xargs_t;
45
+
46
+ typedef struct NA_MD_LOOP {
47
+ int narg;
48
+ int nin;
49
+ int ndim; // n of total dimention
50
+ unsigned int copy_flag; // set i-th bit if i-th arg is cast
51
+ size_t *n_ptr; // memory for n
52
+ na_loop_iter_t *iter_ptr; // memory for iter
53
+ size_t *n; // n of elements for each dim
54
+ na_loop_t user; // loop in user function
55
+ na_loop_xargs_t *xargs; // extra data for each arg
56
+ int writeback; // write back result to i-th arg
57
+ int init_aidx; // index of initializer argument
58
+ int reduce_dim;
59
+ int *trans_map;
60
+ VALUE vargs;
61
+ VALUE reduce;
62
+ VALUE loop_opt;
63
+ ndfunc_t *ndfunc;
64
+ void (*loop_func)();
65
+ } na_md_loop_t;
66
+
67
+ #define LARG(lp,iarg) ((lp)->user.args[iarg])
68
+ #define LITER(lp,idim,iarg) ((lp)->xargs[iarg].iter[idim])
69
+ #define LITER_SRC(lp,idim) ((lp)->src_iter[idim])
70
+ #define LBUFCP(lp,j) ((lp)->xargs[j].bufcp)
71
+
72
+ #define CASTABLE(t) (RTEST(t) && (t)!=OVERWRITE)
73
+
74
+ #define NDL_READ 1
75
+ #define NDL_WRITE 2
76
+ #define NDL_READ_WRITE 3
77
+
78
+ static inline VALUE
79
+ nary_type_s_cast(VALUE type, VALUE obj)
80
+ {
81
+ return rb_funcall(type,rb_intern("cast"),1,obj);
82
+ }
83
+
84
+ static void
85
+ print_ndfunc(ndfunc_t *nf) {
86
+ volatile VALUE t;
87
+ int i, k;
88
+ printf("ndfunc_t = 0x%"SZF"x {\n",(size_t)nf);
89
+ printf(" func = 0x%"SZF"x\n", (size_t)nf->func);
90
+ printf(" flag = 0x%"SZF"x\n", (size_t)nf->flag);
91
+ printf(" nin = %d\n", nf->nin);
92
+ printf(" nout = %d\n", nf->nout);
93
+ printf(" ain = 0x%"SZF"x\n", (size_t)nf->ain);
94
+ for (i=0; i<nf->nin; i++) {
95
+ t = rb_inspect(nf->ain[i].type);
96
+ printf(" ain[%d].type = %s\n", i, StringValuePtr(t));
97
+ printf(" ain[%d].dim = %d\n", i, nf->ain[i].dim);
98
+ }
99
+ printf(" aout = 0x%"SZF"x\n", (size_t)nf->aout);
100
+ for (i=0; i<nf->nout; i++) {
101
+ t = rb_inspect(nf->aout[i].type);
102
+ printf(" aout[%d].type = %s\n", i, StringValuePtr(t));
103
+ printf(" aout[%d].dim = %d\n", i, nf->aout[i].dim);
104
+ for (k=0; k<nf->aout[i].dim; k++) {
105
+ printf(" aout[%d].shape[%d] = %"SZF"u\n", i, k, nf->aout[i].shape[k]);
106
+ }
107
+ }
108
+ printf("}\n");
109
+ }
110
+
111
+
112
+ static void
113
+ print_ndloop(na_md_loop_t *lp) {
114
+ int i,j,nd;
115
+ printf("na_md_loop_t = 0x%"SZF"x {\n",(size_t)lp);
116
+ printf(" narg = %d\n", lp->narg);
117
+ printf(" nin = %d\n", lp->nin);
118
+ printf(" ndim = %d\n", lp->ndim);
119
+ printf(" copy_flag = %x\n", lp->copy_flag);
120
+ printf(" writeback = %d\n", lp->writeback);
121
+ printf(" init_aidx = %d\n", lp->init_aidx);
122
+ printf(" reduce_dim = %d\n", lp->reduce_dim);
123
+ printf(" trans_map = 0x%"SZF"x\n", (size_t)lp->trans_map);
124
+ nd = lp->ndim + lp->user.ndim;
125
+ for (i=0; i<nd; i++) {
126
+ printf(" trans_map[%d] = %d\n", i, lp->trans_map[i]);
127
+ }
128
+ printf(" n = 0x%"SZF"x\n", (size_t)lp->n);
129
+ nd = lp->ndim + lp->user.ndim;
130
+ for (i=0; i<=lp->ndim; i++) {
131
+ printf(" n[%d] = %"SZF"u\n", i, lp->n[i]);
132
+ }
133
+ printf(" user.n = 0x%"SZF"x\n", (size_t)lp->user.n);
134
+ if (lp->user.n) {
135
+ for (i=0; i<=lp->user.ndim; i++) {
136
+ printf(" user.n[%d] = %"SZF"u\n", i, lp->user.n[i]);
137
+ }
138
+ }
139
+ printf(" xargs = 0x%"SZF"x\n", (size_t)lp->xargs);
140
+ printf(" iter_ptr = 0x%"SZF"x\n", (size_t)lp->iter_ptr);
141
+ printf(" user.narg = %d\n", lp->user.narg);
142
+ printf(" user.ndim = %d\n", lp->user.ndim);
143
+ printf(" user.args = 0x%"SZF"x\n", (size_t)lp->user.args);
144
+ for (j=0; j<lp->narg; j++) {
145
+ }
146
+ printf(" user.opt_ptr = 0x%"SZF"x\n", (size_t)lp->user.opt_ptr);
147
+ if (lp->reduce==Qnil) {
148
+ printf(" reduce = nil\n");
149
+ } else {
150
+ printf(" reduce = 0x%x\n", NUM2INT(lp->reduce));
151
+ }
152
+ for (j=0; j<lp->narg; j++) {
153
+ printf("--user.args[%d]--\n", j);
154
+ printf(" user.args[%d].ptr = 0x%"SZF"x\n", j, (size_t)LARG(lp,j).ptr);
155
+ printf(" user.args[%d].elmsz = %"SZF"d\n", j, LARG(lp,j).elmsz);
156
+ printf(" user.args[%d].value = 0x%"SZF"x\n", j, LARG(lp,j).value);
157
+ printf(" user.args[%d].ndim = %d\n", j, LARG(lp,j).ndim);
158
+ printf(" user.args[%d].shape = 0x%"SZF"x\n", j, (size_t)LARG(lp,j).shape);
159
+ if (LARG(lp,j).shape) {
160
+ for (i=0; i<LARG(lp,j).ndim; i++) {
161
+ printf(" user.args[%d].shape[%d] = %"SZF"d\n", j, i, LARG(lp,j).shape[i]);
162
+ }
163
+ }
164
+ printf(" user.args[%d].iter = 0x%"SZF"x\n", j,(size_t)lp->user.args[j].iter);
165
+ if (lp->user.args[j].iter) {
166
+ for (i=0; i<lp->user.ndim; i++) {
167
+ printf(" &user.args[%d].iter[%d] = 0x%"SZF"x\n", j,i, (size_t)&lp->user.args[j].iter[i]);
168
+ printf(" user.args[%d].iter[%d].pos = %"SZF"u\n", j,i, lp->user.args[j].iter[i].pos);
169
+ printf(" user.args[%d].iter[%d].step = %"SZF"u\n", j,i, lp->user.args[j].iter[i].step);
170
+ printf(" user.args[%d].iter[%d].idx = 0x%"SZF"x\n", j,i, (size_t)lp->user.args[j].iter[i].idx);
171
+ }
172
+ }
173
+ //
174
+ printf(" xargs[%d].flag = %d\n", j, lp->xargs[j].flag);
175
+ printf(" xargs[%d].free_user_iter = %d\n", j, lp->xargs[j].free_user_iter);
176
+ for (i=0; i<=nd; i++) {
177
+ printf(" &xargs[%d].iter[%d] = 0x%"SZF"x\n", j,i, (size_t)&LITER(lp,i,j));
178
+ printf(" xargs[%d].iter[%d].pos = %"SZF"u\n", j,i, LITER(lp,i,j).pos);
179
+ printf(" xargs[%d].iter[%d].step = %"SZF"u\n", j,i, LITER(lp,i,j).step);
180
+ printf(" xargs[%d].iter[%d].idx = 0x%"SZF"x\n", j,i, (size_t)LITER(lp,i,j).idx);
181
+ }
182
+ printf(" xargs[%d].bufcp = 0x%"SZF"x\n", j, (size_t)lp->xargs[j].bufcp);
183
+ if (lp->xargs[j].bufcp) {
184
+ printf(" xargs[%d].bufcp->ndim = %d\n", j, lp->xargs[j].bufcp->ndim);
185
+ printf(" xargs[%d].bufcp->elmsz = %"SZF"d\n", j, lp->xargs[j].bufcp->elmsz);
186
+ printf(" xargs[%d].bufcp->n = 0x%"SZF"x\n", j, (size_t)lp->xargs[j].bufcp->n);
187
+ printf(" xargs[%d].bufcp->src_ptr = 0x%"SZF"x\n", j, (size_t)lp->xargs[j].bufcp->src_ptr);
188
+ printf(" xargs[%d].bufcp->buf_ptr = 0x%"SZF"x\n", j, (size_t)lp->xargs[j].bufcp->buf_ptr);
189
+ printf(" xargs[%d].bufcp->src_iter = 0x%"SZF"x\n", j, (size_t)lp->xargs[j].bufcp->src_iter);
190
+ printf(" xargs[%d].bufcp->buf_iter = 0x%"SZF"x\n", j, (size_t)lp->xargs[j].bufcp->buf_iter);
191
+ }
192
+ }
193
+ printf("}\n");
194
+ }
195
+
196
+
197
+ static unsigned int
198
+ ndloop_func_loop_spec(ndfunc_t *nf, int user_ndim)
199
+ {
200
+ unsigned int f=0;
201
+ // If user function supports LOOP
202
+ if (user_ndim > 0 || NDF_TEST(nf,NDF_HAS_LOOP)) {
203
+ if (!NDF_TEST(nf,NDF_STRIDE_LOOP)) {
204
+ f |= 1;
205
+ }
206
+ if (!NDF_TEST(nf,NDF_INDEX_LOOP)) {
207
+ f |= 2;
208
+ }
209
+ }
210
+ return f;
211
+ }
212
+
213
+
214
+
215
+
216
+ static int
217
+ ndloop_cast_required(VALUE type, VALUE value)
218
+ {
219
+ return CASTABLE(type) && type != CLASS_OF(value);
220
+ }
221
+
222
+ static int
223
+ ndloop_castable_type(VALUE type)
224
+ {
225
+ return rb_obj_is_kind_of(type, rb_cClass) && RTEST(rb_class_inherited_p(type, cNArray));
226
+ }
227
+
228
+ static void
229
+ ndloop_cast_error(VALUE type, VALUE value)
230
+ {
231
+ VALUE x = rb_inspect(type);
232
+ char* s = StringValueCStr(x);
233
+ rb_bug("fail cast from %s to %s", rb_obj_classname(value),s);
234
+ rb_raise(rb_eTypeError,"fail cast from %s to %s",
235
+ rb_obj_classname(value), s);
236
+ }
237
+
238
+ // convert input argeuments given by RARRAY_PTR(args)[j]
239
+ // to type specified by nf->args[j].type
240
+ // returns copy_flag where nth-bit is set if nth argument is converted.
241
+ static unsigned int
242
+ ndloop_cast_args(ndfunc_t *nf, VALUE args)
243
+ {
244
+ int j;
245
+ unsigned int copy_flag=0;
246
+ VALUE type, value;
247
+
248
+ for (j=0; j<nf->nin; j++) {
249
+
250
+ type = nf->ain[j].type;
251
+ if (TYPE(type)==T_SYMBOL)
252
+ continue;
253
+ value = RARRAY_AREF(args,j);
254
+ if (!ndloop_cast_required(type, value))
255
+ continue;
256
+
257
+ if (ndloop_castable_type(type)) {
258
+ RARRAY_ASET(args,j,nary_type_s_cast(type, value));
259
+ copy_flag |= 1<<j;
260
+ } else {
261
+ ndloop_cast_error(type, value);
262
+ }
263
+ }
264
+
265
+ RB_GC_GUARD(type); RB_GC_GUARD(value);
266
+ return copy_flag;
267
+ }
268
+
269
+
270
+ static void
271
+ ndloop_handle_symbol_in_ain(VALUE type, VALUE value, int at, na_md_loop_t *lp)
272
+ {
273
+ if (type==sym_reduce) {
274
+ lp->reduce = value;
275
+ }
276
+ else if (type==sym_option) {
277
+ lp->user.option = value;
278
+ }
279
+ else if (type==sym_loop_opt) {
280
+ lp->loop_opt = value;
281
+ }
282
+ else if (type==sym_init) {
283
+ lp->init_aidx = at;
284
+ }
285
+ else {
286
+ rb_bug("ndloop parse_options: unknown type");
287
+ }
288
+ }
289
+
290
+ static inline int
291
+ max2(int x, int y)
292
+ {
293
+ return x > y ? x : y;
294
+ }
295
+
296
+ static void
297
+ ndloop_find_max_dimension(na_md_loop_t *lp, ndfunc_t *nf, VALUE args)
298
+ {
299
+ int j;
300
+ int nin=0; // number of input objects (except for symbols)
301
+ int user_nd=0; // max dimension of user function
302
+ int loop_nd=0; // max dimension of md-loop
303
+
304
+ for (j=0; j<RARRAY_LEN(args); j++) {
305
+ VALUE t = nf->ain[j].type;
306
+ VALUE v = RARRAY_AREF(args,j);
307
+ if (TYPE(t)==T_SYMBOL) {
308
+ ndloop_handle_symbol_in_ain(t, v, j, lp);
309
+ } else {
310
+ nin++;
311
+ user_nd = max2(user_nd, nf->ain[j].dim);
312
+ if (IsNArray(v))
313
+ loop_nd = max2(loop_nd, RNARRAY_NDIM(v) - nf->ain[j].dim);
314
+ }
315
+ }
316
+
317
+ lp->narg = lp->user.narg = nin + nf->nout;
318
+ lp->nin = nin;
319
+ lp->ndim = loop_nd;
320
+ lp->user.ndim = user_nd;
321
+ }
322
+
323
+ /*
324
+ user-dimension:
325
+ user_nd = MAX( nf->args[j].dim )
326
+
327
+ user-support dimension:
328
+
329
+ loop dimension:
330
+ loop_nd
331
+ */
332
+
333
+ static void
334
+ ndloop_alloc(na_md_loop_t *lp, ndfunc_t *nf, VALUE args,
335
+ void *opt_ptr, unsigned int copy_flag,
336
+ void (*loop_func)(ndfunc_t*, na_md_loop_t*))
337
+ {
338
+ int i,j;
339
+ int narg;
340
+ int max_nd;
341
+
342
+ long args_len;
343
+
344
+ na_loop_iter_t *iter;
345
+
346
+ int trans_dim;
347
+ unsigned int f;
348
+
349
+ args_len = RARRAY_LEN(args);
350
+
351
+ if (args_len != nf->nin) {
352
+ rb_bug("wrong number of arguments for ndfunc (%lu for %d)",
353
+ args_len, nf->nin);
354
+ }
355
+
356
+ lp->vargs = args;
357
+ lp->ndfunc = nf;
358
+ lp->loop_func = loop_func;
359
+ lp->copy_flag = copy_flag;
360
+
361
+ lp->reduce = Qnil;
362
+ lp->user.option = Qnil;
363
+ lp->user.opt_ptr = opt_ptr;
364
+ lp->user.err_type = Qfalse;
365
+ lp->loop_opt = Qnil;
366
+ lp->writeback = -1;
367
+ lp->init_aidx = -1;
368
+
369
+ lp->n = NULL;
370
+ lp->n_ptr = NULL;
371
+ lp->xargs = NULL;
372
+ lp->user.args = NULL;
373
+ lp->user.n = NULL;
374
+ lp->iter_ptr = NULL;
375
+ lp->trans_map = NULL;
376
+
377
+ ndloop_find_max_dimension(lp, nf, args);
378
+ narg = lp->nin + nf->nout;
379
+ max_nd = lp->ndim + lp->user.ndim;
380
+
381
+ lp->n = lp->n_ptr = ALLOC_N(size_t, max_nd+1);
382
+ lp->xargs = ALLOC_N(na_loop_xargs_t, narg);
383
+ lp->user.args = ALLOC_N(na_loop_args_t, narg);
384
+ iter = ALLOC_N(na_loop_iter_t, narg*(max_nd+1));
385
+ lp->iter_ptr = iter;
386
+
387
+ for (j=0; j<narg; j++) {
388
+ LARG(lp,j).value = Qnil;
389
+ LARG(lp,j).iter = NULL;
390
+ LARG(lp,j).shape = NULL;
391
+ LARG(lp,j).ndim = 0;
392
+ lp->xargs[j].iter = &(iter[(max_nd+1)*j]);
393
+ lp->xargs[j].bufcp = NULL;
394
+ lp->xargs[j].flag = (j<nf->nin) ? NDL_READ : NDL_WRITE;
395
+ lp->xargs[j].free_user_iter = 0;
396
+ }
397
+
398
+ for (i=0; i<=max_nd; i++) {
399
+ lp->n[i] = 1;
400
+ for (j=0; j<narg; j++) {
401
+ LITER(lp,i,j).pos = 0;
402
+ LITER(lp,i,j).step = 0;
403
+ LITER(lp,i,j).idx = NULL;
404
+ }
405
+ }
406
+
407
+ // transpose reduce-dimensions to last dimensions
408
+ // array loop
409
+ // [*,+,*,+,*] => [*,*,*,+,+]
410
+ // trans_map=[0,3,1,4,2] <= [0,1,2,3,4]
411
+ lp->trans_map = ALLOC_N(int, max_nd+1);
412
+ if (NDF_TEST(nf,NDF_FLAT_REDUCE) && RTEST(lp->reduce)) {
413
+ trans_dim = 0;
414
+ for (i=0; i<max_nd; i++) {
415
+ if (na_test_reduce(lp->reduce, i)) {
416
+ lp->trans_map[i] = -1;
417
+ } else {
418
+ lp->trans_map[i] = trans_dim++;
419
+ }
420
+ }
421
+ j = trans_dim;
422
+ for (i=0; i<max_nd; i++) {
423
+ if (lp->trans_map[i] == -1) {
424
+ lp->trans_map[i] = j++;
425
+ }
426
+ }
427
+ lp->reduce_dim = max_nd - trans_dim;
428
+ f = 0;
429
+ for (i=trans_dim; i<max_nd; i++) {
430
+ f |= 1<<i;
431
+ }
432
+ lp->reduce = INT2FIX(f);
433
+ } else {
434
+ for (i=0; i<max_nd; i++) {
435
+ lp->trans_map[i] = i;
436
+ }
437
+ lp->reduce_dim = 0;
438
+ }
439
+ }
440
+
441
+
442
+ static VALUE
443
+ ndloop_release(VALUE vlp)
444
+ {
445
+ int j;
446
+ VALUE v;
447
+ na_md_loop_t *lp = (na_md_loop_t*)(vlp);
448
+
449
+ for (j=0; j < lp->narg; j++) {
450
+ v = LARG(lp,j).value;
451
+ if (IsNArray(v)) {
452
+ na_release_lock(v);
453
+ }
454
+ }
455
+ //xfree(lp);
456
+ for (j=0; j<lp->narg; j++) {
457
+ //printf("lp->xargs[%d].bufcp=%lx\n",j,(size_t)(lp->xargs[j].bufcp));
458
+ if (lp->xargs[j].bufcp) {
459
+ xfree(lp->xargs[j].bufcp->buf_iter);
460
+ xfree(lp->xargs[j].bufcp->buf_ptr);
461
+ xfree(lp->xargs[j].bufcp->n);
462
+ xfree(lp->xargs[j].bufcp);
463
+ if (lp->xargs[j].free_user_iter) {
464
+ xfree(LARG(lp,j).iter);
465
+ }
466
+ }
467
+ }
468
+ if (lp->trans_map) xfree(lp->trans_map);
469
+ xfree(lp->xargs);
470
+ xfree(lp->iter_ptr);
471
+ xfree(lp->user.args);
472
+ xfree(lp->n_ptr);
473
+ //rb_gc_force_recycle(vlp);
474
+ return Qnil;
475
+ }
476
+
477
+
478
+ /*
479
+ static void
480
+ ndloop_free(na_md_loop_t* lp)
481
+ {
482
+ int j;
483
+ VALUE v;
484
+
485
+ for (j=0; j<lp->narg; j++) {
486
+ v = LARG(lp,j).value;
487
+ if (IsNArray(v)) {
488
+ na_release_lock(v);
489
+ }
490
+ }
491
+ xfree(lp);
492
+ }
493
+ */
494
+
495
+
496
+ /*
497
+ set lp->n[i] (shape of n-d iteration) here
498
+ */
499
+ static void
500
+ ndloop_check_shape(na_md_loop_t *lp, int nf_dim, narray_t *na)
501
+ {
502
+ int i, k;
503
+ size_t n;
504
+ int dim_beg;
505
+
506
+ dim_beg = lp->ndim + nf_dim - na->ndim;
507
+
508
+ for (k = na->ndim - nf_dim - 1; k>=0; k--) {
509
+ i = lp->trans_map[k + dim_beg];
510
+ n = na->shape[k];
511
+ // if n==1 then repeat this dimension
512
+ if (n != 1) {
513
+ if (lp->n[i] == 1) {
514
+ lp->n[i] = n;
515
+ } else if (lp->n[i] != n) {
516
+ // inconsistent array shape
517
+ rb_raise(rb_eTypeError,"shape1[%d](=%"SZF"u) != shape2[%d](=%"SZF"u)",
518
+ i, lp->n[i], k, n);
519
+ }
520
+ }
521
+ }
522
+ }
523
+
524
+
525
+ /*
526
+ na->shape[i] == lp->n[ dim_map[i] ]
527
+ */
528
+ static void
529
+ ndloop_set_stepidx(na_md_loop_t *lp, int j, VALUE vna, int *dim_map, int rwflag)
530
+ {
531
+ size_t n, s;
532
+ int i, k;
533
+ stridx_t sdx;
534
+ narray_t *na;
535
+
536
+ LARG(lp,j).value = vna;
537
+ LARG(lp,j).elmsz = na_get_elmsz(vna);
538
+ if (rwflag & NDL_READ) {
539
+ LARG(lp,j).ptr = na_get_pointer_for_read(vna);
540
+ } else
541
+ if (rwflag & NDL_WRITE) {
542
+ LARG(lp,j).ptr = na_get_pointer_for_write(vna);
543
+ } else {
544
+ rb_bug("invalid value for read-write flag");
545
+ }
546
+ GetNArray(vna,na);
547
+
548
+ switch(NA_TYPE(na)) {
549
+ case NARRAY_DATA_T:
550
+ if (NA_DATA_PTR(na)==NULL && NA_SIZE(na)>0) {
551
+ rb_bug("cannot read no-data NArray");
552
+ rb_raise(rb_eRuntimeError,"cannot read no-data NArray");
553
+ }
554
+ // through
555
+ case NARRAY_FILEMAP_T:
556
+ s = LARG(lp,j).elmsz;
557
+ for (k=na->ndim; k--;) {
558
+ n = na->shape[k];
559
+ if (n > 1) {
560
+ i = dim_map[k];
561
+ //printf("n=%d k=%d i=%d\n",n,k,i);
562
+ LITER(lp,i,j).step = s;
563
+ LITER(lp,i,j).idx = NULL;
564
+ }
565
+ s *= n;
566
+ }
567
+ LITER(lp,0,j).pos = 0;
568
+ break;
569
+ case NARRAY_VIEW_T:
570
+ LITER(lp,0,j).pos = NA_VIEW_OFFSET(na);
571
+ for (k=0; k<na->ndim; k++) {
572
+ n = na->shape[k];
573
+ sdx = NA_VIEW_STRIDX(na)[k];
574
+ if (n > 1) {
575
+ i = dim_map[k];
576
+ if (SDX_IS_INDEX(sdx)) {
577
+ LITER(lp,i,j).step = 0;
578
+ LITER(lp,i,j).idx = SDX_GET_INDEX(sdx);
579
+ } else {
580
+ LITER(lp,i,j).step = SDX_GET_STRIDE(sdx);
581
+ LITER(lp,i,j).idx = NULL;
582
+ }
583
+ } else if (n==1) {
584
+ if (SDX_IS_INDEX(sdx)) {
585
+ LITER(lp,0,j).pos += SDX_GET_INDEX(sdx)[0];
586
+ }
587
+ }
588
+ }
589
+ break;
590
+ default:
591
+ rb_bug("invalid narray internal type");
592
+ }
593
+ }
594
+
595
+
596
+
597
+ static void
598
+ ndloop_init_args(ndfunc_t *nf, na_md_loop_t *lp, VALUE args)
599
+ {
600
+ int i, j;
601
+ VALUE v;
602
+ narray_t *na;
603
+ int nf_dim;
604
+ int dim_beg;
605
+ int *dim_map;
606
+ int max_nd = lp->ndim + lp->user.ndim;
607
+ int flag;
608
+ size_t s;
609
+
610
+ /*
611
+ na->shape[i] == lp->n[ dim_map[i] ]
612
+ */
613
+ dim_map = ALLOCA_N(int, max_nd);
614
+
615
+ // input arguments
616
+ for (j=0; j<nf->nin; j++) {
617
+ if (TYPE(nf->ain[j].type)==T_SYMBOL) {
618
+ continue;
619
+ }
620
+ v = RARRAY_AREF(args,j);
621
+ if (IsNArray(v)) {
622
+ // set LARG(lp,j) with v
623
+ GetNArray(v,na);
624
+ nf_dim = nf->ain[j].dim;
625
+ if (nf_dim > na->ndim) {
626
+ rb_raise(nary_eDimensionError,"requires >= %d-dimensioal array "
627
+ "while %d-dimensional array is given",nf_dim,na->ndim);
628
+ }
629
+ ndloop_check_shape(lp, nf_dim, na);
630
+ dim_beg = lp->ndim + nf->ain[j].dim - na->ndim;
631
+ for (i=0; i<na->ndim; i++) {
632
+ dim_map[i] = lp->trans_map[i+dim_beg];
633
+ //printf("dim_map[%d]=%d na->shape[%d]=%d\n",i,dim_map[i],i,na->shape[i]);
634
+ }
635
+ if (nf->ain[j].type==OVERWRITE) {
636
+ lp->xargs[j].flag = flag = NDL_WRITE;
637
+ } else {
638
+ lp->xargs[j].flag = flag = NDL_READ;
639
+ }
640
+ ndloop_set_stepidx(lp, j, v, dim_map, flag);
641
+ LARG(lp,j).ndim = nf_dim;
642
+ if (nf_dim > 0) {
643
+ LARG(lp,j).shape = na->shape + (na->ndim - nf_dim);
644
+ }
645
+ } else if (TYPE(v)==T_ARRAY) {
646
+ LARG(lp,j).value = v;
647
+ LARG(lp,j).elmsz = sizeof(VALUE);
648
+ LARG(lp,j).ptr = NULL;
649
+ for (i=0; i<=max_nd; i++) {
650
+ LITER(lp,i,j).step = 1;
651
+ }
652
+ }
653
+ }
654
+ // check whether # of element is zero
655
+ for (s=1,i=0; i<=max_nd; i++) {
656
+ s *= lp->n[i];
657
+ }
658
+ if (s==0) {
659
+ for (i=0; i<=max_nd; i++) {
660
+ lp->n[i] = 0;
661
+ }
662
+ }
663
+ }
664
+
665
+
666
+ static int
667
+ ndloop_check_inplace(VALUE type, int na_ndim, size_t *na_shape, VALUE v)
668
+ {
669
+ int i;
670
+ narray_t *na;
671
+
672
+ // type check
673
+ if (type != CLASS_OF(v)) {
674
+ return 0;
675
+ }
676
+ GetNArray(v,na);
677
+ // shape check
678
+ if (na->ndim != na_ndim) {
679
+ return 0;
680
+ }
681
+ for (i=0; i<na_ndim; i++) {
682
+ if (na_shape[i] != na->shape[i]) {
683
+ return 0;
684
+ }
685
+ }
686
+ // v is selected as output
687
+ return 1;
688
+ }
689
+
690
+ static VALUE
691
+ ndloop_find_inplace(ndfunc_t *nf, na_md_loop_t *lp, VALUE type,
692
+ int na_ndim, size_t *na_shape, VALUE args)
693
+ {
694
+ int j;
695
+ VALUE v;
696
+
697
+ // find inplace
698
+ for (j=0; j<nf->nin; j++) {
699
+ v = RARRAY_AREF(args,j);
700
+ if (IsNArray(v)) {
701
+ if (TEST_INPLACE(v)) {
702
+ if (ndloop_check_inplace(type,na_ndim,na_shape,v)) {
703
+ // if already copied, create outary and write-back
704
+ if (lp->copy_flag & (1<<j)) {
705
+ lp->writeback = j;
706
+ }
707
+ return v;
708
+ }
709
+ }
710
+ }
711
+ }
712
+ // find casted or copied input array
713
+ for (j=0; j<nf->nin; j++) {
714
+ if (lp->copy_flag & (1<<j)) {
715
+ v = RARRAY_AREF(args,j);
716
+ if (ndloop_check_inplace(type,na_ndim,na_shape,v)) {
717
+ return v;
718
+ }
719
+ }
720
+ }
721
+ return Qnil;
722
+ }
723
+
724
+
725
+
726
+ static VALUE
727
+ ndloop_get_arg_type(ndfunc_t *nf, VALUE args, VALUE t)
728
+ {
729
+ int i;
730
+
731
+ // if type is FIXNUM, get the type of i-th argument
732
+ if (FIXNUM_P(t)) {
733
+ i = FIX2INT(t);
734
+ if (i<0 || i>=nf->nin) {
735
+ rb_bug("invalid type: index (%d) out of # of args",i);
736
+ }
737
+ t = nf->ain[i].type;
738
+ // if i-th type is Qnil, get the type of i-th input value
739
+ if (!CASTABLE(t)) {
740
+ t = CLASS_OF(RARRAY_AREF(args,i));
741
+ }
742
+ }
743
+ return t;
744
+ }
745
+
746
+
747
+ static VALUE
748
+ ndloop_set_output_narray(ndfunc_t *nf, na_md_loop_t *lp, int k,
749
+ VALUE type, VALUE args)
750
+ {
751
+ int i, j;
752
+ int na_ndim;
753
+ int lp_dim;
754
+ volatile VALUE v=Qnil;
755
+ size_t *na_shape;
756
+ int *dim_map;
757
+ int flag = NDL_READ_WRITE;
758
+ int nd;
759
+ int max_nd = lp->ndim + nf->aout[k].dim;
760
+
761
+ na_shape = ALLOCA_N(size_t, max_nd);
762
+ dim_map = ALLOCA_N(int, max_nd);
763
+
764
+ //printf("max_nd=%d lp->ndim=%d\n",max_nd,lp->ndim);
765
+
766
+ // md-loop shape
767
+ na_ndim = 0;
768
+ for (i=0; i<lp->ndim; i++) {
769
+ // na_shape[i] == lp->n[lp->trans_map[i]]
770
+ lp_dim = lp->trans_map[i];
771
+ //printf("i=%d lp_dim=%d\n",i,lp_dim);
772
+ if (NDF_TEST(nf,NDF_CUM)) { // cumulate with shape kept
773
+ na_shape[na_ndim] = lp->n[lp_dim];
774
+ } else
775
+ if (na_test_reduce(lp->reduce,lp_dim)) { // accumulate dimension
776
+ if (NDF_TEST(nf,NDF_KEEP_DIM)) {
777
+ na_shape[na_ndim] = 1; // leave it
778
+ } else {
779
+ continue; // delete dimension
780
+ }
781
+ } else {
782
+ na_shape[na_ndim] = lp->n[lp_dim];
783
+ }
784
+ //printf("i=%d lp_dim=%d na_shape[%d]=%ld\n",i,lp_dim,i,na_shape[i]);
785
+ dim_map[na_ndim++] = lp_dim;
786
+ //dim_map[lp_dim] = na_ndim++;
787
+ }
788
+
789
+ // user-specified shape
790
+ for (i=0; i<nf->aout[k].dim; i++) {
791
+ na_shape[na_ndim] = nf->aout[k].shape[i];
792
+ dim_map[na_ndim++] = i + lp->ndim;
793
+ }
794
+
795
+ // find inplace from input arrays
796
+ if (k==0 && NDF_TEST(nf,NDF_INPLACE)) {
797
+ v = ndloop_find_inplace(nf,lp,type,na_ndim,na_shape,args);
798
+ }
799
+ if (!RTEST(v)) {
800
+ // new object
801
+ v = rb_narray_new(type, na_ndim, na_shape);
802
+ flag = NDL_WRITE;
803
+ }
804
+
805
+ j = lp->nin + k;
806
+ ndloop_set_stepidx(lp, j, v, dim_map, flag);
807
+ LARG(lp,j).ndim = nd = nf->aout[k].dim;
808
+ if (nd > 0) {
809
+ LARG(lp,j).shape = nf->aout[k].shape;
810
+ }
811
+
812
+ return v;
813
+ }
814
+
815
+ static VALUE
816
+ ndloop_set_output(ndfunc_t *nf, na_md_loop_t *lp, VALUE args)
817
+ {
818
+ int i, j, k, idx;
819
+ volatile VALUE v, t, results;
820
+ VALUE init;
821
+
822
+ int max_nd = lp->ndim + lp->user.ndim;
823
+
824
+ // output results
825
+ results = rb_ary_new2(nf->nout);
826
+
827
+ for (k=0; k<nf->nout; k++) {
828
+ t = nf->aout[k].type;
829
+ t = ndloop_get_arg_type(nf,args,t);
830
+
831
+ if (rb_obj_is_kind_of(t, rb_cClass)) {
832
+ if (RTEST(rb_class_inherited_p(t, cNArray))) {
833
+ // NArray
834
+ v = ndloop_set_output_narray(nf,lp,k,t,args);
835
+ rb_ary_push(results, v);
836
+ }
837
+ else if (RTEST(rb_class_inherited_p(t, rb_cArray))) {
838
+ // Ruby Array
839
+ j = lp->nin + k;
840
+ for (i=0; i<=max_nd; i++) {
841
+ LITER(lp,i,j).step = sizeof(VALUE);
842
+ }
843
+ LARG(lp,j).value = t;
844
+ LARG(lp,j).elmsz = sizeof(VALUE);
845
+ } else {
846
+ rb_raise(rb_eRuntimeError,"ndloop_set_output: invalid for type");
847
+ }
848
+ }
849
+ }
850
+
851
+ // initialilzer
852
+ k = lp->init_aidx;
853
+ if (k > -1) {
854
+ idx = nf->ain[k].dim;
855
+ v = RARRAY_AREF(results,idx);
856
+ init = RARRAY_AREF(args,k);
857
+ na_store(v,init);
858
+ }
859
+
860
+ return results;
861
+ }
862
+
863
+
864
+ static void
865
+ ndfunc_contract_loop(na_md_loop_t *lp)
866
+ {
867
+ int i,j,k,success,cnt=0;
868
+ int red0, redi;
869
+
870
+ redi = na_test_reduce(lp->reduce,0);
871
+
872
+ //for (i=0; i<lp->ndim; i++) {
873
+ // printf("lp->n[%d]=%lu\n",i,lp->n[i]);
874
+ //}
875
+
876
+ for (i=1; i<lp->ndim; i++) {
877
+ red0 = redi;
878
+ redi = na_test_reduce(lp->reduce,i);
879
+ //printf("contract i=%d reduce_cond=%d %d\n",i,red0,redi);
880
+ if (red0 != redi) {
881
+ continue;
882
+ }
883
+ success = 1;
884
+ for (j=0; j<lp->narg; j++) {
885
+ if (!(LITER(lp,i,j).idx == NULL &&
886
+ LITER(lp,i-1,j).idx == NULL &&
887
+ LITER(lp,i-1,j).step == LITER(lp,i,j).step*(ssize_t)(lp->n[i]))) {
888
+ success = 0;
889
+ break;
890
+ }
891
+ }
892
+ if (success) {
893
+ //printf("contract i=%d-th and %d-th\n",i-1,i);
894
+ // contract (i-1)-th and i-th dimension
895
+ lp->n[i] *= lp->n[i-1];
896
+ // shift dimensions
897
+ for (k=i-1; k>cnt; k--) {
898
+ lp->n[k] = lp->n[k-1];
899
+ }
900
+ //printf("k=%d\n",k);
901
+ for (; k>=0; k--) {
902
+ lp->n[k] = 1;
903
+ }
904
+ for (j=0; j<lp->narg; j++) {
905
+ for (k=i-1; k>cnt; k--) {
906
+ LITER(lp,k,j) = LITER(lp,k-1,j);
907
+ }
908
+ }
909
+ if (redi) {
910
+ lp->reduce_dim--;
911
+ }
912
+ cnt++;
913
+ }
914
+ }
915
+ //printf("contract cnt=%d\n",cnt);
916
+ if (cnt>0) {
917
+ for (j=0; j<lp->narg; j++) {
918
+ lp->xargs[j].iter = &LITER(lp,cnt,j);
919
+ }
920
+ lp->n = &(lp->n[cnt]);
921
+ lp->ndim -= cnt;
922
+ //for (i=0; i<lp->ndim; i++) {printf("lp->n[%d]=%lu\n",i,lp->n[i]);}
923
+ }
924
+ }
925
+
926
+
927
+ static void
928
+ ndfunc_set_user_loop(ndfunc_t *nf, na_md_loop_t *lp)
929
+ {
930
+ int j, ud=0;
931
+
932
+ if (lp->reduce_dim > 0) {
933
+ ud = lp->reduce_dim;
934
+ }
935
+ else if (lp->ndim > 0 && NDF_TEST(nf,NDF_HAS_LOOP)) {
936
+ ud = 1;
937
+ }
938
+ else {
939
+ goto skip_ud;
940
+ }
941
+ if (ud > lp->ndim) {
942
+ rb_bug("Reduce-dimension is larger than loop-dimension");
943
+ }
944
+ // increase user dimension
945
+ lp->user.ndim += ud;
946
+ lp->ndim -= ud;
947
+ for (j=0; j<lp->narg; j++) {
948
+ if (LARG(lp,j).shape) {
949
+ rb_bug("HAS_LOOP or reduce-dimension=%d conflicts with user-dimension",lp->reduce_dim);
950
+ }
951
+ LARG(lp,j).ndim += ud;
952
+ LARG(lp,j).shape = &(lp->n[lp->ndim]);
953
+ //printf("LARG(lp,j).ndim=%d,LARG(lp,j).shape=%lx\n",LARG(lp,j).ndim,(size_t)LARG(lp,j).shape);
954
+ }
955
+ //printf("lp->reduce_dim=%d lp->user.ndim=%d lp->ndim=%d\n",lp->reduce_dim,lp->user.ndim,lp->ndim);
956
+
957
+ skip_ud:
958
+ lp->user.n = &(lp->n[lp->ndim]);
959
+ for (j=0; j<lp->narg; j++) {
960
+ LARG(lp,j).iter = &LITER(lp,lp->ndim,j);
961
+ //printf("in ndfunc_set_user_loop: lp->user.args[%d].iter=%lx\n",j,(size_t)(LARG(lp,j).iter));
962
+ }
963
+ }
964
+
965
+
966
+ static void
967
+ ndfunc_set_bufcp(na_md_loop_t *lp, unsigned int loop_spec)
968
+ {
969
+ unsigned int f;
970
+ int i, j;
971
+ int nd, ndim;
972
+ bool zero_step;
973
+ ssize_t n, sz, elmsz, stride, n_total; //, last_step;
974
+ size_t *buf_shape;
975
+ na_loop_iter_t *buf_iter=NULL, *src_iter;
976
+
977
+ //if (loop_spec==0) return;
978
+
979
+ n_total = lp->user.n[0];
980
+ for (i=1; i<lp->user.ndim; i++) {
981
+ n_total *= lp->user.n[i];
982
+ }
983
+
984
+ //for (j=0; j<lp->nin; j++) {
985
+ for (j=0; j<lp->narg; j++) {
986
+ //ndim = nd = lp->user.ndim;
987
+ ndim = nd = LARG(lp,j).ndim;
988
+ sz = elmsz = LARG(lp,j).elmsz;
989
+ src_iter = LARG(lp,j).iter;
990
+ //last_step = src_iter[ndim-1].step;
991
+ f = 0;
992
+ zero_step = 1;
993
+ for (i=ndim; i>0; ) {
994
+ i--;
995
+ if (LARG(lp,j).shape) {
996
+ n = LARG(lp,j).shape[i];
997
+ } else {
998
+ printf("shape is NULL\n");
999
+ n = lp->user.n[i];
1000
+ }
1001
+ stride = sz * n;
1002
+ //printf("{j=%d,i=%d,ndim=%d,nd=%d,idx=%lx,step=%ld,n=%ld,sz=%ld,stride=%ld}\n",j,i,ndim,nd,(size_t)src_iter[i].idx,src_iter[i].step,n,sz,stride);
1003
+ if (src_iter[i].idx) {
1004
+ f |= 2; // INDEX LOOP
1005
+ zero_step = 0;
1006
+ } else {
1007
+ if (src_iter[i].step != sz) {
1008
+ f |= 1; // NON_CONTIGUOUS LOOP
1009
+ } else {
1010
+ // CONTIGUOUS LOOP
1011
+ if (i==ndim-1) { // contract if last dimension
1012
+ ndim = i;
1013
+ elmsz = stride;
1014
+ }
1015
+ }
1016
+ if (src_iter[i].step != 0) {
1017
+ zero_step = 0;
1018
+ }
1019
+ }
1020
+ sz = stride;
1021
+ }
1022
+ //printf("[j=%d f=%d loop_spec=%d zero_step=%d]\n",j,f,loop_spec,zero_step);
1023
+
1024
+ if (zero_step) {
1025
+ // no buffer needed
1026
+ continue;
1027
+ }
1028
+
1029
+ // should check flatten-able loop to avoid buffering
1030
+
1031
+
1032
+ // over loop_spec or reduce_loop is not contiguous
1033
+ if (f & loop_spec || (lp->reduce_dim > 1 && ndim > 0)) {
1034
+ //printf("(buf,nd=%d)",nd);
1035
+ buf_iter = ALLOC_N(na_loop_iter_t,nd+3);
1036
+ buf_shape = ALLOC_N(size_t,nd);
1037
+ buf_iter[nd].pos = 0;
1038
+ buf_iter[nd].step = 0;
1039
+ buf_iter[nd].idx = NULL;
1040
+ sz = LARG(lp,j).elmsz;
1041
+ //last_step = sz;
1042
+ for (i=nd; i>0; ) {
1043
+ i--;
1044
+ buf_iter[i].pos = 0;
1045
+ buf_iter[i].step = sz;
1046
+ buf_iter[i].idx = NULL;
1047
+ //n = lp->user.n[i];
1048
+ n = LARG(lp,j).shape[i];
1049
+ buf_shape[i] = n;
1050
+ sz *= n;
1051
+ }
1052
+ LBUFCP(lp,j) = ALLOC(na_buffer_copy_t);
1053
+ LBUFCP(lp,j)->ndim = ndim;
1054
+ LBUFCP(lp,j)->elmsz = elmsz;
1055
+ LBUFCP(lp,j)->n = buf_shape;
1056
+ LBUFCP(lp,j)->src_iter = src_iter;
1057
+ LBUFCP(lp,j)->buf_iter = buf_iter;
1058
+ LARG(lp,j).iter = buf_iter;
1059
+ //printf("in ndfunc_set_bufcp(1): lp->user.args[%d].iter=%lx\n",j,(size_t)(LARG(lp,j).iter));
1060
+ LBUFCP(lp,j)->src_ptr = LARG(lp,j).ptr;
1061
+ LARG(lp,j).ptr = LBUFCP(lp,j)->buf_ptr = xmalloc(sz);
1062
+ //printf("(LBUFCP(lp,%d)->buf_ptr=%lx)\n",j,(size_t)(LBUFCP(lp,j)->buf_ptr));
1063
+ }
1064
+ }
1065
+
1066
+ #if 0
1067
+ for (j=0; j<lp->narg; j++) {
1068
+ ndim = lp->user.ndim;
1069
+ src_iter = LARG(lp,j).iter;
1070
+ last_step = src_iter[ndim-1].step;
1071
+ if (lp->reduce_dim>1) {
1072
+ //printf("(reduce_dim=%d,ndim=%d,nd=%d,n=%ld,lst=%ld)\n",lp->reduce_dim,ndim,nd,n_total,last_step);
1073
+ buf_iter = ALLOC_N(na_loop_iter_t,2);
1074
+ buf_iter[0].pos = LARG(lp,j).iter[0].pos;
1075
+ buf_iter[0].step = last_step;
1076
+ buf_iter[0].idx = NULL;
1077
+ buf_iter[1].pos = 0;
1078
+ buf_iter[1].step = 0;
1079
+ buf_iter[1].idx = NULL;
1080
+ LARG(lp,j).iter = buf_iter;
1081
+ //printf("in ndfunc_set_bufcp(2): lp->user.args[%d].iter=%lx\n",j,(size_t)(LARG(lp,j).iter));
1082
+ lp->xargs[j].free_user_iter = 1;
1083
+ }
1084
+ }
1085
+ #endif
1086
+
1087
+ // flatten reduce dimensions
1088
+ if (lp->reduce_dim > 1) {
1089
+ #if 1
1090
+ for (j=0; j<lp->narg; j++) {
1091
+ ndim = lp->user.ndim;
1092
+ LARG(lp,j).iter[0].step = LARG(lp,j).iter[ndim-1].step;
1093
+ LARG(lp,j).iter[0].idx = NULL;
1094
+ }
1095
+ #endif
1096
+ lp->user.n[0] = n_total;
1097
+ lp->user.ndim = 1;
1098
+ }
1099
+ }
1100
+
1101
+
1102
+ static void
1103
+ ndloop_copy_to_buffer(na_buffer_copy_t *lp)
1104
+ {
1105
+ size_t *c;
1106
+ char *src, *buf;
1107
+ int i;
1108
+ int nd = lp->ndim;
1109
+ size_t elmsz = lp->elmsz;
1110
+ size_t buf_pos = 0;
1111
+ DBG(size_t j);
1112
+
1113
+ //printf("\nto_buf nd=%d elmsz=%ld\n",nd,elmsz);
1114
+ DBG(printf("<to buf> ["));
1115
+ // zero-dimension
1116
+ if (nd==0) {
1117
+ src = lp->src_ptr + LITER_SRC(lp,0).pos;
1118
+ buf = lp->buf_ptr;
1119
+ memcpy(buf,src,elmsz);
1120
+ DBG(for (j=0; j<elmsz/8; j++) {printf("%g,",((double*)(buf))[j]);});
1121
+ goto loop_end;
1122
+ }
1123
+ // initialize loop counter
1124
+ c = ALLOCA_N(size_t, nd+1);
1125
+ for (i=0; i<=nd; i++) c[i]=0;
1126
+ // loop body
1127
+ for (i=0;;) {
1128
+ // i-th dimension
1129
+ for (; i<nd; i++) {
1130
+ if (LITER_SRC(lp,i).idx) {
1131
+ LITER_SRC(lp,i+1).pos = LITER_SRC(lp,i).pos + LITER_SRC(lp,i).idx[c[i]];
1132
+ } else {
1133
+ LITER_SRC(lp,i+1).pos = LITER_SRC(lp,i).pos + LITER_SRC(lp,i).step*c[i];
1134
+ }
1135
+ }
1136
+ src = lp->src_ptr + LITER_SRC(lp,nd).pos;
1137
+ buf = lp->buf_ptr + buf_pos;
1138
+ memcpy(buf,src,elmsz);
1139
+ DBG(for (j=0; j<elmsz/8; j++) {printf("%g,",((double*)(buf))[j]);});
1140
+ buf_pos += elmsz;
1141
+ // count up
1142
+ for (;;) {
1143
+ if (i<=0) goto loop_end;
1144
+ i--;
1145
+ if (++c[i] < lp->n[i]) break;
1146
+ c[i] = 0;
1147
+ }
1148
+ }
1149
+ loop_end:
1150
+ ;
1151
+ DBG(printf("]\n"));
1152
+ }
1153
+
1154
+ static void
1155
+ ndloop_copy_from_buffer(na_buffer_copy_t *lp)
1156
+ {
1157
+ size_t *c;
1158
+ char *src, *buf;
1159
+ int i;
1160
+ int nd = lp->ndim;
1161
+ size_t elmsz = lp->elmsz;
1162
+ size_t buf_pos = 0;
1163
+ DBG(size_t j);
1164
+
1165
+ //printf("\nfrom_buf nd=%d elmsz=%ld\n",nd,elmsz);
1166
+ DBG(printf("<from buf> ["));
1167
+ // zero-dimension
1168
+ if (nd==0) {
1169
+ src = lp->src_ptr + LITER_SRC(lp,0).pos;
1170
+ buf = lp->buf_ptr;
1171
+ memcpy(src,buf,elmsz);
1172
+ DBG(for (j=0; j<elmsz/8; j++) {printf("%g,",((double*)(src))[j]);});
1173
+ goto loop_end;
1174
+ }
1175
+ // initialize loop counter
1176
+ c = ALLOCA_N(size_t, nd+1);
1177
+ for (i=0; i<=nd; i++) c[i]=0;
1178
+ // loop body
1179
+ for (i=0;;) {
1180
+ // i-th dimension
1181
+ for (; i<nd; i++) {
1182
+ if (LITER_SRC(lp,i).idx) {
1183
+ LITER_SRC(lp,i+1).pos = LITER_SRC(lp,i).pos + LITER_SRC(lp,i).idx[c[i]];
1184
+ } else {
1185
+ LITER_SRC(lp,i+1).pos = LITER_SRC(lp,i).pos + LITER_SRC(lp,i).step*c[i];
1186
+ }
1187
+ }
1188
+ src = lp->src_ptr + LITER_SRC(lp,nd).pos;
1189
+ buf = lp->buf_ptr + buf_pos;
1190
+ memcpy(src,buf,elmsz);
1191
+ DBG(for (j=0; j<elmsz/8; j++) {printf("%g,",((double*)(src))[j]);});
1192
+ buf_pos += elmsz;
1193
+ // count up
1194
+ for (;;) {
1195
+ if (i<=0) goto loop_end;
1196
+ i--;
1197
+ if (++c[i] < lp->n[i]) break;
1198
+ c[i] = 0;
1199
+ }
1200
+ }
1201
+ loop_end:
1202
+ DBG(printf("]\n"));
1203
+ }
1204
+
1205
+
1206
+ static void
1207
+ ndfunc_write_back(ndfunc_t *nf, na_md_loop_t *lp, VALUE orig_args, VALUE results)
1208
+ {
1209
+ VALUE src, dst;
1210
+
1211
+ if (lp->writeback >= 0) {
1212
+ dst = RARRAY_AREF(orig_args,lp->writeback);
1213
+ src = RARRAY_AREF(results,0);
1214
+ na_store(dst,src);
1215
+ RARRAY_ASET(results,0,dst);
1216
+ }
1217
+ }
1218
+
1219
+
1220
+ static VALUE
1221
+ ndloop_extract(VALUE results, ndfunc_t *nf)
1222
+ {
1223
+ static ID id_extract = 0;
1224
+ long n, i;
1225
+ VALUE x, y;
1226
+ narray_t *na;
1227
+
1228
+ if (id_extract==0) {
1229
+ id_extract = rb_intern("extract");
1230
+ }
1231
+
1232
+ // extract result objects
1233
+ switch(nf->nout) {
1234
+ case 0:
1235
+ return Qnil;
1236
+ case 1:
1237
+ x = RARRAY_AREF(results,0);
1238
+ if (NDF_TEST(nf,NDF_EXTRACT)) {
1239
+ if (IsNArray(x)){
1240
+ GetNArray(x,na);
1241
+ if (NA_NDIM(na)==0) {
1242
+ x = rb_funcall(x, id_extract, 0);
1243
+ }
1244
+ }
1245
+ }
1246
+ return x;
1247
+ }
1248
+ if (NDF_TEST(nf,NDF_EXTRACT)) {
1249
+ n = RARRAY_LEN(results);
1250
+ for (i=0; i<n; i++) {
1251
+ x = RARRAY_AREF(results,i);
1252
+ if (IsNArray(x)){
1253
+ GetNArray(x,na);
1254
+ if (NA_NDIM(na)==0) {
1255
+ y = rb_funcall(x, id_extract, 0);
1256
+ RARRAY_ASET(results,i,y);
1257
+ }
1258
+ }
1259
+ }
1260
+ }
1261
+ return results;
1262
+ }
1263
+
1264
+
1265
+ static void
1266
+ loop_narray(ndfunc_t *nf, na_md_loop_t *lp);
1267
+
1268
+ static VALUE
1269
+ ndloop_run(VALUE vlp)
1270
+ {
1271
+ unsigned int loop_spec;
1272
+ volatile VALUE args, orig_args, results;
1273
+ na_md_loop_t *lp = (na_md_loop_t*)(vlp);
1274
+ ndfunc_t *nf;
1275
+
1276
+ orig_args = lp->vargs;
1277
+ nf = lp->ndfunc;
1278
+
1279
+ args = rb_obj_dup(orig_args);
1280
+
1281
+ // setup ndloop iterator with arguments
1282
+ ndloop_init_args(nf, lp, args);
1283
+ results = ndloop_set_output(nf, lp, args);
1284
+
1285
+ //if (na_debug_flag) {
1286
+ // printf("-- ndloop_set_output --\n");
1287
+ // print_ndloop(lp);
1288
+ //}
1289
+
1290
+ // contract loop
1291
+ if (lp->loop_func == loop_narray) {
1292
+ ndfunc_contract_loop(lp);
1293
+ //if (na_debug_flag) {
1294
+ // printf("-- ndfunc_contract_loop --\n");
1295
+ // print_ndloop(lp);
1296
+ //}
1297
+ }
1298
+
1299
+ // setup objects in which resuts are stored
1300
+ ndfunc_set_user_loop(nf, lp);
1301
+
1302
+ // setup buffering during loop
1303
+ if (lp->loop_func == loop_narray) {
1304
+ loop_spec = ndloop_func_loop_spec(nf, lp->user.ndim);
1305
+ ndfunc_set_bufcp(lp, loop_spec);
1306
+ if (na_debug_flag) {
1307
+ printf("-- ndfunc_set_bufcp --\n");
1308
+ print_ndloop(lp);
1309
+ }
1310
+ }
1311
+
1312
+ // loop
1313
+ (*(lp->loop_func))(nf, lp);
1314
+
1315
+ if (na_debug_flag) {
1316
+ printf("-- after loop --\n");
1317
+ print_ndloop(lp);
1318
+ }
1319
+
1320
+ if (RTEST(lp->user.err_type)) {
1321
+ rb_raise(lp->user.err_type, "error in NArray operation");
1322
+ }
1323
+
1324
+ // write-back will be placed here
1325
+ ndfunc_write_back(nf, lp, orig_args, results);
1326
+
1327
+ // extract result objects
1328
+ return ndloop_extract(results, nf);
1329
+ }
1330
+
1331
+
1332
+ // ---------------------------------------------------------------------------
1333
+
1334
+ static void
1335
+ loop_narray(ndfunc_t *nf, na_md_loop_t *lp)
1336
+ {
1337
+ size_t *c;
1338
+ int i, j;
1339
+ int nd = lp->ndim;
1340
+
1341
+ if (nd<0) {
1342
+ rb_bug("bug? lp->ndim = %d\n", lp->ndim);
1343
+ }
1344
+
1345
+ if (nd==0) {
1346
+ for (j=0; j<lp->nin; j++) {
1347
+ if (lp->xargs[j].bufcp) {
1348
+ //printf("copy_to_buffer j=%d\n",j);
1349
+ ndloop_copy_to_buffer(lp->xargs[j].bufcp);
1350
+ }
1351
+ }
1352
+ (*(nf->func))(&(lp->user));
1353
+ for (j=0; j<lp->narg; j++) {
1354
+ if (lp->xargs[j].bufcp && (lp->xargs[j].flag & NDL_WRITE)) {
1355
+ //printf("copy_from_buffer j=%d\n",j);
1356
+ // copy data to work buffer
1357
+ ndloop_copy_from_buffer(lp->xargs[j].bufcp);
1358
+ }
1359
+ }
1360
+ return;
1361
+ }
1362
+
1363
+ // initialize loop counter
1364
+ c = ALLOCA_N(size_t, nd+1);
1365
+ for (i=0; i<=nd; i++) c[i]=0;
1366
+
1367
+ // loop body
1368
+ for (i=0;;) {
1369
+ // i-th dimension
1370
+ for (; i<nd; i++) {
1371
+ // j-th argument
1372
+ for (j=0; j<lp->narg; j++) {
1373
+ if (LITER(lp,i,j).idx) {
1374
+ LITER(lp,i+1,j).pos = LITER(lp,i,j).pos + LITER(lp,i,j).idx[c[i]];
1375
+ } else {
1376
+ LITER(lp,i+1,j).pos = LITER(lp,i,j).pos + LITER(lp,i,j).step*c[i];
1377
+ }
1378
+ //printf("j=%d c[i=%d]=%lu pos=%lu\n",j,i,c[i],LITER(lp,i+1,j).pos);
1379
+ }
1380
+ }
1381
+ for (j=0; j<lp->nin; j++) {
1382
+ if (lp->xargs[j].bufcp) {
1383
+ // copy data to work buffer
1384
+ // cp lp->iter[j][nd..*] to lp->user.args[j].iter[0..*]
1385
+ //printf("copy_to_buffer j=%d\n",j);
1386
+ ndloop_copy_to_buffer(lp->xargs[j].bufcp);
1387
+ }
1388
+ }
1389
+ (*(nf->func))(&(lp->user));
1390
+ for (j=0; j<lp->narg; j++) {
1391
+ if (lp->xargs[j].bufcp && (lp->xargs[j].flag & NDL_WRITE)) {
1392
+ // copy data to work buffer
1393
+ //printf("copy_from_buffer j=%d\n",j);
1394
+ ndloop_copy_from_buffer(lp->xargs[j].bufcp);
1395
+ }
1396
+ }
1397
+ if (RTEST(lp->user.err_type)) {return;}
1398
+
1399
+ for (;;) {
1400
+ if (i<=0) goto loop_end;
1401
+ i--;
1402
+ if (++c[i] < lp->n[i]) break;
1403
+ c[i] = 0;
1404
+ }
1405
+ }
1406
+ loop_end:
1407
+ ;
1408
+ }
1409
+
1410
+
1411
+ VALUE
1412
+ na_ndloop_main(ndfunc_t *nf, VALUE args, void *opt_ptr)
1413
+ {
1414
+ unsigned int copy_flag;
1415
+ na_md_loop_t lp;
1416
+
1417
+ if (na_debug_flag) print_ndfunc(nf);
1418
+
1419
+ // cast arguments to NArray
1420
+ copy_flag = ndloop_cast_args(nf, args);
1421
+
1422
+ // allocate ndloop struct
1423
+ ndloop_alloc(&lp, nf, args, opt_ptr, copy_flag, loop_narray);
1424
+
1425
+ return rb_ensure(ndloop_run, (VALUE)&lp, ndloop_release, (VALUE)&lp);
1426
+ }
1427
+
1428
+
1429
+ VALUE
1430
+ #ifdef HAVE_STDARG_PROTOTYPES
1431
+ na_ndloop(ndfunc_t *nf, int argc, ...)
1432
+ #else
1433
+ na_ndloop(nf, argc, va_alist)
1434
+ ndfunc_t *nf;
1435
+ int argc;
1436
+ va_dcl
1437
+ #endif
1438
+ {
1439
+ va_list ar;
1440
+
1441
+ int i;
1442
+ VALUE *argv;
1443
+ volatile VALUE args;
1444
+
1445
+ argv = ALLOCA_N(VALUE,argc);
1446
+
1447
+ va_init_list(ar, argc);
1448
+ for (i=0; i<argc; i++) {
1449
+ argv[i] = va_arg(ar, VALUE);
1450
+ }
1451
+ va_end(ar);
1452
+
1453
+ args = rb_ary_new4(argc, argv);
1454
+
1455
+ return na_ndloop_main(nf, args, NULL);
1456
+ }
1457
+
1458
+
1459
+ VALUE
1460
+ na_ndloop2(ndfunc_t *nf, VALUE args)
1461
+ {
1462
+ return na_ndloop_main(nf, args, NULL);
1463
+ }
1464
+
1465
+ VALUE
1466
+ #ifdef HAVE_STDARG_PROTOTYPES
1467
+ na_ndloop3(ndfunc_t *nf, void *ptr, int argc, ...)
1468
+ #else
1469
+ na_ndloop3(nf, ptr, argc, va_alist)
1470
+ ndfunc_t *nf;
1471
+ void *ptr;
1472
+ int argc;
1473
+ va_dcl
1474
+ #endif
1475
+ {
1476
+ va_list ar;
1477
+
1478
+ int i;
1479
+ VALUE *argv;
1480
+ volatile VALUE args;
1481
+
1482
+ argv = ALLOCA_N(VALUE,argc);
1483
+
1484
+ va_init_list(ar, argc);
1485
+ for (i=0; i<argc; i++) {
1486
+ argv[i] = va_arg(ar, VALUE);
1487
+ }
1488
+ va_end(ar);
1489
+
1490
+ args = rb_ary_new4(argc, argv);
1491
+
1492
+ return na_ndloop_main(nf, args, ptr);
1493
+ }
1494
+
1495
+ VALUE
1496
+ na_ndloop4(ndfunc_t *nf, void *ptr, VALUE args)
1497
+ {
1498
+ return na_ndloop_main(nf, args, ptr);
1499
+ }
1500
+
1501
+ //----------------------------------------------------------------------
1502
+
1503
+ VALUE
1504
+ na_info_str(VALUE ary)
1505
+ {
1506
+ int nd, i;
1507
+ char tmp[32];
1508
+ VALUE buf;
1509
+ narray_t *na;
1510
+
1511
+ GetNArray(ary,na);
1512
+ nd = na->ndim;
1513
+
1514
+ buf = rb_str_new2(rb_class2name(CLASS_OF(ary)));
1515
+ if (NA_TYPE(na) == NARRAY_VIEW_T) {
1516
+ rb_str_cat(buf,"(view)",6);
1517
+ }
1518
+ rb_str_cat(buf,"#shape=[",8);
1519
+ if (nd>0) {
1520
+ for (i=0;;) {
1521
+ sprintf(tmp,"%"SZF"u",na->shape[i]);
1522
+ rb_str_cat2(buf,tmp);
1523
+ if (++i==nd) break;
1524
+ rb_str_cat(buf,",",1);
1525
+ }
1526
+ }
1527
+ rb_str_cat(buf,"]",1);
1528
+ return buf;
1529
+ }
1530
+
1531
+
1532
+ //----------------------------------------------------------------------
1533
+
1534
+ static void
1535
+ ndloop_inspect_get_width(int *ncol, int *nrow)
1536
+ {
1537
+ VALUE cols, rows;
1538
+
1539
+ cols = rb_ivar_get(cNArray,rb_intern("inspect_cols"));
1540
+ if (RTEST(cols)) {
1541
+ *ncol = NUM2INT(cols);
1542
+ } else {
1543
+ *ncol = 80;
1544
+ }
1545
+
1546
+ rows = rb_ivar_get(cNArray,rb_intern("inspect_rows"));
1547
+ if (RTEST(rows)) {
1548
+ *nrow = NUM2INT(rows);
1549
+ } else {
1550
+ *nrow = 20;
1551
+ }
1552
+ }
1553
+
1554
+ static void
1555
+ loop_inspect(ndfunc_t *nf, na_md_loop_t *lp)
1556
+ {
1557
+ int nd, i, ii;
1558
+ size_t *c;
1559
+ int col=0, row=0;
1560
+ long len;
1561
+ VALUE str;
1562
+ int ncol;
1563
+ int nrow;
1564
+ na_text_func_t func = (na_text_func_t)(nf->func);
1565
+ VALUE buf, opt;
1566
+
1567
+ nd = lp->ndim;
1568
+ buf = lp->loop_opt;
1569
+ //opt = *(VALUE*)(lp->user.opt_ptr);
1570
+ opt = lp->user.option;
1571
+
1572
+ for (i=0; i<nd; i++) {
1573
+ if (lp->n[i] == 0) {
1574
+ rb_str_cat(buf,"[]",2);
1575
+ return;
1576
+ }
1577
+ }
1578
+
1579
+ rb_str_cat(buf,"\n",1);
1580
+
1581
+ c = ALLOCA_N(size_t, nd+1);
1582
+ for (i=0; i<=nd; i++) c[i]=0;
1583
+
1584
+ if (nd>0) {
1585
+ rb_str_cat(buf,"[",1);
1586
+ } else {
1587
+ rb_str_cat(buf,"",0);
1588
+ }
1589
+
1590
+ ndloop_inspect_get_width(&ncol,&nrow);
1591
+
1592
+ col = nd*2;
1593
+ for (i=0;;) {
1594
+ if (i<nd-1) {
1595
+ for (ii=0; ii<i; ii++) rb_str_cat(buf," ",1);
1596
+ for (; ii<nd-1; ii++) rb_str_cat(buf,"[",1);
1597
+ }
1598
+ for (; i<nd; i++) {
1599
+ if (LITER(lp,i,0).idx) {
1600
+ LITER(lp,i+1,0).pos = LITER(lp,i,0).pos + LITER(lp,i,0).idx[c[i]];
1601
+ } else {
1602
+ LITER(lp,i+1,0).pos = LITER(lp,i,0).pos + LITER(lp,i,0).step*c[i];
1603
+ }
1604
+ }
1605
+ str = (*func)(LARG(lp,0).ptr, LITER(lp,i,0).pos, opt);
1606
+
1607
+ len = RSTRING_LEN(str) + 2;
1608
+ if (ncol>0 && col+len > ncol-3) {
1609
+ rb_str_cat(buf,"...",3);
1610
+ c[i-1] = lp->n[i-1];
1611
+ } else {
1612
+ rb_str_append(buf, str);
1613
+ col += len;
1614
+ }
1615
+ for (;;) {
1616
+ if (i==0) goto loop_end;
1617
+ i--;
1618
+ if (++c[i] < lp->n[i]) break;
1619
+ rb_str_cat(buf,"]",1);
1620
+ c[i] = 0;
1621
+ }
1622
+ //line_break:
1623
+ rb_str_cat(buf,", ",2);
1624
+ if (i<nd-1) {
1625
+ rb_str_cat(buf,"\n ",2);
1626
+ col = nd*2;
1627
+ row++;
1628
+ if (row==nrow) {
1629
+ rb_str_cat(buf,"...",3);
1630
+ goto loop_end;
1631
+ }
1632
+ }
1633
+ }
1634
+ loop_end:
1635
+ ;
1636
+ }
1637
+
1638
+
1639
+ VALUE
1640
+ na_ndloop_inspect(VALUE nary, na_text_func_t func, VALUE opt)
1641
+ {
1642
+ volatile VALUE args;
1643
+ na_md_loop_t lp;
1644
+ VALUE buf;
1645
+ ndfunc_arg_in_t ain[3] = {{Qnil,0},{sym_loop_opt},{sym_option}};
1646
+ ndfunc_t nf = { (na_iter_func_t)func, NO_LOOP, 3, 0, ain, 0 };
1647
+ //nf = ndfunc_alloc(NULL, NO_LOOP, 1, 0, Qnil);
1648
+
1649
+ buf = na_info_str(nary);
1650
+
1651
+ if (na_get_pointer(nary)==NULL) {
1652
+ return rb_str_cat(buf,"(empty)",7);
1653
+ }
1654
+
1655
+ //rb_p(args);
1656
+ //if (na_debug_flag) print_ndfunc(&nf);
1657
+
1658
+ args = rb_ary_new3(3,nary,buf,opt);
1659
+
1660
+ // cast arguments to NArray
1661
+ //ndloop_cast_args(nf, args);
1662
+
1663
+ // allocate ndloop struct
1664
+ ndloop_alloc(&lp, &nf, args, NULL, 0, loop_inspect);
1665
+
1666
+ rb_ensure(ndloop_run, (VALUE)&lp, ndloop_release, (VALUE)&lp);
1667
+
1668
+ return buf;
1669
+ }
1670
+
1671
+
1672
+ //----------------------------------------------------------------------
1673
+
1674
+ static void
1675
+ loop_rarray_to_narray(ndfunc_t *nf, na_md_loop_t *lp)
1676
+ {
1677
+ size_t *c;
1678
+ int i;
1679
+ VALUE *a;
1680
+ int nd = lp->ndim;
1681
+
1682
+ // counter
1683
+ c = ALLOCA_N(size_t, nd+1);
1684
+ for (i=0; i<=nd; i++) c[i]=0;
1685
+
1686
+ // array at each dimension
1687
+ a = ALLOCA_N(VALUE, nd+1);
1688
+ a[0] = LARG(lp,0).value;
1689
+
1690
+ // loop body
1691
+ for (i=0;;) {
1692
+ for (; i<nd; i++) {
1693
+ if (LITER(lp,i,1).idx) {
1694
+ LITER(lp,i+1,1).pos = LITER(lp,i,1).pos + LITER(lp,i,1).idx[c[i]];
1695
+ } else {
1696
+ LITER(lp,i+1,1).pos = LITER(lp,i,1).pos + LITER(lp,i,1).step*c[i];
1697
+ }
1698
+ //LITER(lp,i+1,0).pos = LITER(lp,i,0).pos + c[i];
1699
+ if (TYPE(a[i])==T_ARRAY) {
1700
+ if (c[i] < (size_t)RARRAY_LEN(a[i])) {
1701
+ a[i+1] = RARRAY_AREF(a[i],c[i]);
1702
+ } else {
1703
+ a[i+1] = Qnil;
1704
+ }
1705
+ } else { // not Array -- what about narray?
1706
+ if (c[i]==0) {
1707
+ a[i+1] = a[i];
1708
+ } else {
1709
+ a[i+1] = Qnil;
1710
+ }
1711
+ }
1712
+ //printf("c[i]=%d, i=%d\n",c[i],i);
1713
+ }
1714
+
1715
+ //printf("a[i]=0x%x, i=%d\n",a[i],i);
1716
+ LARG(lp,0).value = a[i];
1717
+
1718
+ (*(nf->func))(&(lp->user));
1719
+
1720
+ for (;;) {
1721
+ if (i<=0) goto loop_end;
1722
+ i--; c[i]++;
1723
+ if (c[i] < lp->n[i]) break;
1724
+ c[i] = 0;
1725
+ }
1726
+ }
1727
+ loop_end:
1728
+ ;
1729
+ }
1730
+
1731
+ VALUE
1732
+ na_ndloop_cast_rarray_to_narray(ndfunc_t *nf, VALUE rary, VALUE nary)
1733
+ {
1734
+ na_md_loop_t lp;
1735
+ VALUE args;
1736
+
1737
+ //rb_p(args);
1738
+ if (na_debug_flag) print_ndfunc(nf);
1739
+
1740
+ args = rb_assoc_new(rary,nary);
1741
+
1742
+ // cast arguments to NArray
1743
+ //ndloop_cast_args(nf, args);
1744
+
1745
+ // allocate ndloop struct
1746
+ ndloop_alloc(&lp, nf, args, NULL, 0, loop_rarray_to_narray);
1747
+
1748
+ return rb_ensure(ndloop_run, (VALUE)&lp, ndloop_release, (VALUE)&lp);
1749
+ }
1750
+
1751
+
1752
+ VALUE
1753
+ na_ndloop_cast_rarray_to_narray2(ndfunc_t *nf, VALUE rary, VALUE nary, VALUE opt)
1754
+ {
1755
+ na_md_loop_t lp;
1756
+ VALUE args;
1757
+
1758
+ //rb_p(args);
1759
+ if (na_debug_flag) print_ndfunc(nf);
1760
+
1761
+ //args = rb_assoc_new(rary,nary);
1762
+ args = rb_ary_new3(3,rary,nary,opt);
1763
+
1764
+ // cast arguments to NArray
1765
+ //ndloop_cast_args(nf, args);
1766
+
1767
+ // allocate ndloop struct
1768
+ ndloop_alloc(&lp, nf, args, NULL, 0, loop_rarray_to_narray);
1769
+
1770
+ return rb_ensure(ndloop_run, (VALUE)&lp, ndloop_release, (VALUE)&lp);
1771
+ }
1772
+
1773
+
1774
+ //----------------------------------------------------------------------
1775
+
1776
+ static void
1777
+ loop_narray_to_rarray(ndfunc_t *nf, na_md_loop_t *lp)
1778
+ {
1779
+ size_t *c;
1780
+ int i;
1781
+ //int nargs = nf->narg + nf->nres;
1782
+ int nd = lp->ndim;
1783
+ VALUE *a;
1784
+ volatile VALUE a0;
1785
+
1786
+ // alloc counter
1787
+ c = ALLOCA_N(size_t, nd+1);
1788
+ for (i=0; i<=nd; i++) c[i]=0;
1789
+ //c[i]=1; // for zero-dim
1790
+ //fprintf(stderr,"in loop_narray_to_rarray, nd=%d\n",nd);
1791
+
1792
+ a = ALLOCA_N(VALUE, nd+1);
1793
+ a[0] = a0 = lp->loop_opt;
1794
+
1795
+ // loop body
1796
+ for (i=0;;) {
1797
+ for (; i<nd; i++) {
1798
+ if (LITER(lp,i,0).idx) {
1799
+ LITER(lp,i+1,0).pos = LITER(lp,i,0).pos + LITER(lp,i,0).idx[c[i]];
1800
+ } else {
1801
+ LITER(lp,i+1,0).pos = LITER(lp,i,0).pos + LITER(lp,i,0).step*c[i];
1802
+ }
1803
+ if (c[i]==0) {
1804
+ a[i+1] = rb_ary_new2(lp->n[i]);
1805
+ rb_ary_push(a[i],a[i+1]);
1806
+ }
1807
+ }
1808
+
1809
+ //lp->user.info = a[i];
1810
+ LARG(lp,1).value = a[i];
1811
+ (*(nf->func))(&(lp->user));
1812
+
1813
+ for (;;) {
1814
+ if (i<=0) goto loop_end;
1815
+ i--;
1816
+ if (++c[i] < lp->n[i]) break;
1817
+ c[i] = 0;
1818
+ }
1819
+ }
1820
+ loop_end:
1821
+ ;
1822
+ }
1823
+
1824
+ VALUE
1825
+ na_ndloop_cast_narray_to_rarray(ndfunc_t *nf, VALUE nary, VALUE fmt)
1826
+ {
1827
+ na_md_loop_t lp;
1828
+ VALUE args, a0;
1829
+
1830
+ //rb_p(args);
1831
+ if (na_debug_flag) print_ndfunc(nf);
1832
+
1833
+ a0 = rb_ary_new();
1834
+ args = rb_ary_new3(3,nary,a0,fmt);
1835
+
1836
+ // cast arguments to NArray
1837
+ //ndloop_cast_args(nf, args);
1838
+
1839
+ // allocate ndloop struct
1840
+ ndloop_alloc(&lp, nf, args, NULL, 0, loop_narray_to_rarray);
1841
+
1842
+ rb_ensure(ndloop_run, (VALUE)&lp, ndloop_release, (VALUE)&lp);
1843
+ return RARRAY_AREF(a0,0);
1844
+ }
1845
+
1846
+
1847
+ //----------------------------------------------------------------------
1848
+
1849
+ static void
1850
+ loop_narray_with_index(ndfunc_t *nf, na_md_loop_t *lp)
1851
+ {
1852
+ size_t *c;
1853
+ int i,j;
1854
+ int nd = lp->ndim;
1855
+
1856
+ // pass total ndim to iterator
1857
+ lp->user.ndim += nd;
1858
+
1859
+ // alloc counter
1860
+ lp->user.opt_ptr = c = ALLOCA_N(size_t, nd+1);
1861
+ for (i=0; i<=nd; i++) c[i]=0;
1862
+
1863
+ // loop body
1864
+ for (i=0;;) {
1865
+ for (; i<nd; i++) {
1866
+ // j-th argument
1867
+ for (j=0; j<lp->narg; j++) {
1868
+ if (LITER(lp,i,j).idx) {
1869
+ LITER(lp,i+1,j).pos = LITER(lp,i,j).pos + LITER(lp,i,j).idx[c[i]];
1870
+ } else {
1871
+ LITER(lp,i+1,j).pos = LITER(lp,i,j).pos + LITER(lp,i,j).step*c[i];
1872
+ }
1873
+ //printf("j=%d c[i=%d]=%lu pos=%lu\n",j,i,c[i],LITER(lp,i+1,j).pos);
1874
+ }
1875
+ }
1876
+
1877
+ (*(nf->func))(&(lp->user));
1878
+
1879
+ for (;;) {
1880
+ if (i<=0) goto loop_end;
1881
+ i--;
1882
+ if (++c[i] < lp->n[i]) break;
1883
+ c[i] = 0;
1884
+ }
1885
+ }
1886
+ loop_end:
1887
+ ;
1888
+ }
1889
+
1890
+
1891
+ VALUE
1892
+ #ifdef HAVE_STDARG_PROTOTYPES
1893
+ na_ndloop_with_index(ndfunc_t *nf, int argc, ...)
1894
+ #else
1895
+ na_ndloop(nf, argc, va_alist)
1896
+ ndfunc_t *nf;
1897
+ int argc;
1898
+ va_dcl
1899
+ #endif
1900
+ {
1901
+ va_list ar;
1902
+
1903
+ int i;
1904
+ VALUE *argv;
1905
+ volatile VALUE args;
1906
+ na_md_loop_t lp;
1907
+
1908
+ argv = ALLOCA_N(VALUE,argc);
1909
+
1910
+ va_init_list(ar, argc);
1911
+ for (i=0; i<argc; i++) {
1912
+ argv[i] = va_arg(ar, VALUE);
1913
+ }
1914
+ va_end(ar);
1915
+
1916
+ args = rb_ary_new4(argc, argv);
1917
+
1918
+ //return na_ndloop_main(nf, args, NULL);
1919
+ if (na_debug_flag) print_ndfunc(nf);
1920
+
1921
+ // cast arguments to NArray
1922
+ //copy_flag = ndloop_cast_args(nf, args);
1923
+
1924
+ // allocate ndloop struct
1925
+ ndloop_alloc(&lp, nf, args, 0, 0, loop_narray_with_index);
1926
+
1927
+ return rb_ensure(ndloop_run, (VALUE)&lp, ndloop_release, (VALUE)&lp);
1928
+ }