narray-bigmem 0.0.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,212 @@
1
+ $type_codes = %w(n B I L G F D X C O)
2
+ $data_types =
3
+ %w(none u_int8_t int16_t int32_t int64_t float double scomplex dcomplex VALUE)
4
+ $real_types =
5
+ %w(none u_int8_t int16_t int32_t int64_t float double float double VALUE)
6
+ $int_types =
7
+ %w(none u_int8_t int16_t int32_t int64_t int32_t int32_t scomplex dcomplex VALUE)
8
+ $comp_types =
9
+ %w(none scomplex scomplex scomplex dcomplex scomplex dcomplex scomplex dcomplex VALUE)
10
+ $swap_types =
11
+ %w(none u_int8_t na_size16_t na_size32_t na_size64_t na_size32_t na_size64_t na_size64_t na_size128_t VALUE)
12
+ $upcast = [
13
+ [ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
14
+ [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9],
15
+ [ 0, 2, 2, 3, 4, 5, 6, 7, 8, 9],
16
+ [ 0, 3, 3, 3, 4, 5, 6, 7, 8, 9],
17
+ [ 0, 4, 4, 4, 4, 5, 6, 7, 8, 9],
18
+ [ 0, 5, 5, 5, 5, 5, 6, 7, 8, 9],
19
+ [ 0, 6, 6, 6, 6, 6, 6, 8, 8, 9],
20
+ [ 0, 7, 7, 7, 7, 7, 8, 7, 8, 9],
21
+ [ 0, 8, 8, 8, 8, 8, 8, 8, 8, 9],
22
+ [ 0, 9, 9, 9, 9, 9, 9, 9, 9, 9] ]
23
+ $data_obj = [
24
+ [/[O]/,/[O]/, "
25
+ *p1 = rb_funcall(*p1,#id,1,*p2);"],
26
+ [/[O]/,/[BIL]/,"
27
+ *p1 = rb_funcall(*p1,#id,1,INT2FIX(*p2));"],
28
+ [/[O]/,/[G]/,"
29
+ *p1 = rb_funcall(*p1,#id,1,LL2NUM(*p2));"],
30
+ [/[O]/,/[FD]/, "
31
+ *p1 = rb_funcall(*p1,#id,1,rb_float_new(*p2));"],
32
+ [/[O]/,/[XC]/, "
33
+ *p1 = rb_funcall(*p1,#id,1,rb_complex_new(p2->r,p2->i));"],
34
+ [/[BIL]/,/[O]/,"
35
+ *p1 = NUM2INT(rb_funcall(INT2FIX(*p1),#id,1,*p2));"],
36
+ [/[G]/,/[O]/,"
37
+ *p1 = NUM2LL(rb_funcall(LL2NUM(*p1),#id,1,*p2));"],
38
+ [/[FD]/,/[O]/, "
39
+ *p1 = NUM2DBL(rb_funcall(rb_float_new(*p1),#id,1,*p2));"],
40
+ [/[XC]/,/[O]/, "VALUE v=rb_funcall(rb_complex_new(p1->r,p1->i),#id,1,*p2);
41
+ p1->r = NUM2REAL(v); p1->i = NUM2IMAG(v);"] ]
42
+
43
+
44
+ def mksetfuncs(name,op,id,funcs)
45
+
46
+ print "
47
+ /* ------------------------- #{name} --------------------------- */\n"
48
+ c = $type_codes
49
+ n = $type_codes.size
50
+ td = $data_types
51
+ tr = $real_types
52
+
53
+ # Function Definition
54
+
55
+ for i in 0...n
56
+ for j in 0...n
57
+ funcs.each do |k|
58
+ if c[i]=~k[0] && c[j]=~k[1]
59
+ #if i==j
60
+ # f = "memcpy(p1,p1,sizeof(typed));"
61
+ #else
62
+ f = k[2]
63
+ #end
64
+ f = f.
65
+ gsub(/p1\./,"((#{td[i]}*)p1)[i].").
66
+ gsub(/p2\./,"((#{td[j]}*)p2)[i].").
67
+ gsub(/\*p1/,"((#{td[i]}*)p1)[i]").
68
+ gsub(/\*p2/,"((#{td[j]}*)p2)[i]").
69
+ gsub(/ = /," = (#{tr[i]})").
70
+ gsub(/#id/,id).
71
+ gsub(/#op/,op).
72
+ gsub(/typed/,td[i]).
73
+ gsub(/typef/,tr[i])
74
+ puts $func_body.
75
+ gsub(/typed/,td[i]).
76
+ gsub(/typef/,tr[i]).
77
+ gsub(/#name/,name).
78
+ sub(/OPERATION/,f).
79
+ gsub(/#CC/,c[i]+c[j])
80
+ end
81
+ end
82
+ end
83
+ end
84
+
85
+ # function pointer array
86
+ print "\nna_setfunc_t "+name+"Funcs = {\n"
87
+ m = []
88
+ for i in 0...n
89
+ l = []
90
+ for j in 0...n
91
+ f = true
92
+ for k in funcs
93
+ if c[i]=~k[0] && c[j]=~k[1]
94
+ l += [name+c[i]+c[j]]
95
+ f = false
96
+ break
97
+ end
98
+ end
99
+ if f
100
+ l += ['TpErr']
101
+ end
102
+ end
103
+ m += [' { '+l.join(', ')+' }']
104
+ end
105
+ print m.join(",\n")+"\n};\n"
106
+
107
+ end
108
+
109
+
110
+
111
+ def mkfuncs(name,t1,t2,func,idx=["i"]*3)
112
+
113
+ print "
114
+ /* ------------------------- #{name} --------------------------- */\n"
115
+ c = $type_codes
116
+ td = $data_types
117
+ tr = $real_types
118
+
119
+ for i in 0...c.size
120
+ if func[i] != nil && func[i] != "copy"
121
+ if /Insp/ =~ name
122
+ f = func[i].
123
+ gsub(/p2\./,"(*((#{t2[i]}*)p2)).").
124
+ gsub(/\*p1/,"*((#{t1[i]}*)p1)").
125
+ gsub(/\*p2/,"*((#{t2[i]}*)p2)")
126
+ else
127
+ f = func[i].
128
+ gsub(/p1\./,"(*((#{t1[i]}*)(p1+#{idx[0]}*i1))).").
129
+ gsub(/p2\./,"(*((#{t2[i]}*)(p2+#{idx[1]}*i2))).").
130
+ gsub(/p3\./,"(*((#{t2[i]}*)(p3+#{idx[2]}*i3))).").
131
+ gsub(/\*p1/,"*((#{t1[i]}*)(p1+#{idx[0]}*i1))").
132
+ gsub(/\*p2/,"*((#{t2[i]}*)(p2+#{idx[1]}*i2))").
133
+ gsub(/\*p3/,"*((#{t2[i]}*)(p3+#{idx[2]}*i3))").
134
+ gsub(/\(u_int8_t\*\)p3/,"(u_int8_t*)(p3+#{idx[2]}*i3)").
135
+ gsub(/p2;/,'p2+i*i2;')
136
+ end
137
+ f = f.
138
+ gsub(/type1/,td[i]).
139
+ gsub(/typec/,t1[i]).
140
+ gsub(/typef/,tr[i])
141
+ puts $func_body.
142
+ gsub(/type1/,td[i]).
143
+ gsub(/typec/,t1[i]).
144
+ gsub(/typef/,tr[i]).
145
+ gsub(/#name/,name).
146
+ gsub(/OPERATION/,f).
147
+ gsub(/#C/,c[i])
148
+ end
149
+ end
150
+
151
+ # Function Array
152
+
153
+ print "\nna_func_t #{name}Funcs =\n{ "
154
+ m = []
155
+ for i in 0...c.size
156
+ if func[i] == nil
157
+ m += ['TpErr']
158
+ elsif func[i]=='copy'
159
+ m += ['Set'+c[$data_types.index(t1[i])]+c[i]]
160
+ else
161
+ m += [name+c[i]]
162
+ end
163
+ end
164
+ print m.join(", ")+" };\n"
165
+
166
+ end
167
+
168
+
169
+
170
+ def mksortfuncs(bsname,t1,t2,func)
171
+
172
+ print "
173
+ /* ------------------------- #{bsname} --------------------------- */\n"
174
+ c = $type_codes
175
+ tf = $real_types
176
+ name = bsname
177
+
178
+ # Function Definition
179
+ head = "static int #{name}#code(const void *p1, const void *p2)"
180
+ for i in 0...c.size
181
+ if func[i] != nil && func[i]=~/^\{/
182
+ f = func[i].
183
+ gsub(/p1->/,"((#{t1[i]}*)p1)->").
184
+ gsub(/p2->/,"((#{t2[i]}*)p2)->").
185
+ gsub(/\*\*p1/,"**(#{t1[i]}**)p1").
186
+ gsub(/\*\*p2/,"**(#{t2[i]}**)p2").
187
+ gsub(/\*p1/,"*(#{t1[i]}*)p1").
188
+ gsub(/\*p2/,"*(#{t2[i]}*)p2").
189
+ gsub(/typef/,tf[i])
190
+ puts( (head+f).gsub(/#code/,c[i]) )
191
+ end
192
+ end
193
+
194
+ # Function Array
195
+
196
+ print "\nna_sortfunc_t #{name}Funcs =\n{ "
197
+ m = []
198
+ for i in 0...c.size
199
+ if func[i] == nil
200
+ m += ['(int (*)(const void *, const void *))TpErrI']
201
+ elsif func[i]=='copy'
202
+ m += ['Set'+c[i]*2]
203
+ elsif !( func[i] =~ /^\{/ )
204
+ m += [func[i]]
205
+ else
206
+ m += [name+c[i]]
207
+ end
208
+ end
209
+ print m.join(", ")+" };\n"
210
+
211
+ end
212
+
@@ -0,0 +1,734 @@
1
+ require "mknafunc"
2
+
3
+ fname = "na_op.c"
4
+ $> = open(fname,"w")
5
+
6
+ upcast_ary = $upcast.collect{|i| ' {'+i.join(", ")+'}'}.join(",\n")
7
+
8
+ print <<EOM
9
+ /*
10
+ #{fname}
11
+ Automatically generated code
12
+ Numerical Array Extention for Ruby
13
+ (C) Copyright 1999-2008 by Masahiro TANAKA
14
+
15
+ This program is free software.
16
+ You can distribute/modify this program
17
+ under the same terms as Ruby itself.
18
+ NO WARRANTY.
19
+ */
20
+ #include <ruby.h>
21
+ #include "narray.h"
22
+ #include "narray_local.h"
23
+ /* isalpha(3) etc. */
24
+ #include <ctype.h>
25
+
26
+ const int na_upcast[NA_NTYPES][NA_NTYPES] = {
27
+ #{upcast_ary} };
28
+
29
+ const int na_no_cast[NA_NTYPES] =
30
+ { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9 };
31
+ const int na_cast_real[NA_NTYPES] =
32
+ { 0, 1, 2, 3, 4, 5, 4, 5, 8, 9 };
33
+ const int na_cast_comp[NA_NTYPES] =
34
+ { 0, 7, 7, 7, 7, 7, 8, 7, 8, 9 };
35
+ const int na_cast_round[NA_NTYPES] =
36
+ { 0, 1, 2, 3, 4, 3, 3, 7, 8, 9 };
37
+ const int na_cast_byte[NA_NTYPES] =
38
+ { 0, 1, 1, 1, 1, 1, 1, 1, 1, 1 };
39
+
40
+
41
+ static void TpErr(void) {
42
+ rb_raise(rb_eTypeError,"illegal operation with this type");
43
+ }
44
+ static int TpErrI(void) {
45
+ rb_raise(rb_eTypeError,"illegal operation with this type");
46
+ return 0;
47
+ }
48
+ static void na_zerodiv() {
49
+ rb_raise(rb_eZeroDivError, "divided by 0");
50
+ }
51
+
52
+ static int notnanF(float *n)
53
+ {
54
+ return *n == *n;
55
+ }
56
+ static int notnanD(double *n)
57
+ {
58
+ return *n == *n;
59
+ }
60
+ EOM
61
+
62
+
63
+ #
64
+ # Set Fucs
65
+ #
66
+ data = [
67
+ [/[O]/,/[O]/, "*p1 = *p2;"],
68
+ [/[O]/,/[BI]/, "*p1 = INT2FIX(*p2);"],
69
+ [/[O]/,/[L]/, "*p1 = INT2NUM(*p2);"],
70
+ [/[O]/,/[G]/, "*p1 = LL2NUM(*p2);"],
71
+ [/[O]/,/[FD]/, "*p1 = rb_float_new(*p2);"],
72
+ [/[O]/,/[XC]/, "*p1 = rb_complex_new(p2.r,p2.i);"],
73
+ [/[BIL]/,/[O]/, "*p1 = NUM2INT(*p2);"],
74
+ [/[G]/,/[O]/, "*p1 = NUM2LL(*p2);"],
75
+ [/[FD]/,/[O]/, "*p1 = NUM2DBL(*p2);"],
76
+ [/[XC]/,/[O]/, "p1.r = NUM2REAL(*p2); p1.i = NUM2IMAG(*p2);"],
77
+ [/[BILFDG]/,/[BILFDG]/,"*p1 = *p2;"],
78
+ [/[BILFDG]/,/[XC]/, "*p1 = p2.r;"],
79
+ [/[XC]/,/[BILFDG]/, "p1.r = *p2; p1.i = 0;"],
80
+ [/[XC]/,/[XC]/, "p1.r = p2.r; p1.i = p2.i;"] ]
81
+
82
+ $func_body =
83
+ "static void #name#CC(na_shape_t n, char *p1, na_shape_t i1, char *p2, na_shape_t i2)
84
+ {
85
+ int i;
86
+ #pragma omp parallel for
87
+ for (i=0; i<n; i++) {
88
+ OPERATION
89
+ }
90
+ }
91
+ "
92
+ mksetfuncs('Set','','',data)
93
+
94
+
95
+
96
+ #
97
+ # Unary Funcs
98
+ #
99
+ $func_body =
100
+ "static void #name#C(na_shape_t n, char *p1, na_shape_t i1, char *p2, na_shape_t i2)
101
+ {
102
+ int i;
103
+ #pragma omp parallel for
104
+ for (i=0; i<n; i++) {
105
+ OPERATION
106
+ }
107
+ }
108
+ "
109
+
110
+
111
+ mkfuncs('Swp', $swap_types, $swap_types,
112
+ [nil] +
113
+ ["*p1 = *p2;"] +
114
+ ["na_size16_t x; swap16(x,*p2); *p1 = x;"] +
115
+ ["na_size32_t x; swap32(x,*p2); *p1 = x;"] +
116
+ ["na_size64_t x; swap64(x,*p2); *p1 = x;"] +
117
+ ["na_size32_t x; swap32(x,*p2); *p1 = x;"] +
118
+ ["na_size64_t x; swap64(x,*p2); *p1 = x;"] +
119
+ ["na_size64_t x; swap64c(x,*p2); *p1 = x;"] +
120
+ ["na_size128_t x; swap128c(x,*p2); *p1 = x;"] +
121
+ ["*p1 = *p2;"]
122
+ )
123
+
124
+ print <<EOM
125
+
126
+ /* ------------------------- H2N --------------------------- */
127
+ #ifdef WORDS_BIGENDIAN
128
+
129
+ na_func_t H2NFuncs =
130
+ { TpErr, SetBB, SetII, SetLL, SetGG, SetFF, SetDD, SetXX, SetCC, SetOO };
131
+
132
+ na_func_t H2VFuncs =
133
+ { TpErr, SetBB, SwpI, SwpL, SwpG, SwpF, SwpD, SwpX, SwpC, SetOO };
134
+
135
+ #else
136
+ #ifdef DYNAMIC_ENDIAN /* not supported yet */
137
+ #else /* LITTLE ENDIAN */
138
+
139
+ na_func_t H2NFuncs =
140
+ { TpErr, SetBB, SwpI, SwpL, SwpG, SwpF, SwpD, SwpX, SwpC, SetOO };
141
+
142
+ na_func_t H2VFuncs =
143
+ { TpErr, SetBB, SetII, SetLL, SetGG, SetFF, SetDD, SetXX, SetCC, SetOO };
144
+
145
+ #endif
146
+ #endif
147
+ EOM
148
+
149
+ mkfuncs('Neg', $data_types, $data_types,
150
+ [nil] +
151
+ ["*p1 = -*p2;"]*6 +
152
+ ["p1.r = -p2.r;
153
+ p1.i = -p2.i;"]*2 +
154
+ ["*p1 = rb_funcall(*p2,na_id_minus,0);"]
155
+ )
156
+
157
+ # method: imag=
158
+ mkfuncs('ImgSet',$data_types,$real_types,
159
+ [nil]*7 +
160
+ ["p1.i = *p2;"]*2 +
161
+ [nil]
162
+ )
163
+
164
+
165
+ mkfuncs('Floor',$int_types,$data_types,[nil] +
166
+ ['copy']*4 +
167
+ ["*p1 = (typec)floor(*p2);"]*2 +
168
+ [nil]*3
169
+ )
170
+
171
+ mkfuncs('Ceil',$int_types,$data_types,[nil] +
172
+ ['copy']*4 +
173
+ ["*p1 = (typec)ceil(*p2);"]*2 +
174
+ [nil]*3
175
+ )
176
+
177
+ mkfuncs('Round',$int_types,$data_types,[nil] +
178
+ ['copy']*4 +
179
+ # ["*p1 = floor(*p2+0.5);"]*2 +
180
+ ["if (*p2 >= 0) *p1 = (typec)floor(*p2+0.5);
181
+ else *p1 = (typec)ceil(*p2-0.5);"]*2 +
182
+ [nil]*3
183
+ )
184
+
185
+ mkfuncs('Abs',$real_types,$data_types,[nil] +
186
+ ["*p1 = *p2;"] +
187
+ ["*p1 = (*p2<0) ? -*p2 : *p2;"]*5 +
188
+ ["*p1 = (typec)hypot(p2.r, p2.i);"]*2 +
189
+ ["*p1 = rb_funcall(*p2,na_id_abs,0);"]
190
+ )
191
+
192
+
193
+ mkfuncs('Real',$real_types,$data_types,[nil] +
194
+ ['copy']*8 +
195
+ [nil]
196
+ )
197
+
198
+ mkfuncs('Imag',$real_types,$data_types,[nil] +
199
+ ["*p1 = 0;"]*6 +
200
+ ["*p1 = p2.i;"]*2 +
201
+ [nil]
202
+ )
203
+
204
+ mkfuncs('Angl',$real_types,$data_types,[nil] +
205
+ [nil]*6 +
206
+ ["*p1 = atan2(p2.i,p2.r);"]*2 +
207
+ [nil]
208
+ )
209
+
210
+ mkfuncs('ImagMul',$comp_types,$data_types,[nil] +
211
+ [nil]*4 +
212
+ ["p1.r = 0; p1.i = *p2;"]*2 +
213
+ ["p1.r = -p2.i; p1.i = p2.r;"]*2 +
214
+ [nil]
215
+ )
216
+
217
+ mkfuncs('Conj',$data_types,$data_types,[nil] +
218
+ ['copy']*6 +
219
+ ["p1.r = p2.r; p1.i = -p2.i;"]*2 +
220
+ [nil]
221
+ )
222
+
223
+ mkfuncs('Not', [$data_types[1]]*10, $data_types,
224
+ [nil] +
225
+ ["*p1 = (*p2==0) ? 1:0;"]*6 +
226
+ ["*p1 = (p2.r==0 && p2.i==0) ? 1:0;"]*2 +
227
+ ["*p1 = RTEST(*p2) ? 0:1;"]
228
+ )
229
+
230
+ mkfuncs('BRv', $data_types, $data_types, [nil] +
231
+ ["*p1 = ~(*p2);"]*4 +
232
+ [nil]*4 +
233
+ ["*p1 = rb_funcall(*p2,'~',0);"]
234
+ )
235
+
236
+ mkfuncs('Min', $data_types, $data_types, [nil] +
237
+ ["if (*p1>*p2) *p1=*p2;"]*4 +
238
+ ["if (notnan#C((type1*)p2) && *p1>*p2) *p1=*p2;"]*2 +
239
+ [nil]*2 +
240
+ ["if (FIX2INT(rb_funcall(*p1,na_id_compare,1,*p2))>0) *p1=*p2;"]
241
+ )
242
+
243
+ mkfuncs('Max', $data_types, $data_types, [nil] +
244
+ ["if (*p1<*p2) *p1=*p2;"]*4 +
245
+ ["if (notnan#C((type1*)p2) && *p1<*p2) *p1=*p2;"]*2 +
246
+ [nil]*2 +
247
+ ["if (FIX2INT(rb_funcall(*p1,na_id_compare,1,*p2))<0) *p1=*p2;"]
248
+ )
249
+
250
+
251
+ mksortfuncs('Sort', $data_types, $data_types, [nil] +
252
+ ["
253
+ { if (*p1 > *p2) return 1;
254
+ if (*p1 < *p2) return -1;
255
+ return 0; }"]*6 +
256
+ [nil]*2 +
257
+ ["
258
+ { VALUE r = rb_funcall(*p1, na_id_compare, 1, *p2);
259
+ return NUM2INT(r); }"]
260
+ )
261
+
262
+ mksortfuncs('SortIdx', $data_types, $data_types, [nil] +
263
+ ["
264
+ { if (**p1 > **p2) return 1;
265
+ if (**p1 < **p2) return -1;
266
+ return 0; }"]*6 +
267
+ [nil]*2 +
268
+ ["
269
+ { VALUE r = rb_funcall(**p1, na_id_compare, 1, **p2);
270
+ return NUM2INT(r); }"]
271
+ )
272
+
273
+ $func_body =
274
+ "static void #name#C(na_shape_t n, char *p1, na_shape_t i1, char *p2, na_shape_t i2)
275
+ {
276
+ int i;
277
+ if (i1 != 0) {
278
+ #pragma omp parallel for
279
+ for (i=0; i<n; i++) {
280
+ OPERATION
281
+ }
282
+ } else {
283
+ // #pragma omp parallel for reduction(+:p1)
284
+ for (i=0; i<n; i++) {
285
+ OPERATION
286
+ }
287
+ }
288
+ }
289
+ "
290
+
291
+ mkfuncs('AddU', $data_types, $data_types,
292
+ [nil] +
293
+ ["*p1 += *p2;"]*6 +
294
+ ["p1.r += p2.r;
295
+ p1.i += p2.i;"]*2 +
296
+ ["*p1 = rb_funcall(*p1,'+',1,*p2);"]
297
+ )
298
+
299
+ mkfuncs('SbtU', $data_types, $data_types,
300
+ [nil] +
301
+ ["*p1 -= *p2;"]*6 +
302
+ ["p1.r -= p2.r;
303
+ p1.i -= p2.i;"]*2 +
304
+ ["*p1 = rb_funcall(*p1,'-',1,*p2);"]
305
+ )
306
+
307
+ $func_body =
308
+ "static void #name#C(na_shape_t n, char *p1, na_shape_t i1, char *p2, na_shape_t i2)
309
+ {
310
+ int i;
311
+ if (i1 != 0) {
312
+ #pragma omp parallel for
313
+ for (i=0; i<n; i++) {
314
+ OPERATION
315
+ }
316
+ } else {
317
+ // #pragma omp parallel for reduction(*:p1)
318
+ for (i=0; i<n; i++) {
319
+ OPERATION
320
+ }
321
+ }
322
+ }
323
+ "
324
+
325
+ mkfuncs('MulU', $data_types, $data_types,
326
+ [nil] +
327
+ ["*p1 *= *p2;"]*6 +
328
+ ["type1 x = *p1;
329
+ p1.r = x.r*p2.r - x.i*p2.i;
330
+ p1.i = x.r*p2.i + x.i*p2.r;"]*2 +
331
+ ["*p1 = rb_funcall(*p1,'*',1,*p2);"]
332
+ )
333
+
334
+ $func_body =
335
+ "static void #name#C(na_shape_t n, char *p1, na_shape_t i1, char *p2, na_shape_t i2)
336
+ {
337
+ int i;
338
+ if (i1 != 0) {
339
+ #pragma omp parallel for
340
+ for (i=0; i<n; i++) {
341
+ OPERATION
342
+ }
343
+ } else {
344
+ // #pragma omp parallel for reduction(/:p1)
345
+ for (i=0; i<n; i++) {
346
+ OPERATION
347
+ }
348
+ }
349
+ }
350
+ "
351
+
352
+ mkfuncs('DivU', $data_types, $data_types,
353
+ [nil] +
354
+ ["if (*p2==0) {na_zerodiv();}
355
+ *p1 /= *p2;"]*4 +
356
+ ["*p1 /= *p2;"]*2 +
357
+ ["type1 x = *p1;
358
+ typef a = p2.r*p2.r + p2.i*p2.i;
359
+ p1.r = (x.r*p2.r + x.i*p2.i)/a;
360
+ p1.i = (x.i*p2.r - x.r*p2.i)/a;"]*2 +
361
+ ["*p1 = rb_funcall(*p1,'/',1,*p2);"]
362
+ )
363
+
364
+ mkfuncs('ModU', $data_types, $data_types,
365
+ [nil] +
366
+ ["if (*p2==0) {na_zerodiv();}
367
+ *p1 %= *p2;"]*4 +
368
+ ["*p1 = fmod(*p1, *p2);"]*2 +
369
+ [nil]*2 +
370
+ ["*p1 = rb_funcall(*p1,'%',1,*p2);"]
371
+ )
372
+
373
+
374
+ # indgen
375
+ $func_body =
376
+ "static void #name#C(na_shape_t n, char *p1, na_shape_t i1, int p2, na_shape_t i2)
377
+ {
378
+ int i;
379
+ #pragma omp parallel for
380
+ for (i=0; i<n; i++) {
381
+ OPERATION
382
+ }
383
+ }
384
+ "
385
+ mkfuncs('IndGen',$data_types,[$data_types[3]]*9,
386
+ [nil] +
387
+ ["*p1 = (typef)p2;"]*6 +
388
+ ["p1.r = (typef)p2;
389
+ p1.i = 0;"]*2 +
390
+ ["*p1 = INT2FIX(p2);"]
391
+ )
392
+
393
+
394
+
395
+ $func_body =
396
+ "static void #name#C(na_shape_t n, char *p1, na_shape_t i1, char *p2, na_shape_t i2)
397
+ {
398
+ OPERATION
399
+ }
400
+ "
401
+ mkfuncs('ToStr',['']+[$data_types[9]]*9,$data_types,
402
+ [nil] +
403
+ ["char buf[22];
404
+ int i;
405
+ #pragma omp parallel for
406
+ for (i=0; i<n; i++) {
407
+ sprintf(buf,\"%i\",(int)*p2);
408
+ *p1 = rb_str_new2(buf);
409
+ }"]*4 +
410
+ ["char buf[24];
411
+ int i;
412
+ #pragma omp parallel for
413
+ for (i=0; i<n; i++) {
414
+ sprintf(buf,\"%.5g\",(double)*p2);
415
+ *p1 = rb_str_new2(buf);
416
+ }"] +
417
+ ["char buf[24];
418
+ int i;
419
+ #pragma omp parallel for
420
+ for (i=0; i<n; i++) {
421
+ sprintf(buf,\"%.8g\",(double)*p2);
422
+ *p1 = rb_str_new2(buf);
423
+ }"] +
424
+ ["char buf[50];
425
+ int i;
426
+ #pragma omp parallel for
427
+ for (i=0; i<n; i++) {
428
+ sprintf(buf,\"%.5g%+.5gi\",(double)p2.r,(double)p2.i);
429
+ *p1 = rb_str_new2(buf);
430
+ }"] +
431
+ ["char buf[50];
432
+ int i;
433
+ #pragma omp parallel for
434
+ for (i=0; i<n; i++) {
435
+ sprintf(buf,\"%.8g%+.8gi\",(double)p2.r,(double)p2.i);
436
+ *p1 = rb_str_new2(buf);
437
+ }"] +
438
+ ["int i;
439
+ #pragma omp parallel for
440
+ for (i=0; i<n; i++) {
441
+ *p1 = rb_obj_as_string(*p2);
442
+ }"]
443
+ )
444
+
445
+
446
+ print <<EOM
447
+
448
+ /* from numeric.c */
449
+ static void na_str_append_fp(char *buf)
450
+ {
451
+ if (buf[0]=='-' || buf[0]=='+') ++buf;
452
+ if (ISALPHA(buf[0])) return; /* NaN or Inf */
453
+ if (strchr(buf, '.') == 0) {
454
+ int len = strlen(buf);
455
+ char *ind = strchr(buf, 'e');
456
+ if (ind) {
457
+ memmove(ind+2, ind, len-(ind-buf)+1);
458
+ ind[0] = '.';
459
+ ind[1] = '0';
460
+ } else {
461
+ strcat(buf, ".0");
462
+ }
463
+ }
464
+ }
465
+ EOM
466
+
467
+ $func_body =
468
+ "static void #name#C(char *p1, char *p2)
469
+ {
470
+ OPERATION
471
+ }
472
+ "
473
+ mkfuncs('Insp',['']+[$data_types[9]]*9,$data_types,
474
+ [nil] +
475
+ ["char buf[22];
476
+ sprintf(buf,\"%i\",(int)*p2);
477
+ *p1 = rb_str_new2(buf);"]*4 +
478
+ ["char buf[24];
479
+ sprintf(buf,\"%g\",(double)*p2);
480
+ na_str_append_fp(buf);
481
+ *p1 = rb_str_new2(buf);"] +
482
+ ["char buf[24];
483
+ sprintf(buf,\"%g\",(double)*p2);
484
+ na_str_append_fp(buf);
485
+ *p1 = rb_str_new2(buf);"] +
486
+ ["char buf[50], *b;
487
+ sprintf(buf,\"%g\",(double)p2.r);
488
+ na_str_append_fp(buf);
489
+ b = buf+strlen(buf);
490
+ sprintf(b,\"%+g\",(double)p2.i);
491
+ na_str_append_fp(b);
492
+ strcat(buf,\"i\");
493
+ *p1 = rb_str_new2(buf);"] +
494
+ ["char buf[50], *b;
495
+ sprintf(buf,\"%g\",(double)p2.r);
496
+ na_str_append_fp(buf);
497
+ b = buf+strlen(buf);
498
+ sprintf(b,\"%+g\",(double)p2.i);
499
+ na_str_append_fp(b);
500
+ strcat(buf,\"i\");
501
+ *p1 = rb_str_new2(buf);"] +
502
+ ["*p1 = rb_inspect(*p2);"]
503
+ )
504
+
505
+
506
+
507
+ #
508
+ # Binary Funcs
509
+ #
510
+
511
+ =begin
512
+ # Optimize experiment
513
+ $func_body =
514
+ "static void #name#C(na_shape_t n, char *p1, na_shape_t i1, char *p2, na_shape_t i2, char *p3, na_shape_t i3)
515
+ {
516
+ na_shape_t i;
517
+ if (i1==sizeof(type1) && i2==sizeof(type1) && i3==sizeof(type1)) {
518
+ type1 *a1=p1, *a2=p2, *a3=p3;
519
+ for (i=0; n; --n,++i) {
520
+ *a1 = *a2 * *a3; +++a1;++a2;++a3;
521
+ }
522
+ } else
523
+ for (; n; --n) {
524
+ OPERATION
525
+ p1+=i1; p2+=i2; p3+=i3;
526
+ }
527
+ }
528
+ "
529
+ mkfuncs('MulB', $data_types, $data_types,
530
+ [nil] +
531
+ ["*p1 = *p2 * *p3;"]*5 + [nil]*3
532
+ )
533
+ =end
534
+
535
+ $func_body =
536
+ "static void #name#C(na_shape_t n, char *p1, na_shape_t i1, char *p2, na_shape_t i2, char *p3, na_shape_t i3)
537
+ {
538
+ int i;
539
+ #pragma omp parallel for
540
+ for (i=0; i<n; i++) {
541
+ OPERATION
542
+ }
543
+ }
544
+ "
545
+
546
+ mkfuncs('AddB', $data_types, $data_types,
547
+ [nil] +
548
+ ["*p1 = *p2 + *p3;"]*6 +
549
+ ["p1.r = p2.r + p3.r;
550
+ p1.i = p2.i + p3.i;"]*2 +
551
+ ["*p1 = rb_funcall(*p2,'+',1,*p3);"]
552
+ )
553
+
554
+ mkfuncs('SbtB', $data_types, $data_types,
555
+ [nil] +
556
+ ["*p1 = *p2 - *p3;"]*6 +
557
+ ["p1.r = p2.r - p3.r;
558
+ p1.i = p2.i - p3.i;"]*2 +
559
+ ["*p1 = rb_funcall(*p2,'-',1,*p3);"]
560
+ )
561
+
562
+ mkfuncs('MulB', $data_types, $data_types,
563
+ [nil] +
564
+ ["*p1 = *p2 * *p3;"]*6 +
565
+ ["type1 x = *p2;
566
+ p1.r = x.r*p3.r - x.i*p3.i;
567
+ p1.i = x.r*p3.i + x.i*p3.r;"]*2 +
568
+ ["*p1 = rb_funcall(*p2,'*',1,*p3);"]
569
+ )
570
+
571
+ mkfuncs('DivB', $data_types, $data_types,
572
+ [nil] +
573
+ ["if (*p3==0) {na_zerodiv();};
574
+ *p1 = *p2 / *p3;"]*4 +
575
+ ["*p1 = *p2 / *p3;"]*2 +
576
+ ["type1 x = *p2;
577
+ typef a = p3.r*p3.r + p3.i*p3.i;
578
+ p1.r = (x.r*p3.r + x.i*p3.i)/a;
579
+ p1.i = (x.i*p3.r - x.r*p3.i)/a;"]*2 +
580
+ ["*p1 = rb_funcall(*p2,'/',1,*p3);"]
581
+ )
582
+
583
+ mkfuncs('ModB', $data_types, $data_types,
584
+ [nil] +
585
+ ["if (*p3==0) {na_zerodiv();};
586
+ *p1 = *p2 % *p3;"]*4 +
587
+ ["*p1 = fmod(*p2, *p3);"]*2 +
588
+ [nil]*2 +
589
+ ["*p1 = rb_funcall(*p2,'%',1,*p3);"]
590
+ )
591
+
592
+
593
+ $func_body =
594
+ "static void #name#C(na_shape_t n, char *p1, na_shape_t i1, char *p2, na_shape_t i2, char *p3, na_shape_t i3)
595
+ {
596
+ int i;
597
+ if (i1 != 0) {
598
+ #pragma omp parallel for
599
+ for (i=0; i<n; i++) {
600
+ OPERATION
601
+ }
602
+ } else {
603
+ // #pragma omp parallel for reduction(+:p1)
604
+ for (i=0; i<n; i++) {
605
+ OPERATION
606
+ }
607
+ }
608
+ }
609
+ "
610
+
611
+ mkfuncs('MulAdd', $data_types, $data_types,
612
+ [nil] +
613
+ ["*p1 += *p2 * *p3;"]*6 +
614
+ ["type1 x = *p2;
615
+ p1.r += x.r*p3.r - x.i*p3.i;
616
+ p1.i += x.r*p3.i + x.i*p3.r;"]*2 +
617
+ ["*p1 = rb_funcall(*p1,'+',1,
618
+ rb_funcall(*p2,'*',1,*p3));"]
619
+ )
620
+
621
+ mkfuncs('MulSbt', $data_types, $data_types,
622
+ [nil] +
623
+ ["*p1 -= *p2 * *p3;"]*6 +
624
+ ["type1 x = *p2;
625
+ p1.r -= x.r*p3.r - x.i*p3.i;
626
+ p1.i -= x.r*p3.i + x.i*p3.r;"]*2 +
627
+ ["*p1 = rb_funcall(*p1,'-',1,
628
+ rb_funcall(*p2,'*',1,*p3));"]
629
+ )
630
+
631
+
632
+ #
633
+ # Bit operator
634
+ #
635
+
636
+ mkfuncs('BAn', $data_types, $data_types,
637
+ [nil] +
638
+ ["*p1 = *p2 & *p3;"]*4 +
639
+ [nil]*4 +
640
+ ["*p1 = rb_funcall(*p2,'&',1,*p3);"]
641
+ )
642
+
643
+ mkfuncs('BOr', $data_types, $data_types,
644
+ [nil] +
645
+ ["*p1 = *p2 | *p3;"]*4 +
646
+ [nil]*4 +
647
+ ["*p1 = rb_funcall(*p2,'|',1,*p3);"]
648
+ )
649
+
650
+ mkfuncs('BXo', $data_types, $data_types,
651
+ [nil] +
652
+ ["*p1 = *p2 ^ *p3;"]*4 +
653
+ [nil]*4 +
654
+ ["*p1 = rb_funcall(*p2,'^',1,*p3);"]
655
+ )
656
+
657
+
658
+ #
659
+ # Comparison
660
+ #
661
+
662
+ mkfuncs('Eql', [$data_types[1]]*10, $data_types,
663
+ [nil] +
664
+ ["*p1 = (*p2==*p3) ? 1:0;"]*6 +
665
+ ["*p1 = (p2.r==p3.r) && (p2.i==p3.i) ? 1:0;"]*2 +
666
+ ["*p1 = RTEST(rb_equal(*p2, *p3)) ? 1:0;"]
667
+ )
668
+
669
+ mkfuncs('Cmp', [$data_types[1]]*10, $data_types,
670
+ [nil] +
671
+ ["if (*p2>*p3) *p1=1;
672
+ else if (*p2<*p3) *p1=2;
673
+ else *p1=0;"]*6 +
674
+ [nil]*2 +
675
+ ["int v = NUM2INT(rb_funcall(*p2,na_id_compare,1,*p3));
676
+ if (v>0) *p1=1; else if (v<0) *p1=2; else *p1=0;"]
677
+ )
678
+
679
+ mkfuncs('And', [$data_types[1]]*10, $data_types,
680
+ [nil] +
681
+ ["*p1 = (*p2!=0 && *p3!=0) ? 1:0;"]*6 +
682
+ ["*p1 = ((p2.r!=0||p2.i!=0) && (p3.r!=0||p3.i!=0)) ? 1:0;"]*2 +
683
+ ["*p1 = (RTEST(*p2) && RTEST(*p3)) ? 1:0;"]
684
+ )
685
+
686
+ mkfuncs('Or_', [$data_types[1]]*10, $data_types,
687
+ [nil] +
688
+ ["*p1 = (*p2!=0 || *p3!=0) ? 1:0;"]*6 +
689
+ ["*p1 = ((p2.r!=0||p2.i!=0) || (p3.r!=0||p3.i!=0)) ? 1:0;"]*2 +
690
+ ["*p1 = (RTEST(*p2) || RTEST(*p3)) ? 1:0;"]
691
+ )
692
+
693
+ mkfuncs('Xor', [$data_types[1]]*10, $data_types,
694
+ [nil] +
695
+ ["*p1 = ((*p2!=0) == (*p3!=0)) ? 0:1;"]*6 +
696
+ ["*p1 = ((p2.r!=0||p2.i!=0) == (p3.r!=0||p3.i!=0)) ? 0:1;"]*2 +
697
+ ["*p1 = (RTEST(*p2) == RTEST(*p3)) ? 0:1;"]
698
+ )
699
+
700
+
701
+ #
702
+ # Atan2
703
+ #
704
+
705
+ mkfuncs('atan2', $data_types, $data_types,
706
+ [nil]*5 +
707
+ ["*p1 = atan2(*p2, *p3);"]*2 +
708
+ [nil]*3
709
+ )
710
+
711
+
712
+ #
713
+ # Mask
714
+ #
715
+ $func_body =
716
+ "static void #name#C(na_shape_t n, char *p1, na_shape_t i1, char *p2, na_shape_t i2, char *p3, na_shape_t i3)
717
+ {
718
+ int i, j=0;
719
+ for (i=0; i<n; i++) {
720
+ OPERATION
721
+ }
722
+ }
723
+ "
724
+ mkfuncs('RefMask',$data_types,$data_types,
725
+ [nil] +
726
+ ["if (*(u_int8_t*)p3) { *p1=*p2; j+=1; }"]*9,
727
+ ["j","i","i"]
728
+ )
729
+
730
+ mkfuncs('SetMask',$data_types,$data_types,
731
+ [nil] +
732
+ ["if (*(u_int8_t*)p3) { *p1=*p2; j+=1; };"]*9,
733
+ ["i","j","i"]
734
+ )