narray-bigmem 0.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,212 @@
1
+ $type_codes = %w(n B I L G F D X C O)
2
+ $data_types =
3
+ %w(none u_int8_t int16_t int32_t int64_t float double scomplex dcomplex VALUE)
4
+ $real_types =
5
+ %w(none u_int8_t int16_t int32_t int64_t float double float double VALUE)
6
+ $int_types =
7
+ %w(none u_int8_t int16_t int32_t int64_t int32_t int32_t scomplex dcomplex VALUE)
8
+ $comp_types =
9
+ %w(none scomplex scomplex scomplex dcomplex scomplex dcomplex scomplex dcomplex VALUE)
10
+ $swap_types =
11
+ %w(none u_int8_t na_size16_t na_size32_t na_size64_t na_size32_t na_size64_t na_size64_t na_size128_t VALUE)
12
+ $upcast = [
13
+ [ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
14
+ [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9],
15
+ [ 0, 2, 2, 3, 4, 5, 6, 7, 8, 9],
16
+ [ 0, 3, 3, 3, 4, 5, 6, 7, 8, 9],
17
+ [ 0, 4, 4, 4, 4, 5, 6, 7, 8, 9],
18
+ [ 0, 5, 5, 5, 5, 5, 6, 7, 8, 9],
19
+ [ 0, 6, 6, 6, 6, 6, 6, 8, 8, 9],
20
+ [ 0, 7, 7, 7, 7, 7, 8, 7, 8, 9],
21
+ [ 0, 8, 8, 8, 8, 8, 8, 8, 8, 9],
22
+ [ 0, 9, 9, 9, 9, 9, 9, 9, 9, 9] ]
23
+ $data_obj = [
24
+ [/[O]/,/[O]/, "
25
+ *p1 = rb_funcall(*p1,#id,1,*p2);"],
26
+ [/[O]/,/[BIL]/,"
27
+ *p1 = rb_funcall(*p1,#id,1,INT2FIX(*p2));"],
28
+ [/[O]/,/[G]/,"
29
+ *p1 = rb_funcall(*p1,#id,1,LL2NUM(*p2));"],
30
+ [/[O]/,/[FD]/, "
31
+ *p1 = rb_funcall(*p1,#id,1,rb_float_new(*p2));"],
32
+ [/[O]/,/[XC]/, "
33
+ *p1 = rb_funcall(*p1,#id,1,rb_complex_new(p2->r,p2->i));"],
34
+ [/[BIL]/,/[O]/,"
35
+ *p1 = NUM2INT(rb_funcall(INT2FIX(*p1),#id,1,*p2));"],
36
+ [/[G]/,/[O]/,"
37
+ *p1 = NUM2LL(rb_funcall(LL2NUM(*p1),#id,1,*p2));"],
38
+ [/[FD]/,/[O]/, "
39
+ *p1 = NUM2DBL(rb_funcall(rb_float_new(*p1),#id,1,*p2));"],
40
+ [/[XC]/,/[O]/, "VALUE v=rb_funcall(rb_complex_new(p1->r,p1->i),#id,1,*p2);
41
+ p1->r = NUM2REAL(v); p1->i = NUM2IMAG(v);"] ]
42
+
43
+
44
+ def mksetfuncs(name,op,id,funcs)
45
+
46
+ print "
47
+ /* ------------------------- #{name} --------------------------- */\n"
48
+ c = $type_codes
49
+ n = $type_codes.size
50
+ td = $data_types
51
+ tr = $real_types
52
+
53
+ # Function Definition
54
+
55
+ for i in 0...n
56
+ for j in 0...n
57
+ funcs.each do |k|
58
+ if c[i]=~k[0] && c[j]=~k[1]
59
+ #if i==j
60
+ # f = "memcpy(p1,p1,sizeof(typed));"
61
+ #else
62
+ f = k[2]
63
+ #end
64
+ f = f.
65
+ gsub(/p1\./,"((#{td[i]}*)p1)[i].").
66
+ gsub(/p2\./,"((#{td[j]}*)p2)[i].").
67
+ gsub(/\*p1/,"((#{td[i]}*)p1)[i]").
68
+ gsub(/\*p2/,"((#{td[j]}*)p2)[i]").
69
+ gsub(/ = /," = (#{tr[i]})").
70
+ gsub(/#id/,id).
71
+ gsub(/#op/,op).
72
+ gsub(/typed/,td[i]).
73
+ gsub(/typef/,tr[i])
74
+ puts $func_body.
75
+ gsub(/typed/,td[i]).
76
+ gsub(/typef/,tr[i]).
77
+ gsub(/#name/,name).
78
+ sub(/OPERATION/,f).
79
+ gsub(/#CC/,c[i]+c[j])
80
+ end
81
+ end
82
+ end
83
+ end
84
+
85
+ # function pointer array
86
+ print "\nna_setfunc_t "+name+"Funcs = {\n"
87
+ m = []
88
+ for i in 0...n
89
+ l = []
90
+ for j in 0...n
91
+ f = true
92
+ for k in funcs
93
+ if c[i]=~k[0] && c[j]=~k[1]
94
+ l += [name+c[i]+c[j]]
95
+ f = false
96
+ break
97
+ end
98
+ end
99
+ if f
100
+ l += ['TpErr']
101
+ end
102
+ end
103
+ m += [' { '+l.join(', ')+' }']
104
+ end
105
+ print m.join(",\n")+"\n};\n"
106
+
107
+ end
108
+
109
+
110
+
111
+ def mkfuncs(name,t1,t2,func,idx=["i"]*3)
112
+
113
+ print "
114
+ /* ------------------------- #{name} --------------------------- */\n"
115
+ c = $type_codes
116
+ td = $data_types
117
+ tr = $real_types
118
+
119
+ for i in 0...c.size
120
+ if func[i] != nil && func[i] != "copy"
121
+ if /Insp/ =~ name
122
+ f = func[i].
123
+ gsub(/p2\./,"(*((#{t2[i]}*)p2)).").
124
+ gsub(/\*p1/,"*((#{t1[i]}*)p1)").
125
+ gsub(/\*p2/,"*((#{t2[i]}*)p2)")
126
+ else
127
+ f = func[i].
128
+ gsub(/p1\./,"(*((#{t1[i]}*)(p1+#{idx[0]}*i1))).").
129
+ gsub(/p2\./,"(*((#{t2[i]}*)(p2+#{idx[1]}*i2))).").
130
+ gsub(/p3\./,"(*((#{t2[i]}*)(p3+#{idx[2]}*i3))).").
131
+ gsub(/\*p1/,"*((#{t1[i]}*)(p1+#{idx[0]}*i1))").
132
+ gsub(/\*p2/,"*((#{t2[i]}*)(p2+#{idx[1]}*i2))").
133
+ gsub(/\*p3/,"*((#{t2[i]}*)(p3+#{idx[2]}*i3))").
134
+ gsub(/\(u_int8_t\*\)p3/,"(u_int8_t*)(p3+#{idx[2]}*i3)").
135
+ gsub(/p2;/,'p2+i*i2;')
136
+ end
137
+ f = f.
138
+ gsub(/type1/,td[i]).
139
+ gsub(/typec/,t1[i]).
140
+ gsub(/typef/,tr[i])
141
+ puts $func_body.
142
+ gsub(/type1/,td[i]).
143
+ gsub(/typec/,t1[i]).
144
+ gsub(/typef/,tr[i]).
145
+ gsub(/#name/,name).
146
+ gsub(/OPERATION/,f).
147
+ gsub(/#C/,c[i])
148
+ end
149
+ end
150
+
151
+ # Function Array
152
+
153
+ print "\nna_func_t #{name}Funcs =\n{ "
154
+ m = []
155
+ for i in 0...c.size
156
+ if func[i] == nil
157
+ m += ['TpErr']
158
+ elsif func[i]=='copy'
159
+ m += ['Set'+c[$data_types.index(t1[i])]+c[i]]
160
+ else
161
+ m += [name+c[i]]
162
+ end
163
+ end
164
+ print m.join(", ")+" };\n"
165
+
166
+ end
167
+
168
+
169
+
170
+ def mksortfuncs(bsname,t1,t2,func)
171
+
172
+ print "
173
+ /* ------------------------- #{bsname} --------------------------- */\n"
174
+ c = $type_codes
175
+ tf = $real_types
176
+ name = bsname
177
+
178
+ # Function Definition
179
+ head = "static int #{name}#code(const void *p1, const void *p2)"
180
+ for i in 0...c.size
181
+ if func[i] != nil && func[i]=~/^\{/
182
+ f = func[i].
183
+ gsub(/p1->/,"((#{t1[i]}*)p1)->").
184
+ gsub(/p2->/,"((#{t2[i]}*)p2)->").
185
+ gsub(/\*\*p1/,"**(#{t1[i]}**)p1").
186
+ gsub(/\*\*p2/,"**(#{t2[i]}**)p2").
187
+ gsub(/\*p1/,"*(#{t1[i]}*)p1").
188
+ gsub(/\*p2/,"*(#{t2[i]}*)p2").
189
+ gsub(/typef/,tf[i])
190
+ puts( (head+f).gsub(/#code/,c[i]) )
191
+ end
192
+ end
193
+
194
+ # Function Array
195
+
196
+ print "\nna_sortfunc_t #{name}Funcs =\n{ "
197
+ m = []
198
+ for i in 0...c.size
199
+ if func[i] == nil
200
+ m += ['(int (*)(const void *, const void *))TpErrI']
201
+ elsif func[i]=='copy'
202
+ m += ['Set'+c[i]*2]
203
+ elsif !( func[i] =~ /^\{/ )
204
+ m += [func[i]]
205
+ else
206
+ m += [name+c[i]]
207
+ end
208
+ end
209
+ print m.join(", ")+" };\n"
210
+
211
+ end
212
+
@@ -0,0 +1,734 @@
1
+ require "mknafunc"
2
+
3
+ fname = "na_op.c"
4
+ $> = open(fname,"w")
5
+
6
+ upcast_ary = $upcast.collect{|i| ' {'+i.join(", ")+'}'}.join(",\n")
7
+
8
+ print <<EOM
9
+ /*
10
+ #{fname}
11
+ Automatically generated code
12
+ Numerical Array Extention for Ruby
13
+ (C) Copyright 1999-2008 by Masahiro TANAKA
14
+
15
+ This program is free software.
16
+ You can distribute/modify this program
17
+ under the same terms as Ruby itself.
18
+ NO WARRANTY.
19
+ */
20
+ #include <ruby.h>
21
+ #include "narray.h"
22
+ #include "narray_local.h"
23
+ /* isalpha(3) etc. */
24
+ #include <ctype.h>
25
+
26
+ const int na_upcast[NA_NTYPES][NA_NTYPES] = {
27
+ #{upcast_ary} };
28
+
29
+ const int na_no_cast[NA_NTYPES] =
30
+ { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9 };
31
+ const int na_cast_real[NA_NTYPES] =
32
+ { 0, 1, 2, 3, 4, 5, 4, 5, 8, 9 };
33
+ const int na_cast_comp[NA_NTYPES] =
34
+ { 0, 7, 7, 7, 7, 7, 8, 7, 8, 9 };
35
+ const int na_cast_round[NA_NTYPES] =
36
+ { 0, 1, 2, 3, 4, 3, 3, 7, 8, 9 };
37
+ const int na_cast_byte[NA_NTYPES] =
38
+ { 0, 1, 1, 1, 1, 1, 1, 1, 1, 1 };
39
+
40
+
41
+ static void TpErr(void) {
42
+ rb_raise(rb_eTypeError,"illegal operation with this type");
43
+ }
44
+ static int TpErrI(void) {
45
+ rb_raise(rb_eTypeError,"illegal operation with this type");
46
+ return 0;
47
+ }
48
+ static void na_zerodiv() {
49
+ rb_raise(rb_eZeroDivError, "divided by 0");
50
+ }
51
+
52
+ static int notnanF(float *n)
53
+ {
54
+ return *n == *n;
55
+ }
56
+ static int notnanD(double *n)
57
+ {
58
+ return *n == *n;
59
+ }
60
+ EOM
61
+
62
+
63
+ #
64
+ # Set Fucs
65
+ #
66
+ data = [
67
+ [/[O]/,/[O]/, "*p1 = *p2;"],
68
+ [/[O]/,/[BI]/, "*p1 = INT2FIX(*p2);"],
69
+ [/[O]/,/[L]/, "*p1 = INT2NUM(*p2);"],
70
+ [/[O]/,/[G]/, "*p1 = LL2NUM(*p2);"],
71
+ [/[O]/,/[FD]/, "*p1 = rb_float_new(*p2);"],
72
+ [/[O]/,/[XC]/, "*p1 = rb_complex_new(p2.r,p2.i);"],
73
+ [/[BIL]/,/[O]/, "*p1 = NUM2INT(*p2);"],
74
+ [/[G]/,/[O]/, "*p1 = NUM2LL(*p2);"],
75
+ [/[FD]/,/[O]/, "*p1 = NUM2DBL(*p2);"],
76
+ [/[XC]/,/[O]/, "p1.r = NUM2REAL(*p2); p1.i = NUM2IMAG(*p2);"],
77
+ [/[BILFDG]/,/[BILFDG]/,"*p1 = *p2;"],
78
+ [/[BILFDG]/,/[XC]/, "*p1 = p2.r;"],
79
+ [/[XC]/,/[BILFDG]/, "p1.r = *p2; p1.i = 0;"],
80
+ [/[XC]/,/[XC]/, "p1.r = p2.r; p1.i = p2.i;"] ]
81
+
82
+ $func_body =
83
+ "static void #name#CC(na_shape_t n, char *p1, na_shape_t i1, char *p2, na_shape_t i2)
84
+ {
85
+ int i;
86
+ #pragma omp parallel for
87
+ for (i=0; i<n; i++) {
88
+ OPERATION
89
+ }
90
+ }
91
+ "
92
+ mksetfuncs('Set','','',data)
93
+
94
+
95
+
96
+ #
97
+ # Unary Funcs
98
+ #
99
+ $func_body =
100
+ "static void #name#C(na_shape_t n, char *p1, na_shape_t i1, char *p2, na_shape_t i2)
101
+ {
102
+ int i;
103
+ #pragma omp parallel for
104
+ for (i=0; i<n; i++) {
105
+ OPERATION
106
+ }
107
+ }
108
+ "
109
+
110
+
111
+ mkfuncs('Swp', $swap_types, $swap_types,
112
+ [nil] +
113
+ ["*p1 = *p2;"] +
114
+ ["na_size16_t x; swap16(x,*p2); *p1 = x;"] +
115
+ ["na_size32_t x; swap32(x,*p2); *p1 = x;"] +
116
+ ["na_size64_t x; swap64(x,*p2); *p1 = x;"] +
117
+ ["na_size32_t x; swap32(x,*p2); *p1 = x;"] +
118
+ ["na_size64_t x; swap64(x,*p2); *p1 = x;"] +
119
+ ["na_size64_t x; swap64c(x,*p2); *p1 = x;"] +
120
+ ["na_size128_t x; swap128c(x,*p2); *p1 = x;"] +
121
+ ["*p1 = *p2;"]
122
+ )
123
+
124
+ print <<EOM
125
+
126
+ /* ------------------------- H2N --------------------------- */
127
+ #ifdef WORDS_BIGENDIAN
128
+
129
+ na_func_t H2NFuncs =
130
+ { TpErr, SetBB, SetII, SetLL, SetGG, SetFF, SetDD, SetXX, SetCC, SetOO };
131
+
132
+ na_func_t H2VFuncs =
133
+ { TpErr, SetBB, SwpI, SwpL, SwpG, SwpF, SwpD, SwpX, SwpC, SetOO };
134
+
135
+ #else
136
+ #ifdef DYNAMIC_ENDIAN /* not supported yet */
137
+ #else /* LITTLE ENDIAN */
138
+
139
+ na_func_t H2NFuncs =
140
+ { TpErr, SetBB, SwpI, SwpL, SwpG, SwpF, SwpD, SwpX, SwpC, SetOO };
141
+
142
+ na_func_t H2VFuncs =
143
+ { TpErr, SetBB, SetII, SetLL, SetGG, SetFF, SetDD, SetXX, SetCC, SetOO };
144
+
145
+ #endif
146
+ #endif
147
+ EOM
148
+
149
+ mkfuncs('Neg', $data_types, $data_types,
150
+ [nil] +
151
+ ["*p1 = -*p2;"]*6 +
152
+ ["p1.r = -p2.r;
153
+ p1.i = -p2.i;"]*2 +
154
+ ["*p1 = rb_funcall(*p2,na_id_minus,0);"]
155
+ )
156
+
157
+ # method: imag=
158
+ mkfuncs('ImgSet',$data_types,$real_types,
159
+ [nil]*7 +
160
+ ["p1.i = *p2;"]*2 +
161
+ [nil]
162
+ )
163
+
164
+
165
+ mkfuncs('Floor',$int_types,$data_types,[nil] +
166
+ ['copy']*4 +
167
+ ["*p1 = (typec)floor(*p2);"]*2 +
168
+ [nil]*3
169
+ )
170
+
171
+ mkfuncs('Ceil',$int_types,$data_types,[nil] +
172
+ ['copy']*4 +
173
+ ["*p1 = (typec)ceil(*p2);"]*2 +
174
+ [nil]*3
175
+ )
176
+
177
+ mkfuncs('Round',$int_types,$data_types,[nil] +
178
+ ['copy']*4 +
179
+ # ["*p1 = floor(*p2+0.5);"]*2 +
180
+ ["if (*p2 >= 0) *p1 = (typec)floor(*p2+0.5);
181
+ else *p1 = (typec)ceil(*p2-0.5);"]*2 +
182
+ [nil]*3
183
+ )
184
+
185
+ mkfuncs('Abs',$real_types,$data_types,[nil] +
186
+ ["*p1 = *p2;"] +
187
+ ["*p1 = (*p2<0) ? -*p2 : *p2;"]*5 +
188
+ ["*p1 = (typec)hypot(p2.r, p2.i);"]*2 +
189
+ ["*p1 = rb_funcall(*p2,na_id_abs,0);"]
190
+ )
191
+
192
+
193
+ mkfuncs('Real',$real_types,$data_types,[nil] +
194
+ ['copy']*8 +
195
+ [nil]
196
+ )
197
+
198
+ mkfuncs('Imag',$real_types,$data_types,[nil] +
199
+ ["*p1 = 0;"]*6 +
200
+ ["*p1 = p2.i;"]*2 +
201
+ [nil]
202
+ )
203
+
204
+ mkfuncs('Angl',$real_types,$data_types,[nil] +
205
+ [nil]*6 +
206
+ ["*p1 = atan2(p2.i,p2.r);"]*2 +
207
+ [nil]
208
+ )
209
+
210
+ mkfuncs('ImagMul',$comp_types,$data_types,[nil] +
211
+ [nil]*4 +
212
+ ["p1.r = 0; p1.i = *p2;"]*2 +
213
+ ["p1.r = -p2.i; p1.i = p2.r;"]*2 +
214
+ [nil]
215
+ )
216
+
217
+ mkfuncs('Conj',$data_types,$data_types,[nil] +
218
+ ['copy']*6 +
219
+ ["p1.r = p2.r; p1.i = -p2.i;"]*2 +
220
+ [nil]
221
+ )
222
+
223
+ mkfuncs('Not', [$data_types[1]]*10, $data_types,
224
+ [nil] +
225
+ ["*p1 = (*p2==0) ? 1:0;"]*6 +
226
+ ["*p1 = (p2.r==0 && p2.i==0) ? 1:0;"]*2 +
227
+ ["*p1 = RTEST(*p2) ? 0:1;"]
228
+ )
229
+
230
+ mkfuncs('BRv', $data_types, $data_types, [nil] +
231
+ ["*p1 = ~(*p2);"]*4 +
232
+ [nil]*4 +
233
+ ["*p1 = rb_funcall(*p2,'~',0);"]
234
+ )
235
+
236
+ mkfuncs('Min', $data_types, $data_types, [nil] +
237
+ ["if (*p1>*p2) *p1=*p2;"]*4 +
238
+ ["if (notnan#C((type1*)p2) && *p1>*p2) *p1=*p2;"]*2 +
239
+ [nil]*2 +
240
+ ["if (FIX2INT(rb_funcall(*p1,na_id_compare,1,*p2))>0) *p1=*p2;"]
241
+ )
242
+
243
+ mkfuncs('Max', $data_types, $data_types, [nil] +
244
+ ["if (*p1<*p2) *p1=*p2;"]*4 +
245
+ ["if (notnan#C((type1*)p2) && *p1<*p2) *p1=*p2;"]*2 +
246
+ [nil]*2 +
247
+ ["if (FIX2INT(rb_funcall(*p1,na_id_compare,1,*p2))<0) *p1=*p2;"]
248
+ )
249
+
250
+
251
+ mksortfuncs('Sort', $data_types, $data_types, [nil] +
252
+ ["
253
+ { if (*p1 > *p2) return 1;
254
+ if (*p1 < *p2) return -1;
255
+ return 0; }"]*6 +
256
+ [nil]*2 +
257
+ ["
258
+ { VALUE r = rb_funcall(*p1, na_id_compare, 1, *p2);
259
+ return NUM2INT(r); }"]
260
+ )
261
+
262
+ mksortfuncs('SortIdx', $data_types, $data_types, [nil] +
263
+ ["
264
+ { if (**p1 > **p2) return 1;
265
+ if (**p1 < **p2) return -1;
266
+ return 0; }"]*6 +
267
+ [nil]*2 +
268
+ ["
269
+ { VALUE r = rb_funcall(**p1, na_id_compare, 1, **p2);
270
+ return NUM2INT(r); }"]
271
+ )
272
+
273
+ $func_body =
274
+ "static void #name#C(na_shape_t n, char *p1, na_shape_t i1, char *p2, na_shape_t i2)
275
+ {
276
+ int i;
277
+ if (i1 != 0) {
278
+ #pragma omp parallel for
279
+ for (i=0; i<n; i++) {
280
+ OPERATION
281
+ }
282
+ } else {
283
+ // #pragma omp parallel for reduction(+:p1)
284
+ for (i=0; i<n; i++) {
285
+ OPERATION
286
+ }
287
+ }
288
+ }
289
+ "
290
+
291
+ mkfuncs('AddU', $data_types, $data_types,
292
+ [nil] +
293
+ ["*p1 += *p2;"]*6 +
294
+ ["p1.r += p2.r;
295
+ p1.i += p2.i;"]*2 +
296
+ ["*p1 = rb_funcall(*p1,'+',1,*p2);"]
297
+ )
298
+
299
+ mkfuncs('SbtU', $data_types, $data_types,
300
+ [nil] +
301
+ ["*p1 -= *p2;"]*6 +
302
+ ["p1.r -= p2.r;
303
+ p1.i -= p2.i;"]*2 +
304
+ ["*p1 = rb_funcall(*p1,'-',1,*p2);"]
305
+ )
306
+
307
+ $func_body =
308
+ "static void #name#C(na_shape_t n, char *p1, na_shape_t i1, char *p2, na_shape_t i2)
309
+ {
310
+ int i;
311
+ if (i1 != 0) {
312
+ #pragma omp parallel for
313
+ for (i=0; i<n; i++) {
314
+ OPERATION
315
+ }
316
+ } else {
317
+ // #pragma omp parallel for reduction(*:p1)
318
+ for (i=0; i<n; i++) {
319
+ OPERATION
320
+ }
321
+ }
322
+ }
323
+ "
324
+
325
+ mkfuncs('MulU', $data_types, $data_types,
326
+ [nil] +
327
+ ["*p1 *= *p2;"]*6 +
328
+ ["type1 x = *p1;
329
+ p1.r = x.r*p2.r - x.i*p2.i;
330
+ p1.i = x.r*p2.i + x.i*p2.r;"]*2 +
331
+ ["*p1 = rb_funcall(*p1,'*',1,*p2);"]
332
+ )
333
+
334
+ $func_body =
335
+ "static void #name#C(na_shape_t n, char *p1, na_shape_t i1, char *p2, na_shape_t i2)
336
+ {
337
+ int i;
338
+ if (i1 != 0) {
339
+ #pragma omp parallel for
340
+ for (i=0; i<n; i++) {
341
+ OPERATION
342
+ }
343
+ } else {
344
+ // #pragma omp parallel for reduction(/:p1)
345
+ for (i=0; i<n; i++) {
346
+ OPERATION
347
+ }
348
+ }
349
+ }
350
+ "
351
+
352
+ mkfuncs('DivU', $data_types, $data_types,
353
+ [nil] +
354
+ ["if (*p2==0) {na_zerodiv();}
355
+ *p1 /= *p2;"]*4 +
356
+ ["*p1 /= *p2;"]*2 +
357
+ ["type1 x = *p1;
358
+ typef a = p2.r*p2.r + p2.i*p2.i;
359
+ p1.r = (x.r*p2.r + x.i*p2.i)/a;
360
+ p1.i = (x.i*p2.r - x.r*p2.i)/a;"]*2 +
361
+ ["*p1 = rb_funcall(*p1,'/',1,*p2);"]
362
+ )
363
+
364
+ mkfuncs('ModU', $data_types, $data_types,
365
+ [nil] +
366
+ ["if (*p2==0) {na_zerodiv();}
367
+ *p1 %= *p2;"]*4 +
368
+ ["*p1 = fmod(*p1, *p2);"]*2 +
369
+ [nil]*2 +
370
+ ["*p1 = rb_funcall(*p1,'%',1,*p2);"]
371
+ )
372
+
373
+
374
+ # indgen
375
+ $func_body =
376
+ "static void #name#C(na_shape_t n, char *p1, na_shape_t i1, int p2, na_shape_t i2)
377
+ {
378
+ int i;
379
+ #pragma omp parallel for
380
+ for (i=0; i<n; i++) {
381
+ OPERATION
382
+ }
383
+ }
384
+ "
385
+ mkfuncs('IndGen',$data_types,[$data_types[3]]*9,
386
+ [nil] +
387
+ ["*p1 = (typef)p2;"]*6 +
388
+ ["p1.r = (typef)p2;
389
+ p1.i = 0;"]*2 +
390
+ ["*p1 = INT2FIX(p2);"]
391
+ )
392
+
393
+
394
+
395
+ $func_body =
396
+ "static void #name#C(na_shape_t n, char *p1, na_shape_t i1, char *p2, na_shape_t i2)
397
+ {
398
+ OPERATION
399
+ }
400
+ "
401
+ mkfuncs('ToStr',['']+[$data_types[9]]*9,$data_types,
402
+ [nil] +
403
+ ["char buf[22];
404
+ int i;
405
+ #pragma omp parallel for
406
+ for (i=0; i<n; i++) {
407
+ sprintf(buf,\"%i\",(int)*p2);
408
+ *p1 = rb_str_new2(buf);
409
+ }"]*4 +
410
+ ["char buf[24];
411
+ int i;
412
+ #pragma omp parallel for
413
+ for (i=0; i<n; i++) {
414
+ sprintf(buf,\"%.5g\",(double)*p2);
415
+ *p1 = rb_str_new2(buf);
416
+ }"] +
417
+ ["char buf[24];
418
+ int i;
419
+ #pragma omp parallel for
420
+ for (i=0; i<n; i++) {
421
+ sprintf(buf,\"%.8g\",(double)*p2);
422
+ *p1 = rb_str_new2(buf);
423
+ }"] +
424
+ ["char buf[50];
425
+ int i;
426
+ #pragma omp parallel for
427
+ for (i=0; i<n; i++) {
428
+ sprintf(buf,\"%.5g%+.5gi\",(double)p2.r,(double)p2.i);
429
+ *p1 = rb_str_new2(buf);
430
+ }"] +
431
+ ["char buf[50];
432
+ int i;
433
+ #pragma omp parallel for
434
+ for (i=0; i<n; i++) {
435
+ sprintf(buf,\"%.8g%+.8gi\",(double)p2.r,(double)p2.i);
436
+ *p1 = rb_str_new2(buf);
437
+ }"] +
438
+ ["int i;
439
+ #pragma omp parallel for
440
+ for (i=0; i<n; i++) {
441
+ *p1 = rb_obj_as_string(*p2);
442
+ }"]
443
+ )
444
+
445
+
446
+ print <<EOM
447
+
448
+ /* from numeric.c */
449
+ static void na_str_append_fp(char *buf)
450
+ {
451
+ if (buf[0]=='-' || buf[0]=='+') ++buf;
452
+ if (ISALPHA(buf[0])) return; /* NaN or Inf */
453
+ if (strchr(buf, '.') == 0) {
454
+ int len = strlen(buf);
455
+ char *ind = strchr(buf, 'e');
456
+ if (ind) {
457
+ memmove(ind+2, ind, len-(ind-buf)+1);
458
+ ind[0] = '.';
459
+ ind[1] = '0';
460
+ } else {
461
+ strcat(buf, ".0");
462
+ }
463
+ }
464
+ }
465
+ EOM
466
+
467
+ $func_body =
468
+ "static void #name#C(char *p1, char *p2)
469
+ {
470
+ OPERATION
471
+ }
472
+ "
473
+ mkfuncs('Insp',['']+[$data_types[9]]*9,$data_types,
474
+ [nil] +
475
+ ["char buf[22];
476
+ sprintf(buf,\"%i\",(int)*p2);
477
+ *p1 = rb_str_new2(buf);"]*4 +
478
+ ["char buf[24];
479
+ sprintf(buf,\"%g\",(double)*p2);
480
+ na_str_append_fp(buf);
481
+ *p1 = rb_str_new2(buf);"] +
482
+ ["char buf[24];
483
+ sprintf(buf,\"%g\",(double)*p2);
484
+ na_str_append_fp(buf);
485
+ *p1 = rb_str_new2(buf);"] +
486
+ ["char buf[50], *b;
487
+ sprintf(buf,\"%g\",(double)p2.r);
488
+ na_str_append_fp(buf);
489
+ b = buf+strlen(buf);
490
+ sprintf(b,\"%+g\",(double)p2.i);
491
+ na_str_append_fp(b);
492
+ strcat(buf,\"i\");
493
+ *p1 = rb_str_new2(buf);"] +
494
+ ["char buf[50], *b;
495
+ sprintf(buf,\"%g\",(double)p2.r);
496
+ na_str_append_fp(buf);
497
+ b = buf+strlen(buf);
498
+ sprintf(b,\"%+g\",(double)p2.i);
499
+ na_str_append_fp(b);
500
+ strcat(buf,\"i\");
501
+ *p1 = rb_str_new2(buf);"] +
502
+ ["*p1 = rb_inspect(*p2);"]
503
+ )
504
+
505
+
506
+
507
+ #
508
+ # Binary Funcs
509
+ #
510
+
511
+ =begin
512
+ # Optimize experiment
513
+ $func_body =
514
+ "static void #name#C(na_shape_t n, char *p1, na_shape_t i1, char *p2, na_shape_t i2, char *p3, na_shape_t i3)
515
+ {
516
+ na_shape_t i;
517
+ if (i1==sizeof(type1) && i2==sizeof(type1) && i3==sizeof(type1)) {
518
+ type1 *a1=p1, *a2=p2, *a3=p3;
519
+ for (i=0; n; --n,++i) {
520
+ *a1 = *a2 * *a3; +++a1;++a2;++a3;
521
+ }
522
+ } else
523
+ for (; n; --n) {
524
+ OPERATION
525
+ p1+=i1; p2+=i2; p3+=i3;
526
+ }
527
+ }
528
+ "
529
+ mkfuncs('MulB', $data_types, $data_types,
530
+ [nil] +
531
+ ["*p1 = *p2 * *p3;"]*5 + [nil]*3
532
+ )
533
+ =end
534
+
535
+ $func_body =
536
+ "static void #name#C(na_shape_t n, char *p1, na_shape_t i1, char *p2, na_shape_t i2, char *p3, na_shape_t i3)
537
+ {
538
+ int i;
539
+ #pragma omp parallel for
540
+ for (i=0; i<n; i++) {
541
+ OPERATION
542
+ }
543
+ }
544
+ "
545
+
546
+ mkfuncs('AddB', $data_types, $data_types,
547
+ [nil] +
548
+ ["*p1 = *p2 + *p3;"]*6 +
549
+ ["p1.r = p2.r + p3.r;
550
+ p1.i = p2.i + p3.i;"]*2 +
551
+ ["*p1 = rb_funcall(*p2,'+',1,*p3);"]
552
+ )
553
+
554
+ mkfuncs('SbtB', $data_types, $data_types,
555
+ [nil] +
556
+ ["*p1 = *p2 - *p3;"]*6 +
557
+ ["p1.r = p2.r - p3.r;
558
+ p1.i = p2.i - p3.i;"]*2 +
559
+ ["*p1 = rb_funcall(*p2,'-',1,*p3);"]
560
+ )
561
+
562
+ mkfuncs('MulB', $data_types, $data_types,
563
+ [nil] +
564
+ ["*p1 = *p2 * *p3;"]*6 +
565
+ ["type1 x = *p2;
566
+ p1.r = x.r*p3.r - x.i*p3.i;
567
+ p1.i = x.r*p3.i + x.i*p3.r;"]*2 +
568
+ ["*p1 = rb_funcall(*p2,'*',1,*p3);"]
569
+ )
570
+
571
+ mkfuncs('DivB', $data_types, $data_types,
572
+ [nil] +
573
+ ["if (*p3==0) {na_zerodiv();};
574
+ *p1 = *p2 / *p3;"]*4 +
575
+ ["*p1 = *p2 / *p3;"]*2 +
576
+ ["type1 x = *p2;
577
+ typef a = p3.r*p3.r + p3.i*p3.i;
578
+ p1.r = (x.r*p3.r + x.i*p3.i)/a;
579
+ p1.i = (x.i*p3.r - x.r*p3.i)/a;"]*2 +
580
+ ["*p1 = rb_funcall(*p2,'/',1,*p3);"]
581
+ )
582
+
583
+ mkfuncs('ModB', $data_types, $data_types,
584
+ [nil] +
585
+ ["if (*p3==0) {na_zerodiv();};
586
+ *p1 = *p2 % *p3;"]*4 +
587
+ ["*p1 = fmod(*p2, *p3);"]*2 +
588
+ [nil]*2 +
589
+ ["*p1 = rb_funcall(*p2,'%',1,*p3);"]
590
+ )
591
+
592
+
593
+ $func_body =
594
+ "static void #name#C(na_shape_t n, char *p1, na_shape_t i1, char *p2, na_shape_t i2, char *p3, na_shape_t i3)
595
+ {
596
+ int i;
597
+ if (i1 != 0) {
598
+ #pragma omp parallel for
599
+ for (i=0; i<n; i++) {
600
+ OPERATION
601
+ }
602
+ } else {
603
+ // #pragma omp parallel for reduction(+:p1)
604
+ for (i=0; i<n; i++) {
605
+ OPERATION
606
+ }
607
+ }
608
+ }
609
+ "
610
+
611
+ mkfuncs('MulAdd', $data_types, $data_types,
612
+ [nil] +
613
+ ["*p1 += *p2 * *p3;"]*6 +
614
+ ["type1 x = *p2;
615
+ p1.r += x.r*p3.r - x.i*p3.i;
616
+ p1.i += x.r*p3.i + x.i*p3.r;"]*2 +
617
+ ["*p1 = rb_funcall(*p1,'+',1,
618
+ rb_funcall(*p2,'*',1,*p3));"]
619
+ )
620
+
621
+ mkfuncs('MulSbt', $data_types, $data_types,
622
+ [nil] +
623
+ ["*p1 -= *p2 * *p3;"]*6 +
624
+ ["type1 x = *p2;
625
+ p1.r -= x.r*p3.r - x.i*p3.i;
626
+ p1.i -= x.r*p3.i + x.i*p3.r;"]*2 +
627
+ ["*p1 = rb_funcall(*p1,'-',1,
628
+ rb_funcall(*p2,'*',1,*p3));"]
629
+ )
630
+
631
+
632
+ #
633
+ # Bit operator
634
+ #
635
+
636
+ mkfuncs('BAn', $data_types, $data_types,
637
+ [nil] +
638
+ ["*p1 = *p2 & *p3;"]*4 +
639
+ [nil]*4 +
640
+ ["*p1 = rb_funcall(*p2,'&',1,*p3);"]
641
+ )
642
+
643
+ mkfuncs('BOr', $data_types, $data_types,
644
+ [nil] +
645
+ ["*p1 = *p2 | *p3;"]*4 +
646
+ [nil]*4 +
647
+ ["*p1 = rb_funcall(*p2,'|',1,*p3);"]
648
+ )
649
+
650
+ mkfuncs('BXo', $data_types, $data_types,
651
+ [nil] +
652
+ ["*p1 = *p2 ^ *p3;"]*4 +
653
+ [nil]*4 +
654
+ ["*p1 = rb_funcall(*p2,'^',1,*p3);"]
655
+ )
656
+
657
+
658
+ #
659
+ # Comparison
660
+ #
661
+
662
+ mkfuncs('Eql', [$data_types[1]]*10, $data_types,
663
+ [nil] +
664
+ ["*p1 = (*p2==*p3) ? 1:0;"]*6 +
665
+ ["*p1 = (p2.r==p3.r) && (p2.i==p3.i) ? 1:0;"]*2 +
666
+ ["*p1 = RTEST(rb_equal(*p2, *p3)) ? 1:0;"]
667
+ )
668
+
669
+ mkfuncs('Cmp', [$data_types[1]]*10, $data_types,
670
+ [nil] +
671
+ ["if (*p2>*p3) *p1=1;
672
+ else if (*p2<*p3) *p1=2;
673
+ else *p1=0;"]*6 +
674
+ [nil]*2 +
675
+ ["int v = NUM2INT(rb_funcall(*p2,na_id_compare,1,*p3));
676
+ if (v>0) *p1=1; else if (v<0) *p1=2; else *p1=0;"]
677
+ )
678
+
679
+ mkfuncs('And', [$data_types[1]]*10, $data_types,
680
+ [nil] +
681
+ ["*p1 = (*p2!=0 && *p3!=0) ? 1:0;"]*6 +
682
+ ["*p1 = ((p2.r!=0||p2.i!=0) && (p3.r!=0||p3.i!=0)) ? 1:0;"]*2 +
683
+ ["*p1 = (RTEST(*p2) && RTEST(*p3)) ? 1:0;"]
684
+ )
685
+
686
+ mkfuncs('Or_', [$data_types[1]]*10, $data_types,
687
+ [nil] +
688
+ ["*p1 = (*p2!=0 || *p3!=0) ? 1:0;"]*6 +
689
+ ["*p1 = ((p2.r!=0||p2.i!=0) || (p3.r!=0||p3.i!=0)) ? 1:0;"]*2 +
690
+ ["*p1 = (RTEST(*p2) || RTEST(*p3)) ? 1:0;"]
691
+ )
692
+
693
+ mkfuncs('Xor', [$data_types[1]]*10, $data_types,
694
+ [nil] +
695
+ ["*p1 = ((*p2!=0) == (*p3!=0)) ? 0:1;"]*6 +
696
+ ["*p1 = ((p2.r!=0||p2.i!=0) == (p3.r!=0||p3.i!=0)) ? 0:1;"]*2 +
697
+ ["*p1 = (RTEST(*p2) == RTEST(*p3)) ? 0:1;"]
698
+ )
699
+
700
+
701
+ #
702
+ # Atan2
703
+ #
704
+
705
+ mkfuncs('atan2', $data_types, $data_types,
706
+ [nil]*5 +
707
+ ["*p1 = atan2(*p2, *p3);"]*2 +
708
+ [nil]*3
709
+ )
710
+
711
+
712
+ #
713
+ # Mask
714
+ #
715
+ $func_body =
716
+ "static void #name#C(na_shape_t n, char *p1, na_shape_t i1, char *p2, na_shape_t i2, char *p3, na_shape_t i3)
717
+ {
718
+ int i, j=0;
719
+ for (i=0; i<n; i++) {
720
+ OPERATION
721
+ }
722
+ }
723
+ "
724
+ mkfuncs('RefMask',$data_types,$data_types,
725
+ [nil] +
726
+ ["if (*(u_int8_t*)p3) { *p1=*p2; j+=1; }"]*9,
727
+ ["j","i","i"]
728
+ )
729
+
730
+ mkfuncs('SetMask',$data_types,$data_types,
731
+ [nil] +
732
+ ["if (*(u_int8_t*)p3) { *p1=*p2; j+=1; };"]*9,
733
+ ["i","j","i"]
734
+ )