numo-narray 0.9.1.2 → 0.9.1.3

Files changed (55)
  1. checksums.yaml +4 -4
  2. data/Rakefile +7 -1
  3. data/ext/numo/narray/array.c +6 -6
  4. data/ext/numo/narray/data.c +8 -8
  5. data/ext/numo/narray/depend.erb +4 -4
  6. data/ext/numo/narray/extconf.rb +2 -2
  7. data/ext/numo/narray/gen/cogen.rb +13 -0
  8. data/ext/numo/narray/gen/def/dfloat.rb +1 -0
  9. data/ext/numo/narray/gen/def/sfloat.rb +1 -0
  10. data/ext/numo/narray/gen/narray_def.rb +14 -2
  11. data/ext/numo/narray/gen/spec.rb +26 -10
  12. data/ext/numo/narray/gen/tmpl/accum_binary.c +1 -1
  13. data/ext/numo/narray/gen/tmpl/accum_index.c +11 -1
  14. data/ext/numo/narray/gen/tmpl/alloc_func.c +3 -3
  15. data/ext/numo/narray/gen/tmpl/binary.c +149 -10
  16. data/ext/numo/narray/gen/tmpl/binary2.c +1 -1
  17. data/ext/numo/narray/gen/tmpl/bincount.c +1 -1
  18. data/ext/numo/narray/gen/tmpl/cast.c +1 -1
  19. data/ext/numo/narray/gen/tmpl/cond_binary.c +1 -1
  20. data/ext/numo/narray/gen/tmpl/each.c +1 -1
  21. data/ext/numo/narray/gen/tmpl/each_with_index.c +1 -1
  22. data/ext/numo/narray/gen/tmpl/extract_data.c +3 -3
  23. data/ext/numo/narray/gen/tmpl/inspect.c +1 -1
  24. data/ext/numo/narray/gen/tmpl/lib.c +5 -0
  25. data/ext/numo/narray/gen/tmpl/map_with_index.c +1 -1
  26. data/ext/numo/narray/gen/tmpl/median.c +3 -2
  27. data/ext/numo/narray/gen/tmpl/pow.c +1 -1
  28. data/ext/numo/narray/gen/tmpl/qsort.c +118 -56
  29. data/ext/numo/narray/gen/tmpl/store.c +4 -4
  30. data/ext/numo/narray/gen/tmpl/store_bit.c +4 -4
  31. data/ext/numo/narray/gen/tmpl/to_a.c +1 -1
  32. data/ext/numo/narray/gen/tmpl/unary_s.c +55 -9
  33. data/ext/numo/narray/gen/tmpl_bit/each.c +1 -1
  34. data/ext/numo/narray/gen/tmpl_bit/each_with_index.c +1 -1
  35. data/ext/numo/narray/gen/tmpl_bit/inspect.c +1 -1
  36. data/ext/numo/narray/gen/tmpl_bit/mask.c +1 -1
  37. data/ext/numo/narray/gen/tmpl_bit/to_a.c +1 -1
  38. data/ext/numo/narray/index.c +64 -37
  39. data/ext/numo/narray/math.c +4 -4
  40. data/ext/numo/narray/narray.c +54 -29
  41. data/ext/numo/narray/ndloop.c +7 -7
  42. data/ext/numo/narray/numo/narray.h +9 -2
  43. data/ext/numo/narray/numo/template.h +18 -0
  44. data/ext/numo/narray/numo/types/bit.h +5 -0
  45. data/ext/numo/narray/numo/types/complex_macro.h +5 -0
  46. data/ext/numo/narray/numo/types/float_macro.h +5 -0
  47. data/ext/numo/narray/numo/types/int_macro.h +24 -0
  48. data/ext/numo/narray/numo/types/robj_macro.h +5 -0
  49. data/ext/numo/narray/numo/types/uint_macro.h +24 -0
  50. data/ext/numo/narray/numo/types/xint_macro.h +5 -25
  51. data/ext/numo/narray/rand.c +2 -29
  52. data/ext/numo/narray/step.c +1 -28
  53. data/ext/numo/narray/struct.c +26 -22
  54. data/lib/numo/narray/extra.rb +50 -1
  55. metadata +2 -2
@@ -48,7 +48,7 @@ static VALUE
  return <%=c_func%>_self(self, other);
  <% else %>
  VALUE klass, v;
- klass = na_upcast(CLASS_OF(self),CLASS_OF(other));
+ klass = na_upcast(rb_obj_class(self),rb_obj_class(other));
  if (klass==cT) {
  return <%=c_func%>_self(self, other);
  } else {
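The change repeated throughout these hunks replaces CLASS_OF() with rb_obj_class(). CLASS_OF() returns an object's immediate class, which can be a singleton class (for instance once a singleton method has been defined on an NArray instance), so exact-class checks such as klass == cT and upcast lookups keyed on the class can silently stop matching; rb_obj_class() skips singleton and include wrappers and returns the ordinary class. A minimal sketch, not taken from the gem (the which_class helper is hypothetical), showing the two calls side by side:

/* Minimal sketch contrasting CLASS_OF() and rb_obj_class().
 * CLASS_OF() may return a singleton class, so an exact-class check can fail
 * once the object has gained a singleton method; rb_obj_class() always
 * returns the ordinary class. */
#include <ruby.h>

static VALUE
which_class(VALUE self, VALUE obj)
{
    VALUE immediate = CLASS_OF(obj);      /* possibly a singleton class */
    VALUE real      = rb_obj_class(obj);  /* always the ordinary class  */
    return rb_ary_new_from_args(2, immediate, real);
}

void
Init_which_class(void)
{
    /* `which_class` is a hypothetical demo method, not part of numo-narray */
    rb_define_global_function("which_class", which_class, 1);
}

Calling which_class(obj) after def obj.foo; end returns the singleton class first and the real class second, which is exactly the situation the rb_obj_class() form is robust against.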
@@ -170,7 +170,7 @@ static VALUE
  return <%=c_func%>_32(self, length);
  }
  } else {
- wclass = CLASS_OF(weight);
+ wclass = rb_obj_class(weight);
  if (wclass == numo_cSFloat) {
  return <%=c_func%>_sf(self, weight, length);
  } else {
@@ -17,7 +17,7 @@ static VALUE
  narray_t *na;
  dtype x;

- if (CLASS_OF(obj)==cT) {
+ if (rb_obj_class(obj)==cT) {
  return obj;
  }
  if (RTEST(rb_obj_is_kind_of(obj,rb_cNumeric))) {
@@ -44,7 +44,7 @@ static VALUE
  return <%=c_func%>_self(self, other);
  <% else %>
  VALUE klass, v;
- klass = na_upcast(CLASS_OF(self),CLASS_OF(other));
+ klass = na_upcast(rb_obj_class(self),rb_obj_class(other));
  if (klass==cT) {
  return <%=c_func%>_self(self, other);
  } else {
@@ -1,4 +1,4 @@
- void
+ static void
  <%=c_iter%>(na_loop_t *const lp)
  {
  size_t i, s1;
@@ -11,7 +11,7 @@ yield_each_with_index(dtype x, size_t *c, VALUE *a, int nd, int md)
  }


- void
+ static void
  <%=c_iter%>(na_loop_t *const lp)
  {
  size_t i, s1;
@@ -15,7 +15,7 @@ static dtype
  if (na->size != 1) {
  rb_raise(nary_eShapeError,"narray size should be 1");
  }
- klass = CLASS_OF(obj);
+ klass = rb_obj_class(obj);
  ptr = na_get_pointer_for_read(obj);
  pos = na_get_offset(obj);
  <% find_tmpl("store").definitions.select{|x| x.class==Store}.each do |x| %>
@@ -27,14 +27,14 @@ static dtype

  // coerce
  r = rb_funcall(obj, rb_intern("coerce_cast"), 1, cT);
- if (CLASS_OF(r)==cT) {
+ if (rb_obj_class(r)==cT) {
  return <%=c_func%>(r);
  }
  <% if is_object %>
  return obj;
  <% else %>
  rb_raise(nary_eCastError, "unknown conversion from %s to %s",
- rb_class2name(CLASS_OF(obj)),
+ rb_class2name(rb_obj_class(obj)),
  rb_class2name(cT));
  <% end %>
  }
@@ -13,7 +13,7 @@ static VALUE
  @overload inspect
  @return [String]
  */
- VALUE
+ static VALUE
  <%=c_func(0)%>(VALUE ary)
  {
  return na_ndloop_inspect(ary, <%=c_iter%>, Qnil);
@@ -14,6 +14,11 @@

  #define m_map(x) m_num_to_data(rb_yield(m_data_to_num(x)))

+ <% if is_simd %>
+ #include <emmintrin.h>
+ #define SIMD_ALIGNMENT_SIZE 16
+ <% end %>
+
  <% id_decl.each do |x| %>
  <%= x %>
  <% end %>
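The new conditional pulls in emmintrin.h, the SSE2 intrinsics header, and defines SIMD_ALIGNMENT_SIZE as 16 bytes, the width of a 128-bit XMM register. Dividing that by sizeof(dtype) later in the templates gives the number of elements handled per SIMD step: two doubles per __m128d or four floats per __m128. A small standalone sketch (not gem code) of that arithmetic:

#include <stdio.h>

#define SIMD_ALIGNMENT_SIZE 16  /* bytes in one 128-bit XMM register */

int main(void)
{
    /* elements packed per SIMD step, as computed in the generated kernels */
    printf("doubles per __m128d: %zu\n", SIMD_ALIGNMENT_SIZE / sizeof(double)); /* 2 */
    printf("floats  per __m128 : %zu\n", SIMD_ALIGNMENT_SIZE / sizeof(float));  /* 4 */
    return 0;
}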
@@ -12,7 +12,7 @@ yield_map_with_index(dtype x, size_t *c, VALUE *a, int nd, int md)
  return m_num_to_data(y);
  }

- void
+ static void
  <%=c_iter%>(na_loop_t *const lp)
  {
  size_t i;
@@ -47,7 +47,7 @@ static void
  static VALUE
  <%=c_func(-1)%>(int argc, VALUE *argv, VALUE self)
  {
- VALUE reduce;
+ VALUE v, reduce;
  ndfunc_arg_in_t ain[2] = {{OVERWRITE,0},{sym_reduce,0}};
  ndfunc_arg_out_t aout[1] = {{INT2FIX(0),0}};
  ndfunc_t ndf = {0, NDF_HAS_LOOP|NDF_FLAT_REDUCE, 2,1, ain,aout};
@@ -60,5 +60,6 @@ static VALUE
  ndf.func = <%=c_iter%>;
  reduce = na_reduce_dimension(argc, argv, 1, &self, &ndf, 0);
  <% end %>
- return na_ndloop(&ndf, 2, self, reduce);
+ v = na_ndloop(&ndf, 2, self, reduce);
+ return <%=type_name%>_extract(v);
  }
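With this change the reduction result from na_ndloop is passed through the type's generated _extract function before being returned, so reducing over all elements yields a plain Ruby numeric rather than a 0-dimensional NArray. The same unwrapping can be expressed through the Ruby-level Numo::NArray#extract method; the sketch below is illustrative only and the extract_if_scalar helper is hypothetical:

#include <ruby.h>

/* Return a plain numeric when the result is a 0-dimensional narray,
 * and the narray itself otherwise, via Numo::NArray#extract. */
static VALUE
extract_if_scalar(VALUE narray_result)
{
    return rb_funcall(narray_result, rb_intern("extract"), 0);
}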
@@ -67,7 +67,7 @@ static VALUE
  return <%=c_func%>_self(self,other);
  <% else %>
  VALUE klass, v;
- klass = na_upcast(CLASS_OF(self),CLASS_OF(other));
+ klass = na_upcast(rb_obj_class(self),rb_obj_class(other));
  if (klass==cT) {
  return <%=c_func%>_self(self,other);
  } else {
@@ -53,12 +53,52 @@
  #define QSORT_INCL
  #define Min(x, y) ((x) < (y) ? (x) : (y))

- #define swap(type,a,b) \
- do {type tmp=*(type*)(a); *(type*)(a)=*(type*)(b); *(type*)(b)=tmp;} while(0)
+ /*
+ * Qsort routine based on J. L. Bentley and M. D. McIlroy,
+ * "Engineering a sort function",
+ * Software--Practice and Experience 23 (1993) 1249-1265.
+ * We have modified their original by adding a check for already-sorted input,
+ * which seems to be a win per discussions on pgsql-hackers around 2006-03-21.
+ */
+ #define swapcode(TYPE, parmi, parmj, n) \
+ do { \
+ size_t i = (n) / sizeof (TYPE); \
+ TYPE *pi = (TYPE *)(void *)(parmi); \
+ TYPE *pj = (TYPE *)(void *)(parmj); \
+ do { \
+ TYPE t = *pi; \
+ *pi++ = *pj; \
+ *pj++ = t; \
+ } while (--i > 0); \
+ } while (0)
+
+ #define SWAPINIT(a, es) swaptype = ((char *)(a) - (char *)0) % sizeof(long) || \
+ (es) % sizeof(long) ? 2 : (es) == sizeof(long)? 0 : 1;
+
+ static inline void
+ swapfunc(a, b, n, swaptype)
+ char *a,
+ *b;
+ size_t n;
+ int swaptype;
+ {
+ if (swaptype <= 1)
+ swapcode(long, a, b, n);
+ else
+ swapcode(char, a, b, n);
+ }

- #define vecswap(type, a, b, n) if ((n)>0) swap(type,(a),(b))
+ #define swap(a, b) \
+ if (swaptype == 0) { \
+ long t = *(long *)(void *)(a); \
+ *(long *)(void *)(a) = *(long *)(void *)(b); \
+ *(long *)(void *)(b) = t; \
+ } else \
+ swapfunc(a, b, es, swaptype)

- #define MED3(a,b,c) \
+ #define vecswap(a, b, n) if ((n) > 0) swapfunc((a), (b), (size_t)(n), swaptype)
+
+ #define med3(a,b,c,_cmp) \
  (cmpgt(b,a) ? \
  (cmpgt(c,b) ? b : (cmpgt(c,a) ? c : a)) \
  : (cmpgt(b,c) ? b : (cmpgt(c,a) ? a : c)))
@@ -76,75 +116,97 @@
  <% end %>
  <% c_func(:nodef)%>

- void
+ static void
  <%=type_name%>_qsort<%=suffix%>(void *a, size_t n, ssize_t es)
  {
- char *pa, *pb, *pc, *pd, *pl, *pm, *pn;
- int d, r, presorted;
+ char *pa,
+ *pb,
+ *pc,
+ *pd,
+ *pl,
+ *pm,
+ *pn;
+ int d,
+ r,
+ swaptype,
+ presorted;

- loop:
- if (n < 7) {
- for (pm = (char *) a + es; pm < (char *) a + n * es; pm += es)
- for (pl = pm; pl > (char *) a && cmpgt(pl - es, pl);
- pl -= es)
- swap(qsort_dtype, pl, pl - es);
- return;
- }
+ loop:SWAPINIT(a, es);
+ if (n < 7)
+ {
+ for (pm = (char *) a + es; pm < (char *) a + n * es; pm += es)
+ for (pl = pm; pl > (char *) a && cmpgt(pl - es, pl);
+ pl -= es)
+ swap(pl, pl - es);
+ return;
+ }
  presorted = 1;
- for (pm = (char *) a + es; pm < (char *) a + n * es; pm += es) {
- if (cmpgt(pm - es, pm)) {
- presorted = 0;
- break;
+ for (pm = (char *) a + es; pm < (char *) a + n * es; pm += es)
+ {
+ if (cmpgt(pm - es, pm))
+ {
+ presorted = 0;
+ break;
+ }
  }
- }
  if (presorted)
  return;
  pm = (char *) a + (n / 2) * es;
- if (n > 7) {
- pl = (char *) a;
- pn = (char *) a + (n - 1) * es;
- if (n > 40) {
- d = (n / 8) * es;
- pl = MED3(pl, pl + d, pl + 2 * d);
- pm = MED3(pm - d, pm, pm + d);
- pn = MED3(pn - 2 * d, pn - d, pn);
+ if (n > 7)
+ {
+ pl = (char *) a;
+ pn = (char *) a + (n - 1) * es;
+ if (n > 40)
+ {
+ d = (n / 8) * es;
+ pl = med3(pl, pl + d, pl + 2 * d, cmp);
+ pm = med3(pm - d, pm, pm + d, cmp);
+ pn = med3(pn - 2 * d, pn - d, pn, cmp);
+ }
+ pm = med3(pl, pm, pn, cmp);
  }
- pm = MED3(pl, pm, pn);
- }
- swap(qsort_dtype, a, pm);
+ swap(a, pm);
  pa = pb = (char *) a + es;
  pc = pd = (char *) a + (n - 1) * es;
- for (;;) {
- while (pb <= pc && (r = cmp(pb, a)) <= 0) {
- if (r == 0) {
- swap(qsort_dtype, pa, pb);
- pa += es;
- }
+ for (;;)
+ {
+ while (pb <= pc && (r = cmp(pb, a)) <= 0)
+ {
+ if (r == 0)
+ {
+ swap(pa, pb);
+ pa += es;
+ }
+ pb += es;
+ }
+ while (pb <= pc && (r = cmp(pc, a)) >= 0)
+ {
+ if (r == 0)
+ {
+ swap(pc, pd);
+ pd -= es;
+ }
+ pc -= es;
+ }
+ if (pb > pc)
+ break;
+ swap(pb, pc);
  pb += es;
- }
- while (pb <= pc && (r = cmp(pc, a)) >= 0) {
- if (r == 0) {
- swap(qsort_dtype, pc, pd);
- pd -= es;
- }
  pc -= es;
  }
- if (pb > pc)
- break;
- swap(qsort_dtype, pb, pc);
- pb += es;
- pc -= es;
- }
  pn = (char *) a + n * es;
  r = Min(pa - (char *) a, pb - pa);
- vecswap(qsort_dtype, a, pb - r, r);
+ vecswap(a, pb - r, r);
  r = Min(pd - pc, pn - pd - es);
- vecswap(qsort_dtype, pb, pn - r, r);
+ vecswap(pb, pn - r, r);
  if ((r = pb - pa) > es)
  <%=type_name%>_qsort<%=suffix%>(a, r / es, es);
- if ((r = pd - pc) > es) {
- a = pn - r;
- n = r / es;
- goto loop;
- }
+ if ((r = pd - pc) > es)
+ {
+ /* Iterate rather than recurse to save stack space */
+ a = pn - r;
+ n = r / es;
+ goto loop;
+ }
+ /* qsort(pn - r, r / es, es, cmp);*/
  }
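The rewritten template follows the Bentley–McIlroy design cited in its header comment: median-of-three pivot selection (nested into a "ninther" when n > 40), three-way partitioning that parks pivot-equal elements at both ends and swaps them back to the middle with vecswap, an early return for already-sorted input, and a goto loop on the remaining partition instead of a second recursive call to limit stack depth. A standalone sketch on plain ints, rather than the gem's type templates, of the median-of-three rule that the med3 macro encodes with cmpgt:

/* Standalone sketch (plain ints, not the gem's templates): the pivot is the
 * middle value of the first, middle, and last candidates, which guards the
 * quicksort against already-sorted and reverse-sorted inputs. */
#include <stdio.h>

static int med3_int(int a, int b, int c)
{
    return (b > a) ? ((c > b) ? b : ((c > a) ? c : a))
                   : ((b > c) ? b : ((c > a) ? a : c));
}

int main(void)
{
    printf("%d\n", med3_int(1, 9, 5));   /* 5 */
    printf("%d\n", med3_int(7, 2, 4));   /* 4 */
    printf("%d\n", med3_int(3, 3, 8));   /* 3 */
    return 0;
}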
@@ -13,7 +13,7 @@ static VALUE
  {
  VALUE r, klass;

- klass = CLASS_OF(obj);
+ klass = rb_obj_class(obj);

  <% definitions.each do |x| %>
  if (<%=x.condition("klass")%>) {
@@ -24,7 +24,7 @@ static VALUE

  if (IsNArray(obj)) {
  r = rb_funcall(obj, rb_intern("coerce_cast"), 1, cT);
- if (CLASS_OF(r)==cT) {
+ if (rb_obj_class(r)==cT) {
  <%=c_func%>(self,r);
  return self;
  }
@@ -34,8 +34,8 @@ static VALUE
  robject_store_numeric(self,obj);
  <% else %>
  rb_raise(nary_eCastError, "unknown conversion from %s to %s",
- rb_class2name(CLASS_OF(obj)),
- rb_class2name(CLASS_OF(self)));
+ rb_class2name(rb_obj_class(obj)),
+ rb_class2name(rb_obj_class(self)));
  <% end %>
  return self;
  }
@@ -16,13 +16,13 @@ static void
  if (idx1) {
  for (; i--;) {
  LOAD_BIT(a2, p2+*idx2, x); idx2++;
- y = m_from_real(x);
+ y = m_from_sint(x);
  SET_DATA_INDEX(p1,idx1,dtype,y);
  }
  } else {
  for (; i--;) {
  LOAD_BIT(a2, p2+*idx2, x); idx2++;
- y = m_from_real(x);
+ y = m_from_sint(x);
  SET_DATA_STRIDE(p1,s1,dtype,y);
  }
  }
@@ -30,13 +30,13 @@ static void
  if (idx1) {
  for (; i--;) {
  LOAD_BIT(a2, p2, x); p2 += s2;
- y = m_from_real(x);
+ y = m_from_sint(x);
  SET_DATA_INDEX(p1,idx1,dtype,y);
  }
  } else {
  for (; i--;) {
  LOAD_BIT(a2, p2, x); p2 += s2;
- y = m_from_real(x);
+ y = m_from_sint(x);
  SET_DATA_STRIDE(p1,s1,dtype,y);
  }
  }
@@ -1,4 +1,4 @@
- void
+ static void
  <%=c_iter%>(na_loop_t *const lp)
  {
  size_t i, s1;
@@ -1,23 +1,36 @@
  static void
  <%=c_iter%>(na_loop_t *const lp)
  {
- size_t i;
+ size_t i=0, n;
  char *p1, *p2;
  ssize_t s1, s2;
  size_t *idx1, *idx2;
  dtype x;
- INIT_COUNTER(lp, i);
+
+ <% if is_simd and !is_complex and %w[sqrt].include? name %>
+ size_t cnt;
+ size_t cnt_simd_loop = -1;
+ <% if is_double_precision %>
+ __m128d a;
+ <% else %>
+ __m128 a;
+ <% end %>
+ size_t num_pack; // Number of elements packed for SIMD.
+ num_pack = SIMD_ALIGNMENT_SIZE / sizeof(dtype);
+ <% end %>
+ INIT_COUNTER(lp, n);
  INIT_PTR_IDX(lp, 0, p1, s1, idx1);
  INIT_PTR_IDX(lp, 1, p2, s2, idx2);
+
  if (idx1) {
  if (idx2) {
- for (; i--;) {
+ for (i=0; i<n; i++) {
  GET_DATA_INDEX(p1,idx1,dtype,x);
  x = m_<%=name%>(x);
  SET_DATA_INDEX(p2,idx2,dtype,x);
  }
  } else {
- for (; i--;) {
+ for (i=0; i<n; i++) {
  GET_DATA_INDEX(p1,idx1,dtype,x);
  x = m_<%=name%>(x);
  SET_DATA_STRIDE(p2,s2,dtype,x);
@@ -25,17 +38,50 @@ static void
  }
  } else {
  if (idx2) {
- for (; i--;) {
+ for (i=0; i<n; i++) {
  GET_DATA_STRIDE(p1,s1,dtype,x);
  x = m_<%=name%>(x);
  SET_DATA_INDEX(p2,idx2,dtype,x);
  }
  } else {
- for (; i--;) {
- GET_DATA_STRIDE(p1,s1,dtype,x);
- x = m_<%=name%>(x);
- SET_DATA_STRIDE(p2,s2,dtype,x);
+ <% if is_simd and !is_complex and %w[sqrt].include? name %>
+ // Check number of elements. & Check same alignment.
+ if ((n >= num_pack) && is_same_aligned2(&((dtype*)p1)[i], &((dtype*)p2)[i], SIMD_ALIGNMENT_SIZE)){
+ // Calculate up to the position just before the start of SIMD computation.
+ cnt = get_count_of_elements_not_aligned_to_simd_size(&((dtype*)p1)[i], SIMD_ALIGNMENT_SIZE, sizeof(dtype));
+ for (i=0; i < cnt; i++) {
+ ((dtype*)p2)[i] = m_<%=name%>(((dtype*)p1)[i]);
+ }
+
+ // Get the count of SIMD computation loops.
+ cnt_simd_loop = (n - i) % num_pack;
+
+ // SIMD computation.
+ if (p1 == p2) { // inplace case
+ for(; i < n - cnt_simd_loop; i += num_pack){
+ a = _mm_load_<%=simd_type%>(&((dtype*)p1)[i]);
+ a = _mm_<%=name%>_<%=simd_type%>(a);
+ _mm_store_<%=simd_type%>(&((dtype*)p1)[i], a);
+ }
+ } else {
+ for(; i < n - cnt_simd_loop; i += num_pack){
+ a = _mm_load_<%=simd_type%>(&((dtype*)p1)[i]);
+ a = _mm_<%=name%>_<%=simd_type%>(a);
+ _mm_stream_<%=simd_type%>(&((dtype*)p2)[i], a);
+ }
+ }
+
+ }
+ // Compute the remainder of the SIMD operation.
+ if (cnt_simd_loop != 0){
+ <% end %>
+ for (; i<n; i++) {
+ ((dtype*)p2)[i] = m_<%=name%>(((dtype*)p1)[i]);
+ }
+ <% if is_simd and !is_complex and %w[sqrt].include? name %>
  }
+ <% end %>
+ return;
  }
  }
  }
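For sqrt on non-complex float types, the generated contiguous-stride branch now peels leading elements until the data reaches 16-byte alignment, runs the bulk of the loop through SSE2 (_mm_store_* when the operation is in place, the non-temporal _mm_stream_* when writing to a separate output), and finishes the tail with the scalar m_sqrt; is_same_aligned2 and get_count_of_elements_not_aligned_to_simd_size presumably come from the numo/template.h additions listed above. A standalone sketch of the same peel / SIMD / remainder pattern for doubles, not the gem's template code:

/* Standalone sketch: scalar loop until the pointer is 16-byte aligned,
 * _mm_sqrt_pd two doubles at a time, then a scalar tail for what is left.
 * In-place use (dst == src) works because both branches share alignment. */
#include <emmintrin.h>
#include <math.h>
#include <stddef.h>
#include <stdint.h>

static void sqrt_sse2(double *dst, const double *src, size_t n)
{
    size_t i = 0;

    /* peel until src is 16-byte aligned */
    while (i < n && ((uintptr_t)(src + i) & 15u) != 0)
    {
        dst[i] = sqrt(src[i]);
        i++;
    }
    /* main SIMD loop, only if dst is aligned as well: 2 doubles per register */
    if (((uintptr_t)(dst + i) & 15u) == 0)
    {
        for (; i + 2 <= n; i += 2)
        {
            __m128d a = _mm_load_pd(src + i);
            _mm_store_pd(dst + i, _mm_sqrt_pd(a));
        }
    }
    /* scalar remainder */
    for (; i < n; i++)
        dst[i] = sqrt(src[i]);
}

The template's non-temporal store in the out-of-place branch is a cache consideration: results streamed to a fresh output buffer are unlikely to be read back immediately, so bypassing the cache avoids evicting the input data being processed.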