numo-narray 0.9.1.2 → 0.9.1.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Rakefile +7 -1
- data/ext/numo/narray/array.c +6 -6
- data/ext/numo/narray/data.c +8 -8
- data/ext/numo/narray/depend.erb +4 -4
- data/ext/numo/narray/extconf.rb +2 -2
- data/ext/numo/narray/gen/cogen.rb +13 -0
- data/ext/numo/narray/gen/def/dfloat.rb +1 -0
- data/ext/numo/narray/gen/def/sfloat.rb +1 -0
- data/ext/numo/narray/gen/narray_def.rb +14 -2
- data/ext/numo/narray/gen/spec.rb +26 -10
- data/ext/numo/narray/gen/tmpl/accum_binary.c +1 -1
- data/ext/numo/narray/gen/tmpl/accum_index.c +11 -1
- data/ext/numo/narray/gen/tmpl/alloc_func.c +3 -3
- data/ext/numo/narray/gen/tmpl/binary.c +149 -10
- data/ext/numo/narray/gen/tmpl/binary2.c +1 -1
- data/ext/numo/narray/gen/tmpl/bincount.c +1 -1
- data/ext/numo/narray/gen/tmpl/cast.c +1 -1
- data/ext/numo/narray/gen/tmpl/cond_binary.c +1 -1
- data/ext/numo/narray/gen/tmpl/each.c +1 -1
- data/ext/numo/narray/gen/tmpl/each_with_index.c +1 -1
- data/ext/numo/narray/gen/tmpl/extract_data.c +3 -3
- data/ext/numo/narray/gen/tmpl/inspect.c +1 -1
- data/ext/numo/narray/gen/tmpl/lib.c +5 -0
- data/ext/numo/narray/gen/tmpl/map_with_index.c +1 -1
- data/ext/numo/narray/gen/tmpl/median.c +3 -2
- data/ext/numo/narray/gen/tmpl/pow.c +1 -1
- data/ext/numo/narray/gen/tmpl/qsort.c +118 -56
- data/ext/numo/narray/gen/tmpl/store.c +4 -4
- data/ext/numo/narray/gen/tmpl/store_bit.c +4 -4
- data/ext/numo/narray/gen/tmpl/to_a.c +1 -1
- data/ext/numo/narray/gen/tmpl/unary_s.c +55 -9
- data/ext/numo/narray/gen/tmpl_bit/each.c +1 -1
- data/ext/numo/narray/gen/tmpl_bit/each_with_index.c +1 -1
- data/ext/numo/narray/gen/tmpl_bit/inspect.c +1 -1
- data/ext/numo/narray/gen/tmpl_bit/mask.c +1 -1
- data/ext/numo/narray/gen/tmpl_bit/to_a.c +1 -1
- data/ext/numo/narray/index.c +64 -37
- data/ext/numo/narray/math.c +4 -4
- data/ext/numo/narray/narray.c +54 -29
- data/ext/numo/narray/ndloop.c +7 -7
- data/ext/numo/narray/numo/narray.h +9 -2
- data/ext/numo/narray/numo/template.h +18 -0
- data/ext/numo/narray/numo/types/bit.h +5 -0
- data/ext/numo/narray/numo/types/complex_macro.h +5 -0
- data/ext/numo/narray/numo/types/float_macro.h +5 -0
- data/ext/numo/narray/numo/types/int_macro.h +24 -0
- data/ext/numo/narray/numo/types/robj_macro.h +5 -0
- data/ext/numo/narray/numo/types/uint_macro.h +24 -0
- data/ext/numo/narray/numo/types/xint_macro.h +5 -25
- data/ext/numo/narray/rand.c +2 -29
- data/ext/numo/narray/step.c +1 -28
- data/ext/numo/narray/struct.c +26 -22
- data/lib/numo/narray/extra.rb +50 -1
- metadata +2 -2
@@ -48,7 +48,7 @@ static VALUE
|
|
48
48
|
return <%=c_func%>_self(self, other);
|
49
49
|
<% else %>
|
50
50
|
VALUE klass, v;
|
51
|
-
klass = na_upcast(
|
51
|
+
klass = na_upcast(rb_obj_class(self),rb_obj_class(other));
|
52
52
|
if (klass==cT) {
|
53
53
|
return <%=c_func%>_self(self, other);
|
54
54
|
} else {
|
@@ -44,7 +44,7 @@ static VALUE
|
|
44
44
|
return <%=c_func%>_self(self, other);
|
45
45
|
<% else %>
|
46
46
|
VALUE klass, v;
|
47
|
-
klass = na_upcast(
|
47
|
+
klass = na_upcast(rb_obj_class(self),rb_obj_class(other));
|
48
48
|
if (klass==cT) {
|
49
49
|
return <%=c_func%>_self(self, other);
|
50
50
|
} else {
|
@@ -15,7 +15,7 @@ static dtype
|
|
15
15
|
if (na->size != 1) {
|
16
16
|
rb_raise(nary_eShapeError,"narray size should be 1");
|
17
17
|
}
|
18
|
-
klass =
|
18
|
+
klass = rb_obj_class(obj);
|
19
19
|
ptr = na_get_pointer_for_read(obj);
|
20
20
|
pos = na_get_offset(obj);
|
21
21
|
<% find_tmpl("store").definitions.select{|x| x.class==Store}.each do |x| %>
|
@@ -27,14 +27,14 @@ static dtype
|
|
27
27
|
|
28
28
|
// coerce
|
29
29
|
r = rb_funcall(obj, rb_intern("coerce_cast"), 1, cT);
|
30
|
-
if (
|
30
|
+
if (rb_obj_class(r)==cT) {
|
31
31
|
return <%=c_func%>(r);
|
32
32
|
}
|
33
33
|
<% if is_object %>
|
34
34
|
return obj;
|
35
35
|
<% else %>
|
36
36
|
rb_raise(nary_eCastError, "unknown conversion from %s to %s",
|
37
|
-
rb_class2name(
|
37
|
+
rb_class2name(rb_obj_class(obj)),
|
38
38
|
rb_class2name(cT));
|
39
39
|
<% end %>
|
40
40
|
}
|
@@ -47,7 +47,7 @@ static void
|
|
47
47
|
static VALUE
|
48
48
|
<%=c_func(-1)%>(int argc, VALUE *argv, VALUE self)
|
49
49
|
{
|
50
|
-
VALUE reduce;
|
50
|
+
VALUE v, reduce;
|
51
51
|
ndfunc_arg_in_t ain[2] = {{OVERWRITE,0},{sym_reduce,0}};
|
52
52
|
ndfunc_arg_out_t aout[1] = {{INT2FIX(0),0}};
|
53
53
|
ndfunc_t ndf = {0, NDF_HAS_LOOP|NDF_FLAT_REDUCE, 2,1, ain,aout};
|
@@ -60,5 +60,6 @@ static VALUE
|
|
60
60
|
ndf.func = <%=c_iter%>;
|
61
61
|
reduce = na_reduce_dimension(argc, argv, 1, &self, &ndf, 0);
|
62
62
|
<% end %>
|
63
|
-
|
63
|
+
v = na_ndloop(&ndf, 2, self, reduce);
|
64
|
+
return <%=type_name%>_extract(v);
|
64
65
|
}
|
@@ -67,7 +67,7 @@ static VALUE
|
|
67
67
|
return <%=c_func%>_self(self,other);
|
68
68
|
<% else %>
|
69
69
|
VALUE klass, v;
|
70
|
-
klass = na_upcast(
|
70
|
+
klass = na_upcast(rb_obj_class(self),rb_obj_class(other));
|
71
71
|
if (klass==cT) {
|
72
72
|
return <%=c_func%>_self(self,other);
|
73
73
|
} else {
|
@@ -53,12 +53,52 @@
|
|
53
53
|
#define QSORT_INCL
|
54
54
|
#define Min(x, y) ((x) < (y) ? (x) : (y))
|
55
55
|
|
56
|
-
|
57
|
-
|
56
|
+
/*
|
57
|
+
* Qsort routine based on J. L. Bentley and M. D. McIlroy,
|
58
|
+
* "Engineering a sort function",
|
59
|
+
* Software--Practice and Experience 23 (1993) 1249-1265.
|
60
|
+
* We have modified their original by adding a check for already-sorted input,
|
61
|
+
* which seems to be a win per discussions on pgsql-hackers around 2006-03-21.
|
62
|
+
*/
|
63
|
+
#define swapcode(TYPE, parmi, parmj, n) \
|
64
|
+
do { \
|
65
|
+
size_t i = (n) / sizeof (TYPE); \
|
66
|
+
TYPE *pi = (TYPE *)(void *)(parmi); \
|
67
|
+
TYPE *pj = (TYPE *)(void *)(parmj); \
|
68
|
+
do { \
|
69
|
+
TYPE t = *pi; \
|
70
|
+
*pi++ = *pj; \
|
71
|
+
*pj++ = t; \
|
72
|
+
} while (--i > 0); \
|
73
|
+
} while (0)
|
74
|
+
|
75
|
+
#define SWAPINIT(a, es) swaptype = ((char *)(a) - (char *)0) % sizeof(long) || \
|
76
|
+
(es) % sizeof(long) ? 2 : (es) == sizeof(long)? 0 : 1;
|
77
|
+
|
78
|
+
static inline void
|
79
|
+
swapfunc(a, b, n, swaptype)
|
80
|
+
char *a,
|
81
|
+
*b;
|
82
|
+
size_t n;
|
83
|
+
int swaptype;
|
84
|
+
{
|
85
|
+
if (swaptype <= 1)
|
86
|
+
swapcode(long, a, b, n);
|
87
|
+
else
|
88
|
+
swapcode(char, a, b, n);
|
89
|
+
}
|
58
90
|
|
59
|
-
#define
|
91
|
+
#define swap(a, b) \
|
92
|
+
if (swaptype == 0) { \
|
93
|
+
long t = *(long *)(void *)(a); \
|
94
|
+
*(long *)(void *)(a) = *(long *)(void *)(b); \
|
95
|
+
*(long *)(void *)(b) = t; \
|
96
|
+
} else \
|
97
|
+
swapfunc(a, b, es, swaptype)
|
60
98
|
|
61
|
-
#define
|
99
|
+
#define vecswap(a, b, n) if ((n) > 0) swapfunc((a), (b), (size_t)(n), swaptype)
|
100
|
+
|
101
|
+
#define med3(a,b,c,_cmp) \
|
62
102
|
(cmpgt(b,a) ? \
|
63
103
|
(cmpgt(c,b) ? b : (cmpgt(c,a) ? c : a)) \
|
64
104
|
: (cmpgt(b,c) ? b : (cmpgt(c,a) ? a : c)))
|
@@ -76,75 +116,97 @@
|
|
76
116
|
<% end %>
|
77
117
|
<% c_func(:nodef)%>
|
78
118
|
|
79
|
-
void
|
119
|
+
static void
|
80
120
|
<%=type_name%>_qsort<%=suffix%>(void *a, size_t n, ssize_t es)
|
81
121
|
{
|
82
|
-
char
|
83
|
-
|
122
|
+
char *pa,
|
123
|
+
*pb,
|
124
|
+
*pc,
|
125
|
+
*pd,
|
126
|
+
*pl,
|
127
|
+
*pm,
|
128
|
+
*pn;
|
129
|
+
int d,
|
130
|
+
r,
|
131
|
+
swaptype,
|
132
|
+
presorted;
|
84
133
|
|
85
|
-
loop:
|
86
|
-
if (n < 7)
|
87
|
-
|
88
|
-
for (
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
134
|
+
loop:SWAPINIT(a, es);
|
135
|
+
if (n < 7)
|
136
|
+
{
|
137
|
+
for (pm = (char *) a + es; pm < (char *) a + n * es; pm += es)
|
138
|
+
for (pl = pm; pl > (char *) a && cmpgt(pl - es, pl);
|
139
|
+
pl -= es)
|
140
|
+
swap(pl, pl - es);
|
141
|
+
return;
|
142
|
+
}
|
93
143
|
presorted = 1;
|
94
|
-
for (pm = (char *) a + es; pm < (char *) a + n * es; pm += es)
|
95
|
-
|
96
|
-
|
97
|
-
|
144
|
+
for (pm = (char *) a + es; pm < (char *) a + n * es; pm += es)
|
145
|
+
{
|
146
|
+
if (cmpgt(pm - es, pm))
|
147
|
+
{
|
148
|
+
presorted = 0;
|
149
|
+
break;
|
150
|
+
}
|
98
151
|
}
|
99
|
-
}
|
100
152
|
if (presorted)
|
101
153
|
return;
|
102
154
|
pm = (char *) a + (n / 2) * es;
|
103
|
-
if (n > 7)
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
155
|
+
if (n > 7)
|
156
|
+
{
|
157
|
+
pl = (char *) a;
|
158
|
+
pn = (char *) a + (n - 1) * es;
|
159
|
+
if (n > 40)
|
160
|
+
{
|
161
|
+
d = (n / 8) * es;
|
162
|
+
pl = med3(pl, pl + d, pl + 2 * d, cmp);
|
163
|
+
pm = med3(pm - d, pm, pm + d, cmp);
|
164
|
+
pn = med3(pn - 2 * d, pn - d, pn, cmp);
|
165
|
+
}
|
166
|
+
pm = med3(pl, pm, pn, cmp);
|
111
167
|
}
|
112
|
-
|
113
|
-
}
|
114
|
-
swap(qsort_dtype, a, pm);
|
168
|
+
swap(a, pm);
|
115
169
|
pa = pb = (char *) a + es;
|
116
170
|
pc = pd = (char *) a + (n - 1) * es;
|
117
|
-
for (;;)
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
|
171
|
+
for (;;)
|
172
|
+
{
|
173
|
+
while (pb <= pc && (r = cmp(pb, a)) <= 0)
|
174
|
+
{
|
175
|
+
if (r == 0)
|
176
|
+
{
|
177
|
+
swap(pa, pb);
|
178
|
+
pa += es;
|
179
|
+
}
|
180
|
+
pb += es;
|
181
|
+
}
|
182
|
+
while (pb <= pc && (r = cmp(pc, a)) >= 0)
|
183
|
+
{
|
184
|
+
if (r == 0)
|
185
|
+
{
|
186
|
+
swap(pc, pd);
|
187
|
+
pd -= es;
|
188
|
+
}
|
189
|
+
pc -= es;
|
190
|
+
}
|
191
|
+
if (pb > pc)
|
192
|
+
break;
|
193
|
+
swap(pb, pc);
|
123
194
|
pb += es;
|
124
|
-
}
|
125
|
-
while (pb <= pc && (r = cmp(pc, a)) >= 0) {
|
126
|
-
if (r == 0) {
|
127
|
-
swap(qsort_dtype, pc, pd);
|
128
|
-
pd -= es;
|
129
|
-
}
|
130
195
|
pc -= es;
|
131
196
|
}
|
132
|
-
if (pb > pc)
|
133
|
-
break;
|
134
|
-
swap(qsort_dtype, pb, pc);
|
135
|
-
pb += es;
|
136
|
-
pc -= es;
|
137
|
-
}
|
138
197
|
pn = (char *) a + n * es;
|
139
198
|
r = Min(pa - (char *) a, pb - pa);
|
140
|
-
vecswap(
|
199
|
+
vecswap(a, pb - r, r);
|
141
200
|
r = Min(pd - pc, pn - pd - es);
|
142
|
-
vecswap(
|
201
|
+
vecswap(pb, pn - r, r);
|
143
202
|
if ((r = pb - pa) > es)
|
144
203
|
<%=type_name%>_qsort<%=suffix%>(a, r / es, es);
|
145
|
-
if ((r = pd - pc) > es)
|
146
|
-
|
147
|
-
|
148
|
-
|
149
|
-
|
204
|
+
if ((r = pd - pc) > es)
|
205
|
+
{
|
206
|
+
/* Iterate rather than recurse to save stack space */
|
207
|
+
a = pn - r;
|
208
|
+
n = r / es;
|
209
|
+
goto loop;
|
210
|
+
}
|
211
|
+
/* qsort(pn - r, r / es, es, cmp);*/
|
150
212
|
}
|
@@ -13,7 +13,7 @@ static VALUE
|
|
13
13
|
{
|
14
14
|
VALUE r, klass;
|
15
15
|
|
16
|
-
klass =
|
16
|
+
klass = rb_obj_class(obj);
|
17
17
|
|
18
18
|
<% definitions.each do |x| %>
|
19
19
|
if (<%=x.condition("klass")%>) {
|
@@ -24,7 +24,7 @@ static VALUE
|
|
24
24
|
|
25
25
|
if (IsNArray(obj)) {
|
26
26
|
r = rb_funcall(obj, rb_intern("coerce_cast"), 1, cT);
|
27
|
-
if (
|
27
|
+
if (rb_obj_class(r)==cT) {
|
28
28
|
<%=c_func%>(self,r);
|
29
29
|
return self;
|
30
30
|
}
|
@@ -34,8 +34,8 @@ static VALUE
|
|
34
34
|
robject_store_numeric(self,obj);
|
35
35
|
<% else %>
|
36
36
|
rb_raise(nary_eCastError, "unknown conversion from %s to %s",
|
37
|
-
rb_class2name(
|
38
|
-
rb_class2name(
|
37
|
+
rb_class2name(rb_obj_class(obj)),
|
38
|
+
rb_class2name(rb_obj_class(self)));
|
39
39
|
<% end %>
|
40
40
|
return self;
|
41
41
|
}
|
@@ -16,13 +16,13 @@ static void
|
|
16
16
|
if (idx1) {
|
17
17
|
for (; i--;) {
|
18
18
|
LOAD_BIT(a2, p2+*idx2, x); idx2++;
|
19
|
-
y =
|
19
|
+
y = m_from_sint(x);
|
20
20
|
SET_DATA_INDEX(p1,idx1,dtype,y);
|
21
21
|
}
|
22
22
|
} else {
|
23
23
|
for (; i--;) {
|
24
24
|
LOAD_BIT(a2, p2+*idx2, x); idx2++;
|
25
|
-
y =
|
25
|
+
y = m_from_sint(x);
|
26
26
|
SET_DATA_STRIDE(p1,s1,dtype,y);
|
27
27
|
}
|
28
28
|
}
|
@@ -30,13 +30,13 @@ static void
|
|
30
30
|
if (idx1) {
|
31
31
|
for (; i--;) {
|
32
32
|
LOAD_BIT(a2, p2, x); p2 += s2;
|
33
|
-
y =
|
33
|
+
y = m_from_sint(x);
|
34
34
|
SET_DATA_INDEX(p1,idx1,dtype,y);
|
35
35
|
}
|
36
36
|
} else {
|
37
37
|
for (; i--;) {
|
38
38
|
LOAD_BIT(a2, p2, x); p2 += s2;
|
39
|
-
y =
|
39
|
+
y = m_from_sint(x);
|
40
40
|
SET_DATA_STRIDE(p1,s1,dtype,y);
|
41
41
|
}
|
42
42
|
}
|
@@ -1,23 +1,36 @@
|
|
1
1
|
static void
|
2
2
|
<%=c_iter%>(na_loop_t *const lp)
|
3
3
|
{
|
4
|
-
size_t i;
|
4
|
+
size_t i=0, n;
|
5
5
|
char *p1, *p2;
|
6
6
|
ssize_t s1, s2;
|
7
7
|
size_t *idx1, *idx2;
|
8
8
|
dtype x;
|
9
|
-
|
9
|
+
|
10
|
+
<% if is_simd and !is_complex and %w[sqrt].include? name %>
|
11
|
+
size_t cnt;
|
12
|
+
size_t cnt_simd_loop = -1;
|
13
|
+
<% if is_double_precision %>
|
14
|
+
__m128d a;
|
15
|
+
<% else %>
|
16
|
+
__m128 a;
|
17
|
+
<% end %>
|
18
|
+
size_t num_pack; // Number of elements packed for SIMD.
|
19
|
+
num_pack = SIMD_ALIGNMENT_SIZE / sizeof(dtype);
|
20
|
+
<% end %>
|
21
|
+
INIT_COUNTER(lp, n);
|
10
22
|
INIT_PTR_IDX(lp, 0, p1, s1, idx1);
|
11
23
|
INIT_PTR_IDX(lp, 1, p2, s2, idx2);
|
24
|
+
|
12
25
|
if (idx1) {
|
13
26
|
if (idx2) {
|
14
|
-
for (; i
|
27
|
+
for (i=0; i<n; i++) {
|
15
28
|
GET_DATA_INDEX(p1,idx1,dtype,x);
|
16
29
|
x = m_<%=name%>(x);
|
17
30
|
SET_DATA_INDEX(p2,idx2,dtype,x);
|
18
31
|
}
|
19
32
|
} else {
|
20
|
-
for (; i
|
33
|
+
for (i=0; i<n; i++) {
|
21
34
|
GET_DATA_INDEX(p1,idx1,dtype,x);
|
22
35
|
x = m_<%=name%>(x);
|
23
36
|
SET_DATA_STRIDE(p2,s2,dtype,x);
|
@@ -25,17 +38,50 @@ static void
|
|
25
38
|
}
|
26
39
|
} else {
|
27
40
|
if (idx2) {
|
28
|
-
for (; i
|
41
|
+
for (i=0; i<n; i++) {
|
29
42
|
GET_DATA_STRIDE(p1,s1,dtype,x);
|
30
43
|
x = m_<%=name%>(x);
|
31
44
|
SET_DATA_INDEX(p2,idx2,dtype,x);
|
32
45
|
}
|
33
46
|
} else {
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
47
|
+
<% if is_simd and !is_complex and %w[sqrt].include? name %>
|
48
|
+
// Check number of elements. & Check same alignment.
|
49
|
+
if ((n >= num_pack) && is_same_aligned2(&((dtype*)p1)[i], &((dtype*)p2)[i], SIMD_ALIGNMENT_SIZE)){
|
50
|
+
// Calculate up to the position just before the start of SIMD computation.
|
51
|
+
cnt = get_count_of_elements_not_aligned_to_simd_size(&((dtype*)p1)[i], SIMD_ALIGNMENT_SIZE, sizeof(dtype));
|
52
|
+
for (i=0; i < cnt; i++) {
|
53
|
+
((dtype*)p2)[i] = m_<%=name%>(((dtype*)p1)[i]);
|
54
|
+
}
|
55
|
+
|
56
|
+
// Get the count of SIMD computation loops.
|
57
|
+
cnt_simd_loop = (n - i) % num_pack;
|
58
|
+
|
59
|
+
// SIMD computation.
|
60
|
+
if (p1 == p2) { // inplace case
|
61
|
+
for(; i < n - cnt_simd_loop; i += num_pack){
|
62
|
+
a = _mm_load_<%=simd_type%>(&((dtype*)p1)[i]);
|
63
|
+
a = _mm_<%=name%>_<%=simd_type%>(a);
|
64
|
+
_mm_store_<%=simd_type%>(&((dtype*)p1)[i], a);
|
65
|
+
}
|
66
|
+
} else {
|
67
|
+
for(; i < n - cnt_simd_loop; i += num_pack){
|
68
|
+
a = _mm_load_<%=simd_type%>(&((dtype*)p1)[i]);
|
69
|
+
a = _mm_<%=name%>_<%=simd_type%>(a);
|
70
|
+
_mm_stream_<%=simd_type%>(&((dtype*)p2)[i], a);
|
71
|
+
}
|
72
|
+
}
|
73
|
+
|
74
|
+
}
|
75
|
+
// Compute the remainder of the SIMD operation.
|
76
|
+
if (cnt_simd_loop != 0){
|
77
|
+
<% end %>
|
78
|
+
for (; i<n; i++) {
|
79
|
+
((dtype*)p2)[i] = m_<%=name%>(((dtype*)p1)[i]);
|
80
|
+
}
|
81
|
+
<% if is_simd and !is_complex and %w[sqrt].include? name %>
|
38
82
|
}
|
83
|
+
<% end %>
|
84
|
+
return;
|
39
85
|
}
|
40
86
|
}
|
41
87
|
}
|