numo-narray 0.9.1.2 → 0.9.1.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Rakefile +7 -1
- data/ext/numo/narray/array.c +6 -6
- data/ext/numo/narray/data.c +8 -8
- data/ext/numo/narray/depend.erb +4 -4
- data/ext/numo/narray/extconf.rb +2 -2
- data/ext/numo/narray/gen/cogen.rb +13 -0
- data/ext/numo/narray/gen/def/dfloat.rb +1 -0
- data/ext/numo/narray/gen/def/sfloat.rb +1 -0
- data/ext/numo/narray/gen/narray_def.rb +14 -2
- data/ext/numo/narray/gen/spec.rb +26 -10
- data/ext/numo/narray/gen/tmpl/accum_binary.c +1 -1
- data/ext/numo/narray/gen/tmpl/accum_index.c +11 -1
- data/ext/numo/narray/gen/tmpl/alloc_func.c +3 -3
- data/ext/numo/narray/gen/tmpl/binary.c +149 -10
- data/ext/numo/narray/gen/tmpl/binary2.c +1 -1
- data/ext/numo/narray/gen/tmpl/bincount.c +1 -1
- data/ext/numo/narray/gen/tmpl/cast.c +1 -1
- data/ext/numo/narray/gen/tmpl/cond_binary.c +1 -1
- data/ext/numo/narray/gen/tmpl/each.c +1 -1
- data/ext/numo/narray/gen/tmpl/each_with_index.c +1 -1
- data/ext/numo/narray/gen/tmpl/extract_data.c +3 -3
- data/ext/numo/narray/gen/tmpl/inspect.c +1 -1
- data/ext/numo/narray/gen/tmpl/lib.c +5 -0
- data/ext/numo/narray/gen/tmpl/map_with_index.c +1 -1
- data/ext/numo/narray/gen/tmpl/median.c +3 -2
- data/ext/numo/narray/gen/tmpl/pow.c +1 -1
- data/ext/numo/narray/gen/tmpl/qsort.c +118 -56
- data/ext/numo/narray/gen/tmpl/store.c +4 -4
- data/ext/numo/narray/gen/tmpl/store_bit.c +4 -4
- data/ext/numo/narray/gen/tmpl/to_a.c +1 -1
- data/ext/numo/narray/gen/tmpl/unary_s.c +55 -9
- data/ext/numo/narray/gen/tmpl_bit/each.c +1 -1
- data/ext/numo/narray/gen/tmpl_bit/each_with_index.c +1 -1
- data/ext/numo/narray/gen/tmpl_bit/inspect.c +1 -1
- data/ext/numo/narray/gen/tmpl_bit/mask.c +1 -1
- data/ext/numo/narray/gen/tmpl_bit/to_a.c +1 -1
- data/ext/numo/narray/index.c +64 -37
- data/ext/numo/narray/math.c +4 -4
- data/ext/numo/narray/narray.c +54 -29
- data/ext/numo/narray/ndloop.c +7 -7
- data/ext/numo/narray/numo/narray.h +9 -2
- data/ext/numo/narray/numo/template.h +18 -0
- data/ext/numo/narray/numo/types/bit.h +5 -0
- data/ext/numo/narray/numo/types/complex_macro.h +5 -0
- data/ext/numo/narray/numo/types/float_macro.h +5 -0
- data/ext/numo/narray/numo/types/int_macro.h +24 -0
- data/ext/numo/narray/numo/types/robj_macro.h +5 -0
- data/ext/numo/narray/numo/types/uint_macro.h +24 -0
- data/ext/numo/narray/numo/types/xint_macro.h +5 -25
- data/ext/numo/narray/rand.c +2 -29
- data/ext/numo/narray/step.c +1 -28
- data/ext/numo/narray/struct.c +26 -22
- data/lib/numo/narray/extra.rb +50 -1
- metadata +2 -2
@@ -48,7 +48,7 @@ static VALUE
|
|
48
48
|
return <%=c_func%>_self(self, other);
|
49
49
|
<% else %>
|
50
50
|
VALUE klass, v;
|
51
|
-
klass = na_upcast(
|
51
|
+
klass = na_upcast(rb_obj_class(self),rb_obj_class(other));
|
52
52
|
if (klass==cT) {
|
53
53
|
return <%=c_func%>_self(self, other);
|
54
54
|
} else {
|
@@ -44,7 +44,7 @@ static VALUE
|
|
44
44
|
return <%=c_func%>_self(self, other);
|
45
45
|
<% else %>
|
46
46
|
VALUE klass, v;
|
47
|
-
klass = na_upcast(
|
47
|
+
klass = na_upcast(rb_obj_class(self),rb_obj_class(other));
|
48
48
|
if (klass==cT) {
|
49
49
|
return <%=c_func%>_self(self, other);
|
50
50
|
} else {
|
@@ -15,7 +15,7 @@ static dtype
|
|
15
15
|
if (na->size != 1) {
|
16
16
|
rb_raise(nary_eShapeError,"narray size should be 1");
|
17
17
|
}
|
18
|
-
klass =
|
18
|
+
klass = rb_obj_class(obj);
|
19
19
|
ptr = na_get_pointer_for_read(obj);
|
20
20
|
pos = na_get_offset(obj);
|
21
21
|
<% find_tmpl("store").definitions.select{|x| x.class==Store}.each do |x| %>
|
@@ -27,14 +27,14 @@ static dtype
|
|
27
27
|
|
28
28
|
// coerce
|
29
29
|
r = rb_funcall(obj, rb_intern("coerce_cast"), 1, cT);
|
30
|
-
if (
|
30
|
+
if (rb_obj_class(r)==cT) {
|
31
31
|
return <%=c_func%>(r);
|
32
32
|
}
|
33
33
|
<% if is_object %>
|
34
34
|
return obj;
|
35
35
|
<% else %>
|
36
36
|
rb_raise(nary_eCastError, "unknown conversion from %s to %s",
|
37
|
-
rb_class2name(
|
37
|
+
rb_class2name(rb_obj_class(obj)),
|
38
38
|
rb_class2name(cT));
|
39
39
|
<% end %>
|
40
40
|
}
|
@@ -47,7 +47,7 @@ static void
|
|
47
47
|
static VALUE
|
48
48
|
<%=c_func(-1)%>(int argc, VALUE *argv, VALUE self)
|
49
49
|
{
|
50
|
-
VALUE reduce;
|
50
|
+
VALUE v, reduce;
|
51
51
|
ndfunc_arg_in_t ain[2] = {{OVERWRITE,0},{sym_reduce,0}};
|
52
52
|
ndfunc_arg_out_t aout[1] = {{INT2FIX(0),0}};
|
53
53
|
ndfunc_t ndf = {0, NDF_HAS_LOOP|NDF_FLAT_REDUCE, 2,1, ain,aout};
|
@@ -60,5 +60,6 @@ static VALUE
|
|
60
60
|
ndf.func = <%=c_iter%>;
|
61
61
|
reduce = na_reduce_dimension(argc, argv, 1, &self, &ndf, 0);
|
62
62
|
<% end %>
|
63
|
-
|
63
|
+
v = na_ndloop(&ndf, 2, self, reduce);
|
64
|
+
return <%=type_name%>_extract(v);
|
64
65
|
}
|
@@ -67,7 +67,7 @@ static VALUE
|
|
67
67
|
return <%=c_func%>_self(self,other);
|
68
68
|
<% else %>
|
69
69
|
VALUE klass, v;
|
70
|
-
klass = na_upcast(
|
70
|
+
klass = na_upcast(rb_obj_class(self),rb_obj_class(other));
|
71
71
|
if (klass==cT) {
|
72
72
|
return <%=c_func%>_self(self,other);
|
73
73
|
} else {
|
@@ -53,12 +53,52 @@
|
|
53
53
|
#define QSORT_INCL
|
54
54
|
#define Min(x, y) ((x) < (y) ? (x) : (y))
|
55
55
|
|
56
|
-
|
57
|
-
|
56
|
+
/*
|
57
|
+
* Qsort routine based on J. L. Bentley and M. D. McIlroy,
|
58
|
+
* "Engineering a sort function",
|
59
|
+
* Software--Practice and Experience 23 (1993) 1249-1265.
|
60
|
+
* We have modified their original by adding a check for already-sorted input,
|
61
|
+
* which seems to be a win per discussions on pgsql-hackers around 2006-03-21.
|
62
|
+
*/
|
63
|
+
#define swapcode(TYPE, parmi, parmj, n) \
|
64
|
+
do { \
|
65
|
+
size_t i = (n) / sizeof (TYPE); \
|
66
|
+
TYPE *pi = (TYPE *)(void *)(parmi); \
|
67
|
+
TYPE *pj = (TYPE *)(void *)(parmj); \
|
68
|
+
do { \
|
69
|
+
TYPE t = *pi; \
|
70
|
+
*pi++ = *pj; \
|
71
|
+
*pj++ = t; \
|
72
|
+
} while (--i > 0); \
|
73
|
+
} while (0)
|
74
|
+
|
75
|
+
#define SWAPINIT(a, es) swaptype = ((char *)(a) - (char *)0) % sizeof(long) || \
|
76
|
+
(es) % sizeof(long) ? 2 : (es) == sizeof(long)? 0 : 1;
|
77
|
+
|
78
|
+
static inline void
|
79
|
+
swapfunc(a, b, n, swaptype)
|
80
|
+
char *a,
|
81
|
+
*b;
|
82
|
+
size_t n;
|
83
|
+
int swaptype;
|
84
|
+
{
|
85
|
+
if (swaptype <= 1)
|
86
|
+
swapcode(long, a, b, n);
|
87
|
+
else
|
88
|
+
swapcode(char, a, b, n);
|
89
|
+
}
|
58
90
|
|
59
|
-
#define
|
91
|
+
#define swap(a, b) \
|
92
|
+
if (swaptype == 0) { \
|
93
|
+
long t = *(long *)(void *)(a); \
|
94
|
+
*(long *)(void *)(a) = *(long *)(void *)(b); \
|
95
|
+
*(long *)(void *)(b) = t; \
|
96
|
+
} else \
|
97
|
+
swapfunc(a, b, es, swaptype)
|
60
98
|
|
61
|
-
#define
|
99
|
+
#define vecswap(a, b, n) if ((n) > 0) swapfunc((a), (b), (size_t)(n), swaptype)
|
100
|
+
|
101
|
+
#define med3(a,b,c,_cmp) \
|
62
102
|
(cmpgt(b,a) ? \
|
63
103
|
(cmpgt(c,b) ? b : (cmpgt(c,a) ? c : a)) \
|
64
104
|
: (cmpgt(b,c) ? b : (cmpgt(c,a) ? a : c)))
|
@@ -76,75 +116,97 @@
|
|
76
116
|
<% end %>
|
77
117
|
<% c_func(:nodef)%>
|
78
118
|
|
79
|
-
void
|
119
|
+
static void
|
80
120
|
<%=type_name%>_qsort<%=suffix%>(void *a, size_t n, ssize_t es)
|
81
121
|
{
|
82
|
-
char
|
83
|
-
|
122
|
+
char *pa,
|
123
|
+
*pb,
|
124
|
+
*pc,
|
125
|
+
*pd,
|
126
|
+
*pl,
|
127
|
+
*pm,
|
128
|
+
*pn;
|
129
|
+
int d,
|
130
|
+
r,
|
131
|
+
swaptype,
|
132
|
+
presorted;
|
84
133
|
|
85
|
-
loop:
|
86
|
-
if (n < 7)
|
87
|
-
|
88
|
-
for (
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
134
|
+
loop:SWAPINIT(a, es);
|
135
|
+
if (n < 7)
|
136
|
+
{
|
137
|
+
for (pm = (char *) a + es; pm < (char *) a + n * es; pm += es)
|
138
|
+
for (pl = pm; pl > (char *) a && cmpgt(pl - es, pl);
|
139
|
+
pl -= es)
|
140
|
+
swap(pl, pl - es);
|
141
|
+
return;
|
142
|
+
}
|
93
143
|
presorted = 1;
|
94
|
-
for (pm = (char *) a + es; pm < (char *) a + n * es; pm += es)
|
95
|
-
|
96
|
-
|
97
|
-
|
144
|
+
for (pm = (char *) a + es; pm < (char *) a + n * es; pm += es)
|
145
|
+
{
|
146
|
+
if (cmpgt(pm - es, pm))
|
147
|
+
{
|
148
|
+
presorted = 0;
|
149
|
+
break;
|
150
|
+
}
|
98
151
|
}
|
99
|
-
}
|
100
152
|
if (presorted)
|
101
153
|
return;
|
102
154
|
pm = (char *) a + (n / 2) * es;
|
103
|
-
if (n > 7)
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
155
|
+
if (n > 7)
|
156
|
+
{
|
157
|
+
pl = (char *) a;
|
158
|
+
pn = (char *) a + (n - 1) * es;
|
159
|
+
if (n > 40)
|
160
|
+
{
|
161
|
+
d = (n / 8) * es;
|
162
|
+
pl = med3(pl, pl + d, pl + 2 * d, cmp);
|
163
|
+
pm = med3(pm - d, pm, pm + d, cmp);
|
164
|
+
pn = med3(pn - 2 * d, pn - d, pn, cmp);
|
165
|
+
}
|
166
|
+
pm = med3(pl, pm, pn, cmp);
|
111
167
|
}
|
112
|
-
|
113
|
-
}
|
114
|
-
swap(qsort_dtype, a, pm);
|
168
|
+
swap(a, pm);
|
115
169
|
pa = pb = (char *) a + es;
|
116
170
|
pc = pd = (char *) a + (n - 1) * es;
|
117
|
-
for (;;)
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
|
171
|
+
for (;;)
|
172
|
+
{
|
173
|
+
while (pb <= pc && (r = cmp(pb, a)) <= 0)
|
174
|
+
{
|
175
|
+
if (r == 0)
|
176
|
+
{
|
177
|
+
swap(pa, pb);
|
178
|
+
pa += es;
|
179
|
+
}
|
180
|
+
pb += es;
|
181
|
+
}
|
182
|
+
while (pb <= pc && (r = cmp(pc, a)) >= 0)
|
183
|
+
{
|
184
|
+
if (r == 0)
|
185
|
+
{
|
186
|
+
swap(pc, pd);
|
187
|
+
pd -= es;
|
188
|
+
}
|
189
|
+
pc -= es;
|
190
|
+
}
|
191
|
+
if (pb > pc)
|
192
|
+
break;
|
193
|
+
swap(pb, pc);
|
123
194
|
pb += es;
|
124
|
-
}
|
125
|
-
while (pb <= pc && (r = cmp(pc, a)) >= 0) {
|
126
|
-
if (r == 0) {
|
127
|
-
swap(qsort_dtype, pc, pd);
|
128
|
-
pd -= es;
|
129
|
-
}
|
130
195
|
pc -= es;
|
131
196
|
}
|
132
|
-
if (pb > pc)
|
133
|
-
break;
|
134
|
-
swap(qsort_dtype, pb, pc);
|
135
|
-
pb += es;
|
136
|
-
pc -= es;
|
137
|
-
}
|
138
197
|
pn = (char *) a + n * es;
|
139
198
|
r = Min(pa - (char *) a, pb - pa);
|
140
|
-
vecswap(
|
199
|
+
vecswap(a, pb - r, r);
|
141
200
|
r = Min(pd - pc, pn - pd - es);
|
142
|
-
vecswap(
|
201
|
+
vecswap(pb, pn - r, r);
|
143
202
|
if ((r = pb - pa) > es)
|
144
203
|
<%=type_name%>_qsort<%=suffix%>(a, r / es, es);
|
145
|
-
if ((r = pd - pc) > es)
|
146
|
-
|
147
|
-
|
148
|
-
|
149
|
-
|
204
|
+
if ((r = pd - pc) > es)
|
205
|
+
{
|
206
|
+
/* Iterate rather than recurse to save stack space */
|
207
|
+
a = pn - r;
|
208
|
+
n = r / es;
|
209
|
+
goto loop;
|
210
|
+
}
|
211
|
+
/* qsort(pn - r, r / es, es, cmp);*/
|
150
212
|
}
|
@@ -13,7 +13,7 @@ static VALUE
|
|
13
13
|
{
|
14
14
|
VALUE r, klass;
|
15
15
|
|
16
|
-
klass =
|
16
|
+
klass = rb_obj_class(obj);
|
17
17
|
|
18
18
|
<% definitions.each do |x| %>
|
19
19
|
if (<%=x.condition("klass")%>) {
|
@@ -24,7 +24,7 @@ static VALUE
|
|
24
24
|
|
25
25
|
if (IsNArray(obj)) {
|
26
26
|
r = rb_funcall(obj, rb_intern("coerce_cast"), 1, cT);
|
27
|
-
if (
|
27
|
+
if (rb_obj_class(r)==cT) {
|
28
28
|
<%=c_func%>(self,r);
|
29
29
|
return self;
|
30
30
|
}
|
@@ -34,8 +34,8 @@ static VALUE
|
|
34
34
|
robject_store_numeric(self,obj);
|
35
35
|
<% else %>
|
36
36
|
rb_raise(nary_eCastError, "unknown conversion from %s to %s",
|
37
|
-
rb_class2name(
|
38
|
-
rb_class2name(
|
37
|
+
rb_class2name(rb_obj_class(obj)),
|
38
|
+
rb_class2name(rb_obj_class(self)));
|
39
39
|
<% end %>
|
40
40
|
return self;
|
41
41
|
}
|
@@ -16,13 +16,13 @@ static void
|
|
16
16
|
if (idx1) {
|
17
17
|
for (; i--;) {
|
18
18
|
LOAD_BIT(a2, p2+*idx2, x); idx2++;
|
19
|
-
y =
|
19
|
+
y = m_from_sint(x);
|
20
20
|
SET_DATA_INDEX(p1,idx1,dtype,y);
|
21
21
|
}
|
22
22
|
} else {
|
23
23
|
for (; i--;) {
|
24
24
|
LOAD_BIT(a2, p2+*idx2, x); idx2++;
|
25
|
-
y =
|
25
|
+
y = m_from_sint(x);
|
26
26
|
SET_DATA_STRIDE(p1,s1,dtype,y);
|
27
27
|
}
|
28
28
|
}
|
@@ -30,13 +30,13 @@ static void
|
|
30
30
|
if (idx1) {
|
31
31
|
for (; i--;) {
|
32
32
|
LOAD_BIT(a2, p2, x); p2 += s2;
|
33
|
-
y =
|
33
|
+
y = m_from_sint(x);
|
34
34
|
SET_DATA_INDEX(p1,idx1,dtype,y);
|
35
35
|
}
|
36
36
|
} else {
|
37
37
|
for (; i--;) {
|
38
38
|
LOAD_BIT(a2, p2, x); p2 += s2;
|
39
|
-
y =
|
39
|
+
y = m_from_sint(x);
|
40
40
|
SET_DATA_STRIDE(p1,s1,dtype,y);
|
41
41
|
}
|
42
42
|
}
|
@@ -1,23 +1,36 @@
|
|
1
1
|
static void
|
2
2
|
<%=c_iter%>(na_loop_t *const lp)
|
3
3
|
{
|
4
|
-
size_t i;
|
4
|
+
size_t i=0, n;
|
5
5
|
char *p1, *p2;
|
6
6
|
ssize_t s1, s2;
|
7
7
|
size_t *idx1, *idx2;
|
8
8
|
dtype x;
|
9
|
-
|
9
|
+
|
10
|
+
<% if is_simd and !is_complex and %w[sqrt].include? name %>
|
11
|
+
size_t cnt;
|
12
|
+
size_t cnt_simd_loop = -1;
|
13
|
+
<% if is_double_precision %>
|
14
|
+
__m128d a;
|
15
|
+
<% else %>
|
16
|
+
__m128 a;
|
17
|
+
<% end %>
|
18
|
+
size_t num_pack; // Number of elements packed for SIMD.
|
19
|
+
num_pack = SIMD_ALIGNMENT_SIZE / sizeof(dtype);
|
20
|
+
<% end %>
|
21
|
+
INIT_COUNTER(lp, n);
|
10
22
|
INIT_PTR_IDX(lp, 0, p1, s1, idx1);
|
11
23
|
INIT_PTR_IDX(lp, 1, p2, s2, idx2);
|
24
|
+
|
12
25
|
if (idx1) {
|
13
26
|
if (idx2) {
|
14
|
-
for (; i
|
27
|
+
for (i=0; i<n; i++) {
|
15
28
|
GET_DATA_INDEX(p1,idx1,dtype,x);
|
16
29
|
x = m_<%=name%>(x);
|
17
30
|
SET_DATA_INDEX(p2,idx2,dtype,x);
|
18
31
|
}
|
19
32
|
} else {
|
20
|
-
for (; i
|
33
|
+
for (i=0; i<n; i++) {
|
21
34
|
GET_DATA_INDEX(p1,idx1,dtype,x);
|
22
35
|
x = m_<%=name%>(x);
|
23
36
|
SET_DATA_STRIDE(p2,s2,dtype,x);
|
@@ -25,17 +38,50 @@ static void
|
|
25
38
|
}
|
26
39
|
} else {
|
27
40
|
if (idx2) {
|
28
|
-
for (; i
|
41
|
+
for (i=0; i<n; i++) {
|
29
42
|
GET_DATA_STRIDE(p1,s1,dtype,x);
|
30
43
|
x = m_<%=name%>(x);
|
31
44
|
SET_DATA_INDEX(p2,idx2,dtype,x);
|
32
45
|
}
|
33
46
|
} else {
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
47
|
+
<% if is_simd and !is_complex and %w[sqrt].include? name %>
|
48
|
+
// Check number of elements. & Check same alignment.
|
49
|
+
if ((n >= num_pack) && is_same_aligned2(&((dtype*)p1)[i], &((dtype*)p2)[i], SIMD_ALIGNMENT_SIZE)){
|
50
|
+
// Calculate up to the position just before the start of SIMD computation.
|
51
|
+
cnt = get_count_of_elements_not_aligned_to_simd_size(&((dtype*)p1)[i], SIMD_ALIGNMENT_SIZE, sizeof(dtype));
|
52
|
+
for (i=0; i < cnt; i++) {
|
53
|
+
((dtype*)p2)[i] = m_<%=name%>(((dtype*)p1)[i]);
|
54
|
+
}
|
55
|
+
|
56
|
+
// Get the count of SIMD computation loops.
|
57
|
+
cnt_simd_loop = (n - i) % num_pack;
|
58
|
+
|
59
|
+
// SIMD computation.
|
60
|
+
if (p1 == p2) { // inplace case
|
61
|
+
for(; i < n - cnt_simd_loop; i += num_pack){
|
62
|
+
a = _mm_load_<%=simd_type%>(&((dtype*)p1)[i]);
|
63
|
+
a = _mm_<%=name%>_<%=simd_type%>(a);
|
64
|
+
_mm_store_<%=simd_type%>(&((dtype*)p1)[i], a);
|
65
|
+
}
|
66
|
+
} else {
|
67
|
+
for(; i < n - cnt_simd_loop; i += num_pack){
|
68
|
+
a = _mm_load_<%=simd_type%>(&((dtype*)p1)[i]);
|
69
|
+
a = _mm_<%=name%>_<%=simd_type%>(a);
|
70
|
+
_mm_stream_<%=simd_type%>(&((dtype*)p2)[i], a);
|
71
|
+
}
|
72
|
+
}
|
73
|
+
|
74
|
+
}
|
75
|
+
// Compute the remainder of the SIMD operation.
|
76
|
+
if (cnt_simd_loop != 0){
|
77
|
+
<% end %>
|
78
|
+
for (; i<n; i++) {
|
79
|
+
((dtype*)p2)[i] = m_<%=name%>(((dtype*)p1)[i]);
|
80
|
+
}
|
81
|
+
<% if is_simd and !is_complex and %w[sqrt].include? name %>
|
38
82
|
}
|
83
|
+
<% end %>
|
84
|
+
return;
|
39
85
|
}
|
40
86
|
}
|
41
87
|
}
|