numo-narray-alt 0.9.11 → 0.9.13
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile +0 -1
- data/README.md +7 -0
- data/ext/numo/narray/numo/narray.h +2 -2
- data/ext/numo/narray/numo/types/robj_macro.h +1 -1
- data/ext/numo/narray/src/mh/bincount.h +233 -0
- data/ext/numo/narray/src/mh/bit/and.h +225 -0
- data/ext/numo/narray/src/mh/bit/left_shift.h +225 -0
- data/ext/numo/narray/src/mh/bit/not.h +173 -0
- data/ext/numo/narray/src/mh/bit/or.h +225 -0
- data/ext/numo/narray/src/mh/bit/right_shift.h +225 -0
- data/ext/numo/narray/src/mh/bit/xor.h +225 -0
- data/ext/numo/narray/src/mh/coerce_cast.h +9 -0
- data/ext/numo/narray/src/mh/comp/binary_func.h +37 -0
- data/ext/numo/narray/src/mh/comp/eq.h +26 -0
- data/ext/numo/narray/src/mh/comp/ge.h +26 -0
- data/ext/numo/narray/src/mh/comp/gt.h +26 -0
- data/ext/numo/narray/src/mh/comp/le.h +26 -0
- data/ext/numo/narray/src/mh/comp/lt.h +26 -0
- data/ext/numo/narray/src/mh/comp/ne.h +26 -0
- data/ext/numo/narray/src/mh/comp/nearly_eq.h +26 -0
- data/ext/numo/narray/src/mh/divmod.h +142 -0
- data/ext/numo/narray/src/mh/eye.h +1 -1
- data/ext/numo/narray/src/mh/fill.h +94 -0
- data/ext/numo/narray/src/mh/format.h +108 -0
- data/ext/numo/narray/src/mh/format_to_a.h +89 -0
- data/ext/numo/narray/src/mh/inspect.h +33 -0
- data/ext/numo/narray/src/mh/isfinite.h +42 -0
- data/ext/numo/narray/src/mh/isinf.h +42 -0
- data/ext/numo/narray/src/mh/isnan.h +42 -0
- data/ext/numo/narray/src/mh/isneginf.h +42 -0
- data/ext/numo/narray/src/mh/isposinf.h +42 -0
- data/ext/numo/narray/src/mh/math/acos.h +2 -2
- data/ext/numo/narray/src/mh/math/acosh.h +2 -2
- data/ext/numo/narray/src/mh/math/asin.h +2 -2
- data/ext/numo/narray/src/mh/math/asinh.h +2 -2
- data/ext/numo/narray/src/mh/math/atan.h +2 -2
- data/ext/numo/narray/src/mh/math/atan2.h +3 -3
- data/ext/numo/narray/src/mh/math/atanh.h +2 -2
- data/ext/numo/narray/src/mh/math/cbrt.h +2 -2
- data/ext/numo/narray/src/mh/math/cos.h +2 -2
- data/ext/numo/narray/src/mh/math/cosh.h +2 -2
- data/ext/numo/narray/src/mh/math/erf.h +2 -2
- data/ext/numo/narray/src/mh/math/erfc.h +2 -2
- data/ext/numo/narray/src/mh/math/exp.h +2 -2
- data/ext/numo/narray/src/mh/math/exp10.h +2 -2
- data/ext/numo/narray/src/mh/math/exp2.h +2 -2
- data/ext/numo/narray/src/mh/math/expm1.h +2 -2
- data/ext/numo/narray/src/mh/math/frexp.h +3 -3
- data/ext/numo/narray/src/mh/math/hypot.h +3 -3
- data/ext/numo/narray/src/mh/math/ldexp.h +3 -3
- data/ext/numo/narray/src/mh/math/log.h +2 -2
- data/ext/numo/narray/src/mh/math/log10.h +2 -2
- data/ext/numo/narray/src/mh/math/log1p.h +2 -2
- data/ext/numo/narray/src/mh/math/log2.h +2 -2
- data/ext/numo/narray/src/mh/math/sin.h +2 -2
- data/ext/numo/narray/src/mh/math/sinc.h +2 -2
- data/ext/numo/narray/src/mh/math/sinh.h +2 -2
- data/ext/numo/narray/src/mh/math/sqrt.h +8 -8
- data/ext/numo/narray/src/mh/math/tan.h +2 -2
- data/ext/numo/narray/src/mh/math/tanh.h +2 -2
- data/ext/numo/narray/src/mh/math/unary_func.h +3 -3
- data/ext/numo/narray/src/mh/minus.h +125 -0
- data/ext/numo/narray/src/mh/op/add.h +78 -0
- data/ext/numo/narray/src/mh/op/binary_func.h +423 -0
- data/ext/numo/narray/src/mh/op/div.h +118 -0
- data/ext/numo/narray/src/mh/op/mod.h +108 -0
- data/ext/numo/narray/src/mh/op/mul.h +78 -0
- data/ext/numo/narray/src/mh/op/sub.h +78 -0
- data/ext/numo/narray/src/mh/pow.h +197 -0
- data/ext/numo/narray/src/mh/rand.h +2 -2
- data/ext/numo/narray/src/mh/rand_norm.h +125 -0
- data/ext/numo/narray/src/mh/reciprocal.h +125 -0
- data/ext/numo/narray/src/mh/round/ceil.h +11 -0
- data/ext/numo/narray/src/mh/round/floor.h +11 -0
- data/ext/numo/narray/src/mh/round/rint.h +9 -0
- data/ext/numo/narray/src/mh/round/round.h +11 -0
- data/ext/numo/narray/src/mh/round/trunc.h +11 -0
- data/ext/numo/narray/src/mh/round/unary_func.h +127 -0
- data/ext/numo/narray/src/mh/sign.h +125 -0
- data/ext/numo/narray/src/mh/square.h +125 -0
- data/ext/numo/narray/src/mh/to_a.h +78 -0
- data/ext/numo/narray/src/t_bit.c +45 -234
- data/ext/numo/narray/src/t_dcomplex.c +457 -2075
- data/ext/numo/narray/src/t_dfloat.c +154 -2560
- data/ext/numo/narray/src/t_int16.c +408 -2542
- data/ext/numo/narray/src/t_int32.c +408 -2542
- data/ext/numo/narray/src/t_int64.c +408 -2542
- data/ext/numo/narray/src/t_int8.c +409 -2138
- data/ext/numo/narray/src/t_robject.c +376 -2161
- data/ext/numo/narray/src/t_scomplex.c +435 -2053
- data/ext/numo/narray/src/t_sfloat.c +149 -2557
- data/ext/numo/narray/src/t_uint16.c +407 -2537
- data/ext/numo/narray/src/t_uint32.c +407 -2537
- data/ext/numo/narray/src/t_uint64.c +407 -2537
- data/ext/numo/narray/src/t_uint8.c +407 -2132
- metadata +47 -2
|
@@ -42,7 +42,41 @@ static ID id_to_a;
|
|
|
42
42
|
VALUE cT;
|
|
43
43
|
extern VALUE cRT;
|
|
44
44
|
|
|
45
|
+
#include "mh/coerce_cast.h"
|
|
46
|
+
#include "mh/to_a.h"
|
|
47
|
+
#include "mh/fill.h"
|
|
48
|
+
#include "mh/format.h"
|
|
49
|
+
#include "mh/format_to_a.h"
|
|
50
|
+
#include "mh/inspect.h"
|
|
51
|
+
#include "mh/op/add.h"
|
|
52
|
+
#include "mh/op/sub.h"
|
|
53
|
+
#include "mh/op/mul.h"
|
|
54
|
+
#include "mh/op/div.h"
|
|
55
|
+
#include "mh/op/mod.h"
|
|
56
|
+
#include "mh/divmod.h"
|
|
57
|
+
#include "mh/pow.h"
|
|
58
|
+
#include "mh/minus.h"
|
|
59
|
+
#include "mh/reciprocal.h"
|
|
60
|
+
#include "mh/sign.h"
|
|
61
|
+
#include "mh/square.h"
|
|
62
|
+
#include "mh/round/floor.h"
|
|
63
|
+
#include "mh/round/round.h"
|
|
64
|
+
#include "mh/round/ceil.h"
|
|
65
|
+
#include "mh/round/trunc.h"
|
|
66
|
+
#include "mh/round/rint.h"
|
|
67
|
+
#include "mh/comp/eq.h"
|
|
68
|
+
#include "mh/comp/ne.h"
|
|
69
|
+
#include "mh/comp/nearly_eq.h"
|
|
70
|
+
#include "mh/comp/gt.h"
|
|
71
|
+
#include "mh/comp/ge.h"
|
|
72
|
+
#include "mh/comp/lt.h"
|
|
73
|
+
#include "mh/comp/le.h"
|
|
45
74
|
#include "mh/clip.h"
|
|
75
|
+
#include "mh/isnan.h"
|
|
76
|
+
#include "mh/isinf.h"
|
|
77
|
+
#include "mh/isposinf.h"
|
|
78
|
+
#include "mh/isneginf.h"
|
|
79
|
+
#include "mh/isfinite.h"
|
|
46
80
|
#include "mh/sum.h"
|
|
47
81
|
#include "mh/prod.h"
|
|
48
82
|
#include "mh/mean.h"
|
|
@@ -66,6 +100,7 @@ extern VALUE cRT;
|
|
|
66
100
|
#include "mh/logseq.h"
|
|
67
101
|
#include "mh/eye.h"
|
|
68
102
|
#include "mh/rand.h"
|
|
103
|
+
#include "mh/rand_norm.h"
|
|
69
104
|
#include "mh/math/sqrt.h"
|
|
70
105
|
#include "mh/math/cbrt.h"
|
|
71
106
|
#include "mh/math/log.h"
|
|
@@ -98,7 +133,48 @@ extern VALUE cRT;
|
|
|
98
133
|
|
|
99
134
|
typedef double dfloat; // Type aliases for shorter notation
|
|
100
135
|
// following the codebase naming convention.
|
|
136
|
+
DEF_NARRAY_COERCE_CAST_METHOD_FUNC(dfloat)
|
|
137
|
+
DEF_NARRAY_TO_A_METHOD_FUNC(dfloat)
|
|
138
|
+
DEF_NARRAY_FILL_METHOD_FUNC(dfloat)
|
|
139
|
+
DEF_NARRAY_FORMAT_METHOD_FUNC(dfloat)
|
|
140
|
+
DEF_NARRAY_FORMAT_TO_A_METHOD_FUNC(dfloat)
|
|
141
|
+
DEF_NARRAY_INSPECT_METHOD_FUNC(dfloat)
|
|
142
|
+
#ifdef __SSE2__
|
|
143
|
+
DEF_NARRAY_DFLT_ADD_SSE2_METHOD_FUNC()
|
|
144
|
+
DEF_NARRAY_DFLT_SUB_SSE2_METHOD_FUNC()
|
|
145
|
+
DEF_NARRAY_DFLT_MUL_SSE2_METHOD_FUNC()
|
|
146
|
+
DEF_NARRAY_DFLT_DIV_SSE2_METHOD_FUNC()
|
|
147
|
+
#else
|
|
148
|
+
DEF_NARRAY_ADD_METHOD_FUNC(dfloat, numo_cDFloat)
|
|
149
|
+
DEF_NARRAY_SUB_METHOD_FUNC(dfloat, numo_cDFloat)
|
|
150
|
+
DEF_NARRAY_MUL_METHOD_FUNC(dfloat, numo_cDFloat)
|
|
151
|
+
DEF_NARRAY_FLT_DIV_METHOD_FUNC(dfloat, numo_cDFloat)
|
|
152
|
+
#endif
|
|
153
|
+
DEF_NARRAY_FLT_MOD_METHOD_FUNC(dfloat, numo_cDFloat)
|
|
154
|
+
DEF_NARRAY_FLT_DIVMOD_METHOD_FUNC(dfloat, numo_cDFloat)
|
|
155
|
+
DEF_NARRAY_POW_METHOD_FUNC(dfloat, numo_cDFloat)
|
|
156
|
+
DEF_NARRAY_MINUS_METHOD_FUNC(dfloat, numo_cDFloat)
|
|
157
|
+
DEF_NARRAY_RECIPROCAL_METHOD_FUNC(dfloat, numo_cDFloat)
|
|
158
|
+
DEF_NARRAY_SIGN_METHOD_FUNC(dfloat, numo_cDFloat)
|
|
159
|
+
DEF_NARRAY_SQUARE_METHOD_FUNC(dfloat, numo_cDFloat)
|
|
160
|
+
DEF_NARRAY_FLT_FLOOR_METHOD_FUNC(dfloat, numo_cDFloat)
|
|
161
|
+
DEF_NARRAY_FLT_ROUND_METHOD_FUNC(dfloat, numo_cDFloat)
|
|
162
|
+
DEF_NARRAY_FLT_CEIL_METHOD_FUNC(dfloat, numo_cDFloat)
|
|
163
|
+
DEF_NARRAY_FLT_TRUNC_METHOD_FUNC(dfloat, numo_cDFloat)
|
|
164
|
+
DEF_NARRAY_FLT_RINT_METHOD_FUNC(dfloat, numo_cDFloat)
|
|
165
|
+
DEF_NARRAY_EQ_METHOD_FUNC(dfloat, numo_cDFloat)
|
|
166
|
+
DEF_NARRAY_NE_METHOD_FUNC(dfloat, numo_cDFloat)
|
|
167
|
+
DEF_NARRAY_NEARLY_EQ_METHOD_FUNC(dfloat, numo_cDFloat)
|
|
168
|
+
DEF_NARRAY_GT_METHOD_FUNC(dfloat, numo_cDFloat)
|
|
169
|
+
DEF_NARRAY_GE_METHOD_FUNC(dfloat, numo_cDFloat)
|
|
170
|
+
DEF_NARRAY_LT_METHOD_FUNC(dfloat, numo_cDFloat)
|
|
171
|
+
DEF_NARRAY_LE_METHOD_FUNC(dfloat, numo_cDFloat)
|
|
101
172
|
DEF_NARRAY_CLIP_METHOD_FUNC(dfloat, numo_cDFloat)
|
|
173
|
+
DEF_NARRAY_FLT_ISNAN_METHOD_FUNC(dfloat, numo_cDFloat)
|
|
174
|
+
DEF_NARRAY_FLT_ISINF_METHOD_FUNC(dfloat, numo_cDFloat)
|
|
175
|
+
DEF_NARRAY_FLT_ISPOSINF_METHOD_FUNC(dfloat, numo_cDFloat)
|
|
176
|
+
DEF_NARRAY_FLT_ISNEGINF_METHOD_FUNC(dfloat, numo_cDFloat)
|
|
177
|
+
DEF_NARRAY_FLT_ISFINITE_METHOD_FUNC(dfloat, numo_cDFloat)
|
|
102
178
|
DEF_NARRAY_FLT_SUM_METHOD_FUNC(dfloat, numo_cDFloat)
|
|
103
179
|
DEF_NARRAY_FLT_PROD_METHOD_FUNC(dfloat, numo_cDFloat)
|
|
104
180
|
DEF_NARRAY_FLT_MEAN_METHOD_FUNC(dfloat, numo_cDFloat, double, numo_cDFloat)
|
|
@@ -122,6 +198,7 @@ DEF_NARRAY_FLT_SEQ_METHOD_FUNC(dfloat)
|
|
|
122
198
|
DEF_NARRAY_FLT_LOGSEQ_METHOD_FUNC(dfloat)
|
|
123
199
|
DEF_NARRAY_EYE_METHOD_FUNC(dfloat)
|
|
124
200
|
DEF_NARRAY_FLT_RAND_METHOD_FUNC(dfloat)
|
|
201
|
+
DEF_NARRAY_FLT_RAND_NORM_METHOD_FUNC(dfloat)
|
|
125
202
|
#ifdef __SSE2__
|
|
126
203
|
DEF_NARRAY_FLT_SQRT_SSE2_DBL_METHOD_FUNC(dfloat, numo_cDFloat)
|
|
127
204
|
#else
|
|
@@ -1275,171 +1352,6 @@ static VALUE dfloat_aset(int argc, VALUE* argv, VALUE self) {
|
|
|
1275
1352
|
return argv[argc];
|
|
1276
1353
|
}
|
|
1277
1354
|
|
|
1278
|
-
static VALUE dfloat_coerce_cast(VALUE self, VALUE type) {
|
|
1279
|
-
return Qnil;
|
|
1280
|
-
}
|
|
1281
|
-
|
|
1282
|
-
static void iter_dfloat_to_a(na_loop_t* const lp) {
|
|
1283
|
-
size_t i, s1;
|
|
1284
|
-
char* p1;
|
|
1285
|
-
size_t* idx1;
|
|
1286
|
-
dtype x;
|
|
1287
|
-
volatile VALUE a, y;
|
|
1288
|
-
|
|
1289
|
-
INIT_COUNTER(lp, i);
|
|
1290
|
-
INIT_PTR_IDX(lp, 0, p1, s1, idx1);
|
|
1291
|
-
a = rb_ary_new2(i);
|
|
1292
|
-
rb_ary_push(lp->args[1].value, a);
|
|
1293
|
-
if (idx1) {
|
|
1294
|
-
for (; i--;) {
|
|
1295
|
-
GET_DATA_INDEX(p1, idx1, dtype, x);
|
|
1296
|
-
y = m_data_to_num(x);
|
|
1297
|
-
rb_ary_push(a, y);
|
|
1298
|
-
}
|
|
1299
|
-
} else {
|
|
1300
|
-
for (; i--;) {
|
|
1301
|
-
GET_DATA_STRIDE(p1, s1, dtype, x);
|
|
1302
|
-
y = m_data_to_num(x);
|
|
1303
|
-
rb_ary_push(a, y);
|
|
1304
|
-
}
|
|
1305
|
-
}
|
|
1306
|
-
}
|
|
1307
|
-
|
|
1308
|
-
static VALUE dfloat_to_a(VALUE self) {
|
|
1309
|
-
ndfunc_arg_in_t ain[3] = { { Qnil, 0 }, { sym_loop_opt }, { sym_option } };
|
|
1310
|
-
ndfunc_arg_out_t aout[1] = { { rb_cArray, 0 } }; // dummy?
|
|
1311
|
-
ndfunc_t ndf = { iter_dfloat_to_a, FULL_LOOP_NIP, 3, 1, ain, aout };
|
|
1312
|
-
return na_ndloop_cast_narray_to_rarray(&ndf, self, Qnil);
|
|
1313
|
-
}
|
|
1314
|
-
|
|
1315
|
-
static void iter_dfloat_fill(na_loop_t* const lp) {
|
|
1316
|
-
size_t i;
|
|
1317
|
-
char* p1;
|
|
1318
|
-
ssize_t s1;
|
|
1319
|
-
size_t* idx1;
|
|
1320
|
-
VALUE x = lp->option;
|
|
1321
|
-
dtype y;
|
|
1322
|
-
INIT_COUNTER(lp, i);
|
|
1323
|
-
INIT_PTR_IDX(lp, 0, p1, s1, idx1);
|
|
1324
|
-
y = m_num_to_data(x);
|
|
1325
|
-
if (idx1) {
|
|
1326
|
-
for (; i--;) {
|
|
1327
|
-
SET_DATA_INDEX(p1, idx1, dtype, y);
|
|
1328
|
-
}
|
|
1329
|
-
} else {
|
|
1330
|
-
for (; i--;) {
|
|
1331
|
-
SET_DATA_STRIDE(p1, s1, dtype, y);
|
|
1332
|
-
}
|
|
1333
|
-
}
|
|
1334
|
-
}
|
|
1335
|
-
|
|
1336
|
-
static VALUE dfloat_fill(VALUE self, VALUE val) {
|
|
1337
|
-
ndfunc_arg_in_t ain[2] = { { OVERWRITE, 0 }, { sym_option } };
|
|
1338
|
-
ndfunc_t ndf = { iter_dfloat_fill, FULL_LOOP, 2, 0, ain, 0 };
|
|
1339
|
-
|
|
1340
|
-
na_ndloop(&ndf, 2, self, val);
|
|
1341
|
-
return self;
|
|
1342
|
-
}
|
|
1343
|
-
|
|
1344
|
-
static VALUE format_dfloat(VALUE fmt, dtype* x) {
|
|
1345
|
-
// fix-me
|
|
1346
|
-
char s[48];
|
|
1347
|
-
int n;
|
|
1348
|
-
|
|
1349
|
-
if (NIL_P(fmt)) {
|
|
1350
|
-
n = m_sprintf(s, *x);
|
|
1351
|
-
return rb_str_new(s, n);
|
|
1352
|
-
}
|
|
1353
|
-
return rb_funcall(fmt, '%', 1, m_data_to_num(*x));
|
|
1354
|
-
}
|
|
1355
|
-
|
|
1356
|
-
static void iter_dfloat_format(na_loop_t* const lp) {
|
|
1357
|
-
size_t i;
|
|
1358
|
-
char *p1, *p2;
|
|
1359
|
-
ssize_t s1, s2;
|
|
1360
|
-
size_t* idx1;
|
|
1361
|
-
dtype* x;
|
|
1362
|
-
VALUE y;
|
|
1363
|
-
VALUE fmt = lp->option;
|
|
1364
|
-
INIT_COUNTER(lp, i);
|
|
1365
|
-
INIT_PTR_IDX(lp, 0, p1, s1, idx1);
|
|
1366
|
-
INIT_PTR(lp, 1, p2, s2);
|
|
1367
|
-
if (idx1) {
|
|
1368
|
-
for (; i--;) {
|
|
1369
|
-
x = (dtype*)(p1 + *idx1);
|
|
1370
|
-
idx1++;
|
|
1371
|
-
y = format_dfloat(fmt, x);
|
|
1372
|
-
SET_DATA_STRIDE(p2, s2, VALUE, y);
|
|
1373
|
-
}
|
|
1374
|
-
} else {
|
|
1375
|
-
for (; i--;) {
|
|
1376
|
-
x = (dtype*)p1;
|
|
1377
|
-
p1 += s1;
|
|
1378
|
-
y = format_dfloat(fmt, x);
|
|
1379
|
-
SET_DATA_STRIDE(p2, s2, VALUE, y);
|
|
1380
|
-
}
|
|
1381
|
-
}
|
|
1382
|
-
}
|
|
1383
|
-
|
|
1384
|
-
static VALUE dfloat_format(int argc, VALUE* argv, VALUE self) {
|
|
1385
|
-
VALUE fmt = Qnil;
|
|
1386
|
-
|
|
1387
|
-
ndfunc_arg_in_t ain[2] = { { Qnil, 0 }, { sym_option } };
|
|
1388
|
-
ndfunc_arg_out_t aout[1] = { { numo_cRObject, 0 } };
|
|
1389
|
-
ndfunc_t ndf = { iter_dfloat_format, FULL_LOOP_NIP, 2, 1, ain, aout };
|
|
1390
|
-
|
|
1391
|
-
rb_scan_args(argc, argv, "01", &fmt);
|
|
1392
|
-
return na_ndloop(&ndf, 2, self, fmt);
|
|
1393
|
-
}
|
|
1394
|
-
|
|
1395
|
-
static void iter_dfloat_format_to_a(na_loop_t* const lp) {
|
|
1396
|
-
size_t i;
|
|
1397
|
-
char* p1;
|
|
1398
|
-
ssize_t s1;
|
|
1399
|
-
size_t* idx1;
|
|
1400
|
-
dtype* x;
|
|
1401
|
-
VALUE y;
|
|
1402
|
-
volatile VALUE a;
|
|
1403
|
-
VALUE fmt = lp->option;
|
|
1404
|
-
INIT_COUNTER(lp, i);
|
|
1405
|
-
INIT_PTR_IDX(lp, 0, p1, s1, idx1);
|
|
1406
|
-
a = rb_ary_new2(i);
|
|
1407
|
-
rb_ary_push(lp->args[1].value, a);
|
|
1408
|
-
if (idx1) {
|
|
1409
|
-
for (; i--;) {
|
|
1410
|
-
x = (dtype*)(p1 + *idx1);
|
|
1411
|
-
idx1++;
|
|
1412
|
-
y = format_dfloat(fmt, x);
|
|
1413
|
-
rb_ary_push(a, y);
|
|
1414
|
-
}
|
|
1415
|
-
} else {
|
|
1416
|
-
for (; i--;) {
|
|
1417
|
-
x = (dtype*)p1;
|
|
1418
|
-
p1 += s1;
|
|
1419
|
-
y = format_dfloat(fmt, x);
|
|
1420
|
-
rb_ary_push(a, y);
|
|
1421
|
-
}
|
|
1422
|
-
}
|
|
1423
|
-
}
|
|
1424
|
-
|
|
1425
|
-
static VALUE dfloat_format_to_a(int argc, VALUE* argv, VALUE self) {
|
|
1426
|
-
VALUE fmt = Qnil;
|
|
1427
|
-
ndfunc_arg_in_t ain[3] = { { Qnil, 0 }, { sym_loop_opt }, { sym_option } };
|
|
1428
|
-
ndfunc_arg_out_t aout[1] = { { rb_cArray, 0 } }; // dummy?
|
|
1429
|
-
ndfunc_t ndf = { iter_dfloat_format_to_a, FULL_LOOP_NIP, 3, 1, ain, aout };
|
|
1430
|
-
|
|
1431
|
-
rb_scan_args(argc, argv, "01", &fmt);
|
|
1432
|
-
return na_ndloop_cast_narray_to_rarray(&ndf, self, fmt);
|
|
1433
|
-
}
|
|
1434
|
-
|
|
1435
|
-
static VALUE iter_dfloat_inspect(char* ptr, size_t pos, VALUE fmt) {
|
|
1436
|
-
return format_dfloat(fmt, (dtype*)(ptr + pos));
|
|
1437
|
-
}
|
|
1438
|
-
|
|
1439
|
-
static VALUE dfloat_inspect(VALUE ary) {
|
|
1440
|
-
return na_ndloop_inspect(ary, iter_dfloat_inspect, Qnil);
|
|
1441
|
-
}
|
|
1442
|
-
|
|
1443
1355
|
static void iter_dfloat_each(na_loop_t* const lp) {
|
|
1444
1356
|
size_t i, s1;
|
|
1445
1357
|
char* p1;
|
|
@@ -1725,22 +1637,12 @@ static VALUE dfloat_abs(VALUE self) {
|
|
|
1725
1637
|
#define check_intdivzero(y) \
|
|
1726
1638
|
{}
|
|
1727
1639
|
|
|
1728
|
-
static void
|
|
1640
|
+
static void iter_dfloat_copysign(na_loop_t* const lp) {
|
|
1729
1641
|
size_t i = 0;
|
|
1730
1642
|
size_t n;
|
|
1731
1643
|
char *p1, *p2, *p3;
|
|
1732
1644
|
ssize_t s1, s2, s3;
|
|
1733
1645
|
|
|
1734
|
-
#ifdef __SSE2__
|
|
1735
|
-
size_t cnt;
|
|
1736
|
-
size_t cnt_simd_loop = -1;
|
|
1737
|
-
|
|
1738
|
-
__m128d a;
|
|
1739
|
-
__m128d b;
|
|
1740
|
-
|
|
1741
|
-
size_t num_pack; // Number of elements packed for SIMD.
|
|
1742
|
-
num_pack = SIMD_ALIGNMENT_SIZE / sizeof(dtype);
|
|
1743
|
-
#endif
|
|
1744
1646
|
INIT_COUNTER(lp, n);
|
|
1745
1647
|
INIT_PTR(lp, 0, p1, s1);
|
|
1746
1648
|
INIT_PTR(lp, 1, p2, s2);
|
|
@@ -1751,73 +1653,17 @@ static void iter_dfloat_add(na_loop_t* const lp) {
|
|
|
1751
1653
|
is_aligned(p3, sizeof(dtype))) {
|
|
1752
1654
|
|
|
1753
1655
|
if (s1 == sizeof(dtype) && s2 == sizeof(dtype) && s3 == sizeof(dtype)) {
|
|
1754
|
-
|
|
1755
|
-
// Check number of elements. & Check same alignment.
|
|
1756
|
-
if ((n >= num_pack) &&
|
|
1757
|
-
is_same_aligned3(
|
|
1758
|
-
&((dtype*)p1)[i], &((dtype*)p2)[i], &((dtype*)p3)[i], SIMD_ALIGNMENT_SIZE
|
|
1759
|
-
)) {
|
|
1760
|
-
// Calculate up to the position just before the start of SIMD computation.
|
|
1761
|
-
cnt = get_count_of_elements_not_aligned_to_simd_size(
|
|
1762
|
-
&((dtype*)p1)[i], SIMD_ALIGNMENT_SIZE, sizeof(dtype)
|
|
1763
|
-
);
|
|
1764
|
-
#endif
|
|
1765
|
-
if (p1 == p3) { // inplace case
|
|
1766
|
-
#ifdef __SSE2__
|
|
1767
|
-
for (; i < cnt; i++) {
|
|
1768
|
-
#else
|
|
1656
|
+
if (p1 == p3) { // inplace case
|
|
1769
1657
|
for (; i < n; i++) {
|
|
1770
1658
|
check_intdivzero(((dtype*)p2)[i]);
|
|
1771
|
-
|
|
1772
|
-
|
|
1773
|
-
|
|
1774
|
-
} else {
|
|
1775
|
-
#ifdef __SSE2__
|
|
1776
|
-
for (; i < cnt; i++) {
|
|
1777
|
-
#else
|
|
1659
|
+
((dtype*)p1)[i] = m_copysign(((dtype*)p1)[i], ((dtype*)p2)[i]);
|
|
1660
|
+
}
|
|
1661
|
+
} else {
|
|
1778
1662
|
for (; i < n; i++) {
|
|
1779
1663
|
check_intdivzero(((dtype*)p2)[i]);
|
|
1780
|
-
|
|
1781
|
-
((dtype*)p3)[i] = m_add(((dtype*)p1)[i], ((dtype*)p2)[i]);
|
|
1782
|
-
}
|
|
1783
|
-
}
|
|
1784
|
-
#ifdef __SSE2__
|
|
1785
|
-
// Get the count of SIMD computation loops.
|
|
1786
|
-
cnt_simd_loop = (n - i) % num_pack;
|
|
1787
|
-
|
|
1788
|
-
// SIMD computation.
|
|
1789
|
-
if (p1 == p3) { // inplace case
|
|
1790
|
-
for (; i < n - cnt_simd_loop; i += num_pack) {
|
|
1791
|
-
a = _mm_load_pd(&((dtype*)p1)[i]);
|
|
1792
|
-
b = _mm_load_pd(&((dtype*)p2)[i]);
|
|
1793
|
-
a = _mm_add_pd(a, b);
|
|
1794
|
-
_mm_store_pd(&((dtype*)p1)[i], a);
|
|
1795
|
-
}
|
|
1796
|
-
} else {
|
|
1797
|
-
for (; i < n - cnt_simd_loop; i += num_pack) {
|
|
1798
|
-
a = _mm_load_pd(&((dtype*)p1)[i]);
|
|
1799
|
-
b = _mm_load_pd(&((dtype*)p2)[i]);
|
|
1800
|
-
a = _mm_add_pd(a, b);
|
|
1801
|
-
_mm_stream_pd(&((dtype*)p3)[i], a);
|
|
1802
|
-
}
|
|
1803
|
-
}
|
|
1804
|
-
}
|
|
1805
|
-
|
|
1806
|
-
// Compute the remainder of the SIMD operation.
|
|
1807
|
-
if (cnt_simd_loop != 0) {
|
|
1808
|
-
if (p1 == p3) { // inplace case
|
|
1809
|
-
for (; i < n; i++) {
|
|
1810
|
-
check_intdivzero(((dtype*)p2)[i]);
|
|
1811
|
-
((dtype*)p1)[i] = m_add(((dtype*)p1)[i], ((dtype*)p2)[i]);
|
|
1812
|
-
}
|
|
1813
|
-
} else {
|
|
1814
|
-
for (; i < n; i++) {
|
|
1815
|
-
check_intdivzero(((dtype*)p2)[i]);
|
|
1816
|
-
((dtype*)p3)[i] = m_add(((dtype*)p1)[i], ((dtype*)p2)[i]);
|
|
1817
|
-
}
|
|
1664
|
+
((dtype*)p3)[i] = m_copysign(((dtype*)p1)[i], ((dtype*)p2)[i]);
|
|
1818
1665
|
}
|
|
1819
1666
|
}
|
|
1820
|
-
#endif
|
|
1821
1667
|
return;
|
|
1822
1668
|
}
|
|
1823
1669
|
|
|
@@ -1828,71 +1674,18 @@ static void iter_dfloat_add(na_loop_t* const lp) {
|
|
|
1828
1674
|
if (s2 == 0) { // Broadcasting from scalar value.
|
|
1829
1675
|
check_intdivzero(*(dtype*)p2);
|
|
1830
1676
|
if (s1 == sizeof(dtype) && s3 == sizeof(dtype)) {
|
|
1831
|
-
|
|
1832
|
-
// Broadcast a scalar value and use it for SIMD computation.
|
|
1833
|
-
b = _mm_load1_pd(&((dtype*)p2)[0]);
|
|
1834
|
-
|
|
1835
|
-
// Check number of elements. & Check same alignment.
|
|
1836
|
-
if ((n >= num_pack) &&
|
|
1837
|
-
is_same_aligned2(&((dtype*)p1)[i], &((dtype*)p3)[i], SIMD_ALIGNMENT_SIZE)) {
|
|
1838
|
-
// Calculate up to the position just before the start of SIMD computation.
|
|
1839
|
-
cnt = get_count_of_elements_not_aligned_to_simd_size(
|
|
1840
|
-
&((dtype*)p1)[i], SIMD_ALIGNMENT_SIZE, sizeof(dtype)
|
|
1841
|
-
);
|
|
1842
|
-
#endif
|
|
1843
|
-
if (p1 == p3) { // inplace case
|
|
1844
|
-
#ifdef __SSE2__
|
|
1845
|
-
for (; i < cnt; i++) {
|
|
1846
|
-
#else
|
|
1847
|
-
for (; i < n; i++) {
|
|
1848
|
-
#endif
|
|
1849
|
-
((dtype*)p1)[i] = m_add(((dtype*)p1)[i], *(dtype*)p2);
|
|
1850
|
-
}
|
|
1851
|
-
} else {
|
|
1852
|
-
#ifdef __SSE2__
|
|
1853
|
-
for (; i < cnt; i++) {
|
|
1854
|
-
#else
|
|
1677
|
+
if (p1 == p3) { // inplace case
|
|
1855
1678
|
for (; i < n; i++) {
|
|
1856
|
-
|
|
1857
|
-
((dtype*)p3)[i] = m_add(((dtype*)p1)[i], *(dtype*)p2);
|
|
1858
|
-
}
|
|
1859
|
-
}
|
|
1860
|
-
#ifdef __SSE2__
|
|
1861
|
-
// Get the count of SIMD computation loops.
|
|
1862
|
-
cnt_simd_loop = (n - i) % num_pack;
|
|
1863
|
-
|
|
1864
|
-
// SIMD computation.
|
|
1865
|
-
if (p1 == p3) { // inplace case
|
|
1866
|
-
for (; i < n - cnt_simd_loop; i += num_pack) {
|
|
1867
|
-
a = _mm_load_pd(&((dtype*)p1)[i]);
|
|
1868
|
-
a = _mm_add_pd(a, b);
|
|
1869
|
-
_mm_store_pd(&((dtype*)p1)[i], a);
|
|
1870
|
-
}
|
|
1871
|
-
} else {
|
|
1872
|
-
for (; i < n - cnt_simd_loop; i += num_pack) {
|
|
1873
|
-
a = _mm_load_pd(&((dtype*)p1)[i]);
|
|
1874
|
-
a = _mm_add_pd(a, b);
|
|
1875
|
-
_mm_stream_pd(&((dtype*)p3)[i], a);
|
|
1876
|
-
}
|
|
1679
|
+
((dtype*)p1)[i] = m_copysign(((dtype*)p1)[i], *(dtype*)p2);
|
|
1877
1680
|
}
|
|
1878
|
-
}
|
|
1879
|
-
|
|
1880
|
-
|
|
1881
|
-
if (cnt_simd_loop != 0) {
|
|
1882
|
-
if (p1 == p3) { // inplace case
|
|
1883
|
-
for (; i < n; i++) {
|
|
1884
|
-
((dtype*)p1)[i] = m_add(((dtype*)p1)[i], *(dtype*)p2);
|
|
1885
|
-
}
|
|
1886
|
-
} else {
|
|
1887
|
-
for (; i < n; i++) {
|
|
1888
|
-
((dtype*)p3)[i] = m_add(((dtype*)p1)[i], *(dtype*)p2);
|
|
1889
|
-
}
|
|
1681
|
+
} else {
|
|
1682
|
+
for (; i < n; i++) {
|
|
1683
|
+
((dtype*)p3)[i] = m_copysign(((dtype*)p1)[i], *(dtype*)p2);
|
|
1890
1684
|
}
|
|
1891
1685
|
}
|
|
1892
|
-
#endif
|
|
1893
1686
|
} else {
|
|
1894
1687
|
for (i = 0; i < n; i++) {
|
|
1895
|
-
*(dtype*)p3 =
|
|
1688
|
+
*(dtype*)p3 = m_copysign(*(dtype*)p1, *(dtype*)p2);
|
|
1896
1689
|
p1 += s1;
|
|
1897
1690
|
p3 += s3;
|
|
1898
1691
|
}
|
|
@@ -1901,14 +1694,14 @@ static void iter_dfloat_add(na_loop_t* const lp) {
|
|
|
1901
1694
|
if (p1 == p3) { // inplace case
|
|
1902
1695
|
for (i = 0; i < n; i++) {
|
|
1903
1696
|
check_intdivzero(*(dtype*)p2);
|
|
1904
|
-
*(dtype*)p1 =
|
|
1697
|
+
*(dtype*)p1 = m_copysign(*(dtype*)p1, *(dtype*)p2);
|
|
1905
1698
|
p1 += s1;
|
|
1906
1699
|
p2 += s2;
|
|
1907
1700
|
}
|
|
1908
1701
|
} else {
|
|
1909
1702
|
for (i = 0; i < n; i++) {
|
|
1910
1703
|
check_intdivzero(*(dtype*)p2);
|
|
1911
|
-
*(dtype*)p3 =
|
|
1704
|
+
*(dtype*)p3 = m_copysign(*(dtype*)p1, *(dtype*)p2);
|
|
1912
1705
|
p1 += s1;
|
|
1913
1706
|
p2 += s2;
|
|
1914
1707
|
p3 += s3;
|
|
@@ -1925,2226 +1718,100 @@ static void iter_dfloat_add(na_loop_t* const lp) {
|
|
|
1925
1718
|
GET_DATA_STRIDE(p1, s1, dtype, x);
|
|
1926
1719
|
GET_DATA_STRIDE(p2, s2, dtype, y);
|
|
1927
1720
|
check_intdivzero(y);
|
|
1928
|
-
z =
|
|
1721
|
+
z = m_copysign(x, y);
|
|
1929
1722
|
SET_DATA_STRIDE(p3, s3, dtype, z);
|
|
1930
1723
|
}
|
|
1931
1724
|
//
|
|
1932
1725
|
}
|
|
1933
1726
|
#undef check_intdivzero
|
|
1934
1727
|
|
|
1935
|
-
static VALUE
|
|
1728
|
+
static VALUE dfloat_copysign_self(VALUE self, VALUE other) {
|
|
1936
1729
|
ndfunc_arg_in_t ain[2] = { { cT, 0 }, { cT, 0 } };
|
|
1937
1730
|
ndfunc_arg_out_t aout[1] = { { cT, 0 } };
|
|
1938
|
-
ndfunc_t ndf = {
|
|
1731
|
+
ndfunc_t ndf = { iter_dfloat_copysign, STRIDE_LOOP, 2, 1, ain, aout };
|
|
1939
1732
|
|
|
1940
1733
|
return na_ndloop(&ndf, 2, self, other);
|
|
1941
1734
|
}
|
|
1942
1735
|
|
|
1943
|
-
static VALUE
|
|
1736
|
+
static VALUE dfloat_copysign(VALUE self, VALUE other) {
|
|
1944
1737
|
|
|
1945
1738
|
VALUE klass, v;
|
|
1946
1739
|
|
|
1947
1740
|
klass = na_upcast(rb_obj_class(self), rb_obj_class(other));
|
|
1948
1741
|
if (klass == cT) {
|
|
1949
|
-
return
|
|
1742
|
+
return dfloat_copysign_self(self, other);
|
|
1950
1743
|
} else {
|
|
1951
1744
|
v = rb_funcall(klass, id_cast, 1, self);
|
|
1952
|
-
return rb_funcall(v,
|
|
1745
|
+
return rb_funcall(v, id_copysign, 1, other);
|
|
1953
1746
|
}
|
|
1954
1747
|
}
|
|
1955
1748
|
|
|
1956
|
-
|
|
1957
|
-
|
|
1958
|
-
|
|
1959
|
-
|
|
1960
|
-
size_t
|
|
1961
|
-
|
|
1962
|
-
|
|
1963
|
-
|
|
1749
|
+
static void iter_dfloat_signbit(na_loop_t* const lp) {
|
|
1750
|
+
size_t i;
|
|
1751
|
+
char* p1;
|
|
1752
|
+
BIT_DIGIT* a2;
|
|
1753
|
+
size_t p2;
|
|
1754
|
+
ssize_t s1, s2;
|
|
1755
|
+
size_t* idx1;
|
|
1756
|
+
dtype x;
|
|
1757
|
+
BIT_DIGIT b;
|
|
1758
|
+
INIT_COUNTER(lp, i);
|
|
1759
|
+
INIT_PTR_IDX(lp, 0, p1, s1, idx1);
|
|
1760
|
+
INIT_PTR_BIT(lp, 1, a2, p2, s2);
|
|
1761
|
+
if (idx1) {
|
|
1762
|
+
for (; i--;) {
|
|
1763
|
+
GET_DATA_INDEX(p1, idx1, dtype, x);
|
|
1764
|
+
b = (m_signbit(x)) ? 1 : 0;
|
|
1765
|
+
STORE_BIT(a2, p2, b);
|
|
1766
|
+
p2 += s2;
|
|
1767
|
+
}
|
|
1768
|
+
} else {
|
|
1769
|
+
for (; i--;) {
|
|
1770
|
+
GET_DATA_STRIDE(p1, s1, dtype, x);
|
|
1771
|
+
b = (m_signbit(x)) ? 1 : 0;
|
|
1772
|
+
STORE_BIT(a2, p2, b);
|
|
1773
|
+
p2 += s2;
|
|
1774
|
+
}
|
|
1775
|
+
}
|
|
1776
|
+
}
|
|
1964
1777
|
|
|
1965
|
-
|
|
1966
|
-
|
|
1967
|
-
|
|
1778
|
+
static VALUE dfloat_signbit(VALUE self) {
|
|
1779
|
+
ndfunc_arg_in_t ain[1] = { { cT, 0 } };
|
|
1780
|
+
ndfunc_arg_out_t aout[1] = { { numo_cBit, 0 } };
|
|
1781
|
+
ndfunc_t ndf = { iter_dfloat_signbit, FULL_LOOP, 1, 1, ain, aout };
|
|
1968
1782
|
|
|
1969
|
-
|
|
1970
|
-
|
|
1783
|
+
return na_ndloop(&ndf, 1, self);
|
|
1784
|
+
}
|
|
1971
1785
|
|
|
1972
|
-
|
|
1973
|
-
|
|
1974
|
-
|
|
1975
|
-
|
|
1786
|
+
static void iter_dfloat_modf(na_loop_t* const lp) {
|
|
1787
|
+
size_t i;
|
|
1788
|
+
char *p1, *p2, *p3;
|
|
1789
|
+
ssize_t s1, s2, s3;
|
|
1790
|
+
dtype x, y, z;
|
|
1791
|
+
INIT_COUNTER(lp, i);
|
|
1976
1792
|
INIT_PTR(lp, 0, p1, s1);
|
|
1977
1793
|
INIT_PTR(lp, 1, p2, s2);
|
|
1978
1794
|
INIT_PTR(lp, 2, p3, s3);
|
|
1795
|
+
for (; i--;) {
|
|
1796
|
+
GET_DATA_STRIDE(p1, s1, dtype, x);
|
|
1797
|
+
m_modf(x, y, z);
|
|
1798
|
+
SET_DATA_STRIDE(p2, s2, dtype, y);
|
|
1799
|
+
SET_DATA_STRIDE(p3, s3, dtype, z);
|
|
1800
|
+
}
|
|
1801
|
+
}
|
|
1979
1802
|
|
|
1980
|
-
|
|
1981
|
-
|
|
1982
|
-
|
|
1803
|
+
static VALUE dfloat_modf(VALUE self) {
|
|
1804
|
+
ndfunc_arg_in_t ain[1] = { { cT, 0 } };
|
|
1805
|
+
ndfunc_arg_out_t aout[2] = { { cT, 0 }, { cT, 0 } };
|
|
1806
|
+
ndfunc_t ndf = { iter_dfloat_modf, STRIDE_LOOP, 1, 2, ain, aout };
|
|
1983
1807
|
|
|
1984
|
-
|
|
1985
|
-
|
|
1986
|
-
// Check number of elements. & Check same alignment.
|
|
1987
|
-
if ((n >= num_pack) &&
|
|
1988
|
-
is_same_aligned3(
|
|
1989
|
-
&((dtype*)p1)[i], &((dtype*)p2)[i], &((dtype*)p3)[i], SIMD_ALIGNMENT_SIZE
|
|
1990
|
-
)) {
|
|
1991
|
-
// Calculate up to the position just before the start of SIMD computation.
|
|
1992
|
-
cnt = get_count_of_elements_not_aligned_to_simd_size(
|
|
1993
|
-
&((dtype*)p1)[i], SIMD_ALIGNMENT_SIZE, sizeof(dtype)
|
|
1994
|
-
);
|
|
1995
|
-
#endif
|
|
1996
|
-
if (p1 == p3) { // inplace case
|
|
1997
|
-
#ifdef __SSE2__
|
|
1998
|
-
for (; i < cnt; i++) {
|
|
1999
|
-
#else
|
|
2000
|
-
for (; i < n; i++) {
|
|
2001
|
-
check_intdivzero(((dtype*)p2)[i]);
|
|
2002
|
-
#endif
|
|
2003
|
-
((dtype*)p1)[i] = m_sub(((dtype*)p1)[i], ((dtype*)p2)[i]);
|
|
2004
|
-
}
|
|
2005
|
-
} else {
|
|
2006
|
-
#ifdef __SSE2__
|
|
2007
|
-
for (; i < cnt; i++) {
|
|
2008
|
-
#else
|
|
2009
|
-
for (; i < n; i++) {
|
|
2010
|
-
check_intdivzero(((dtype*)p2)[i]);
|
|
2011
|
-
#endif
|
|
2012
|
-
((dtype*)p3)[i] = m_sub(((dtype*)p1)[i], ((dtype*)p2)[i]);
|
|
2013
|
-
}
|
|
2014
|
-
}
|
|
1808
|
+
return na_ndloop(&ndf, 1, self);
|
|
1809
|
+
}
|
|
2015
1810
|
|
|
2016
|
-
|
|
2017
|
-
|
|
2018
|
-
|
|
2019
|
-
|
|
2020
|
-
// SIMD computation.
|
|
2021
|
-
if (p1 == p3) { // inplace case
|
|
2022
|
-
for (; i < n - cnt_simd_loop; i += num_pack) {
|
|
2023
|
-
a = _mm_load_pd(&((dtype*)p1)[i]);
|
|
2024
|
-
b = _mm_load_pd(&((dtype*)p2)[i]);
|
|
2025
|
-
a = _mm_sub_pd(a, b);
|
|
2026
|
-
_mm_store_pd(&((dtype*)p1)[i], a);
|
|
2027
|
-
}
|
|
2028
|
-
} else {
|
|
2029
|
-
for (; i < n - cnt_simd_loop; i += num_pack) {
|
|
2030
|
-
a = _mm_load_pd(&((dtype*)p1)[i]);
|
|
2031
|
-
b = _mm_load_pd(&((dtype*)p2)[i]);
|
|
2032
|
-
a = _mm_sub_pd(a, b);
|
|
2033
|
-
_mm_stream_pd(&((dtype*)p3)[i], a);
|
|
2034
|
-
}
|
|
2035
|
-
}
|
|
2036
|
-
}
|
|
2037
|
-
|
|
2038
|
-
// Compute the remainder of the SIMD operation.
|
|
2039
|
-
if (cnt_simd_loop != 0) {
|
|
2040
|
-
if (p1 == p3) { // inplace case
|
|
2041
|
-
for (; i < n; i++) {
|
|
2042
|
-
check_intdivzero(((dtype*)p2)[i]);
|
|
2043
|
-
((dtype*)p1)[i] = m_sub(((dtype*)p1)[i], ((dtype*)p2)[i]);
|
|
2044
|
-
}
|
|
2045
|
-
} else {
|
|
2046
|
-
for (; i < n; i++) {
|
|
2047
|
-
check_intdivzero(((dtype*)p2)[i]);
|
|
2048
|
-
((dtype*)p3)[i] = m_sub(((dtype*)p1)[i], ((dtype*)p2)[i]);
|
|
2049
|
-
}
|
|
2050
|
-
}
|
|
2051
|
-
}
|
|
2052
|
-
#endif
|
|
2053
|
-
return;
|
|
2054
|
-
}
|
|
2055
|
-
|
|
2056
|
-
if (is_aligned_step(s1, sizeof(dtype)) && is_aligned_step(s2, sizeof(dtype)) &&
|
|
2057
|
-
is_aligned_step(s3, sizeof(dtype))) {
|
|
2058
|
-
//
|
|
2059
|
-
|
|
2060
|
-
if (s2 == 0) { // Broadcasting from scalar value.
|
|
2061
|
-
check_intdivzero(*(dtype*)p2);
|
|
2062
|
-
if (s1 == sizeof(dtype) && s3 == sizeof(dtype)) {
|
|
2063
|
-
#ifdef __SSE2__
|
|
2064
|
-
// Broadcast a scalar value and use it for SIMD computation.
|
|
2065
|
-
b = _mm_load1_pd(&((dtype*)p2)[0]);
|
|
2066
|
-
|
|
2067
|
-
// Check number of elements. & Check same alignment.
|
|
2068
|
-
if ((n >= num_pack) &&
|
|
2069
|
-
is_same_aligned2(&((dtype*)p1)[i], &((dtype*)p3)[i], SIMD_ALIGNMENT_SIZE)) {
|
|
2070
|
-
// Calculate up to the position just before the start of SIMD computation.
|
|
2071
|
-
cnt = get_count_of_elements_not_aligned_to_simd_size(
|
|
2072
|
-
&((dtype*)p1)[i], SIMD_ALIGNMENT_SIZE, sizeof(dtype)
|
|
2073
|
-
);
|
|
2074
|
-
#endif
|
|
2075
|
-
if (p1 == p3) { // inplace case
|
|
2076
|
-
#ifdef __SSE2__
|
|
2077
|
-
for (; i < cnt; i++) {
|
|
2078
|
-
#else
|
|
2079
|
-
for (; i < n; i++) {
|
|
2080
|
-
#endif
|
|
2081
|
-
((dtype*)p1)[i] = m_sub(((dtype*)p1)[i], *(dtype*)p2);
|
|
2082
|
-
}
|
|
2083
|
-
} else {
|
|
2084
|
-
#ifdef __SSE2__
|
|
2085
|
-
for (; i < cnt; i++) {
|
|
2086
|
-
#else
|
|
2087
|
-
for (; i < n; i++) {
|
|
2088
|
-
#endif
|
|
2089
|
-
((dtype*)p3)[i] = m_sub(((dtype*)p1)[i], *(dtype*)p2);
|
|
2090
|
-
}
|
|
2091
|
-
}
|
|
2092
|
-
|
|
2093
|
-
#ifdef __SSE2__
|
|
2094
|
-
// Get the count of SIMD computation loops.
|
|
2095
|
-
cnt_simd_loop = (n - i) % num_pack;
|
|
2096
|
-
|
|
2097
|
-
// SIMD computation.
|
|
2098
|
-
if (p1 == p3) { // inplace case
|
|
2099
|
-
for (; i < n - cnt_simd_loop; i += num_pack) {
|
|
2100
|
-
a = _mm_load_pd(&((dtype*)p1)[i]);
|
|
2101
|
-
a = _mm_sub_pd(a, b);
|
|
2102
|
-
_mm_store_pd(&((dtype*)p1)[i], a);
|
|
2103
|
-
}
|
|
2104
|
-
} else {
|
|
2105
|
-
for (; i < n - cnt_simd_loop; i += num_pack) {
|
|
2106
|
-
a = _mm_load_pd(&((dtype*)p1)[i]);
|
|
2107
|
-
a = _mm_sub_pd(a, b);
|
|
2108
|
-
_mm_stream_pd(&((dtype*)p3)[i], a);
|
|
2109
|
-
}
|
|
2110
|
-
}
|
|
2111
|
-
}
|
|
2112
|
-
|
|
2113
|
-
// Compute the remainder of the SIMD operation.
|
|
2114
|
-
if (cnt_simd_loop != 0) {
|
|
2115
|
-
if (p1 == p3) { // inplace case
|
|
2116
|
-
for (; i < n; i++) {
|
|
2117
|
-
((dtype*)p1)[i] = m_sub(((dtype*)p1)[i], *(dtype*)p2);
|
|
2118
|
-
}
|
|
2119
|
-
} else {
|
|
2120
|
-
for (; i < n; i++) {
|
|
2121
|
-
((dtype*)p3)[i] = m_sub(((dtype*)p1)[i], *(dtype*)p2);
|
|
2122
|
-
}
|
|
2123
|
-
}
|
|
2124
|
-
}
|
|
2125
|
-
#endif
|
|
2126
|
-
} else {
|
|
2127
|
-
for (i = 0; i < n; i++) {
|
|
2128
|
-
*(dtype*)p3 = m_sub(*(dtype*)p1, *(dtype*)p2);
|
|
2129
|
-
p1 += s1;
|
|
2130
|
-
p3 += s3;
|
|
2131
|
-
}
|
|
2132
|
-
}
|
|
2133
|
-
} else {
|
|
2134
|
-
if (p1 == p3) { // inplace case
|
|
2135
|
-
for (i = 0; i < n; i++) {
|
|
2136
|
-
check_intdivzero(*(dtype*)p2);
|
|
2137
|
-
*(dtype*)p1 = m_sub(*(dtype*)p1, *(dtype*)p2);
|
|
2138
|
-
p1 += s1;
|
|
2139
|
-
p2 += s2;
|
|
2140
|
-
}
|
|
2141
|
-
} else {
|
|
2142
|
-
for (i = 0; i < n; i++) {
|
|
2143
|
-
check_intdivzero(*(dtype*)p2);
|
|
2144
|
-
*(dtype*)p3 = m_sub(*(dtype*)p1, *(dtype*)p2);
|
|
2145
|
-
p1 += s1;
|
|
2146
|
-
p2 += s2;
|
|
2147
|
-
p3 += s3;
|
|
2148
|
-
}
|
|
2149
|
-
}
|
|
2150
|
-
}
|
|
2151
|
-
|
|
2152
|
-
return;
|
|
2153
|
-
//
|
|
2154
|
-
}
|
|
2155
|
-
}
|
|
2156
|
-
for (i = 0; i < n; i++) {
|
|
2157
|
-
dtype x, y, z;
|
|
2158
|
-
GET_DATA_STRIDE(p1, s1, dtype, x);
|
|
2159
|
-
GET_DATA_STRIDE(p2, s2, dtype, y);
|
|
2160
|
-
check_intdivzero(y);
|
|
2161
|
-
z = m_sub(x, y);
|
|
2162
|
-
SET_DATA_STRIDE(p3, s3, dtype, z);
|
|
2163
|
-
}
|
|
2164
|
-
//
|
|
2165
|
-
}
|
|
2166
|
-
#undef check_intdivzero
|
|
2167
|
-
|
|
2168
|
-
// Runs iter_dfloat_sub over (self, other) through na_ndloop.
// The loop is declared as a STRIDE_LOOP with two cT inputs and one cT output;
// whatever na_ndloop returns is handed back to the caller unchanged.
static VALUE dfloat_sub_self(VALUE self, VALUE other) {
  ndfunc_arg_in_t operands[2] = { { cT, 0 }, { cT, 0 } };
  ndfunc_arg_out_t results[1] = { { cT, 0 } };
  ndfunc_t loop_spec = { iter_dfloat_sub, STRIDE_LOOP, 2, 1, operands, results };
  VALUE difference = na_ndloop(&loop_spec, 2, self, other);

  return difference;
}
|
|
2175
|
-
|
|
2176
|
-
// Entry point for `self - other`.
// na_upcast picks the result class from the classes of both operands. When
// that class is cT the work is done by dfloat_sub_self; otherwise self is
// converted with id_cast on the chosen class and '-' is sent to the converted
// object with `other` as its argument.
static VALUE dfloat_sub(VALUE self, VALUE other) {
  const VALUE target = na_upcast(rb_obj_class(self), rb_obj_class(other));

  if (target != cT) {
    VALUE promoted = rb_funcall(target, id_cast, 1, self);
    return rb_funcall(promoted, '-', 1, other);
  }
  return dfloat_sub_self(self, other);
}
|
|
2188
|
-
|
|
2189
|
-
#define check_intdivzero(y) \
|
|
2190
|
-
{}
|
|
2191
|
-
|
|
2192
|
-
static void iter_dfloat_mul(na_loop_t* const lp) {
|
|
2193
|
-
size_t i = 0;
|
|
2194
|
-
size_t n;
|
|
2195
|
-
char *p1, *p2, *p3;
|
|
2196
|
-
ssize_t s1, s2, s3;
|
|
2197
|
-
|
|
2198
|
-
#ifdef __SSE2__
|
|
2199
|
-
size_t cnt;
|
|
2200
|
-
size_t cnt_simd_loop = -1;
|
|
2201
|
-
|
|
2202
|
-
__m128d a;
|
|
2203
|
-
__m128d b;
|
|
2204
|
-
|
|
2205
|
-
size_t num_pack; // Number of elements packed for SIMD.
|
|
2206
|
-
num_pack = SIMD_ALIGNMENT_SIZE / sizeof(dtype);
|
|
2207
|
-
#endif
|
|
2208
|
-
INIT_COUNTER(lp, n);
|
|
2209
|
-
INIT_PTR(lp, 0, p1, s1);
|
|
2210
|
-
INIT_PTR(lp, 1, p2, s2);
|
|
2211
|
-
INIT_PTR(lp, 2, p3, s3);
|
|
2212
|
-
|
|
2213
|
-
//
|
|
2214
|
-
if (is_aligned(p1, sizeof(dtype)) && is_aligned(p2, sizeof(dtype)) &&
|
|
2215
|
-
is_aligned(p3, sizeof(dtype))) {
|
|
2216
|
-
|
|
2217
|
-
if (s1 == sizeof(dtype) && s2 == sizeof(dtype) && s3 == sizeof(dtype)) {
|
|
2218
|
-
#ifdef __SSE2__
|
|
2219
|
-
// Check number of elements. & Check same alignment.
|
|
2220
|
-
if ((n >= num_pack) &&
|
|
2221
|
-
is_same_aligned3(
|
|
2222
|
-
&((dtype*)p1)[i], &((dtype*)p2)[i], &((dtype*)p3)[i], SIMD_ALIGNMENT_SIZE
|
|
2223
|
-
)) {
|
|
2224
|
-
// Calculate up to the position just before the start of SIMD computation.
|
|
2225
|
-
cnt = get_count_of_elements_not_aligned_to_simd_size(
|
|
2226
|
-
&((dtype*)p1)[i], SIMD_ALIGNMENT_SIZE, sizeof(dtype)
|
|
2227
|
-
);
|
|
2228
|
-
#endif
|
|
2229
|
-
if (p1 == p3) { // inplace case
|
|
2230
|
-
#ifdef __SSE2__
|
|
2231
|
-
for (; i < cnt; i++) {
|
|
2232
|
-
#else
|
|
2233
|
-
for (; i < n; i++) {
|
|
2234
|
-
check_intdivzero(((dtype*)p2)[i]);
|
|
2235
|
-
#endif
|
|
2236
|
-
((dtype*)p1)[i] = m_mul(((dtype*)p1)[i], ((dtype*)p2)[i]);
|
|
2237
|
-
}
|
|
2238
|
-
} else {
|
|
2239
|
-
#ifdef __SSE2__
|
|
2240
|
-
for (; i < cnt; i++) {
|
|
2241
|
-
#else
|
|
2242
|
-
for (; i < n; i++) {
|
|
2243
|
-
check_intdivzero(((dtype*)p2)[i]);
|
|
2244
|
-
#endif
|
|
2245
|
-
((dtype*)p3)[i] = m_mul(((dtype*)p1)[i], ((dtype*)p2)[i]);
|
|
2246
|
-
}
|
|
2247
|
-
}
|
|
2248
|
-
|
|
2249
|
-
#ifdef __SSE2__
|
|
2250
|
-
// Get the count of SIMD computation loops.
|
|
2251
|
-
cnt_simd_loop = (n - i) % num_pack;
|
|
2252
|
-
|
|
2253
|
-
// SIMD computation.
|
|
2254
|
-
if (p1 == p3) { // inplace case
|
|
2255
|
-
for (; i < n - cnt_simd_loop; i += num_pack) {
|
|
2256
|
-
a = _mm_load_pd(&((dtype*)p1)[i]);
|
|
2257
|
-
b = _mm_load_pd(&((dtype*)p2)[i]);
|
|
2258
|
-
a = _mm_mul_pd(a, b);
|
|
2259
|
-
_mm_store_pd(&((dtype*)p1)[i], a);
|
|
2260
|
-
}
|
|
2261
|
-
} else {
|
|
2262
|
-
for (; i < n - cnt_simd_loop; i += num_pack) {
|
|
2263
|
-
a = _mm_load_pd(&((dtype*)p1)[i]);
|
|
2264
|
-
b = _mm_load_pd(&((dtype*)p2)[i]);
|
|
2265
|
-
a = _mm_mul_pd(a, b);
|
|
2266
|
-
_mm_stream_pd(&((dtype*)p3)[i], a);
|
|
2267
|
-
}
|
|
2268
|
-
}
|
|
2269
|
-
}
|
|
2270
|
-
|
|
2271
|
-
// Compute the remainder of the SIMD operation.
|
|
2272
|
-
if (cnt_simd_loop != 0) {
|
|
2273
|
-
if (p1 == p3) { // inplace case
|
|
2274
|
-
for (; i < n; i++) {
|
|
2275
|
-
check_intdivzero(((dtype*)p2)[i]);
|
|
2276
|
-
((dtype*)p1)[i] = m_mul(((dtype*)p1)[i], ((dtype*)p2)[i]);
|
|
2277
|
-
}
|
|
2278
|
-
} else {
|
|
2279
|
-
for (; i < n; i++) {
|
|
2280
|
-
check_intdivzero(((dtype*)p2)[i]);
|
|
2281
|
-
((dtype*)p3)[i] = m_mul(((dtype*)p1)[i], ((dtype*)p2)[i]);
|
|
2282
|
-
}
|
|
2283
|
-
}
|
|
2284
|
-
}
|
|
2285
|
-
#endif
|
|
2286
|
-
return;
|
|
2287
|
-
}
|
|
2288
|
-
|
|
2289
|
-
if (is_aligned_step(s1, sizeof(dtype)) && is_aligned_step(s2, sizeof(dtype)) &&
|
|
2290
|
-
is_aligned_step(s3, sizeof(dtype))) {
|
|
2291
|
-
//
|
|
2292
|
-
|
|
2293
|
-
if (s2 == 0) { // Broadcasting from scalar value.
|
|
2294
|
-
check_intdivzero(*(dtype*)p2);
|
|
2295
|
-
if (s1 == sizeof(dtype) && s3 == sizeof(dtype)) {
|
|
2296
|
-
#ifdef __SSE2__
|
|
2297
|
-
// Broadcast a scalar value and use it for SIMD computation.
|
|
2298
|
-
b = _mm_load1_pd(&((dtype*)p2)[0]);
|
|
2299
|
-
|
|
2300
|
-
// Check number of elements. & Check same alignment.
|
|
2301
|
-
if ((n >= num_pack) &&
|
|
2302
|
-
is_same_aligned2(&((dtype*)p1)[i], &((dtype*)p3)[i], SIMD_ALIGNMENT_SIZE)) {
|
|
2303
|
-
// Calculate up to the position just before the start of SIMD computation.
|
|
2304
|
-
cnt = get_count_of_elements_not_aligned_to_simd_size(
|
|
2305
|
-
&((dtype*)p1)[i], SIMD_ALIGNMENT_SIZE, sizeof(dtype)
|
|
2306
|
-
);
|
|
2307
|
-
#endif
|
|
2308
|
-
if (p1 == p3) { // inplace case
|
|
2309
|
-
#ifdef __SSE2__
|
|
2310
|
-
for (; i < cnt; i++) {
|
|
2311
|
-
#else
|
|
2312
|
-
for (; i < n; i++) {
|
|
2313
|
-
#endif
|
|
2314
|
-
((dtype*)p1)[i] = m_mul(((dtype*)p1)[i], *(dtype*)p2);
|
|
2315
|
-
}
|
|
2316
|
-
} else {
|
|
2317
|
-
#ifdef __SSE2__
|
|
2318
|
-
for (; i < cnt; i++) {
|
|
2319
|
-
#else
|
|
2320
|
-
for (; i < n; i++) {
|
|
2321
|
-
#endif
|
|
2322
|
-
((dtype*)p3)[i] = m_mul(((dtype*)p1)[i], *(dtype*)p2);
|
|
2323
|
-
}
|
|
2324
|
-
}
|
|
2325
|
-
|
|
2326
|
-
#ifdef __SSE2__
|
|
2327
|
-
// Get the count of SIMD computation loops.
|
|
2328
|
-
cnt_simd_loop = (n - i) % num_pack;
|
|
2329
|
-
|
|
2330
|
-
// SIMD computation.
|
|
2331
|
-
if (p1 == p3) { // inplace case
|
|
2332
|
-
for (; i < n - cnt_simd_loop; i += num_pack) {
|
|
2333
|
-
a = _mm_load_pd(&((dtype*)p1)[i]);
|
|
2334
|
-
a = _mm_mul_pd(a, b);
|
|
2335
|
-
_mm_store_pd(&((dtype*)p1)[i], a);
|
|
2336
|
-
}
|
|
2337
|
-
} else {
|
|
2338
|
-
for (; i < n - cnt_simd_loop; i += num_pack) {
|
|
2339
|
-
a = _mm_load_pd(&((dtype*)p1)[i]);
|
|
2340
|
-
a = _mm_mul_pd(a, b);
|
|
2341
|
-
_mm_stream_pd(&((dtype*)p3)[i], a);
|
|
2342
|
-
}
|
|
2343
|
-
}
|
|
2344
|
-
}
|
|
2345
|
-
|
|
2346
|
-
// Compute the remainder of the SIMD operation.
|
|
2347
|
-
if (cnt_simd_loop != 0) {
|
|
2348
|
-
if (p1 == p3) { // inplace case
|
|
2349
|
-
for (; i < n; i++) {
|
|
2350
|
-
((dtype*)p1)[i] = m_mul(((dtype*)p1)[i], *(dtype*)p2);
|
|
2351
|
-
}
|
|
2352
|
-
} else {
|
|
2353
|
-
for (; i < n; i++) {
|
|
2354
|
-
((dtype*)p3)[i] = m_mul(((dtype*)p1)[i], *(dtype*)p2);
|
|
2355
|
-
}
|
|
2356
|
-
}
|
|
2357
|
-
}
|
|
2358
|
-
#endif
|
|
2359
|
-
} else {
|
|
2360
|
-
for (i = 0; i < n; i++) {
|
|
2361
|
-
*(dtype*)p3 = m_mul(*(dtype*)p1, *(dtype*)p2);
|
|
2362
|
-
p1 += s1;
|
|
2363
|
-
p3 += s3;
|
|
2364
|
-
}
|
|
2365
|
-
}
|
|
2366
|
-
} else {
|
|
2367
|
-
if (p1 == p3) { // inplace case
|
|
2368
|
-
for (i = 0; i < n; i++) {
|
|
2369
|
-
check_intdivzero(*(dtype*)p2);
|
|
2370
|
-
*(dtype*)p1 = m_mul(*(dtype*)p1, *(dtype*)p2);
|
|
2371
|
-
p1 += s1;
|
|
2372
|
-
p2 += s2;
|
|
2373
|
-
}
|
|
2374
|
-
} else {
|
|
2375
|
-
for (i = 0; i < n; i++) {
|
|
2376
|
-
check_intdivzero(*(dtype*)p2);
|
|
2377
|
-
*(dtype*)p3 = m_mul(*(dtype*)p1, *(dtype*)p2);
|
|
2378
|
-
p1 += s1;
|
|
2379
|
-
p2 += s2;
|
|
2380
|
-
p3 += s3;
|
|
2381
|
-
}
|
|
2382
|
-
}
|
|
2383
|
-
}
|
|
2384
|
-
|
|
2385
|
-
return;
|
|
2386
|
-
//
|
|
2387
|
-
}
|
|
2388
|
-
}
|
|
2389
|
-
for (i = 0; i < n; i++) {
|
|
2390
|
-
dtype x, y, z;
|
|
2391
|
-
GET_DATA_STRIDE(p1, s1, dtype, x);
|
|
2392
|
-
GET_DATA_STRIDE(p2, s2, dtype, y);
|
|
2393
|
-
check_intdivzero(y);
|
|
2394
|
-
z = m_mul(x, y);
|
|
2395
|
-
SET_DATA_STRIDE(p3, s3, dtype, z);
|
|
2396
|
-
}
|
|
2397
|
-
//
|
|
2398
|
-
}
|
|
2399
|
-
#undef check_intdivzero
|
|
2400
|
-
|
|
2401
|
-
// Runs iter_dfloat_mul over (self, other) through na_ndloop.
// The loop is declared as a STRIDE_LOOP with two cT inputs and one cT output;
// whatever na_ndloop returns is handed back to the caller unchanged.
static VALUE dfloat_mul_self(VALUE self, VALUE other) {
  ndfunc_arg_in_t operands[2] = { { cT, 0 }, { cT, 0 } };
  ndfunc_arg_out_t results[1] = { { cT, 0 } };
  ndfunc_t loop_spec = { iter_dfloat_mul, STRIDE_LOOP, 2, 1, operands, results };
  VALUE product = na_ndloop(&loop_spec, 2, self, other);

  return product;
}
|
|
2408
|
-
|
|
2409
|
-
// Entry point for `self * other`.
// na_upcast picks the result class from the classes of both operands. When
// that class is cT the work is done by dfloat_mul_self; otherwise self is
// converted with id_cast on the chosen class and '*' is sent to the converted
// object with `other` as its argument.
static VALUE dfloat_mul(VALUE self, VALUE other) {
  const VALUE target = na_upcast(rb_obj_class(self), rb_obj_class(other));

  if (target != cT) {
    VALUE promoted = rb_funcall(target, id_cast, 1, self);
    return rb_funcall(promoted, '*', 1, other);
  }
  return dfloat_mul_self(self, other);
}
|
|
2421
|
-
|
|
2422
|
-
#define check_intdivzero(y) \
|
|
2423
|
-
{}
|
|
2424
|
-
|
|
2425
|
-
static void iter_dfloat_div(na_loop_t* const lp) {
|
|
2426
|
-
size_t i = 0;
|
|
2427
|
-
size_t n;
|
|
2428
|
-
char *p1, *p2, *p3;
|
|
2429
|
-
ssize_t s1, s2, s3;
|
|
2430
|
-
|
|
2431
|
-
#ifdef __SSE2__
|
|
2432
|
-
size_t cnt;
|
|
2433
|
-
size_t cnt_simd_loop = -1;
|
|
2434
|
-
|
|
2435
|
-
__m128d a;
|
|
2436
|
-
__m128d b;
|
|
2437
|
-
|
|
2438
|
-
size_t num_pack; // Number of elements packed for SIMD.
|
|
2439
|
-
num_pack = SIMD_ALIGNMENT_SIZE / sizeof(dtype);
|
|
2440
|
-
#endif
|
|
2441
|
-
INIT_COUNTER(lp, n);
|
|
2442
|
-
INIT_PTR(lp, 0, p1, s1);
|
|
2443
|
-
INIT_PTR(lp, 1, p2, s2);
|
|
2444
|
-
INIT_PTR(lp, 2, p3, s3);
|
|
2445
|
-
|
|
2446
|
-
//
|
|
2447
|
-
if (is_aligned(p1, sizeof(dtype)) && is_aligned(p2, sizeof(dtype)) &&
|
|
2448
|
-
is_aligned(p3, sizeof(dtype))) {
|
|
2449
|
-
|
|
2450
|
-
if (s1 == sizeof(dtype) && s2 == sizeof(dtype) && s3 == sizeof(dtype)) {
|
|
2451
|
-
#ifdef __SSE2__
|
|
2452
|
-
// Check number of elements. & Check same alignment.
|
|
2453
|
-
if ((n >= num_pack) &&
|
|
2454
|
-
is_same_aligned3(
|
|
2455
|
-
&((dtype*)p1)[i], &((dtype*)p2)[i], &((dtype*)p3)[i], SIMD_ALIGNMENT_SIZE
|
|
2456
|
-
)) {
|
|
2457
|
-
// Calculate up to the position just before the start of SIMD computation.
|
|
2458
|
-
cnt = get_count_of_elements_not_aligned_to_simd_size(
|
|
2459
|
-
&((dtype*)p1)[i], SIMD_ALIGNMENT_SIZE, sizeof(dtype)
|
|
2460
|
-
);
|
|
2461
|
-
#endif
|
|
2462
|
-
if (p1 == p3) { // inplace case
|
|
2463
|
-
#ifdef __SSE2__
|
|
2464
|
-
for (; i < cnt; i++) {
|
|
2465
|
-
#else
|
|
2466
|
-
for (; i < n; i++) {
|
|
2467
|
-
#endif
|
|
2468
|
-
((dtype*)p1)[i] = m_div(((dtype*)p1)[i], ((dtype*)p2)[i]);
|
|
2469
|
-
}
|
|
2470
|
-
} else {
|
|
2471
|
-
#ifdef __SSE2__
|
|
2472
|
-
for (; i < cnt; i++) {
|
|
2473
|
-
#else
|
|
2474
|
-
for (; i < n; i++) {
|
|
2475
|
-
#endif
|
|
2476
|
-
((dtype*)p3)[i] = m_div(((dtype*)p1)[i], ((dtype*)p2)[i]);
|
|
2477
|
-
}
|
|
2478
|
-
}
|
|
2479
|
-
|
|
2480
|
-
#ifdef __SSE2__
|
|
2481
|
-
// Get the count of SIMD computation loops.
|
|
2482
|
-
cnt_simd_loop = (n - i) % num_pack;
|
|
2483
|
-
|
|
2484
|
-
// SIMD computation.
|
|
2485
|
-
if (p1 == p3) { // inplace case
|
|
2486
|
-
for (; i < n - cnt_simd_loop; i += num_pack) {
|
|
2487
|
-
a = _mm_load_pd(&((dtype*)p1)[i]);
|
|
2488
|
-
b = _mm_load_pd(&((dtype*)p2)[i]);
|
|
2489
|
-
a = _mm_div_pd(a, b);
|
|
2490
|
-
_mm_store_pd(&((dtype*)p1)[i], a);
|
|
2491
|
-
}
|
|
2492
|
-
} else {
|
|
2493
|
-
for (; i < n - cnt_simd_loop; i += num_pack) {
|
|
2494
|
-
a = _mm_load_pd(&((dtype*)p1)[i]);
|
|
2495
|
-
b = _mm_load_pd(&((dtype*)p2)[i]);
|
|
2496
|
-
a = _mm_div_pd(a, b);
|
|
2497
|
-
_mm_stream_pd(&((dtype*)p3)[i], a);
|
|
2498
|
-
}
|
|
2499
|
-
}
|
|
2500
|
-
}
|
|
2501
|
-
|
|
2502
|
-
// Compute the remainder of the SIMD operation.
|
|
2503
|
-
if (cnt_simd_loop != 0) {
|
|
2504
|
-
if (p1 == p3) { // inplace case
|
|
2505
|
-
for (; i < n; i++) {
|
|
2506
|
-
check_intdivzero(((dtype*)p2)[i]);
|
|
2507
|
-
((dtype*)p1)[i] = m_div(((dtype*)p1)[i], ((dtype*)p2)[i]);
|
|
2508
|
-
}
|
|
2509
|
-
} else {
|
|
2510
|
-
for (; i < n; i++) {
|
|
2511
|
-
check_intdivzero(((dtype*)p2)[i]);
|
|
2512
|
-
((dtype*)p3)[i] = m_div(((dtype*)p1)[i], ((dtype*)p2)[i]);
|
|
2513
|
-
}
|
|
2514
|
-
}
|
|
2515
|
-
}
|
|
2516
|
-
#endif
|
|
2517
|
-
return;
|
|
2518
|
-
}
|
|
2519
|
-
|
|
2520
|
-
if (is_aligned_step(s1, sizeof(dtype)) && is_aligned_step(s2, sizeof(dtype)) &&
|
|
2521
|
-
is_aligned_step(s3, sizeof(dtype))) {
|
|
2522
|
-
//
|
|
2523
|
-
|
|
2524
|
-
if (s2 == 0) { // Broadcasting from scalar value.
|
|
2525
|
-
check_intdivzero(*(dtype*)p2);
|
|
2526
|
-
if (s1 == sizeof(dtype) && s3 == sizeof(dtype)) {
|
|
2527
|
-
#ifdef __SSE2__
|
|
2528
|
-
// Broadcast a scalar value and use it for SIMD computation.
|
|
2529
|
-
b = _mm_load1_pd(&((dtype*)p2)[0]);
|
|
2530
|
-
|
|
2531
|
-
// Check number of elements. & Check same alignment.
|
|
2532
|
-
if ((n >= num_pack) &&
|
|
2533
|
-
is_same_aligned2(&((dtype*)p1)[i], &((dtype*)p3)[i], SIMD_ALIGNMENT_SIZE)) {
|
|
2534
|
-
// Calculate up to the position just before the start of SIMD computation.
|
|
2535
|
-
cnt = get_count_of_elements_not_aligned_to_simd_size(
|
|
2536
|
-
&((dtype*)p1)[i], SIMD_ALIGNMENT_SIZE, sizeof(dtype)
|
|
2537
|
-
);
|
|
2538
|
-
#endif
|
|
2539
|
-
if (p1 == p3) { // inplace case
|
|
2540
|
-
#ifdef __SSE2__
|
|
2541
|
-
for (; i < cnt; i++) {
|
|
2542
|
-
#else
|
|
2543
|
-
for (; i < n; i++) {
|
|
2544
|
-
check_intdivzero(((dtype*)p2)[i]);
|
|
2545
|
-
#endif
|
|
2546
|
-
((dtype*)p1)[i] = m_div(((dtype*)p1)[i], *(dtype*)p2);
|
|
2547
|
-
}
|
|
2548
|
-
} else {
|
|
2549
|
-
#ifdef __SSE2__
|
|
2550
|
-
for (; i < cnt; i++) {
|
|
2551
|
-
#else
|
|
2552
|
-
for (; i < n; i++) {
|
|
2553
|
-
check_intdivzero(((dtype*)p2)[i]);
|
|
2554
|
-
#endif
|
|
2555
|
-
((dtype*)p3)[i] = m_div(((dtype*)p1)[i], *(dtype*)p2);
|
|
2556
|
-
}
|
|
2557
|
-
}
|
|
2558
|
-
|
|
2559
|
-
#ifdef __SSE2__
|
|
2560
|
-
// Get the count of SIMD computation loops.
|
|
2561
|
-
cnt_simd_loop = (n - i) % num_pack;
|
|
2562
|
-
|
|
2563
|
-
// SIMD computation.
|
|
2564
|
-
if (p1 == p3) { // inplace case
|
|
2565
|
-
for (; i < n - cnt_simd_loop; i += num_pack) {
|
|
2566
|
-
a = _mm_load_pd(&((dtype*)p1)[i]);
|
|
2567
|
-
a = _mm_div_pd(a, b);
|
|
2568
|
-
_mm_store_pd(&((dtype*)p1)[i], a);
|
|
2569
|
-
}
|
|
2570
|
-
} else {
|
|
2571
|
-
for (; i < n - cnt_simd_loop; i += num_pack) {
|
|
2572
|
-
a = _mm_load_pd(&((dtype*)p1)[i]);
|
|
2573
|
-
a = _mm_div_pd(a, b);
|
|
2574
|
-
_mm_stream_pd(&((dtype*)p3)[i], a);
|
|
2575
|
-
}
|
|
2576
|
-
}
|
|
2577
|
-
}
|
|
2578
|
-
|
|
2579
|
-
// Compute the remainder of the SIMD operation.
|
|
2580
|
-
if (cnt_simd_loop != 0) {
|
|
2581
|
-
if (p1 == p3) { // inplace case
|
|
2582
|
-
for (; i < n; i++) {
|
|
2583
|
-
((dtype*)p1)[i] = m_div(((dtype*)p1)[i], *(dtype*)p2);
|
|
2584
|
-
}
|
|
2585
|
-
} else {
|
|
2586
|
-
for (; i < n; i++) {
|
|
2587
|
-
((dtype*)p3)[i] = m_div(((dtype*)p1)[i], *(dtype*)p2);
|
|
2588
|
-
}
|
|
2589
|
-
}
|
|
2590
|
-
}
|
|
2591
|
-
#endif
|
|
2592
|
-
} else {
|
|
2593
|
-
for (i = 0; i < n; i++) {
|
|
2594
|
-
*(dtype*)p3 = m_div(*(dtype*)p1, *(dtype*)p2);
|
|
2595
|
-
p1 += s1;
|
|
2596
|
-
p3 += s3;
|
|
2597
|
-
}
|
|
2598
|
-
}
|
|
2599
|
-
} else {
|
|
2600
|
-
if (p1 == p3) { // inplace case
|
|
2601
|
-
for (i = 0; i < n; i++) {
|
|
2602
|
-
check_intdivzero(*(dtype*)p2);
|
|
2603
|
-
*(dtype*)p1 = m_div(*(dtype*)p1, *(dtype*)p2);
|
|
2604
|
-
p1 += s1;
|
|
2605
|
-
p2 += s2;
|
|
2606
|
-
}
|
|
2607
|
-
} else {
|
|
2608
|
-
for (i = 0; i < n; i++) {
|
|
2609
|
-
check_intdivzero(*(dtype*)p2);
|
|
2610
|
-
*(dtype*)p3 = m_div(*(dtype*)p1, *(dtype*)p2);
|
|
2611
|
-
p1 += s1;
|
|
2612
|
-
p2 += s2;
|
|
2613
|
-
p3 += s3;
|
|
2614
|
-
}
|
|
2615
|
-
}
|
|
2616
|
-
}
|
|
2617
|
-
|
|
2618
|
-
return;
|
|
2619
|
-
//
|
|
2620
|
-
}
|
|
2621
|
-
}
|
|
2622
|
-
for (i = 0; i < n; i++) {
|
|
2623
|
-
dtype x, y, z;
|
|
2624
|
-
GET_DATA_STRIDE(p1, s1, dtype, x);
|
|
2625
|
-
GET_DATA_STRIDE(p2, s2, dtype, y);
|
|
2626
|
-
check_intdivzero(y);
|
|
2627
|
-
z = m_div(x, y);
|
|
2628
|
-
SET_DATA_STRIDE(p3, s3, dtype, z);
|
|
2629
|
-
}
|
|
2630
|
-
//
|
|
2631
|
-
}
|
|
2632
|
-
#undef check_intdivzero
|
|
2633
|
-
|
|
2634
|
-
// Runs iter_dfloat_div over (self, other) through na_ndloop.
// The loop is declared as a STRIDE_LOOP with two cT inputs and one cT output;
// whatever na_ndloop returns is handed back to the caller unchanged.
static VALUE dfloat_div_self(VALUE self, VALUE other) {
  ndfunc_arg_in_t operands[2] = { { cT, 0 }, { cT, 0 } };
  ndfunc_arg_out_t results[1] = { { cT, 0 } };
  ndfunc_t loop_spec = { iter_dfloat_div, STRIDE_LOOP, 2, 1, operands, results };
  VALUE ratio = na_ndloop(&loop_spec, 2, self, other);

  return ratio;
}
|
|
2641
|
-
|
|
2642
|
-
// Entry point for `self / other`.
// na_upcast picks the result class from the classes of both operands. When
// that class is cT the work is done by dfloat_div_self; otherwise self is
// converted with id_cast on the chosen class and '/' is sent to the converted
// object with `other` as its argument.
static VALUE dfloat_div(VALUE self, VALUE other) {
  const VALUE target = na_upcast(rb_obj_class(self), rb_obj_class(other));

  if (target != cT) {
    VALUE promoted = rb_funcall(target, id_cast, 1, self);
    return rb_funcall(promoted, '/', 1, other);
  }
  return dfloat_div_self(self, other);
}
|
|
2654
|
-
|
|
2655
|
-
#define check_intdivzero(y) \
|
|
2656
|
-
{}
|
|
2657
|
-
|
|
2658
|
-
static void iter_dfloat_mod(na_loop_t* const lp) {
|
|
2659
|
-
size_t i = 0;
|
|
2660
|
-
size_t n;
|
|
2661
|
-
char *p1, *p2, *p3;
|
|
2662
|
-
ssize_t s1, s2, s3;
|
|
2663
|
-
|
|
2664
|
-
INIT_COUNTER(lp, n);
|
|
2665
|
-
INIT_PTR(lp, 0, p1, s1);
|
|
2666
|
-
INIT_PTR(lp, 1, p2, s2);
|
|
2667
|
-
INIT_PTR(lp, 2, p3, s3);
|
|
2668
|
-
|
|
2669
|
-
//
|
|
2670
|
-
if (is_aligned(p1, sizeof(dtype)) && is_aligned(p2, sizeof(dtype)) &&
|
|
2671
|
-
is_aligned(p3, sizeof(dtype))) {
|
|
2672
|
-
|
|
2673
|
-
if (s1 == sizeof(dtype) && s2 == sizeof(dtype) && s3 == sizeof(dtype)) {
|
|
2674
|
-
if (p1 == p3) { // inplace case
|
|
2675
|
-
for (; i < n; i++) {
|
|
2676
|
-
check_intdivzero(((dtype*)p2)[i]);
|
|
2677
|
-
((dtype*)p1)[i] = m_mod(((dtype*)p1)[i], ((dtype*)p2)[i]);
|
|
2678
|
-
}
|
|
2679
|
-
} else {
|
|
2680
|
-
for (; i < n; i++) {
|
|
2681
|
-
check_intdivzero(((dtype*)p2)[i]);
|
|
2682
|
-
((dtype*)p3)[i] = m_mod(((dtype*)p1)[i], ((dtype*)p2)[i]);
|
|
2683
|
-
}
|
|
2684
|
-
}
|
|
2685
|
-
return;
|
|
2686
|
-
}
|
|
2687
|
-
|
|
2688
|
-
if (is_aligned_step(s1, sizeof(dtype)) && is_aligned_step(s2, sizeof(dtype)) &&
|
|
2689
|
-
is_aligned_step(s3, sizeof(dtype))) {
|
|
2690
|
-
//
|
|
2691
|
-
|
|
2692
|
-
if (s2 == 0) { // Broadcasting from scalar value.
|
|
2693
|
-
check_intdivzero(*(dtype*)p2);
|
|
2694
|
-
if (s1 == sizeof(dtype) && s3 == sizeof(dtype)) {
|
|
2695
|
-
if (p1 == p3) { // inplace case
|
|
2696
|
-
for (; i < n; i++) {
|
|
2697
|
-
((dtype*)p1)[i] = m_mod(((dtype*)p1)[i], *(dtype*)p2);
|
|
2698
|
-
}
|
|
2699
|
-
} else {
|
|
2700
|
-
for (; i < n; i++) {
|
|
2701
|
-
((dtype*)p3)[i] = m_mod(((dtype*)p1)[i], *(dtype*)p2);
|
|
2702
|
-
}
|
|
2703
|
-
}
|
|
2704
|
-
} else {
|
|
2705
|
-
for (i = 0; i < n; i++) {
|
|
2706
|
-
*(dtype*)p3 = m_mod(*(dtype*)p1, *(dtype*)p2);
|
|
2707
|
-
p1 += s1;
|
|
2708
|
-
p3 += s3;
|
|
2709
|
-
}
|
|
2710
|
-
}
|
|
2711
|
-
} else {
|
|
2712
|
-
if (p1 == p3) { // inplace case
|
|
2713
|
-
for (i = 0; i < n; i++) {
|
|
2714
|
-
check_intdivzero(*(dtype*)p2);
|
|
2715
|
-
*(dtype*)p1 = m_mod(*(dtype*)p1, *(dtype*)p2);
|
|
2716
|
-
p1 += s1;
|
|
2717
|
-
p2 += s2;
|
|
2718
|
-
}
|
|
2719
|
-
} else {
|
|
2720
|
-
for (i = 0; i < n; i++) {
|
|
2721
|
-
check_intdivzero(*(dtype*)p2);
|
|
2722
|
-
*(dtype*)p3 = m_mod(*(dtype*)p1, *(dtype*)p2);
|
|
2723
|
-
p1 += s1;
|
|
2724
|
-
p2 += s2;
|
|
2725
|
-
p3 += s3;
|
|
2726
|
-
}
|
|
2727
|
-
}
|
|
2728
|
-
}
|
|
2729
|
-
|
|
2730
|
-
return;
|
|
2731
|
-
//
|
|
2732
|
-
}
|
|
2733
|
-
}
|
|
2734
|
-
for (i = 0; i < n; i++) {
|
|
2735
|
-
dtype x, y, z;
|
|
2736
|
-
GET_DATA_STRIDE(p1, s1, dtype, x);
|
|
2737
|
-
GET_DATA_STRIDE(p2, s2, dtype, y);
|
|
2738
|
-
check_intdivzero(y);
|
|
2739
|
-
z = m_mod(x, y);
|
|
2740
|
-
SET_DATA_STRIDE(p3, s3, dtype, z);
|
|
2741
|
-
}
|
|
2742
|
-
//
|
|
2743
|
-
}
|
|
2744
|
-
#undef check_intdivzero
|
|
2745
|
-
|
|
2746
|
-
// Runs iter_dfloat_mod over (self, other) through na_ndloop.
// The loop is declared as a STRIDE_LOOP with two cT inputs and one cT output;
// whatever na_ndloop returns is handed back to the caller unchanged.
static VALUE dfloat_mod_self(VALUE self, VALUE other) {
  ndfunc_arg_in_t operands[2] = { { cT, 0 }, { cT, 0 } };
  ndfunc_arg_out_t results[1] = { { cT, 0 } };
  ndfunc_t loop_spec = { iter_dfloat_mod, STRIDE_LOOP, 2, 1, operands, results };
  VALUE modulo = na_ndloop(&loop_spec, 2, self, other);

  return modulo;
}
|
|
2753
|
-
|
|
2754
|
-
// Entry point for `self % other`.
// na_upcast picks the result class from the classes of both operands. When
// that class is cT the work is done by dfloat_mod_self; otherwise self is
// converted with id_cast on the chosen class and '%' is sent to the converted
// object with `other` as its argument.
static VALUE dfloat_mod(VALUE self, VALUE other) {
  const VALUE target = na_upcast(rb_obj_class(self), rb_obj_class(other));

  if (target != cT) {
    VALUE promoted = rb_funcall(target, id_cast, 1, self);
    return rb_funcall(promoted, '%', 1, other);
  }
  return dfloat_mod_self(self, other);
}
|
|
2766
|
-
|
|
2767
|
-
// Inner loop for divmod: reads one element from each of loop arguments 0 and
// 1, lets m_divmod fill two results, and writes them to loop arguments 2 and
// 3. All four operands are walked with the stride macros.
static void iter_dfloat_divmod(na_loop_t* const lp) {
  size_t k, len;
  char *lhs, *rhs, *out_a, *out_b;
  ssize_t lhs_step, rhs_step, out_a_step, out_b_step;
  dtype u, v, q, r;

  INIT_COUNTER(lp, len);
  INIT_PTR(lp, 0, lhs, lhs_step);
  INIT_PTR(lp, 1, rhs, rhs_step);
  INIT_PTR(lp, 2, out_a, out_a_step);
  INIT_PTR(lp, 3, out_b, out_b_step);

  for (k = 0; k < len; k++) {
    GET_DATA_STRIDE(lhs, lhs_step, dtype, u);
    GET_DATA_STRIDE(rhs, rhs_step, dtype, v);
    m_divmod(u, v, q, r);
    SET_DATA_STRIDE(out_a, out_a_step, dtype, q);
    SET_DATA_STRIDE(out_b, out_b_step, dtype, r);
  }
}
|
|
2785
|
-
|
|
2786
|
-
// Runs iter_dfloat_divmod over (self, other) through na_ndloop.
// The loop is declared as a STRIDE_LOOP with two cT inputs and two cT
// outputs; whatever na_ndloop returns is handed back unchanged.
static VALUE dfloat_divmod_self(VALUE self, VALUE other) {
  ndfunc_arg_in_t operands[2] = { { cT, 0 }, { cT, 0 } };
  ndfunc_arg_out_t results[2] = { { cT, 0 }, { cT, 0 } };
  ndfunc_t loop_spec = { iter_dfloat_divmod, STRIDE_LOOP, 2, 2, operands, results };
  VALUE outcome = na_ndloop(&loop_spec, 2, self, other);

  return outcome;
}
|
|
2793
|
-
|
|
2794
|
-
// Entry point for `self.divmod(other)`.
// na_upcast picks the result class from the classes of both operands. When
// that class is cT the work is done by dfloat_divmod_self; otherwise self is
// converted with id_cast on the chosen class and id_divmod is sent to the
// converted object with `other` as its argument.
static VALUE dfloat_divmod(VALUE self, VALUE other) {
  const VALUE target = na_upcast(rb_obj_class(self), rb_obj_class(other));

  if (target != cT) {
    VALUE promoted = rb_funcall(target, id_cast, 1, self);
    return rb_funcall(promoted, id_divmod, 1, other);
  }
  return dfloat_divmod_self(self, other);
}
|
|
2805
|
-
|
|
2806
|
-
// Inner loop for power with a dtype exponent: for each element pair taken
// from loop arguments 0 and 1, writes m_pow(base, exponent) to loop
// argument 2. All operands are walked with the stride macros.
static void iter_dfloat_pow(na_loop_t* const lp) {
  size_t remaining;
  char *base_ptr, *exp_ptr, *dst;
  ssize_t base_step, exp_step, dst_step;
  dtype base, exponent;

  INIT_COUNTER(lp, remaining);
  INIT_PTR(lp, 0, base_ptr, base_step);
  INIT_PTR(lp, 1, exp_ptr, exp_step);
  INIT_PTR(lp, 2, dst, dst_step);

  while (remaining > 0) {
    remaining--;
    GET_DATA_STRIDE(base_ptr, base_step, dtype, base);
    GET_DATA_STRIDE(exp_ptr, exp_step, dtype, exponent);
    base = m_pow(base, exponent);
    SET_DATA_STRIDE(dst, dst_step, dtype, base);
  }
}
|
|
2822
|
-
|
|
2823
|
-
// Inner loop for power with an int32_t exponent: the base is read as dtype
// from loop argument 0, the exponent as int32_t from loop argument 1, and
// m_pow_int(base, exponent) is written to loop argument 2.
static void iter_dfloat_pow_int32(na_loop_t* const lp) {
  size_t remaining;
  char *base_ptr, *exp_ptr, *dst;
  ssize_t base_step, exp_step, dst_step;
  dtype base;
  int32_t exponent;

  INIT_COUNTER(lp, remaining);
  INIT_PTR(lp, 0, base_ptr, base_step);
  INIT_PTR(lp, 1, exp_ptr, exp_step);
  INIT_PTR(lp, 2, dst, dst_step);

  while (remaining > 0) {
    remaining--;
    GET_DATA_STRIDE(base_ptr, base_step, dtype, base);
    GET_DATA_STRIDE(exp_ptr, exp_step, int32_t, exponent);
    base = m_pow_int(base, exponent);
    SET_DATA_STRIDE(dst, dst_step, dtype, base);
  }
}
|
|
2840
|
-
|
|
2841
|
-
// Raises self to the power `other` through na_ndloop.
// Two loop definitions are prepared: one whose second input is cT
// (iter_dfloat_pow) and one whose second input is numo_cInt32
// (iter_dfloat_pow_int32). The Int32 variant is selected when `other` is a
// Fixnum or a kind of numo_cInt32; every other exponent uses the cT variant.
static VALUE dfloat_pow_self(VALUE self, VALUE other) {
  ndfunc_arg_in_t float_inputs[2] = { { cT, 0 }, { cT, 0 } };
  ndfunc_arg_in_t int_inputs[2] = { { cT, 0 }, { numo_cInt32, 0 } };
  ndfunc_arg_out_t results[1] = { { cT, 0 } };
  ndfunc_t float_spec = { iter_dfloat_pow, STRIDE_LOOP, 2, 1, float_inputs, results };
  ndfunc_t int_spec = { iter_dfloat_pow_int32, STRIDE_LOOP, 2, 1, int_inputs, results };
  // fixme : use na.integer?
  ndfunc_t* chosen =
    (FIXNUM_P(other) || rb_obj_is_kind_of(other, numo_cInt32)) ? &int_spec : &float_spec;

  return na_ndloop(chosen, 2, self, other);
}
|
|
2855
|
-
|
|
2856
|
-
// Entry point for `self ** other`.
// na_upcast picks the result class from the classes of both operands. When
// that class is cT the work is done by dfloat_pow_self; otherwise self is
// converted with id_cast on the chosen class and id_pow is sent to the
// converted object with `other` as its argument.
static VALUE dfloat_pow(VALUE self, VALUE other) {
  const VALUE target = na_upcast(rb_obj_class(self), rb_obj_class(other));

  if (target != cT) {
    VALUE promoted = rb_funcall(target, id_cast, 1, self);
    return rb_funcall(promoted, id_pow, 1, other);
  }
  return dfloat_pow_self(self, other);
}
|
|
2867
|
-
|
|
2868
|
-
// Inner loop for unary minus: writes m_minus(x) to loop argument 1 for every
// element x of loop argument 0.
//
// Either operand may carry an index array; each index/stride combination has
// its own loop. When neither operand is indexed, aligned contiguous data is
// handled by plain array indexing, aligned strided data by pointer stepping,
// and everything else by the stride macros.
static void iter_dfloat_minus(na_loop_t* const lp) {
  size_t k, len;
  char *src, *dst;
  ssize_t src_step, dst_step;
  size_t *src_idx, *dst_idx;
  dtype v;

  INIT_COUNTER(lp, len);
  INIT_PTR_IDX(lp, 0, src, src_step, src_idx);
  INIT_PTR_IDX(lp, 1, dst, dst_step, dst_idx);

  if (src_idx && dst_idx) { // indexed -> indexed
    for (k = 0; k < len; k++) {
      GET_DATA_INDEX(src, src_idx, dtype, v);
      v = m_minus(v);
      SET_DATA_INDEX(dst, dst_idx, dtype, v);
    }
    return;
  }

  if (src_idx) { // indexed -> strided
    for (k = 0; k < len; k++) {
      GET_DATA_INDEX(src, src_idx, dtype, v);
      v = m_minus(v);
      SET_DATA_STRIDE(dst, dst_step, dtype, v);
    }
    return;
  }

  if (dst_idx) { // strided -> indexed
    for (k = 0; k < len; k++) {
      GET_DATA_STRIDE(src, src_step, dtype, v);
      v = m_minus(v);
      SET_DATA_INDEX(dst, dst_idx, dtype, v);
    }
    return;
  }

  // strided -> strided
  if (is_aligned(src, sizeof(dtype)) && is_aligned(dst, sizeof(dtype))) {
    if (src_step == sizeof(dtype) && dst_step == sizeof(dtype)) {
      dtype* const in = (dtype*)src;
      dtype* const out = (dtype*)dst;

      for (k = 0; k < len; k++) {
        out[k] = m_minus(in[k]);
      }
      return;
    }
    if (is_aligned_step(src_step, sizeof(dtype)) && is_aligned_step(dst_step, sizeof(dtype))) {
      for (k = 0; k < len; k++) {
        *(dtype*)dst = m_minus(*(dtype*)src);
        src += src_step;
        dst += dst_step;
      }
      return;
    }
  }

  for (k = 0; k < len; k++) {
    GET_DATA_STRIDE(src, src_step, dtype, v);
    v = m_minus(v);
    SET_DATA_STRIDE(dst, dst_step, dtype, v);
  }
}
|
|
2929
|
-
|
|
2930
|
-
// Runs iter_dfloat_minus over self through na_ndloop.
// The loop is declared as a FULL_LOOP with one cT input and one cT output;
// whatever na_ndloop returns is handed back to the caller unchanged.
static VALUE dfloat_minus(VALUE self) {
  ndfunc_arg_in_t operand[1] = { { cT, 0 } };
  ndfunc_arg_out_t result[1] = { { cT, 0 } };
  ndfunc_t loop_spec = { iter_dfloat_minus, FULL_LOOP, 1, 1, operand, result };
  VALUE negated = na_ndloop(&loop_spec, 1, self);

  return negated;
}
|
|
2937
|
-
|
|
2938
|
-
// Inner loop for reciprocal: writes m_reciprocal(x) to loop argument 1 for
// every element x of loop argument 0.
//
// Either operand may carry an index array; each index/stride combination has
// its own loop. When neither operand is indexed, aligned contiguous data is
// handled by plain array indexing, aligned strided data by pointer stepping,
// and everything else by the stride macros.
static void iter_dfloat_reciprocal(na_loop_t* const lp) {
  size_t k, len;
  char *src, *dst;
  ssize_t src_step, dst_step;
  size_t *src_idx, *dst_idx;
  dtype v;

  INIT_COUNTER(lp, len);
  INIT_PTR_IDX(lp, 0, src, src_step, src_idx);
  INIT_PTR_IDX(lp, 1, dst, dst_step, dst_idx);

  if (src_idx && dst_idx) { // indexed -> indexed
    for (k = 0; k < len; k++) {
      GET_DATA_INDEX(src, src_idx, dtype, v);
      v = m_reciprocal(v);
      SET_DATA_INDEX(dst, dst_idx, dtype, v);
    }
    return;
  }

  if (src_idx) { // indexed -> strided
    for (k = 0; k < len; k++) {
      GET_DATA_INDEX(src, src_idx, dtype, v);
      v = m_reciprocal(v);
      SET_DATA_STRIDE(dst, dst_step, dtype, v);
    }
    return;
  }

  if (dst_idx) { // strided -> indexed
    for (k = 0; k < len; k++) {
      GET_DATA_STRIDE(src, src_step, dtype, v);
      v = m_reciprocal(v);
      SET_DATA_INDEX(dst, dst_idx, dtype, v);
    }
    return;
  }

  // strided -> strided
  if (is_aligned(src, sizeof(dtype)) && is_aligned(dst, sizeof(dtype))) {
    if (src_step == sizeof(dtype) && dst_step == sizeof(dtype)) {
      dtype* const in = (dtype*)src;
      dtype* const out = (dtype*)dst;

      for (k = 0; k < len; k++) {
        out[k] = m_reciprocal(in[k]);
      }
      return;
    }
    if (is_aligned_step(src_step, sizeof(dtype)) && is_aligned_step(dst_step, sizeof(dtype))) {
      for (k = 0; k < len; k++) {
        *(dtype*)dst = m_reciprocal(*(dtype*)src);
        src += src_step;
        dst += dst_step;
      }
      return;
    }
  }

  for (k = 0; k < len; k++) {
    GET_DATA_STRIDE(src, src_step, dtype, v);
    v = m_reciprocal(v);
    SET_DATA_STRIDE(dst, dst_step, dtype, v);
  }
}
|
|
2999
|
-
|
|
3000
|
-
// Runs iter_dfloat_reciprocal over self through na_ndloop.
// The loop is declared as a FULL_LOOP with one cT input and one cT output;
// whatever na_ndloop returns is handed back to the caller unchanged.
static VALUE dfloat_reciprocal(VALUE self) {
  ndfunc_arg_in_t operand[1] = { { cT, 0 } };
  ndfunc_arg_out_t result[1] = { { cT, 0 } };
  ndfunc_t loop_spec = { iter_dfloat_reciprocal, FULL_LOOP, 1, 1, operand, result };
  VALUE inverted = na_ndloop(&loop_spec, 1, self);

  return inverted;
}
|
|
3007
|
-
|
|
3008
|
-
// Inner loop for sign: writes m_sign(x) to loop argument 1 for every element
// x of loop argument 0.
//
// Either operand may carry an index array; each index/stride combination has
// its own loop. When neither operand is indexed, aligned contiguous data is
// handled by plain array indexing, aligned strided data by pointer stepping,
// and everything else by the stride macros.
static void iter_dfloat_sign(na_loop_t* const lp) {
  size_t k, len;
  char *src, *dst;
  ssize_t src_step, dst_step;
  size_t *src_idx, *dst_idx;
  dtype v;

  INIT_COUNTER(lp, len);
  INIT_PTR_IDX(lp, 0, src, src_step, src_idx);
  INIT_PTR_IDX(lp, 1, dst, dst_step, dst_idx);

  if (src_idx && dst_idx) { // indexed -> indexed
    for (k = 0; k < len; k++) {
      GET_DATA_INDEX(src, src_idx, dtype, v);
      v = m_sign(v);
      SET_DATA_INDEX(dst, dst_idx, dtype, v);
    }
    return;
  }

  if (src_idx) { // indexed -> strided
    for (k = 0; k < len; k++) {
      GET_DATA_INDEX(src, src_idx, dtype, v);
      v = m_sign(v);
      SET_DATA_STRIDE(dst, dst_step, dtype, v);
    }
    return;
  }

  if (dst_idx) { // strided -> indexed
    for (k = 0; k < len; k++) {
      GET_DATA_STRIDE(src, src_step, dtype, v);
      v = m_sign(v);
      SET_DATA_INDEX(dst, dst_idx, dtype, v);
    }
    return;
  }

  // strided -> strided
  if (is_aligned(src, sizeof(dtype)) && is_aligned(dst, sizeof(dtype))) {
    if (src_step == sizeof(dtype) && dst_step == sizeof(dtype)) {
      dtype* const in = (dtype*)src;
      dtype* const out = (dtype*)dst;

      for (k = 0; k < len; k++) {
        out[k] = m_sign(in[k]);
      }
      return;
    }
    if (is_aligned_step(src_step, sizeof(dtype)) && is_aligned_step(dst_step, sizeof(dtype))) {
      for (k = 0; k < len; k++) {
        *(dtype*)dst = m_sign(*(dtype*)src);
        src += src_step;
        dst += dst_step;
      }
      return;
    }
  }

  for (k = 0; k < len; k++) {
    GET_DATA_STRIDE(src, src_step, dtype, v);
    v = m_sign(v);
    SET_DATA_STRIDE(dst, dst_step, dtype, v);
  }
}
|
|
3069
|
-
|
|
3070
|
-
static VALUE dfloat_sign(VALUE self) {
|
|
3071
|
-
ndfunc_arg_in_t ain[1] = { { cT, 0 } };
|
|
3072
|
-
ndfunc_arg_out_t aout[1] = { { cT, 0 } };
|
|
3073
|
-
ndfunc_t ndf = { iter_dfloat_sign, FULL_LOOP, 1, 1, ain, aout };
|
|
3074
|
-
|
|
3075
|
-
return na_ndloop(&ndf, 1, self);
|
|
3076
|
-
}
|
|
3077
|
-
|
|
3078
|
-
static void iter_dfloat_square(na_loop_t* const lp) {
|
|
3079
|
-
size_t i, n;
|
|
3080
|
-
char *p1, *p2;
|
|
3081
|
-
ssize_t s1, s2;
|
|
3082
|
-
size_t *idx1, *idx2;
|
|
3083
|
-
dtype x;
|
|
3084
|
-
|
|
3085
|
-
INIT_COUNTER(lp, n);
|
|
3086
|
-
INIT_PTR_IDX(lp, 0, p1, s1, idx1);
|
|
3087
|
-
INIT_PTR_IDX(lp, 1, p2, s2, idx2);
|
|
3088
|
-
|
|
3089
|
-
if (idx1) {
|
|
3090
|
-
if (idx2) {
|
|
3091
|
-
for (i = 0; i < n; i++) {
|
|
3092
|
-
GET_DATA_INDEX(p1, idx1, dtype, x);
|
|
3093
|
-
x = m_square(x);
|
|
3094
|
-
SET_DATA_INDEX(p2, idx2, dtype, x);
|
|
3095
|
-
}
|
|
3096
|
-
} else {
|
|
3097
|
-
for (i = 0; i < n; i++) {
|
|
3098
|
-
GET_DATA_INDEX(p1, idx1, dtype, x);
|
|
3099
|
-
x = m_square(x);
|
|
3100
|
-
SET_DATA_STRIDE(p2, s2, dtype, x);
|
|
3101
|
-
}
|
|
3102
|
-
}
|
|
3103
|
-
} else {
|
|
3104
|
-
if (idx2) {
|
|
3105
|
-
for (i = 0; i < n; i++) {
|
|
3106
|
-
GET_DATA_STRIDE(p1, s1, dtype, x);
|
|
3107
|
-
x = m_square(x);
|
|
3108
|
-
SET_DATA_INDEX(p2, idx2, dtype, x);
|
|
3109
|
-
}
|
|
3110
|
-
} else {
|
|
3111
|
-
//
|
|
3112
|
-
if (is_aligned(p1, sizeof(dtype)) && is_aligned(p2, sizeof(dtype))) {
|
|
3113
|
-
if (s1 == sizeof(dtype) && s2 == sizeof(dtype)) {
|
|
3114
|
-
for (i = 0; i < n; i++) {
|
|
3115
|
-
((dtype*)p2)[i] = m_square(((dtype*)p1)[i]);
|
|
3116
|
-
}
|
|
3117
|
-
return;
|
|
3118
|
-
}
|
|
3119
|
-
if (is_aligned_step(s1, sizeof(dtype)) && is_aligned_step(s2, sizeof(dtype))) {
|
|
3120
|
-
//
|
|
3121
|
-
for (i = 0; i < n; i++) {
|
|
3122
|
-
*(dtype*)p2 = m_square(*(dtype*)p1);
|
|
3123
|
-
p1 += s1;
|
|
3124
|
-
p2 += s2;
|
|
3125
|
-
}
|
|
3126
|
-
return;
|
|
3127
|
-
//
|
|
3128
|
-
}
|
|
3129
|
-
}
|
|
3130
|
-
for (i = 0; i < n; i++) {
|
|
3131
|
-
GET_DATA_STRIDE(p1, s1, dtype, x);
|
|
3132
|
-
x = m_square(x);
|
|
3133
|
-
SET_DATA_STRIDE(p2, s2, dtype, x);
|
|
3134
|
-
}
|
|
3135
|
-
//
|
|
3136
|
-
}
|
|
3137
|
-
}
|
|
3138
|
-
}
|
|
3139
|
-
|
|
3140
|
-
static VALUE dfloat_square(VALUE self) {
|
|
3141
|
-
ndfunc_arg_in_t ain[1] = { { cT, 0 } };
|
|
3142
|
-
ndfunc_arg_out_t aout[1] = { { cT, 0 } };
|
|
3143
|
-
ndfunc_t ndf = { iter_dfloat_square, FULL_LOOP, 1, 1, ain, aout };
|
|
3144
|
-
|
|
3145
|
-
return na_ndloop(&ndf, 1, self);
|
|
3146
|
-
}
|
|
3147
|
-
|
|
3148
|
-
static void iter_dfloat_eq(na_loop_t* const lp) {
|
|
3149
|
-
size_t i;
|
|
3150
|
-
char *p1, *p2;
|
|
3151
|
-
BIT_DIGIT* a3;
|
|
3152
|
-
size_t p3;
|
|
3153
|
-
ssize_t s1, s2, s3;
|
|
3154
|
-
dtype x, y;
|
|
3155
|
-
BIT_DIGIT b;
|
|
3156
|
-
INIT_COUNTER(lp, i);
|
|
3157
|
-
INIT_PTR(lp, 0, p1, s1);
|
|
3158
|
-
INIT_PTR(lp, 1, p2, s2);
|
|
3159
|
-
INIT_PTR_BIT(lp, 2, a3, p3, s3);
|
|
3160
|
-
for (; i--;) {
|
|
3161
|
-
GET_DATA_STRIDE(p1, s1, dtype, x);
|
|
3162
|
-
GET_DATA_STRIDE(p2, s2, dtype, y);
|
|
3163
|
-
b = (m_eq(x, y)) ? 1 : 0;
|
|
3164
|
-
STORE_BIT(a3, p3, b);
|
|
3165
|
-
p3 += s3;
|
|
3166
|
-
}
|
|
3167
|
-
}
|
|
3168
|
-
|
|
3169
|
-
static VALUE dfloat_eq_self(VALUE self, VALUE other) {
|
|
3170
|
-
ndfunc_arg_in_t ain[2] = { { cT, 0 }, { cT, 0 } };
|
|
3171
|
-
ndfunc_arg_out_t aout[1] = { { numo_cBit, 0 } };
|
|
3172
|
-
ndfunc_t ndf = { iter_dfloat_eq, STRIDE_LOOP, 2, 1, ain, aout };
|
|
3173
|
-
|
|
3174
|
-
return na_ndloop(&ndf, 2, self, other);
|
|
3175
|
-
}
|
|
3176
|
-
|
|
3177
|
-
static VALUE dfloat_eq(VALUE self, VALUE other) {
|
|
3178
|
-
|
|
3179
|
-
VALUE klass, v;
|
|
3180
|
-
klass = na_upcast(rb_obj_class(self), rb_obj_class(other));
|
|
3181
|
-
if (klass == cT) {
|
|
3182
|
-
return dfloat_eq_self(self, other);
|
|
3183
|
-
} else {
|
|
3184
|
-
v = rb_funcall(klass, id_cast, 1, self);
|
|
3185
|
-
return rb_funcall(v, id_eq, 1, other);
|
|
3186
|
-
}
|
|
3187
|
-
}
|
|
3188
|
-
|
|
3189
|
-
static void iter_dfloat_ne(na_loop_t* const lp) {
|
|
3190
|
-
size_t i;
|
|
3191
|
-
char *p1, *p2;
|
|
3192
|
-
BIT_DIGIT* a3;
|
|
3193
|
-
size_t p3;
|
|
3194
|
-
ssize_t s1, s2, s3;
|
|
3195
|
-
dtype x, y;
|
|
3196
|
-
BIT_DIGIT b;
|
|
3197
|
-
INIT_COUNTER(lp, i);
|
|
3198
|
-
INIT_PTR(lp, 0, p1, s1);
|
|
3199
|
-
INIT_PTR(lp, 1, p2, s2);
|
|
3200
|
-
INIT_PTR_BIT(lp, 2, a3, p3, s3);
|
|
3201
|
-
for (; i--;) {
|
|
3202
|
-
GET_DATA_STRIDE(p1, s1, dtype, x);
|
|
3203
|
-
GET_DATA_STRIDE(p2, s2, dtype, y);
|
|
3204
|
-
b = (m_ne(x, y)) ? 1 : 0;
|
|
3205
|
-
STORE_BIT(a3, p3, b);
|
|
3206
|
-
p3 += s3;
|
|
3207
|
-
}
|
|
3208
|
-
}
|
|
3209
|
-
|
|
3210
|
-
static VALUE dfloat_ne_self(VALUE self, VALUE other) {
|
|
3211
|
-
ndfunc_arg_in_t ain[2] = { { cT, 0 }, { cT, 0 } };
|
|
3212
|
-
ndfunc_arg_out_t aout[1] = { { numo_cBit, 0 } };
|
|
3213
|
-
ndfunc_t ndf = { iter_dfloat_ne, STRIDE_LOOP, 2, 1, ain, aout };
|
|
3214
|
-
|
|
3215
|
-
return na_ndloop(&ndf, 2, self, other);
|
|
3216
|
-
}
|
|
3217
|
-
|
|
3218
|
-
static VALUE dfloat_ne(VALUE self, VALUE other) {
|
|
3219
|
-
|
|
3220
|
-
VALUE klass, v;
|
|
3221
|
-
klass = na_upcast(rb_obj_class(self), rb_obj_class(other));
|
|
3222
|
-
if (klass == cT) {
|
|
3223
|
-
return dfloat_ne_self(self, other);
|
|
3224
|
-
} else {
|
|
3225
|
-
v = rb_funcall(klass, id_cast, 1, self);
|
|
3226
|
-
return rb_funcall(v, id_ne, 1, other);
|
|
3227
|
-
}
|
|
3228
|
-
}
|
|
3229
|
-
|
|
3230
|
-
static void iter_dfloat_nearly_eq(na_loop_t* const lp) {
|
|
3231
|
-
size_t i;
|
|
3232
|
-
char *p1, *p2;
|
|
3233
|
-
BIT_DIGIT* a3;
|
|
3234
|
-
size_t p3;
|
|
3235
|
-
ssize_t s1, s2, s3;
|
|
3236
|
-
dtype x, y;
|
|
3237
|
-
BIT_DIGIT b;
|
|
3238
|
-
INIT_COUNTER(lp, i);
|
|
3239
|
-
INIT_PTR(lp, 0, p1, s1);
|
|
3240
|
-
INIT_PTR(lp, 1, p2, s2);
|
|
3241
|
-
INIT_PTR_BIT(lp, 2, a3, p3, s3);
|
|
3242
|
-
for (; i--;) {
|
|
3243
|
-
GET_DATA_STRIDE(p1, s1, dtype, x);
|
|
3244
|
-
GET_DATA_STRIDE(p2, s2, dtype, y);
|
|
3245
|
-
b = (m_nearly_eq(x, y)) ? 1 : 0;
|
|
3246
|
-
STORE_BIT(a3, p3, b);
|
|
3247
|
-
p3 += s3;
|
|
3248
|
-
}
|
|
3249
|
-
}
|
|
3250
|
-
|
|
3251
|
-
static VALUE dfloat_nearly_eq_self(VALUE self, VALUE other) {
|
|
3252
|
-
ndfunc_arg_in_t ain[2] = { { cT, 0 }, { cT, 0 } };
|
|
3253
|
-
ndfunc_arg_out_t aout[1] = { { numo_cBit, 0 } };
|
|
3254
|
-
ndfunc_t ndf = { iter_dfloat_nearly_eq, STRIDE_LOOP, 2, 1, ain, aout };
|
|
3255
|
-
|
|
3256
|
-
return na_ndloop(&ndf, 2, self, other);
|
|
3257
|
-
}
|
|
3258
|
-
|
|
3259
|
-
static VALUE dfloat_nearly_eq(VALUE self, VALUE other) {
|
|
3260
|
-
|
|
3261
|
-
VALUE klass, v;
|
|
3262
|
-
klass = na_upcast(rb_obj_class(self), rb_obj_class(other));
|
|
3263
|
-
if (klass == cT) {
|
|
3264
|
-
return dfloat_nearly_eq_self(self, other);
|
|
3265
|
-
} else {
|
|
3266
|
-
v = rb_funcall(klass, id_cast, 1, self);
|
|
3267
|
-
return rb_funcall(v, id_nearly_eq, 1, other);
|
|
3268
|
-
}
|
|
3269
|
-
}
|
|
3270
|
-
|
|
3271
|
-
static void iter_dfloat_floor(na_loop_t* const lp) {
|
|
3272
|
-
size_t i, n;
|
|
3273
|
-
char *p1, *p2;
|
|
3274
|
-
ssize_t s1, s2;
|
|
3275
|
-
size_t *idx1, *idx2;
|
|
3276
|
-
dtype x;
|
|
3277
|
-
|
|
3278
|
-
INIT_COUNTER(lp, n);
|
|
3279
|
-
INIT_PTR_IDX(lp, 0, p1, s1, idx1);
|
|
3280
|
-
INIT_PTR_IDX(lp, 1, p2, s2, idx2);
|
|
3281
|
-
|
|
3282
|
-
if (idx1) {
|
|
3283
|
-
if (idx2) {
|
|
3284
|
-
for (i = 0; i < n; i++) {
|
|
3285
|
-
GET_DATA_INDEX(p1, idx1, dtype, x);
|
|
3286
|
-
x = m_floor(x);
|
|
3287
|
-
SET_DATA_INDEX(p2, idx2, dtype, x);
|
|
3288
|
-
}
|
|
3289
|
-
} else {
|
|
3290
|
-
for (i = 0; i < n; i++) {
|
|
3291
|
-
GET_DATA_INDEX(p1, idx1, dtype, x);
|
|
3292
|
-
x = m_floor(x);
|
|
3293
|
-
SET_DATA_STRIDE(p2, s2, dtype, x);
|
|
3294
|
-
}
|
|
3295
|
-
}
|
|
3296
|
-
} else {
|
|
3297
|
-
if (idx2) {
|
|
3298
|
-
for (i = 0; i < n; i++) {
|
|
3299
|
-
GET_DATA_STRIDE(p1, s1, dtype, x);
|
|
3300
|
-
x = m_floor(x);
|
|
3301
|
-
SET_DATA_INDEX(p2, idx2, dtype, x);
|
|
3302
|
-
}
|
|
3303
|
-
} else {
|
|
3304
|
-
//
|
|
3305
|
-
if (is_aligned(p1, sizeof(dtype)) && is_aligned(p2, sizeof(dtype))) {
|
|
3306
|
-
if (s1 == sizeof(dtype) && s2 == sizeof(dtype)) {
|
|
3307
|
-
for (i = 0; i < n; i++) {
|
|
3308
|
-
((dtype*)p2)[i] = m_floor(((dtype*)p1)[i]);
|
|
3309
|
-
}
|
|
3310
|
-
return;
|
|
3311
|
-
}
|
|
3312
|
-
if (is_aligned_step(s1, sizeof(dtype)) && is_aligned_step(s2, sizeof(dtype))) {
|
|
3313
|
-
//
|
|
3314
|
-
for (i = 0; i < n; i++) {
|
|
3315
|
-
*(dtype*)p2 = m_floor(*(dtype*)p1);
|
|
3316
|
-
p1 += s1;
|
|
3317
|
-
p2 += s2;
|
|
3318
|
-
}
|
|
3319
|
-
return;
|
|
3320
|
-
//
|
|
3321
|
-
}
|
|
3322
|
-
}
|
|
3323
|
-
for (i = 0; i < n; i++) {
|
|
3324
|
-
GET_DATA_STRIDE(p1, s1, dtype, x);
|
|
3325
|
-
x = m_floor(x);
|
|
3326
|
-
SET_DATA_STRIDE(p2, s2, dtype, x);
|
|
3327
|
-
}
|
|
3328
|
-
//
|
|
3329
|
-
}
|
|
3330
|
-
}
|
|
3331
|
-
}
|
|
3332
|
-
|
|
3333
|
-
static VALUE dfloat_floor(VALUE self) {
|
|
3334
|
-
ndfunc_arg_in_t ain[1] = { { cT, 0 } };
|
|
3335
|
-
ndfunc_arg_out_t aout[1] = { { cT, 0 } };
|
|
3336
|
-
ndfunc_t ndf = { iter_dfloat_floor, FULL_LOOP, 1, 1, ain, aout };
|
|
3337
|
-
|
|
3338
|
-
return na_ndloop(&ndf, 1, self);
|
|
3339
|
-
}
|
|
3340
|
-
|
|
3341
|
-
static void iter_dfloat_round(na_loop_t* const lp) {
|
|
3342
|
-
size_t i, n;
|
|
3343
|
-
char *p1, *p2;
|
|
3344
|
-
ssize_t s1, s2;
|
|
3345
|
-
size_t *idx1, *idx2;
|
|
3346
|
-
dtype x;
|
|
3347
|
-
|
|
3348
|
-
INIT_COUNTER(lp, n);
|
|
3349
|
-
INIT_PTR_IDX(lp, 0, p1, s1, idx1);
|
|
3350
|
-
INIT_PTR_IDX(lp, 1, p2, s2, idx2);
|
|
3351
|
-
|
|
3352
|
-
if (idx1) {
|
|
3353
|
-
if (idx2) {
|
|
3354
|
-
for (i = 0; i < n; i++) {
|
|
3355
|
-
GET_DATA_INDEX(p1, idx1, dtype, x);
|
|
3356
|
-
x = m_round(x);
|
|
3357
|
-
SET_DATA_INDEX(p2, idx2, dtype, x);
|
|
3358
|
-
}
|
|
3359
|
-
} else {
|
|
3360
|
-
for (i = 0; i < n; i++) {
|
|
3361
|
-
GET_DATA_INDEX(p1, idx1, dtype, x);
|
|
3362
|
-
x = m_round(x);
|
|
3363
|
-
SET_DATA_STRIDE(p2, s2, dtype, x);
|
|
3364
|
-
}
|
|
3365
|
-
}
|
|
3366
|
-
} else {
|
|
3367
|
-
if (idx2) {
|
|
3368
|
-
for (i = 0; i < n; i++) {
|
|
3369
|
-
GET_DATA_STRIDE(p1, s1, dtype, x);
|
|
3370
|
-
x = m_round(x);
|
|
3371
|
-
SET_DATA_INDEX(p2, idx2, dtype, x);
|
|
3372
|
-
}
|
|
3373
|
-
} else {
|
|
3374
|
-
//
|
|
3375
|
-
if (is_aligned(p1, sizeof(dtype)) && is_aligned(p2, sizeof(dtype))) {
|
|
3376
|
-
if (s1 == sizeof(dtype) && s2 == sizeof(dtype)) {
|
|
3377
|
-
for (i = 0; i < n; i++) {
|
|
3378
|
-
((dtype*)p2)[i] = m_round(((dtype*)p1)[i]);
|
|
3379
|
-
}
|
|
3380
|
-
return;
|
|
3381
|
-
}
|
|
3382
|
-
if (is_aligned_step(s1, sizeof(dtype)) && is_aligned_step(s2, sizeof(dtype))) {
|
|
3383
|
-
//
|
|
3384
|
-
for (i = 0; i < n; i++) {
|
|
3385
|
-
*(dtype*)p2 = m_round(*(dtype*)p1);
|
|
3386
|
-
p1 += s1;
|
|
3387
|
-
p2 += s2;
|
|
3388
|
-
}
|
|
3389
|
-
return;
|
|
3390
|
-
//
|
|
3391
|
-
}
|
|
3392
|
-
}
|
|
3393
|
-
for (i = 0; i < n; i++) {
|
|
3394
|
-
GET_DATA_STRIDE(p1, s1, dtype, x);
|
|
3395
|
-
x = m_round(x);
|
|
3396
|
-
SET_DATA_STRIDE(p2, s2, dtype, x);
|
|
3397
|
-
}
|
|
3398
|
-
//
|
|
3399
|
-
}
|
|
3400
|
-
}
|
|
3401
|
-
}
|
|
3402
|
-
|
|
3403
|
-
static VALUE dfloat_round(VALUE self) {
|
|
3404
|
-
ndfunc_arg_in_t ain[1] = { { cT, 0 } };
|
|
3405
|
-
ndfunc_arg_out_t aout[1] = { { cT, 0 } };
|
|
3406
|
-
ndfunc_t ndf = { iter_dfloat_round, FULL_LOOP, 1, 1, ain, aout };
|
|
3407
|
-
|
|
3408
|
-
return na_ndloop(&ndf, 1, self);
|
|
3409
|
-
}
|
|
3410
|
-
|
|
3411
|
-
static void iter_dfloat_ceil(na_loop_t* const lp) {
|
|
3412
|
-
size_t i, n;
|
|
3413
|
-
char *p1, *p2;
|
|
3414
|
-
ssize_t s1, s2;
|
|
3415
|
-
size_t *idx1, *idx2;
|
|
3416
|
-
dtype x;
|
|
3417
|
-
|
|
3418
|
-
INIT_COUNTER(lp, n);
|
|
3419
|
-
INIT_PTR_IDX(lp, 0, p1, s1, idx1);
|
|
3420
|
-
INIT_PTR_IDX(lp, 1, p2, s2, idx2);
|
|
3421
|
-
|
|
3422
|
-
if (idx1) {
|
|
3423
|
-
if (idx2) {
|
|
3424
|
-
for (i = 0; i < n; i++) {
|
|
3425
|
-
GET_DATA_INDEX(p1, idx1, dtype, x);
|
|
3426
|
-
x = m_ceil(x);
|
|
3427
|
-
SET_DATA_INDEX(p2, idx2, dtype, x);
|
|
3428
|
-
}
|
|
3429
|
-
} else {
|
|
3430
|
-
for (i = 0; i < n; i++) {
|
|
3431
|
-
GET_DATA_INDEX(p1, idx1, dtype, x);
|
|
3432
|
-
x = m_ceil(x);
|
|
3433
|
-
SET_DATA_STRIDE(p2, s2, dtype, x);
|
|
3434
|
-
}
|
|
3435
|
-
}
|
|
3436
|
-
} else {
|
|
3437
|
-
if (idx2) {
|
|
3438
|
-
for (i = 0; i < n; i++) {
|
|
3439
|
-
GET_DATA_STRIDE(p1, s1, dtype, x);
|
|
3440
|
-
x = m_ceil(x);
|
|
3441
|
-
SET_DATA_INDEX(p2, idx2, dtype, x);
|
|
3442
|
-
}
|
|
3443
|
-
} else {
|
|
3444
|
-
//
|
|
3445
|
-
if (is_aligned(p1, sizeof(dtype)) && is_aligned(p2, sizeof(dtype))) {
|
|
3446
|
-
if (s1 == sizeof(dtype) && s2 == sizeof(dtype)) {
|
|
3447
|
-
for (i = 0; i < n; i++) {
|
|
3448
|
-
((dtype*)p2)[i] = m_ceil(((dtype*)p1)[i]);
|
|
3449
|
-
}
|
|
3450
|
-
return;
|
|
3451
|
-
}
|
|
3452
|
-
if (is_aligned_step(s1, sizeof(dtype)) && is_aligned_step(s2, sizeof(dtype))) {
|
|
3453
|
-
//
|
|
3454
|
-
for (i = 0; i < n; i++) {
|
|
3455
|
-
*(dtype*)p2 = m_ceil(*(dtype*)p1);
|
|
3456
|
-
p1 += s1;
|
|
3457
|
-
p2 += s2;
|
|
3458
|
-
}
|
|
3459
|
-
return;
|
|
3460
|
-
//
|
|
3461
|
-
}
|
|
3462
|
-
}
|
|
3463
|
-
for (i = 0; i < n; i++) {
|
|
3464
|
-
GET_DATA_STRIDE(p1, s1, dtype, x);
|
|
3465
|
-
x = m_ceil(x);
|
|
3466
|
-
SET_DATA_STRIDE(p2, s2, dtype, x);
|
|
3467
|
-
}
|
|
3468
|
-
//
|
|
3469
|
-
}
|
|
3470
|
-
}
|
|
3471
|
-
}
|
|
3472
|
-
|
|
3473
|
-
static VALUE dfloat_ceil(VALUE self) {
|
|
3474
|
-
ndfunc_arg_in_t ain[1] = { { cT, 0 } };
|
|
3475
|
-
ndfunc_arg_out_t aout[1] = { { cT, 0 } };
|
|
3476
|
-
ndfunc_t ndf = { iter_dfloat_ceil, FULL_LOOP, 1, 1, ain, aout };
|
|
3477
|
-
|
|
3478
|
-
return na_ndloop(&ndf, 1, self);
|
|
3479
|
-
}
|
|
3480
|
-
|
|
3481
|
-
static void iter_dfloat_trunc(na_loop_t* const lp) {
|
|
3482
|
-
size_t i, n;
|
|
3483
|
-
char *p1, *p2;
|
|
3484
|
-
ssize_t s1, s2;
|
|
3485
|
-
size_t *idx1, *idx2;
|
|
3486
|
-
dtype x;
|
|
3487
|
-
|
|
3488
|
-
INIT_COUNTER(lp, n);
|
|
3489
|
-
INIT_PTR_IDX(lp, 0, p1, s1, idx1);
|
|
3490
|
-
INIT_PTR_IDX(lp, 1, p2, s2, idx2);
|
|
3491
|
-
|
|
3492
|
-
if (idx1) {
|
|
3493
|
-
if (idx2) {
|
|
3494
|
-
for (i = 0; i < n; i++) {
|
|
3495
|
-
GET_DATA_INDEX(p1, idx1, dtype, x);
|
|
3496
|
-
x = m_trunc(x);
|
|
3497
|
-
SET_DATA_INDEX(p2, idx2, dtype, x);
|
|
3498
|
-
}
|
|
3499
|
-
} else {
|
|
3500
|
-
for (i = 0; i < n; i++) {
|
|
3501
|
-
GET_DATA_INDEX(p1, idx1, dtype, x);
|
|
3502
|
-
x = m_trunc(x);
|
|
3503
|
-
SET_DATA_STRIDE(p2, s2, dtype, x);
|
|
3504
|
-
}
|
|
3505
|
-
}
|
|
3506
|
-
} else {
|
|
3507
|
-
if (idx2) {
|
|
3508
|
-
for (i = 0; i < n; i++) {
|
|
3509
|
-
GET_DATA_STRIDE(p1, s1, dtype, x);
|
|
3510
|
-
x = m_trunc(x);
|
|
3511
|
-
SET_DATA_INDEX(p2, idx2, dtype, x);
|
|
3512
|
-
}
|
|
3513
|
-
} else {
|
|
3514
|
-
//
|
|
3515
|
-
if (is_aligned(p1, sizeof(dtype)) && is_aligned(p2, sizeof(dtype))) {
|
|
3516
|
-
if (s1 == sizeof(dtype) && s2 == sizeof(dtype)) {
|
|
3517
|
-
for (i = 0; i < n; i++) {
|
|
3518
|
-
((dtype*)p2)[i] = m_trunc(((dtype*)p1)[i]);
|
|
3519
|
-
}
|
|
3520
|
-
return;
|
|
3521
|
-
}
|
|
3522
|
-
if (is_aligned_step(s1, sizeof(dtype)) && is_aligned_step(s2, sizeof(dtype))) {
|
|
3523
|
-
//
|
|
3524
|
-
for (i = 0; i < n; i++) {
|
|
3525
|
-
*(dtype*)p2 = m_trunc(*(dtype*)p1);
|
|
3526
|
-
p1 += s1;
|
|
3527
|
-
p2 += s2;
|
|
3528
|
-
}
|
|
3529
|
-
return;
|
|
3530
|
-
//
|
|
3531
|
-
}
|
|
3532
|
-
}
|
|
3533
|
-
for (i = 0; i < n; i++) {
|
|
3534
|
-
GET_DATA_STRIDE(p1, s1, dtype, x);
|
|
3535
|
-
x = m_trunc(x);
|
|
3536
|
-
SET_DATA_STRIDE(p2, s2, dtype, x);
|
|
3537
|
-
}
|
|
3538
|
-
//
|
|
3539
|
-
}
|
|
3540
|
-
}
|
|
3541
|
-
}
|
|
3542
|
-
|
|
3543
|
-
static VALUE dfloat_trunc(VALUE self) {
|
|
3544
|
-
ndfunc_arg_in_t ain[1] = { { cT, 0 } };
|
|
3545
|
-
ndfunc_arg_out_t aout[1] = { { cT, 0 } };
|
|
3546
|
-
ndfunc_t ndf = { iter_dfloat_trunc, FULL_LOOP, 1, 1, ain, aout };
|
|
3547
|
-
|
|
3548
|
-
return na_ndloop(&ndf, 1, self);
|
|
3549
|
-
}
|
|
3550
|
-
|
|
3551
|
-
static void iter_dfloat_rint(na_loop_t* const lp) {
|
|
3552
|
-
size_t i, n;
|
|
3553
|
-
char *p1, *p2;
|
|
3554
|
-
ssize_t s1, s2;
|
|
3555
|
-
size_t *idx1, *idx2;
|
|
3556
|
-
dtype x;
|
|
3557
|
-
|
|
3558
|
-
INIT_COUNTER(lp, n);
|
|
3559
|
-
INIT_PTR_IDX(lp, 0, p1, s1, idx1);
|
|
3560
|
-
INIT_PTR_IDX(lp, 1, p2, s2, idx2);
|
|
3561
|
-
|
|
3562
|
-
if (idx1) {
|
|
3563
|
-
if (idx2) {
|
|
3564
|
-
for (i = 0; i < n; i++) {
|
|
3565
|
-
GET_DATA_INDEX(p1, idx1, dtype, x);
|
|
3566
|
-
x = m_rint(x);
|
|
3567
|
-
SET_DATA_INDEX(p2, idx2, dtype, x);
|
|
3568
|
-
}
|
|
3569
|
-
} else {
|
|
3570
|
-
for (i = 0; i < n; i++) {
|
|
3571
|
-
GET_DATA_INDEX(p1, idx1, dtype, x);
|
|
3572
|
-
x = m_rint(x);
|
|
3573
|
-
SET_DATA_STRIDE(p2, s2, dtype, x);
|
|
3574
|
-
}
|
|
3575
|
-
}
|
|
3576
|
-
} else {
|
|
3577
|
-
if (idx2) {
|
|
3578
|
-
for (i = 0; i < n; i++) {
|
|
3579
|
-
GET_DATA_STRIDE(p1, s1, dtype, x);
|
|
3580
|
-
x = m_rint(x);
|
|
3581
|
-
SET_DATA_INDEX(p2, idx2, dtype, x);
|
|
3582
|
-
}
|
|
3583
|
-
} else {
|
|
3584
|
-
//
|
|
3585
|
-
if (is_aligned(p1, sizeof(dtype)) && is_aligned(p2, sizeof(dtype))) {
|
|
3586
|
-
if (s1 == sizeof(dtype) && s2 == sizeof(dtype)) {
|
|
3587
|
-
for (i = 0; i < n; i++) {
|
|
3588
|
-
((dtype*)p2)[i] = m_rint(((dtype*)p1)[i]);
|
|
3589
|
-
}
|
|
3590
|
-
return;
|
|
3591
|
-
}
|
|
3592
|
-
if (is_aligned_step(s1, sizeof(dtype)) && is_aligned_step(s2, sizeof(dtype))) {
|
|
3593
|
-
//
|
|
3594
|
-
for (i = 0; i < n; i++) {
|
|
3595
|
-
*(dtype*)p2 = m_rint(*(dtype*)p1);
|
|
3596
|
-
p1 += s1;
|
|
3597
|
-
p2 += s2;
|
|
3598
|
-
}
|
|
3599
|
-
return;
|
|
3600
|
-
//
|
|
3601
|
-
}
|
|
3602
|
-
}
|
|
3603
|
-
for (i = 0; i < n; i++) {
|
|
3604
|
-
GET_DATA_STRIDE(p1, s1, dtype, x);
|
|
3605
|
-
x = m_rint(x);
|
|
3606
|
-
SET_DATA_STRIDE(p2, s2, dtype, x);
|
|
3607
|
-
}
|
|
3608
|
-
//
|
|
3609
|
-
}
|
|
3610
|
-
}
|
|
3611
|
-
}
|
|
3612
|
-
|
|
3613
|
-
static VALUE dfloat_rint(VALUE self) {
|
|
3614
|
-
ndfunc_arg_in_t ain[1] = { { cT, 0 } };
|
|
3615
|
-
ndfunc_arg_out_t aout[1] = { { cT, 0 } };
|
|
3616
|
-
ndfunc_t ndf = { iter_dfloat_rint, FULL_LOOP, 1, 1, ain, aout };
|
|
3617
|
-
|
|
3618
|
-
return na_ndloop(&ndf, 1, self);
|
|
3619
|
-
}
|
|
3620
|
-
|
|
3621
|
-
#define check_intdivzero(y) \
|
|
3622
|
-
{}
|
|
3623
|
-
|
|
3624
|
-
static void iter_dfloat_copysign(na_loop_t* const lp) {
|
|
3625
|
-
size_t i = 0;
|
|
3626
|
-
size_t n;
|
|
3627
|
-
char *p1, *p2, *p3;
|
|
3628
|
-
ssize_t s1, s2, s3;
|
|
3629
|
-
|
|
3630
|
-
INIT_COUNTER(lp, n);
|
|
3631
|
-
INIT_PTR(lp, 0, p1, s1);
|
|
3632
|
-
INIT_PTR(lp, 1, p2, s2);
|
|
3633
|
-
INIT_PTR(lp, 2, p3, s3);
|
|
3634
|
-
|
|
3635
|
-
//
|
|
3636
|
-
if (is_aligned(p1, sizeof(dtype)) && is_aligned(p2, sizeof(dtype)) &&
|
|
3637
|
-
is_aligned(p3, sizeof(dtype))) {
|
|
3638
|
-
|
|
3639
|
-
if (s1 == sizeof(dtype) && s2 == sizeof(dtype) && s3 == sizeof(dtype)) {
|
|
3640
|
-
if (p1 == p3) { // inplace case
|
|
3641
|
-
for (; i < n; i++) {
|
|
3642
|
-
check_intdivzero(((dtype*)p2)[i]);
|
|
3643
|
-
((dtype*)p1)[i] = m_copysign(((dtype*)p1)[i], ((dtype*)p2)[i]);
|
|
3644
|
-
}
|
|
3645
|
-
} else {
|
|
3646
|
-
for (; i < n; i++) {
|
|
3647
|
-
check_intdivzero(((dtype*)p2)[i]);
|
|
3648
|
-
((dtype*)p3)[i] = m_copysign(((dtype*)p1)[i], ((dtype*)p2)[i]);
|
|
3649
|
-
}
|
|
3650
|
-
}
|
|
3651
|
-
return;
|
|
3652
|
-
}
|
|
3653
|
-
|
|
3654
|
-
if (is_aligned_step(s1, sizeof(dtype)) && is_aligned_step(s2, sizeof(dtype)) &&
|
|
3655
|
-
is_aligned_step(s3, sizeof(dtype))) {
|
|
3656
|
-
//
|
|
3657
|
-
|
|
3658
|
-
if (s2 == 0) { // Broadcasting from scalar value.
|
|
3659
|
-
check_intdivzero(*(dtype*)p2);
|
|
3660
|
-
if (s1 == sizeof(dtype) && s3 == sizeof(dtype)) {
|
|
3661
|
-
if (p1 == p3) { // inplace case
|
|
3662
|
-
for (; i < n; i++) {
|
|
3663
|
-
((dtype*)p1)[i] = m_copysign(((dtype*)p1)[i], *(dtype*)p2);
|
|
3664
|
-
}
|
|
3665
|
-
} else {
|
|
3666
|
-
for (; i < n; i++) {
|
|
3667
|
-
((dtype*)p3)[i] = m_copysign(((dtype*)p1)[i], *(dtype*)p2);
|
|
3668
|
-
}
|
|
3669
|
-
}
|
|
3670
|
-
} else {
|
|
3671
|
-
for (i = 0; i < n; i++) {
|
|
3672
|
-
*(dtype*)p3 = m_copysign(*(dtype*)p1, *(dtype*)p2);
|
|
3673
|
-
p1 += s1;
|
|
3674
|
-
p3 += s3;
|
|
3675
|
-
}
|
|
3676
|
-
}
|
|
3677
|
-
} else {
|
|
3678
|
-
if (p1 == p3) { // inplace case
|
|
3679
|
-
for (i = 0; i < n; i++) {
|
|
3680
|
-
check_intdivzero(*(dtype*)p2);
|
|
3681
|
-
*(dtype*)p1 = m_copysign(*(dtype*)p1, *(dtype*)p2);
|
|
3682
|
-
p1 += s1;
|
|
3683
|
-
p2 += s2;
|
|
3684
|
-
}
|
|
3685
|
-
} else {
|
|
3686
|
-
for (i = 0; i < n; i++) {
|
|
3687
|
-
check_intdivzero(*(dtype*)p2);
|
|
3688
|
-
*(dtype*)p3 = m_copysign(*(dtype*)p1, *(dtype*)p2);
|
|
3689
|
-
p1 += s1;
|
|
3690
|
-
p2 += s2;
|
|
3691
|
-
p3 += s3;
|
|
3692
|
-
}
|
|
3693
|
-
}
|
|
3694
|
-
}
|
|
3695
|
-
|
|
3696
|
-
return;
|
|
3697
|
-
//
|
|
3698
|
-
}
|
|
3699
|
-
}
|
|
3700
|
-
for (i = 0; i < n; i++) {
|
|
3701
|
-
dtype x, y, z;
|
|
3702
|
-
GET_DATA_STRIDE(p1, s1, dtype, x);
|
|
3703
|
-
GET_DATA_STRIDE(p2, s2, dtype, y);
|
|
3704
|
-
check_intdivzero(y);
|
|
3705
|
-
z = m_copysign(x, y);
|
|
3706
|
-
SET_DATA_STRIDE(p3, s3, dtype, z);
|
|
3707
|
-
}
|
|
3708
|
-
//
|
|
3709
|
-
}
|
|
3710
|
-
#undef check_intdivzero
|
|
3711
|
-
|
|
3712
|
-
static VALUE dfloat_copysign_self(VALUE self, VALUE other) {
|
|
3713
|
-
ndfunc_arg_in_t ain[2] = { { cT, 0 }, { cT, 0 } };
|
|
3714
|
-
ndfunc_arg_out_t aout[1] = { { cT, 0 } };
|
|
3715
|
-
ndfunc_t ndf = { iter_dfloat_copysign, STRIDE_LOOP, 2, 1, ain, aout };
|
|
3716
|
-
|
|
3717
|
-
return na_ndloop(&ndf, 2, self, other);
|
|
3718
|
-
}
|
|
3719
|
-
|
|
3720
|
-
static VALUE dfloat_copysign(VALUE self, VALUE other) {
|
|
3721
|
-
|
|
3722
|
-
VALUE klass, v;
|
|
3723
|
-
|
|
3724
|
-
klass = na_upcast(rb_obj_class(self), rb_obj_class(other));
|
|
3725
|
-
if (klass == cT) {
|
|
3726
|
-
return dfloat_copysign_self(self, other);
|
|
3727
|
-
} else {
|
|
3728
|
-
v = rb_funcall(klass, id_cast, 1, self);
|
|
3729
|
-
return rb_funcall(v, id_copysign, 1, other);
|
|
3730
|
-
}
|
|
3731
|
-
}
|
|
3732
|
-
|
|
3733
|
-
static void iter_dfloat_signbit(na_loop_t* const lp) {
|
|
3734
|
-
size_t i;
|
|
3735
|
-
char* p1;
|
|
3736
|
-
BIT_DIGIT* a2;
|
|
3737
|
-
size_t p2;
|
|
3738
|
-
ssize_t s1, s2;
|
|
3739
|
-
size_t* idx1;
|
|
3740
|
-
dtype x;
|
|
3741
|
-
BIT_DIGIT b;
|
|
3742
|
-
INIT_COUNTER(lp, i);
|
|
3743
|
-
INIT_PTR_IDX(lp, 0, p1, s1, idx1);
|
|
3744
|
-
INIT_PTR_BIT(lp, 1, a2, p2, s2);
|
|
3745
|
-
if (idx1) {
|
|
3746
|
-
for (; i--;) {
|
|
3747
|
-
GET_DATA_INDEX(p1, idx1, dtype, x);
|
|
3748
|
-
b = (m_signbit(x)) ? 1 : 0;
|
|
3749
|
-
STORE_BIT(a2, p2, b);
|
|
3750
|
-
p2 += s2;
|
|
3751
|
-
}
|
|
3752
|
-
} else {
|
|
3753
|
-
for (; i--;) {
|
|
3754
|
-
GET_DATA_STRIDE(p1, s1, dtype, x);
|
|
3755
|
-
b = (m_signbit(x)) ? 1 : 0;
|
|
3756
|
-
STORE_BIT(a2, p2, b);
|
|
3757
|
-
p2 += s2;
|
|
3758
|
-
}
|
|
3759
|
-
}
|
|
3760
|
-
}
|
|
3761
|
-
|
|
3762
|
-
static VALUE dfloat_signbit(VALUE self) {
|
|
3763
|
-
ndfunc_arg_in_t ain[1] = { { cT, 0 } };
|
|
3764
|
-
ndfunc_arg_out_t aout[1] = { { numo_cBit, 0 } };
|
|
3765
|
-
ndfunc_t ndf = { iter_dfloat_signbit, FULL_LOOP, 1, 1, ain, aout };
|
|
3766
|
-
|
|
3767
|
-
return na_ndloop(&ndf, 1, self);
|
|
3768
|
-
}
|
|
3769
|
-
|
|
3770
|
-
static void iter_dfloat_modf(na_loop_t* const lp) {
|
|
3771
|
-
size_t i;
|
|
3772
|
-
char *p1, *p2, *p3;
|
|
3773
|
-
ssize_t s1, s2, s3;
|
|
3774
|
-
dtype x, y, z;
|
|
3775
|
-
INIT_COUNTER(lp, i);
|
|
3776
|
-
INIT_PTR(lp, 0, p1, s1);
|
|
3777
|
-
INIT_PTR(lp, 1, p2, s2);
|
|
3778
|
-
INIT_PTR(lp, 2, p3, s3);
|
|
3779
|
-
for (; i--;) {
|
|
3780
|
-
GET_DATA_STRIDE(p1, s1, dtype, x);
|
|
3781
|
-
m_modf(x, y, z);
|
|
3782
|
-
SET_DATA_STRIDE(p2, s2, dtype, y);
|
|
3783
|
-
SET_DATA_STRIDE(p3, s3, dtype, z);
|
|
3784
|
-
}
|
|
3785
|
-
}
|
|
3786
|
-
|
|
3787
|
-
static VALUE dfloat_modf(VALUE self) {
|
|
3788
|
-
ndfunc_arg_in_t ain[1] = { { cT, 0 } };
|
|
3789
|
-
ndfunc_arg_out_t aout[2] = { { cT, 0 }, { cT, 0 } };
|
|
3790
|
-
ndfunc_t ndf = { iter_dfloat_modf, STRIDE_LOOP, 1, 2, ain, aout };
|
|
3791
|
-
|
|
3792
|
-
return na_ndloop(&ndf, 1, self);
|
|
3793
|
-
}
|
|
3794
|
-
|
|
3795
|
-
static void iter_dfloat_gt(na_loop_t* const lp) {
|
|
3796
|
-
size_t i;
|
|
3797
|
-
char *p1, *p2;
|
|
3798
|
-
BIT_DIGIT* a3;
|
|
3799
|
-
size_t p3;
|
|
3800
|
-
ssize_t s1, s2, s3;
|
|
3801
|
-
dtype x, y;
|
|
3802
|
-
BIT_DIGIT b;
|
|
3803
|
-
INIT_COUNTER(lp, i);
|
|
3804
|
-
INIT_PTR(lp, 0, p1, s1);
|
|
3805
|
-
INIT_PTR(lp, 1, p2, s2);
|
|
3806
|
-
INIT_PTR_BIT(lp, 2, a3, p3, s3);
|
|
3807
|
-
for (; i--;) {
|
|
3808
|
-
GET_DATA_STRIDE(p1, s1, dtype, x);
|
|
3809
|
-
GET_DATA_STRIDE(p2, s2, dtype, y);
|
|
3810
|
-
b = (m_gt(x, y)) ? 1 : 0;
|
|
3811
|
-
STORE_BIT(a3, p3, b);
|
|
3812
|
-
p3 += s3;
|
|
3813
|
-
}
|
|
3814
|
-
}
|
|
3815
|
-
|
|
3816
|
-
static VALUE dfloat_gt_self(VALUE self, VALUE other) {
|
|
3817
|
-
ndfunc_arg_in_t ain[2] = { { cT, 0 }, { cT, 0 } };
|
|
3818
|
-
ndfunc_arg_out_t aout[1] = { { numo_cBit, 0 } };
|
|
3819
|
-
ndfunc_t ndf = { iter_dfloat_gt, STRIDE_LOOP, 2, 1, ain, aout };
|
|
3820
|
-
|
|
3821
|
-
return na_ndloop(&ndf, 2, self, other);
|
|
3822
|
-
}
|
|
3823
|
-
|
|
3824
|
-
static VALUE dfloat_gt(VALUE self, VALUE other) {
|
|
3825
|
-
|
|
3826
|
-
VALUE klass, v;
|
|
3827
|
-
klass = na_upcast(rb_obj_class(self), rb_obj_class(other));
|
|
3828
|
-
if (klass == cT) {
|
|
3829
|
-
return dfloat_gt_self(self, other);
|
|
3830
|
-
} else {
|
|
3831
|
-
v = rb_funcall(klass, id_cast, 1, self);
|
|
3832
|
-
return rb_funcall(v, id_gt, 1, other);
|
|
3833
|
-
}
|
|
3834
|
-
}
|
|
3835
|
-
|
|
3836
|
-
static void iter_dfloat_ge(na_loop_t* const lp) {
|
|
3837
|
-
size_t i;
|
|
3838
|
-
char *p1, *p2;
|
|
3839
|
-
BIT_DIGIT* a3;
|
|
3840
|
-
size_t p3;
|
|
3841
|
-
ssize_t s1, s2, s3;
|
|
3842
|
-
dtype x, y;
|
|
3843
|
-
BIT_DIGIT b;
|
|
3844
|
-
INIT_COUNTER(lp, i);
|
|
3845
|
-
INIT_PTR(lp, 0, p1, s1);
|
|
3846
|
-
INIT_PTR(lp, 1, p2, s2);
|
|
3847
|
-
INIT_PTR_BIT(lp, 2, a3, p3, s3);
|
|
3848
|
-
for (; i--;) {
|
|
3849
|
-
GET_DATA_STRIDE(p1, s1, dtype, x);
|
|
3850
|
-
GET_DATA_STRIDE(p2, s2, dtype, y);
|
|
3851
|
-
b = (m_ge(x, y)) ? 1 : 0;
|
|
3852
|
-
STORE_BIT(a3, p3, b);
|
|
3853
|
-
p3 += s3;
|
|
3854
|
-
}
|
|
3855
|
-
}
|
|
3856
|
-
|
|
3857
|
-
static VALUE dfloat_ge_self(VALUE self, VALUE other) {
|
|
3858
|
-
ndfunc_arg_in_t ain[2] = { { cT, 0 }, { cT, 0 } };
|
|
3859
|
-
ndfunc_arg_out_t aout[1] = { { numo_cBit, 0 } };
|
|
3860
|
-
ndfunc_t ndf = { iter_dfloat_ge, STRIDE_LOOP, 2, 1, ain, aout };
|
|
3861
|
-
|
|
3862
|
-
return na_ndloop(&ndf, 2, self, other);
|
|
3863
|
-
}
|
|
3864
|
-
|
|
3865
|
-
static VALUE dfloat_ge(VALUE self, VALUE other) {
|
|
3866
|
-
|
|
3867
|
-
VALUE klass, v;
|
|
3868
|
-
klass = na_upcast(rb_obj_class(self), rb_obj_class(other));
|
|
3869
|
-
if (klass == cT) {
|
|
3870
|
-
return dfloat_ge_self(self, other);
|
|
3871
|
-
} else {
|
|
3872
|
-
v = rb_funcall(klass, id_cast, 1, self);
|
|
3873
|
-
return rb_funcall(v, id_ge, 1, other);
|
|
3874
|
-
}
|
|
3875
|
-
}
|
|
3876
|
-
|
|
3877
|
-
static void iter_dfloat_lt(na_loop_t* const lp) {
|
|
3878
|
-
size_t i;
|
|
3879
|
-
char *p1, *p2;
|
|
3880
|
-
BIT_DIGIT* a3;
|
|
3881
|
-
size_t p3;
|
|
3882
|
-
ssize_t s1, s2, s3;
|
|
3883
|
-
dtype x, y;
|
|
3884
|
-
BIT_DIGIT b;
|
|
3885
|
-
INIT_COUNTER(lp, i);
|
|
3886
|
-
INIT_PTR(lp, 0, p1, s1);
|
|
3887
|
-
INIT_PTR(lp, 1, p2, s2);
|
|
3888
|
-
INIT_PTR_BIT(lp, 2, a3, p3, s3);
|
|
3889
|
-
for (; i--;) {
|
|
3890
|
-
GET_DATA_STRIDE(p1, s1, dtype, x);
|
|
3891
|
-
GET_DATA_STRIDE(p2, s2, dtype, y);
|
|
3892
|
-
b = (m_lt(x, y)) ? 1 : 0;
|
|
3893
|
-
STORE_BIT(a3, p3, b);
|
|
3894
|
-
p3 += s3;
|
|
3895
|
-
}
|
|
3896
|
-
}
|
|
3897
|
-
|
|
3898
|
-
static VALUE dfloat_lt_self(VALUE self, VALUE other) {
|
|
3899
|
-
ndfunc_arg_in_t ain[2] = { { cT, 0 }, { cT, 0 } };
|
|
3900
|
-
ndfunc_arg_out_t aout[1] = { { numo_cBit, 0 } };
|
|
3901
|
-
ndfunc_t ndf = { iter_dfloat_lt, STRIDE_LOOP, 2, 1, ain, aout };
|
|
3902
|
-
|
|
3903
|
-
return na_ndloop(&ndf, 2, self, other);
|
|
3904
|
-
}
|
|
3905
|
-
|
|
3906
|
-
static VALUE dfloat_lt(VALUE self, VALUE other) {
|
|
3907
|
-
|
|
3908
|
-
VALUE klass, v;
|
|
3909
|
-
klass = na_upcast(rb_obj_class(self), rb_obj_class(other));
|
|
3910
|
-
if (klass == cT) {
|
|
3911
|
-
return dfloat_lt_self(self, other);
|
|
3912
|
-
} else {
|
|
3913
|
-
v = rb_funcall(klass, id_cast, 1, self);
|
|
3914
|
-
return rb_funcall(v, id_lt, 1, other);
|
|
3915
|
-
}
|
|
3916
|
-
}
|
|
3917
|
-
|
|
3918
|
-
static void iter_dfloat_le(na_loop_t* const lp) {
|
|
3919
|
-
size_t i;
|
|
3920
|
-
char *p1, *p2;
|
|
3921
|
-
BIT_DIGIT* a3;
|
|
3922
|
-
size_t p3;
|
|
3923
|
-
ssize_t s1, s2, s3;
|
|
3924
|
-
dtype x, y;
|
|
3925
|
-
BIT_DIGIT b;
|
|
3926
|
-
INIT_COUNTER(lp, i);
|
|
3927
|
-
INIT_PTR(lp, 0, p1, s1);
|
|
3928
|
-
INIT_PTR(lp, 1, p2, s2);
|
|
3929
|
-
INIT_PTR_BIT(lp, 2, a3, p3, s3);
|
|
3930
|
-
for (; i--;) {
|
|
3931
|
-
GET_DATA_STRIDE(p1, s1, dtype, x);
|
|
3932
|
-
GET_DATA_STRIDE(p2, s2, dtype, y);
|
|
3933
|
-
b = (m_le(x, y)) ? 1 : 0;
|
|
3934
|
-
STORE_BIT(a3, p3, b);
|
|
3935
|
-
p3 += s3;
|
|
3936
|
-
}
|
|
3937
|
-
}
|
|
3938
|
-
|
|
3939
|
-
static VALUE dfloat_le_self(VALUE self, VALUE other) {
|
|
3940
|
-
ndfunc_arg_in_t ain[2] = { { cT, 0 }, { cT, 0 } };
|
|
3941
|
-
ndfunc_arg_out_t aout[1] = { { numo_cBit, 0 } };
|
|
3942
|
-
ndfunc_t ndf = { iter_dfloat_le, STRIDE_LOOP, 2, 1, ain, aout };
|
|
3943
|
-
|
|
3944
|
-
return na_ndloop(&ndf, 2, self, other);
|
|
3945
|
-
}
|
|
3946
|
-
|
|
3947
|
-
static VALUE dfloat_le(VALUE self, VALUE other) {
|
|
3948
|
-
|
|
3949
|
-
VALUE klass, v;
|
|
3950
|
-
klass = na_upcast(rb_obj_class(self), rb_obj_class(other));
|
|
3951
|
-
if (klass == cT) {
|
|
3952
|
-
return dfloat_le_self(self, other);
|
|
3953
|
-
} else {
|
|
3954
|
-
v = rb_funcall(klass, id_cast, 1, self);
|
|
3955
|
-
return rb_funcall(v, id_le, 1, other);
|
|
3956
|
-
}
|
|
3957
|
-
}
|
|
3958
|
-
|
|
3959
|
-
static void iter_dfloat_isnan(na_loop_t* const lp) {
|
|
3960
|
-
size_t i;
|
|
3961
|
-
char* p1;
|
|
3962
|
-
BIT_DIGIT* a2;
|
|
3963
|
-
size_t p2;
|
|
3964
|
-
ssize_t s1, s2;
|
|
3965
|
-
size_t* idx1;
|
|
3966
|
-
dtype x;
|
|
3967
|
-
BIT_DIGIT b;
|
|
3968
|
-
INIT_COUNTER(lp, i);
|
|
3969
|
-
INIT_PTR_IDX(lp, 0, p1, s1, idx1);
|
|
3970
|
-
INIT_PTR_BIT(lp, 1, a2, p2, s2);
|
|
3971
|
-
if (idx1) {
|
|
3972
|
-
for (; i--;) {
|
|
3973
|
-
GET_DATA_INDEX(p1, idx1, dtype, x);
|
|
3974
|
-
b = (m_isnan(x)) ? 1 : 0;
|
|
3975
|
-
STORE_BIT(a2, p2, b);
|
|
3976
|
-
p2 += s2;
|
|
3977
|
-
}
|
|
3978
|
-
} else {
|
|
3979
|
-
for (; i--;) {
|
|
3980
|
-
GET_DATA_STRIDE(p1, s1, dtype, x);
|
|
3981
|
-
b = (m_isnan(x)) ? 1 : 0;
|
|
3982
|
-
STORE_BIT(a2, p2, b);
|
|
3983
|
-
p2 += s2;
|
|
3984
|
-
}
|
|
3985
|
-
}
|
|
3986
|
-
}
|
|
3987
|
-
|
|
3988
|
-
static VALUE dfloat_isnan(VALUE self) {
|
|
3989
|
-
ndfunc_arg_in_t ain[1] = { { cT, 0 } };
|
|
3990
|
-
ndfunc_arg_out_t aout[1] = { { numo_cBit, 0 } };
|
|
3991
|
-
ndfunc_t ndf = { iter_dfloat_isnan, FULL_LOOP, 1, 1, ain, aout };
|
|
3992
|
-
|
|
3993
|
-
return na_ndloop(&ndf, 1, self);
|
|
3994
|
-
}
|
|
3995
|
-
|
|
3996
|
-
static void iter_dfloat_isinf(na_loop_t* const lp) {
|
|
3997
|
-
size_t i;
|
|
3998
|
-
char* p1;
|
|
3999
|
-
BIT_DIGIT* a2;
|
|
4000
|
-
size_t p2;
|
|
4001
|
-
ssize_t s1, s2;
|
|
4002
|
-
size_t* idx1;
|
|
4003
|
-
dtype x;
|
|
4004
|
-
BIT_DIGIT b;
|
|
4005
|
-
INIT_COUNTER(lp, i);
|
|
4006
|
-
INIT_PTR_IDX(lp, 0, p1, s1, idx1);
|
|
4007
|
-
INIT_PTR_BIT(lp, 1, a2, p2, s2);
|
|
4008
|
-
if (idx1) {
|
|
4009
|
-
for (; i--;) {
|
|
4010
|
-
GET_DATA_INDEX(p1, idx1, dtype, x);
|
|
4011
|
-
b = (m_isinf(x)) ? 1 : 0;
|
|
4012
|
-
STORE_BIT(a2, p2, b);
|
|
4013
|
-
p2 += s2;
|
|
4014
|
-
}
|
|
4015
|
-
} else {
|
|
4016
|
-
for (; i--;) {
|
|
4017
|
-
GET_DATA_STRIDE(p1, s1, dtype, x);
|
|
4018
|
-
b = (m_isinf(x)) ? 1 : 0;
|
|
4019
|
-
STORE_BIT(a2, p2, b);
|
|
4020
|
-
p2 += s2;
|
|
4021
|
-
}
|
|
4022
|
-
}
|
|
4023
|
-
}
|
|
4024
|
-
|
|
4025
|
-
static VALUE dfloat_isinf(VALUE self) {
|
|
4026
|
-
ndfunc_arg_in_t ain[1] = { { cT, 0 } };
|
|
4027
|
-
ndfunc_arg_out_t aout[1] = { { numo_cBit, 0 } };
|
|
4028
|
-
ndfunc_t ndf = { iter_dfloat_isinf, FULL_LOOP, 1, 1, ain, aout };
|
|
4029
|
-
|
|
4030
|
-
return na_ndloop(&ndf, 1, self);
|
|
4031
|
-
}
|
|
4032
|
-
|
|
4033
|
-
static void iter_dfloat_isposinf(na_loop_t* const lp) {
|
|
4034
|
-
size_t i;
|
|
4035
|
-
char* p1;
|
|
4036
|
-
BIT_DIGIT* a2;
|
|
4037
|
-
size_t p2;
|
|
4038
|
-
ssize_t s1, s2;
|
|
4039
|
-
size_t* idx1;
|
|
4040
|
-
dtype x;
|
|
4041
|
-
BIT_DIGIT b;
|
|
4042
|
-
INIT_COUNTER(lp, i);
|
|
4043
|
-
INIT_PTR_IDX(lp, 0, p1, s1, idx1);
|
|
4044
|
-
INIT_PTR_BIT(lp, 1, a2, p2, s2);
|
|
4045
|
-
if (idx1) {
|
|
4046
|
-
for (; i--;) {
|
|
4047
|
-
GET_DATA_INDEX(p1, idx1, dtype, x);
|
|
4048
|
-
b = (m_isposinf(x)) ? 1 : 0;
|
|
4049
|
-
STORE_BIT(a2, p2, b);
|
|
4050
|
-
p2 += s2;
|
|
4051
|
-
}
|
|
4052
|
-
} else {
|
|
4053
|
-
for (; i--;) {
|
|
4054
|
-
GET_DATA_STRIDE(p1, s1, dtype, x);
|
|
4055
|
-
b = (m_isposinf(x)) ? 1 : 0;
|
|
4056
|
-
STORE_BIT(a2, p2, b);
|
|
4057
|
-
p2 += s2;
|
|
4058
|
-
}
|
|
4059
|
-
}
|
|
4060
|
-
}
|
|
4061
|
-
|
|
4062
|
-
static VALUE dfloat_isposinf(VALUE self) {
|
|
4063
|
-
ndfunc_arg_in_t ain[1] = { { cT, 0 } };
|
|
4064
|
-
ndfunc_arg_out_t aout[1] = { { numo_cBit, 0 } };
|
|
4065
|
-
ndfunc_t ndf = { iter_dfloat_isposinf, FULL_LOOP, 1, 1, ain, aout };
|
|
4066
|
-
|
|
4067
|
-
return na_ndloop(&ndf, 1, self);
|
|
4068
|
-
}
|
|
4069
|
-
|
|
4070
|
-
static void iter_dfloat_isneginf(na_loop_t* const lp) {
|
|
4071
|
-
size_t i;
|
|
4072
|
-
char* p1;
|
|
4073
|
-
BIT_DIGIT* a2;
|
|
4074
|
-
size_t p2;
|
|
4075
|
-
ssize_t s1, s2;
|
|
4076
|
-
size_t* idx1;
|
|
4077
|
-
dtype x;
|
|
4078
|
-
BIT_DIGIT b;
|
|
4079
|
-
INIT_COUNTER(lp, i);
|
|
4080
|
-
INIT_PTR_IDX(lp, 0, p1, s1, idx1);
|
|
4081
|
-
INIT_PTR_BIT(lp, 1, a2, p2, s2);
|
|
4082
|
-
if (idx1) {
|
|
4083
|
-
for (; i--;) {
|
|
4084
|
-
GET_DATA_INDEX(p1, idx1, dtype, x);
|
|
4085
|
-
b = (m_isneginf(x)) ? 1 : 0;
|
|
4086
|
-
STORE_BIT(a2, p2, b);
|
|
4087
|
-
p2 += s2;
|
|
4088
|
-
}
|
|
4089
|
-
} else {
|
|
4090
|
-
for (; i--;) {
|
|
4091
|
-
GET_DATA_STRIDE(p1, s1, dtype, x);
|
|
4092
|
-
b = (m_isneginf(x)) ? 1 : 0;
|
|
4093
|
-
STORE_BIT(a2, p2, b);
|
|
4094
|
-
p2 += s2;
|
|
4095
|
-
}
|
|
4096
|
-
}
|
|
4097
|
-
}
|
|
4098
|
-
|
|
4099
|
-
static VALUE dfloat_isneginf(VALUE self) {
|
|
4100
|
-
ndfunc_arg_in_t ain[1] = { { cT, 0 } };
|
|
4101
|
-
ndfunc_arg_out_t aout[1] = { { numo_cBit, 0 } };
|
|
4102
|
-
ndfunc_t ndf = { iter_dfloat_isneginf, FULL_LOOP, 1, 1, ain, aout };
|
|
4103
|
-
|
|
4104
|
-
return na_ndloop(&ndf, 1, self);
|
|
4105
|
-
}
|
|
4106
|
-
|
|
4107
|
-
static void iter_dfloat_isfinite(na_loop_t* const lp) {
|
|
4108
|
-
size_t i;
|
|
4109
|
-
char* p1;
|
|
4110
|
-
BIT_DIGIT* a2;
|
|
4111
|
-
size_t p2;
|
|
4112
|
-
ssize_t s1, s2;
|
|
4113
|
-
size_t* idx1;
|
|
4114
|
-
dtype x;
|
|
4115
|
-
BIT_DIGIT b;
|
|
4116
|
-
INIT_COUNTER(lp, i);
|
|
4117
|
-
INIT_PTR_IDX(lp, 0, p1, s1, idx1);
|
|
4118
|
-
INIT_PTR_BIT(lp, 1, a2, p2, s2);
|
|
4119
|
-
if (idx1) {
|
|
4120
|
-
for (; i--;) {
|
|
4121
|
-
GET_DATA_INDEX(p1, idx1, dtype, x);
|
|
4122
|
-
b = (m_isfinite(x)) ? 1 : 0;
|
|
4123
|
-
STORE_BIT(a2, p2, b);
|
|
4124
|
-
p2 += s2;
|
|
4125
|
-
}
|
|
4126
|
-
} else {
|
|
4127
|
-
for (; i--;) {
|
|
4128
|
-
GET_DATA_STRIDE(p1, s1, dtype, x);
|
|
4129
|
-
b = (m_isfinite(x)) ? 1 : 0;
|
|
4130
|
-
STORE_BIT(a2, p2, b);
|
|
4131
|
-
p2 += s2;
|
|
4132
|
-
}
|
|
4133
|
-
}
|
|
4134
|
-
}
|
|
4135
|
-
|
|
4136
|
-
static VALUE dfloat_isfinite(VALUE self) {
|
|
4137
|
-
ndfunc_arg_in_t ain[1] = { { cT, 0 } };
|
|
4138
|
-
ndfunc_arg_out_t aout[1] = { { numo_cBit, 0 } };
|
|
4139
|
-
ndfunc_t ndf = { iter_dfloat_isfinite, FULL_LOOP, 1, 1, ain, aout };
|
|
4140
|
-
|
|
4141
|
-
return na_ndloop(&ndf, 1, self);
|
|
4142
|
-
}
|
|
4143
|
-
|
|
4144
|
-
static void iter_dfloat_kahan_sum(na_loop_t* const lp) {
|
|
4145
|
-
size_t n;
|
|
4146
|
-
char *p1, *p2;
|
|
4147
|
-
ssize_t s1;
|
|
1811
|
+
static void iter_dfloat_kahan_sum(na_loop_t* const lp) {
|
|
1812
|
+
size_t n;
|
|
1813
|
+
char *p1, *p2;
|
|
1814
|
+
ssize_t s1;
|
|
4148
1815
|
|
|
4149
1816
|
INIT_COUNTER(lp, n);
|
|
4150
1817
|
INIT_PTR(lp, 0, p1, s1);
|
|
@@ -4177,79 +1844,6 @@ static VALUE dfloat_kahan_sum(int argc, VALUE* argv, VALUE self) {
|
|
|
4177
1844
|
return dfloat_extract(v);
|
|
4178
1845
|
}
|
|
4179
1846
|
|
|
4180
|
-
typedef struct {
|
|
4181
|
-
dtype mu;
|
|
4182
|
-
rtype sigma;
|
|
4183
|
-
} randn_opt_t;
|
|
4184
|
-
|
|
4185
|
-
static void iter_dfloat_rand_norm(na_loop_t* const lp) {
|
|
4186
|
-
size_t i;
|
|
4187
|
-
char* p1;
|
|
4188
|
-
ssize_t s1;
|
|
4189
|
-
size_t* idx1;
|
|
4190
|
-
|
|
4191
|
-
dtype *a0, *a1;
|
|
4192
|
-
|
|
4193
|
-
dtype mu;
|
|
4194
|
-
rtype sigma;
|
|
4195
|
-
randn_opt_t* g;
|
|
4196
|
-
|
|
4197
|
-
INIT_COUNTER(lp, i);
|
|
4198
|
-
INIT_PTR_IDX(lp, 0, p1, s1, idx1);
|
|
4199
|
-
g = (randn_opt_t*)(lp->opt_ptr);
|
|
4200
|
-
mu = g->mu;
|
|
4201
|
-
sigma = g->sigma;
|
|
4202
|
-
|
|
4203
|
-
if (idx1) {
|
|
4204
|
-
|
|
4205
|
-
for (; i > 1; i -= 2) {
|
|
4206
|
-
a0 = (dtype*)(p1 + *idx1);
|
|
4207
|
-
a1 = (dtype*)(p1 + *(idx1 + 1));
|
|
4208
|
-
m_rand_norm(mu, sigma, a0, a1);
|
|
4209
|
-
idx1 += 2;
|
|
4210
|
-
}
|
|
4211
|
-
if (i > 0) {
|
|
4212
|
-
a0 = (dtype*)(p1 + *idx1);
|
|
4213
|
-
m_rand_norm(mu, sigma, a0, 0);
|
|
4214
|
-
}
|
|
4215
|
-
|
|
4216
|
-
} else {
|
|
4217
|
-
|
|
4218
|
-
for (; i > 1; i -= 2) {
|
|
4219
|
-
a0 = (dtype*)(p1);
|
|
4220
|
-
a1 = (dtype*)(p1 + s1);
|
|
4221
|
-
m_rand_norm(mu, sigma, a0, a1);
|
|
4222
|
-
p1 += s1 * 2;
|
|
4223
|
-
}
|
|
4224
|
-
if (i > 0) {
|
|
4225
|
-
a0 = (dtype*)(p1);
|
|
4226
|
-
m_rand_norm(mu, sigma, a0, 0);
|
|
4227
|
-
}
|
|
4228
|
-
}
|
|
4229
|
-
}
|
|
4230
|
-
|
|
4231
|
-
static VALUE dfloat_rand_norm(int argc, VALUE* args, VALUE self) {
|
|
4232
|
-
int n;
|
|
4233
|
-
randn_opt_t g;
|
|
4234
|
-
VALUE v1 = Qnil, v2 = Qnil;
|
|
4235
|
-
ndfunc_arg_in_t ain[1] = { { OVERWRITE, 0 } };
|
|
4236
|
-
ndfunc_t ndf = { iter_dfloat_rand_norm, FULL_LOOP, 1, 0, ain, 0 };
|
|
4237
|
-
|
|
4238
|
-
n = rb_scan_args(argc, args, "02", &v1, &v2);
|
|
4239
|
-
if (n == 0) {
|
|
4240
|
-
g.mu = m_zero;
|
|
4241
|
-
} else {
|
|
4242
|
-
g.mu = m_num_to_data(v1);
|
|
4243
|
-
}
|
|
4244
|
-
if (n == 2) {
|
|
4245
|
-
g.sigma = NUM2DBL(v2);
|
|
4246
|
-
} else {
|
|
4247
|
-
g.sigma = 1;
|
|
4248
|
-
}
|
|
4249
|
-
na_ndloop3(&ndf, &g, 1, self);
|
|
4250
|
-
return self;
|
|
4251
|
-
}
|
|
4252
|
-
|
|
4253
1847
|
static void iter_dfloat_poly(na_loop_t* const lp) {
|
|
4254
1848
|
size_t i;
|
|
4255
1849
|
dtype x, y, a;
|