numo-narray-alt 0.9.11 → 0.9.12
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile +0 -1
- data/README.md +7 -0
- data/ext/numo/narray/numo/narray.h +2 -2
- data/ext/numo/narray/numo/types/robj_macro.h +1 -1
- data/ext/numo/narray/src/mh/bincount.h +233 -0
- data/ext/numo/narray/src/mh/bit/and.h +225 -0
- data/ext/numo/narray/src/mh/bit/left_shift.h +225 -0
- data/ext/numo/narray/src/mh/bit/not.h +173 -0
- data/ext/numo/narray/src/mh/bit/or.h +225 -0
- data/ext/numo/narray/src/mh/bit/right_shift.h +225 -0
- data/ext/numo/narray/src/mh/bit/xor.h +225 -0
- data/ext/numo/narray/src/mh/coerce_cast.h +9 -0
- data/ext/numo/narray/src/mh/comp/binary_func.h +37 -0
- data/ext/numo/narray/src/mh/comp/eq.h +26 -0
- data/ext/numo/narray/src/mh/comp/ge.h +26 -0
- data/ext/numo/narray/src/mh/comp/gt.h +26 -0
- data/ext/numo/narray/src/mh/comp/le.h +26 -0
- data/ext/numo/narray/src/mh/comp/lt.h +26 -0
- data/ext/numo/narray/src/mh/comp/ne.h +26 -0
- data/ext/numo/narray/src/mh/comp/nearly_eq.h +26 -0
- data/ext/numo/narray/src/mh/divmod.h +142 -0
- data/ext/numo/narray/src/mh/eye.h +1 -1
- data/ext/numo/narray/src/mh/fill.h +94 -0
- data/ext/numo/narray/src/mh/format.h +108 -0
- data/ext/numo/narray/src/mh/format_to_a.h +89 -0
- data/ext/numo/narray/src/mh/inspect.h +33 -0
- data/ext/numo/narray/src/mh/isfinite.h +42 -0
- data/ext/numo/narray/src/mh/isinf.h +42 -0
- data/ext/numo/narray/src/mh/isnan.h +42 -0
- data/ext/numo/narray/src/mh/isneginf.h +42 -0
- data/ext/numo/narray/src/mh/isposinf.h +42 -0
- data/ext/numo/narray/src/mh/math/acos.h +2 -2
- data/ext/numo/narray/src/mh/math/acosh.h +2 -2
- data/ext/numo/narray/src/mh/math/asin.h +2 -2
- data/ext/numo/narray/src/mh/math/asinh.h +2 -2
- data/ext/numo/narray/src/mh/math/atan.h +2 -2
- data/ext/numo/narray/src/mh/math/atan2.h +3 -3
- data/ext/numo/narray/src/mh/math/atanh.h +2 -2
- data/ext/numo/narray/src/mh/math/cbrt.h +2 -2
- data/ext/numo/narray/src/mh/math/cos.h +2 -2
- data/ext/numo/narray/src/mh/math/cosh.h +2 -2
- data/ext/numo/narray/src/mh/math/erf.h +2 -2
- data/ext/numo/narray/src/mh/math/erfc.h +2 -2
- data/ext/numo/narray/src/mh/math/exp.h +2 -2
- data/ext/numo/narray/src/mh/math/exp10.h +2 -2
- data/ext/numo/narray/src/mh/math/exp2.h +2 -2
- data/ext/numo/narray/src/mh/math/expm1.h +2 -2
- data/ext/numo/narray/src/mh/math/frexp.h +3 -3
- data/ext/numo/narray/src/mh/math/hypot.h +3 -3
- data/ext/numo/narray/src/mh/math/ldexp.h +3 -3
- data/ext/numo/narray/src/mh/math/log.h +2 -2
- data/ext/numo/narray/src/mh/math/log10.h +2 -2
- data/ext/numo/narray/src/mh/math/log1p.h +2 -2
- data/ext/numo/narray/src/mh/math/log2.h +2 -2
- data/ext/numo/narray/src/mh/math/sin.h +2 -2
- data/ext/numo/narray/src/mh/math/sinc.h +2 -2
- data/ext/numo/narray/src/mh/math/sinh.h +2 -2
- data/ext/numo/narray/src/mh/math/sqrt.h +8 -8
- data/ext/numo/narray/src/mh/math/tan.h +2 -2
- data/ext/numo/narray/src/mh/math/tanh.h +2 -2
- data/ext/numo/narray/src/mh/math/unary_func.h +3 -3
- data/ext/numo/narray/src/mh/op/add.h +78 -0
- data/ext/numo/narray/src/mh/op/binary_func.h +423 -0
- data/ext/numo/narray/src/mh/op/div.h +118 -0
- data/ext/numo/narray/src/mh/op/mod.h +108 -0
- data/ext/numo/narray/src/mh/op/mul.h +78 -0
- data/ext/numo/narray/src/mh/op/sub.h +78 -0
- data/ext/numo/narray/src/mh/rand.h +2 -2
- data/ext/numo/narray/src/mh/round/ceil.h +11 -0
- data/ext/numo/narray/src/mh/round/floor.h +11 -0
- data/ext/numo/narray/src/mh/round/rint.h +9 -0
- data/ext/numo/narray/src/mh/round/round.h +11 -0
- data/ext/numo/narray/src/mh/round/trunc.h +11 -0
- data/ext/numo/narray/src/mh/round/unary_func.h +127 -0
- data/ext/numo/narray/src/mh/to_a.h +78 -0
- data/ext/numo/narray/src/t_bit.c +45 -234
- data/ext/numo/narray/src/t_dcomplex.c +584 -1809
- data/ext/numo/narray/src/t_dfloat.c +429 -2432
- data/ext/numo/narray/src/t_int16.c +481 -2283
- data/ext/numo/narray/src/t_int32.c +481 -2283
- data/ext/numo/narray/src/t_int64.c +481 -2283
- data/ext/numo/narray/src/t_int8.c +408 -1873
- data/ext/numo/narray/src/t_robject.c +448 -1977
- data/ext/numo/narray/src/t_scomplex.c +584 -1809
- data/ext/numo/narray/src/t_sfloat.c +429 -2434
- data/ext/numo/narray/src/t_uint16.c +480 -2278
- data/ext/numo/narray/src/t_uint32.c +480 -2278
- data/ext/numo/narray/src/t_uint64.c +480 -2278
- data/ext/numo/narray/src/t_uint8.c +407 -1868
- metadata +41 -2
|
@@ -42,7 +42,36 @@ static ID id_to_a;
|
|
|
42
42
|
VALUE cT;
|
|
43
43
|
extern VALUE cRT;
|
|
44
44
|
|
|
45
|
+
#include "mh/coerce_cast.h"
|
|
46
|
+
#include "mh/to_a.h"
|
|
47
|
+
#include "mh/fill.h"
|
|
48
|
+
#include "mh/format.h"
|
|
49
|
+
#include "mh/format_to_a.h"
|
|
50
|
+
#include "mh/inspect.h"
|
|
51
|
+
#include "mh/op/add.h"
|
|
52
|
+
#include "mh/op/sub.h"
|
|
53
|
+
#include "mh/op/mul.h"
|
|
54
|
+
#include "mh/op/div.h"
|
|
55
|
+
#include "mh/op/mod.h"
|
|
56
|
+
#include "mh/divmod.h"
|
|
57
|
+
#include "mh/round/floor.h"
|
|
58
|
+
#include "mh/round/round.h"
|
|
59
|
+
#include "mh/round/ceil.h"
|
|
60
|
+
#include "mh/round/trunc.h"
|
|
61
|
+
#include "mh/round/rint.h"
|
|
62
|
+
#include "mh/comp/eq.h"
|
|
63
|
+
#include "mh/comp/ne.h"
|
|
64
|
+
#include "mh/comp/nearly_eq.h"
|
|
65
|
+
#include "mh/comp/gt.h"
|
|
66
|
+
#include "mh/comp/ge.h"
|
|
67
|
+
#include "mh/comp/lt.h"
|
|
68
|
+
#include "mh/comp/le.h"
|
|
45
69
|
#include "mh/clip.h"
|
|
70
|
+
#include "mh/isnan.h"
|
|
71
|
+
#include "mh/isinf.h"
|
|
72
|
+
#include "mh/isposinf.h"
|
|
73
|
+
#include "mh/isneginf.h"
|
|
74
|
+
#include "mh/isfinite.h"
|
|
46
75
|
#include "mh/sum.h"
|
|
47
76
|
#include "mh/prod.h"
|
|
48
77
|
#include "mh/mean.h"
|
|
@@ -98,7 +127,43 @@ extern VALUE cRT;
|
|
|
98
127
|
|
|
99
128
|
typedef double dfloat; // Type aliases for shorter notation
|
|
100
129
|
// following the codebase naming convention.
|
|
130
|
+
DEF_NARRAY_COERCE_CAST_METHOD_FUNC(dfloat)
|
|
131
|
+
DEF_NARRAY_TO_A_METHOD_FUNC(dfloat)
|
|
132
|
+
DEF_NARRAY_FILL_METHOD_FUNC(dfloat)
|
|
133
|
+
DEF_NARRAY_FORMAT_METHOD_FUNC(dfloat)
|
|
134
|
+
DEF_NARRAY_FORMAT_TO_A_METHOD_FUNC(dfloat)
|
|
135
|
+
DEF_NARRAY_INSPECT_METHOD_FUNC(dfloat)
|
|
136
|
+
#ifdef __SSE2__
|
|
137
|
+
DEF_NARRAY_DFLT_ADD_SSE2_METHOD_FUNC()
|
|
138
|
+
DEF_NARRAY_DFLT_SUB_SSE2_METHOD_FUNC()
|
|
139
|
+
DEF_NARRAY_DFLT_MUL_SSE2_METHOD_FUNC()
|
|
140
|
+
DEF_NARRAY_DFLT_DIV_SSE2_METHOD_FUNC()
|
|
141
|
+
#else
|
|
142
|
+
DEF_NARRAY_ADD_METHOD_FUNC(dfloat, numo_cDFloat)
|
|
143
|
+
DEF_NARRAY_SUB_METHOD_FUNC(dfloat, numo_cDFloat)
|
|
144
|
+
DEF_NARRAY_MUL_METHOD_FUNC(dfloat, numo_cDFloat)
|
|
145
|
+
DEF_NARRAY_FLT_DIV_METHOD_FUNC(dfloat, numo_cDFloat)
|
|
146
|
+
#endif
|
|
147
|
+
DEF_NARRAY_FLT_MOD_METHOD_FUNC(dfloat, numo_cDFloat)
|
|
148
|
+
DEF_NARRAY_FLT_DIVMOD_METHOD_FUNC(dfloat, numo_cDFloat)
|
|
149
|
+
DEF_NARRAY_FLT_FLOOR_METHOD_FUNC(dfloat, numo_cDFloat)
|
|
150
|
+
DEF_NARRAY_FLT_ROUND_METHOD_FUNC(dfloat, numo_cDFloat)
|
|
151
|
+
DEF_NARRAY_FLT_CEIL_METHOD_FUNC(dfloat, numo_cDFloat)
|
|
152
|
+
DEF_NARRAY_FLT_TRUNC_METHOD_FUNC(dfloat, numo_cDFloat)
|
|
153
|
+
DEF_NARRAY_FLT_RINT_METHOD_FUNC(dfloat, numo_cDFloat)
|
|
154
|
+
DEF_NARRAY_EQ_METHOD_FUNC(dfloat, numo_cDFloat)
|
|
155
|
+
DEF_NARRAY_NE_METHOD_FUNC(dfloat, numo_cDFloat)
|
|
156
|
+
DEF_NARRAY_NEARLY_EQ_METHOD_FUNC(dfloat, numo_cDFloat)
|
|
157
|
+
DEF_NARRAY_GT_METHOD_FUNC(dfloat, numo_cDFloat)
|
|
158
|
+
DEF_NARRAY_GE_METHOD_FUNC(dfloat, numo_cDFloat)
|
|
159
|
+
DEF_NARRAY_LT_METHOD_FUNC(dfloat, numo_cDFloat)
|
|
160
|
+
DEF_NARRAY_LE_METHOD_FUNC(dfloat, numo_cDFloat)
|
|
101
161
|
DEF_NARRAY_CLIP_METHOD_FUNC(dfloat, numo_cDFloat)
|
|
162
|
+
DEF_NARRAY_FLT_ISNAN_METHOD_FUNC(dfloat, numo_cDFloat)
|
|
163
|
+
DEF_NARRAY_FLT_ISINF_METHOD_FUNC(dfloat, numo_cDFloat)
|
|
164
|
+
DEF_NARRAY_FLT_ISPOSINF_METHOD_FUNC(dfloat, numo_cDFloat)
|
|
165
|
+
DEF_NARRAY_FLT_ISNEGINF_METHOD_FUNC(dfloat, numo_cDFloat)
|
|
166
|
+
DEF_NARRAY_FLT_ISFINITE_METHOD_FUNC(dfloat, numo_cDFloat)
|
|
102
167
|
DEF_NARRAY_FLT_SUM_METHOD_FUNC(dfloat, numo_cDFloat)
|
|
103
168
|
DEF_NARRAY_FLT_PROD_METHOD_FUNC(dfloat, numo_cDFloat)
|
|
104
169
|
DEF_NARRAY_FLT_MEAN_METHOD_FUNC(dfloat, numo_cDFloat, double, numo_cDFloat)
|
|
@@ -1275,171 +1340,6 @@ static VALUE dfloat_aset(int argc, VALUE* argv, VALUE self) {
|
|
|
1275
1340
|
return argv[argc];
|
|
1276
1341
|
}
|
|
1277
1342
|
|
|
1278
|
-
static VALUE dfloat_coerce_cast(VALUE self, VALUE type) {
|
|
1279
|
-
return Qnil;
|
|
1280
|
-
}
|
|
1281
|
-
|
|
1282
|
-
static void iter_dfloat_to_a(na_loop_t* const lp) {
|
|
1283
|
-
size_t i, s1;
|
|
1284
|
-
char* p1;
|
|
1285
|
-
size_t* idx1;
|
|
1286
|
-
dtype x;
|
|
1287
|
-
volatile VALUE a, y;
|
|
1288
|
-
|
|
1289
|
-
INIT_COUNTER(lp, i);
|
|
1290
|
-
INIT_PTR_IDX(lp, 0, p1, s1, idx1);
|
|
1291
|
-
a = rb_ary_new2(i);
|
|
1292
|
-
rb_ary_push(lp->args[1].value, a);
|
|
1293
|
-
if (idx1) {
|
|
1294
|
-
for (; i--;) {
|
|
1295
|
-
GET_DATA_INDEX(p1, idx1, dtype, x);
|
|
1296
|
-
y = m_data_to_num(x);
|
|
1297
|
-
rb_ary_push(a, y);
|
|
1298
|
-
}
|
|
1299
|
-
} else {
|
|
1300
|
-
for (; i--;) {
|
|
1301
|
-
GET_DATA_STRIDE(p1, s1, dtype, x);
|
|
1302
|
-
y = m_data_to_num(x);
|
|
1303
|
-
rb_ary_push(a, y);
|
|
1304
|
-
}
|
|
1305
|
-
}
|
|
1306
|
-
}
|
|
1307
|
-
|
|
1308
|
-
static VALUE dfloat_to_a(VALUE self) {
|
|
1309
|
-
ndfunc_arg_in_t ain[3] = { { Qnil, 0 }, { sym_loop_opt }, { sym_option } };
|
|
1310
|
-
ndfunc_arg_out_t aout[1] = { { rb_cArray, 0 } }; // dummy?
|
|
1311
|
-
ndfunc_t ndf = { iter_dfloat_to_a, FULL_LOOP_NIP, 3, 1, ain, aout };
|
|
1312
|
-
return na_ndloop_cast_narray_to_rarray(&ndf, self, Qnil);
|
|
1313
|
-
}
|
|
1314
|
-
|
|
1315
|
-
static void iter_dfloat_fill(na_loop_t* const lp) {
|
|
1316
|
-
size_t i;
|
|
1317
|
-
char* p1;
|
|
1318
|
-
ssize_t s1;
|
|
1319
|
-
size_t* idx1;
|
|
1320
|
-
VALUE x = lp->option;
|
|
1321
|
-
dtype y;
|
|
1322
|
-
INIT_COUNTER(lp, i);
|
|
1323
|
-
INIT_PTR_IDX(lp, 0, p1, s1, idx1);
|
|
1324
|
-
y = m_num_to_data(x);
|
|
1325
|
-
if (idx1) {
|
|
1326
|
-
for (; i--;) {
|
|
1327
|
-
SET_DATA_INDEX(p1, idx1, dtype, y);
|
|
1328
|
-
}
|
|
1329
|
-
} else {
|
|
1330
|
-
for (; i--;) {
|
|
1331
|
-
SET_DATA_STRIDE(p1, s1, dtype, y);
|
|
1332
|
-
}
|
|
1333
|
-
}
|
|
1334
|
-
}
|
|
1335
|
-
|
|
1336
|
-
static VALUE dfloat_fill(VALUE self, VALUE val) {
|
|
1337
|
-
ndfunc_arg_in_t ain[2] = { { OVERWRITE, 0 }, { sym_option } };
|
|
1338
|
-
ndfunc_t ndf = { iter_dfloat_fill, FULL_LOOP, 2, 0, ain, 0 };
|
|
1339
|
-
|
|
1340
|
-
na_ndloop(&ndf, 2, self, val);
|
|
1341
|
-
return self;
|
|
1342
|
-
}
|
|
1343
|
-
|
|
1344
|
-
static VALUE format_dfloat(VALUE fmt, dtype* x) {
|
|
1345
|
-
// fix-me
|
|
1346
|
-
char s[48];
|
|
1347
|
-
int n;
|
|
1348
|
-
|
|
1349
|
-
if (NIL_P(fmt)) {
|
|
1350
|
-
n = m_sprintf(s, *x);
|
|
1351
|
-
return rb_str_new(s, n);
|
|
1352
|
-
}
|
|
1353
|
-
return rb_funcall(fmt, '%', 1, m_data_to_num(*x));
|
|
1354
|
-
}
|
|
1355
|
-
|
|
1356
|
-
static void iter_dfloat_format(na_loop_t* const lp) {
|
|
1357
|
-
size_t i;
|
|
1358
|
-
char *p1, *p2;
|
|
1359
|
-
ssize_t s1, s2;
|
|
1360
|
-
size_t* idx1;
|
|
1361
|
-
dtype* x;
|
|
1362
|
-
VALUE y;
|
|
1363
|
-
VALUE fmt = lp->option;
|
|
1364
|
-
INIT_COUNTER(lp, i);
|
|
1365
|
-
INIT_PTR_IDX(lp, 0, p1, s1, idx1);
|
|
1366
|
-
INIT_PTR(lp, 1, p2, s2);
|
|
1367
|
-
if (idx1) {
|
|
1368
|
-
for (; i--;) {
|
|
1369
|
-
x = (dtype*)(p1 + *idx1);
|
|
1370
|
-
idx1++;
|
|
1371
|
-
y = format_dfloat(fmt, x);
|
|
1372
|
-
SET_DATA_STRIDE(p2, s2, VALUE, y);
|
|
1373
|
-
}
|
|
1374
|
-
} else {
|
|
1375
|
-
for (; i--;) {
|
|
1376
|
-
x = (dtype*)p1;
|
|
1377
|
-
p1 += s1;
|
|
1378
|
-
y = format_dfloat(fmt, x);
|
|
1379
|
-
SET_DATA_STRIDE(p2, s2, VALUE, y);
|
|
1380
|
-
}
|
|
1381
|
-
}
|
|
1382
|
-
}
|
|
1383
|
-
|
|
1384
|
-
static VALUE dfloat_format(int argc, VALUE* argv, VALUE self) {
|
|
1385
|
-
VALUE fmt = Qnil;
|
|
1386
|
-
|
|
1387
|
-
ndfunc_arg_in_t ain[2] = { { Qnil, 0 }, { sym_option } };
|
|
1388
|
-
ndfunc_arg_out_t aout[1] = { { numo_cRObject, 0 } };
|
|
1389
|
-
ndfunc_t ndf = { iter_dfloat_format, FULL_LOOP_NIP, 2, 1, ain, aout };
|
|
1390
|
-
|
|
1391
|
-
rb_scan_args(argc, argv, "01", &fmt);
|
|
1392
|
-
return na_ndloop(&ndf, 2, self, fmt);
|
|
1393
|
-
}
|
|
1394
|
-
|
|
1395
|
-
static void iter_dfloat_format_to_a(na_loop_t* const lp) {
|
|
1396
|
-
size_t i;
|
|
1397
|
-
char* p1;
|
|
1398
|
-
ssize_t s1;
|
|
1399
|
-
size_t* idx1;
|
|
1400
|
-
dtype* x;
|
|
1401
|
-
VALUE y;
|
|
1402
|
-
volatile VALUE a;
|
|
1403
|
-
VALUE fmt = lp->option;
|
|
1404
|
-
INIT_COUNTER(lp, i);
|
|
1405
|
-
INIT_PTR_IDX(lp, 0, p1, s1, idx1);
|
|
1406
|
-
a = rb_ary_new2(i);
|
|
1407
|
-
rb_ary_push(lp->args[1].value, a);
|
|
1408
|
-
if (idx1) {
|
|
1409
|
-
for (; i--;) {
|
|
1410
|
-
x = (dtype*)(p1 + *idx1);
|
|
1411
|
-
idx1++;
|
|
1412
|
-
y = format_dfloat(fmt, x);
|
|
1413
|
-
rb_ary_push(a, y);
|
|
1414
|
-
}
|
|
1415
|
-
} else {
|
|
1416
|
-
for (; i--;) {
|
|
1417
|
-
x = (dtype*)p1;
|
|
1418
|
-
p1 += s1;
|
|
1419
|
-
y = format_dfloat(fmt, x);
|
|
1420
|
-
rb_ary_push(a, y);
|
|
1421
|
-
}
|
|
1422
|
-
}
|
|
1423
|
-
}
|
|
1424
|
-
|
|
1425
|
-
static VALUE dfloat_format_to_a(int argc, VALUE* argv, VALUE self) {
|
|
1426
|
-
VALUE fmt = Qnil;
|
|
1427
|
-
ndfunc_arg_in_t ain[3] = { { Qnil, 0 }, { sym_loop_opt }, { sym_option } };
|
|
1428
|
-
ndfunc_arg_out_t aout[1] = { { rb_cArray, 0 } }; // dummy?
|
|
1429
|
-
ndfunc_t ndf = { iter_dfloat_format_to_a, FULL_LOOP_NIP, 3, 1, ain, aout };
|
|
1430
|
-
|
|
1431
|
-
rb_scan_args(argc, argv, "01", &fmt);
|
|
1432
|
-
return na_ndloop_cast_narray_to_rarray(&ndf, self, fmt);
|
|
1433
|
-
}
|
|
1434
|
-
|
|
1435
|
-
static VALUE iter_dfloat_inspect(char* ptr, size_t pos, VALUE fmt) {
|
|
1436
|
-
return format_dfloat(fmt, (dtype*)(ptr + pos));
|
|
1437
|
-
}
|
|
1438
|
-
|
|
1439
|
-
static VALUE dfloat_inspect(VALUE ary) {
|
|
1440
|
-
return na_ndloop_inspect(ary, iter_dfloat_inspect, Qnil);
|
|
1441
|
-
}
|
|
1442
|
-
|
|
1443
1343
|
static void iter_dfloat_each(na_loop_t* const lp) {
|
|
1444
1344
|
size_t i, s1;
|
|
1445
1345
|
char* p1;
|
|
@@ -1722,2352 +1622,461 @@ static VALUE dfloat_abs(VALUE self) {
|
|
|
1722
1622
|
return na_ndloop(&ndf, 1, self);
|
|
1723
1623
|
}
|
|
1724
1624
|
|
|
1725
|
-
|
|
1726
|
-
|
|
1727
|
-
|
|
1728
|
-
static void iter_dfloat_add(na_loop_t* const lp) {
|
|
1729
|
-
size_t i = 0;
|
|
1730
|
-
size_t n;
|
|
1625
|
+
static void iter_dfloat_pow(na_loop_t* const lp) {
|
|
1626
|
+
size_t i;
|
|
1731
1627
|
char *p1, *p2, *p3;
|
|
1732
1628
|
ssize_t s1, s2, s3;
|
|
1733
|
-
|
|
1734
|
-
|
|
1735
|
-
size_t cnt;
|
|
1736
|
-
size_t cnt_simd_loop = -1;
|
|
1737
|
-
|
|
1738
|
-
__m128d a;
|
|
1739
|
-
__m128d b;
|
|
1740
|
-
|
|
1741
|
-
size_t num_pack; // Number of elements packed for SIMD.
|
|
1742
|
-
num_pack = SIMD_ALIGNMENT_SIZE / sizeof(dtype);
|
|
1743
|
-
#endif
|
|
1744
|
-
INIT_COUNTER(lp, n);
|
|
1629
|
+
dtype x, y;
|
|
1630
|
+
INIT_COUNTER(lp, i);
|
|
1745
1631
|
INIT_PTR(lp, 0, p1, s1);
|
|
1746
1632
|
INIT_PTR(lp, 1, p2, s2);
|
|
1747
1633
|
INIT_PTR(lp, 2, p3, s3);
|
|
1748
|
-
|
|
1749
|
-
//
|
|
1750
|
-
if (is_aligned(p1, sizeof(dtype)) && is_aligned(p2, sizeof(dtype)) &&
|
|
1751
|
-
is_aligned(p3, sizeof(dtype))) {
|
|
1752
|
-
|
|
1753
|
-
if (s1 == sizeof(dtype) && s2 == sizeof(dtype) && s3 == sizeof(dtype)) {
|
|
1754
|
-
#ifdef __SSE2__
|
|
1755
|
-
// Check number of elements. & Check same alignment.
|
|
1756
|
-
if ((n >= num_pack) &&
|
|
1757
|
-
is_same_aligned3(
|
|
1758
|
-
&((dtype*)p1)[i], &((dtype*)p2)[i], &((dtype*)p3)[i], SIMD_ALIGNMENT_SIZE
|
|
1759
|
-
)) {
|
|
1760
|
-
// Calculate up to the position just before the start of SIMD computation.
|
|
1761
|
-
cnt = get_count_of_elements_not_aligned_to_simd_size(
|
|
1762
|
-
&((dtype*)p1)[i], SIMD_ALIGNMENT_SIZE, sizeof(dtype)
|
|
1763
|
-
);
|
|
1764
|
-
#endif
|
|
1765
|
-
if (p1 == p3) { // inplace case
|
|
1766
|
-
#ifdef __SSE2__
|
|
1767
|
-
for (; i < cnt; i++) {
|
|
1768
|
-
#else
|
|
1769
|
-
for (; i < n; i++) {
|
|
1770
|
-
check_intdivzero(((dtype*)p2)[i]);
|
|
1771
|
-
#endif
|
|
1772
|
-
((dtype*)p1)[i] = m_add(((dtype*)p1)[i], ((dtype*)p2)[i]);
|
|
1773
|
-
}
|
|
1774
|
-
} else {
|
|
1775
|
-
#ifdef __SSE2__
|
|
1776
|
-
for (; i < cnt; i++) {
|
|
1777
|
-
#else
|
|
1778
|
-
for (; i < n; i++) {
|
|
1779
|
-
check_intdivzero(((dtype*)p2)[i]);
|
|
1780
|
-
#endif
|
|
1781
|
-
((dtype*)p3)[i] = m_add(((dtype*)p1)[i], ((dtype*)p2)[i]);
|
|
1782
|
-
}
|
|
1783
|
-
}
|
|
1784
|
-
#ifdef __SSE2__
|
|
1785
|
-
// Get the count of SIMD computation loops.
|
|
1786
|
-
cnt_simd_loop = (n - i) % num_pack;
|
|
1787
|
-
|
|
1788
|
-
// SIMD computation.
|
|
1789
|
-
if (p1 == p3) { // inplace case
|
|
1790
|
-
for (; i < n - cnt_simd_loop; i += num_pack) {
|
|
1791
|
-
a = _mm_load_pd(&((dtype*)p1)[i]);
|
|
1792
|
-
b = _mm_load_pd(&((dtype*)p2)[i]);
|
|
1793
|
-
a = _mm_add_pd(a, b);
|
|
1794
|
-
_mm_store_pd(&((dtype*)p1)[i], a);
|
|
1795
|
-
}
|
|
1796
|
-
} else {
|
|
1797
|
-
for (; i < n - cnt_simd_loop; i += num_pack) {
|
|
1798
|
-
a = _mm_load_pd(&((dtype*)p1)[i]);
|
|
1799
|
-
b = _mm_load_pd(&((dtype*)p2)[i]);
|
|
1800
|
-
a = _mm_add_pd(a, b);
|
|
1801
|
-
_mm_stream_pd(&((dtype*)p3)[i], a);
|
|
1802
|
-
}
|
|
1803
|
-
}
|
|
1804
|
-
}
|
|
1805
|
-
|
|
1806
|
-
// Compute the remainder of the SIMD operation.
|
|
1807
|
-
if (cnt_simd_loop != 0) {
|
|
1808
|
-
if (p1 == p3) { // inplace case
|
|
1809
|
-
for (; i < n; i++) {
|
|
1810
|
-
check_intdivzero(((dtype*)p2)[i]);
|
|
1811
|
-
((dtype*)p1)[i] = m_add(((dtype*)p1)[i], ((dtype*)p2)[i]);
|
|
1812
|
-
}
|
|
1813
|
-
} else {
|
|
1814
|
-
for (; i < n; i++) {
|
|
1815
|
-
check_intdivzero(((dtype*)p2)[i]);
|
|
1816
|
-
((dtype*)p3)[i] = m_add(((dtype*)p1)[i], ((dtype*)p2)[i]);
|
|
1817
|
-
}
|
|
1818
|
-
}
|
|
1819
|
-
}
|
|
1820
|
-
#endif
|
|
1821
|
-
return;
|
|
1822
|
-
}
|
|
1823
|
-
|
|
1824
|
-
if (is_aligned_step(s1, sizeof(dtype)) && is_aligned_step(s2, sizeof(dtype)) &&
|
|
1825
|
-
is_aligned_step(s3, sizeof(dtype))) {
|
|
1826
|
-
//
|
|
1827
|
-
|
|
1828
|
-
if (s2 == 0) { // Broadcasting from scalar value.
|
|
1829
|
-
check_intdivzero(*(dtype*)p2);
|
|
1830
|
-
if (s1 == sizeof(dtype) && s3 == sizeof(dtype)) {
|
|
1831
|
-
#ifdef __SSE2__
|
|
1832
|
-
// Broadcast a scalar value and use it for SIMD computation.
|
|
1833
|
-
b = _mm_load1_pd(&((dtype*)p2)[0]);
|
|
1834
|
-
|
|
1835
|
-
// Check number of elements. & Check same alignment.
|
|
1836
|
-
if ((n >= num_pack) &&
|
|
1837
|
-
is_same_aligned2(&((dtype*)p1)[i], &((dtype*)p3)[i], SIMD_ALIGNMENT_SIZE)) {
|
|
1838
|
-
// Calculate up to the position just before the start of SIMD computation.
|
|
1839
|
-
cnt = get_count_of_elements_not_aligned_to_simd_size(
|
|
1840
|
-
&((dtype*)p1)[i], SIMD_ALIGNMENT_SIZE, sizeof(dtype)
|
|
1841
|
-
);
|
|
1842
|
-
#endif
|
|
1843
|
-
if (p1 == p3) { // inplace case
|
|
1844
|
-
#ifdef __SSE2__
|
|
1845
|
-
for (; i < cnt; i++) {
|
|
1846
|
-
#else
|
|
1847
|
-
for (; i < n; i++) {
|
|
1848
|
-
#endif
|
|
1849
|
-
((dtype*)p1)[i] = m_add(((dtype*)p1)[i], *(dtype*)p2);
|
|
1850
|
-
}
|
|
1851
|
-
} else {
|
|
1852
|
-
#ifdef __SSE2__
|
|
1853
|
-
for (; i < cnt; i++) {
|
|
1854
|
-
#else
|
|
1855
|
-
for (; i < n; i++) {
|
|
1856
|
-
#endif
|
|
1857
|
-
((dtype*)p3)[i] = m_add(((dtype*)p1)[i], *(dtype*)p2);
|
|
1858
|
-
}
|
|
1859
|
-
}
|
|
1860
|
-
#ifdef __SSE2__
|
|
1861
|
-
// Get the count of SIMD computation loops.
|
|
1862
|
-
cnt_simd_loop = (n - i) % num_pack;
|
|
1863
|
-
|
|
1864
|
-
// SIMD computation.
|
|
1865
|
-
if (p1 == p3) { // inplace case
|
|
1866
|
-
for (; i < n - cnt_simd_loop; i += num_pack) {
|
|
1867
|
-
a = _mm_load_pd(&((dtype*)p1)[i]);
|
|
1868
|
-
a = _mm_add_pd(a, b);
|
|
1869
|
-
_mm_store_pd(&((dtype*)p1)[i], a);
|
|
1870
|
-
}
|
|
1871
|
-
} else {
|
|
1872
|
-
for (; i < n - cnt_simd_loop; i += num_pack) {
|
|
1873
|
-
a = _mm_load_pd(&((dtype*)p1)[i]);
|
|
1874
|
-
a = _mm_add_pd(a, b);
|
|
1875
|
-
_mm_stream_pd(&((dtype*)p3)[i], a);
|
|
1876
|
-
}
|
|
1877
|
-
}
|
|
1878
|
-
}
|
|
1879
|
-
|
|
1880
|
-
// Compute the remainder of the SIMD operation.
|
|
1881
|
-
if (cnt_simd_loop != 0) {
|
|
1882
|
-
if (p1 == p3) { // inplace case
|
|
1883
|
-
for (; i < n; i++) {
|
|
1884
|
-
((dtype*)p1)[i] = m_add(((dtype*)p1)[i], *(dtype*)p2);
|
|
1885
|
-
}
|
|
1886
|
-
} else {
|
|
1887
|
-
for (; i < n; i++) {
|
|
1888
|
-
((dtype*)p3)[i] = m_add(((dtype*)p1)[i], *(dtype*)p2);
|
|
1889
|
-
}
|
|
1890
|
-
}
|
|
1891
|
-
}
|
|
1892
|
-
#endif
|
|
1893
|
-
} else {
|
|
1894
|
-
for (i = 0; i < n; i++) {
|
|
1895
|
-
*(dtype*)p3 = m_add(*(dtype*)p1, *(dtype*)p2);
|
|
1896
|
-
p1 += s1;
|
|
1897
|
-
p3 += s3;
|
|
1898
|
-
}
|
|
1899
|
-
}
|
|
1900
|
-
} else {
|
|
1901
|
-
if (p1 == p3) { // inplace case
|
|
1902
|
-
for (i = 0; i < n; i++) {
|
|
1903
|
-
check_intdivzero(*(dtype*)p2);
|
|
1904
|
-
*(dtype*)p1 = m_add(*(dtype*)p1, *(dtype*)p2);
|
|
1905
|
-
p1 += s1;
|
|
1906
|
-
p2 += s2;
|
|
1907
|
-
}
|
|
1908
|
-
} else {
|
|
1909
|
-
for (i = 0; i < n; i++) {
|
|
1910
|
-
check_intdivzero(*(dtype*)p2);
|
|
1911
|
-
*(dtype*)p3 = m_add(*(dtype*)p1, *(dtype*)p2);
|
|
1912
|
-
p1 += s1;
|
|
1913
|
-
p2 += s2;
|
|
1914
|
-
p3 += s3;
|
|
1915
|
-
}
|
|
1916
|
-
}
|
|
1917
|
-
}
|
|
1918
|
-
|
|
1919
|
-
return;
|
|
1920
|
-
//
|
|
1921
|
-
}
|
|
1922
|
-
}
|
|
1923
|
-
for (i = 0; i < n; i++) {
|
|
1924
|
-
dtype x, y, z;
|
|
1634
|
+
for (; i--;) {
|
|
1925
1635
|
GET_DATA_STRIDE(p1, s1, dtype, x);
|
|
1926
1636
|
GET_DATA_STRIDE(p2, s2, dtype, y);
|
|
1927
|
-
|
|
1928
|
-
|
|
1929
|
-
|
|
1637
|
+
x = m_pow(x, y);
|
|
1638
|
+
SET_DATA_STRIDE(p3, s3, dtype, x);
|
|
1639
|
+
}
|
|
1640
|
+
}
|
|
1641
|
+
|
|
1642
|
+
static void iter_dfloat_pow_int32(na_loop_t* const lp) {
|
|
1643
|
+
size_t i;
|
|
1644
|
+
char *p1, *p2, *p3;
|
|
1645
|
+
ssize_t s1, s2, s3;
|
|
1646
|
+
dtype x;
|
|
1647
|
+
int32_t y;
|
|
1648
|
+
INIT_COUNTER(lp, i);
|
|
1649
|
+
INIT_PTR(lp, 0, p1, s1);
|
|
1650
|
+
INIT_PTR(lp, 1, p2, s2);
|
|
1651
|
+
INIT_PTR(lp, 2, p3, s3);
|
|
1652
|
+
for (; i--;) {
|
|
1653
|
+
GET_DATA_STRIDE(p1, s1, dtype, x);
|
|
1654
|
+
GET_DATA_STRIDE(p2, s2, int32_t, y);
|
|
1655
|
+
x = m_pow_int(x, y);
|
|
1656
|
+
SET_DATA_STRIDE(p3, s3, dtype, x);
|
|
1930
1657
|
}
|
|
1931
|
-
//
|
|
1932
1658
|
}
|
|
1933
|
-
#undef check_intdivzero
|
|
1934
1659
|
|
|
1935
|
-
static VALUE
|
|
1660
|
+
static VALUE dfloat_pow_self(VALUE self, VALUE other) {
|
|
1936
1661
|
ndfunc_arg_in_t ain[2] = { { cT, 0 }, { cT, 0 } };
|
|
1662
|
+
ndfunc_arg_in_t ain_i[2] = { { cT, 0 }, { numo_cInt32, 0 } };
|
|
1937
1663
|
ndfunc_arg_out_t aout[1] = { { cT, 0 } };
|
|
1938
|
-
ndfunc_t ndf = {
|
|
1664
|
+
ndfunc_t ndf = { iter_dfloat_pow, STRIDE_LOOP, 2, 1, ain, aout };
|
|
1665
|
+
ndfunc_t ndf_i = { iter_dfloat_pow_int32, STRIDE_LOOP, 2, 1, ain_i, aout };
|
|
1939
1666
|
|
|
1940
|
-
|
|
1667
|
+
// fixme : use na.integer?
|
|
1668
|
+
if (FIXNUM_P(other) || rb_obj_is_kind_of(other, numo_cInt32)) {
|
|
1669
|
+
return na_ndloop(&ndf_i, 2, self, other);
|
|
1670
|
+
} else {
|
|
1671
|
+
return na_ndloop(&ndf, 2, self, other);
|
|
1672
|
+
}
|
|
1941
1673
|
}
|
|
1942
1674
|
|
|
1943
|
-
static VALUE
|
|
1675
|
+
static VALUE dfloat_pow(VALUE self, VALUE other) {
|
|
1944
1676
|
|
|
1945
1677
|
VALUE klass, v;
|
|
1946
|
-
|
|
1947
1678
|
klass = na_upcast(rb_obj_class(self), rb_obj_class(other));
|
|
1948
1679
|
if (klass == cT) {
|
|
1949
|
-
return
|
|
1680
|
+
return dfloat_pow_self(self, other);
|
|
1950
1681
|
} else {
|
|
1951
1682
|
v = rb_funcall(klass, id_cast, 1, self);
|
|
1952
|
-
return rb_funcall(v,
|
|
1683
|
+
return rb_funcall(v, id_pow, 1, other);
|
|
1953
1684
|
}
|
|
1954
1685
|
}
|
|
1955
1686
|
|
|
1956
|
-
|
|
1957
|
-
|
|
1687
|
+
static void iter_dfloat_minus(na_loop_t* const lp) {
|
|
1688
|
+
size_t i, n;
|
|
1689
|
+
char *p1, *p2;
|
|
1690
|
+
ssize_t s1, s2;
|
|
1691
|
+
size_t *idx1, *idx2;
|
|
1692
|
+
dtype x;
|
|
1958
1693
|
|
|
1959
|
-
static void iter_dfloat_sub(na_loop_t* const lp) {
|
|
1960
|
-
size_t i = 0;
|
|
1961
|
-
size_t n;
|
|
1962
|
-
char *p1, *p2, *p3;
|
|
1963
|
-
ssize_t s1, s2, s3;
|
|
1964
|
-
|
|
1965
|
-
#ifdef __SSE2__
|
|
1966
|
-
size_t cnt;
|
|
1967
|
-
size_t cnt_simd_loop = -1;
|
|
1968
|
-
|
|
1969
|
-
__m128d a;
|
|
1970
|
-
__m128d b;
|
|
1971
|
-
|
|
1972
|
-
size_t num_pack; // Number of elements packed for SIMD.
|
|
1973
|
-
num_pack = SIMD_ALIGNMENT_SIZE / sizeof(dtype);
|
|
1974
|
-
#endif
|
|
1975
1694
|
INIT_COUNTER(lp, n);
|
|
1976
|
-
|
|
1977
|
-
|
|
1978
|
-
INIT_PTR(lp, 2, p3, s3);
|
|
1979
|
-
|
|
1980
|
-
//
|
|
1981
|
-
if (is_aligned(p1, sizeof(dtype)) && is_aligned(p2, sizeof(dtype)) &&
|
|
1982
|
-
is_aligned(p3, sizeof(dtype))) {
|
|
1983
|
-
|
|
1984
|
-
if (s1 == sizeof(dtype) && s2 == sizeof(dtype) && s3 == sizeof(dtype)) {
|
|
1985
|
-
#ifdef __SSE2__
|
|
1986
|
-
// Check number of elements. & Check same alignment.
|
|
1987
|
-
if ((n >= num_pack) &&
|
|
1988
|
-
is_same_aligned3(
|
|
1989
|
-
&((dtype*)p1)[i], &((dtype*)p2)[i], &((dtype*)p3)[i], SIMD_ALIGNMENT_SIZE
|
|
1990
|
-
)) {
|
|
1991
|
-
// Calculate up to the position just before the start of SIMD computation.
|
|
1992
|
-
cnt = get_count_of_elements_not_aligned_to_simd_size(
|
|
1993
|
-
&((dtype*)p1)[i], SIMD_ALIGNMENT_SIZE, sizeof(dtype)
|
|
1994
|
-
);
|
|
1995
|
-
#endif
|
|
1996
|
-
if (p1 == p3) { // inplace case
|
|
1997
|
-
#ifdef __SSE2__
|
|
1998
|
-
for (; i < cnt; i++) {
|
|
1999
|
-
#else
|
|
2000
|
-
for (; i < n; i++) {
|
|
2001
|
-
check_intdivzero(((dtype*)p2)[i]);
|
|
2002
|
-
#endif
|
|
2003
|
-
((dtype*)p1)[i] = m_sub(((dtype*)p1)[i], ((dtype*)p2)[i]);
|
|
2004
|
-
}
|
|
2005
|
-
} else {
|
|
2006
|
-
#ifdef __SSE2__
|
|
2007
|
-
for (; i < cnt; i++) {
|
|
2008
|
-
#else
|
|
2009
|
-
for (; i < n; i++) {
|
|
2010
|
-
check_intdivzero(((dtype*)p2)[i]);
|
|
2011
|
-
#endif
|
|
2012
|
-
((dtype*)p3)[i] = m_sub(((dtype*)p1)[i], ((dtype*)p2)[i]);
|
|
2013
|
-
}
|
|
2014
|
-
}
|
|
2015
|
-
|
|
2016
|
-
#ifdef __SSE2__
|
|
2017
|
-
// Get the count of SIMD computation loops.
|
|
2018
|
-
cnt_simd_loop = (n - i) % num_pack;
|
|
1695
|
+
INIT_PTR_IDX(lp, 0, p1, s1, idx1);
|
|
1696
|
+
INIT_PTR_IDX(lp, 1, p2, s2, idx2);
|
|
2019
1697
|
|
|
2020
|
-
|
|
2021
|
-
|
|
2022
|
-
|
|
2023
|
-
|
|
2024
|
-
|
|
2025
|
-
|
|
2026
|
-
_mm_store_pd(&((dtype*)p1)[i], a);
|
|
2027
|
-
}
|
|
2028
|
-
} else {
|
|
2029
|
-
for (; i < n - cnt_simd_loop; i += num_pack) {
|
|
2030
|
-
a = _mm_load_pd(&((dtype*)p1)[i]);
|
|
2031
|
-
b = _mm_load_pd(&((dtype*)p2)[i]);
|
|
2032
|
-
a = _mm_sub_pd(a, b);
|
|
2033
|
-
_mm_stream_pd(&((dtype*)p3)[i], a);
|
|
2034
|
-
}
|
|
2035
|
-
}
|
|
1698
|
+
if (idx1) {
|
|
1699
|
+
if (idx2) {
|
|
1700
|
+
for (i = 0; i < n; i++) {
|
|
1701
|
+
GET_DATA_INDEX(p1, idx1, dtype, x);
|
|
1702
|
+
x = m_minus(x);
|
|
1703
|
+
SET_DATA_INDEX(p2, idx2, dtype, x);
|
|
2036
1704
|
}
|
|
2037
|
-
|
|
2038
|
-
|
|
2039
|
-
|
|
2040
|
-
|
|
2041
|
-
|
|
2042
|
-
check_intdivzero(((dtype*)p2)[i]);
|
|
2043
|
-
((dtype*)p1)[i] = m_sub(((dtype*)p1)[i], ((dtype*)p2)[i]);
|
|
2044
|
-
}
|
|
2045
|
-
} else {
|
|
2046
|
-
for (; i < n; i++) {
|
|
2047
|
-
check_intdivzero(((dtype*)p2)[i]);
|
|
2048
|
-
((dtype*)p3)[i] = m_sub(((dtype*)p1)[i], ((dtype*)p2)[i]);
|
|
2049
|
-
}
|
|
2050
|
-
}
|
|
1705
|
+
} else {
|
|
1706
|
+
for (i = 0; i < n; i++) {
|
|
1707
|
+
GET_DATA_INDEX(p1, idx1, dtype, x);
|
|
1708
|
+
x = m_minus(x);
|
|
1709
|
+
SET_DATA_STRIDE(p2, s2, dtype, x);
|
|
2051
1710
|
}
|
|
2052
|
-
#endif
|
|
2053
|
-
return;
|
|
2054
1711
|
}
|
|
2055
|
-
|
|
2056
|
-
if (
|
|
2057
|
-
|
|
1712
|
+
} else {
|
|
1713
|
+
if (idx2) {
|
|
1714
|
+
for (i = 0; i < n; i++) {
|
|
1715
|
+
GET_DATA_STRIDE(p1, s1, dtype, x);
|
|
1716
|
+
x = m_minus(x);
|
|
1717
|
+
SET_DATA_INDEX(p2, idx2, dtype, x);
|
|
1718
|
+
}
|
|
1719
|
+
} else {
|
|
2058
1720
|
//
|
|
2059
|
-
|
|
2060
|
-
|
|
2061
|
-
check_intdivzero(*(dtype*)p2);
|
|
2062
|
-
if (s1 == sizeof(dtype) && s3 == sizeof(dtype)) {
|
|
2063
|
-
#ifdef __SSE2__
|
|
2064
|
-
// Broadcast a scalar value and use it for SIMD computation.
|
|
2065
|
-
b = _mm_load1_pd(&((dtype*)p2)[0]);
|
|
2066
|
-
|
|
2067
|
-
// Check number of elements. & Check same alignment.
|
|
2068
|
-
if ((n >= num_pack) &&
|
|
2069
|
-
is_same_aligned2(&((dtype*)p1)[i], &((dtype*)p3)[i], SIMD_ALIGNMENT_SIZE)) {
|
|
2070
|
-
// Calculate up to the position just before the start of SIMD computation.
|
|
2071
|
-
cnt = get_count_of_elements_not_aligned_to_simd_size(
|
|
2072
|
-
&((dtype*)p1)[i], SIMD_ALIGNMENT_SIZE, sizeof(dtype)
|
|
2073
|
-
);
|
|
2074
|
-
#endif
|
|
2075
|
-
if (p1 == p3) { // inplace case
|
|
2076
|
-
#ifdef __SSE2__
|
|
2077
|
-
for (; i < cnt; i++) {
|
|
2078
|
-
#else
|
|
2079
|
-
for (; i < n; i++) {
|
|
2080
|
-
#endif
|
|
2081
|
-
((dtype*)p1)[i] = m_sub(((dtype*)p1)[i], *(dtype*)p2);
|
|
2082
|
-
}
|
|
2083
|
-
} else {
|
|
2084
|
-
#ifdef __SSE2__
|
|
2085
|
-
for (; i < cnt; i++) {
|
|
2086
|
-
#else
|
|
2087
|
-
for (; i < n; i++) {
|
|
2088
|
-
#endif
|
|
2089
|
-
((dtype*)p3)[i] = m_sub(((dtype*)p1)[i], *(dtype*)p2);
|
|
2090
|
-
}
|
|
2091
|
-
}
|
|
2092
|
-
|
|
2093
|
-
#ifdef __SSE2__
|
|
2094
|
-
// Get the count of SIMD computation loops.
|
|
2095
|
-
cnt_simd_loop = (n - i) % num_pack;
|
|
2096
|
-
|
|
2097
|
-
// SIMD computation.
|
|
2098
|
-
if (p1 == p3) { // inplace case
|
|
2099
|
-
for (; i < n - cnt_simd_loop; i += num_pack) {
|
|
2100
|
-
a = _mm_load_pd(&((dtype*)p1)[i]);
|
|
2101
|
-
a = _mm_sub_pd(a, b);
|
|
2102
|
-
_mm_store_pd(&((dtype*)p1)[i], a);
|
|
2103
|
-
}
|
|
2104
|
-
} else {
|
|
2105
|
-
for (; i < n - cnt_simd_loop; i += num_pack) {
|
|
2106
|
-
a = _mm_load_pd(&((dtype*)p1)[i]);
|
|
2107
|
-
a = _mm_sub_pd(a, b);
|
|
2108
|
-
_mm_stream_pd(&((dtype*)p3)[i], a);
|
|
2109
|
-
}
|
|
2110
|
-
}
|
|
2111
|
-
}
|
|
2112
|
-
|
|
2113
|
-
// Compute the remainder of the SIMD operation.
|
|
2114
|
-
if (cnt_simd_loop != 0) {
|
|
2115
|
-
if (p1 == p3) { // inplace case
|
|
2116
|
-
for (; i < n; i++) {
|
|
2117
|
-
((dtype*)p1)[i] = m_sub(((dtype*)p1)[i], *(dtype*)p2);
|
|
2118
|
-
}
|
|
2119
|
-
} else {
|
|
2120
|
-
for (; i < n; i++) {
|
|
2121
|
-
((dtype*)p3)[i] = m_sub(((dtype*)p1)[i], *(dtype*)p2);
|
|
2122
|
-
}
|
|
2123
|
-
}
|
|
2124
|
-
}
|
|
2125
|
-
#endif
|
|
2126
|
-
} else {
|
|
1721
|
+
if (is_aligned(p1, sizeof(dtype)) && is_aligned(p2, sizeof(dtype))) {
|
|
1722
|
+
if (s1 == sizeof(dtype) && s2 == sizeof(dtype)) {
|
|
2127
1723
|
for (i = 0; i < n; i++) {
|
|
2128
|
-
|
|
2129
|
-
p1 += s1;
|
|
2130
|
-
p3 += s3;
|
|
1724
|
+
((dtype*)p2)[i] = m_minus(((dtype*)p1)[i]);
|
|
2131
1725
|
}
|
|
1726
|
+
return;
|
|
2132
1727
|
}
|
|
2133
|
-
|
|
2134
|
-
|
|
2135
|
-
for (i = 0; i < n; i++) {
|
|
2136
|
-
check_intdivzero(*(dtype*)p2);
|
|
2137
|
-
*(dtype*)p1 = m_sub(*(dtype*)p1, *(dtype*)p2);
|
|
2138
|
-
p1 += s1;
|
|
2139
|
-
p2 += s2;
|
|
2140
|
-
}
|
|
2141
|
-
} else {
|
|
1728
|
+
if (is_aligned_step(s1, sizeof(dtype)) && is_aligned_step(s2, sizeof(dtype))) {
|
|
1729
|
+
//
|
|
2142
1730
|
for (i = 0; i < n; i++) {
|
|
2143
|
-
|
|
2144
|
-
*(dtype*)p3 = m_sub(*(dtype*)p1, *(dtype*)p2);
|
|
1731
|
+
*(dtype*)p2 = m_minus(*(dtype*)p1);
|
|
2145
1732
|
p1 += s1;
|
|
2146
1733
|
p2 += s2;
|
|
2147
|
-
p3 += s3;
|
|
2148
1734
|
}
|
|
1735
|
+
return;
|
|
1736
|
+
//
|
|
2149
1737
|
}
|
|
2150
1738
|
}
|
|
2151
|
-
|
|
2152
|
-
|
|
1739
|
+
for (i = 0; i < n; i++) {
|
|
1740
|
+
GET_DATA_STRIDE(p1, s1, dtype, x);
|
|
1741
|
+
x = m_minus(x);
|
|
1742
|
+
SET_DATA_STRIDE(p2, s2, dtype, x);
|
|
1743
|
+
}
|
|
2153
1744
|
//
|
|
2154
1745
|
}
|
|
2155
1746
|
}
|
|
2156
|
-
for (i = 0; i < n; i++) {
|
|
2157
|
-
dtype x, y, z;
|
|
2158
|
-
GET_DATA_STRIDE(p1, s1, dtype, x);
|
|
2159
|
-
GET_DATA_STRIDE(p2, s2, dtype, y);
|
|
2160
|
-
check_intdivzero(y);
|
|
2161
|
-
z = m_sub(x, y);
|
|
2162
|
-
SET_DATA_STRIDE(p3, s3, dtype, z);
|
|
2163
|
-
}
|
|
2164
|
-
//
|
|
2165
1747
|
}
|
|
2166
|
-
#undef check_intdivzero
|
|
2167
1748
|
|
|
2168
|
-
static VALUE
|
|
2169
|
-
ndfunc_arg_in_t ain[
|
|
1749
|
+
static VALUE dfloat_minus(VALUE self) {
|
|
1750
|
+
ndfunc_arg_in_t ain[1] = { { cT, 0 } };
|
|
2170
1751
|
ndfunc_arg_out_t aout[1] = { { cT, 0 } };
|
|
2171
|
-
ndfunc_t ndf = {
|
|
2172
|
-
|
|
2173
|
-
return na_ndloop(&ndf, 2, self, other);
|
|
2174
|
-
}
|
|
2175
|
-
|
|
2176
|
-
static VALUE dfloat_sub(VALUE self, VALUE other) {
|
|
2177
|
-
|
|
2178
|
-
VALUE klass, v;
|
|
1752
|
+
ndfunc_t ndf = { iter_dfloat_minus, FULL_LOOP, 1, 1, ain, aout };
|
|
2179
1753
|
|
|
2180
|
-
|
|
2181
|
-
if (klass == cT) {
|
|
2182
|
-
return dfloat_sub_self(self, other);
|
|
2183
|
-
} else {
|
|
2184
|
-
v = rb_funcall(klass, id_cast, 1, self);
|
|
2185
|
-
return rb_funcall(v, '-', 1, other);
|
|
2186
|
-
}
|
|
1754
|
+
return na_ndloop(&ndf, 1, self);
|
|
2187
1755
|
}
|
|
2188
1756
|
|
|
2189
|
-
|
|
2190
|
-
|
|
2191
|
-
|
|
2192
|
-
|
|
2193
|
-
size_t
|
|
2194
|
-
|
|
2195
|
-
char *p1, *p2, *p3;
|
|
2196
|
-
ssize_t s1, s2, s3;
|
|
2197
|
-
|
|
2198
|
-
#ifdef __SSE2__
|
|
2199
|
-
size_t cnt;
|
|
2200
|
-
size_t cnt_simd_loop = -1;
|
|
2201
|
-
|
|
2202
|
-
__m128d a;
|
|
2203
|
-
__m128d b;
|
|
1757
|
+
static void iter_dfloat_reciprocal(na_loop_t* const lp) {
|
|
1758
|
+
size_t i, n;
|
|
1759
|
+
char *p1, *p2;
|
|
1760
|
+
ssize_t s1, s2;
|
|
1761
|
+
size_t *idx1, *idx2;
|
|
1762
|
+
dtype x;
|
|
2204
1763
|
|
|
2205
|
-
size_t num_pack; // Number of elements packed for SIMD.
|
|
2206
|
-
num_pack = SIMD_ALIGNMENT_SIZE / sizeof(dtype);
|
|
2207
|
-
#endif
|
|
2208
1764
|
INIT_COUNTER(lp, n);
|
|
2209
|
-
|
|
2210
|
-
|
|
2211
|
-
INIT_PTR(lp, 2, p3, s3);
|
|
2212
|
-
|
|
2213
|
-
//
|
|
2214
|
-
if (is_aligned(p1, sizeof(dtype)) && is_aligned(p2, sizeof(dtype)) &&
|
|
2215
|
-
is_aligned(p3, sizeof(dtype))) {
|
|
1765
|
+
INIT_PTR_IDX(lp, 0, p1, s1, idx1);
|
|
1766
|
+
INIT_PTR_IDX(lp, 1, p2, s2, idx2);
|
|
2216
1767
|
|
|
2217
|
-
|
|
2218
|
-
|
|
2219
|
-
|
|
2220
|
-
|
|
2221
|
-
|
|
2222
|
-
|
|
2223
|
-
|
|
2224
|
-
|
|
2225
|
-
|
|
2226
|
-
|
|
2227
|
-
);
|
|
2228
|
-
|
|
2229
|
-
|
|
2230
|
-
|
|
2231
|
-
|
|
2232
|
-
|
|
2233
|
-
|
|
2234
|
-
|
|
2235
|
-
|
|
2236
|
-
|
|
2237
|
-
|
|
2238
|
-
|
|
2239
|
-
|
|
2240
|
-
|
|
2241
|
-
|
|
2242
|
-
|
|
2243
|
-
|
|
2244
|
-
#endif
|
|
2245
|
-
((dtype*)p3)[i] = m_mul(((dtype*)p1)[i], ((dtype*)p2)[i]);
|
|
1768
|
+
if (idx1) {
|
|
1769
|
+
if (idx2) {
|
|
1770
|
+
for (i = 0; i < n; i++) {
|
|
1771
|
+
GET_DATA_INDEX(p1, idx1, dtype, x);
|
|
1772
|
+
x = m_reciprocal(x);
|
|
1773
|
+
SET_DATA_INDEX(p2, idx2, dtype, x);
|
|
1774
|
+
}
|
|
1775
|
+
} else {
|
|
1776
|
+
for (i = 0; i < n; i++) {
|
|
1777
|
+
GET_DATA_INDEX(p1, idx1, dtype, x);
|
|
1778
|
+
x = m_reciprocal(x);
|
|
1779
|
+
SET_DATA_STRIDE(p2, s2, dtype, x);
|
|
1780
|
+
}
|
|
1781
|
+
}
|
|
1782
|
+
} else {
|
|
1783
|
+
if (idx2) {
|
|
1784
|
+
for (i = 0; i < n; i++) {
|
|
1785
|
+
GET_DATA_STRIDE(p1, s1, dtype, x);
|
|
1786
|
+
x = m_reciprocal(x);
|
|
1787
|
+
SET_DATA_INDEX(p2, idx2, dtype, x);
|
|
1788
|
+
}
|
|
1789
|
+
} else {
|
|
1790
|
+
//
|
|
1791
|
+
if (is_aligned(p1, sizeof(dtype)) && is_aligned(p2, sizeof(dtype))) {
|
|
1792
|
+
if (s1 == sizeof(dtype) && s2 == sizeof(dtype)) {
|
|
1793
|
+
for (i = 0; i < n; i++) {
|
|
1794
|
+
((dtype*)p2)[i] = m_reciprocal(((dtype*)p1)[i]);
|
|
2246
1795
|
}
|
|
1796
|
+
return;
|
|
2247
1797
|
}
|
|
2248
|
-
|
|
2249
|
-
|
|
2250
|
-
|
|
2251
|
-
|
|
2252
|
-
|
|
2253
|
-
|
|
2254
|
-
if (p1 == p3) { // inplace case
|
|
2255
|
-
for (; i < n - cnt_simd_loop; i += num_pack) {
|
|
2256
|
-
a = _mm_load_pd(&((dtype*)p1)[i]);
|
|
2257
|
-
b = _mm_load_pd(&((dtype*)p2)[i]);
|
|
2258
|
-
a = _mm_mul_pd(a, b);
|
|
2259
|
-
_mm_store_pd(&((dtype*)p1)[i], a);
|
|
2260
|
-
}
|
|
2261
|
-
} else {
|
|
2262
|
-
for (; i < n - cnt_simd_loop; i += num_pack) {
|
|
2263
|
-
a = _mm_load_pd(&((dtype*)p1)[i]);
|
|
2264
|
-
b = _mm_load_pd(&((dtype*)p2)[i]);
|
|
2265
|
-
a = _mm_mul_pd(a, b);
|
|
2266
|
-
_mm_stream_pd(&((dtype*)p3)[i], a);
|
|
1798
|
+
if (is_aligned_step(s1, sizeof(dtype)) && is_aligned_step(s2, sizeof(dtype))) {
|
|
1799
|
+
//
|
|
1800
|
+
for (i = 0; i < n; i++) {
|
|
1801
|
+
*(dtype*)p2 = m_reciprocal(*(dtype*)p1);
|
|
1802
|
+
p1 += s1;
|
|
1803
|
+
p2 += s2;
|
|
2267
1804
|
}
|
|
1805
|
+
return;
|
|
1806
|
+
//
|
|
2268
1807
|
}
|
|
2269
1808
|
}
|
|
2270
|
-
|
|
2271
|
-
|
|
2272
|
-
|
|
2273
|
-
|
|
2274
|
-
for (; i < n; i++) {
|
|
2275
|
-
check_intdivzero(((dtype*)p2)[i]);
|
|
2276
|
-
((dtype*)p1)[i] = m_mul(((dtype*)p1)[i], ((dtype*)p2)[i]);
|
|
2277
|
-
}
|
|
2278
|
-
} else {
|
|
2279
|
-
for (; i < n; i++) {
|
|
2280
|
-
check_intdivzero(((dtype*)p2)[i]);
|
|
2281
|
-
((dtype*)p3)[i] = m_mul(((dtype*)p1)[i], ((dtype*)p2)[i]);
|
|
2282
|
-
}
|
|
2283
|
-
}
|
|
1809
|
+
for (i = 0; i < n; i++) {
|
|
1810
|
+
GET_DATA_STRIDE(p1, s1, dtype, x);
|
|
1811
|
+
x = m_reciprocal(x);
|
|
1812
|
+
SET_DATA_STRIDE(p2, s2, dtype, x);
|
|
2284
1813
|
}
|
|
2285
|
-
#endif
|
|
2286
|
-
return;
|
|
2287
|
-
}
|
|
2288
|
-
|
|
2289
|
-
if (is_aligned_step(s1, sizeof(dtype)) && is_aligned_step(s2, sizeof(dtype)) &&
|
|
2290
|
-
is_aligned_step(s3, sizeof(dtype))) {
|
|
2291
1814
|
//
|
|
1815
|
+
}
|
|
1816
|
+
}
|
|
1817
|
+
}
|
|
2292
1818
|
|
|
2293
|
-
|
|
2294
|
-
|
|
2295
|
-
if (s1 == sizeof(dtype) && s3 == sizeof(dtype)) {
|
|
2296
|
-
#ifdef __SSE2__
|
|
2297
|
-
// Broadcast a scalar value and use it for SIMD computation.
|
|
2298
|
-
b = _mm_load1_pd(&((dtype*)p2)[0]);
|
|
2299
|
-
|
|
2300
|
-
// Check number of elements. & Check same alignment.
|
|
2301
|
-
if ((n >= num_pack) &&
|
|
2302
|
-
is_same_aligned2(&((dtype*)p1)[i], &((dtype*)p3)[i], SIMD_ALIGNMENT_SIZE)) {
|
|
2303
|
-
// Calculate up to the position just before the start of SIMD computation.
|
|
2304
|
-
cnt = get_count_of_elements_not_aligned_to_simd_size(
|
|
2305
|
-
&((dtype*)p1)[i], SIMD_ALIGNMENT_SIZE, sizeof(dtype)
|
|
2306
|
-
);
|
|
2307
|
-
#endif
|
|
2308
|
-
if (p1 == p3) { // inplace case
|
|
2309
|
-
#ifdef __SSE2__
|
|
2310
|
-
for (; i < cnt; i++) {
|
|
2311
|
-
#else
|
|
2312
|
-
for (; i < n; i++) {
|
|
2313
|
-
#endif
|
|
2314
|
-
((dtype*)p1)[i] = m_mul(((dtype*)p1)[i], *(dtype*)p2);
|
|
2315
|
-
}
|
|
2316
|
-
} else {
|
|
2317
|
-
#ifdef __SSE2__
|
|
2318
|
-
for (; i < cnt; i++) {
|
|
2319
|
-
#else
|
|
2320
|
-
for (; i < n; i++) {
|
|
2321
|
-
#endif
|
|
2322
|
-
((dtype*)p3)[i] = m_mul(((dtype*)p1)[i], *(dtype*)p2);
|
|
2323
|
-
}
|
|
2324
|
-
}
|
|
2325
|
-
|
|
2326
|
-
#ifdef __SSE2__
|
|
2327
|
-
// Get the count of SIMD computation loops.
|
|
2328
|
-
cnt_simd_loop = (n - i) % num_pack;
|
|
2329
|
-
|
|
2330
|
-
// SIMD computation.
|
|
2331
|
-
if (p1 == p3) { // inplace case
|
|
2332
|
-
for (; i < n - cnt_simd_loop; i += num_pack) {
|
|
2333
|
-
a = _mm_load_pd(&((dtype*)p1)[i]);
|
|
2334
|
-
a = _mm_mul_pd(a, b);
|
|
2335
|
-
_mm_store_pd(&((dtype*)p1)[i], a);
|
|
2336
|
-
}
|
|
2337
|
-
} else {
|
|
2338
|
-
for (; i < n - cnt_simd_loop; i += num_pack) {
|
|
2339
|
-
a = _mm_load_pd(&((dtype*)p1)[i]);
|
|
2340
|
-
a = _mm_mul_pd(a, b);
|
|
2341
|
-
_mm_stream_pd(&((dtype*)p3)[i], a);
|
|
2342
|
-
}
|
|
2343
|
-
}
|
|
2344
|
-
}
|
|
2345
|
-
|
|
2346
|
-
// Compute the remainder of the SIMD operation.
|
|
2347
|
-
if (cnt_simd_loop != 0) {
|
|
2348
|
-
if (p1 == p3) { // inplace case
|
|
2349
|
-
for (; i < n; i++) {
|
|
2350
|
-
((dtype*)p1)[i] = m_mul(((dtype*)p1)[i], *(dtype*)p2);
|
|
2351
|
-
}
|
|
2352
|
-
} else {
|
|
2353
|
-
for (; i < n; i++) {
|
|
2354
|
-
((dtype*)p3)[i] = m_mul(((dtype*)p1)[i], *(dtype*)p2);
|
|
2355
|
-
}
|
|
2356
|
-
}
|
|
2357
|
-
}
|
|
2358
|
-
#endif
|
|
2359
|
-
} else {
|
|
2360
|
-
for (i = 0; i < n; i++) {
|
|
2361
|
-
*(dtype*)p3 = m_mul(*(dtype*)p1, *(dtype*)p2);
|
|
2362
|
-
p1 += s1;
|
|
2363
|
-
p3 += s3;
|
|
2364
|
-
}
|
|
2365
|
-
}
|
|
2366
|
-
} else {
|
|
2367
|
-
if (p1 == p3) { // inplace case
|
|
2368
|
-
for (i = 0; i < n; i++) {
|
|
2369
|
-
check_intdivzero(*(dtype*)p2);
|
|
2370
|
-
*(dtype*)p1 = m_mul(*(dtype*)p1, *(dtype*)p2);
|
|
2371
|
-
p1 += s1;
|
|
2372
|
-
p2 += s2;
|
|
2373
|
-
}
|
|
2374
|
-
} else {
|
|
2375
|
-
for (i = 0; i < n; i++) {
|
|
2376
|
-
check_intdivzero(*(dtype*)p2);
|
|
2377
|
-
*(dtype*)p3 = m_mul(*(dtype*)p1, *(dtype*)p2);
|
|
2378
|
-
p1 += s1;
|
|
2379
|
-
p2 += s2;
|
|
2380
|
-
p3 += s3;
|
|
2381
|
-
}
|
|
2382
|
-
}
|
|
2383
|
-
}
|
|
2384
|
-
|
|
2385
|
-
return;
|
|
2386
|
-
//
|
|
2387
|
-
}
|
|
2388
|
-
}
|
|
2389
|
-
for (i = 0; i < n; i++) {
|
|
2390
|
-
dtype x, y, z;
|
|
2391
|
-
GET_DATA_STRIDE(p1, s1, dtype, x);
|
|
2392
|
-
GET_DATA_STRIDE(p2, s2, dtype, y);
|
|
2393
|
-
check_intdivzero(y);
|
|
2394
|
-
z = m_mul(x, y);
|
|
2395
|
-
SET_DATA_STRIDE(p3, s3, dtype, z);
|
|
2396
|
-
}
|
|
2397
|
-
//
|
|
2398
|
-
}
|
|
2399
|
-
#undef check_intdivzero
|
|
2400
|
-
|
|
2401
|
-
static VALUE dfloat_mul_self(VALUE self, VALUE other) {
|
|
2402
|
-
ndfunc_arg_in_t ain[2] = { { cT, 0 }, { cT, 0 } };
|
|
1819
|
+
static VALUE dfloat_reciprocal(VALUE self) {
|
|
1820
|
+
ndfunc_arg_in_t ain[1] = { { cT, 0 } };
|
|
2403
1821
|
ndfunc_arg_out_t aout[1] = { { cT, 0 } };
|
|
2404
|
-
ndfunc_t ndf = {
|
|
2405
|
-
|
|
2406
|
-
return na_ndloop(&ndf, 2, self, other);
|
|
2407
|
-
}
|
|
2408
|
-
|
|
2409
|
-
static VALUE dfloat_mul(VALUE self, VALUE other) {
|
|
2410
|
-
|
|
2411
|
-
VALUE klass, v;
|
|
1822
|
+
ndfunc_t ndf = { iter_dfloat_reciprocal, FULL_LOOP, 1, 1, ain, aout };
|
|
2412
1823
|
|
|
2413
|
-
|
|
2414
|
-
if (klass == cT) {
|
|
2415
|
-
return dfloat_mul_self(self, other);
|
|
2416
|
-
} else {
|
|
2417
|
-
v = rb_funcall(klass, id_cast, 1, self);
|
|
2418
|
-
return rb_funcall(v, '*', 1, other);
|
|
2419
|
-
}
|
|
1824
|
+
return na_ndloop(&ndf, 1, self);
|
|
2420
1825
|
}
|
|
2421
1826
|
|
|
2422
|
-
|
|
2423
|
-
|
|
2424
|
-
|
|
2425
|
-
|
|
2426
|
-
size_t
|
|
2427
|
-
|
|
2428
|
-
char *p1, *p2, *p3;
|
|
2429
|
-
ssize_t s1, s2, s3;
|
|
2430
|
-
|
|
2431
|
-
#ifdef __SSE2__
|
|
2432
|
-
size_t cnt;
|
|
2433
|
-
size_t cnt_simd_loop = -1;
|
|
2434
|
-
|
|
2435
|
-
__m128d a;
|
|
2436
|
-
__m128d b;
|
|
1827
|
+
static void iter_dfloat_sign(na_loop_t* const lp) {
|
|
1828
|
+
size_t i, n;
|
|
1829
|
+
char *p1, *p2;
|
|
1830
|
+
ssize_t s1, s2;
|
|
1831
|
+
size_t *idx1, *idx2;
|
|
1832
|
+
dtype x;
|
|
2437
1833
|
|
|
2438
|
-
size_t num_pack; // Number of elements packed for SIMD.
|
|
2439
|
-
num_pack = SIMD_ALIGNMENT_SIZE / sizeof(dtype);
|
|
2440
|
-
#endif
|
|
2441
1834
|
INIT_COUNTER(lp, n);
|
|
2442
|
-
|
|
2443
|
-
|
|
2444
|
-
INIT_PTR(lp, 2, p3, s3);
|
|
2445
|
-
|
|
2446
|
-
//
|
|
2447
|
-
if (is_aligned(p1, sizeof(dtype)) && is_aligned(p2, sizeof(dtype)) &&
|
|
2448
|
-
is_aligned(p3, sizeof(dtype))) {
|
|
2449
|
-
|
|
2450
|
-
if (s1 == sizeof(dtype) && s2 == sizeof(dtype) && s3 == sizeof(dtype)) {
|
|
2451
|
-
#ifdef __SSE2__
|
|
2452
|
-
// Check number of elements. & Check same alignment.
|
|
2453
|
-
if ((n >= num_pack) &&
|
|
2454
|
-
is_same_aligned3(
|
|
2455
|
-
&((dtype*)p1)[i], &((dtype*)p2)[i], &((dtype*)p3)[i], SIMD_ALIGNMENT_SIZE
|
|
2456
|
-
)) {
|
|
2457
|
-
// Calculate up to the position just before the start of SIMD computation.
|
|
2458
|
-
cnt = get_count_of_elements_not_aligned_to_simd_size(
|
|
2459
|
-
&((dtype*)p1)[i], SIMD_ALIGNMENT_SIZE, sizeof(dtype)
|
|
2460
|
-
);
|
|
2461
|
-
#endif
|
|
2462
|
-
if (p1 == p3) { // inplace case
|
|
2463
|
-
#ifdef __SSE2__
|
|
2464
|
-
for (; i < cnt; i++) {
|
|
2465
|
-
#else
|
|
2466
|
-
for (; i < n; i++) {
|
|
2467
|
-
#endif
|
|
2468
|
-
((dtype*)p1)[i] = m_div(((dtype*)p1)[i], ((dtype*)p2)[i]);
|
|
2469
|
-
}
|
|
2470
|
-
} else {
|
|
2471
|
-
#ifdef __SSE2__
|
|
2472
|
-
for (; i < cnt; i++) {
|
|
2473
|
-
#else
|
|
2474
|
-
for (; i < n; i++) {
|
|
2475
|
-
#endif
|
|
2476
|
-
((dtype*)p3)[i] = m_div(((dtype*)p1)[i], ((dtype*)p2)[i]);
|
|
2477
|
-
}
|
|
2478
|
-
}
|
|
2479
|
-
|
|
2480
|
-
#ifdef __SSE2__
|
|
2481
|
-
// Get the count of SIMD computation loops.
|
|
2482
|
-
cnt_simd_loop = (n - i) % num_pack;
|
|
1835
|
+
INIT_PTR_IDX(lp, 0, p1, s1, idx1);
|
|
1836
|
+
INIT_PTR_IDX(lp, 1, p2, s2, idx2);
|
|
2483
1837
|
|
|
2484
|
-
|
|
2485
|
-
|
|
2486
|
-
|
|
2487
|
-
|
|
2488
|
-
|
|
2489
|
-
|
|
2490
|
-
_mm_store_pd(&((dtype*)p1)[i], a);
|
|
2491
|
-
}
|
|
2492
|
-
} else {
|
|
2493
|
-
for (; i < n - cnt_simd_loop; i += num_pack) {
|
|
2494
|
-
a = _mm_load_pd(&((dtype*)p1)[i]);
|
|
2495
|
-
b = _mm_load_pd(&((dtype*)p2)[i]);
|
|
2496
|
-
a = _mm_div_pd(a, b);
|
|
2497
|
-
_mm_stream_pd(&((dtype*)p3)[i], a);
|
|
2498
|
-
}
|
|
2499
|
-
}
|
|
1838
|
+
if (idx1) {
|
|
1839
|
+
if (idx2) {
|
|
1840
|
+
for (i = 0; i < n; i++) {
|
|
1841
|
+
GET_DATA_INDEX(p1, idx1, dtype, x);
|
|
1842
|
+
x = m_sign(x);
|
|
1843
|
+
SET_DATA_INDEX(p2, idx2, dtype, x);
|
|
2500
1844
|
}
|
|
2501
|
-
|
|
2502
|
-
|
|
2503
|
-
|
|
2504
|
-
|
|
2505
|
-
|
|
2506
|
-
check_intdivzero(((dtype*)p2)[i]);
|
|
2507
|
-
((dtype*)p1)[i] = m_div(((dtype*)p1)[i], ((dtype*)p2)[i]);
|
|
2508
|
-
}
|
|
2509
|
-
} else {
|
|
2510
|
-
for (; i < n; i++) {
|
|
2511
|
-
check_intdivzero(((dtype*)p2)[i]);
|
|
2512
|
-
((dtype*)p3)[i] = m_div(((dtype*)p1)[i], ((dtype*)p2)[i]);
|
|
2513
|
-
}
|
|
2514
|
-
}
|
|
1845
|
+
} else {
|
|
1846
|
+
for (i = 0; i < n; i++) {
|
|
1847
|
+
GET_DATA_INDEX(p1, idx1, dtype, x);
|
|
1848
|
+
x = m_sign(x);
|
|
1849
|
+
SET_DATA_STRIDE(p2, s2, dtype, x);
|
|
2515
1850
|
}
|
|
2516
|
-
#endif
|
|
2517
|
-
return;
|
|
2518
1851
|
}
|
|
2519
|
-
|
|
2520
|
-
if (
|
|
2521
|
-
|
|
1852
|
+
} else {
|
|
1853
|
+
if (idx2) {
|
|
1854
|
+
for (i = 0; i < n; i++) {
|
|
1855
|
+
GET_DATA_STRIDE(p1, s1, dtype, x);
|
|
1856
|
+
x = m_sign(x);
|
|
1857
|
+
SET_DATA_INDEX(p2, idx2, dtype, x);
|
|
1858
|
+
}
|
|
1859
|
+
} else {
|
|
2522
1860
|
//
|
|
2523
|
-
|
|
2524
|
-
|
|
2525
|
-
check_intdivzero(*(dtype*)p2);
|
|
2526
|
-
if (s1 == sizeof(dtype) && s3 == sizeof(dtype)) {
|
|
2527
|
-
#ifdef __SSE2__
|
|
2528
|
-
// Broadcast a scalar value and use it for SIMD computation.
|
|
2529
|
-
b = _mm_load1_pd(&((dtype*)p2)[0]);
|
|
2530
|
-
|
|
2531
|
-
// Check number of elements. & Check same alignment.
|
|
2532
|
-
if ((n >= num_pack) &&
|
|
2533
|
-
is_same_aligned2(&((dtype*)p1)[i], &((dtype*)p3)[i], SIMD_ALIGNMENT_SIZE)) {
|
|
2534
|
-
// Calculate up to the position just before the start of SIMD computation.
|
|
2535
|
-
cnt = get_count_of_elements_not_aligned_to_simd_size(
|
|
2536
|
-
&((dtype*)p1)[i], SIMD_ALIGNMENT_SIZE, sizeof(dtype)
|
|
2537
|
-
);
|
|
2538
|
-
#endif
|
|
2539
|
-
if (p1 == p3) { // inplace case
|
|
2540
|
-
#ifdef __SSE2__
|
|
2541
|
-
for (; i < cnt; i++) {
|
|
2542
|
-
#else
|
|
2543
|
-
for (; i < n; i++) {
|
|
2544
|
-
check_intdivzero(((dtype*)p2)[i]);
|
|
2545
|
-
#endif
|
|
2546
|
-
((dtype*)p1)[i] = m_div(((dtype*)p1)[i], *(dtype*)p2);
|
|
2547
|
-
}
|
|
2548
|
-
} else {
|
|
2549
|
-
#ifdef __SSE2__
|
|
2550
|
-
for (; i < cnt; i++) {
|
|
2551
|
-
#else
|
|
2552
|
-
for (; i < n; i++) {
|
|
2553
|
-
check_intdivzero(((dtype*)p2)[i]);
|
|
2554
|
-
#endif
|
|
2555
|
-
((dtype*)p3)[i] = m_div(((dtype*)p1)[i], *(dtype*)p2);
|
|
2556
|
-
}
|
|
2557
|
-
}
|
|
2558
|
-
|
|
2559
|
-
#ifdef __SSE2__
|
|
2560
|
-
// Get the count of SIMD computation loops.
|
|
2561
|
-
cnt_simd_loop = (n - i) % num_pack;
|
|
2562
|
-
|
|
2563
|
-
// SIMD computation.
|
|
2564
|
-
if (p1 == p3) { // inplace case
|
|
2565
|
-
for (; i < n - cnt_simd_loop; i += num_pack) {
|
|
2566
|
-
a = _mm_load_pd(&((dtype*)p1)[i]);
|
|
2567
|
-
a = _mm_div_pd(a, b);
|
|
2568
|
-
_mm_store_pd(&((dtype*)p1)[i], a);
|
|
2569
|
-
}
|
|
2570
|
-
} else {
|
|
2571
|
-
for (; i < n - cnt_simd_loop; i += num_pack) {
|
|
2572
|
-
a = _mm_load_pd(&((dtype*)p1)[i]);
|
|
2573
|
-
a = _mm_div_pd(a, b);
|
|
2574
|
-
_mm_stream_pd(&((dtype*)p3)[i], a);
|
|
2575
|
-
}
|
|
2576
|
-
}
|
|
2577
|
-
}
|
|
2578
|
-
|
|
2579
|
-
// Compute the remainder of the SIMD operation.
|
|
2580
|
-
if (cnt_simd_loop != 0) {
|
|
2581
|
-
if (p1 == p3) { // inplace case
|
|
2582
|
-
for (; i < n; i++) {
|
|
2583
|
-
((dtype*)p1)[i] = m_div(((dtype*)p1)[i], *(dtype*)p2);
|
|
2584
|
-
}
|
|
2585
|
-
} else {
|
|
2586
|
-
for (; i < n; i++) {
|
|
2587
|
-
((dtype*)p3)[i] = m_div(((dtype*)p1)[i], *(dtype*)p2);
|
|
2588
|
-
}
|
|
2589
|
-
}
|
|
2590
|
-
}
|
|
2591
|
-
#endif
|
|
2592
|
-
} else {
|
|
1861
|
+
if (is_aligned(p1, sizeof(dtype)) && is_aligned(p2, sizeof(dtype))) {
|
|
1862
|
+
if (s1 == sizeof(dtype) && s2 == sizeof(dtype)) {
|
|
2593
1863
|
for (i = 0; i < n; i++) {
|
|
2594
|
-
|
|
2595
|
-
p1 += s1;
|
|
2596
|
-
p3 += s3;
|
|
1864
|
+
((dtype*)p2)[i] = m_sign(((dtype*)p1)[i]);
|
|
2597
1865
|
}
|
|
1866
|
+
return;
|
|
2598
1867
|
}
|
|
2599
|
-
|
|
2600
|
-
|
|
2601
|
-
for (i = 0; i < n; i++) {
|
|
2602
|
-
check_intdivzero(*(dtype*)p2);
|
|
2603
|
-
*(dtype*)p1 = m_div(*(dtype*)p1, *(dtype*)p2);
|
|
2604
|
-
p1 += s1;
|
|
2605
|
-
p2 += s2;
|
|
2606
|
-
}
|
|
2607
|
-
} else {
|
|
1868
|
+
if (is_aligned_step(s1, sizeof(dtype)) && is_aligned_step(s2, sizeof(dtype))) {
|
|
1869
|
+
//
|
|
2608
1870
|
for (i = 0; i < n; i++) {
|
|
2609
|
-
|
|
2610
|
-
*(dtype*)p3 = m_div(*(dtype*)p1, *(dtype*)p2);
|
|
1871
|
+
*(dtype*)p2 = m_sign(*(dtype*)p1);
|
|
2611
1872
|
p1 += s1;
|
|
2612
1873
|
p2 += s2;
|
|
2613
|
-
p3 += s3;
|
|
2614
1874
|
}
|
|
2615
|
-
|
|
2616
|
-
|
|
2617
|
-
|
|
2618
|
-
|
|
2619
|
-
|
|
2620
|
-
|
|
2621
|
-
|
|
2622
|
-
|
|
2623
|
-
|
|
2624
|
-
|
|
2625
|
-
GET_DATA_STRIDE(p2, s2, dtype, y);
|
|
2626
|
-
check_intdivzero(y);
|
|
2627
|
-
z = m_div(x, y);
|
|
2628
|
-
SET_DATA_STRIDE(p3, s3, dtype, z);
|
|
2629
|
-
}
|
|
2630
|
-
//
|
|
2631
|
-
}
|
|
2632
|
-
#undef check_intdivzero
|
|
2633
|
-
|
|
2634
|
-
static VALUE dfloat_div_self(VALUE self, VALUE other) {
|
|
2635
|
-
ndfunc_arg_in_t ain[2] = { { cT, 0 }, { cT, 0 } };
|
|
2636
|
-
ndfunc_arg_out_t aout[1] = { { cT, 0 } };
|
|
2637
|
-
ndfunc_t ndf = { iter_dfloat_div, STRIDE_LOOP, 2, 1, ain, aout };
|
|
2638
|
-
|
|
2639
|
-
return na_ndloop(&ndf, 2, self, other);
|
|
2640
|
-
}
|
|
2641
|
-
|
|
2642
|
-
static VALUE dfloat_div(VALUE self, VALUE other) {
|
|
2643
|
-
|
|
2644
|
-
VALUE klass, v;
|
|
2645
|
-
|
|
2646
|
-
klass = na_upcast(rb_obj_class(self), rb_obj_class(other));
|
|
2647
|
-
if (klass == cT) {
|
|
2648
|
-
return dfloat_div_self(self, other);
|
|
2649
|
-
} else {
|
|
2650
|
-
v = rb_funcall(klass, id_cast, 1, self);
|
|
2651
|
-
return rb_funcall(v, '/', 1, other);
|
|
2652
|
-
}
|
|
2653
|
-
}
|
|
2654
|
-
|
|
2655
|
-
#define check_intdivzero(y) \
|
|
2656
|
-
{}
|
|
2657
|
-
|
|
2658
|
-
static void iter_dfloat_mod(na_loop_t* const lp) {
|
|
2659
|
-
size_t i = 0;
|
|
2660
|
-
size_t n;
|
|
2661
|
-
char *p1, *p2, *p3;
|
|
2662
|
-
ssize_t s1, s2, s3;
|
|
2663
|
-
|
|
2664
|
-
INIT_COUNTER(lp, n);
|
|
2665
|
-
INIT_PTR(lp, 0, p1, s1);
|
|
2666
|
-
INIT_PTR(lp, 1, p2, s2);
|
|
2667
|
-
INIT_PTR(lp, 2, p3, s3);
|
|
2668
|
-
|
|
2669
|
-
//
|
|
2670
|
-
if (is_aligned(p1, sizeof(dtype)) && is_aligned(p2, sizeof(dtype)) &&
|
|
2671
|
-
is_aligned(p3, sizeof(dtype))) {
|
|
2672
|
-
|
|
2673
|
-
if (s1 == sizeof(dtype) && s2 == sizeof(dtype) && s3 == sizeof(dtype)) {
|
|
2674
|
-
if (p1 == p3) { // inplace case
|
|
2675
|
-
for (; i < n; i++) {
|
|
2676
|
-
check_intdivzero(((dtype*)p2)[i]);
|
|
2677
|
-
((dtype*)p1)[i] = m_mod(((dtype*)p1)[i], ((dtype*)p2)[i]);
|
|
2678
|
-
}
|
|
2679
|
-
} else {
|
|
2680
|
-
for (; i < n; i++) {
|
|
2681
|
-
check_intdivzero(((dtype*)p2)[i]);
|
|
2682
|
-
((dtype*)p3)[i] = m_mod(((dtype*)p1)[i], ((dtype*)p2)[i]);
|
|
2683
|
-
}
|
|
2684
|
-
}
|
|
2685
|
-
return;
|
|
2686
|
-
}
|
|
2687
|
-
|
|
2688
|
-
if (is_aligned_step(s1, sizeof(dtype)) && is_aligned_step(s2, sizeof(dtype)) &&
|
|
2689
|
-
is_aligned_step(s3, sizeof(dtype))) {
|
|
2690
|
-
//
|
|
2691
|
-
|
|
2692
|
-
if (s2 == 0) { // Broadcasting from scalar value.
|
|
2693
|
-
check_intdivzero(*(dtype*)p2);
|
|
2694
|
-
if (s1 == sizeof(dtype) && s3 == sizeof(dtype)) {
|
|
2695
|
-
if (p1 == p3) { // inplace case
|
|
2696
|
-
for (; i < n; i++) {
|
|
2697
|
-
((dtype*)p1)[i] = m_mod(((dtype*)p1)[i], *(dtype*)p2);
|
|
2698
|
-
}
|
|
2699
|
-
} else {
|
|
2700
|
-
for (; i < n; i++) {
|
|
2701
|
-
((dtype*)p3)[i] = m_mod(((dtype*)p1)[i], *(dtype*)p2);
|
|
2702
|
-
}
|
|
2703
|
-
}
|
|
2704
|
-
} else {
|
|
2705
|
-
for (i = 0; i < n; i++) {
|
|
2706
|
-
*(dtype*)p3 = m_mod(*(dtype*)p1, *(dtype*)p2);
|
|
2707
|
-
p1 += s1;
|
|
2708
|
-
p3 += s3;
|
|
2709
|
-
}
|
|
2710
|
-
}
|
|
2711
|
-
} else {
|
|
2712
|
-
if (p1 == p3) { // inplace case
|
|
2713
|
-
for (i = 0; i < n; i++) {
|
|
2714
|
-
check_intdivzero(*(dtype*)p2);
|
|
2715
|
-
*(dtype*)p1 = m_mod(*(dtype*)p1, *(dtype*)p2);
|
|
2716
|
-
p1 += s1;
|
|
2717
|
-
p2 += s2;
|
|
2718
|
-
}
|
|
2719
|
-
} else {
|
|
2720
|
-
for (i = 0; i < n; i++) {
|
|
2721
|
-
check_intdivzero(*(dtype*)p2);
|
|
2722
|
-
*(dtype*)p3 = m_mod(*(dtype*)p1, *(dtype*)p2);
|
|
2723
|
-
p1 += s1;
|
|
2724
|
-
p2 += s2;
|
|
2725
|
-
p3 += s3;
|
|
2726
|
-
}
|
|
2727
|
-
}
|
|
2728
|
-
}
|
|
2729
|
-
|
|
2730
|
-
return;
|
|
2731
|
-
//
|
|
2732
|
-
}
|
|
2733
|
-
}
|
|
2734
|
-
for (i = 0; i < n; i++) {
|
|
2735
|
-
dtype x, y, z;
|
|
2736
|
-
GET_DATA_STRIDE(p1, s1, dtype, x);
|
|
2737
|
-
GET_DATA_STRIDE(p2, s2, dtype, y);
|
|
2738
|
-
check_intdivzero(y);
|
|
2739
|
-
z = m_mod(x, y);
|
|
2740
|
-
SET_DATA_STRIDE(p3, s3, dtype, z);
|
|
2741
|
-
}
|
|
2742
|
-
//
|
|
2743
|
-
}
|
|
2744
|
-
#undef check_intdivzero
|
|
2745
|
-
|
|
2746
|
-
static VALUE dfloat_mod_self(VALUE self, VALUE other) {
|
|
2747
|
-
ndfunc_arg_in_t ain[2] = { { cT, 0 }, { cT, 0 } };
|
|
2748
|
-
ndfunc_arg_out_t aout[1] = { { cT, 0 } };
|
|
2749
|
-
ndfunc_t ndf = { iter_dfloat_mod, STRIDE_LOOP, 2, 1, ain, aout };
|
|
2750
|
-
|
|
2751
|
-
return na_ndloop(&ndf, 2, self, other);
|
|
2752
|
-
}
|
|
2753
|
-
|
|
2754
|
-
static VALUE dfloat_mod(VALUE self, VALUE other) {
|
|
2755
|
-
|
|
2756
|
-
VALUE klass, v;
|
|
2757
|
-
|
|
2758
|
-
klass = na_upcast(rb_obj_class(self), rb_obj_class(other));
|
|
2759
|
-
if (klass == cT) {
|
|
2760
|
-
return dfloat_mod_self(self, other);
|
|
2761
|
-
} else {
|
|
2762
|
-
v = rb_funcall(klass, id_cast, 1, self);
|
|
2763
|
-
return rb_funcall(v, '%', 1, other);
|
|
2764
|
-
}
|
|
2765
|
-
}
|
|
2766
|
-
|
|
2767
|
-
static void iter_dfloat_divmod(na_loop_t* const lp) {
|
|
2768
|
-
size_t i, n;
|
|
2769
|
-
char *p1, *p2, *p3, *p4;
|
|
2770
|
-
ssize_t s1, s2, s3, s4;
|
|
2771
|
-
dtype x, y, a, b;
|
|
2772
|
-
INIT_COUNTER(lp, n);
|
|
2773
|
-
INIT_PTR(lp, 0, p1, s1);
|
|
2774
|
-
INIT_PTR(lp, 1, p2, s2);
|
|
2775
|
-
INIT_PTR(lp, 2, p3, s3);
|
|
2776
|
-
INIT_PTR(lp, 3, p4, s4);
|
|
2777
|
-
for (i = n; i--;) {
|
|
2778
|
-
GET_DATA_STRIDE(p1, s1, dtype, x);
|
|
2779
|
-
GET_DATA_STRIDE(p2, s2, dtype, y);
|
|
2780
|
-
m_divmod(x, y, a, b);
|
|
2781
|
-
SET_DATA_STRIDE(p3, s3, dtype, a);
|
|
2782
|
-
SET_DATA_STRIDE(p4, s4, dtype, b);
|
|
2783
|
-
}
|
|
2784
|
-
}
|
|
2785
|
-
|
|
2786
|
-
static VALUE dfloat_divmod_self(VALUE self, VALUE other) {
|
|
2787
|
-
ndfunc_arg_in_t ain[2] = { { cT, 0 }, { cT, 0 } };
|
|
2788
|
-
ndfunc_arg_out_t aout[2] = { { cT, 0 }, { cT, 0 } };
|
|
2789
|
-
ndfunc_t ndf = { iter_dfloat_divmod, STRIDE_LOOP, 2, 2, ain, aout };
|
|
2790
|
-
|
|
2791
|
-
return na_ndloop(&ndf, 2, self, other);
|
|
2792
|
-
}
|
|
2793
|
-
|
|
2794
|
-
static VALUE dfloat_divmod(VALUE self, VALUE other) {
|
|
2795
|
-
|
|
2796
|
-
VALUE klass, v;
|
|
2797
|
-
klass = na_upcast(rb_obj_class(self), rb_obj_class(other));
|
|
2798
|
-
if (klass == cT) {
|
|
2799
|
-
return dfloat_divmod_self(self, other);
|
|
2800
|
-
} else {
|
|
2801
|
-
v = rb_funcall(klass, id_cast, 1, self);
|
|
2802
|
-
return rb_funcall(v, id_divmod, 1, other);
|
|
2803
|
-
}
|
|
2804
|
-
}
|
|
2805
|
-
|
|
2806
|
-
static void iter_dfloat_pow(na_loop_t* const lp) {
|
|
2807
|
-
size_t i;
|
|
2808
|
-
char *p1, *p2, *p3;
|
|
2809
|
-
ssize_t s1, s2, s3;
|
|
2810
|
-
dtype x, y;
|
|
2811
|
-
INIT_COUNTER(lp, i);
|
|
2812
|
-
INIT_PTR(lp, 0, p1, s1);
|
|
2813
|
-
INIT_PTR(lp, 1, p2, s2);
|
|
2814
|
-
INIT_PTR(lp, 2, p3, s3);
|
|
2815
|
-
for (; i--;) {
|
|
2816
|
-
GET_DATA_STRIDE(p1, s1, dtype, x);
|
|
2817
|
-
GET_DATA_STRIDE(p2, s2, dtype, y);
|
|
2818
|
-
x = m_pow(x, y);
|
|
2819
|
-
SET_DATA_STRIDE(p3, s3, dtype, x);
|
|
2820
|
-
}
|
|
2821
|
-
}
|
|
2822
|
-
|
|
2823
|
-
static void iter_dfloat_pow_int32(na_loop_t* const lp) {
|
|
2824
|
-
size_t i;
|
|
2825
|
-
char *p1, *p2, *p3;
|
|
2826
|
-
ssize_t s1, s2, s3;
|
|
2827
|
-
dtype x;
|
|
2828
|
-
int32_t y;
|
|
2829
|
-
INIT_COUNTER(lp, i);
|
|
2830
|
-
INIT_PTR(lp, 0, p1, s1);
|
|
2831
|
-
INIT_PTR(lp, 1, p2, s2);
|
|
2832
|
-
INIT_PTR(lp, 2, p3, s3);
|
|
2833
|
-
for (; i--;) {
|
|
2834
|
-
GET_DATA_STRIDE(p1, s1, dtype, x);
|
|
2835
|
-
GET_DATA_STRIDE(p2, s2, int32_t, y);
|
|
2836
|
-
x = m_pow_int(x, y);
|
|
2837
|
-
SET_DATA_STRIDE(p3, s3, dtype, x);
|
|
2838
|
-
}
|
|
2839
|
-
}
|
|
2840
|
-
|
|
2841
|
-
static VALUE dfloat_pow_self(VALUE self, VALUE other) {
|
|
2842
|
-
ndfunc_arg_in_t ain[2] = { { cT, 0 }, { cT, 0 } };
|
|
2843
|
-
ndfunc_arg_in_t ain_i[2] = { { cT, 0 }, { numo_cInt32, 0 } };
|
|
2844
|
-
ndfunc_arg_out_t aout[1] = { { cT, 0 } };
|
|
2845
|
-
ndfunc_t ndf = { iter_dfloat_pow, STRIDE_LOOP, 2, 1, ain, aout };
|
|
2846
|
-
ndfunc_t ndf_i = { iter_dfloat_pow_int32, STRIDE_LOOP, 2, 1, ain_i, aout };
|
|
2847
|
-
|
|
2848
|
-
// fixme : use na.integer?
|
|
2849
|
-
if (FIXNUM_P(other) || rb_obj_is_kind_of(other, numo_cInt32)) {
|
|
2850
|
-
return na_ndloop(&ndf_i, 2, self, other);
|
|
2851
|
-
} else {
|
|
2852
|
-
return na_ndloop(&ndf, 2, self, other);
|
|
2853
|
-
}
|
|
2854
|
-
}
|
|
2855
|
-
|
|
2856
|
-
static VALUE dfloat_pow(VALUE self, VALUE other) {
|
|
2857
|
-
|
|
2858
|
-
VALUE klass, v;
|
|
2859
|
-
klass = na_upcast(rb_obj_class(self), rb_obj_class(other));
|
|
2860
|
-
if (klass == cT) {
|
|
2861
|
-
return dfloat_pow_self(self, other);
|
|
2862
|
-
} else {
|
|
2863
|
-
v = rb_funcall(klass, id_cast, 1, self);
|
|
2864
|
-
return rb_funcall(v, id_pow, 1, other);
|
|
2865
|
-
}
|
|
2866
|
-
}
|
|
2867
|
-
|
|
2868
|
-
static void iter_dfloat_minus(na_loop_t* const lp) {
|
|
2869
|
-
size_t i, n;
|
|
2870
|
-
char *p1, *p2;
|
|
2871
|
-
ssize_t s1, s2;
|
|
2872
|
-
size_t *idx1, *idx2;
|
|
2873
|
-
dtype x;
|
|
2874
|
-
|
|
2875
|
-
INIT_COUNTER(lp, n);
|
|
2876
|
-
INIT_PTR_IDX(lp, 0, p1, s1, idx1);
|
|
2877
|
-
INIT_PTR_IDX(lp, 1, p2, s2, idx2);
|
|
2878
|
-
|
|
2879
|
-
if (idx1) {
|
|
2880
|
-
if (idx2) {
|
|
2881
|
-
for (i = 0; i < n; i++) {
|
|
2882
|
-
GET_DATA_INDEX(p1, idx1, dtype, x);
|
|
2883
|
-
x = m_minus(x);
|
|
2884
|
-
SET_DATA_INDEX(p2, idx2, dtype, x);
|
|
2885
|
-
}
|
|
2886
|
-
} else {
|
|
2887
|
-
for (i = 0; i < n; i++) {
|
|
2888
|
-
GET_DATA_INDEX(p1, idx1, dtype, x);
|
|
2889
|
-
x = m_minus(x);
|
|
2890
|
-
SET_DATA_STRIDE(p2, s2, dtype, x);
|
|
2891
|
-
}
|
|
2892
|
-
}
|
|
2893
|
-
} else {
|
|
2894
|
-
if (idx2) {
|
|
2895
|
-
for (i = 0; i < n; i++) {
|
|
2896
|
-
GET_DATA_STRIDE(p1, s1, dtype, x);
|
|
2897
|
-
x = m_minus(x);
|
|
2898
|
-
SET_DATA_INDEX(p2, idx2, dtype, x);
|
|
2899
|
-
}
|
|
2900
|
-
} else {
|
|
2901
|
-
//
|
|
2902
|
-
if (is_aligned(p1, sizeof(dtype)) && is_aligned(p2, sizeof(dtype))) {
|
|
2903
|
-
if (s1 == sizeof(dtype) && s2 == sizeof(dtype)) {
|
|
2904
|
-
for (i = 0; i < n; i++) {
|
|
2905
|
-
((dtype*)p2)[i] = m_minus(((dtype*)p1)[i]);
|
|
2906
|
-
}
|
|
2907
|
-
return;
|
|
2908
|
-
}
|
|
2909
|
-
if (is_aligned_step(s1, sizeof(dtype)) && is_aligned_step(s2, sizeof(dtype))) {
|
|
2910
|
-
//
|
|
2911
|
-
for (i = 0; i < n; i++) {
|
|
2912
|
-
*(dtype*)p2 = m_minus(*(dtype*)p1);
|
|
2913
|
-
p1 += s1;
|
|
2914
|
-
p2 += s2;
|
|
2915
|
-
}
|
|
2916
|
-
return;
|
|
2917
|
-
//
|
|
2918
|
-
}
|
|
2919
|
-
}
|
|
2920
|
-
for (i = 0; i < n; i++) {
|
|
2921
|
-
GET_DATA_STRIDE(p1, s1, dtype, x);
|
|
2922
|
-
x = m_minus(x);
|
|
2923
|
-
SET_DATA_STRIDE(p2, s2, dtype, x);
|
|
2924
|
-
}
|
|
2925
|
-
//
|
|
2926
|
-
}
|
|
2927
|
-
}
|
|
2928
|
-
}
|
|
2929
|
-
|
|
2930
|
-
static VALUE dfloat_minus(VALUE self) {
|
|
2931
|
-
ndfunc_arg_in_t ain[1] = { { cT, 0 } };
|
|
2932
|
-
ndfunc_arg_out_t aout[1] = { { cT, 0 } };
|
|
2933
|
-
ndfunc_t ndf = { iter_dfloat_minus, FULL_LOOP, 1, 1, ain, aout };
|
|
2934
|
-
|
|
2935
|
-
return na_ndloop(&ndf, 1, self);
|
|
2936
|
-
}
|
|
2937
|
-
|
|
2938
|
-
static void iter_dfloat_reciprocal(na_loop_t* const lp) {
|
|
2939
|
-
size_t i, n;
|
|
2940
|
-
char *p1, *p2;
|
|
2941
|
-
ssize_t s1, s2;
|
|
2942
|
-
size_t *idx1, *idx2;
|
|
2943
|
-
dtype x;
|
|
2944
|
-
|
|
2945
|
-
INIT_COUNTER(lp, n);
|
|
2946
|
-
INIT_PTR_IDX(lp, 0, p1, s1, idx1);
|
|
2947
|
-
INIT_PTR_IDX(lp, 1, p2, s2, idx2);
|
|
2948
|
-
|
|
2949
|
-
if (idx1) {
|
|
2950
|
-
if (idx2) {
|
|
2951
|
-
for (i = 0; i < n; i++) {
|
|
2952
|
-
GET_DATA_INDEX(p1, idx1, dtype, x);
|
|
2953
|
-
x = m_reciprocal(x);
|
|
2954
|
-
SET_DATA_INDEX(p2, idx2, dtype, x);
|
|
2955
|
-
}
|
|
2956
|
-
} else {
|
|
2957
|
-
for (i = 0; i < n; i++) {
|
|
2958
|
-
GET_DATA_INDEX(p1, idx1, dtype, x);
|
|
2959
|
-
x = m_reciprocal(x);
|
|
2960
|
-
SET_DATA_STRIDE(p2, s2, dtype, x);
|
|
2961
|
-
}
|
|
2962
|
-
}
|
|
2963
|
-
} else {
|
|
2964
|
-
if (idx2) {
|
|
2965
|
-
for (i = 0; i < n; i++) {
|
|
2966
|
-
GET_DATA_STRIDE(p1, s1, dtype, x);
|
|
2967
|
-
x = m_reciprocal(x);
|
|
2968
|
-
SET_DATA_INDEX(p2, idx2, dtype, x);
|
|
2969
|
-
}
|
|
2970
|
-
} else {
|
|
2971
|
-
//
|
|
2972
|
-
if (is_aligned(p1, sizeof(dtype)) && is_aligned(p2, sizeof(dtype))) {
|
|
2973
|
-
if (s1 == sizeof(dtype) && s2 == sizeof(dtype)) {
|
|
2974
|
-
for (i = 0; i < n; i++) {
|
|
2975
|
-
((dtype*)p2)[i] = m_reciprocal(((dtype*)p1)[i]);
|
|
2976
|
-
}
|
|
2977
|
-
return;
|
|
2978
|
-
}
|
|
2979
|
-
if (is_aligned_step(s1, sizeof(dtype)) && is_aligned_step(s2, sizeof(dtype))) {
|
|
2980
|
-
//
|
|
2981
|
-
for (i = 0; i < n; i++) {
|
|
2982
|
-
*(dtype*)p2 = m_reciprocal(*(dtype*)p1);
|
|
2983
|
-
p1 += s1;
|
|
2984
|
-
p2 += s2;
|
|
2985
|
-
}
|
|
2986
|
-
return;
|
|
2987
|
-
//
|
|
2988
|
-
}
|
|
2989
|
-
}
|
|
2990
|
-
for (i = 0; i < n; i++) {
|
|
2991
|
-
GET_DATA_STRIDE(p1, s1, dtype, x);
|
|
2992
|
-
x = m_reciprocal(x);
|
|
2993
|
-
SET_DATA_STRIDE(p2, s2, dtype, x);
|
|
2994
|
-
}
|
|
2995
|
-
//
|
|
2996
|
-
}
|
|
2997
|
-
}
|
|
2998
|
-
}
|
|
2999
|
-
|
|
3000
|
-
static VALUE dfloat_reciprocal(VALUE self) {
|
|
3001
|
-
ndfunc_arg_in_t ain[1] = { { cT, 0 } };
|
|
3002
|
-
ndfunc_arg_out_t aout[1] = { { cT, 0 } };
|
|
3003
|
-
ndfunc_t ndf = { iter_dfloat_reciprocal, FULL_LOOP, 1, 1, ain, aout };
|
|
3004
|
-
|
|
3005
|
-
return na_ndloop(&ndf, 1, self);
|
|
3006
|
-
}
|
|
3007
|
-
|
|
3008
|
-
static void iter_dfloat_sign(na_loop_t* const lp) {
|
|
3009
|
-
size_t i, n;
|
|
3010
|
-
char *p1, *p2;
|
|
3011
|
-
ssize_t s1, s2;
|
|
3012
|
-
size_t *idx1, *idx2;
|
|
3013
|
-
dtype x;
|
|
3014
|
-
|
|
3015
|
-
INIT_COUNTER(lp, n);
|
|
3016
|
-
INIT_PTR_IDX(lp, 0, p1, s1, idx1);
|
|
3017
|
-
INIT_PTR_IDX(lp, 1, p2, s2, idx2);
|
|
3018
|
-
|
|
3019
|
-
if (idx1) {
|
|
3020
|
-
if (idx2) {
|
|
3021
|
-
for (i = 0; i < n; i++) {
|
|
3022
|
-
GET_DATA_INDEX(p1, idx1, dtype, x);
|
|
3023
|
-
x = m_sign(x);
|
|
3024
|
-
SET_DATA_INDEX(p2, idx2, dtype, x);
|
|
3025
|
-
}
|
|
3026
|
-
} else {
|
|
3027
|
-
for (i = 0; i < n; i++) {
|
|
3028
|
-
GET_DATA_INDEX(p1, idx1, dtype, x);
|
|
3029
|
-
x = m_sign(x);
|
|
3030
|
-
SET_DATA_STRIDE(p2, s2, dtype, x);
|
|
3031
|
-
}
|
|
3032
|
-
}
|
|
3033
|
-
} else {
|
|
3034
|
-
if (idx2) {
|
|
3035
|
-
for (i = 0; i < n; i++) {
|
|
3036
|
-
GET_DATA_STRIDE(p1, s1, dtype, x);
|
|
3037
|
-
x = m_sign(x);
|
|
3038
|
-
SET_DATA_INDEX(p2, idx2, dtype, x);
|
|
3039
|
-
}
|
|
3040
|
-
} else {
|
|
3041
|
-
//
|
|
3042
|
-
if (is_aligned(p1, sizeof(dtype)) && is_aligned(p2, sizeof(dtype))) {
|
|
3043
|
-
if (s1 == sizeof(dtype) && s2 == sizeof(dtype)) {
|
|
3044
|
-
for (i = 0; i < n; i++) {
|
|
3045
|
-
((dtype*)p2)[i] = m_sign(((dtype*)p1)[i]);
|
|
3046
|
-
}
|
|
3047
|
-
return;
|
|
3048
|
-
}
|
|
3049
|
-
if (is_aligned_step(s1, sizeof(dtype)) && is_aligned_step(s2, sizeof(dtype))) {
|
|
3050
|
-
//
|
|
3051
|
-
for (i = 0; i < n; i++) {
|
|
3052
|
-
*(dtype*)p2 = m_sign(*(dtype*)p1);
|
|
3053
|
-
p1 += s1;
|
|
3054
|
-
p2 += s2;
|
|
3055
|
-
}
|
|
3056
|
-
return;
|
|
3057
|
-
//
|
|
3058
|
-
}
|
|
3059
|
-
}
|
|
3060
|
-
for (i = 0; i < n; i++) {
|
|
3061
|
-
GET_DATA_STRIDE(p1, s1, dtype, x);
|
|
3062
|
-
x = m_sign(x);
|
|
3063
|
-
SET_DATA_STRIDE(p2, s2, dtype, x);
|
|
3064
|
-
}
|
|
3065
|
-
//
|
|
3066
|
-
}
|
|
3067
|
-
}
|
|
3068
|
-
}
|
|
3069
|
-
|
|
3070
|
-
static VALUE dfloat_sign(VALUE self) {
|
|
3071
|
-
ndfunc_arg_in_t ain[1] = { { cT, 0 } };
|
|
3072
|
-
ndfunc_arg_out_t aout[1] = { { cT, 0 } };
|
|
3073
|
-
ndfunc_t ndf = { iter_dfloat_sign, FULL_LOOP, 1, 1, ain, aout };
|
|
3074
|
-
|
|
3075
|
-
return na_ndloop(&ndf, 1, self);
|
|
3076
|
-
}
|
|
3077
|
-
|
|
3078
|
-
static void iter_dfloat_square(na_loop_t* const lp) {
|
|
3079
|
-
size_t i, n;
|
|
3080
|
-
char *p1, *p2;
|
|
3081
|
-
ssize_t s1, s2;
|
|
3082
|
-
size_t *idx1, *idx2;
|
|
3083
|
-
dtype x;
|
|
3084
|
-
|
|
3085
|
-
INIT_COUNTER(lp, n);
|
|
3086
|
-
INIT_PTR_IDX(lp, 0, p1, s1, idx1);
|
|
3087
|
-
INIT_PTR_IDX(lp, 1, p2, s2, idx2);
|
|
3088
|
-
|
|
3089
|
-
if (idx1) {
|
|
3090
|
-
if (idx2) {
|
|
3091
|
-
for (i = 0; i < n; i++) {
|
|
3092
|
-
GET_DATA_INDEX(p1, idx1, dtype, x);
|
|
3093
|
-
x = m_square(x);
|
|
3094
|
-
SET_DATA_INDEX(p2, idx2, dtype, x);
|
|
3095
|
-
}
|
|
3096
|
-
} else {
|
|
3097
|
-
for (i = 0; i < n; i++) {
|
|
3098
|
-
GET_DATA_INDEX(p1, idx1, dtype, x);
|
|
3099
|
-
x = m_square(x);
|
|
3100
|
-
SET_DATA_STRIDE(p2, s2, dtype, x);
|
|
3101
|
-
}
|
|
3102
|
-
}
|
|
3103
|
-
} else {
|
|
3104
|
-
if (idx2) {
|
|
3105
|
-
for (i = 0; i < n; i++) {
|
|
3106
|
-
GET_DATA_STRIDE(p1, s1, dtype, x);
|
|
3107
|
-
x = m_square(x);
|
|
3108
|
-
SET_DATA_INDEX(p2, idx2, dtype, x);
|
|
3109
|
-
}
|
|
3110
|
-
} else {
|
|
3111
|
-
//
|
|
3112
|
-
if (is_aligned(p1, sizeof(dtype)) && is_aligned(p2, sizeof(dtype))) {
|
|
3113
|
-
if (s1 == sizeof(dtype) && s2 == sizeof(dtype)) {
|
|
3114
|
-
for (i = 0; i < n; i++) {
|
|
3115
|
-
((dtype*)p2)[i] = m_square(((dtype*)p1)[i]);
|
|
3116
|
-
}
|
|
3117
|
-
return;
|
|
3118
|
-
}
|
|
3119
|
-
if (is_aligned_step(s1, sizeof(dtype)) && is_aligned_step(s2, sizeof(dtype))) {
|
|
3120
|
-
//
|
|
3121
|
-
for (i = 0; i < n; i++) {
|
|
3122
|
-
*(dtype*)p2 = m_square(*(dtype*)p1);
|
|
3123
|
-
p1 += s1;
|
|
3124
|
-
p2 += s2;
|
|
3125
|
-
}
|
|
3126
|
-
return;
|
|
3127
|
-
//
|
|
3128
|
-
}
|
|
3129
|
-
}
|
|
3130
|
-
for (i = 0; i < n; i++) {
|
|
3131
|
-
GET_DATA_STRIDE(p1, s1, dtype, x);
|
|
3132
|
-
x = m_square(x);
|
|
3133
|
-
SET_DATA_STRIDE(p2, s2, dtype, x);
|
|
3134
|
-
}
|
|
3135
|
-
//
|
|
3136
|
-
}
|
|
3137
|
-
}
|
|
3138
|
-
}
|
|
3139
|
-
|
|
3140
|
-
static VALUE dfloat_square(VALUE self) {
|
|
3141
|
-
ndfunc_arg_in_t ain[1] = { { cT, 0 } };
|
|
3142
|
-
ndfunc_arg_out_t aout[1] = { { cT, 0 } };
|
|
3143
|
-
ndfunc_t ndf = { iter_dfloat_square, FULL_LOOP, 1, 1, ain, aout };
|
|
3144
|
-
|
|
3145
|
-
return na_ndloop(&ndf, 1, self);
|
|
3146
|
-
}
|
|
3147
|
-
|
|
3148
|
-
static void iter_dfloat_eq(na_loop_t* const lp) {
|
|
3149
|
-
size_t i;
|
|
3150
|
-
char *p1, *p2;
|
|
3151
|
-
BIT_DIGIT* a3;
|
|
3152
|
-
size_t p3;
|
|
3153
|
-
ssize_t s1, s2, s3;
|
|
3154
|
-
dtype x, y;
|
|
3155
|
-
BIT_DIGIT b;
|
|
3156
|
-
INIT_COUNTER(lp, i);
|
|
3157
|
-
INIT_PTR(lp, 0, p1, s1);
|
|
3158
|
-
INIT_PTR(lp, 1, p2, s2);
|
|
3159
|
-
INIT_PTR_BIT(lp, 2, a3, p3, s3);
|
|
3160
|
-
for (; i--;) {
|
|
3161
|
-
GET_DATA_STRIDE(p1, s1, dtype, x);
|
|
3162
|
-
GET_DATA_STRIDE(p2, s2, dtype, y);
|
|
3163
|
-
b = (m_eq(x, y)) ? 1 : 0;
|
|
3164
|
-
STORE_BIT(a3, p3, b);
|
|
3165
|
-
p3 += s3;
|
|
3166
|
-
}
|
|
3167
|
-
}
|
|
3168
|
-
|
|
3169
|
-
static VALUE dfloat_eq_self(VALUE self, VALUE other) {
|
|
3170
|
-
ndfunc_arg_in_t ain[2] = { { cT, 0 }, { cT, 0 } };
|
|
3171
|
-
ndfunc_arg_out_t aout[1] = { { numo_cBit, 0 } };
|
|
3172
|
-
ndfunc_t ndf = { iter_dfloat_eq, STRIDE_LOOP, 2, 1, ain, aout };
|
|
3173
|
-
|
|
3174
|
-
return na_ndloop(&ndf, 2, self, other);
|
|
3175
|
-
}
|
|
3176
|
-
|
|
3177
|
-
static VALUE dfloat_eq(VALUE self, VALUE other) {
|
|
3178
|
-
|
|
3179
|
-
VALUE klass, v;
|
|
3180
|
-
klass = na_upcast(rb_obj_class(self), rb_obj_class(other));
|
|
3181
|
-
if (klass == cT) {
|
|
3182
|
-
return dfloat_eq_self(self, other);
|
|
3183
|
-
} else {
|
|
3184
|
-
v = rb_funcall(klass, id_cast, 1, self);
|
|
3185
|
-
return rb_funcall(v, id_eq, 1, other);
|
|
3186
|
-
}
|
|
3187
|
-
}
|
|
3188
|
-
|
|
3189
|
-
static void iter_dfloat_ne(na_loop_t* const lp) {
|
|
3190
|
-
size_t i;
|
|
3191
|
-
char *p1, *p2;
|
|
3192
|
-
BIT_DIGIT* a3;
|
|
3193
|
-
size_t p3;
|
|
3194
|
-
ssize_t s1, s2, s3;
|
|
3195
|
-
dtype x, y;
|
|
3196
|
-
BIT_DIGIT b;
|
|
3197
|
-
INIT_COUNTER(lp, i);
|
|
3198
|
-
INIT_PTR(lp, 0, p1, s1);
|
|
3199
|
-
INIT_PTR(lp, 1, p2, s2);
|
|
3200
|
-
INIT_PTR_BIT(lp, 2, a3, p3, s3);
|
|
3201
|
-
for (; i--;) {
|
|
3202
|
-
GET_DATA_STRIDE(p1, s1, dtype, x);
|
|
3203
|
-
GET_DATA_STRIDE(p2, s2, dtype, y);
|
|
3204
|
-
b = (m_ne(x, y)) ? 1 : 0;
|
|
3205
|
-
STORE_BIT(a3, p3, b);
|
|
3206
|
-
p3 += s3;
|
|
3207
|
-
}
|
|
3208
|
-
}
|
|
3209
|
-
|
|
3210
|
-
static VALUE dfloat_ne_self(VALUE self, VALUE other) {
|
|
3211
|
-
ndfunc_arg_in_t ain[2] = { { cT, 0 }, { cT, 0 } };
|
|
3212
|
-
ndfunc_arg_out_t aout[1] = { { numo_cBit, 0 } };
|
|
3213
|
-
ndfunc_t ndf = { iter_dfloat_ne, STRIDE_LOOP, 2, 1, ain, aout };
|
|
3214
|
-
|
|
3215
|
-
return na_ndloop(&ndf, 2, self, other);
|
|
3216
|
-
}
|
|
3217
|
-
|
|
3218
|
-
static VALUE dfloat_ne(VALUE self, VALUE other) {
|
|
3219
|
-
|
|
3220
|
-
VALUE klass, v;
|
|
3221
|
-
klass = na_upcast(rb_obj_class(self), rb_obj_class(other));
|
|
3222
|
-
if (klass == cT) {
|
|
3223
|
-
return dfloat_ne_self(self, other);
|
|
3224
|
-
} else {
|
|
3225
|
-
v = rb_funcall(klass, id_cast, 1, self);
|
|
3226
|
-
return rb_funcall(v, id_ne, 1, other);
|
|
3227
|
-
}
|
|
3228
|
-
}
|
|
3229
|
-
|
|
3230
|
-
static void iter_dfloat_nearly_eq(na_loop_t* const lp) {
|
|
3231
|
-
size_t i;
|
|
3232
|
-
char *p1, *p2;
|
|
3233
|
-
BIT_DIGIT* a3;
|
|
3234
|
-
size_t p3;
|
|
3235
|
-
ssize_t s1, s2, s3;
|
|
3236
|
-
dtype x, y;
|
|
3237
|
-
BIT_DIGIT b;
|
|
3238
|
-
INIT_COUNTER(lp, i);
|
|
3239
|
-
INIT_PTR(lp, 0, p1, s1);
|
|
3240
|
-
INIT_PTR(lp, 1, p2, s2);
|
|
3241
|
-
INIT_PTR_BIT(lp, 2, a3, p3, s3);
|
|
3242
|
-
for (; i--;) {
|
|
3243
|
-
GET_DATA_STRIDE(p1, s1, dtype, x);
|
|
3244
|
-
GET_DATA_STRIDE(p2, s2, dtype, y);
|
|
3245
|
-
b = (m_nearly_eq(x, y)) ? 1 : 0;
|
|
3246
|
-
STORE_BIT(a3, p3, b);
|
|
3247
|
-
p3 += s3;
|
|
3248
|
-
}
|
|
3249
|
-
}
|
|
3250
|
-
|
|
3251
|
-
static VALUE dfloat_nearly_eq_self(VALUE self, VALUE other) {
|
|
3252
|
-
ndfunc_arg_in_t ain[2] = { { cT, 0 }, { cT, 0 } };
|
|
3253
|
-
ndfunc_arg_out_t aout[1] = { { numo_cBit, 0 } };
|
|
3254
|
-
ndfunc_t ndf = { iter_dfloat_nearly_eq, STRIDE_LOOP, 2, 1, ain, aout };
|
|
3255
|
-
|
|
3256
|
-
return na_ndloop(&ndf, 2, self, other);
|
|
3257
|
-
}
|
|
3258
|
-
|
|
3259
|
-
static VALUE dfloat_nearly_eq(VALUE self, VALUE other) {
|
|
3260
|
-
|
|
3261
|
-
VALUE klass, v;
|
|
3262
|
-
klass = na_upcast(rb_obj_class(self), rb_obj_class(other));
|
|
3263
|
-
if (klass == cT) {
|
|
3264
|
-
return dfloat_nearly_eq_self(self, other);
|
|
3265
|
-
} else {
|
|
3266
|
-
v = rb_funcall(klass, id_cast, 1, self);
|
|
3267
|
-
return rb_funcall(v, id_nearly_eq, 1, other);
|
|
3268
|
-
}
|
|
3269
|
-
}
|
|
3270
|
-
|
|
3271
|
-
static void iter_dfloat_floor(na_loop_t* const lp) {
|
|
3272
|
-
size_t i, n;
|
|
3273
|
-
char *p1, *p2;
|
|
3274
|
-
ssize_t s1, s2;
|
|
3275
|
-
size_t *idx1, *idx2;
|
|
3276
|
-
dtype x;
|
|
3277
|
-
|
|
3278
|
-
INIT_COUNTER(lp, n);
|
|
3279
|
-
INIT_PTR_IDX(lp, 0, p1, s1, idx1);
|
|
3280
|
-
INIT_PTR_IDX(lp, 1, p2, s2, idx2);
|
|
3281
|
-
|
|
3282
|
-
if (idx1) {
|
|
3283
|
-
if (idx2) {
|
|
3284
|
-
for (i = 0; i < n; i++) {
|
|
3285
|
-
GET_DATA_INDEX(p1, idx1, dtype, x);
|
|
3286
|
-
x = m_floor(x);
|
|
3287
|
-
SET_DATA_INDEX(p2, idx2, dtype, x);
|
|
3288
|
-
}
|
|
3289
|
-
} else {
|
|
3290
|
-
for (i = 0; i < n; i++) {
|
|
3291
|
-
GET_DATA_INDEX(p1, idx1, dtype, x);
|
|
3292
|
-
x = m_floor(x);
|
|
3293
|
-
SET_DATA_STRIDE(p2, s2, dtype, x);
|
|
3294
|
-
}
|
|
3295
|
-
}
|
|
3296
|
-
} else {
|
|
3297
|
-
if (idx2) {
|
|
3298
|
-
for (i = 0; i < n; i++) {
|
|
3299
|
-
GET_DATA_STRIDE(p1, s1, dtype, x);
|
|
3300
|
-
x = m_floor(x);
|
|
3301
|
-
SET_DATA_INDEX(p2, idx2, dtype, x);
|
|
3302
|
-
}
|
|
3303
|
-
} else {
|
|
3304
|
-
//
|
|
3305
|
-
if (is_aligned(p1, sizeof(dtype)) && is_aligned(p2, sizeof(dtype))) {
|
|
3306
|
-
if (s1 == sizeof(dtype) && s2 == sizeof(dtype)) {
|
|
3307
|
-
for (i = 0; i < n; i++) {
|
|
3308
|
-
((dtype*)p2)[i] = m_floor(((dtype*)p1)[i]);
|
|
3309
|
-
}
|
|
3310
|
-
return;
|
|
3311
|
-
}
|
|
3312
|
-
if (is_aligned_step(s1, sizeof(dtype)) && is_aligned_step(s2, sizeof(dtype))) {
|
|
3313
|
-
//
|
|
3314
|
-
for (i = 0; i < n; i++) {
|
|
3315
|
-
*(dtype*)p2 = m_floor(*(dtype*)p1);
|
|
3316
|
-
p1 += s1;
|
|
3317
|
-
p2 += s2;
|
|
3318
|
-
}
|
|
3319
|
-
return;
|
|
3320
|
-
//
|
|
3321
|
-
}
|
|
3322
|
-
}
|
|
3323
|
-
for (i = 0; i < n; i++) {
|
|
3324
|
-
GET_DATA_STRIDE(p1, s1, dtype, x);
|
|
3325
|
-
x = m_floor(x);
|
|
3326
|
-
SET_DATA_STRIDE(p2, s2, dtype, x);
|
|
3327
|
-
}
|
|
3328
|
-
//
|
|
3329
|
-
}
|
|
3330
|
-
}
|
|
3331
|
-
}
|
|
3332
|
-
|
|
3333
|
-
static VALUE dfloat_floor(VALUE self) {
|
|
3334
|
-
ndfunc_arg_in_t ain[1] = { { cT, 0 } };
|
|
3335
|
-
ndfunc_arg_out_t aout[1] = { { cT, 0 } };
|
|
3336
|
-
ndfunc_t ndf = { iter_dfloat_floor, FULL_LOOP, 1, 1, ain, aout };
|
|
3337
|
-
|
|
3338
|
-
return na_ndloop(&ndf, 1, self);
|
|
3339
|
-
}
|
|
3340
|
-
|
|
3341
|
-
static void iter_dfloat_round(na_loop_t* const lp) {
|
|
3342
|
-
size_t i, n;
|
|
3343
|
-
char *p1, *p2;
|
|
3344
|
-
ssize_t s1, s2;
|
|
3345
|
-
size_t *idx1, *idx2;
|
|
3346
|
-
dtype x;
|
|
3347
|
-
|
|
3348
|
-
INIT_COUNTER(lp, n);
|
|
3349
|
-
INIT_PTR_IDX(lp, 0, p1, s1, idx1);
|
|
3350
|
-
INIT_PTR_IDX(lp, 1, p2, s2, idx2);
|
|
3351
|
-
|
|
3352
|
-
if (idx1) {
|
|
3353
|
-
if (idx2) {
|
|
3354
|
-
for (i = 0; i < n; i++) {
|
|
3355
|
-
GET_DATA_INDEX(p1, idx1, dtype, x);
|
|
3356
|
-
x = m_round(x);
|
|
3357
|
-
SET_DATA_INDEX(p2, idx2, dtype, x);
|
|
3358
|
-
}
|
|
3359
|
-
} else {
|
|
3360
|
-
for (i = 0; i < n; i++) {
|
|
3361
|
-
GET_DATA_INDEX(p1, idx1, dtype, x);
|
|
3362
|
-
x = m_round(x);
|
|
3363
|
-
SET_DATA_STRIDE(p2, s2, dtype, x);
|
|
3364
|
-
}
|
|
3365
|
-
}
|
|
3366
|
-
} else {
|
|
3367
|
-
if (idx2) {
|
|
3368
|
-
for (i = 0; i < n; i++) {
|
|
3369
|
-
GET_DATA_STRIDE(p1, s1, dtype, x);
|
|
3370
|
-
x = m_round(x);
|
|
3371
|
-
SET_DATA_INDEX(p2, idx2, dtype, x);
|
|
3372
|
-
}
|
|
3373
|
-
} else {
|
|
3374
|
-
//
|
|
3375
|
-
if (is_aligned(p1, sizeof(dtype)) && is_aligned(p2, sizeof(dtype))) {
|
|
3376
|
-
if (s1 == sizeof(dtype) && s2 == sizeof(dtype)) {
|
|
3377
|
-
for (i = 0; i < n; i++) {
|
|
3378
|
-
((dtype*)p2)[i] = m_round(((dtype*)p1)[i]);
|
|
3379
|
-
}
|
|
3380
|
-
return;
|
|
3381
|
-
}
|
|
3382
|
-
if (is_aligned_step(s1, sizeof(dtype)) && is_aligned_step(s2, sizeof(dtype))) {
|
|
3383
|
-
//
|
|
3384
|
-
for (i = 0; i < n; i++) {
|
|
3385
|
-
*(dtype*)p2 = m_round(*(dtype*)p1);
|
|
3386
|
-
p1 += s1;
|
|
3387
|
-
p2 += s2;
|
|
3388
|
-
}
|
|
3389
|
-
return;
|
|
3390
|
-
//
|
|
3391
|
-
}
|
|
3392
|
-
}
|
|
3393
|
-
for (i = 0; i < n; i++) {
|
|
3394
|
-
GET_DATA_STRIDE(p1, s1, dtype, x);
|
|
3395
|
-
x = m_round(x);
|
|
3396
|
-
SET_DATA_STRIDE(p2, s2, dtype, x);
|
|
3397
|
-
}
|
|
3398
|
-
//
|
|
3399
|
-
}
|
|
3400
|
-
}
|
|
3401
|
-
}
|
|
3402
|
-
|
|
3403
|
-
static VALUE dfloat_round(VALUE self) {
|
|
3404
|
-
ndfunc_arg_in_t ain[1] = { { cT, 0 } };
|
|
3405
|
-
ndfunc_arg_out_t aout[1] = { { cT, 0 } };
|
|
3406
|
-
ndfunc_t ndf = { iter_dfloat_round, FULL_LOOP, 1, 1, ain, aout };
|
|
3407
|
-
|
|
3408
|
-
return na_ndloop(&ndf, 1, self);
|
|
3409
|
-
}
|
|
3410
|
-
|
|
3411
|
-
static void iter_dfloat_ceil(na_loop_t* const lp) {
|
|
3412
|
-
size_t i, n;
|
|
3413
|
-
char *p1, *p2;
|
|
3414
|
-
ssize_t s1, s2;
|
|
3415
|
-
size_t *idx1, *idx2;
|
|
3416
|
-
dtype x;
|
|
3417
|
-
|
|
3418
|
-
INIT_COUNTER(lp, n);
|
|
3419
|
-
INIT_PTR_IDX(lp, 0, p1, s1, idx1);
|
|
3420
|
-
INIT_PTR_IDX(lp, 1, p2, s2, idx2);
|
|
3421
|
-
|
|
3422
|
-
if (idx1) {
|
|
3423
|
-
if (idx2) {
|
|
3424
|
-
for (i = 0; i < n; i++) {
|
|
3425
|
-
GET_DATA_INDEX(p1, idx1, dtype, x);
|
|
3426
|
-
x = m_ceil(x);
|
|
3427
|
-
SET_DATA_INDEX(p2, idx2, dtype, x);
|
|
3428
|
-
}
|
|
3429
|
-
} else {
|
|
3430
|
-
for (i = 0; i < n; i++) {
|
|
3431
|
-
GET_DATA_INDEX(p1, idx1, dtype, x);
|
|
3432
|
-
x = m_ceil(x);
|
|
3433
|
-
SET_DATA_STRIDE(p2, s2, dtype, x);
|
|
3434
|
-
}
|
|
3435
|
-
}
|
|
3436
|
-
} else {
|
|
3437
|
-
if (idx2) {
|
|
3438
|
-
for (i = 0; i < n; i++) {
|
|
3439
|
-
GET_DATA_STRIDE(p1, s1, dtype, x);
|
|
3440
|
-
x = m_ceil(x);
|
|
3441
|
-
SET_DATA_INDEX(p2, idx2, dtype, x);
|
|
3442
|
-
}
|
|
3443
|
-
} else {
|
|
3444
|
-
//
|
|
3445
|
-
if (is_aligned(p1, sizeof(dtype)) && is_aligned(p2, sizeof(dtype))) {
|
|
3446
|
-
if (s1 == sizeof(dtype) && s2 == sizeof(dtype)) {
|
|
3447
|
-
for (i = 0; i < n; i++) {
|
|
3448
|
-
((dtype*)p2)[i] = m_ceil(((dtype*)p1)[i]);
|
|
3449
|
-
}
|
|
3450
|
-
return;
|
|
3451
|
-
}
|
|
3452
|
-
if (is_aligned_step(s1, sizeof(dtype)) && is_aligned_step(s2, sizeof(dtype))) {
|
|
3453
|
-
//
|
|
3454
|
-
for (i = 0; i < n; i++) {
|
|
3455
|
-
*(dtype*)p2 = m_ceil(*(dtype*)p1);
|
|
3456
|
-
p1 += s1;
|
|
3457
|
-
p2 += s2;
|
|
3458
|
-
}
|
|
3459
|
-
return;
|
|
3460
|
-
//
|
|
3461
|
-
}
|
|
3462
|
-
}
|
|
3463
|
-
for (i = 0; i < n; i++) {
|
|
3464
|
-
GET_DATA_STRIDE(p1, s1, dtype, x);
|
|
3465
|
-
x = m_ceil(x);
|
|
3466
|
-
SET_DATA_STRIDE(p2, s2, dtype, x);
|
|
3467
|
-
}
|
|
3468
|
-
//
|
|
3469
|
-
}
|
|
3470
|
-
}
|
|
3471
|
-
}
|
|
3472
|
-
|
|
3473
|
-
static VALUE dfloat_ceil(VALUE self) {
|
|
3474
|
-
ndfunc_arg_in_t ain[1] = { { cT, 0 } };
|
|
3475
|
-
ndfunc_arg_out_t aout[1] = { { cT, 0 } };
|
|
3476
|
-
ndfunc_t ndf = { iter_dfloat_ceil, FULL_LOOP, 1, 1, ain, aout };
|
|
3477
|
-
|
|
3478
|
-
return na_ndloop(&ndf, 1, self);
|
|
3479
|
-
}
|
|
3480
|
-
|
|
3481
|
-
static void iter_dfloat_trunc(na_loop_t* const lp) {
|
|
3482
|
-
size_t i, n;
|
|
3483
|
-
char *p1, *p2;
|
|
3484
|
-
ssize_t s1, s2;
|
|
3485
|
-
size_t *idx1, *idx2;
|
|
3486
|
-
dtype x;
|
|
3487
|
-
|
|
3488
|
-
INIT_COUNTER(lp, n);
|
|
3489
|
-
INIT_PTR_IDX(lp, 0, p1, s1, idx1);
|
|
3490
|
-
INIT_PTR_IDX(lp, 1, p2, s2, idx2);
|
|
3491
|
-
|
|
3492
|
-
if (idx1) {
|
|
3493
|
-
if (idx2) {
|
|
3494
|
-
for (i = 0; i < n; i++) {
|
|
3495
|
-
GET_DATA_INDEX(p1, idx1, dtype, x);
|
|
3496
|
-
x = m_trunc(x);
|
|
3497
|
-
SET_DATA_INDEX(p2, idx2, dtype, x);
|
|
3498
|
-
}
|
|
3499
|
-
} else {
|
|
3500
|
-
for (i = 0; i < n; i++) {
|
|
3501
|
-
GET_DATA_INDEX(p1, idx1, dtype, x);
|
|
3502
|
-
x = m_trunc(x);
|
|
3503
|
-
SET_DATA_STRIDE(p2, s2, dtype, x);
|
|
3504
|
-
}
|
|
3505
|
-
}
|
|
3506
|
-
} else {
|
|
3507
|
-
if (idx2) {
|
|
3508
|
-
for (i = 0; i < n; i++) {
|
|
3509
|
-
GET_DATA_STRIDE(p1, s1, dtype, x);
|
|
3510
|
-
x = m_trunc(x);
|
|
3511
|
-
SET_DATA_INDEX(p2, idx2, dtype, x);
|
|
3512
|
-
}
|
|
3513
|
-
} else {
|
|
3514
|
-
//
|
|
3515
|
-
if (is_aligned(p1, sizeof(dtype)) && is_aligned(p2, sizeof(dtype))) {
|
|
3516
|
-
if (s1 == sizeof(dtype) && s2 == sizeof(dtype)) {
|
|
3517
|
-
for (i = 0; i < n; i++) {
|
|
3518
|
-
((dtype*)p2)[i] = m_trunc(((dtype*)p1)[i]);
|
|
3519
|
-
}
|
|
3520
|
-
return;
|
|
3521
|
-
}
|
|
3522
|
-
if (is_aligned_step(s1, sizeof(dtype)) && is_aligned_step(s2, sizeof(dtype))) {
|
|
3523
|
-
//
|
|
3524
|
-
for (i = 0; i < n; i++) {
|
|
3525
|
-
*(dtype*)p2 = m_trunc(*(dtype*)p1);
|
|
3526
|
-
p1 += s1;
|
|
3527
|
-
p2 += s2;
|
|
3528
|
-
}
|
|
3529
|
-
return;
|
|
3530
|
-
//
|
|
3531
|
-
}
|
|
3532
|
-
}
|
|
3533
|
-
for (i = 0; i < n; i++) {
|
|
3534
|
-
GET_DATA_STRIDE(p1, s1, dtype, x);
|
|
3535
|
-
x = m_trunc(x);
|
|
3536
|
-
SET_DATA_STRIDE(p2, s2, dtype, x);
|
|
3537
|
-
}
|
|
3538
|
-
//
|
|
3539
|
-
}
|
|
3540
|
-
}
|
|
3541
|
-
}
|
|
3542
|
-
|
|
3543
|
-
static VALUE dfloat_trunc(VALUE self) {
|
|
3544
|
-
ndfunc_arg_in_t ain[1] = { { cT, 0 } };
|
|
3545
|
-
ndfunc_arg_out_t aout[1] = { { cT, 0 } };
|
|
3546
|
-
ndfunc_t ndf = { iter_dfloat_trunc, FULL_LOOP, 1, 1, ain, aout };
|
|
3547
|
-
|
|
3548
|
-
return na_ndloop(&ndf, 1, self);
|
|
3549
|
-
}
|
|
3550
|
-
|
|
3551
|
-
static void iter_dfloat_rint(na_loop_t* const lp) {
|
|
3552
|
-
size_t i, n;
|
|
3553
|
-
char *p1, *p2;
|
|
3554
|
-
ssize_t s1, s2;
|
|
3555
|
-
size_t *idx1, *idx2;
|
|
3556
|
-
dtype x;
|
|
3557
|
-
|
|
3558
|
-
INIT_COUNTER(lp, n);
|
|
3559
|
-
INIT_PTR_IDX(lp, 0, p1, s1, idx1);
|
|
3560
|
-
INIT_PTR_IDX(lp, 1, p2, s2, idx2);
|
|
3561
|
-
|
|
3562
|
-
if (idx1) {
|
|
3563
|
-
if (idx2) {
|
|
3564
|
-
for (i = 0; i < n; i++) {
|
|
3565
|
-
GET_DATA_INDEX(p1, idx1, dtype, x);
|
|
3566
|
-
x = m_rint(x);
|
|
3567
|
-
SET_DATA_INDEX(p2, idx2, dtype, x);
|
|
3568
|
-
}
|
|
3569
|
-
} else {
|
|
3570
|
-
for (i = 0; i < n; i++) {
|
|
3571
|
-
GET_DATA_INDEX(p1, idx1, dtype, x);
|
|
3572
|
-
x = m_rint(x);
|
|
3573
|
-
SET_DATA_STRIDE(p2, s2, dtype, x);
|
|
3574
|
-
}
|
|
3575
|
-
}
|
|
3576
|
-
} else {
|
|
3577
|
-
if (idx2) {
|
|
3578
|
-
for (i = 0; i < n; i++) {
|
|
3579
|
-
GET_DATA_STRIDE(p1, s1, dtype, x);
|
|
3580
|
-
x = m_rint(x);
|
|
3581
|
-
SET_DATA_INDEX(p2, idx2, dtype, x);
|
|
3582
|
-
}
|
|
3583
|
-
} else {
|
|
3584
|
-
//
|
|
3585
|
-
if (is_aligned(p1, sizeof(dtype)) && is_aligned(p2, sizeof(dtype))) {
|
|
3586
|
-
if (s1 == sizeof(dtype) && s2 == sizeof(dtype)) {
|
|
3587
|
-
for (i = 0; i < n; i++) {
|
|
3588
|
-
((dtype*)p2)[i] = m_rint(((dtype*)p1)[i]);
|
|
3589
|
-
}
|
|
3590
|
-
return;
|
|
3591
|
-
}
|
|
3592
|
-
if (is_aligned_step(s1, sizeof(dtype)) && is_aligned_step(s2, sizeof(dtype))) {
|
|
3593
|
-
//
|
|
3594
|
-
for (i = 0; i < n; i++) {
|
|
3595
|
-
*(dtype*)p2 = m_rint(*(dtype*)p1);
|
|
3596
|
-
p1 += s1;
|
|
3597
|
-
p2 += s2;
|
|
3598
|
-
}
|
|
3599
|
-
return;
|
|
3600
|
-
//
|
|
3601
|
-
}
|
|
3602
|
-
}
|
|
3603
|
-
for (i = 0; i < n; i++) {
|
|
3604
|
-
GET_DATA_STRIDE(p1, s1, dtype, x);
|
|
3605
|
-
x = m_rint(x);
|
|
3606
|
-
SET_DATA_STRIDE(p2, s2, dtype, x);
|
|
3607
|
-
}
|
|
3608
|
-
//
|
|
3609
|
-
}
|
|
3610
|
-
}
|
|
3611
|
-
}
|
|
3612
|
-
|
|
3613
|
-
static VALUE dfloat_rint(VALUE self) {
|
|
3614
|
-
ndfunc_arg_in_t ain[1] = { { cT, 0 } };
|
|
3615
|
-
ndfunc_arg_out_t aout[1] = { { cT, 0 } };
|
|
3616
|
-
ndfunc_t ndf = { iter_dfloat_rint, FULL_LOOP, 1, 1, ain, aout };
|
|
3617
|
-
|
|
3618
|
-
return na_ndloop(&ndf, 1, self);
|
|
3619
|
-
}
|
|
3620
|
-
|
|
3621
|
-
#define check_intdivzero(y) \
|
|
3622
|
-
{}
|
|
3623
|
-
|
|
3624
|
-
static void iter_dfloat_copysign(na_loop_t* const lp) {
|
|
3625
|
-
size_t i = 0;
|
|
3626
|
-
size_t n;
|
|
3627
|
-
char *p1, *p2, *p3;
|
|
3628
|
-
ssize_t s1, s2, s3;
|
|
3629
|
-
|
|
3630
|
-
INIT_COUNTER(lp, n);
|
|
3631
|
-
INIT_PTR(lp, 0, p1, s1);
|
|
3632
|
-
INIT_PTR(lp, 1, p2, s2);
|
|
3633
|
-
INIT_PTR(lp, 2, p3, s3);
|
|
3634
|
-
|
|
3635
|
-
//
|
|
3636
|
-
if (is_aligned(p1, sizeof(dtype)) && is_aligned(p2, sizeof(dtype)) &&
|
|
3637
|
-
is_aligned(p3, sizeof(dtype))) {
|
|
3638
|
-
|
|
3639
|
-
if (s1 == sizeof(dtype) && s2 == sizeof(dtype) && s3 == sizeof(dtype)) {
|
|
3640
|
-
if (p1 == p3) { // inplace case
|
|
3641
|
-
for (; i < n; i++) {
|
|
3642
|
-
check_intdivzero(((dtype*)p2)[i]);
|
|
3643
|
-
((dtype*)p1)[i] = m_copysign(((dtype*)p1)[i], ((dtype*)p2)[i]);
|
|
3644
|
-
}
|
|
3645
|
-
} else {
|
|
3646
|
-
for (; i < n; i++) {
|
|
3647
|
-
check_intdivzero(((dtype*)p2)[i]);
|
|
3648
|
-
((dtype*)p3)[i] = m_copysign(((dtype*)p1)[i], ((dtype*)p2)[i]);
|
|
3649
|
-
}
|
|
3650
|
-
}
|
|
3651
|
-
return;
|
|
3652
|
-
}
|
|
3653
|
-
|
|
3654
|
-
if (is_aligned_step(s1, sizeof(dtype)) && is_aligned_step(s2, sizeof(dtype)) &&
|
|
3655
|
-
is_aligned_step(s3, sizeof(dtype))) {
|
|
3656
|
-
//
|
|
3657
|
-
|
|
3658
|
-
if (s2 == 0) { // Broadcasting from scalar value.
|
|
3659
|
-
check_intdivzero(*(dtype*)p2);
|
|
3660
|
-
if (s1 == sizeof(dtype) && s3 == sizeof(dtype)) {
|
|
3661
|
-
if (p1 == p3) { // inplace case
|
|
3662
|
-
for (; i < n; i++) {
|
|
3663
|
-
((dtype*)p1)[i] = m_copysign(((dtype*)p1)[i], *(dtype*)p2);
|
|
3664
|
-
}
|
|
3665
|
-
} else {
|
|
3666
|
-
for (; i < n; i++) {
|
|
3667
|
-
((dtype*)p3)[i] = m_copysign(((dtype*)p1)[i], *(dtype*)p2);
|
|
3668
|
-
}
|
|
3669
|
-
}
|
|
3670
|
-
} else {
|
|
3671
|
-
for (i = 0; i < n; i++) {
|
|
3672
|
-
*(dtype*)p3 = m_copysign(*(dtype*)p1, *(dtype*)p2);
|
|
3673
|
-
p1 += s1;
|
|
3674
|
-
p3 += s3;
|
|
3675
|
-
}
|
|
3676
|
-
}
|
|
3677
|
-
} else {
|
|
3678
|
-
if (p1 == p3) { // inplace case
|
|
3679
|
-
for (i = 0; i < n; i++) {
|
|
3680
|
-
check_intdivzero(*(dtype*)p2);
|
|
3681
|
-
*(dtype*)p1 = m_copysign(*(dtype*)p1, *(dtype*)p2);
|
|
3682
|
-
p1 += s1;
|
|
3683
|
-
p2 += s2;
|
|
3684
|
-
}
|
|
3685
|
-
} else {
|
|
3686
|
-
for (i = 0; i < n; i++) {
|
|
3687
|
-
check_intdivzero(*(dtype*)p2);
|
|
3688
|
-
*(dtype*)p3 = m_copysign(*(dtype*)p1, *(dtype*)p2);
|
|
3689
|
-
p1 += s1;
|
|
3690
|
-
p2 += s2;
|
|
3691
|
-
p3 += s3;
|
|
3692
|
-
}
|
|
3693
|
-
}
|
|
3694
|
-
}
|
|
3695
|
-
|
|
3696
|
-
return;
|
|
3697
|
-
//
|
|
3698
|
-
}
|
|
3699
|
-
}
|
|
3700
|
-
for (i = 0; i < n; i++) {
|
|
3701
|
-
dtype x, y, z;
|
|
3702
|
-
GET_DATA_STRIDE(p1, s1, dtype, x);
|
|
3703
|
-
GET_DATA_STRIDE(p2, s2, dtype, y);
|
|
3704
|
-
check_intdivzero(y);
|
|
3705
|
-
z = m_copysign(x, y);
|
|
3706
|
-
SET_DATA_STRIDE(p3, s3, dtype, z);
|
|
3707
|
-
}
|
|
3708
|
-
//
|
|
3709
|
-
}
|
|
3710
|
-
#undef check_intdivzero
|
|
3711
|
-
|
|
3712
|
-
static VALUE dfloat_copysign_self(VALUE self, VALUE other) {
|
|
3713
|
-
ndfunc_arg_in_t ain[2] = { { cT, 0 }, { cT, 0 } };
|
|
3714
|
-
ndfunc_arg_out_t aout[1] = { { cT, 0 } };
|
|
3715
|
-
ndfunc_t ndf = { iter_dfloat_copysign, STRIDE_LOOP, 2, 1, ain, aout };
|
|
3716
|
-
|
|
3717
|
-
return na_ndloop(&ndf, 2, self, other);
|
|
3718
|
-
}
|
|
3719
|
-
|
|
3720
|
-
static VALUE dfloat_copysign(VALUE self, VALUE other) {
|
|
3721
|
-
|
|
3722
|
-
VALUE klass, v;
|
|
3723
|
-
|
|
3724
|
-
klass = na_upcast(rb_obj_class(self), rb_obj_class(other));
|
|
3725
|
-
if (klass == cT) {
|
|
3726
|
-
return dfloat_copysign_self(self, other);
|
|
3727
|
-
} else {
|
|
3728
|
-
v = rb_funcall(klass, id_cast, 1, self);
|
|
3729
|
-
return rb_funcall(v, id_copysign, 1, other);
|
|
3730
|
-
}
|
|
3731
|
-
}
|
|
3732
|
-
|
|
3733
|
-
static void iter_dfloat_signbit(na_loop_t* const lp) {
|
|
3734
|
-
size_t i;
|
|
3735
|
-
char* p1;
|
|
3736
|
-
BIT_DIGIT* a2;
|
|
3737
|
-
size_t p2;
|
|
3738
|
-
ssize_t s1, s2;
|
|
3739
|
-
size_t* idx1;
|
|
3740
|
-
dtype x;
|
|
3741
|
-
BIT_DIGIT b;
|
|
3742
|
-
INIT_COUNTER(lp, i);
|
|
3743
|
-
INIT_PTR_IDX(lp, 0, p1, s1, idx1);
|
|
3744
|
-
INIT_PTR_BIT(lp, 1, a2, p2, s2);
|
|
3745
|
-
if (idx1) {
|
|
3746
|
-
for (; i--;) {
|
|
3747
|
-
GET_DATA_INDEX(p1, idx1, dtype, x);
|
|
3748
|
-
b = (m_signbit(x)) ? 1 : 0;
|
|
3749
|
-
STORE_BIT(a2, p2, b);
|
|
3750
|
-
p2 += s2;
|
|
3751
|
-
}
|
|
3752
|
-
} else {
|
|
3753
|
-
for (; i--;) {
|
|
3754
|
-
GET_DATA_STRIDE(p1, s1, dtype, x);
|
|
3755
|
-
b = (m_signbit(x)) ? 1 : 0;
|
|
3756
|
-
STORE_BIT(a2, p2, b);
|
|
3757
|
-
p2 += s2;
|
|
3758
|
-
}
|
|
3759
|
-
}
|
|
3760
|
-
}
|
|
3761
|
-
|
|
3762
|
-
static VALUE dfloat_signbit(VALUE self) {
|
|
3763
|
-
ndfunc_arg_in_t ain[1] = { { cT, 0 } };
|
|
3764
|
-
ndfunc_arg_out_t aout[1] = { { numo_cBit, 0 } };
|
|
3765
|
-
ndfunc_t ndf = { iter_dfloat_signbit, FULL_LOOP, 1, 1, ain, aout };
|
|
3766
|
-
|
|
3767
|
-
return na_ndloop(&ndf, 1, self);
|
|
3768
|
-
}
|
|
3769
|
-
|
|
3770
|
-
static void iter_dfloat_modf(na_loop_t* const lp) {
|
|
3771
|
-
size_t i;
|
|
3772
|
-
char *p1, *p2, *p3;
|
|
3773
|
-
ssize_t s1, s2, s3;
|
|
3774
|
-
dtype x, y, z;
|
|
3775
|
-
INIT_COUNTER(lp, i);
|
|
3776
|
-
INIT_PTR(lp, 0, p1, s1);
|
|
3777
|
-
INIT_PTR(lp, 1, p2, s2);
|
|
3778
|
-
INIT_PTR(lp, 2, p3, s3);
|
|
3779
|
-
for (; i--;) {
|
|
3780
|
-
GET_DATA_STRIDE(p1, s1, dtype, x);
|
|
3781
|
-
m_modf(x, y, z);
|
|
3782
|
-
SET_DATA_STRIDE(p2, s2, dtype, y);
|
|
3783
|
-
SET_DATA_STRIDE(p3, s3, dtype, z);
|
|
3784
|
-
}
|
|
3785
|
-
}
|
|
3786
|
-
|
|
3787
|
-
static VALUE dfloat_modf(VALUE self) {
|
|
3788
|
-
ndfunc_arg_in_t ain[1] = { { cT, 0 } };
|
|
3789
|
-
ndfunc_arg_out_t aout[2] = { { cT, 0 }, { cT, 0 } };
|
|
3790
|
-
ndfunc_t ndf = { iter_dfloat_modf, STRIDE_LOOP, 1, 2, ain, aout };
|
|
3791
|
-
|
|
3792
|
-
return na_ndloop(&ndf, 1, self);
|
|
3793
|
-
}
|
|
3794
|
-
|
|
3795
|
-
static void iter_dfloat_gt(na_loop_t* const lp) {
|
|
3796
|
-
size_t i;
|
|
3797
|
-
char *p1, *p2;
|
|
3798
|
-
BIT_DIGIT* a3;
|
|
3799
|
-
size_t p3;
|
|
3800
|
-
ssize_t s1, s2, s3;
|
|
3801
|
-
dtype x, y;
|
|
3802
|
-
BIT_DIGIT b;
|
|
3803
|
-
INIT_COUNTER(lp, i);
|
|
3804
|
-
INIT_PTR(lp, 0, p1, s1);
|
|
3805
|
-
INIT_PTR(lp, 1, p2, s2);
|
|
3806
|
-
INIT_PTR_BIT(lp, 2, a3, p3, s3);
|
|
3807
|
-
for (; i--;) {
|
|
3808
|
-
GET_DATA_STRIDE(p1, s1, dtype, x);
|
|
3809
|
-
GET_DATA_STRIDE(p2, s2, dtype, y);
|
|
3810
|
-
b = (m_gt(x, y)) ? 1 : 0;
|
|
3811
|
-
STORE_BIT(a3, p3, b);
|
|
3812
|
-
p3 += s3;
|
|
3813
|
-
}
|
|
3814
|
-
}
|
|
3815
|
-
|
|
3816
|
-
static VALUE dfloat_gt_self(VALUE self, VALUE other) {
|
|
3817
|
-
ndfunc_arg_in_t ain[2] = { { cT, 0 }, { cT, 0 } };
|
|
3818
|
-
ndfunc_arg_out_t aout[1] = { { numo_cBit, 0 } };
|
|
3819
|
-
ndfunc_t ndf = { iter_dfloat_gt, STRIDE_LOOP, 2, 1, ain, aout };
|
|
3820
|
-
|
|
3821
|
-
return na_ndloop(&ndf, 2, self, other);
|
|
3822
|
-
}
|
|
3823
|
-
|
|
3824
|
-
static VALUE dfloat_gt(VALUE self, VALUE other) {
|
|
3825
|
-
|
|
3826
|
-
VALUE klass, v;
|
|
3827
|
-
klass = na_upcast(rb_obj_class(self), rb_obj_class(other));
|
|
3828
|
-
if (klass == cT) {
|
|
3829
|
-
return dfloat_gt_self(self, other);
|
|
3830
|
-
} else {
|
|
3831
|
-
v = rb_funcall(klass, id_cast, 1, self);
|
|
3832
|
-
return rb_funcall(v, id_gt, 1, other);
|
|
3833
|
-
}
|
|
3834
|
-
}
|
|
3835
|
-
|
|
3836
|
-
static void iter_dfloat_ge(na_loop_t* const lp) {
|
|
3837
|
-
size_t i;
|
|
3838
|
-
char *p1, *p2;
|
|
3839
|
-
BIT_DIGIT* a3;
|
|
3840
|
-
size_t p3;
|
|
3841
|
-
ssize_t s1, s2, s3;
|
|
3842
|
-
dtype x, y;
|
|
3843
|
-
BIT_DIGIT b;
|
|
3844
|
-
INIT_COUNTER(lp, i);
|
|
3845
|
-
INIT_PTR(lp, 0, p1, s1);
|
|
3846
|
-
INIT_PTR(lp, 1, p2, s2);
|
|
3847
|
-
INIT_PTR_BIT(lp, 2, a3, p3, s3);
|
|
3848
|
-
for (; i--;) {
|
|
3849
|
-
GET_DATA_STRIDE(p1, s1, dtype, x);
|
|
3850
|
-
GET_DATA_STRIDE(p2, s2, dtype, y);
|
|
3851
|
-
b = (m_ge(x, y)) ? 1 : 0;
|
|
3852
|
-
STORE_BIT(a3, p3, b);
|
|
3853
|
-
p3 += s3;
|
|
3854
|
-
}
|
|
3855
|
-
}
|
|
3856
|
-
|
|
3857
|
-
static VALUE dfloat_ge_self(VALUE self, VALUE other) {
|
|
3858
|
-
ndfunc_arg_in_t ain[2] = { { cT, 0 }, { cT, 0 } };
|
|
3859
|
-
ndfunc_arg_out_t aout[1] = { { numo_cBit, 0 } };
|
|
3860
|
-
ndfunc_t ndf = { iter_dfloat_ge, STRIDE_LOOP, 2, 1, ain, aout };
|
|
3861
|
-
|
|
3862
|
-
return na_ndloop(&ndf, 2, self, other);
|
|
3863
|
-
}
|
|
3864
|
-
|
|
3865
|
-
static VALUE dfloat_ge(VALUE self, VALUE other) {
|
|
3866
|
-
|
|
3867
|
-
VALUE klass, v;
|
|
3868
|
-
klass = na_upcast(rb_obj_class(self), rb_obj_class(other));
|
|
3869
|
-
if (klass == cT) {
|
|
3870
|
-
return dfloat_ge_self(self, other);
|
|
3871
|
-
} else {
|
|
3872
|
-
v = rb_funcall(klass, id_cast, 1, self);
|
|
3873
|
-
return rb_funcall(v, id_ge, 1, other);
|
|
3874
|
-
}
|
|
3875
|
-
}
|
|
3876
|
-
|
|
3877
|
-
static void iter_dfloat_lt(na_loop_t* const lp) {
|
|
3878
|
-
size_t i;
|
|
3879
|
-
char *p1, *p2;
|
|
3880
|
-
BIT_DIGIT* a3;
|
|
3881
|
-
size_t p3;
|
|
3882
|
-
ssize_t s1, s2, s3;
|
|
3883
|
-
dtype x, y;
|
|
3884
|
-
BIT_DIGIT b;
|
|
3885
|
-
INIT_COUNTER(lp, i);
|
|
3886
|
-
INIT_PTR(lp, 0, p1, s1);
|
|
3887
|
-
INIT_PTR(lp, 1, p2, s2);
|
|
3888
|
-
INIT_PTR_BIT(lp, 2, a3, p3, s3);
|
|
3889
|
-
for (; i--;) {
|
|
3890
|
-
GET_DATA_STRIDE(p1, s1, dtype, x);
|
|
3891
|
-
GET_DATA_STRIDE(p2, s2, dtype, y);
|
|
3892
|
-
b = (m_lt(x, y)) ? 1 : 0;
|
|
3893
|
-
STORE_BIT(a3, p3, b);
|
|
3894
|
-
p3 += s3;
|
|
3895
|
-
}
|
|
3896
|
-
}
|
|
3897
|
-
|
|
3898
|
-
static VALUE dfloat_lt_self(VALUE self, VALUE other) {
|
|
3899
|
-
ndfunc_arg_in_t ain[2] = { { cT, 0 }, { cT, 0 } };
|
|
3900
|
-
ndfunc_arg_out_t aout[1] = { { numo_cBit, 0 } };
|
|
3901
|
-
ndfunc_t ndf = { iter_dfloat_lt, STRIDE_LOOP, 2, 1, ain, aout };
|
|
3902
|
-
|
|
3903
|
-
return na_ndloop(&ndf, 2, self, other);
|
|
3904
|
-
}
|
|
3905
|
-
|
|
3906
|
-
static VALUE dfloat_lt(VALUE self, VALUE other) {
|
|
3907
|
-
|
|
3908
|
-
VALUE klass, v;
|
|
3909
|
-
klass = na_upcast(rb_obj_class(self), rb_obj_class(other));
|
|
3910
|
-
if (klass == cT) {
|
|
3911
|
-
return dfloat_lt_self(self, other);
|
|
3912
|
-
} else {
|
|
3913
|
-
v = rb_funcall(klass, id_cast, 1, self);
|
|
3914
|
-
return rb_funcall(v, id_lt, 1, other);
|
|
3915
|
-
}
|
|
3916
|
-
}
|
|
3917
|
-
|
|
3918
|
-
static void iter_dfloat_le(na_loop_t* const lp) {
|
|
3919
|
-
size_t i;
|
|
3920
|
-
char *p1, *p2;
|
|
3921
|
-
BIT_DIGIT* a3;
|
|
3922
|
-
size_t p3;
|
|
3923
|
-
ssize_t s1, s2, s3;
|
|
3924
|
-
dtype x, y;
|
|
3925
|
-
BIT_DIGIT b;
|
|
3926
|
-
INIT_COUNTER(lp, i);
|
|
3927
|
-
INIT_PTR(lp, 0, p1, s1);
|
|
3928
|
-
INIT_PTR(lp, 1, p2, s2);
|
|
3929
|
-
INIT_PTR_BIT(lp, 2, a3, p3, s3);
|
|
3930
|
-
for (; i--;) {
|
|
3931
|
-
GET_DATA_STRIDE(p1, s1, dtype, x);
|
|
3932
|
-
GET_DATA_STRIDE(p2, s2, dtype, y);
|
|
3933
|
-
b = (m_le(x, y)) ? 1 : 0;
|
|
3934
|
-
STORE_BIT(a3, p3, b);
|
|
3935
|
-
p3 += s3;
|
|
3936
|
-
}
|
|
3937
|
-
}
|
|
3938
|
-
|
|
3939
|
-
static VALUE dfloat_le_self(VALUE self, VALUE other) {
|
|
3940
|
-
ndfunc_arg_in_t ain[2] = { { cT, 0 }, { cT, 0 } };
|
|
3941
|
-
ndfunc_arg_out_t aout[1] = { { numo_cBit, 0 } };
|
|
3942
|
-
ndfunc_t ndf = { iter_dfloat_le, STRIDE_LOOP, 2, 1, ain, aout };
|
|
3943
|
-
|
|
3944
|
-
return na_ndloop(&ndf, 2, self, other);
|
|
3945
|
-
}
|
|
3946
|
-
|
|
3947
|
-
static VALUE dfloat_le(VALUE self, VALUE other) {
|
|
3948
|
-
|
|
3949
|
-
VALUE klass, v;
|
|
3950
|
-
klass = na_upcast(rb_obj_class(self), rb_obj_class(other));
|
|
3951
|
-
if (klass == cT) {
|
|
3952
|
-
return dfloat_le_self(self, other);
|
|
3953
|
-
} else {
|
|
3954
|
-
v = rb_funcall(klass, id_cast, 1, self);
|
|
3955
|
-
return rb_funcall(v, id_le, 1, other);
|
|
3956
|
-
}
|
|
3957
|
-
}
|
|
3958
|
-
|
|
3959
|
-
static void iter_dfloat_isnan(na_loop_t* const lp) {
|
|
3960
|
-
size_t i;
|
|
3961
|
-
char* p1;
|
|
3962
|
-
BIT_DIGIT* a2;
|
|
3963
|
-
size_t p2;
|
|
3964
|
-
ssize_t s1, s2;
|
|
3965
|
-
size_t* idx1;
|
|
3966
|
-
dtype x;
|
|
3967
|
-
BIT_DIGIT b;
|
|
3968
|
-
INIT_COUNTER(lp, i);
|
|
3969
|
-
INIT_PTR_IDX(lp, 0, p1, s1, idx1);
|
|
3970
|
-
INIT_PTR_BIT(lp, 1, a2, p2, s2);
|
|
3971
|
-
if (idx1) {
|
|
3972
|
-
for (; i--;) {
|
|
3973
|
-
GET_DATA_INDEX(p1, idx1, dtype, x);
|
|
3974
|
-
b = (m_isnan(x)) ? 1 : 0;
|
|
3975
|
-
STORE_BIT(a2, p2, b);
|
|
3976
|
-
p2 += s2;
|
|
3977
|
-
}
|
|
3978
|
-
} else {
|
|
3979
|
-
for (; i--;) {
|
|
3980
|
-
GET_DATA_STRIDE(p1, s1, dtype, x);
|
|
3981
|
-
b = (m_isnan(x)) ? 1 : 0;
|
|
3982
|
-
STORE_BIT(a2, p2, b);
|
|
3983
|
-
p2 += s2;
|
|
1875
|
+
return;
|
|
1876
|
+
//
|
|
1877
|
+
}
|
|
1878
|
+
}
|
|
1879
|
+
for (i = 0; i < n; i++) {
|
|
1880
|
+
GET_DATA_STRIDE(p1, s1, dtype, x);
|
|
1881
|
+
x = m_sign(x);
|
|
1882
|
+
SET_DATA_STRIDE(p2, s2, dtype, x);
|
|
1883
|
+
}
|
|
1884
|
+
//
|
|
3984
1885
|
}
|
|
3985
1886
|
}
|
|
3986
1887
|
}
|
|
3987
1888
|
|
|
3988
|
-
static VALUE
|
|
1889
|
+
static VALUE dfloat_sign(VALUE self) {
|
|
3989
1890
|
ndfunc_arg_in_t ain[1] = { { cT, 0 } };
|
|
3990
|
-
ndfunc_arg_out_t aout[1] = { {
|
|
3991
|
-
ndfunc_t ndf = {
|
|
1891
|
+
ndfunc_arg_out_t aout[1] = { { cT, 0 } };
|
|
1892
|
+
ndfunc_t ndf = { iter_dfloat_sign, FULL_LOOP, 1, 1, ain, aout };
|
|
3992
1893
|
|
|
3993
1894
|
return na_ndloop(&ndf, 1, self);
|
|
3994
1895
|
}
|
|
3995
1896
|
|
|
3996
|
-
static void
|
|
3997
|
-
size_t i;
|
|
3998
|
-
char*
|
|
3999
|
-
BIT_DIGIT* a2;
|
|
4000
|
-
size_t p2;
|
|
1897
|
+
static void iter_dfloat_square(na_loop_t* const lp) {
|
|
1898
|
+
size_t i, n;
|
|
1899
|
+
char *p1, *p2;
|
|
4001
1900
|
ssize_t s1, s2;
|
|
4002
|
-
size_t*
|
|
1901
|
+
size_t *idx1, *idx2;
|
|
4003
1902
|
dtype x;
|
|
4004
|
-
|
|
4005
|
-
INIT_COUNTER(lp,
|
|
1903
|
+
|
|
1904
|
+
INIT_COUNTER(lp, n);
|
|
4006
1905
|
INIT_PTR_IDX(lp, 0, p1, s1, idx1);
|
|
4007
|
-
|
|
1906
|
+
INIT_PTR_IDX(lp, 1, p2, s2, idx2);
|
|
1907
|
+
|
|
4008
1908
|
if (idx1) {
|
|
4009
|
-
|
|
4010
|
-
|
|
4011
|
-
|
|
4012
|
-
|
|
4013
|
-
|
|
1909
|
+
if (idx2) {
|
|
1910
|
+
for (i = 0; i < n; i++) {
|
|
1911
|
+
GET_DATA_INDEX(p1, idx1, dtype, x);
|
|
1912
|
+
x = m_square(x);
|
|
1913
|
+
SET_DATA_INDEX(p2, idx2, dtype, x);
|
|
1914
|
+
}
|
|
1915
|
+
} else {
|
|
1916
|
+
for (i = 0; i < n; i++) {
|
|
1917
|
+
GET_DATA_INDEX(p1, idx1, dtype, x);
|
|
1918
|
+
x = m_square(x);
|
|
1919
|
+
SET_DATA_STRIDE(p2, s2, dtype, x);
|
|
1920
|
+
}
|
|
4014
1921
|
}
|
|
4015
1922
|
} else {
|
|
4016
|
-
|
|
4017
|
-
|
|
4018
|
-
|
|
4019
|
-
|
|
4020
|
-
|
|
1923
|
+
if (idx2) {
|
|
1924
|
+
for (i = 0; i < n; i++) {
|
|
1925
|
+
GET_DATA_STRIDE(p1, s1, dtype, x);
|
|
1926
|
+
x = m_square(x);
|
|
1927
|
+
SET_DATA_INDEX(p2, idx2, dtype, x);
|
|
1928
|
+
}
|
|
1929
|
+
} else {
|
|
1930
|
+
//
|
|
1931
|
+
if (is_aligned(p1, sizeof(dtype)) && is_aligned(p2, sizeof(dtype))) {
|
|
1932
|
+
if (s1 == sizeof(dtype) && s2 == sizeof(dtype)) {
|
|
1933
|
+
for (i = 0; i < n; i++) {
|
|
1934
|
+
((dtype*)p2)[i] = m_square(((dtype*)p1)[i]);
|
|
1935
|
+
}
|
|
1936
|
+
return;
|
|
1937
|
+
}
|
|
1938
|
+
if (is_aligned_step(s1, sizeof(dtype)) && is_aligned_step(s2, sizeof(dtype))) {
|
|
1939
|
+
//
|
|
1940
|
+
for (i = 0; i < n; i++) {
|
|
1941
|
+
*(dtype*)p2 = m_square(*(dtype*)p1);
|
|
1942
|
+
p1 += s1;
|
|
1943
|
+
p2 += s2;
|
|
1944
|
+
}
|
|
1945
|
+
return;
|
|
1946
|
+
//
|
|
1947
|
+
}
|
|
1948
|
+
}
|
|
1949
|
+
for (i = 0; i < n; i++) {
|
|
1950
|
+
GET_DATA_STRIDE(p1, s1, dtype, x);
|
|
1951
|
+
x = m_square(x);
|
|
1952
|
+
SET_DATA_STRIDE(p2, s2, dtype, x);
|
|
1953
|
+
}
|
|
1954
|
+
//
|
|
4021
1955
|
}
|
|
4022
1956
|
}
|
|
4023
1957
|
}
|
|
4024
1958
|
|
|
4025
|
-
static VALUE
|
|
1959
|
+
static VALUE dfloat_square(VALUE self) {
|
|
4026
1960
|
ndfunc_arg_in_t ain[1] = { { cT, 0 } };
|
|
4027
|
-
ndfunc_arg_out_t aout[1] = { {
|
|
4028
|
-
ndfunc_t ndf = {
|
|
1961
|
+
ndfunc_arg_out_t aout[1] = { { cT, 0 } };
|
|
1962
|
+
ndfunc_t ndf = { iter_dfloat_square, FULL_LOOP, 1, 1, ain, aout };
|
|
4029
1963
|
|
|
4030
1964
|
return na_ndloop(&ndf, 1, self);
|
|
4031
1965
|
}
|
|
4032
1966
|
|
|
4033
|
-
|
|
4034
|
-
|
|
4035
|
-
|
|
4036
|
-
|
|
4037
|
-
size_t
|
|
4038
|
-
|
|
4039
|
-
|
|
4040
|
-
|
|
4041
|
-
|
|
4042
|
-
INIT_COUNTER(lp,
|
|
4043
|
-
|
|
4044
|
-
|
|
4045
|
-
|
|
4046
|
-
|
|
4047
|
-
|
|
4048
|
-
|
|
4049
|
-
|
|
4050
|
-
|
|
1967
|
+
#define check_intdivzero(y) \
|
|
1968
|
+
{}
|
|
1969
|
+
|
|
1970
|
+
static void iter_dfloat_copysign(na_loop_t* const lp) {
|
|
1971
|
+
size_t i = 0;
|
|
1972
|
+
size_t n;
|
|
1973
|
+
char *p1, *p2, *p3;
|
|
1974
|
+
ssize_t s1, s2, s3;
|
|
1975
|
+
|
|
1976
|
+
INIT_COUNTER(lp, n);
|
|
1977
|
+
INIT_PTR(lp, 0, p1, s1);
|
|
1978
|
+
INIT_PTR(lp, 1, p2, s2);
|
|
1979
|
+
INIT_PTR(lp, 2, p3, s3);
|
|
1980
|
+
|
|
1981
|
+
//
|
|
1982
|
+
if (is_aligned(p1, sizeof(dtype)) && is_aligned(p2, sizeof(dtype)) &&
|
|
1983
|
+
is_aligned(p3, sizeof(dtype))) {
|
|
1984
|
+
|
|
1985
|
+
if (s1 == sizeof(dtype) && s2 == sizeof(dtype) && s3 == sizeof(dtype)) {
|
|
1986
|
+
if (p1 == p3) { // inplace case
|
|
1987
|
+
for (; i < n; i++) {
|
|
1988
|
+
check_intdivzero(((dtype*)p2)[i]);
|
|
1989
|
+
((dtype*)p1)[i] = m_copysign(((dtype*)p1)[i], ((dtype*)p2)[i]);
|
|
1990
|
+
}
|
|
1991
|
+
} else {
|
|
1992
|
+
for (; i < n; i++) {
|
|
1993
|
+
check_intdivzero(((dtype*)p2)[i]);
|
|
1994
|
+
((dtype*)p3)[i] = m_copysign(((dtype*)p1)[i], ((dtype*)p2)[i]);
|
|
1995
|
+
}
|
|
1996
|
+
}
|
|
1997
|
+
return;
|
|
4051
1998
|
}
|
|
4052
|
-
|
|
4053
|
-
|
|
4054
|
-
|
|
4055
|
-
|
|
4056
|
-
|
|
4057
|
-
|
|
1999
|
+
|
|
2000
|
+
if (is_aligned_step(s1, sizeof(dtype)) && is_aligned_step(s2, sizeof(dtype)) &&
|
|
2001
|
+
is_aligned_step(s3, sizeof(dtype))) {
|
|
2002
|
+
//
|
|
2003
|
+
|
|
2004
|
+
if (s2 == 0) { // Broadcasting from scalar value.
|
|
2005
|
+
check_intdivzero(*(dtype*)p2);
|
|
2006
|
+
if (s1 == sizeof(dtype) && s3 == sizeof(dtype)) {
|
|
2007
|
+
if (p1 == p3) { // inplace case
|
|
2008
|
+
for (; i < n; i++) {
|
|
2009
|
+
((dtype*)p1)[i] = m_copysign(((dtype*)p1)[i], *(dtype*)p2);
|
|
2010
|
+
}
|
|
2011
|
+
} else {
|
|
2012
|
+
for (; i < n; i++) {
|
|
2013
|
+
((dtype*)p3)[i] = m_copysign(((dtype*)p1)[i], *(dtype*)p2);
|
|
2014
|
+
}
|
|
2015
|
+
}
|
|
2016
|
+
} else {
|
|
2017
|
+
for (i = 0; i < n; i++) {
|
|
2018
|
+
*(dtype*)p3 = m_copysign(*(dtype*)p1, *(dtype*)p2);
|
|
2019
|
+
p1 += s1;
|
|
2020
|
+
p3 += s3;
|
|
2021
|
+
}
|
|
2022
|
+
}
|
|
2023
|
+
} else {
|
|
2024
|
+
if (p1 == p3) { // inplace case
|
|
2025
|
+
for (i = 0; i < n; i++) {
|
|
2026
|
+
check_intdivzero(*(dtype*)p2);
|
|
2027
|
+
*(dtype*)p1 = m_copysign(*(dtype*)p1, *(dtype*)p2);
|
|
2028
|
+
p1 += s1;
|
|
2029
|
+
p2 += s2;
|
|
2030
|
+
}
|
|
2031
|
+
} else {
|
|
2032
|
+
for (i = 0; i < n; i++) {
|
|
2033
|
+
check_intdivzero(*(dtype*)p2);
|
|
2034
|
+
*(dtype*)p3 = m_copysign(*(dtype*)p1, *(dtype*)p2);
|
|
2035
|
+
p1 += s1;
|
|
2036
|
+
p2 += s2;
|
|
2037
|
+
p3 += s3;
|
|
2038
|
+
}
|
|
2039
|
+
}
|
|
2040
|
+
}
|
|
2041
|
+
|
|
2042
|
+
return;
|
|
2043
|
+
//
|
|
4058
2044
|
}
|
|
4059
2045
|
}
|
|
2046
|
+
for (i = 0; i < n; i++) {
|
|
2047
|
+
dtype x, y, z;
|
|
2048
|
+
GET_DATA_STRIDE(p1, s1, dtype, x);
|
|
2049
|
+
GET_DATA_STRIDE(p2, s2, dtype, y);
|
|
2050
|
+
check_intdivzero(y);
|
|
2051
|
+
z = m_copysign(x, y);
|
|
2052
|
+
SET_DATA_STRIDE(p3, s3, dtype, z);
|
|
2053
|
+
}
|
|
2054
|
+
//
|
|
4060
2055
|
}
|
|
2056
|
+
#undef check_intdivzero
|
|
4061
2057
|
|
|
4062
|
-
static VALUE
|
|
4063
|
-
ndfunc_arg_in_t ain[
|
|
4064
|
-
ndfunc_arg_out_t aout[1] = { {
|
|
4065
|
-
ndfunc_t ndf = {
|
|
2058
|
+
static VALUE dfloat_copysign_self(VALUE self, VALUE other) {
|
|
2059
|
+
ndfunc_arg_in_t ain[2] = { { cT, 0 }, { cT, 0 } };
|
|
2060
|
+
ndfunc_arg_out_t aout[1] = { { cT, 0 } };
|
|
2061
|
+
ndfunc_t ndf = { iter_dfloat_copysign, STRIDE_LOOP, 2, 1, ain, aout };
|
|
4066
2062
|
|
|
4067
|
-
return na_ndloop(&ndf,
|
|
2063
|
+
return na_ndloop(&ndf, 2, self, other);
|
|
2064
|
+
}
|
|
2065
|
+
|
|
2066
|
+
static VALUE dfloat_copysign(VALUE self, VALUE other) {
|
|
2067
|
+
|
|
2068
|
+
VALUE klass, v;
|
|
2069
|
+
|
|
2070
|
+
klass = na_upcast(rb_obj_class(self), rb_obj_class(other));
|
|
2071
|
+
if (klass == cT) {
|
|
2072
|
+
return dfloat_copysign_self(self, other);
|
|
2073
|
+
} else {
|
|
2074
|
+
v = rb_funcall(klass, id_cast, 1, self);
|
|
2075
|
+
return rb_funcall(v, id_copysign, 1, other);
|
|
2076
|
+
}
|
|
4068
2077
|
}
|
|
4069
2078
|
|
|
4070
|
-
static void
|
|
2079
|
+
static void iter_dfloat_signbit(na_loop_t* const lp) {
|
|
4071
2080
|
size_t i;
|
|
4072
2081
|
char* p1;
|
|
4073
2082
|
BIT_DIGIT* a2;
|
|
@@ -4082,61 +2091,49 @@ static void iter_dfloat_isneginf(na_loop_t* const lp) {
|
|
|
4082
2091
|
if (idx1) {
|
|
4083
2092
|
for (; i--;) {
|
|
4084
2093
|
GET_DATA_INDEX(p1, idx1, dtype, x);
|
|
4085
|
-
b = (
|
|
2094
|
+
b = (m_signbit(x)) ? 1 : 0;
|
|
4086
2095
|
STORE_BIT(a2, p2, b);
|
|
4087
2096
|
p2 += s2;
|
|
4088
2097
|
}
|
|
4089
2098
|
} else {
|
|
4090
2099
|
for (; i--;) {
|
|
4091
2100
|
GET_DATA_STRIDE(p1, s1, dtype, x);
|
|
4092
|
-
b = (
|
|
2101
|
+
b = (m_signbit(x)) ? 1 : 0;
|
|
4093
2102
|
STORE_BIT(a2, p2, b);
|
|
4094
2103
|
p2 += s2;
|
|
4095
2104
|
}
|
|
4096
2105
|
}
|
|
4097
2106
|
}
|
|
4098
2107
|
|
|
4099
|
-
static VALUE
|
|
2108
|
+
static VALUE dfloat_signbit(VALUE self) {
|
|
4100
2109
|
ndfunc_arg_in_t ain[1] = { { cT, 0 } };
|
|
4101
2110
|
ndfunc_arg_out_t aout[1] = { { numo_cBit, 0 } };
|
|
4102
|
-
ndfunc_t ndf = {
|
|
2111
|
+
ndfunc_t ndf = { iter_dfloat_signbit, FULL_LOOP, 1, 1, ain, aout };
|
|
4103
2112
|
|
|
4104
2113
|
return na_ndloop(&ndf, 1, self);
|
|
4105
2114
|
}
|
|
4106
2115
|
|
|
4107
|
-
static void
|
|
2116
|
+
static void iter_dfloat_modf(na_loop_t* const lp) {
|
|
4108
2117
|
size_t i;
|
|
4109
|
-
char*
|
|
4110
|
-
|
|
4111
|
-
|
|
4112
|
-
ssize_t s1, s2;
|
|
4113
|
-
size_t* idx1;
|
|
4114
|
-
dtype x;
|
|
4115
|
-
BIT_DIGIT b;
|
|
2118
|
+
char *p1, *p2, *p3;
|
|
2119
|
+
ssize_t s1, s2, s3;
|
|
2120
|
+
dtype x, y, z;
|
|
4116
2121
|
INIT_COUNTER(lp, i);
|
|
4117
|
-
|
|
4118
|
-
|
|
4119
|
-
|
|
4120
|
-
|
|
4121
|
-
|
|
4122
|
-
|
|
4123
|
-
|
|
4124
|
-
|
|
4125
|
-
}
|
|
4126
|
-
} else {
|
|
4127
|
-
for (; i--;) {
|
|
4128
|
-
GET_DATA_STRIDE(p1, s1, dtype, x);
|
|
4129
|
-
b = (m_isfinite(x)) ? 1 : 0;
|
|
4130
|
-
STORE_BIT(a2, p2, b);
|
|
4131
|
-
p2 += s2;
|
|
4132
|
-
}
|
|
2122
|
+
INIT_PTR(lp, 0, p1, s1);
|
|
2123
|
+
INIT_PTR(lp, 1, p2, s2);
|
|
2124
|
+
INIT_PTR(lp, 2, p3, s3);
|
|
2125
|
+
for (; i--;) {
|
|
2126
|
+
GET_DATA_STRIDE(p1, s1, dtype, x);
|
|
2127
|
+
m_modf(x, y, z);
|
|
2128
|
+
SET_DATA_STRIDE(p2, s2, dtype, y);
|
|
2129
|
+
SET_DATA_STRIDE(p3, s3, dtype, z);
|
|
4133
2130
|
}
|
|
4134
2131
|
}
|
|
4135
2132
|
|
|
4136
|
-
static VALUE
|
|
2133
|
+
static VALUE dfloat_modf(VALUE self) {
|
|
4137
2134
|
ndfunc_arg_in_t ain[1] = { { cT, 0 } };
|
|
4138
|
-
ndfunc_arg_out_t aout[
|
|
4139
|
-
ndfunc_t ndf = {
|
|
2135
|
+
ndfunc_arg_out_t aout[2] = { { cT, 0 }, { cT, 0 } };
|
|
2136
|
+
ndfunc_t ndf = { iter_dfloat_modf, STRIDE_LOOP, 1, 2, ain, aout };
|
|
4140
2137
|
|
|
4141
2138
|
return na_ndloop(&ndf, 1, self);
|
|
4142
2139
|
}
|