numo-narray-alt 0.9.11 → 0.9.12
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile +0 -1
- data/README.md +7 -0
- data/ext/numo/narray/numo/narray.h +2 -2
- data/ext/numo/narray/numo/types/robj_macro.h +1 -1
- data/ext/numo/narray/src/mh/bincount.h +233 -0
- data/ext/numo/narray/src/mh/bit/and.h +225 -0
- data/ext/numo/narray/src/mh/bit/left_shift.h +225 -0
- data/ext/numo/narray/src/mh/bit/not.h +173 -0
- data/ext/numo/narray/src/mh/bit/or.h +225 -0
- data/ext/numo/narray/src/mh/bit/right_shift.h +225 -0
- data/ext/numo/narray/src/mh/bit/xor.h +225 -0
- data/ext/numo/narray/src/mh/coerce_cast.h +9 -0
- data/ext/numo/narray/src/mh/comp/binary_func.h +37 -0
- data/ext/numo/narray/src/mh/comp/eq.h +26 -0
- data/ext/numo/narray/src/mh/comp/ge.h +26 -0
- data/ext/numo/narray/src/mh/comp/gt.h +26 -0
- data/ext/numo/narray/src/mh/comp/le.h +26 -0
- data/ext/numo/narray/src/mh/comp/lt.h +26 -0
- data/ext/numo/narray/src/mh/comp/ne.h +26 -0
- data/ext/numo/narray/src/mh/comp/nearly_eq.h +26 -0
- data/ext/numo/narray/src/mh/divmod.h +142 -0
- data/ext/numo/narray/src/mh/eye.h +1 -1
- data/ext/numo/narray/src/mh/fill.h +94 -0
- data/ext/numo/narray/src/mh/format.h +108 -0
- data/ext/numo/narray/src/mh/format_to_a.h +89 -0
- data/ext/numo/narray/src/mh/inspect.h +33 -0
- data/ext/numo/narray/src/mh/isfinite.h +42 -0
- data/ext/numo/narray/src/mh/isinf.h +42 -0
- data/ext/numo/narray/src/mh/isnan.h +42 -0
- data/ext/numo/narray/src/mh/isneginf.h +42 -0
- data/ext/numo/narray/src/mh/isposinf.h +42 -0
- data/ext/numo/narray/src/mh/math/acos.h +2 -2
- data/ext/numo/narray/src/mh/math/acosh.h +2 -2
- data/ext/numo/narray/src/mh/math/asin.h +2 -2
- data/ext/numo/narray/src/mh/math/asinh.h +2 -2
- data/ext/numo/narray/src/mh/math/atan.h +2 -2
- data/ext/numo/narray/src/mh/math/atan2.h +3 -3
- data/ext/numo/narray/src/mh/math/atanh.h +2 -2
- data/ext/numo/narray/src/mh/math/cbrt.h +2 -2
- data/ext/numo/narray/src/mh/math/cos.h +2 -2
- data/ext/numo/narray/src/mh/math/cosh.h +2 -2
- data/ext/numo/narray/src/mh/math/erf.h +2 -2
- data/ext/numo/narray/src/mh/math/erfc.h +2 -2
- data/ext/numo/narray/src/mh/math/exp.h +2 -2
- data/ext/numo/narray/src/mh/math/exp10.h +2 -2
- data/ext/numo/narray/src/mh/math/exp2.h +2 -2
- data/ext/numo/narray/src/mh/math/expm1.h +2 -2
- data/ext/numo/narray/src/mh/math/frexp.h +3 -3
- data/ext/numo/narray/src/mh/math/hypot.h +3 -3
- data/ext/numo/narray/src/mh/math/ldexp.h +3 -3
- data/ext/numo/narray/src/mh/math/log.h +2 -2
- data/ext/numo/narray/src/mh/math/log10.h +2 -2
- data/ext/numo/narray/src/mh/math/log1p.h +2 -2
- data/ext/numo/narray/src/mh/math/log2.h +2 -2
- data/ext/numo/narray/src/mh/math/sin.h +2 -2
- data/ext/numo/narray/src/mh/math/sinc.h +2 -2
- data/ext/numo/narray/src/mh/math/sinh.h +2 -2
- data/ext/numo/narray/src/mh/math/sqrt.h +8 -8
- data/ext/numo/narray/src/mh/math/tan.h +2 -2
- data/ext/numo/narray/src/mh/math/tanh.h +2 -2
- data/ext/numo/narray/src/mh/math/unary_func.h +3 -3
- data/ext/numo/narray/src/mh/op/add.h +78 -0
- data/ext/numo/narray/src/mh/op/binary_func.h +423 -0
- data/ext/numo/narray/src/mh/op/div.h +118 -0
- data/ext/numo/narray/src/mh/op/mod.h +108 -0
- data/ext/numo/narray/src/mh/op/mul.h +78 -0
- data/ext/numo/narray/src/mh/op/sub.h +78 -0
- data/ext/numo/narray/src/mh/rand.h +2 -2
- data/ext/numo/narray/src/mh/round/ceil.h +11 -0
- data/ext/numo/narray/src/mh/round/floor.h +11 -0
- data/ext/numo/narray/src/mh/round/rint.h +9 -0
- data/ext/numo/narray/src/mh/round/round.h +11 -0
- data/ext/numo/narray/src/mh/round/trunc.h +11 -0
- data/ext/numo/narray/src/mh/round/unary_func.h +127 -0
- data/ext/numo/narray/src/mh/to_a.h +78 -0
- data/ext/numo/narray/src/t_bit.c +45 -234
- data/ext/numo/narray/src/t_dcomplex.c +584 -1809
- data/ext/numo/narray/src/t_dfloat.c +429 -2432
- data/ext/numo/narray/src/t_int16.c +481 -2283
- data/ext/numo/narray/src/t_int32.c +481 -2283
- data/ext/numo/narray/src/t_int64.c +481 -2283
- data/ext/numo/narray/src/t_int8.c +408 -1873
- data/ext/numo/narray/src/t_robject.c +448 -1977
- data/ext/numo/narray/src/t_scomplex.c +584 -1809
- data/ext/numo/narray/src/t_sfloat.c +429 -2434
- data/ext/numo/narray/src/t_uint16.c +480 -2278
- data/ext/numo/narray/src/t_uint32.c +480 -2278
- data/ext/numo/narray/src/t_uint64.c +480 -2278
- data/ext/numo/narray/src/t_uint8.c +407 -1868
- metadata +41 -2
|
@@ -42,7 +42,36 @@ static ID id_to_a;
|
|
|
42
42
|
VALUE cT;
|
|
43
43
|
extern VALUE cRT;
|
|
44
44
|
|
|
45
|
+
#include "mh/coerce_cast.h"
|
|
46
|
+
#include "mh/to_a.h"
|
|
47
|
+
#include "mh/fill.h"
|
|
48
|
+
#include "mh/format.h"
|
|
49
|
+
#include "mh/format_to_a.h"
|
|
50
|
+
#include "mh/inspect.h"
|
|
51
|
+
#include "mh/op/add.h"
|
|
52
|
+
#include "mh/op/sub.h"
|
|
53
|
+
#include "mh/op/mul.h"
|
|
54
|
+
#include "mh/op/div.h"
|
|
55
|
+
#include "mh/op/mod.h"
|
|
56
|
+
#include "mh/divmod.h"
|
|
57
|
+
#include "mh/round/floor.h"
|
|
58
|
+
#include "mh/round/round.h"
|
|
59
|
+
#include "mh/round/ceil.h"
|
|
60
|
+
#include "mh/round/trunc.h"
|
|
61
|
+
#include "mh/round/rint.h"
|
|
62
|
+
#include "mh/comp/eq.h"
|
|
63
|
+
#include "mh/comp/ne.h"
|
|
64
|
+
#include "mh/comp/nearly_eq.h"
|
|
65
|
+
#include "mh/comp/gt.h"
|
|
66
|
+
#include "mh/comp/ge.h"
|
|
67
|
+
#include "mh/comp/lt.h"
|
|
68
|
+
#include "mh/comp/le.h"
|
|
45
69
|
#include "mh/clip.h"
|
|
70
|
+
#include "mh/isnan.h"
|
|
71
|
+
#include "mh/isinf.h"
|
|
72
|
+
#include "mh/isposinf.h"
|
|
73
|
+
#include "mh/isneginf.h"
|
|
74
|
+
#include "mh/isfinite.h"
|
|
46
75
|
#include "mh/sum.h"
|
|
47
76
|
#include "mh/prod.h"
|
|
48
77
|
#include "mh/mean.h"
|
|
@@ -98,7 +127,43 @@ extern VALUE cRT;
|
|
|
98
127
|
|
|
99
128
|
typedef float sfloat; // Type aliases for shorter notation
|
|
100
129
|
// following the codebase naming convention.
|
|
130
|
+
DEF_NARRAY_COERCE_CAST_METHOD_FUNC(sfloat)
|
|
131
|
+
DEF_NARRAY_TO_A_METHOD_FUNC(sfloat)
|
|
132
|
+
DEF_NARRAY_FILL_METHOD_FUNC(sfloat)
|
|
133
|
+
DEF_NARRAY_FORMAT_METHOD_FUNC(sfloat)
|
|
134
|
+
DEF_NARRAY_FORMAT_TO_A_METHOD_FUNC(sfloat)
|
|
135
|
+
DEF_NARRAY_INSPECT_METHOD_FUNC(sfloat)
|
|
136
|
+
#ifdef __SSE2__
|
|
137
|
+
DEF_NARRAY_SFLT_ADD_SSE2_METHOD_FUNC()
|
|
138
|
+
DEF_NARRAY_SFLT_SUB_SSE2_METHOD_FUNC()
|
|
139
|
+
DEF_NARRAY_SFLT_MUL_SSE2_METHOD_FUNC()
|
|
140
|
+
DEF_NARRAY_SFLT_DIV_SSE2_METHOD_FUNC()
|
|
141
|
+
#else
|
|
142
|
+
DEF_NARRAY_ADD_METHOD_FUNC(sfloat, numo_cSFloat)
|
|
143
|
+
DEF_NARRAY_SUB_METHOD_FUNC(sfloat, numo_cSFloat)
|
|
144
|
+
DEF_NARRAY_MUL_METHOD_FUNC(sfloat, numo_cSFloat)
|
|
145
|
+
DEF_NARRAY_FLT_DIV_METHOD_FUNC(sfloat, numo_cSFloat)
|
|
146
|
+
#endif
|
|
147
|
+
DEF_NARRAY_FLT_MOD_METHOD_FUNC(sfloat, numo_cSFloat)
|
|
148
|
+
DEF_NARRAY_FLT_DIVMOD_METHOD_FUNC(sfloat, numo_cSFloat)
|
|
149
|
+
DEF_NARRAY_FLT_FLOOR_METHOD_FUNC(sfloat, numo_cSFloat)
|
|
150
|
+
DEF_NARRAY_FLT_ROUND_METHOD_FUNC(sfloat, numo_cSFloat)
|
|
151
|
+
DEF_NARRAY_FLT_CEIL_METHOD_FUNC(sfloat, numo_cSFloat)
|
|
152
|
+
DEF_NARRAY_FLT_TRUNC_METHOD_FUNC(sfloat, numo_cSFloat)
|
|
153
|
+
DEF_NARRAY_FLT_RINT_METHOD_FUNC(sfloat, numo_cSFloat)
|
|
154
|
+
DEF_NARRAY_EQ_METHOD_FUNC(sfloat, numo_cSFloat)
|
|
155
|
+
DEF_NARRAY_NE_METHOD_FUNC(sfloat, numo_cSFloat)
|
|
156
|
+
DEF_NARRAY_NEARLY_EQ_METHOD_FUNC(sfloat, numo_cSFloat)
|
|
157
|
+
DEF_NARRAY_GT_METHOD_FUNC(sfloat, numo_cSFloat)
|
|
158
|
+
DEF_NARRAY_GE_METHOD_FUNC(sfloat, numo_cSFloat)
|
|
159
|
+
DEF_NARRAY_LT_METHOD_FUNC(sfloat, numo_cSFloat)
|
|
160
|
+
DEF_NARRAY_LE_METHOD_FUNC(sfloat, numo_cSFloat)
|
|
101
161
|
DEF_NARRAY_CLIP_METHOD_FUNC(sfloat, numo_cSFloat)
|
|
162
|
+
DEF_NARRAY_FLT_ISNAN_METHOD_FUNC(sfloat, numo_cSFloat)
|
|
163
|
+
DEF_NARRAY_FLT_ISINF_METHOD_FUNC(sfloat, numo_cSFloat)
|
|
164
|
+
DEF_NARRAY_FLT_ISPOSINF_METHOD_FUNC(sfloat, numo_cSFloat)
|
|
165
|
+
DEF_NARRAY_FLT_ISNEGINF_METHOD_FUNC(sfloat, numo_cSFloat)
|
|
166
|
+
DEF_NARRAY_FLT_ISFINITE_METHOD_FUNC(sfloat, numo_cSFloat)
|
|
102
167
|
DEF_NARRAY_FLT_SUM_METHOD_FUNC(sfloat, numo_cSFloat)
|
|
103
168
|
DEF_NARRAY_FLT_PROD_METHOD_FUNC(sfloat, numo_cSFloat)
|
|
104
169
|
DEF_NARRAY_FLT_MEAN_METHOD_FUNC(sfloat, numo_cSFloat, float, numo_cSFloat)
|
|
@@ -1275,171 +1340,6 @@ static VALUE sfloat_aset(int argc, VALUE* argv, VALUE self) {
|
|
|
1275
1340
|
return argv[argc];
|
|
1276
1341
|
}
|
|
1277
1342
|
|
|
1278
|
-
static VALUE sfloat_coerce_cast(VALUE self, VALUE type) {
|
|
1279
|
-
return Qnil;
|
|
1280
|
-
}
|
|
1281
|
-
|
|
1282
|
-
static void iter_sfloat_to_a(na_loop_t* const lp) {
|
|
1283
|
-
size_t i, s1;
|
|
1284
|
-
char* p1;
|
|
1285
|
-
size_t* idx1;
|
|
1286
|
-
dtype x;
|
|
1287
|
-
volatile VALUE a, y;
|
|
1288
|
-
|
|
1289
|
-
INIT_COUNTER(lp, i);
|
|
1290
|
-
INIT_PTR_IDX(lp, 0, p1, s1, idx1);
|
|
1291
|
-
a = rb_ary_new2(i);
|
|
1292
|
-
rb_ary_push(lp->args[1].value, a);
|
|
1293
|
-
if (idx1) {
|
|
1294
|
-
for (; i--;) {
|
|
1295
|
-
GET_DATA_INDEX(p1, idx1, dtype, x);
|
|
1296
|
-
y = m_data_to_num(x);
|
|
1297
|
-
rb_ary_push(a, y);
|
|
1298
|
-
}
|
|
1299
|
-
} else {
|
|
1300
|
-
for (; i--;) {
|
|
1301
|
-
GET_DATA_STRIDE(p1, s1, dtype, x);
|
|
1302
|
-
y = m_data_to_num(x);
|
|
1303
|
-
rb_ary_push(a, y);
|
|
1304
|
-
}
|
|
1305
|
-
}
|
|
1306
|
-
}
|
|
1307
|
-
|
|
1308
|
-
static VALUE sfloat_to_a(VALUE self) {
|
|
1309
|
-
ndfunc_arg_in_t ain[3] = { { Qnil, 0 }, { sym_loop_opt }, { sym_option } };
|
|
1310
|
-
ndfunc_arg_out_t aout[1] = { { rb_cArray, 0 } }; // dummy?
|
|
1311
|
-
ndfunc_t ndf = { iter_sfloat_to_a, FULL_LOOP_NIP, 3, 1, ain, aout };
|
|
1312
|
-
return na_ndloop_cast_narray_to_rarray(&ndf, self, Qnil);
|
|
1313
|
-
}
|
|
1314
|
-
|
|
1315
|
-
static void iter_sfloat_fill(na_loop_t* const lp) {
|
|
1316
|
-
size_t i;
|
|
1317
|
-
char* p1;
|
|
1318
|
-
ssize_t s1;
|
|
1319
|
-
size_t* idx1;
|
|
1320
|
-
VALUE x = lp->option;
|
|
1321
|
-
dtype y;
|
|
1322
|
-
INIT_COUNTER(lp, i);
|
|
1323
|
-
INIT_PTR_IDX(lp, 0, p1, s1, idx1);
|
|
1324
|
-
y = m_num_to_data(x);
|
|
1325
|
-
if (idx1) {
|
|
1326
|
-
for (; i--;) {
|
|
1327
|
-
SET_DATA_INDEX(p1, idx1, dtype, y);
|
|
1328
|
-
}
|
|
1329
|
-
} else {
|
|
1330
|
-
for (; i--;) {
|
|
1331
|
-
SET_DATA_STRIDE(p1, s1, dtype, y);
|
|
1332
|
-
}
|
|
1333
|
-
}
|
|
1334
|
-
}
|
|
1335
|
-
|
|
1336
|
-
static VALUE sfloat_fill(VALUE self, VALUE val) {
|
|
1337
|
-
ndfunc_arg_in_t ain[2] = { { OVERWRITE, 0 }, { sym_option } };
|
|
1338
|
-
ndfunc_t ndf = { iter_sfloat_fill, FULL_LOOP, 2, 0, ain, 0 };
|
|
1339
|
-
|
|
1340
|
-
na_ndloop(&ndf, 2, self, val);
|
|
1341
|
-
return self;
|
|
1342
|
-
}
|
|
1343
|
-
|
|
1344
|
-
static VALUE format_sfloat(VALUE fmt, dtype* x) {
|
|
1345
|
-
// fix-me
|
|
1346
|
-
char s[48];
|
|
1347
|
-
int n;
|
|
1348
|
-
|
|
1349
|
-
if (NIL_P(fmt)) {
|
|
1350
|
-
n = m_sprintf(s, *x);
|
|
1351
|
-
return rb_str_new(s, n);
|
|
1352
|
-
}
|
|
1353
|
-
return rb_funcall(fmt, '%', 1, m_data_to_num(*x));
|
|
1354
|
-
}
|
|
1355
|
-
|
|
1356
|
-
static void iter_sfloat_format(na_loop_t* const lp) {
|
|
1357
|
-
size_t i;
|
|
1358
|
-
char *p1, *p2;
|
|
1359
|
-
ssize_t s1, s2;
|
|
1360
|
-
size_t* idx1;
|
|
1361
|
-
dtype* x;
|
|
1362
|
-
VALUE y;
|
|
1363
|
-
VALUE fmt = lp->option;
|
|
1364
|
-
INIT_COUNTER(lp, i);
|
|
1365
|
-
INIT_PTR_IDX(lp, 0, p1, s1, idx1);
|
|
1366
|
-
INIT_PTR(lp, 1, p2, s2);
|
|
1367
|
-
if (idx1) {
|
|
1368
|
-
for (; i--;) {
|
|
1369
|
-
x = (dtype*)(p1 + *idx1);
|
|
1370
|
-
idx1++;
|
|
1371
|
-
y = format_sfloat(fmt, x);
|
|
1372
|
-
SET_DATA_STRIDE(p2, s2, VALUE, y);
|
|
1373
|
-
}
|
|
1374
|
-
} else {
|
|
1375
|
-
for (; i--;) {
|
|
1376
|
-
x = (dtype*)p1;
|
|
1377
|
-
p1 += s1;
|
|
1378
|
-
y = format_sfloat(fmt, x);
|
|
1379
|
-
SET_DATA_STRIDE(p2, s2, VALUE, y);
|
|
1380
|
-
}
|
|
1381
|
-
}
|
|
1382
|
-
}
|
|
1383
|
-
|
|
1384
|
-
static VALUE sfloat_format(int argc, VALUE* argv, VALUE self) {
|
|
1385
|
-
VALUE fmt = Qnil;
|
|
1386
|
-
|
|
1387
|
-
ndfunc_arg_in_t ain[2] = { { Qnil, 0 }, { sym_option } };
|
|
1388
|
-
ndfunc_arg_out_t aout[1] = { { numo_cRObject, 0 } };
|
|
1389
|
-
ndfunc_t ndf = { iter_sfloat_format, FULL_LOOP_NIP, 2, 1, ain, aout };
|
|
1390
|
-
|
|
1391
|
-
rb_scan_args(argc, argv, "01", &fmt);
|
|
1392
|
-
return na_ndloop(&ndf, 2, self, fmt);
|
|
1393
|
-
}
|
|
1394
|
-
|
|
1395
|
-
static void iter_sfloat_format_to_a(na_loop_t* const lp) {
|
|
1396
|
-
size_t i;
|
|
1397
|
-
char* p1;
|
|
1398
|
-
ssize_t s1;
|
|
1399
|
-
size_t* idx1;
|
|
1400
|
-
dtype* x;
|
|
1401
|
-
VALUE y;
|
|
1402
|
-
volatile VALUE a;
|
|
1403
|
-
VALUE fmt = lp->option;
|
|
1404
|
-
INIT_COUNTER(lp, i);
|
|
1405
|
-
INIT_PTR_IDX(lp, 0, p1, s1, idx1);
|
|
1406
|
-
a = rb_ary_new2(i);
|
|
1407
|
-
rb_ary_push(lp->args[1].value, a);
|
|
1408
|
-
if (idx1) {
|
|
1409
|
-
for (; i--;) {
|
|
1410
|
-
x = (dtype*)(p1 + *idx1);
|
|
1411
|
-
idx1++;
|
|
1412
|
-
y = format_sfloat(fmt, x);
|
|
1413
|
-
rb_ary_push(a, y);
|
|
1414
|
-
}
|
|
1415
|
-
} else {
|
|
1416
|
-
for (; i--;) {
|
|
1417
|
-
x = (dtype*)p1;
|
|
1418
|
-
p1 += s1;
|
|
1419
|
-
y = format_sfloat(fmt, x);
|
|
1420
|
-
rb_ary_push(a, y);
|
|
1421
|
-
}
|
|
1422
|
-
}
|
|
1423
|
-
}
|
|
1424
|
-
|
|
1425
|
-
static VALUE sfloat_format_to_a(int argc, VALUE* argv, VALUE self) {
|
|
1426
|
-
VALUE fmt = Qnil;
|
|
1427
|
-
ndfunc_arg_in_t ain[3] = { { Qnil, 0 }, { sym_loop_opt }, { sym_option } };
|
|
1428
|
-
ndfunc_arg_out_t aout[1] = { { rb_cArray, 0 } }; // dummy?
|
|
1429
|
-
ndfunc_t ndf = { iter_sfloat_format_to_a, FULL_LOOP_NIP, 3, 1, ain, aout };
|
|
1430
|
-
|
|
1431
|
-
rb_scan_args(argc, argv, "01", &fmt);
|
|
1432
|
-
return na_ndloop_cast_narray_to_rarray(&ndf, self, fmt);
|
|
1433
|
-
}
|
|
1434
|
-
|
|
1435
|
-
static VALUE iter_sfloat_inspect(char* ptr, size_t pos, VALUE fmt) {
|
|
1436
|
-
return format_sfloat(fmt, (dtype*)(ptr + pos));
|
|
1437
|
-
}
|
|
1438
|
-
|
|
1439
|
-
static VALUE sfloat_inspect(VALUE ary) {
|
|
1440
|
-
return na_ndloop_inspect(ary, iter_sfloat_inspect, Qnil);
|
|
1441
|
-
}
|
|
1442
|
-
|
|
1443
1343
|
static void iter_sfloat_each(na_loop_t* const lp) {
|
|
1444
1344
|
size_t i, s1;
|
|
1445
1345
|
char* p1;
|
|
@@ -1722,2354 +1622,461 @@ static VALUE sfloat_abs(VALUE self) {
|
|
|
1722
1622
|
return na_ndloop(&ndf, 1, self);
|
|
1723
1623
|
}
|
|
1724
1624
|
|
|
1725
|
-
|
|
1726
|
-
|
|
1727
|
-
|
|
1728
|
-
static void iter_sfloat_add(na_loop_t* const lp) {
|
|
1729
|
-
size_t i = 0;
|
|
1730
|
-
size_t n;
|
|
1625
|
+
static void iter_sfloat_pow(na_loop_t* const lp) {
|
|
1626
|
+
size_t i;
|
|
1731
1627
|
char *p1, *p2, *p3;
|
|
1732
1628
|
ssize_t s1, s2, s3;
|
|
1733
|
-
|
|
1734
|
-
|
|
1735
|
-
size_t cnt;
|
|
1736
|
-
size_t cnt_simd_loop = -1;
|
|
1737
|
-
|
|
1738
|
-
__m128 a;
|
|
1739
|
-
__m128 b;
|
|
1740
|
-
|
|
1741
|
-
size_t num_pack; // Number of elements packed for SIMD.
|
|
1742
|
-
num_pack = SIMD_ALIGNMENT_SIZE / sizeof(dtype);
|
|
1743
|
-
#endif
|
|
1744
|
-
INIT_COUNTER(lp, n);
|
|
1629
|
+
dtype x, y;
|
|
1630
|
+
INIT_COUNTER(lp, i);
|
|
1745
1631
|
INIT_PTR(lp, 0, p1, s1);
|
|
1746
1632
|
INIT_PTR(lp, 1, p2, s2);
|
|
1747
1633
|
INIT_PTR(lp, 2, p3, s3);
|
|
1748
|
-
|
|
1749
|
-
//
|
|
1750
|
-
if (is_aligned(p1, sizeof(dtype)) && is_aligned(p2, sizeof(dtype)) &&
|
|
1751
|
-
is_aligned(p3, sizeof(dtype))) {
|
|
1752
|
-
|
|
1753
|
-
if (s1 == sizeof(dtype) && s2 == sizeof(dtype) && s3 == sizeof(dtype)) {
|
|
1754
|
-
#ifdef __SSE2__
|
|
1755
|
-
// Check number of elements. & Check same alignment.
|
|
1756
|
-
if ((n >= num_pack) &&
|
|
1757
|
-
is_same_aligned3(
|
|
1758
|
-
&((dtype*)p1)[i], &((dtype*)p2)[i], &((dtype*)p3)[i], SIMD_ALIGNMENT_SIZE
|
|
1759
|
-
)) {
|
|
1760
|
-
// Calculate up to the position just before the start of SIMD computation.
|
|
1761
|
-
cnt = get_count_of_elements_not_aligned_to_simd_size(
|
|
1762
|
-
&((dtype*)p1)[i], SIMD_ALIGNMENT_SIZE, sizeof(dtype)
|
|
1763
|
-
);
|
|
1764
|
-
#endif
|
|
1765
|
-
if (p1 == p3) { // inplace case
|
|
1766
|
-
#ifdef __SSE2__
|
|
1767
|
-
for (; i < cnt; i++) {
|
|
1768
|
-
#else
|
|
1769
|
-
for (; i < n; i++) {
|
|
1770
|
-
check_intdivzero(((dtype*)p2)[i]);
|
|
1771
|
-
#endif
|
|
1772
|
-
((dtype*)p1)[i] = m_add(((dtype*)p1)[i], ((dtype*)p2)[i]);
|
|
1773
|
-
}
|
|
1774
|
-
} else {
|
|
1775
|
-
#ifdef __SSE2__
|
|
1776
|
-
for (; i < cnt; i++) {
|
|
1777
|
-
#else
|
|
1778
|
-
for (; i < n; i++) {
|
|
1779
|
-
check_intdivzero(((dtype*)p2)[i]);
|
|
1780
|
-
#endif
|
|
1781
|
-
((dtype*)p3)[i] = m_add(((dtype*)p1)[i], ((dtype*)p2)[i]);
|
|
1782
|
-
}
|
|
1783
|
-
}
|
|
1784
|
-
|
|
1785
|
-
#ifdef __SSE2__
|
|
1786
|
-
// Get the count of SIMD computation loops.
|
|
1787
|
-
cnt_simd_loop = (n - i) % num_pack;
|
|
1788
|
-
|
|
1789
|
-
// SIMD computation.
|
|
1790
|
-
if (p1 == p3) { // inplace case
|
|
1791
|
-
for (; i < n - cnt_simd_loop; i += num_pack) {
|
|
1792
|
-
a = _mm_load_ps(&((dtype*)p1)[i]);
|
|
1793
|
-
b = _mm_load_ps(&((dtype*)p2)[i]);
|
|
1794
|
-
a = _mm_add_ps(a, b);
|
|
1795
|
-
_mm_store_ps(&((dtype*)p1)[i], a);
|
|
1796
|
-
}
|
|
1797
|
-
} else {
|
|
1798
|
-
for (; i < n - cnt_simd_loop; i += num_pack) {
|
|
1799
|
-
a = _mm_load_ps(&((dtype*)p1)[i]);
|
|
1800
|
-
b = _mm_load_ps(&((dtype*)p2)[i]);
|
|
1801
|
-
a = _mm_add_ps(a, b);
|
|
1802
|
-
_mm_stream_ps(&((dtype*)p3)[i], a);
|
|
1803
|
-
}
|
|
1804
|
-
}
|
|
1805
|
-
}
|
|
1806
|
-
|
|
1807
|
-
// Compute the remainder of the SIMD operation.
|
|
1808
|
-
if (cnt_simd_loop != 0) {
|
|
1809
|
-
if (p1 == p3) { // inplace case
|
|
1810
|
-
for (; i < n; i++) {
|
|
1811
|
-
check_intdivzero(((dtype*)p2)[i]);
|
|
1812
|
-
((dtype*)p1)[i] = m_add(((dtype*)p1)[i], ((dtype*)p2)[i]);
|
|
1813
|
-
}
|
|
1814
|
-
} else {
|
|
1815
|
-
for (; i < n; i++) {
|
|
1816
|
-
check_intdivzero(((dtype*)p2)[i]);
|
|
1817
|
-
((dtype*)p3)[i] = m_add(((dtype*)p1)[i], ((dtype*)p2)[i]);
|
|
1818
|
-
}
|
|
1819
|
-
}
|
|
1820
|
-
}
|
|
1821
|
-
#endif
|
|
1822
|
-
return;
|
|
1823
|
-
}
|
|
1824
|
-
|
|
1825
|
-
if (is_aligned_step(s1, sizeof(dtype)) && is_aligned_step(s2, sizeof(dtype)) &&
|
|
1826
|
-
is_aligned_step(s3, sizeof(dtype))) {
|
|
1827
|
-
//
|
|
1828
|
-
|
|
1829
|
-
if (s2 == 0) { // Broadcasting from scalar value.
|
|
1830
|
-
check_intdivzero(*(dtype*)p2);
|
|
1831
|
-
if (s1 == sizeof(dtype) && s3 == sizeof(dtype)) {
|
|
1832
|
-
#ifdef __SSE2__
|
|
1833
|
-
// Broadcast a scalar value and use it for SIMD computation.
|
|
1834
|
-
b = _mm_load1_ps(&((dtype*)p2)[0]);
|
|
1835
|
-
|
|
1836
|
-
// Check number of elements. & Check same alignment.
|
|
1837
|
-
if ((n >= num_pack) &&
|
|
1838
|
-
is_same_aligned2(&((dtype*)p1)[i], &((dtype*)p3)[i], SIMD_ALIGNMENT_SIZE)) {
|
|
1839
|
-
// Calculate up to the position just before the start of SIMD computation.
|
|
1840
|
-
cnt = get_count_of_elements_not_aligned_to_simd_size(
|
|
1841
|
-
&((dtype*)p1)[i], SIMD_ALIGNMENT_SIZE, sizeof(dtype)
|
|
1842
|
-
);
|
|
1843
|
-
#endif
|
|
1844
|
-
if (p1 == p3) { // inplace case
|
|
1845
|
-
#ifdef __SSE2__
|
|
1846
|
-
for (; i < cnt; i++) {
|
|
1847
|
-
#else
|
|
1848
|
-
for (; i < n; i++) {
|
|
1849
|
-
#endif
|
|
1850
|
-
((dtype*)p1)[i] = m_add(((dtype*)p1)[i], *(dtype*)p2);
|
|
1851
|
-
}
|
|
1852
|
-
} else {
|
|
1853
|
-
#ifdef __SSE2__
|
|
1854
|
-
for (; i < cnt; i++) {
|
|
1855
|
-
#else
|
|
1856
|
-
for (; i < n; i++) {
|
|
1857
|
-
#endif
|
|
1858
|
-
((dtype*)p3)[i] = m_add(((dtype*)p1)[i], *(dtype*)p2);
|
|
1859
|
-
}
|
|
1860
|
-
}
|
|
1861
|
-
|
|
1862
|
-
#ifdef __SSE2__
|
|
1863
|
-
// Get the count of SIMD computation loops.
|
|
1864
|
-
cnt_simd_loop = (n - i) % num_pack;
|
|
1865
|
-
|
|
1866
|
-
// SIMD computation.
|
|
1867
|
-
if (p1 == p3) { // inplace case
|
|
1868
|
-
for (; i < n - cnt_simd_loop; i += num_pack) {
|
|
1869
|
-
a = _mm_load_ps(&((dtype*)p1)[i]);
|
|
1870
|
-
a = _mm_add_ps(a, b);
|
|
1871
|
-
_mm_store_ps(&((dtype*)p1)[i], a);
|
|
1872
|
-
}
|
|
1873
|
-
} else {
|
|
1874
|
-
for (; i < n - cnt_simd_loop; i += num_pack) {
|
|
1875
|
-
a = _mm_load_ps(&((dtype*)p1)[i]);
|
|
1876
|
-
a = _mm_add_ps(a, b);
|
|
1877
|
-
_mm_stream_ps(&((dtype*)p3)[i], a);
|
|
1878
|
-
}
|
|
1879
|
-
}
|
|
1880
|
-
}
|
|
1881
|
-
|
|
1882
|
-
// Compute the remainder of the SIMD operation.
|
|
1883
|
-
if (cnt_simd_loop != 0) {
|
|
1884
|
-
if (p1 == p3) { // inplace case
|
|
1885
|
-
for (; i < n; i++) {
|
|
1886
|
-
((dtype*)p1)[i] = m_add(((dtype*)p1)[i], *(dtype*)p2);
|
|
1887
|
-
}
|
|
1888
|
-
} else {
|
|
1889
|
-
for (; i < n; i++) {
|
|
1890
|
-
((dtype*)p3)[i] = m_add(((dtype*)p1)[i], *(dtype*)p2);
|
|
1891
|
-
}
|
|
1892
|
-
}
|
|
1893
|
-
}
|
|
1894
|
-
#endif
|
|
1895
|
-
} else {
|
|
1896
|
-
for (i = 0; i < n; i++) {
|
|
1897
|
-
*(dtype*)p3 = m_add(*(dtype*)p1, *(dtype*)p2);
|
|
1898
|
-
p1 += s1;
|
|
1899
|
-
p3 += s3;
|
|
1900
|
-
}
|
|
1901
|
-
}
|
|
1902
|
-
} else {
|
|
1903
|
-
if (p1 == p3) { // inplace case
|
|
1904
|
-
for (i = 0; i < n; i++) {
|
|
1905
|
-
check_intdivzero(*(dtype*)p2);
|
|
1906
|
-
*(dtype*)p1 = m_add(*(dtype*)p1, *(dtype*)p2);
|
|
1907
|
-
p1 += s1;
|
|
1908
|
-
p2 += s2;
|
|
1909
|
-
}
|
|
1910
|
-
} else {
|
|
1911
|
-
for (i = 0; i < n; i++) {
|
|
1912
|
-
check_intdivzero(*(dtype*)p2);
|
|
1913
|
-
*(dtype*)p3 = m_add(*(dtype*)p1, *(dtype*)p2);
|
|
1914
|
-
p1 += s1;
|
|
1915
|
-
p2 += s2;
|
|
1916
|
-
p3 += s3;
|
|
1917
|
-
}
|
|
1918
|
-
}
|
|
1919
|
-
}
|
|
1920
|
-
|
|
1921
|
-
return;
|
|
1922
|
-
//
|
|
1923
|
-
}
|
|
1924
|
-
}
|
|
1925
|
-
for (i = 0; i < n; i++) {
|
|
1926
|
-
dtype x, y, z;
|
|
1634
|
+
for (; i--;) {
|
|
1927
1635
|
GET_DATA_STRIDE(p1, s1, dtype, x);
|
|
1928
1636
|
GET_DATA_STRIDE(p2, s2, dtype, y);
|
|
1929
|
-
|
|
1930
|
-
|
|
1931
|
-
|
|
1637
|
+
x = m_pow(x, y);
|
|
1638
|
+
SET_DATA_STRIDE(p3, s3, dtype, x);
|
|
1639
|
+
}
|
|
1640
|
+
}
|
|
1641
|
+
|
|
1642
|
+
static void iter_sfloat_pow_int32(na_loop_t* const lp) {
|
|
1643
|
+
size_t i;
|
|
1644
|
+
char *p1, *p2, *p3;
|
|
1645
|
+
ssize_t s1, s2, s3;
|
|
1646
|
+
dtype x;
|
|
1647
|
+
int32_t y;
|
|
1648
|
+
INIT_COUNTER(lp, i);
|
|
1649
|
+
INIT_PTR(lp, 0, p1, s1);
|
|
1650
|
+
INIT_PTR(lp, 1, p2, s2);
|
|
1651
|
+
INIT_PTR(lp, 2, p3, s3);
|
|
1652
|
+
for (; i--;) {
|
|
1653
|
+
GET_DATA_STRIDE(p1, s1, dtype, x);
|
|
1654
|
+
GET_DATA_STRIDE(p2, s2, int32_t, y);
|
|
1655
|
+
x = m_pow_int(x, y);
|
|
1656
|
+
SET_DATA_STRIDE(p3, s3, dtype, x);
|
|
1932
1657
|
}
|
|
1933
|
-
//
|
|
1934
1658
|
}
|
|
1935
|
-
#undef check_intdivzero
|
|
1936
1659
|
|
|
1937
|
-
static VALUE
|
|
1660
|
+
static VALUE sfloat_pow_self(VALUE self, VALUE other) {
|
|
1938
1661
|
ndfunc_arg_in_t ain[2] = { { cT, 0 }, { cT, 0 } };
|
|
1662
|
+
ndfunc_arg_in_t ain_i[2] = { { cT, 0 }, { numo_cInt32, 0 } };
|
|
1939
1663
|
ndfunc_arg_out_t aout[1] = { { cT, 0 } };
|
|
1940
|
-
ndfunc_t ndf = {
|
|
1664
|
+
ndfunc_t ndf = { iter_sfloat_pow, STRIDE_LOOP, 2, 1, ain, aout };
|
|
1665
|
+
ndfunc_t ndf_i = { iter_sfloat_pow_int32, STRIDE_LOOP, 2, 1, ain_i, aout };
|
|
1941
1666
|
|
|
1942
|
-
|
|
1667
|
+
// fixme : use na.integer?
|
|
1668
|
+
if (FIXNUM_P(other) || rb_obj_is_kind_of(other, numo_cInt32)) {
|
|
1669
|
+
return na_ndloop(&ndf_i, 2, self, other);
|
|
1670
|
+
} else {
|
|
1671
|
+
return na_ndloop(&ndf, 2, self, other);
|
|
1672
|
+
}
|
|
1943
1673
|
}
|
|
1944
1674
|
|
|
1945
|
-
static VALUE
|
|
1675
|
+
static VALUE sfloat_pow(VALUE self, VALUE other) {
|
|
1946
1676
|
|
|
1947
1677
|
VALUE klass, v;
|
|
1948
|
-
|
|
1949
1678
|
klass = na_upcast(rb_obj_class(self), rb_obj_class(other));
|
|
1950
1679
|
if (klass == cT) {
|
|
1951
|
-
return
|
|
1680
|
+
return sfloat_pow_self(self, other);
|
|
1952
1681
|
} else {
|
|
1953
1682
|
v = rb_funcall(klass, id_cast, 1, self);
|
|
1954
|
-
return rb_funcall(v,
|
|
1683
|
+
return rb_funcall(v, id_pow, 1, other);
|
|
1955
1684
|
}
|
|
1956
1685
|
}
|
|
1957
1686
|
|
|
1958
|
-
|
|
1959
|
-
|
|
1960
|
-
|
|
1961
|
-
|
|
1962
|
-
size_t
|
|
1963
|
-
|
|
1964
|
-
char *p1, *p2, *p3;
|
|
1965
|
-
ssize_t s1, s2, s3;
|
|
1966
|
-
|
|
1967
|
-
#ifdef __SSE2__
|
|
1968
|
-
size_t cnt;
|
|
1969
|
-
size_t cnt_simd_loop = -1;
|
|
1970
|
-
|
|
1971
|
-
__m128 a;
|
|
1972
|
-
__m128 b;
|
|
1687
|
+
static void iter_sfloat_minus(na_loop_t* const lp) {
|
|
1688
|
+
size_t i, n;
|
|
1689
|
+
char *p1, *p2;
|
|
1690
|
+
ssize_t s1, s2;
|
|
1691
|
+
size_t *idx1, *idx2;
|
|
1692
|
+
dtype x;
|
|
1973
1693
|
|
|
1974
|
-
size_t num_pack; // Number of elements packed for SIMD.
|
|
1975
|
-
num_pack = SIMD_ALIGNMENT_SIZE / sizeof(dtype);
|
|
1976
|
-
#endif
|
|
1977
1694
|
INIT_COUNTER(lp, n);
|
|
1978
|
-
|
|
1979
|
-
|
|
1980
|
-
INIT_PTR(lp, 2, p3, s3);
|
|
1981
|
-
|
|
1982
|
-
//
|
|
1983
|
-
if (is_aligned(p1, sizeof(dtype)) && is_aligned(p2, sizeof(dtype)) &&
|
|
1984
|
-
is_aligned(p3, sizeof(dtype))) {
|
|
1985
|
-
|
|
1986
|
-
if (s1 == sizeof(dtype) && s2 == sizeof(dtype) && s3 == sizeof(dtype)) {
|
|
1987
|
-
#ifdef __SSE2__
|
|
1988
|
-
// Check number of elements. & Check same alignment.
|
|
1989
|
-
if ((n >= num_pack) &&
|
|
1990
|
-
is_same_aligned3(
|
|
1991
|
-
&((dtype*)p1)[i], &((dtype*)p2)[i], &((dtype*)p3)[i], SIMD_ALIGNMENT_SIZE
|
|
1992
|
-
)) {
|
|
1993
|
-
// Calculate up to the position just before the start of SIMD computation.
|
|
1994
|
-
cnt = get_count_of_elements_not_aligned_to_simd_size(
|
|
1995
|
-
&((dtype*)p1)[i], SIMD_ALIGNMENT_SIZE, sizeof(dtype)
|
|
1996
|
-
);
|
|
1997
|
-
#endif
|
|
1998
|
-
if (p1 == p3) { // inplace case
|
|
1999
|
-
#ifdef __SSE2__
|
|
2000
|
-
for (; i < cnt; i++) {
|
|
2001
|
-
#else
|
|
2002
|
-
for (; i < n; i++) {
|
|
2003
|
-
check_intdivzero(((dtype*)p2)[i]);
|
|
2004
|
-
#endif
|
|
2005
|
-
((dtype*)p1)[i] = m_sub(((dtype*)p1)[i], ((dtype*)p2)[i]);
|
|
2006
|
-
}
|
|
2007
|
-
} else {
|
|
2008
|
-
#ifdef __SSE2__
|
|
2009
|
-
for (; i < cnt; i++) {
|
|
2010
|
-
#else
|
|
2011
|
-
for (; i < n; i++) {
|
|
2012
|
-
check_intdivzero(((dtype*)p2)[i]);
|
|
2013
|
-
#endif
|
|
2014
|
-
((dtype*)p3)[i] = m_sub(((dtype*)p1)[i], ((dtype*)p2)[i]);
|
|
2015
|
-
}
|
|
2016
|
-
}
|
|
2017
|
-
|
|
2018
|
-
#ifdef __SSE2__
|
|
2019
|
-
// Get the count of SIMD computation loops.
|
|
2020
|
-
cnt_simd_loop = (n - i) % num_pack;
|
|
1695
|
+
INIT_PTR_IDX(lp, 0, p1, s1, idx1);
|
|
1696
|
+
INIT_PTR_IDX(lp, 1, p2, s2, idx2);
|
|
2021
1697
|
|
|
2022
|
-
|
|
2023
|
-
|
|
2024
|
-
|
|
2025
|
-
|
|
2026
|
-
|
|
2027
|
-
|
|
2028
|
-
_mm_store_ps(&((dtype*)p1)[i], a);
|
|
2029
|
-
}
|
|
2030
|
-
} else {
|
|
2031
|
-
for (; i < n - cnt_simd_loop; i += num_pack) {
|
|
2032
|
-
a = _mm_load_ps(&((dtype*)p1)[i]);
|
|
2033
|
-
b = _mm_load_ps(&((dtype*)p2)[i]);
|
|
2034
|
-
a = _mm_sub_ps(a, b);
|
|
2035
|
-
_mm_stream_ps(&((dtype*)p3)[i], a);
|
|
2036
|
-
}
|
|
2037
|
-
}
|
|
1698
|
+
if (idx1) {
|
|
1699
|
+
if (idx2) {
|
|
1700
|
+
for (i = 0; i < n; i++) {
|
|
1701
|
+
GET_DATA_INDEX(p1, idx1, dtype, x);
|
|
1702
|
+
x = m_minus(x);
|
|
1703
|
+
SET_DATA_INDEX(p2, idx2, dtype, x);
|
|
2038
1704
|
}
|
|
2039
|
-
|
|
2040
|
-
|
|
2041
|
-
|
|
2042
|
-
|
|
2043
|
-
|
|
2044
|
-
check_intdivzero(((dtype*)p2)[i]);
|
|
2045
|
-
((dtype*)p1)[i] = m_sub(((dtype*)p1)[i], ((dtype*)p2)[i]);
|
|
2046
|
-
}
|
|
2047
|
-
} else {
|
|
2048
|
-
for (; i < n; i++) {
|
|
2049
|
-
check_intdivzero(((dtype*)p2)[i]);
|
|
2050
|
-
((dtype*)p3)[i] = m_sub(((dtype*)p1)[i], ((dtype*)p2)[i]);
|
|
2051
|
-
}
|
|
2052
|
-
}
|
|
1705
|
+
} else {
|
|
1706
|
+
for (i = 0; i < n; i++) {
|
|
1707
|
+
GET_DATA_INDEX(p1, idx1, dtype, x);
|
|
1708
|
+
x = m_minus(x);
|
|
1709
|
+
SET_DATA_STRIDE(p2, s2, dtype, x);
|
|
2053
1710
|
}
|
|
2054
|
-
#endif
|
|
2055
|
-
return;
|
|
2056
1711
|
}
|
|
2057
|
-
|
|
2058
|
-
if (
|
|
2059
|
-
|
|
1712
|
+
} else {
|
|
1713
|
+
if (idx2) {
|
|
1714
|
+
for (i = 0; i < n; i++) {
|
|
1715
|
+
GET_DATA_STRIDE(p1, s1, dtype, x);
|
|
1716
|
+
x = m_minus(x);
|
|
1717
|
+
SET_DATA_INDEX(p2, idx2, dtype, x);
|
|
1718
|
+
}
|
|
1719
|
+
} else {
|
|
2060
1720
|
//
|
|
2061
|
-
|
|
2062
|
-
|
|
2063
|
-
check_intdivzero(*(dtype*)p2);
|
|
2064
|
-
if (s1 == sizeof(dtype) && s3 == sizeof(dtype)) {
|
|
2065
|
-
#ifdef __SSE2__
|
|
2066
|
-
// Broadcast a scalar value and use it for SIMD computation.
|
|
2067
|
-
b = _mm_load1_ps(&((dtype*)p2)[0]);
|
|
2068
|
-
|
|
2069
|
-
// Check number of elements. & Check same alignment.
|
|
2070
|
-
if ((n >= num_pack) &&
|
|
2071
|
-
is_same_aligned2(&((dtype*)p1)[i], &((dtype*)p3)[i], SIMD_ALIGNMENT_SIZE)) {
|
|
2072
|
-
// Calculate up to the position just before the start of SIMD computation.
|
|
2073
|
-
cnt = get_count_of_elements_not_aligned_to_simd_size(
|
|
2074
|
-
&((dtype*)p1)[i], SIMD_ALIGNMENT_SIZE, sizeof(dtype)
|
|
2075
|
-
);
|
|
2076
|
-
#endif
|
|
2077
|
-
if (p1 == p3) { // inplace case
|
|
2078
|
-
#ifdef __SSE2__
|
|
2079
|
-
for (; i < cnt; i++) {
|
|
2080
|
-
#else
|
|
2081
|
-
for (; i < n; i++) {
|
|
2082
|
-
#endif
|
|
2083
|
-
((dtype*)p1)[i] = m_sub(((dtype*)p1)[i], *(dtype*)p2);
|
|
2084
|
-
}
|
|
2085
|
-
} else {
|
|
2086
|
-
#ifdef __SSE2__
|
|
2087
|
-
for (; i < cnt; i++) {
|
|
2088
|
-
#else
|
|
2089
|
-
for (; i < n; i++) {
|
|
2090
|
-
#endif
|
|
2091
|
-
((dtype*)p3)[i] = m_sub(((dtype*)p1)[i], *(dtype*)p2);
|
|
2092
|
-
}
|
|
2093
|
-
}
|
|
2094
|
-
|
|
2095
|
-
#ifdef __SSE2__
|
|
2096
|
-
// Get the count of SIMD computation loops.
|
|
2097
|
-
cnt_simd_loop = (n - i) % num_pack;
|
|
2098
|
-
|
|
2099
|
-
// SIMD computation.
|
|
2100
|
-
if (p1 == p3) { // inplace case
|
|
2101
|
-
for (; i < n - cnt_simd_loop; i += num_pack) {
|
|
2102
|
-
a = _mm_load_ps(&((dtype*)p1)[i]);
|
|
2103
|
-
a = _mm_sub_ps(a, b);
|
|
2104
|
-
_mm_store_ps(&((dtype*)p1)[i], a);
|
|
2105
|
-
}
|
|
2106
|
-
} else {
|
|
2107
|
-
for (; i < n - cnt_simd_loop; i += num_pack) {
|
|
2108
|
-
a = _mm_load_ps(&((dtype*)p1)[i]);
|
|
2109
|
-
a = _mm_sub_ps(a, b);
|
|
2110
|
-
_mm_stream_ps(&((dtype*)p3)[i], a);
|
|
2111
|
-
}
|
|
2112
|
-
}
|
|
2113
|
-
}
|
|
2114
|
-
|
|
2115
|
-
// Compute the remainder of the SIMD operation.
|
|
2116
|
-
if (cnt_simd_loop != 0) {
|
|
2117
|
-
if (p1 == p3) { // inplace case
|
|
2118
|
-
for (; i < n; i++) {
|
|
2119
|
-
((dtype*)p1)[i] = m_sub(((dtype*)p1)[i], *(dtype*)p2);
|
|
2120
|
-
}
|
|
2121
|
-
} else {
|
|
2122
|
-
for (; i < n; i++) {
|
|
2123
|
-
((dtype*)p3)[i] = m_sub(((dtype*)p1)[i], *(dtype*)p2);
|
|
2124
|
-
}
|
|
2125
|
-
}
|
|
2126
|
-
}
|
|
2127
|
-
#endif
|
|
2128
|
-
} else {
|
|
1721
|
+
if (is_aligned(p1, sizeof(dtype)) && is_aligned(p2, sizeof(dtype))) {
|
|
1722
|
+
if (s1 == sizeof(dtype) && s2 == sizeof(dtype)) {
|
|
2129
1723
|
for (i = 0; i < n; i++) {
|
|
2130
|
-
|
|
2131
|
-
p1 += s1;
|
|
2132
|
-
p3 += s3;
|
|
1724
|
+
((dtype*)p2)[i] = m_minus(((dtype*)p1)[i]);
|
|
2133
1725
|
}
|
|
1726
|
+
return;
|
|
2134
1727
|
}
|
|
2135
|
-
|
|
2136
|
-
|
|
2137
|
-
for (i = 0; i < n; i++) {
|
|
2138
|
-
check_intdivzero(*(dtype*)p2);
|
|
2139
|
-
*(dtype*)p1 = m_sub(*(dtype*)p1, *(dtype*)p2);
|
|
2140
|
-
p1 += s1;
|
|
2141
|
-
p2 += s2;
|
|
2142
|
-
}
|
|
2143
|
-
} else {
|
|
1728
|
+
if (is_aligned_step(s1, sizeof(dtype)) && is_aligned_step(s2, sizeof(dtype))) {
|
|
1729
|
+
//
|
|
2144
1730
|
for (i = 0; i < n; i++) {
|
|
2145
|
-
|
|
2146
|
-
*(dtype*)p3 = m_sub(*(dtype*)p1, *(dtype*)p2);
|
|
1731
|
+
*(dtype*)p2 = m_minus(*(dtype*)p1);
|
|
2147
1732
|
p1 += s1;
|
|
2148
1733
|
p2 += s2;
|
|
2149
|
-
p3 += s3;
|
|
2150
1734
|
}
|
|
1735
|
+
return;
|
|
1736
|
+
//
|
|
2151
1737
|
}
|
|
2152
1738
|
}
|
|
2153
|
-
|
|
2154
|
-
|
|
1739
|
+
for (i = 0; i < n; i++) {
|
|
1740
|
+
GET_DATA_STRIDE(p1, s1, dtype, x);
|
|
1741
|
+
x = m_minus(x);
|
|
1742
|
+
SET_DATA_STRIDE(p2, s2, dtype, x);
|
|
1743
|
+
}
|
|
2155
1744
|
//
|
|
2156
1745
|
}
|
|
2157
1746
|
}
|
|
2158
|
-
for (i = 0; i < n; i++) {
|
|
2159
|
-
dtype x, y, z;
|
|
2160
|
-
GET_DATA_STRIDE(p1, s1, dtype, x);
|
|
2161
|
-
GET_DATA_STRIDE(p2, s2, dtype, y);
|
|
2162
|
-
check_intdivzero(y);
|
|
2163
|
-
z = m_sub(x, y);
|
|
2164
|
-
SET_DATA_STRIDE(p3, s3, dtype, z);
|
|
2165
|
-
}
|
|
2166
|
-
//
|
|
2167
1747
|
}
|
|
2168
|
-
#undef check_intdivzero
|
|
2169
1748
|
|
|
2170
|
-
static VALUE
|
|
2171
|
-
ndfunc_arg_in_t ain[
|
|
1749
|
+
static VALUE sfloat_minus(VALUE self) {
|
|
1750
|
+
ndfunc_arg_in_t ain[1] = { { cT, 0 } };
|
|
2172
1751
|
ndfunc_arg_out_t aout[1] = { { cT, 0 } };
|
|
2173
|
-
ndfunc_t ndf = {
|
|
2174
|
-
|
|
2175
|
-
return na_ndloop(&ndf, 2, self, other);
|
|
2176
|
-
}
|
|
2177
|
-
|
|
2178
|
-
static VALUE sfloat_sub(VALUE self, VALUE other) {
|
|
2179
|
-
|
|
2180
|
-
VALUE klass, v;
|
|
1752
|
+
ndfunc_t ndf = { iter_sfloat_minus, FULL_LOOP, 1, 1, ain, aout };
|
|
2181
1753
|
|
|
2182
|
-
|
|
2183
|
-
if (klass == cT) {
|
|
2184
|
-
return sfloat_sub_self(self, other);
|
|
2185
|
-
} else {
|
|
2186
|
-
v = rb_funcall(klass, id_cast, 1, self);
|
|
2187
|
-
return rb_funcall(v, '-', 1, other);
|
|
2188
|
-
}
|
|
1754
|
+
return na_ndloop(&ndf, 1, self);
|
|
2189
1755
|
}
|
|
2190
1756
|
|
|
2191
|
-
|
|
2192
|
-
|
|
2193
|
-
|
|
2194
|
-
|
|
2195
|
-
size_t
|
|
2196
|
-
|
|
2197
|
-
char *p1, *p2, *p3;
|
|
2198
|
-
ssize_t s1, s2, s3;
|
|
2199
|
-
|
|
2200
|
-
#ifdef __SSE2__
|
|
2201
|
-
size_t cnt;
|
|
2202
|
-
size_t cnt_simd_loop = -1;
|
|
2203
|
-
|
|
2204
|
-
__m128 a;
|
|
2205
|
-
__m128 b;
|
|
1757
|
+
static void iter_sfloat_reciprocal(na_loop_t* const lp) {
|
|
1758
|
+
size_t i, n;
|
|
1759
|
+
char *p1, *p2;
|
|
1760
|
+
ssize_t s1, s2;
|
|
1761
|
+
size_t *idx1, *idx2;
|
|
1762
|
+
dtype x;
|
|
2206
1763
|
|
|
2207
|
-
size_t num_pack; // Number of elements packed for SIMD.
|
|
2208
|
-
num_pack = SIMD_ALIGNMENT_SIZE / sizeof(dtype);
|
|
2209
|
-
#endif
|
|
2210
1764
|
INIT_COUNTER(lp, n);
|
|
2211
|
-
|
|
2212
|
-
|
|
2213
|
-
INIT_PTR(lp, 2, p3, s3);
|
|
2214
|
-
|
|
2215
|
-
//
|
|
2216
|
-
if (is_aligned(p1, sizeof(dtype)) && is_aligned(p2, sizeof(dtype)) &&
|
|
2217
|
-
is_aligned(p3, sizeof(dtype))) {
|
|
1765
|
+
INIT_PTR_IDX(lp, 0, p1, s1, idx1);
|
|
1766
|
+
INIT_PTR_IDX(lp, 1, p2, s2, idx2);
|
|
2218
1767
|
|
|
2219
|
-
|
|
2220
|
-
|
|
2221
|
-
|
|
2222
|
-
|
|
2223
|
-
|
|
2224
|
-
|
|
2225
|
-
|
|
2226
|
-
|
|
2227
|
-
|
|
2228
|
-
|
|
2229
|
-
);
|
|
2230
|
-
|
|
2231
|
-
|
|
2232
|
-
|
|
2233
|
-
|
|
2234
|
-
|
|
2235
|
-
|
|
2236
|
-
|
|
2237
|
-
|
|
2238
|
-
|
|
2239
|
-
|
|
2240
|
-
|
|
2241
|
-
|
|
2242
|
-
|
|
2243
|
-
|
|
2244
|
-
|
|
2245
|
-
|
|
2246
|
-
#endif
|
|
2247
|
-
((dtype*)p3)[i] = m_mul(((dtype*)p1)[i], ((dtype*)p2)[i]);
|
|
1768
|
+
if (idx1) {
|
|
1769
|
+
if (idx2) {
|
|
1770
|
+
for (i = 0; i < n; i++) {
|
|
1771
|
+
GET_DATA_INDEX(p1, idx1, dtype, x);
|
|
1772
|
+
x = m_reciprocal(x);
|
|
1773
|
+
SET_DATA_INDEX(p2, idx2, dtype, x);
|
|
1774
|
+
}
|
|
1775
|
+
} else {
|
|
1776
|
+
for (i = 0; i < n; i++) {
|
|
1777
|
+
GET_DATA_INDEX(p1, idx1, dtype, x);
|
|
1778
|
+
x = m_reciprocal(x);
|
|
1779
|
+
SET_DATA_STRIDE(p2, s2, dtype, x);
|
|
1780
|
+
}
|
|
1781
|
+
}
|
|
1782
|
+
} else {
|
|
1783
|
+
if (idx2) {
|
|
1784
|
+
for (i = 0; i < n; i++) {
|
|
1785
|
+
GET_DATA_STRIDE(p1, s1, dtype, x);
|
|
1786
|
+
x = m_reciprocal(x);
|
|
1787
|
+
SET_DATA_INDEX(p2, idx2, dtype, x);
|
|
1788
|
+
}
|
|
1789
|
+
} else {
|
|
1790
|
+
//
|
|
1791
|
+
if (is_aligned(p1, sizeof(dtype)) && is_aligned(p2, sizeof(dtype))) {
|
|
1792
|
+
if (s1 == sizeof(dtype) && s2 == sizeof(dtype)) {
|
|
1793
|
+
for (i = 0; i < n; i++) {
|
|
1794
|
+
((dtype*)p2)[i] = m_reciprocal(((dtype*)p1)[i]);
|
|
2248
1795
|
}
|
|
1796
|
+
return;
|
|
2249
1797
|
}
|
|
2250
|
-
|
|
2251
|
-
|
|
2252
|
-
|
|
2253
|
-
|
|
2254
|
-
|
|
2255
|
-
|
|
2256
|
-
if (p1 == p3) { // inplace case
|
|
2257
|
-
for (; i < n - cnt_simd_loop; i += num_pack) {
|
|
2258
|
-
a = _mm_load_ps(&((dtype*)p1)[i]);
|
|
2259
|
-
b = _mm_load_ps(&((dtype*)p2)[i]);
|
|
2260
|
-
a = _mm_mul_ps(a, b);
|
|
2261
|
-
_mm_store_ps(&((dtype*)p1)[i], a);
|
|
2262
|
-
}
|
|
2263
|
-
} else {
|
|
2264
|
-
for (; i < n - cnt_simd_loop; i += num_pack) {
|
|
2265
|
-
a = _mm_load_ps(&((dtype*)p1)[i]);
|
|
2266
|
-
b = _mm_load_ps(&((dtype*)p2)[i]);
|
|
2267
|
-
a = _mm_mul_ps(a, b);
|
|
2268
|
-
_mm_stream_ps(&((dtype*)p3)[i], a);
|
|
1798
|
+
if (is_aligned_step(s1, sizeof(dtype)) && is_aligned_step(s2, sizeof(dtype))) {
|
|
1799
|
+
//
|
|
1800
|
+
for (i = 0; i < n; i++) {
|
|
1801
|
+
*(dtype*)p2 = m_reciprocal(*(dtype*)p1);
|
|
1802
|
+
p1 += s1;
|
|
1803
|
+
p2 += s2;
|
|
2269
1804
|
}
|
|
1805
|
+
return;
|
|
1806
|
+
//
|
|
2270
1807
|
}
|
|
2271
1808
|
}
|
|
2272
|
-
|
|
2273
|
-
|
|
2274
|
-
|
|
2275
|
-
|
|
2276
|
-
for (; i < n; i++) {
|
|
2277
|
-
check_intdivzero(((dtype*)p2)[i]);
|
|
2278
|
-
((dtype*)p1)[i] = m_mul(((dtype*)p1)[i], ((dtype*)p2)[i]);
|
|
2279
|
-
}
|
|
2280
|
-
} else {
|
|
2281
|
-
for (; i < n; i++) {
|
|
2282
|
-
check_intdivzero(((dtype*)p2)[i]);
|
|
2283
|
-
((dtype*)p3)[i] = m_mul(((dtype*)p1)[i], ((dtype*)p2)[i]);
|
|
2284
|
-
}
|
|
2285
|
-
}
|
|
1809
|
+
for (i = 0; i < n; i++) {
|
|
1810
|
+
GET_DATA_STRIDE(p1, s1, dtype, x);
|
|
1811
|
+
x = m_reciprocal(x);
|
|
1812
|
+
SET_DATA_STRIDE(p2, s2, dtype, x);
|
|
2286
1813
|
}
|
|
2287
|
-
#endif
|
|
2288
|
-
return;
|
|
2289
|
-
}
|
|
2290
|
-
|
|
2291
|
-
if (is_aligned_step(s1, sizeof(dtype)) && is_aligned_step(s2, sizeof(dtype)) &&
|
|
2292
|
-
is_aligned_step(s3, sizeof(dtype))) {
|
|
2293
1814
|
//
|
|
1815
|
+
}
|
|
1816
|
+
}
|
|
1817
|
+
}
|
|
2294
1818
|
|
|
2295
|
-
|
|
2296
|
-
|
|
2297
|
-
if (s1 == sizeof(dtype) && s3 == sizeof(dtype)) {
|
|
2298
|
-
#ifdef __SSE2__
|
|
2299
|
-
// Broadcast a scalar value and use it for SIMD computation.
|
|
2300
|
-
b = _mm_load1_ps(&((dtype*)p2)[0]);
|
|
2301
|
-
|
|
2302
|
-
// Check number of elements. & Check same alignment.
|
|
2303
|
-
if ((n >= num_pack) &&
|
|
2304
|
-
is_same_aligned2(&((dtype*)p1)[i], &((dtype*)p3)[i], SIMD_ALIGNMENT_SIZE)) {
|
|
2305
|
-
// Calculate up to the position just before the start of SIMD computation.
|
|
2306
|
-
cnt = get_count_of_elements_not_aligned_to_simd_size(
|
|
2307
|
-
&((dtype*)p1)[i], SIMD_ALIGNMENT_SIZE, sizeof(dtype)
|
|
2308
|
-
);
|
|
2309
|
-
#endif
|
|
2310
|
-
if (p1 == p3) { // inplace case
|
|
2311
|
-
#ifdef __SSE2__
|
|
2312
|
-
for (; i < cnt; i++) {
|
|
2313
|
-
#else
|
|
2314
|
-
for (; i < n; i++) {
|
|
2315
|
-
#endif
|
|
2316
|
-
((dtype*)p1)[i] = m_mul(((dtype*)p1)[i], *(dtype*)p2);
|
|
2317
|
-
}
|
|
2318
|
-
} else {
|
|
2319
|
-
#ifdef __SSE2__
|
|
2320
|
-
for (; i < cnt; i++) {
|
|
2321
|
-
#else
|
|
2322
|
-
for (; i < n; i++) {
|
|
2323
|
-
#endif
|
|
2324
|
-
((dtype*)p3)[i] = m_mul(((dtype*)p1)[i], *(dtype*)p2);
|
|
2325
|
-
}
|
|
2326
|
-
}
|
|
2327
|
-
|
|
2328
|
-
#ifdef __SSE2__
|
|
2329
|
-
// Get the count of SIMD computation loops.
|
|
2330
|
-
cnt_simd_loop = (n - i) % num_pack;
|
|
2331
|
-
|
|
2332
|
-
// SIMD computation.
|
|
2333
|
-
if (p1 == p3) { // inplace case
|
|
2334
|
-
for (; i < n - cnt_simd_loop; i += num_pack) {
|
|
2335
|
-
a = _mm_load_ps(&((dtype*)p1)[i]);
|
|
2336
|
-
a = _mm_mul_ps(a, b);
|
|
2337
|
-
_mm_store_ps(&((dtype*)p1)[i], a);
|
|
2338
|
-
}
|
|
2339
|
-
} else {
|
|
2340
|
-
for (; i < n - cnt_simd_loop; i += num_pack) {
|
|
2341
|
-
a = _mm_load_ps(&((dtype*)p1)[i]);
|
|
2342
|
-
a = _mm_mul_ps(a, b);
|
|
2343
|
-
_mm_stream_ps(&((dtype*)p3)[i], a);
|
|
2344
|
-
}
|
|
2345
|
-
}
|
|
2346
|
-
}
|
|
2347
|
-
|
|
2348
|
-
// Compute the remainder of the SIMD operation.
|
|
2349
|
-
if (cnt_simd_loop != 0) {
|
|
2350
|
-
if (p1 == p3) { // inplace case
|
|
2351
|
-
for (; i < n; i++) {
|
|
2352
|
-
((dtype*)p1)[i] = m_mul(((dtype*)p1)[i], *(dtype*)p2);
|
|
2353
|
-
}
|
|
2354
|
-
} else {
|
|
2355
|
-
for (; i < n; i++) {
|
|
2356
|
-
((dtype*)p3)[i] = m_mul(((dtype*)p1)[i], *(dtype*)p2);
|
|
2357
|
-
}
|
|
2358
|
-
}
|
|
2359
|
-
}
|
|
2360
|
-
#endif
|
|
2361
|
-
} else {
|
|
2362
|
-
for (i = 0; i < n; i++) {
|
|
2363
|
-
*(dtype*)p3 = m_mul(*(dtype*)p1, *(dtype*)p2);
|
|
2364
|
-
p1 += s1;
|
|
2365
|
-
p3 += s3;
|
|
2366
|
-
}
|
|
2367
|
-
}
|
|
2368
|
-
} else {
|
|
2369
|
-
if (p1 == p3) { // inplace case
|
|
2370
|
-
for (i = 0; i < n; i++) {
|
|
2371
|
-
check_intdivzero(*(dtype*)p2);
|
|
2372
|
-
*(dtype*)p1 = m_mul(*(dtype*)p1, *(dtype*)p2);
|
|
2373
|
-
p1 += s1;
|
|
2374
|
-
p2 += s2;
|
|
2375
|
-
}
|
|
2376
|
-
} else {
|
|
2377
|
-
for (i = 0; i < n; i++) {
|
|
2378
|
-
check_intdivzero(*(dtype*)p2);
|
|
2379
|
-
*(dtype*)p3 = m_mul(*(dtype*)p1, *(dtype*)p2);
|
|
2380
|
-
p1 += s1;
|
|
2381
|
-
p2 += s2;
|
|
2382
|
-
p3 += s3;
|
|
2383
|
-
}
|
|
2384
|
-
}
|
|
2385
|
-
}
|
|
2386
|
-
|
|
2387
|
-
return;
|
|
2388
|
-
//
|
|
2389
|
-
}
|
|
2390
|
-
}
|
|
2391
|
-
for (i = 0; i < n; i++) {
|
|
2392
|
-
dtype x, y, z;
|
|
2393
|
-
GET_DATA_STRIDE(p1, s1, dtype, x);
|
|
2394
|
-
GET_DATA_STRIDE(p2, s2, dtype, y);
|
|
2395
|
-
check_intdivzero(y);
|
|
2396
|
-
z = m_mul(x, y);
|
|
2397
|
-
SET_DATA_STRIDE(p3, s3, dtype, z);
|
|
2398
|
-
}
|
|
2399
|
-
//
|
|
2400
|
-
}
|
|
2401
|
-
#undef check_intdivzero
|
|
2402
|
-
|
|
2403
|
-
static VALUE sfloat_mul_self(VALUE self, VALUE other) {
|
|
2404
|
-
ndfunc_arg_in_t ain[2] = { { cT, 0 }, { cT, 0 } };
|
|
1819
|
+
static VALUE sfloat_reciprocal(VALUE self) {
|
|
1820
|
+
ndfunc_arg_in_t ain[1] = { { cT, 0 } };
|
|
2405
1821
|
ndfunc_arg_out_t aout[1] = { { cT, 0 } };
|
|
2406
|
-
ndfunc_t ndf = {
|
|
2407
|
-
|
|
2408
|
-
return na_ndloop(&ndf, 2, self, other);
|
|
2409
|
-
}
|
|
2410
|
-
|
|
2411
|
-
static VALUE sfloat_mul(VALUE self, VALUE other) {
|
|
2412
|
-
|
|
2413
|
-
VALUE klass, v;
|
|
1822
|
+
ndfunc_t ndf = { iter_sfloat_reciprocal, FULL_LOOP, 1, 1, ain, aout };
|
|
2414
1823
|
|
|
2415
|
-
|
|
2416
|
-
if (klass == cT) {
|
|
2417
|
-
return sfloat_mul_self(self, other);
|
|
2418
|
-
} else {
|
|
2419
|
-
v = rb_funcall(klass, id_cast, 1, self);
|
|
2420
|
-
return rb_funcall(v, '*', 1, other);
|
|
2421
|
-
}
|
|
1824
|
+
return na_ndloop(&ndf, 1, self);
|
|
2422
1825
|
}
|
|
2423
1826
|
|
|
2424
|
-
|
|
2425
|
-
|
|
2426
|
-
|
|
2427
|
-
|
|
2428
|
-
size_t
|
|
2429
|
-
|
|
2430
|
-
char *p1, *p2, *p3;
|
|
2431
|
-
ssize_t s1, s2, s3;
|
|
2432
|
-
|
|
2433
|
-
#ifdef __SSE2__
|
|
2434
|
-
size_t cnt;
|
|
2435
|
-
size_t cnt_simd_loop = -1;
|
|
2436
|
-
|
|
2437
|
-
__m128 a;
|
|
2438
|
-
__m128 b;
|
|
1827
|
+
static void iter_sfloat_sign(na_loop_t* const lp) {
|
|
1828
|
+
size_t i, n;
|
|
1829
|
+
char *p1, *p2;
|
|
1830
|
+
ssize_t s1, s2;
|
|
1831
|
+
size_t *idx1, *idx2;
|
|
1832
|
+
dtype x;
|
|
2439
1833
|
|
|
2440
|
-
size_t num_pack; // Number of elements packed for SIMD.
|
|
2441
|
-
num_pack = SIMD_ALIGNMENT_SIZE / sizeof(dtype);
|
|
2442
|
-
#endif
|
|
2443
1834
|
INIT_COUNTER(lp, n);
|
|
2444
|
-
|
|
2445
|
-
|
|
2446
|
-
INIT_PTR(lp, 2, p3, s3);
|
|
2447
|
-
|
|
2448
|
-
//
|
|
2449
|
-
if (is_aligned(p1, sizeof(dtype)) && is_aligned(p2, sizeof(dtype)) &&
|
|
2450
|
-
is_aligned(p3, sizeof(dtype))) {
|
|
2451
|
-
|
|
2452
|
-
if (s1 == sizeof(dtype) && s2 == sizeof(dtype) && s3 == sizeof(dtype)) {
|
|
2453
|
-
#ifdef __SSE2__
|
|
2454
|
-
// Check number of elements. & Check same alignment.
|
|
2455
|
-
if ((n >= num_pack) &&
|
|
2456
|
-
is_same_aligned3(
|
|
2457
|
-
&((dtype*)p1)[i], &((dtype*)p2)[i], &((dtype*)p3)[i], SIMD_ALIGNMENT_SIZE
|
|
2458
|
-
)) {
|
|
2459
|
-
// Calculate up to the position just before the start of SIMD computation.
|
|
2460
|
-
cnt = get_count_of_elements_not_aligned_to_simd_size(
|
|
2461
|
-
&((dtype*)p1)[i], SIMD_ALIGNMENT_SIZE, sizeof(dtype)
|
|
2462
|
-
);
|
|
2463
|
-
#endif
|
|
2464
|
-
if (p1 == p3) { // inplace case
|
|
2465
|
-
#ifdef __SSE2__
|
|
2466
|
-
for (; i < cnt; i++) {
|
|
2467
|
-
#else
|
|
2468
|
-
for (; i < n; i++) {
|
|
2469
|
-
check_intdivzero(((dtype*)p2)[i]);
|
|
2470
|
-
#endif
|
|
2471
|
-
((dtype*)p1)[i] = m_div(((dtype*)p1)[i], ((dtype*)p2)[i]);
|
|
2472
|
-
}
|
|
2473
|
-
} else {
|
|
2474
|
-
#ifdef __SSE2__
|
|
2475
|
-
for (; i < cnt; i++) {
|
|
2476
|
-
#else
|
|
2477
|
-
for (; i < n; i++) {
|
|
2478
|
-
check_intdivzero(((dtype*)p2)[i]);
|
|
2479
|
-
#endif
|
|
2480
|
-
((dtype*)p3)[i] = m_div(((dtype*)p1)[i], ((dtype*)p2)[i]);
|
|
2481
|
-
}
|
|
2482
|
-
}
|
|
2483
|
-
|
|
2484
|
-
#ifdef __SSE2__
|
|
2485
|
-
// Get the count of SIMD computation loops.
|
|
2486
|
-
cnt_simd_loop = (n - i) % num_pack;
|
|
1835
|
+
INIT_PTR_IDX(lp, 0, p1, s1, idx1);
|
|
1836
|
+
INIT_PTR_IDX(lp, 1, p2, s2, idx2);
|
|
2487
1837
|
|
|
2488
|
-
|
|
2489
|
-
|
|
2490
|
-
|
|
2491
|
-
|
|
2492
|
-
|
|
2493
|
-
|
|
2494
|
-
_mm_store_ps(&((dtype*)p1)[i], a);
|
|
2495
|
-
}
|
|
2496
|
-
} else {
|
|
2497
|
-
for (; i < n - cnt_simd_loop; i += num_pack) {
|
|
2498
|
-
a = _mm_load_ps(&((dtype*)p1)[i]);
|
|
2499
|
-
b = _mm_load_ps(&((dtype*)p2)[i]);
|
|
2500
|
-
a = _mm_div_ps(a, b);
|
|
2501
|
-
_mm_stream_ps(&((dtype*)p3)[i], a);
|
|
2502
|
-
}
|
|
2503
|
-
}
|
|
1838
|
+
if (idx1) {
|
|
1839
|
+
if (idx2) {
|
|
1840
|
+
for (i = 0; i < n; i++) {
|
|
1841
|
+
GET_DATA_INDEX(p1, idx1, dtype, x);
|
|
1842
|
+
x = m_sign(x);
|
|
1843
|
+
SET_DATA_INDEX(p2, idx2, dtype, x);
|
|
2504
1844
|
}
|
|
2505
|
-
|
|
2506
|
-
|
|
2507
|
-
|
|
2508
|
-
|
|
2509
|
-
|
|
2510
|
-
check_intdivzero(((dtype*)p2)[i]);
|
|
2511
|
-
((dtype*)p1)[i] = m_div(((dtype*)p1)[i], ((dtype*)p2)[i]);
|
|
2512
|
-
}
|
|
2513
|
-
} else {
|
|
2514
|
-
for (; i < n; i++) {
|
|
2515
|
-
check_intdivzero(((dtype*)p2)[i]);
|
|
2516
|
-
((dtype*)p3)[i] = m_div(((dtype*)p1)[i], ((dtype*)p2)[i]);
|
|
2517
|
-
}
|
|
2518
|
-
}
|
|
1845
|
+
} else {
|
|
1846
|
+
for (i = 0; i < n; i++) {
|
|
1847
|
+
GET_DATA_INDEX(p1, idx1, dtype, x);
|
|
1848
|
+
x = m_sign(x);
|
|
1849
|
+
SET_DATA_STRIDE(p2, s2, dtype, x);
|
|
2519
1850
|
}
|
|
2520
|
-
#endif
|
|
2521
|
-
return;
|
|
2522
1851
|
}
|
|
2523
|
-
|
|
2524
|
-
if (
|
|
2525
|
-
|
|
1852
|
+
} else {
|
|
1853
|
+
if (idx2) {
|
|
1854
|
+
for (i = 0; i < n; i++) {
|
|
1855
|
+
GET_DATA_STRIDE(p1, s1, dtype, x);
|
|
1856
|
+
x = m_sign(x);
|
|
1857
|
+
SET_DATA_INDEX(p2, idx2, dtype, x);
|
|
1858
|
+
}
|
|
1859
|
+
} else {
|
|
2526
1860
|
//
|
|
2527
|
-
|
|
2528
|
-
|
|
2529
|
-
check_intdivzero(*(dtype*)p2);
|
|
2530
|
-
if (s1 == sizeof(dtype) && s3 == sizeof(dtype)) {
|
|
2531
|
-
#ifdef __SSE2__
|
|
2532
|
-
// Broadcast a scalar value and use it for SIMD computation.
|
|
2533
|
-
b = _mm_load1_ps(&((dtype*)p2)[0]);
|
|
2534
|
-
|
|
2535
|
-
// Check number of elements. & Check same alignment.
|
|
2536
|
-
if ((n >= num_pack) &&
|
|
2537
|
-
is_same_aligned2(&((dtype*)p1)[i], &((dtype*)p3)[i], SIMD_ALIGNMENT_SIZE)) {
|
|
2538
|
-
// Calculate up to the position just before the start of SIMD computation.
|
|
2539
|
-
cnt = get_count_of_elements_not_aligned_to_simd_size(
|
|
2540
|
-
&((dtype*)p1)[i], SIMD_ALIGNMENT_SIZE, sizeof(dtype)
|
|
2541
|
-
);
|
|
2542
|
-
#endif
|
|
2543
|
-
if (p1 == p3) { // inplace case
|
|
2544
|
-
#ifdef __SSE2__
|
|
2545
|
-
for (; i < cnt; i++) {
|
|
2546
|
-
#else
|
|
2547
|
-
for (; i < n; i++) {
|
|
2548
|
-
#endif
|
|
2549
|
-
((dtype*)p1)[i] = m_div(((dtype*)p1)[i], *(dtype*)p2);
|
|
2550
|
-
}
|
|
2551
|
-
} else {
|
|
2552
|
-
#ifdef __SSE2__
|
|
2553
|
-
for (; i < cnt; i++) {
|
|
2554
|
-
#else
|
|
2555
|
-
for (; i < n; i++) {
|
|
2556
|
-
#endif
|
|
2557
|
-
((dtype*)p3)[i] = m_div(((dtype*)p1)[i], *(dtype*)p2);
|
|
2558
|
-
}
|
|
2559
|
-
}
|
|
2560
|
-
|
|
2561
|
-
#ifdef __SSE2__
|
|
2562
|
-
// Get the count of SIMD computation loops.
|
|
2563
|
-
cnt_simd_loop = (n - i) % num_pack;
|
|
2564
|
-
|
|
2565
|
-
// SIMD computation.
|
|
2566
|
-
if (p1 == p3) { // inplace case
|
|
2567
|
-
for (; i < n - cnt_simd_loop; i += num_pack) {
|
|
2568
|
-
a = _mm_load_ps(&((dtype*)p1)[i]);
|
|
2569
|
-
a = _mm_div_ps(a, b);
|
|
2570
|
-
_mm_store_ps(&((dtype*)p1)[i], a);
|
|
2571
|
-
}
|
|
2572
|
-
} else {
|
|
2573
|
-
for (; i < n - cnt_simd_loop; i += num_pack) {
|
|
2574
|
-
a = _mm_load_ps(&((dtype*)p1)[i]);
|
|
2575
|
-
a = _mm_div_ps(a, b);
|
|
2576
|
-
_mm_stream_ps(&((dtype*)p3)[i], a);
|
|
2577
|
-
}
|
|
2578
|
-
}
|
|
2579
|
-
}
|
|
2580
|
-
|
|
2581
|
-
// Compute the remainder of the SIMD operation.
|
|
2582
|
-
if (cnt_simd_loop != 0) {
|
|
2583
|
-
if (p1 == p3) { // inplace case
|
|
2584
|
-
for (; i < n; i++) {
|
|
2585
|
-
((dtype*)p1)[i] = m_div(((dtype*)p1)[i], *(dtype*)p2);
|
|
2586
|
-
}
|
|
2587
|
-
} else {
|
|
2588
|
-
for (; i < n; i++) {
|
|
2589
|
-
((dtype*)p3)[i] = m_div(((dtype*)p1)[i], *(dtype*)p2);
|
|
2590
|
-
}
|
|
2591
|
-
}
|
|
2592
|
-
}
|
|
2593
|
-
#endif
|
|
2594
|
-
} else {
|
|
1861
|
+
if (is_aligned(p1, sizeof(dtype)) && is_aligned(p2, sizeof(dtype))) {
|
|
1862
|
+
if (s1 == sizeof(dtype) && s2 == sizeof(dtype)) {
|
|
2595
1863
|
for (i = 0; i < n; i++) {
|
|
2596
|
-
|
|
2597
|
-
p1 += s1;
|
|
2598
|
-
p3 += s3;
|
|
1864
|
+
((dtype*)p2)[i] = m_sign(((dtype*)p1)[i]);
|
|
2599
1865
|
}
|
|
1866
|
+
return;
|
|
2600
1867
|
}
|
|
2601
|
-
|
|
2602
|
-
|
|
2603
|
-
for (i = 0; i < n; i++) {
|
|
2604
|
-
check_intdivzero(*(dtype*)p2);
|
|
2605
|
-
*(dtype*)p1 = m_div(*(dtype*)p1, *(dtype*)p2);
|
|
2606
|
-
p1 += s1;
|
|
2607
|
-
p2 += s2;
|
|
2608
|
-
}
|
|
2609
|
-
} else {
|
|
1868
|
+
if (is_aligned_step(s1, sizeof(dtype)) && is_aligned_step(s2, sizeof(dtype))) {
|
|
1869
|
+
//
|
|
2610
1870
|
for (i = 0; i < n; i++) {
|
|
2611
|
-
|
|
2612
|
-
*(dtype*)p3 = m_div(*(dtype*)p1, *(dtype*)p2);
|
|
1871
|
+
*(dtype*)p2 = m_sign(*(dtype*)p1);
|
|
2613
1872
|
p1 += s1;
|
|
2614
1873
|
p2 += s2;
|
|
2615
|
-
p3 += s3;
|
|
2616
1874
|
}
|
|
2617
|
-
|
|
2618
|
-
|
|
2619
|
-
|
|
2620
|
-
|
|
2621
|
-
|
|
2622
|
-
|
|
2623
|
-
|
|
2624
|
-
|
|
2625
|
-
|
|
2626
|
-
|
|
2627
|
-
GET_DATA_STRIDE(p2, s2, dtype, y);
|
|
2628
|
-
check_intdivzero(y);
|
|
2629
|
-
z = m_div(x, y);
|
|
2630
|
-
SET_DATA_STRIDE(p3, s3, dtype, z);
|
|
2631
|
-
}
|
|
2632
|
-
//
|
|
2633
|
-
}
|
|
2634
|
-
#undef check_intdivzero
|
|
2635
|
-
|
|
2636
|
-
static VALUE sfloat_div_self(VALUE self, VALUE other) {
|
|
2637
|
-
ndfunc_arg_in_t ain[2] = { { cT, 0 }, { cT, 0 } };
|
|
2638
|
-
ndfunc_arg_out_t aout[1] = { { cT, 0 } };
|
|
2639
|
-
ndfunc_t ndf = { iter_sfloat_div, STRIDE_LOOP, 2, 1, ain, aout };
|
|
2640
|
-
|
|
2641
|
-
return na_ndloop(&ndf, 2, self, other);
|
|
2642
|
-
}
|
|
2643
|
-
|
|
2644
|
-
static VALUE sfloat_div(VALUE self, VALUE other) {
|
|
2645
|
-
|
|
2646
|
-
VALUE klass, v;
|
|
2647
|
-
|
|
2648
|
-
klass = na_upcast(rb_obj_class(self), rb_obj_class(other));
|
|
2649
|
-
if (klass == cT) {
|
|
2650
|
-
return sfloat_div_self(self, other);
|
|
2651
|
-
} else {
|
|
2652
|
-
v = rb_funcall(klass, id_cast, 1, self);
|
|
2653
|
-
return rb_funcall(v, '/', 1, other);
|
|
2654
|
-
}
|
|
2655
|
-
}
|
|
2656
|
-
|
|
2657
|
-
#define check_intdivzero(y) \
|
|
2658
|
-
{}
|
|
2659
|
-
|
|
2660
|
-
static void iter_sfloat_mod(na_loop_t* const lp) {
|
|
2661
|
-
size_t i = 0;
|
|
2662
|
-
size_t n;
|
|
2663
|
-
char *p1, *p2, *p3;
|
|
2664
|
-
ssize_t s1, s2, s3;
|
|
2665
|
-
|
|
2666
|
-
INIT_COUNTER(lp, n);
|
|
2667
|
-
INIT_PTR(lp, 0, p1, s1);
|
|
2668
|
-
INIT_PTR(lp, 1, p2, s2);
|
|
2669
|
-
INIT_PTR(lp, 2, p3, s3);
|
|
2670
|
-
|
|
2671
|
-
//
|
|
2672
|
-
if (is_aligned(p1, sizeof(dtype)) && is_aligned(p2, sizeof(dtype)) &&
|
|
2673
|
-
is_aligned(p3, sizeof(dtype))) {
|
|
2674
|
-
|
|
2675
|
-
if (s1 == sizeof(dtype) && s2 == sizeof(dtype) && s3 == sizeof(dtype)) {
|
|
2676
|
-
if (p1 == p3) { // inplace case
|
|
2677
|
-
for (; i < n; i++) {
|
|
2678
|
-
check_intdivzero(((dtype*)p2)[i]);
|
|
2679
|
-
((dtype*)p1)[i] = m_mod(((dtype*)p1)[i], ((dtype*)p2)[i]);
|
|
2680
|
-
}
|
|
2681
|
-
} else {
|
|
2682
|
-
for (; i < n; i++) {
|
|
2683
|
-
check_intdivzero(((dtype*)p2)[i]);
|
|
2684
|
-
((dtype*)p3)[i] = m_mod(((dtype*)p1)[i], ((dtype*)p2)[i]);
|
|
2685
|
-
}
|
|
2686
|
-
}
|
|
2687
|
-
return;
|
|
2688
|
-
}
|
|
2689
|
-
|
|
2690
|
-
if (is_aligned_step(s1, sizeof(dtype)) && is_aligned_step(s2, sizeof(dtype)) &&
|
|
2691
|
-
is_aligned_step(s3, sizeof(dtype))) {
|
|
2692
|
-
//
|
|
2693
|
-
|
|
2694
|
-
if (s2 == 0) { // Broadcasting from scalar value.
|
|
2695
|
-
check_intdivzero(*(dtype*)p2);
|
|
2696
|
-
if (s1 == sizeof(dtype) && s3 == sizeof(dtype)) {
|
|
2697
|
-
if (p1 == p3) { // inplace case
|
|
2698
|
-
for (; i < n; i++) {
|
|
2699
|
-
((dtype*)p1)[i] = m_mod(((dtype*)p1)[i], *(dtype*)p2);
|
|
2700
|
-
}
|
|
2701
|
-
} else {
|
|
2702
|
-
for (; i < n; i++) {
|
|
2703
|
-
((dtype*)p3)[i] = m_mod(((dtype*)p1)[i], *(dtype*)p2);
|
|
2704
|
-
}
|
|
2705
|
-
}
|
|
2706
|
-
} else {
|
|
2707
|
-
for (i = 0; i < n; i++) {
|
|
2708
|
-
*(dtype*)p3 = m_mod(*(dtype*)p1, *(dtype*)p2);
|
|
2709
|
-
p1 += s1;
|
|
2710
|
-
p3 += s3;
|
|
2711
|
-
}
|
|
2712
|
-
}
|
|
2713
|
-
} else {
|
|
2714
|
-
if (p1 == p3) { // inplace case
|
|
2715
|
-
for (i = 0; i < n; i++) {
|
|
2716
|
-
check_intdivzero(*(dtype*)p2);
|
|
2717
|
-
*(dtype*)p1 = m_mod(*(dtype*)p1, *(dtype*)p2);
|
|
2718
|
-
p1 += s1;
|
|
2719
|
-
p2 += s2;
|
|
2720
|
-
}
|
|
2721
|
-
} else {
|
|
2722
|
-
for (i = 0; i < n; i++) {
|
|
2723
|
-
check_intdivzero(*(dtype*)p2);
|
|
2724
|
-
*(dtype*)p3 = m_mod(*(dtype*)p1, *(dtype*)p2);
|
|
2725
|
-
p1 += s1;
|
|
2726
|
-
p2 += s2;
|
|
2727
|
-
p3 += s3;
|
|
2728
|
-
}
|
|
2729
|
-
}
|
|
2730
|
-
}
|
|
2731
|
-
|
|
2732
|
-
return;
|
|
2733
|
-
//
|
|
2734
|
-
}
|
|
2735
|
-
}
|
|
2736
|
-
for (i = 0; i < n; i++) {
|
|
2737
|
-
dtype x, y, z;
|
|
2738
|
-
GET_DATA_STRIDE(p1, s1, dtype, x);
|
|
2739
|
-
GET_DATA_STRIDE(p2, s2, dtype, y);
|
|
2740
|
-
check_intdivzero(y);
|
|
2741
|
-
z = m_mod(x, y);
|
|
2742
|
-
SET_DATA_STRIDE(p3, s3, dtype, z);
|
|
2743
|
-
}
|
|
2744
|
-
//
|
|
2745
|
-
}
|
|
2746
|
-
#undef check_intdivzero
|
|
2747
|
-
|
|
2748
|
-
static VALUE sfloat_mod_self(VALUE self, VALUE other) {
|
|
2749
|
-
ndfunc_arg_in_t ain[2] = { { cT, 0 }, { cT, 0 } };
|
|
2750
|
-
ndfunc_arg_out_t aout[1] = { { cT, 0 } };
|
|
2751
|
-
ndfunc_t ndf = { iter_sfloat_mod, STRIDE_LOOP, 2, 1, ain, aout };
|
|
2752
|
-
|
|
2753
|
-
return na_ndloop(&ndf, 2, self, other);
|
|
2754
|
-
}
|
|
2755
|
-
|
|
2756
|
-
static VALUE sfloat_mod(VALUE self, VALUE other) {
|
|
2757
|
-
|
|
2758
|
-
VALUE klass, v;
|
|
2759
|
-
|
|
2760
|
-
klass = na_upcast(rb_obj_class(self), rb_obj_class(other));
|
|
2761
|
-
if (klass == cT) {
|
|
2762
|
-
return sfloat_mod_self(self, other);
|
|
2763
|
-
} else {
|
|
2764
|
-
v = rb_funcall(klass, id_cast, 1, self);
|
|
2765
|
-
return rb_funcall(v, '%', 1, other);
|
|
2766
|
-
}
|
|
2767
|
-
}
|
|
2768
|
-
|
|
2769
|
-
static void iter_sfloat_divmod(na_loop_t* const lp) {
|
|
2770
|
-
size_t i, n;
|
|
2771
|
-
char *p1, *p2, *p3, *p4;
|
|
2772
|
-
ssize_t s1, s2, s3, s4;
|
|
2773
|
-
dtype x, y, a, b;
|
|
2774
|
-
INIT_COUNTER(lp, n);
|
|
2775
|
-
INIT_PTR(lp, 0, p1, s1);
|
|
2776
|
-
INIT_PTR(lp, 1, p2, s2);
|
|
2777
|
-
INIT_PTR(lp, 2, p3, s3);
|
|
2778
|
-
INIT_PTR(lp, 3, p4, s4);
|
|
2779
|
-
for (i = n; i--;) {
|
|
2780
|
-
GET_DATA_STRIDE(p1, s1, dtype, x);
|
|
2781
|
-
GET_DATA_STRIDE(p2, s2, dtype, y);
|
|
2782
|
-
m_divmod(x, y, a, b);
|
|
2783
|
-
SET_DATA_STRIDE(p3, s3, dtype, a);
|
|
2784
|
-
SET_DATA_STRIDE(p4, s4, dtype, b);
|
|
2785
|
-
}
|
|
2786
|
-
}
|
|
2787
|
-
|
|
2788
|
-
static VALUE sfloat_divmod_self(VALUE self, VALUE other) {
|
|
2789
|
-
ndfunc_arg_in_t ain[2] = { { cT, 0 }, { cT, 0 } };
|
|
2790
|
-
ndfunc_arg_out_t aout[2] = { { cT, 0 }, { cT, 0 } };
|
|
2791
|
-
ndfunc_t ndf = { iter_sfloat_divmod, STRIDE_LOOP, 2, 2, ain, aout };
|
|
2792
|
-
|
|
2793
|
-
return na_ndloop(&ndf, 2, self, other);
|
|
2794
|
-
}
|
|
2795
|
-
|
|
2796
|
-
static VALUE sfloat_divmod(VALUE self, VALUE other) {
|
|
2797
|
-
|
|
2798
|
-
VALUE klass, v;
|
|
2799
|
-
klass = na_upcast(rb_obj_class(self), rb_obj_class(other));
|
|
2800
|
-
if (klass == cT) {
|
|
2801
|
-
return sfloat_divmod_self(self, other);
|
|
2802
|
-
} else {
|
|
2803
|
-
v = rb_funcall(klass, id_cast, 1, self);
|
|
2804
|
-
return rb_funcall(v, id_divmod, 1, other);
|
|
2805
|
-
}
|
|
2806
|
-
}
|
|
2807
|
-
|
|
2808
|
-
static void iter_sfloat_pow(na_loop_t* const lp) {
|
|
2809
|
-
size_t i;
|
|
2810
|
-
char *p1, *p2, *p3;
|
|
2811
|
-
ssize_t s1, s2, s3;
|
|
2812
|
-
dtype x, y;
|
|
2813
|
-
INIT_COUNTER(lp, i);
|
|
2814
|
-
INIT_PTR(lp, 0, p1, s1);
|
|
2815
|
-
INIT_PTR(lp, 1, p2, s2);
|
|
2816
|
-
INIT_PTR(lp, 2, p3, s3);
|
|
2817
|
-
for (; i--;) {
|
|
2818
|
-
GET_DATA_STRIDE(p1, s1, dtype, x);
|
|
2819
|
-
GET_DATA_STRIDE(p2, s2, dtype, y);
|
|
2820
|
-
x = m_pow(x, y);
|
|
2821
|
-
SET_DATA_STRIDE(p3, s3, dtype, x);
|
|
2822
|
-
}
|
|
2823
|
-
}
|
|
2824
|
-
|
|
2825
|
-
static void iter_sfloat_pow_int32(na_loop_t* const lp) {
|
|
2826
|
-
size_t i;
|
|
2827
|
-
char *p1, *p2, *p3;
|
|
2828
|
-
ssize_t s1, s2, s3;
|
|
2829
|
-
dtype x;
|
|
2830
|
-
int32_t y;
|
|
2831
|
-
INIT_COUNTER(lp, i);
|
|
2832
|
-
INIT_PTR(lp, 0, p1, s1);
|
|
2833
|
-
INIT_PTR(lp, 1, p2, s2);
|
|
2834
|
-
INIT_PTR(lp, 2, p3, s3);
|
|
2835
|
-
for (; i--;) {
|
|
2836
|
-
GET_DATA_STRIDE(p1, s1, dtype, x);
|
|
2837
|
-
GET_DATA_STRIDE(p2, s2, int32_t, y);
|
|
2838
|
-
x = m_pow_int(x, y);
|
|
2839
|
-
SET_DATA_STRIDE(p3, s3, dtype, x);
|
|
2840
|
-
}
|
|
2841
|
-
}
|
|
2842
|
-
|
|
2843
|
-
static VALUE sfloat_pow_self(VALUE self, VALUE other) {
|
|
2844
|
-
ndfunc_arg_in_t ain[2] = { { cT, 0 }, { cT, 0 } };
|
|
2845
|
-
ndfunc_arg_in_t ain_i[2] = { { cT, 0 }, { numo_cInt32, 0 } };
|
|
2846
|
-
ndfunc_arg_out_t aout[1] = { { cT, 0 } };
|
|
2847
|
-
ndfunc_t ndf = { iter_sfloat_pow, STRIDE_LOOP, 2, 1, ain, aout };
|
|
2848
|
-
ndfunc_t ndf_i = { iter_sfloat_pow_int32, STRIDE_LOOP, 2, 1, ain_i, aout };
|
|
2849
|
-
|
|
2850
|
-
// fixme : use na.integer?
|
|
2851
|
-
if (FIXNUM_P(other) || rb_obj_is_kind_of(other, numo_cInt32)) {
|
|
2852
|
-
return na_ndloop(&ndf_i, 2, self, other);
|
|
2853
|
-
} else {
|
|
2854
|
-
return na_ndloop(&ndf, 2, self, other);
|
|
2855
|
-
}
|
|
2856
|
-
}
|
|
2857
|
-
|
|
2858
|
-
static VALUE sfloat_pow(VALUE self, VALUE other) {
|
|
2859
|
-
|
|
2860
|
-
VALUE klass, v;
|
|
2861
|
-
klass = na_upcast(rb_obj_class(self), rb_obj_class(other));
|
|
2862
|
-
if (klass == cT) {
|
|
2863
|
-
return sfloat_pow_self(self, other);
|
|
2864
|
-
} else {
|
|
2865
|
-
v = rb_funcall(klass, id_cast, 1, self);
|
|
2866
|
-
return rb_funcall(v, id_pow, 1, other);
|
|
2867
|
-
}
|
|
2868
|
-
}
|
|
2869
|
-
|
|
2870
|
-
static void iter_sfloat_minus(na_loop_t* const lp) {
|
|
2871
|
-
size_t i, n;
|
|
2872
|
-
char *p1, *p2;
|
|
2873
|
-
ssize_t s1, s2;
|
|
2874
|
-
size_t *idx1, *idx2;
|
|
2875
|
-
dtype x;
|
|
2876
|
-
|
|
2877
|
-
INIT_COUNTER(lp, n);
|
|
2878
|
-
INIT_PTR_IDX(lp, 0, p1, s1, idx1);
|
|
2879
|
-
INIT_PTR_IDX(lp, 1, p2, s2, idx2);
|
|
2880
|
-
|
|
2881
|
-
if (idx1) {
|
|
2882
|
-
if (idx2) {
|
|
2883
|
-
for (i = 0; i < n; i++) {
|
|
2884
|
-
GET_DATA_INDEX(p1, idx1, dtype, x);
|
|
2885
|
-
x = m_minus(x);
|
|
2886
|
-
SET_DATA_INDEX(p2, idx2, dtype, x);
|
|
2887
|
-
}
|
|
2888
|
-
} else {
|
|
2889
|
-
for (i = 0; i < n; i++) {
|
|
2890
|
-
GET_DATA_INDEX(p1, idx1, dtype, x);
|
|
2891
|
-
x = m_minus(x);
|
|
2892
|
-
SET_DATA_STRIDE(p2, s2, dtype, x);
|
|
2893
|
-
}
|
|
2894
|
-
}
|
|
2895
|
-
} else {
|
|
2896
|
-
if (idx2) {
|
|
2897
|
-
for (i = 0; i < n; i++) {
|
|
2898
|
-
GET_DATA_STRIDE(p1, s1, dtype, x);
|
|
2899
|
-
x = m_minus(x);
|
|
2900
|
-
SET_DATA_INDEX(p2, idx2, dtype, x);
|
|
2901
|
-
}
|
|
2902
|
-
} else {
|
|
2903
|
-
//
|
|
2904
|
-
if (is_aligned(p1, sizeof(dtype)) && is_aligned(p2, sizeof(dtype))) {
|
|
2905
|
-
if (s1 == sizeof(dtype) && s2 == sizeof(dtype)) {
|
|
2906
|
-
for (i = 0; i < n; i++) {
|
|
2907
|
-
((dtype*)p2)[i] = m_minus(((dtype*)p1)[i]);
|
|
2908
|
-
}
|
|
2909
|
-
return;
|
|
2910
|
-
}
|
|
2911
|
-
if (is_aligned_step(s1, sizeof(dtype)) && is_aligned_step(s2, sizeof(dtype))) {
|
|
2912
|
-
//
|
|
2913
|
-
for (i = 0; i < n; i++) {
|
|
2914
|
-
*(dtype*)p2 = m_minus(*(dtype*)p1);
|
|
2915
|
-
p1 += s1;
|
|
2916
|
-
p2 += s2;
|
|
2917
|
-
}
|
|
2918
|
-
return;
|
|
2919
|
-
//
|
|
2920
|
-
}
|
|
2921
|
-
}
|
|
2922
|
-
for (i = 0; i < n; i++) {
|
|
2923
|
-
GET_DATA_STRIDE(p1, s1, dtype, x);
|
|
2924
|
-
x = m_minus(x);
|
|
2925
|
-
SET_DATA_STRIDE(p2, s2, dtype, x);
|
|
2926
|
-
}
|
|
2927
|
-
//
|
|
2928
|
-
}
|
|
2929
|
-
}
|
|
2930
|
-
}
|
|
2931
|
-
|
|
2932
|
-
static VALUE sfloat_minus(VALUE self) {
|
|
2933
|
-
ndfunc_arg_in_t ain[1] = { { cT, 0 } };
|
|
2934
|
-
ndfunc_arg_out_t aout[1] = { { cT, 0 } };
|
|
2935
|
-
ndfunc_t ndf = { iter_sfloat_minus, FULL_LOOP, 1, 1, ain, aout };
|
|
2936
|
-
|
|
2937
|
-
return na_ndloop(&ndf, 1, self);
|
|
2938
|
-
}
|
|
2939
|
-
|
|
2940
|
-
static void iter_sfloat_reciprocal(na_loop_t* const lp) {
|
|
2941
|
-
size_t i, n;
|
|
2942
|
-
char *p1, *p2;
|
|
2943
|
-
ssize_t s1, s2;
|
|
2944
|
-
size_t *idx1, *idx2;
|
|
2945
|
-
dtype x;
|
|
2946
|
-
|
|
2947
|
-
INIT_COUNTER(lp, n);
|
|
2948
|
-
INIT_PTR_IDX(lp, 0, p1, s1, idx1);
|
|
2949
|
-
INIT_PTR_IDX(lp, 1, p2, s2, idx2);
|
|
2950
|
-
|
|
2951
|
-
if (idx1) {
|
|
2952
|
-
if (idx2) {
|
|
2953
|
-
for (i = 0; i < n; i++) {
|
|
2954
|
-
GET_DATA_INDEX(p1, idx1, dtype, x);
|
|
2955
|
-
x = m_reciprocal(x);
|
|
2956
|
-
SET_DATA_INDEX(p2, idx2, dtype, x);
|
|
2957
|
-
}
|
|
2958
|
-
} else {
|
|
2959
|
-
for (i = 0; i < n; i++) {
|
|
2960
|
-
GET_DATA_INDEX(p1, idx1, dtype, x);
|
|
2961
|
-
x = m_reciprocal(x);
|
|
2962
|
-
SET_DATA_STRIDE(p2, s2, dtype, x);
|
|
2963
|
-
}
|
|
2964
|
-
}
|
|
2965
|
-
} else {
|
|
2966
|
-
if (idx2) {
|
|
2967
|
-
for (i = 0; i < n; i++) {
|
|
2968
|
-
GET_DATA_STRIDE(p1, s1, dtype, x);
|
|
2969
|
-
x = m_reciprocal(x);
|
|
2970
|
-
SET_DATA_INDEX(p2, idx2, dtype, x);
|
|
2971
|
-
}
|
|
2972
|
-
} else {
|
|
2973
|
-
//
|
|
2974
|
-
if (is_aligned(p1, sizeof(dtype)) && is_aligned(p2, sizeof(dtype))) {
|
|
2975
|
-
if (s1 == sizeof(dtype) && s2 == sizeof(dtype)) {
|
|
2976
|
-
for (i = 0; i < n; i++) {
|
|
2977
|
-
((dtype*)p2)[i] = m_reciprocal(((dtype*)p1)[i]);
|
|
2978
|
-
}
|
|
2979
|
-
return;
|
|
2980
|
-
}
|
|
2981
|
-
if (is_aligned_step(s1, sizeof(dtype)) && is_aligned_step(s2, sizeof(dtype))) {
|
|
2982
|
-
//
|
|
2983
|
-
for (i = 0; i < n; i++) {
|
|
2984
|
-
*(dtype*)p2 = m_reciprocal(*(dtype*)p1);
|
|
2985
|
-
p1 += s1;
|
|
2986
|
-
p2 += s2;
|
|
2987
|
-
}
|
|
2988
|
-
return;
|
|
2989
|
-
//
|
|
2990
|
-
}
|
|
2991
|
-
}
|
|
2992
|
-
for (i = 0; i < n; i++) {
|
|
2993
|
-
GET_DATA_STRIDE(p1, s1, dtype, x);
|
|
2994
|
-
x = m_reciprocal(x);
|
|
2995
|
-
SET_DATA_STRIDE(p2, s2, dtype, x);
|
|
2996
|
-
}
|
|
2997
|
-
//
|
|
2998
|
-
}
|
|
2999
|
-
}
|
|
3000
|
-
}
|
|
3001
|
-
|
|
3002
|
-
static VALUE sfloat_reciprocal(VALUE self) {
|
|
3003
|
-
ndfunc_arg_in_t ain[1] = { { cT, 0 } };
|
|
3004
|
-
ndfunc_arg_out_t aout[1] = { { cT, 0 } };
|
|
3005
|
-
ndfunc_t ndf = { iter_sfloat_reciprocal, FULL_LOOP, 1, 1, ain, aout };
|
|
3006
|
-
|
|
3007
|
-
return na_ndloop(&ndf, 1, self);
|
|
3008
|
-
}
|
|
3009
|
-
|
|
3010
|
-
static void iter_sfloat_sign(na_loop_t* const lp) {
|
|
3011
|
-
size_t i, n;
|
|
3012
|
-
char *p1, *p2;
|
|
3013
|
-
ssize_t s1, s2;
|
|
3014
|
-
size_t *idx1, *idx2;
|
|
3015
|
-
dtype x;
|
|
3016
|
-
|
|
3017
|
-
INIT_COUNTER(lp, n);
|
|
3018
|
-
INIT_PTR_IDX(lp, 0, p1, s1, idx1);
|
|
3019
|
-
INIT_PTR_IDX(lp, 1, p2, s2, idx2);
|
|
3020
|
-
|
|
3021
|
-
if (idx1) {
|
|
3022
|
-
if (idx2) {
|
|
3023
|
-
for (i = 0; i < n; i++) {
|
|
3024
|
-
GET_DATA_INDEX(p1, idx1, dtype, x);
|
|
3025
|
-
x = m_sign(x);
|
|
3026
|
-
SET_DATA_INDEX(p2, idx2, dtype, x);
|
|
3027
|
-
}
|
|
3028
|
-
} else {
|
|
3029
|
-
for (i = 0; i < n; i++) {
|
|
3030
|
-
GET_DATA_INDEX(p1, idx1, dtype, x);
|
|
3031
|
-
x = m_sign(x);
|
|
3032
|
-
SET_DATA_STRIDE(p2, s2, dtype, x);
|
|
3033
|
-
}
|
|
3034
|
-
}
|
|
3035
|
-
} else {
|
|
3036
|
-
if (idx2) {
|
|
3037
|
-
for (i = 0; i < n; i++) {
|
|
3038
|
-
GET_DATA_STRIDE(p1, s1, dtype, x);
|
|
3039
|
-
x = m_sign(x);
|
|
3040
|
-
SET_DATA_INDEX(p2, idx2, dtype, x);
|
|
3041
|
-
}
|
|
3042
|
-
} else {
|
|
3043
|
-
//
|
|
3044
|
-
if (is_aligned(p1, sizeof(dtype)) && is_aligned(p2, sizeof(dtype))) {
|
|
3045
|
-
if (s1 == sizeof(dtype) && s2 == sizeof(dtype)) {
|
|
3046
|
-
for (i = 0; i < n; i++) {
|
|
3047
|
-
((dtype*)p2)[i] = m_sign(((dtype*)p1)[i]);
|
|
3048
|
-
}
|
|
3049
|
-
return;
|
|
3050
|
-
}
|
|
3051
|
-
if (is_aligned_step(s1, sizeof(dtype)) && is_aligned_step(s2, sizeof(dtype))) {
|
|
3052
|
-
//
|
|
3053
|
-
for (i = 0; i < n; i++) {
|
|
3054
|
-
*(dtype*)p2 = m_sign(*(dtype*)p1);
|
|
3055
|
-
p1 += s1;
|
|
3056
|
-
p2 += s2;
|
|
3057
|
-
}
|
|
3058
|
-
return;
|
|
3059
|
-
//
|
|
3060
|
-
}
|
|
3061
|
-
}
|
|
3062
|
-
for (i = 0; i < n; i++) {
|
|
3063
|
-
GET_DATA_STRIDE(p1, s1, dtype, x);
|
|
3064
|
-
x = m_sign(x);
|
|
3065
|
-
SET_DATA_STRIDE(p2, s2, dtype, x);
|
|
3066
|
-
}
|
|
3067
|
-
//
|
|
3068
|
-
}
|
|
3069
|
-
}
|
|
3070
|
-
}
|
|
3071
|
-
|
|
3072
|
-
static VALUE sfloat_sign(VALUE self) {
|
|
3073
|
-
ndfunc_arg_in_t ain[1] = { { cT, 0 } };
|
|
3074
|
-
ndfunc_arg_out_t aout[1] = { { cT, 0 } };
|
|
3075
|
-
ndfunc_t ndf = { iter_sfloat_sign, FULL_LOOP, 1, 1, ain, aout };
|
|
3076
|
-
|
|
3077
|
-
return na_ndloop(&ndf, 1, self);
|
|
3078
|
-
}
|
|
3079
|
-
|
|
3080
|
-
static void iter_sfloat_square(na_loop_t* const lp) {
|
|
3081
|
-
size_t i, n;
|
|
3082
|
-
char *p1, *p2;
|
|
3083
|
-
ssize_t s1, s2;
|
|
3084
|
-
size_t *idx1, *idx2;
|
|
3085
|
-
dtype x;
|
|
3086
|
-
|
|
3087
|
-
INIT_COUNTER(lp, n);
|
|
3088
|
-
INIT_PTR_IDX(lp, 0, p1, s1, idx1);
|
|
3089
|
-
INIT_PTR_IDX(lp, 1, p2, s2, idx2);
|
|
3090
|
-
|
|
3091
|
-
if (idx1) {
|
|
3092
|
-
if (idx2) {
|
|
3093
|
-
for (i = 0; i < n; i++) {
|
|
3094
|
-
GET_DATA_INDEX(p1, idx1, dtype, x);
|
|
3095
|
-
x = m_square(x);
|
|
3096
|
-
SET_DATA_INDEX(p2, idx2, dtype, x);
|
|
3097
|
-
}
|
|
3098
|
-
} else {
|
|
3099
|
-
for (i = 0; i < n; i++) {
|
|
3100
|
-
GET_DATA_INDEX(p1, idx1, dtype, x);
|
|
3101
|
-
x = m_square(x);
|
|
3102
|
-
SET_DATA_STRIDE(p2, s2, dtype, x);
|
|
3103
|
-
}
|
|
3104
|
-
}
|
|
3105
|
-
} else {
|
|
3106
|
-
if (idx2) {
|
|
3107
|
-
for (i = 0; i < n; i++) {
|
|
3108
|
-
GET_DATA_STRIDE(p1, s1, dtype, x);
|
|
3109
|
-
x = m_square(x);
|
|
3110
|
-
SET_DATA_INDEX(p2, idx2, dtype, x);
|
|
3111
|
-
}
|
|
3112
|
-
} else {
|
|
3113
|
-
//
|
|
3114
|
-
if (is_aligned(p1, sizeof(dtype)) && is_aligned(p2, sizeof(dtype))) {
|
|
3115
|
-
if (s1 == sizeof(dtype) && s2 == sizeof(dtype)) {
|
|
3116
|
-
for (i = 0; i < n; i++) {
|
|
3117
|
-
((dtype*)p2)[i] = m_square(((dtype*)p1)[i]);
|
|
3118
|
-
}
|
|
3119
|
-
return;
|
|
3120
|
-
}
|
|
3121
|
-
if (is_aligned_step(s1, sizeof(dtype)) && is_aligned_step(s2, sizeof(dtype))) {
|
|
3122
|
-
//
|
|
3123
|
-
for (i = 0; i < n; i++) {
|
|
3124
|
-
*(dtype*)p2 = m_square(*(dtype*)p1);
|
|
3125
|
-
p1 += s1;
|
|
3126
|
-
p2 += s2;
|
|
3127
|
-
}
|
|
3128
|
-
return;
|
|
3129
|
-
//
|
|
3130
|
-
}
|
|
3131
|
-
}
|
|
3132
|
-
for (i = 0; i < n; i++) {
|
|
3133
|
-
GET_DATA_STRIDE(p1, s1, dtype, x);
|
|
3134
|
-
x = m_square(x);
|
|
3135
|
-
SET_DATA_STRIDE(p2, s2, dtype, x);
|
|
3136
|
-
}
|
|
3137
|
-
//
|
|
3138
|
-
}
|
|
3139
|
-
}
|
|
3140
|
-
}
|
|
3141
|
-
|
|
3142
|
-
static VALUE sfloat_square(VALUE self) {
|
|
3143
|
-
ndfunc_arg_in_t ain[1] = { { cT, 0 } };
|
|
3144
|
-
ndfunc_arg_out_t aout[1] = { { cT, 0 } };
|
|
3145
|
-
ndfunc_t ndf = { iter_sfloat_square, FULL_LOOP, 1, 1, ain, aout };
|
|
3146
|
-
|
|
3147
|
-
return na_ndloop(&ndf, 1, self);
|
|
3148
|
-
}
|
|
3149
|
-
|
|
3150
|
-
static void iter_sfloat_eq(na_loop_t* const lp) {
|
|
3151
|
-
size_t i;
|
|
3152
|
-
char *p1, *p2;
|
|
3153
|
-
BIT_DIGIT* a3;
|
|
3154
|
-
size_t p3;
|
|
3155
|
-
ssize_t s1, s2, s3;
|
|
3156
|
-
dtype x, y;
|
|
3157
|
-
BIT_DIGIT b;
|
|
3158
|
-
INIT_COUNTER(lp, i);
|
|
3159
|
-
INIT_PTR(lp, 0, p1, s1);
|
|
3160
|
-
INIT_PTR(lp, 1, p2, s2);
|
|
3161
|
-
INIT_PTR_BIT(lp, 2, a3, p3, s3);
|
|
3162
|
-
for (; i--;) {
|
|
3163
|
-
GET_DATA_STRIDE(p1, s1, dtype, x);
|
|
3164
|
-
GET_DATA_STRIDE(p2, s2, dtype, y);
|
|
3165
|
-
b = (m_eq(x, y)) ? 1 : 0;
|
|
3166
|
-
STORE_BIT(a3, p3, b);
|
|
3167
|
-
p3 += s3;
|
|
3168
|
-
}
|
|
3169
|
-
}
|
|
3170
|
-
|
|
3171
|
-
static VALUE sfloat_eq_self(VALUE self, VALUE other) {
|
|
3172
|
-
ndfunc_arg_in_t ain[2] = { { cT, 0 }, { cT, 0 } };
|
|
3173
|
-
ndfunc_arg_out_t aout[1] = { { numo_cBit, 0 } };
|
|
3174
|
-
ndfunc_t ndf = { iter_sfloat_eq, STRIDE_LOOP, 2, 1, ain, aout };
|
|
3175
|
-
|
|
3176
|
-
return na_ndloop(&ndf, 2, self, other);
|
|
3177
|
-
}
|
|
3178
|
-
|
|
3179
|
-
static VALUE sfloat_eq(VALUE self, VALUE other) {
|
|
3180
|
-
|
|
3181
|
-
VALUE klass, v;
|
|
3182
|
-
klass = na_upcast(rb_obj_class(self), rb_obj_class(other));
|
|
3183
|
-
if (klass == cT) {
|
|
3184
|
-
return sfloat_eq_self(self, other);
|
|
3185
|
-
} else {
|
|
3186
|
-
v = rb_funcall(klass, id_cast, 1, self);
|
|
3187
|
-
return rb_funcall(v, id_eq, 1, other);
|
|
3188
|
-
}
|
|
3189
|
-
}
|
|
3190
|
-
|
|
3191
|
-
static void iter_sfloat_ne(na_loop_t* const lp) {
|
|
3192
|
-
size_t i;
|
|
3193
|
-
char *p1, *p2;
|
|
3194
|
-
BIT_DIGIT* a3;
|
|
3195
|
-
size_t p3;
|
|
3196
|
-
ssize_t s1, s2, s3;
|
|
3197
|
-
dtype x, y;
|
|
3198
|
-
BIT_DIGIT b;
|
|
3199
|
-
INIT_COUNTER(lp, i);
|
|
3200
|
-
INIT_PTR(lp, 0, p1, s1);
|
|
3201
|
-
INIT_PTR(lp, 1, p2, s2);
|
|
3202
|
-
INIT_PTR_BIT(lp, 2, a3, p3, s3);
|
|
3203
|
-
for (; i--;) {
|
|
3204
|
-
GET_DATA_STRIDE(p1, s1, dtype, x);
|
|
3205
|
-
GET_DATA_STRIDE(p2, s2, dtype, y);
|
|
3206
|
-
b = (m_ne(x, y)) ? 1 : 0;
|
|
3207
|
-
STORE_BIT(a3, p3, b);
|
|
3208
|
-
p3 += s3;
|
|
3209
|
-
}
|
|
3210
|
-
}
|
|
3211
|
-
|
|
3212
|
-
static VALUE sfloat_ne_self(VALUE self, VALUE other) {
|
|
3213
|
-
ndfunc_arg_in_t ain[2] = { { cT, 0 }, { cT, 0 } };
|
|
3214
|
-
ndfunc_arg_out_t aout[1] = { { numo_cBit, 0 } };
|
|
3215
|
-
ndfunc_t ndf = { iter_sfloat_ne, STRIDE_LOOP, 2, 1, ain, aout };
|
|
3216
|
-
|
|
3217
|
-
return na_ndloop(&ndf, 2, self, other);
|
|
3218
|
-
}
|
|
3219
|
-
|
|
3220
|
-
static VALUE sfloat_ne(VALUE self, VALUE other) {
|
|
3221
|
-
|
|
3222
|
-
VALUE klass, v;
|
|
3223
|
-
klass = na_upcast(rb_obj_class(self), rb_obj_class(other));
|
|
3224
|
-
if (klass == cT) {
|
|
3225
|
-
return sfloat_ne_self(self, other);
|
|
3226
|
-
} else {
|
|
3227
|
-
v = rb_funcall(klass, id_cast, 1, self);
|
|
3228
|
-
return rb_funcall(v, id_ne, 1, other);
|
|
3229
|
-
}
|
|
3230
|
-
}
|
|
3231
|
-
|
|
3232
|
-
static void iter_sfloat_nearly_eq(na_loop_t* const lp) {
|
|
3233
|
-
size_t i;
|
|
3234
|
-
char *p1, *p2;
|
|
3235
|
-
BIT_DIGIT* a3;
|
|
3236
|
-
size_t p3;
|
|
3237
|
-
ssize_t s1, s2, s3;
|
|
3238
|
-
dtype x, y;
|
|
3239
|
-
BIT_DIGIT b;
|
|
3240
|
-
INIT_COUNTER(lp, i);
|
|
3241
|
-
INIT_PTR(lp, 0, p1, s1);
|
|
3242
|
-
INIT_PTR(lp, 1, p2, s2);
|
|
3243
|
-
INIT_PTR_BIT(lp, 2, a3, p3, s3);
|
|
3244
|
-
for (; i--;) {
|
|
3245
|
-
GET_DATA_STRIDE(p1, s1, dtype, x);
|
|
3246
|
-
GET_DATA_STRIDE(p2, s2, dtype, y);
|
|
3247
|
-
b = (m_nearly_eq(x, y)) ? 1 : 0;
|
|
3248
|
-
STORE_BIT(a3, p3, b);
|
|
3249
|
-
p3 += s3;
|
|
3250
|
-
}
|
|
3251
|
-
}
|
|
3252
|
-
|
|
3253
|
-
static VALUE sfloat_nearly_eq_self(VALUE self, VALUE other) {
|
|
3254
|
-
ndfunc_arg_in_t ain[2] = { { cT, 0 }, { cT, 0 } };
|
|
3255
|
-
ndfunc_arg_out_t aout[1] = { { numo_cBit, 0 } };
|
|
3256
|
-
ndfunc_t ndf = { iter_sfloat_nearly_eq, STRIDE_LOOP, 2, 1, ain, aout };
|
|
3257
|
-
|
|
3258
|
-
return na_ndloop(&ndf, 2, self, other);
|
|
3259
|
-
}
|
|
3260
|
-
|
|
3261
|
-
static VALUE sfloat_nearly_eq(VALUE self, VALUE other) {
|
|
3262
|
-
|
|
3263
|
-
VALUE klass, v;
|
|
3264
|
-
klass = na_upcast(rb_obj_class(self), rb_obj_class(other));
|
|
3265
|
-
if (klass == cT) {
|
|
3266
|
-
return sfloat_nearly_eq_self(self, other);
|
|
3267
|
-
} else {
|
|
3268
|
-
v = rb_funcall(klass, id_cast, 1, self);
|
|
3269
|
-
return rb_funcall(v, id_nearly_eq, 1, other);
|
|
3270
|
-
}
|
|
3271
|
-
}
|
|
3272
|
-
|
|
3273
|
-
static void iter_sfloat_floor(na_loop_t* const lp) {
|
|
3274
|
-
size_t i, n;
|
|
3275
|
-
char *p1, *p2;
|
|
3276
|
-
ssize_t s1, s2;
|
|
3277
|
-
size_t *idx1, *idx2;
|
|
3278
|
-
dtype x;
|
|
3279
|
-
|
|
3280
|
-
INIT_COUNTER(lp, n);
|
|
3281
|
-
INIT_PTR_IDX(lp, 0, p1, s1, idx1);
|
|
3282
|
-
INIT_PTR_IDX(lp, 1, p2, s2, idx2);
|
|
3283
|
-
|
|
3284
|
-
if (idx1) {
|
|
3285
|
-
if (idx2) {
|
|
3286
|
-
for (i = 0; i < n; i++) {
|
|
3287
|
-
GET_DATA_INDEX(p1, idx1, dtype, x);
|
|
3288
|
-
x = m_floor(x);
|
|
3289
|
-
SET_DATA_INDEX(p2, idx2, dtype, x);
|
|
3290
|
-
}
|
|
3291
|
-
} else {
|
|
3292
|
-
for (i = 0; i < n; i++) {
|
|
3293
|
-
GET_DATA_INDEX(p1, idx1, dtype, x);
|
|
3294
|
-
x = m_floor(x);
|
|
3295
|
-
SET_DATA_STRIDE(p2, s2, dtype, x);
|
|
3296
|
-
}
|
|
3297
|
-
}
|
|
3298
|
-
} else {
|
|
3299
|
-
if (idx2) {
|
|
3300
|
-
for (i = 0; i < n; i++) {
|
|
3301
|
-
GET_DATA_STRIDE(p1, s1, dtype, x);
|
|
3302
|
-
x = m_floor(x);
|
|
3303
|
-
SET_DATA_INDEX(p2, idx2, dtype, x);
|
|
3304
|
-
}
|
|
3305
|
-
} else {
|
|
3306
|
-
//
|
|
3307
|
-
if (is_aligned(p1, sizeof(dtype)) && is_aligned(p2, sizeof(dtype))) {
|
|
3308
|
-
if (s1 == sizeof(dtype) && s2 == sizeof(dtype)) {
|
|
3309
|
-
for (i = 0; i < n; i++) {
|
|
3310
|
-
((dtype*)p2)[i] = m_floor(((dtype*)p1)[i]);
|
|
3311
|
-
}
|
|
3312
|
-
return;
|
|
3313
|
-
}
|
|
3314
|
-
if (is_aligned_step(s1, sizeof(dtype)) && is_aligned_step(s2, sizeof(dtype))) {
|
|
3315
|
-
//
|
|
3316
|
-
for (i = 0; i < n; i++) {
|
|
3317
|
-
*(dtype*)p2 = m_floor(*(dtype*)p1);
|
|
3318
|
-
p1 += s1;
|
|
3319
|
-
p2 += s2;
|
|
3320
|
-
}
|
|
3321
|
-
return;
|
|
3322
|
-
//
|
|
3323
|
-
}
|
|
3324
|
-
}
|
|
3325
|
-
for (i = 0; i < n; i++) {
|
|
3326
|
-
GET_DATA_STRIDE(p1, s1, dtype, x);
|
|
3327
|
-
x = m_floor(x);
|
|
3328
|
-
SET_DATA_STRIDE(p2, s2, dtype, x);
|
|
3329
|
-
}
|
|
3330
|
-
//
|
|
3331
|
-
}
|
|
3332
|
-
}
|
|
3333
|
-
}
|
|
3334
|
-
|
|
3335
|
-
static VALUE sfloat_floor(VALUE self) {
|
|
3336
|
-
ndfunc_arg_in_t ain[1] = { { cT, 0 } };
|
|
3337
|
-
ndfunc_arg_out_t aout[1] = { { cT, 0 } };
|
|
3338
|
-
ndfunc_t ndf = { iter_sfloat_floor, FULL_LOOP, 1, 1, ain, aout };
|
|
3339
|
-
|
|
3340
|
-
return na_ndloop(&ndf, 1, self);
|
|
3341
|
-
}
|
|
3342
|
-
|
|
3343
|
-
static void iter_sfloat_round(na_loop_t* const lp) {
|
|
3344
|
-
size_t i, n;
|
|
3345
|
-
char *p1, *p2;
|
|
3346
|
-
ssize_t s1, s2;
|
|
3347
|
-
size_t *idx1, *idx2;
|
|
3348
|
-
dtype x;
|
|
3349
|
-
|
|
3350
|
-
INIT_COUNTER(lp, n);
|
|
3351
|
-
INIT_PTR_IDX(lp, 0, p1, s1, idx1);
|
|
3352
|
-
INIT_PTR_IDX(lp, 1, p2, s2, idx2);
|
|
3353
|
-
|
|
3354
|
-
if (idx1) {
|
|
3355
|
-
if (idx2) {
|
|
3356
|
-
for (i = 0; i < n; i++) {
|
|
3357
|
-
GET_DATA_INDEX(p1, idx1, dtype, x);
|
|
3358
|
-
x = m_round(x);
|
|
3359
|
-
SET_DATA_INDEX(p2, idx2, dtype, x);
|
|
3360
|
-
}
|
|
3361
|
-
} else {
|
|
3362
|
-
for (i = 0; i < n; i++) {
|
|
3363
|
-
GET_DATA_INDEX(p1, idx1, dtype, x);
|
|
3364
|
-
x = m_round(x);
|
|
3365
|
-
SET_DATA_STRIDE(p2, s2, dtype, x);
|
|
3366
|
-
}
|
|
3367
|
-
}
|
|
3368
|
-
} else {
|
|
3369
|
-
if (idx2) {
|
|
3370
|
-
for (i = 0; i < n; i++) {
|
|
3371
|
-
GET_DATA_STRIDE(p1, s1, dtype, x);
|
|
3372
|
-
x = m_round(x);
|
|
3373
|
-
SET_DATA_INDEX(p2, idx2, dtype, x);
|
|
3374
|
-
}
|
|
3375
|
-
} else {
|
|
3376
|
-
//
|
|
3377
|
-
if (is_aligned(p1, sizeof(dtype)) && is_aligned(p2, sizeof(dtype))) {
|
|
3378
|
-
if (s1 == sizeof(dtype) && s2 == sizeof(dtype)) {
|
|
3379
|
-
for (i = 0; i < n; i++) {
|
|
3380
|
-
((dtype*)p2)[i] = m_round(((dtype*)p1)[i]);
|
|
3381
|
-
}
|
|
3382
|
-
return;
|
|
3383
|
-
}
|
|
3384
|
-
if (is_aligned_step(s1, sizeof(dtype)) && is_aligned_step(s2, sizeof(dtype))) {
|
|
3385
|
-
//
|
|
3386
|
-
for (i = 0; i < n; i++) {
|
|
3387
|
-
*(dtype*)p2 = m_round(*(dtype*)p1);
|
|
3388
|
-
p1 += s1;
|
|
3389
|
-
p2 += s2;
|
|
3390
|
-
}
|
|
3391
|
-
return;
|
|
3392
|
-
//
|
|
3393
|
-
}
|
|
3394
|
-
}
|
|
3395
|
-
for (i = 0; i < n; i++) {
|
|
3396
|
-
GET_DATA_STRIDE(p1, s1, dtype, x);
|
|
3397
|
-
x = m_round(x);
|
|
3398
|
-
SET_DATA_STRIDE(p2, s2, dtype, x);
|
|
3399
|
-
}
|
|
3400
|
-
//
|
|
3401
|
-
}
|
|
3402
|
-
}
|
|
3403
|
-
}
|
|
3404
|
-
|
|
3405
|
-
static VALUE sfloat_round(VALUE self) {
|
|
3406
|
-
ndfunc_arg_in_t ain[1] = { { cT, 0 } };
|
|
3407
|
-
ndfunc_arg_out_t aout[1] = { { cT, 0 } };
|
|
3408
|
-
ndfunc_t ndf = { iter_sfloat_round, FULL_LOOP, 1, 1, ain, aout };
|
|
3409
|
-
|
|
3410
|
-
return na_ndloop(&ndf, 1, self);
|
|
3411
|
-
}
|
|
3412
|
-
|
|
3413
|
-
static void iter_sfloat_ceil(na_loop_t* const lp) {
|
|
3414
|
-
size_t i, n;
|
|
3415
|
-
char *p1, *p2;
|
|
3416
|
-
ssize_t s1, s2;
|
|
3417
|
-
size_t *idx1, *idx2;
|
|
3418
|
-
dtype x;
|
|
3419
|
-
|
|
3420
|
-
INIT_COUNTER(lp, n);
|
|
3421
|
-
INIT_PTR_IDX(lp, 0, p1, s1, idx1);
|
|
3422
|
-
INIT_PTR_IDX(lp, 1, p2, s2, idx2);
|
|
3423
|
-
|
|
3424
|
-
if (idx1) {
|
|
3425
|
-
if (idx2) {
|
|
3426
|
-
for (i = 0; i < n; i++) {
|
|
3427
|
-
GET_DATA_INDEX(p1, idx1, dtype, x);
|
|
3428
|
-
x = m_ceil(x);
|
|
3429
|
-
SET_DATA_INDEX(p2, idx2, dtype, x);
|
|
3430
|
-
}
|
|
3431
|
-
} else {
|
|
3432
|
-
for (i = 0; i < n; i++) {
|
|
3433
|
-
GET_DATA_INDEX(p1, idx1, dtype, x);
|
|
3434
|
-
x = m_ceil(x);
|
|
3435
|
-
SET_DATA_STRIDE(p2, s2, dtype, x);
|
|
3436
|
-
}
|
|
3437
|
-
}
|
|
3438
|
-
} else {
|
|
3439
|
-
if (idx2) {
|
|
3440
|
-
for (i = 0; i < n; i++) {
|
|
3441
|
-
GET_DATA_STRIDE(p1, s1, dtype, x);
|
|
3442
|
-
x = m_ceil(x);
|
|
3443
|
-
SET_DATA_INDEX(p2, idx2, dtype, x);
|
|
3444
|
-
}
|
|
3445
|
-
} else {
|
|
3446
|
-
//
|
|
3447
|
-
if (is_aligned(p1, sizeof(dtype)) && is_aligned(p2, sizeof(dtype))) {
|
|
3448
|
-
if (s1 == sizeof(dtype) && s2 == sizeof(dtype)) {
|
|
3449
|
-
for (i = 0; i < n; i++) {
|
|
3450
|
-
((dtype*)p2)[i] = m_ceil(((dtype*)p1)[i]);
|
|
3451
|
-
}
|
|
3452
|
-
return;
|
|
3453
|
-
}
|
|
3454
|
-
if (is_aligned_step(s1, sizeof(dtype)) && is_aligned_step(s2, sizeof(dtype))) {
|
|
3455
|
-
//
|
|
3456
|
-
for (i = 0; i < n; i++) {
|
|
3457
|
-
*(dtype*)p2 = m_ceil(*(dtype*)p1);
|
|
3458
|
-
p1 += s1;
|
|
3459
|
-
p2 += s2;
|
|
3460
|
-
}
|
|
3461
|
-
return;
|
|
3462
|
-
//
|
|
3463
|
-
}
|
|
3464
|
-
}
|
|
3465
|
-
for (i = 0; i < n; i++) {
|
|
3466
|
-
GET_DATA_STRIDE(p1, s1, dtype, x);
|
|
3467
|
-
x = m_ceil(x);
|
|
3468
|
-
SET_DATA_STRIDE(p2, s2, dtype, x);
|
|
3469
|
-
}
|
|
3470
|
-
//
|
|
3471
|
-
}
|
|
3472
|
-
}
|
|
3473
|
-
}
|
|
3474
|
-
|
|
3475
|
-
static VALUE sfloat_ceil(VALUE self) {
|
|
3476
|
-
ndfunc_arg_in_t ain[1] = { { cT, 0 } };
|
|
3477
|
-
ndfunc_arg_out_t aout[1] = { { cT, 0 } };
|
|
3478
|
-
ndfunc_t ndf = { iter_sfloat_ceil, FULL_LOOP, 1, 1, ain, aout };
|
|
3479
|
-
|
|
3480
|
-
return na_ndloop(&ndf, 1, self);
|
|
3481
|
-
}
|
|
3482
|
-
|
|
3483
|
-
static void iter_sfloat_trunc(na_loop_t* const lp) {
|
|
3484
|
-
size_t i, n;
|
|
3485
|
-
char *p1, *p2;
|
|
3486
|
-
ssize_t s1, s2;
|
|
3487
|
-
size_t *idx1, *idx2;
|
|
3488
|
-
dtype x;
|
|
3489
|
-
|
|
3490
|
-
INIT_COUNTER(lp, n);
|
|
3491
|
-
INIT_PTR_IDX(lp, 0, p1, s1, idx1);
|
|
3492
|
-
INIT_PTR_IDX(lp, 1, p2, s2, idx2);
|
|
3493
|
-
|
|
3494
|
-
if (idx1) {
|
|
3495
|
-
if (idx2) {
|
|
3496
|
-
for (i = 0; i < n; i++) {
|
|
3497
|
-
GET_DATA_INDEX(p1, idx1, dtype, x);
|
|
3498
|
-
x = m_trunc(x);
|
|
3499
|
-
SET_DATA_INDEX(p2, idx2, dtype, x);
|
|
3500
|
-
}
|
|
3501
|
-
} else {
|
|
3502
|
-
for (i = 0; i < n; i++) {
|
|
3503
|
-
GET_DATA_INDEX(p1, idx1, dtype, x);
|
|
3504
|
-
x = m_trunc(x);
|
|
3505
|
-
SET_DATA_STRIDE(p2, s2, dtype, x);
|
|
3506
|
-
}
|
|
3507
|
-
}
|
|
3508
|
-
} else {
|
|
3509
|
-
if (idx2) {
|
|
3510
|
-
for (i = 0; i < n; i++) {
|
|
3511
|
-
GET_DATA_STRIDE(p1, s1, dtype, x);
|
|
3512
|
-
x = m_trunc(x);
|
|
3513
|
-
SET_DATA_INDEX(p2, idx2, dtype, x);
|
|
3514
|
-
}
|
|
3515
|
-
} else {
|
|
3516
|
-
//
|
|
3517
|
-
if (is_aligned(p1, sizeof(dtype)) && is_aligned(p2, sizeof(dtype))) {
|
|
3518
|
-
if (s1 == sizeof(dtype) && s2 == sizeof(dtype)) {
|
|
3519
|
-
for (i = 0; i < n; i++) {
|
|
3520
|
-
((dtype*)p2)[i] = m_trunc(((dtype*)p1)[i]);
|
|
3521
|
-
}
|
|
3522
|
-
return;
|
|
3523
|
-
}
|
|
3524
|
-
if (is_aligned_step(s1, sizeof(dtype)) && is_aligned_step(s2, sizeof(dtype))) {
|
|
3525
|
-
//
|
|
3526
|
-
for (i = 0; i < n; i++) {
|
|
3527
|
-
*(dtype*)p2 = m_trunc(*(dtype*)p1);
|
|
3528
|
-
p1 += s1;
|
|
3529
|
-
p2 += s2;
|
|
3530
|
-
}
|
|
3531
|
-
return;
|
|
3532
|
-
//
|
|
3533
|
-
}
|
|
3534
|
-
}
|
|
3535
|
-
for (i = 0; i < n; i++) {
|
|
3536
|
-
GET_DATA_STRIDE(p1, s1, dtype, x);
|
|
3537
|
-
x = m_trunc(x);
|
|
3538
|
-
SET_DATA_STRIDE(p2, s2, dtype, x);
|
|
3539
|
-
}
|
|
3540
|
-
//
|
|
3541
|
-
}
|
|
3542
|
-
}
|
|
3543
|
-
}
|
|
3544
|
-
|
|
3545
|
-
static VALUE sfloat_trunc(VALUE self) {
|
|
3546
|
-
ndfunc_arg_in_t ain[1] = { { cT, 0 } };
|
|
3547
|
-
ndfunc_arg_out_t aout[1] = { { cT, 0 } };
|
|
3548
|
-
ndfunc_t ndf = { iter_sfloat_trunc, FULL_LOOP, 1, 1, ain, aout };
|
|
3549
|
-
|
|
3550
|
-
return na_ndloop(&ndf, 1, self);
|
|
3551
|
-
}
|
|
3552
|
-
|
|
3553
|
-
static void iter_sfloat_rint(na_loop_t* const lp) {
|
|
3554
|
-
size_t i, n;
|
|
3555
|
-
char *p1, *p2;
|
|
3556
|
-
ssize_t s1, s2;
|
|
3557
|
-
size_t *idx1, *idx2;
|
|
3558
|
-
dtype x;
|
|
3559
|
-
|
|
3560
|
-
INIT_COUNTER(lp, n);
|
|
3561
|
-
INIT_PTR_IDX(lp, 0, p1, s1, idx1);
|
|
3562
|
-
INIT_PTR_IDX(lp, 1, p2, s2, idx2);
|
|
3563
|
-
|
|
3564
|
-
if (idx1) {
|
|
3565
|
-
if (idx2) {
|
|
3566
|
-
for (i = 0; i < n; i++) {
|
|
3567
|
-
GET_DATA_INDEX(p1, idx1, dtype, x);
|
|
3568
|
-
x = m_rint(x);
|
|
3569
|
-
SET_DATA_INDEX(p2, idx2, dtype, x);
|
|
3570
|
-
}
|
|
3571
|
-
} else {
|
|
3572
|
-
for (i = 0; i < n; i++) {
|
|
3573
|
-
GET_DATA_INDEX(p1, idx1, dtype, x);
|
|
3574
|
-
x = m_rint(x);
|
|
3575
|
-
SET_DATA_STRIDE(p2, s2, dtype, x);
|
|
3576
|
-
}
|
|
3577
|
-
}
|
|
3578
|
-
} else {
|
|
3579
|
-
if (idx2) {
|
|
3580
|
-
for (i = 0; i < n; i++) {
|
|
3581
|
-
GET_DATA_STRIDE(p1, s1, dtype, x);
|
|
3582
|
-
x = m_rint(x);
|
|
3583
|
-
SET_DATA_INDEX(p2, idx2, dtype, x);
|
|
3584
|
-
}
|
|
3585
|
-
} else {
|
|
3586
|
-
//
|
|
3587
|
-
if (is_aligned(p1, sizeof(dtype)) && is_aligned(p2, sizeof(dtype))) {
|
|
3588
|
-
if (s1 == sizeof(dtype) && s2 == sizeof(dtype)) {
|
|
3589
|
-
for (i = 0; i < n; i++) {
|
|
3590
|
-
((dtype*)p2)[i] = m_rint(((dtype*)p1)[i]);
|
|
3591
|
-
}
|
|
3592
|
-
return;
|
|
3593
|
-
}
|
|
3594
|
-
if (is_aligned_step(s1, sizeof(dtype)) && is_aligned_step(s2, sizeof(dtype))) {
|
|
3595
|
-
//
|
|
3596
|
-
for (i = 0; i < n; i++) {
|
|
3597
|
-
*(dtype*)p2 = m_rint(*(dtype*)p1);
|
|
3598
|
-
p1 += s1;
|
|
3599
|
-
p2 += s2;
|
|
3600
|
-
}
|
|
3601
|
-
return;
|
|
3602
|
-
//
|
|
3603
|
-
}
|
|
3604
|
-
}
|
|
3605
|
-
for (i = 0; i < n; i++) {
|
|
3606
|
-
GET_DATA_STRIDE(p1, s1, dtype, x);
|
|
3607
|
-
x = m_rint(x);
|
|
3608
|
-
SET_DATA_STRIDE(p2, s2, dtype, x);
|
|
3609
|
-
}
|
|
3610
|
-
//
|
|
3611
|
-
}
|
|
3612
|
-
}
|
|
3613
|
-
}
|
|
3614
|
-
|
|
3615
|
-
static VALUE sfloat_rint(VALUE self) {
|
|
3616
|
-
ndfunc_arg_in_t ain[1] = { { cT, 0 } };
|
|
3617
|
-
ndfunc_arg_out_t aout[1] = { { cT, 0 } };
|
|
3618
|
-
ndfunc_t ndf = { iter_sfloat_rint, FULL_LOOP, 1, 1, ain, aout };
|
|
3619
|
-
|
|
3620
|
-
return na_ndloop(&ndf, 1, self);
|
|
3621
|
-
}
|
|
3622
|
-
|
|
3623
|
-
#define check_intdivzero(y) \
|
|
3624
|
-
{}
|
|
3625
|
-
|
|
3626
|
-
static void iter_sfloat_copysign(na_loop_t* const lp) {
|
|
3627
|
-
size_t i = 0;
|
|
3628
|
-
size_t n;
|
|
3629
|
-
char *p1, *p2, *p3;
|
|
3630
|
-
ssize_t s1, s2, s3;
|
|
3631
|
-
|
|
3632
|
-
INIT_COUNTER(lp, n);
|
|
3633
|
-
INIT_PTR(lp, 0, p1, s1);
|
|
3634
|
-
INIT_PTR(lp, 1, p2, s2);
|
|
3635
|
-
INIT_PTR(lp, 2, p3, s3);
|
|
3636
|
-
|
|
3637
|
-
//
|
|
3638
|
-
if (is_aligned(p1, sizeof(dtype)) && is_aligned(p2, sizeof(dtype)) &&
|
|
3639
|
-
is_aligned(p3, sizeof(dtype))) {
|
|
3640
|
-
|
|
3641
|
-
if (s1 == sizeof(dtype) && s2 == sizeof(dtype) && s3 == sizeof(dtype)) {
|
|
3642
|
-
if (p1 == p3) { // inplace case
|
|
3643
|
-
for (; i < n; i++) {
|
|
3644
|
-
check_intdivzero(((dtype*)p2)[i]);
|
|
3645
|
-
((dtype*)p1)[i] = m_copysign(((dtype*)p1)[i], ((dtype*)p2)[i]);
|
|
3646
|
-
}
|
|
3647
|
-
} else {
|
|
3648
|
-
for (; i < n; i++) {
|
|
3649
|
-
check_intdivzero(((dtype*)p2)[i]);
|
|
3650
|
-
((dtype*)p3)[i] = m_copysign(((dtype*)p1)[i], ((dtype*)p2)[i]);
|
|
3651
|
-
}
|
|
3652
|
-
}
|
|
3653
|
-
return;
|
|
3654
|
-
}
|
|
3655
|
-
|
|
3656
|
-
if (is_aligned_step(s1, sizeof(dtype)) && is_aligned_step(s2, sizeof(dtype)) &&
|
|
3657
|
-
is_aligned_step(s3, sizeof(dtype))) {
|
|
3658
|
-
//
|
|
3659
|
-
|
|
3660
|
-
if (s2 == 0) { // Broadcasting from scalar value.
|
|
3661
|
-
check_intdivzero(*(dtype*)p2);
|
|
3662
|
-
if (s1 == sizeof(dtype) && s3 == sizeof(dtype)) {
|
|
3663
|
-
if (p1 == p3) { // inplace case
|
|
3664
|
-
for (; i < n; i++) {
|
|
3665
|
-
((dtype*)p1)[i] = m_copysign(((dtype*)p1)[i], *(dtype*)p2);
|
|
3666
|
-
}
|
|
3667
|
-
} else {
|
|
3668
|
-
for (; i < n; i++) {
|
|
3669
|
-
((dtype*)p3)[i] = m_copysign(((dtype*)p1)[i], *(dtype*)p2);
|
|
3670
|
-
}
|
|
3671
|
-
}
|
|
3672
|
-
} else {
|
|
3673
|
-
for (i = 0; i < n; i++) {
|
|
3674
|
-
*(dtype*)p3 = m_copysign(*(dtype*)p1, *(dtype*)p2);
|
|
3675
|
-
p1 += s1;
|
|
3676
|
-
p3 += s3;
|
|
3677
|
-
}
|
|
3678
|
-
}
|
|
3679
|
-
} else {
|
|
3680
|
-
if (p1 == p3) { // inplace case
|
|
3681
|
-
for (i = 0; i < n; i++) {
|
|
3682
|
-
check_intdivzero(*(dtype*)p2);
|
|
3683
|
-
*(dtype*)p1 = m_copysign(*(dtype*)p1, *(dtype*)p2);
|
|
3684
|
-
p1 += s1;
|
|
3685
|
-
p2 += s2;
|
|
3686
|
-
}
|
|
3687
|
-
} else {
|
|
3688
|
-
for (i = 0; i < n; i++) {
|
|
3689
|
-
check_intdivzero(*(dtype*)p2);
|
|
3690
|
-
*(dtype*)p3 = m_copysign(*(dtype*)p1, *(dtype*)p2);
|
|
3691
|
-
p1 += s1;
|
|
3692
|
-
p2 += s2;
|
|
3693
|
-
p3 += s3;
|
|
3694
|
-
}
|
|
3695
|
-
}
|
|
3696
|
-
}
|
|
3697
|
-
|
|
3698
|
-
return;
|
|
3699
|
-
//
|
|
3700
|
-
}
|
|
3701
|
-
}
|
|
3702
|
-
for (i = 0; i < n; i++) {
|
|
3703
|
-
dtype x, y, z;
|
|
3704
|
-
GET_DATA_STRIDE(p1, s1, dtype, x);
|
|
3705
|
-
GET_DATA_STRIDE(p2, s2, dtype, y);
|
|
3706
|
-
check_intdivzero(y);
|
|
3707
|
-
z = m_copysign(x, y);
|
|
3708
|
-
SET_DATA_STRIDE(p3, s3, dtype, z);
|
|
3709
|
-
}
|
|
3710
|
-
//
|
|
3711
|
-
}
|
|
3712
|
-
#undef check_intdivzero
|
|
3713
|
-
|
|
3714
|
-
static VALUE sfloat_copysign_self(VALUE self, VALUE other) {
|
|
3715
|
-
ndfunc_arg_in_t ain[2] = { { cT, 0 }, { cT, 0 } };
|
|
3716
|
-
ndfunc_arg_out_t aout[1] = { { cT, 0 } };
|
|
3717
|
-
ndfunc_t ndf = { iter_sfloat_copysign, STRIDE_LOOP, 2, 1, ain, aout };
|
|
3718
|
-
|
|
3719
|
-
return na_ndloop(&ndf, 2, self, other);
|
|
3720
|
-
}
|
|
3721
|
-
|
|
3722
|
-
static VALUE sfloat_copysign(VALUE self, VALUE other) {
|
|
3723
|
-
|
|
3724
|
-
VALUE klass, v;
|
|
3725
|
-
|
|
3726
|
-
klass = na_upcast(rb_obj_class(self), rb_obj_class(other));
|
|
3727
|
-
if (klass == cT) {
|
|
3728
|
-
return sfloat_copysign_self(self, other);
|
|
3729
|
-
} else {
|
|
3730
|
-
v = rb_funcall(klass, id_cast, 1, self);
|
|
3731
|
-
return rb_funcall(v, id_copysign, 1, other);
|
|
3732
|
-
}
|
|
3733
|
-
}
|
|
3734
|
-
|
|
3735
|
-
static void iter_sfloat_signbit(na_loop_t* const lp) {
|
|
3736
|
-
size_t i;
|
|
3737
|
-
char* p1;
|
|
3738
|
-
BIT_DIGIT* a2;
|
|
3739
|
-
size_t p2;
|
|
3740
|
-
ssize_t s1, s2;
|
|
3741
|
-
size_t* idx1;
|
|
3742
|
-
dtype x;
|
|
3743
|
-
BIT_DIGIT b;
|
|
3744
|
-
INIT_COUNTER(lp, i);
|
|
3745
|
-
INIT_PTR_IDX(lp, 0, p1, s1, idx1);
|
|
3746
|
-
INIT_PTR_BIT(lp, 1, a2, p2, s2);
|
|
3747
|
-
if (idx1) {
|
|
3748
|
-
for (; i--;) {
|
|
3749
|
-
GET_DATA_INDEX(p1, idx1, dtype, x);
|
|
3750
|
-
b = (m_signbit(x)) ? 1 : 0;
|
|
3751
|
-
STORE_BIT(a2, p2, b);
|
|
3752
|
-
p2 += s2;
|
|
3753
|
-
}
|
|
3754
|
-
} else {
|
|
3755
|
-
for (; i--;) {
|
|
3756
|
-
GET_DATA_STRIDE(p1, s1, dtype, x);
|
|
3757
|
-
b = (m_signbit(x)) ? 1 : 0;
|
|
3758
|
-
STORE_BIT(a2, p2, b);
|
|
3759
|
-
p2 += s2;
|
|
3760
|
-
}
|
|
3761
|
-
}
|
|
3762
|
-
}
|
|
3763
|
-
|
|
3764
|
-
static VALUE sfloat_signbit(VALUE self) {
|
|
3765
|
-
ndfunc_arg_in_t ain[1] = { { cT, 0 } };
|
|
3766
|
-
ndfunc_arg_out_t aout[1] = { { numo_cBit, 0 } };
|
|
3767
|
-
ndfunc_t ndf = { iter_sfloat_signbit, FULL_LOOP, 1, 1, ain, aout };
|
|
3768
|
-
|
|
3769
|
-
return na_ndloop(&ndf, 1, self);
|
|
3770
|
-
}
|
|
3771
|
-
|
|
3772
|
-
static void iter_sfloat_modf(na_loop_t* const lp) {
|
|
3773
|
-
size_t i;
|
|
3774
|
-
char *p1, *p2, *p3;
|
|
3775
|
-
ssize_t s1, s2, s3;
|
|
3776
|
-
dtype x, y, z;
|
|
3777
|
-
INIT_COUNTER(lp, i);
|
|
3778
|
-
INIT_PTR(lp, 0, p1, s1);
|
|
3779
|
-
INIT_PTR(lp, 1, p2, s2);
|
|
3780
|
-
INIT_PTR(lp, 2, p3, s3);
|
|
3781
|
-
for (; i--;) {
|
|
3782
|
-
GET_DATA_STRIDE(p1, s1, dtype, x);
|
|
3783
|
-
m_modf(x, y, z);
|
|
3784
|
-
SET_DATA_STRIDE(p2, s2, dtype, y);
|
|
3785
|
-
SET_DATA_STRIDE(p3, s3, dtype, z);
|
|
3786
|
-
}
|
|
3787
|
-
}
|
|
3788
|
-
|
|
3789
|
-
static VALUE sfloat_modf(VALUE self) {
|
|
3790
|
-
ndfunc_arg_in_t ain[1] = { { cT, 0 } };
|
|
3791
|
-
ndfunc_arg_out_t aout[2] = { { cT, 0 }, { cT, 0 } };
|
|
3792
|
-
ndfunc_t ndf = { iter_sfloat_modf, STRIDE_LOOP, 1, 2, ain, aout };
|
|
3793
|
-
|
|
3794
|
-
return na_ndloop(&ndf, 1, self);
|
|
3795
|
-
}
|
|
3796
|
-
|
|
3797
|
-
static void iter_sfloat_gt(na_loop_t* const lp) {
|
|
3798
|
-
size_t i;
|
|
3799
|
-
char *p1, *p2;
|
|
3800
|
-
BIT_DIGIT* a3;
|
|
3801
|
-
size_t p3;
|
|
3802
|
-
ssize_t s1, s2, s3;
|
|
3803
|
-
dtype x, y;
|
|
3804
|
-
BIT_DIGIT b;
|
|
3805
|
-
INIT_COUNTER(lp, i);
|
|
3806
|
-
INIT_PTR(lp, 0, p1, s1);
|
|
3807
|
-
INIT_PTR(lp, 1, p2, s2);
|
|
3808
|
-
INIT_PTR_BIT(lp, 2, a3, p3, s3);
|
|
3809
|
-
for (; i--;) {
|
|
3810
|
-
GET_DATA_STRIDE(p1, s1, dtype, x);
|
|
3811
|
-
GET_DATA_STRIDE(p2, s2, dtype, y);
|
|
3812
|
-
b = (m_gt(x, y)) ? 1 : 0;
|
|
3813
|
-
STORE_BIT(a3, p3, b);
|
|
3814
|
-
p3 += s3;
|
|
3815
|
-
}
|
|
3816
|
-
}
|
|
3817
|
-
|
|
3818
|
-
static VALUE sfloat_gt_self(VALUE self, VALUE other) {
|
|
3819
|
-
ndfunc_arg_in_t ain[2] = { { cT, 0 }, { cT, 0 } };
|
|
3820
|
-
ndfunc_arg_out_t aout[1] = { { numo_cBit, 0 } };
|
|
3821
|
-
ndfunc_t ndf = { iter_sfloat_gt, STRIDE_LOOP, 2, 1, ain, aout };
|
|
3822
|
-
|
|
3823
|
-
return na_ndloop(&ndf, 2, self, other);
|
|
3824
|
-
}
|
|
3825
|
-
|
|
3826
|
-
static VALUE sfloat_gt(VALUE self, VALUE other) {
|
|
3827
|
-
|
|
3828
|
-
VALUE klass, v;
|
|
3829
|
-
klass = na_upcast(rb_obj_class(self), rb_obj_class(other));
|
|
3830
|
-
if (klass == cT) {
|
|
3831
|
-
return sfloat_gt_self(self, other);
|
|
3832
|
-
} else {
|
|
3833
|
-
v = rb_funcall(klass, id_cast, 1, self);
|
|
3834
|
-
return rb_funcall(v, id_gt, 1, other);
|
|
3835
|
-
}
|
|
3836
|
-
}
|
|
3837
|
-
|
|
3838
|
-
static void iter_sfloat_ge(na_loop_t* const lp) {
|
|
3839
|
-
size_t i;
|
|
3840
|
-
char *p1, *p2;
|
|
3841
|
-
BIT_DIGIT* a3;
|
|
3842
|
-
size_t p3;
|
|
3843
|
-
ssize_t s1, s2, s3;
|
|
3844
|
-
dtype x, y;
|
|
3845
|
-
BIT_DIGIT b;
|
|
3846
|
-
INIT_COUNTER(lp, i);
|
|
3847
|
-
INIT_PTR(lp, 0, p1, s1);
|
|
3848
|
-
INIT_PTR(lp, 1, p2, s2);
|
|
3849
|
-
INIT_PTR_BIT(lp, 2, a3, p3, s3);
|
|
3850
|
-
for (; i--;) {
|
|
3851
|
-
GET_DATA_STRIDE(p1, s1, dtype, x);
|
|
3852
|
-
GET_DATA_STRIDE(p2, s2, dtype, y);
|
|
3853
|
-
b = (m_ge(x, y)) ? 1 : 0;
|
|
3854
|
-
STORE_BIT(a3, p3, b);
|
|
3855
|
-
p3 += s3;
|
|
3856
|
-
}
|
|
3857
|
-
}
|
|
3858
|
-
|
|
3859
|
-
static VALUE sfloat_ge_self(VALUE self, VALUE other) {
|
|
3860
|
-
ndfunc_arg_in_t ain[2] = { { cT, 0 }, { cT, 0 } };
|
|
3861
|
-
ndfunc_arg_out_t aout[1] = { { numo_cBit, 0 } };
|
|
3862
|
-
ndfunc_t ndf = { iter_sfloat_ge, STRIDE_LOOP, 2, 1, ain, aout };
|
|
3863
|
-
|
|
3864
|
-
return na_ndloop(&ndf, 2, self, other);
|
|
3865
|
-
}
|
|
3866
|
-
|
|
3867
|
-
static VALUE sfloat_ge(VALUE self, VALUE other) {
|
|
3868
|
-
|
|
3869
|
-
VALUE klass, v;
|
|
3870
|
-
klass = na_upcast(rb_obj_class(self), rb_obj_class(other));
|
|
3871
|
-
if (klass == cT) {
|
|
3872
|
-
return sfloat_ge_self(self, other);
|
|
3873
|
-
} else {
|
|
3874
|
-
v = rb_funcall(klass, id_cast, 1, self);
|
|
3875
|
-
return rb_funcall(v, id_ge, 1, other);
|
|
3876
|
-
}
|
|
3877
|
-
}
|
|
3878
|
-
|
|
3879
|
-
static void iter_sfloat_lt(na_loop_t* const lp) {
|
|
3880
|
-
size_t i;
|
|
3881
|
-
char *p1, *p2;
|
|
3882
|
-
BIT_DIGIT* a3;
|
|
3883
|
-
size_t p3;
|
|
3884
|
-
ssize_t s1, s2, s3;
|
|
3885
|
-
dtype x, y;
|
|
3886
|
-
BIT_DIGIT b;
|
|
3887
|
-
INIT_COUNTER(lp, i);
|
|
3888
|
-
INIT_PTR(lp, 0, p1, s1);
|
|
3889
|
-
INIT_PTR(lp, 1, p2, s2);
|
|
3890
|
-
INIT_PTR_BIT(lp, 2, a3, p3, s3);
|
|
3891
|
-
for (; i--;) {
|
|
3892
|
-
GET_DATA_STRIDE(p1, s1, dtype, x);
|
|
3893
|
-
GET_DATA_STRIDE(p2, s2, dtype, y);
|
|
3894
|
-
b = (m_lt(x, y)) ? 1 : 0;
|
|
3895
|
-
STORE_BIT(a3, p3, b);
|
|
3896
|
-
p3 += s3;
|
|
3897
|
-
}
|
|
3898
|
-
}
|
|
3899
|
-
|
|
3900
|
-
static VALUE sfloat_lt_self(VALUE self, VALUE other) {
|
|
3901
|
-
ndfunc_arg_in_t ain[2] = { { cT, 0 }, { cT, 0 } };
|
|
3902
|
-
ndfunc_arg_out_t aout[1] = { { numo_cBit, 0 } };
|
|
3903
|
-
ndfunc_t ndf = { iter_sfloat_lt, STRIDE_LOOP, 2, 1, ain, aout };
|
|
3904
|
-
|
|
3905
|
-
return na_ndloop(&ndf, 2, self, other);
|
|
3906
|
-
}
|
|
3907
|
-
|
|
3908
|
-
static VALUE sfloat_lt(VALUE self, VALUE other) {
|
|
3909
|
-
|
|
3910
|
-
VALUE klass, v;
|
|
3911
|
-
klass = na_upcast(rb_obj_class(self), rb_obj_class(other));
|
|
3912
|
-
if (klass == cT) {
|
|
3913
|
-
return sfloat_lt_self(self, other);
|
|
3914
|
-
} else {
|
|
3915
|
-
v = rb_funcall(klass, id_cast, 1, self);
|
|
3916
|
-
return rb_funcall(v, id_lt, 1, other);
|
|
3917
|
-
}
|
|
3918
|
-
}
|
|
3919
|
-
|
|
3920
|
-
static void iter_sfloat_le(na_loop_t* const lp) {
|
|
3921
|
-
size_t i;
|
|
3922
|
-
char *p1, *p2;
|
|
3923
|
-
BIT_DIGIT* a3;
|
|
3924
|
-
size_t p3;
|
|
3925
|
-
ssize_t s1, s2, s3;
|
|
3926
|
-
dtype x, y;
|
|
3927
|
-
BIT_DIGIT b;
|
|
3928
|
-
INIT_COUNTER(lp, i);
|
|
3929
|
-
INIT_PTR(lp, 0, p1, s1);
|
|
3930
|
-
INIT_PTR(lp, 1, p2, s2);
|
|
3931
|
-
INIT_PTR_BIT(lp, 2, a3, p3, s3);
|
|
3932
|
-
for (; i--;) {
|
|
3933
|
-
GET_DATA_STRIDE(p1, s1, dtype, x);
|
|
3934
|
-
GET_DATA_STRIDE(p2, s2, dtype, y);
|
|
3935
|
-
b = (m_le(x, y)) ? 1 : 0;
|
|
3936
|
-
STORE_BIT(a3, p3, b);
|
|
3937
|
-
p3 += s3;
|
|
3938
|
-
}
|
|
3939
|
-
}
|
|
3940
|
-
|
|
3941
|
-
static VALUE sfloat_le_self(VALUE self, VALUE other) {
|
|
3942
|
-
ndfunc_arg_in_t ain[2] = { { cT, 0 }, { cT, 0 } };
|
|
3943
|
-
ndfunc_arg_out_t aout[1] = { { numo_cBit, 0 } };
|
|
3944
|
-
ndfunc_t ndf = { iter_sfloat_le, STRIDE_LOOP, 2, 1, ain, aout };
|
|
3945
|
-
|
|
3946
|
-
return na_ndloop(&ndf, 2, self, other);
|
|
3947
|
-
}
|
|
3948
|
-
|
|
3949
|
-
static VALUE sfloat_le(VALUE self, VALUE other) {
|
|
3950
|
-
|
|
3951
|
-
VALUE klass, v;
|
|
3952
|
-
klass = na_upcast(rb_obj_class(self), rb_obj_class(other));
|
|
3953
|
-
if (klass == cT) {
|
|
3954
|
-
return sfloat_le_self(self, other);
|
|
3955
|
-
} else {
|
|
3956
|
-
v = rb_funcall(klass, id_cast, 1, self);
|
|
3957
|
-
return rb_funcall(v, id_le, 1, other);
|
|
3958
|
-
}
|
|
3959
|
-
}
|
|
3960
|
-
|
|
3961
|
-
static void iter_sfloat_isnan(na_loop_t* const lp) {
|
|
3962
|
-
size_t i;
|
|
3963
|
-
char* p1;
|
|
3964
|
-
BIT_DIGIT* a2;
|
|
3965
|
-
size_t p2;
|
|
3966
|
-
ssize_t s1, s2;
|
|
3967
|
-
size_t* idx1;
|
|
3968
|
-
dtype x;
|
|
3969
|
-
BIT_DIGIT b;
|
|
3970
|
-
INIT_COUNTER(lp, i);
|
|
3971
|
-
INIT_PTR_IDX(lp, 0, p1, s1, idx1);
|
|
3972
|
-
INIT_PTR_BIT(lp, 1, a2, p2, s2);
|
|
3973
|
-
if (idx1) {
|
|
3974
|
-
for (; i--;) {
|
|
3975
|
-
GET_DATA_INDEX(p1, idx1, dtype, x);
|
|
3976
|
-
b = (m_isnan(x)) ? 1 : 0;
|
|
3977
|
-
STORE_BIT(a2, p2, b);
|
|
3978
|
-
p2 += s2;
|
|
3979
|
-
}
|
|
3980
|
-
} else {
|
|
3981
|
-
for (; i--;) {
|
|
3982
|
-
GET_DATA_STRIDE(p1, s1, dtype, x);
|
|
3983
|
-
b = (m_isnan(x)) ? 1 : 0;
|
|
3984
|
-
STORE_BIT(a2, p2, b);
|
|
3985
|
-
p2 += s2;
|
|
1875
|
+
return;
|
|
1876
|
+
//
|
|
1877
|
+
}
|
|
1878
|
+
}
|
|
1879
|
+
for (i = 0; i < n; i++) {
|
|
1880
|
+
GET_DATA_STRIDE(p1, s1, dtype, x);
|
|
1881
|
+
x = m_sign(x);
|
|
1882
|
+
SET_DATA_STRIDE(p2, s2, dtype, x);
|
|
1883
|
+
}
|
|
1884
|
+
//
|
|
3986
1885
|
}
|
|
3987
1886
|
}
|
|
3988
1887
|
}
|
|
3989
1888
|
|
|
3990
|
-
static VALUE
|
|
1889
|
+
static VALUE sfloat_sign(VALUE self) {
|
|
3991
1890
|
ndfunc_arg_in_t ain[1] = { { cT, 0 } };
|
|
3992
|
-
ndfunc_arg_out_t aout[1] = { {
|
|
3993
|
-
ndfunc_t ndf = {
|
|
1891
|
+
ndfunc_arg_out_t aout[1] = { { cT, 0 } };
|
|
1892
|
+
ndfunc_t ndf = { iter_sfloat_sign, FULL_LOOP, 1, 1, ain, aout };
|
|
3994
1893
|
|
|
3995
1894
|
return na_ndloop(&ndf, 1, self);
|
|
3996
1895
|
}
|
|
3997
1896
|
|
|
3998
|
-
static void
|
|
3999
|
-
size_t i;
|
|
4000
|
-
char*
|
|
4001
|
-
BIT_DIGIT* a2;
|
|
4002
|
-
size_t p2;
|
|
1897
|
+
static void iter_sfloat_square(na_loop_t* const lp) {
|
|
1898
|
+
size_t i, n;
|
|
1899
|
+
char *p1, *p2;
|
|
4003
1900
|
ssize_t s1, s2;
|
|
4004
|
-
size_t*
|
|
1901
|
+
size_t *idx1, *idx2;
|
|
4005
1902
|
dtype x;
|
|
4006
|
-
|
|
4007
|
-
INIT_COUNTER(lp,
|
|
1903
|
+
|
|
1904
|
+
INIT_COUNTER(lp, n);
|
|
4008
1905
|
INIT_PTR_IDX(lp, 0, p1, s1, idx1);
|
|
4009
|
-
|
|
1906
|
+
INIT_PTR_IDX(lp, 1, p2, s2, idx2);
|
|
1907
|
+
|
|
4010
1908
|
if (idx1) {
|
|
4011
|
-
|
|
4012
|
-
|
|
4013
|
-
|
|
4014
|
-
|
|
4015
|
-
|
|
1909
|
+
if (idx2) {
|
|
1910
|
+
for (i = 0; i < n; i++) {
|
|
1911
|
+
GET_DATA_INDEX(p1, idx1, dtype, x);
|
|
1912
|
+
x = m_square(x);
|
|
1913
|
+
SET_DATA_INDEX(p2, idx2, dtype, x);
|
|
1914
|
+
}
|
|
1915
|
+
} else {
|
|
1916
|
+
for (i = 0; i < n; i++) {
|
|
1917
|
+
GET_DATA_INDEX(p1, idx1, dtype, x);
|
|
1918
|
+
x = m_square(x);
|
|
1919
|
+
SET_DATA_STRIDE(p2, s2, dtype, x);
|
|
1920
|
+
}
|
|
4016
1921
|
}
|
|
4017
1922
|
} else {
|
|
4018
|
-
|
|
4019
|
-
|
|
4020
|
-
|
|
4021
|
-
|
|
4022
|
-
|
|
1923
|
+
if (idx2) {
|
|
1924
|
+
for (i = 0; i < n; i++) {
|
|
1925
|
+
GET_DATA_STRIDE(p1, s1, dtype, x);
|
|
1926
|
+
x = m_square(x);
|
|
1927
|
+
SET_DATA_INDEX(p2, idx2, dtype, x);
|
|
1928
|
+
}
|
|
1929
|
+
} else {
|
|
1930
|
+
//
|
|
1931
|
+
if (is_aligned(p1, sizeof(dtype)) && is_aligned(p2, sizeof(dtype))) {
|
|
1932
|
+
if (s1 == sizeof(dtype) && s2 == sizeof(dtype)) {
|
|
1933
|
+
for (i = 0; i < n; i++) {
|
|
1934
|
+
((dtype*)p2)[i] = m_square(((dtype*)p1)[i]);
|
|
1935
|
+
}
|
|
1936
|
+
return;
|
|
1937
|
+
}
|
|
1938
|
+
if (is_aligned_step(s1, sizeof(dtype)) && is_aligned_step(s2, sizeof(dtype))) {
|
|
1939
|
+
//
|
|
1940
|
+
for (i = 0; i < n; i++) {
|
|
1941
|
+
*(dtype*)p2 = m_square(*(dtype*)p1);
|
|
1942
|
+
p1 += s1;
|
|
1943
|
+
p2 += s2;
|
|
1944
|
+
}
|
|
1945
|
+
return;
|
|
1946
|
+
//
|
|
1947
|
+
}
|
|
1948
|
+
}
|
|
1949
|
+
for (i = 0; i < n; i++) {
|
|
1950
|
+
GET_DATA_STRIDE(p1, s1, dtype, x);
|
|
1951
|
+
x = m_square(x);
|
|
1952
|
+
SET_DATA_STRIDE(p2, s2, dtype, x);
|
|
1953
|
+
}
|
|
1954
|
+
//
|
|
4023
1955
|
}
|
|
4024
1956
|
}
|
|
4025
1957
|
}
|
|
4026
1958
|
|
|
4027
|
-
static VALUE
|
|
1959
|
+
static VALUE sfloat_square(VALUE self) {
|
|
4028
1960
|
ndfunc_arg_in_t ain[1] = { { cT, 0 } };
|
|
4029
|
-
ndfunc_arg_out_t aout[1] = { {
|
|
4030
|
-
ndfunc_t ndf = {
|
|
1961
|
+
ndfunc_arg_out_t aout[1] = { { cT, 0 } };
|
|
1962
|
+
ndfunc_t ndf = { iter_sfloat_square, FULL_LOOP, 1, 1, ain, aout };
|
|
4031
1963
|
|
|
4032
1964
|
return na_ndloop(&ndf, 1, self);
|
|
4033
1965
|
}
|
|
4034
1966
|
|
|
4035
|
-
|
|
4036
|
-
|
|
4037
|
-
|
|
4038
|
-
|
|
4039
|
-
size_t
|
|
4040
|
-
|
|
4041
|
-
|
|
4042
|
-
|
|
4043
|
-
|
|
4044
|
-
INIT_COUNTER(lp,
|
|
4045
|
-
|
|
4046
|
-
|
|
4047
|
-
|
|
4048
|
-
|
|
4049
|
-
|
|
4050
|
-
|
|
4051
|
-
|
|
4052
|
-
|
|
1967
|
+
#define check_intdivzero(y) \
|
|
1968
|
+
{}
|
|
1969
|
+
|
|
1970
|
+
static void iter_sfloat_copysign(na_loop_t* const lp) {
|
|
1971
|
+
size_t i = 0;
|
|
1972
|
+
size_t n;
|
|
1973
|
+
char *p1, *p2, *p3;
|
|
1974
|
+
ssize_t s1, s2, s3;
|
|
1975
|
+
|
|
1976
|
+
INIT_COUNTER(lp, n);
|
|
1977
|
+
INIT_PTR(lp, 0, p1, s1);
|
|
1978
|
+
INIT_PTR(lp, 1, p2, s2);
|
|
1979
|
+
INIT_PTR(lp, 2, p3, s3);
|
|
1980
|
+
|
|
1981
|
+
//
|
|
1982
|
+
if (is_aligned(p1, sizeof(dtype)) && is_aligned(p2, sizeof(dtype)) &&
|
|
1983
|
+
is_aligned(p3, sizeof(dtype))) {
|
|
1984
|
+
|
|
1985
|
+
if (s1 == sizeof(dtype) && s2 == sizeof(dtype) && s3 == sizeof(dtype)) {
|
|
1986
|
+
if (p1 == p3) { // inplace case
|
|
1987
|
+
for (; i < n; i++) {
|
|
1988
|
+
check_intdivzero(((dtype*)p2)[i]);
|
|
1989
|
+
((dtype*)p1)[i] = m_copysign(((dtype*)p1)[i], ((dtype*)p2)[i]);
|
|
1990
|
+
}
|
|
1991
|
+
} else {
|
|
1992
|
+
for (; i < n; i++) {
|
|
1993
|
+
check_intdivzero(((dtype*)p2)[i]);
|
|
1994
|
+
((dtype*)p3)[i] = m_copysign(((dtype*)p1)[i], ((dtype*)p2)[i]);
|
|
1995
|
+
}
|
|
1996
|
+
}
|
|
1997
|
+
return;
|
|
4053
1998
|
}
|
|
4054
|
-
|
|
4055
|
-
|
|
4056
|
-
|
|
4057
|
-
|
|
4058
|
-
|
|
4059
|
-
|
|
1999
|
+
|
|
2000
|
+
if (is_aligned_step(s1, sizeof(dtype)) && is_aligned_step(s2, sizeof(dtype)) &&
|
|
2001
|
+
is_aligned_step(s3, sizeof(dtype))) {
|
|
2002
|
+
//
|
|
2003
|
+
|
|
2004
|
+
if (s2 == 0) { // Broadcasting from scalar value.
|
|
2005
|
+
check_intdivzero(*(dtype*)p2);
|
|
2006
|
+
if (s1 == sizeof(dtype) && s3 == sizeof(dtype)) {
|
|
2007
|
+
if (p1 == p3) { // inplace case
|
|
2008
|
+
for (; i < n; i++) {
|
|
2009
|
+
((dtype*)p1)[i] = m_copysign(((dtype*)p1)[i], *(dtype*)p2);
|
|
2010
|
+
}
|
|
2011
|
+
} else {
|
|
2012
|
+
for (; i < n; i++) {
|
|
2013
|
+
((dtype*)p3)[i] = m_copysign(((dtype*)p1)[i], *(dtype*)p2);
|
|
2014
|
+
}
|
|
2015
|
+
}
|
|
2016
|
+
} else {
|
|
2017
|
+
for (i = 0; i < n; i++) {
|
|
2018
|
+
*(dtype*)p3 = m_copysign(*(dtype*)p1, *(dtype*)p2);
|
|
2019
|
+
p1 += s1;
|
|
2020
|
+
p3 += s3;
|
|
2021
|
+
}
|
|
2022
|
+
}
|
|
2023
|
+
} else {
|
|
2024
|
+
if (p1 == p3) { // inplace case
|
|
2025
|
+
for (i = 0; i < n; i++) {
|
|
2026
|
+
check_intdivzero(*(dtype*)p2);
|
|
2027
|
+
*(dtype*)p1 = m_copysign(*(dtype*)p1, *(dtype*)p2);
|
|
2028
|
+
p1 += s1;
|
|
2029
|
+
p2 += s2;
|
|
2030
|
+
}
|
|
2031
|
+
} else {
|
|
2032
|
+
for (i = 0; i < n; i++) {
|
|
2033
|
+
check_intdivzero(*(dtype*)p2);
|
|
2034
|
+
*(dtype*)p3 = m_copysign(*(dtype*)p1, *(dtype*)p2);
|
|
2035
|
+
p1 += s1;
|
|
2036
|
+
p2 += s2;
|
|
2037
|
+
p3 += s3;
|
|
2038
|
+
}
|
|
2039
|
+
}
|
|
2040
|
+
}
|
|
2041
|
+
|
|
2042
|
+
return;
|
|
2043
|
+
//
|
|
4060
2044
|
}
|
|
4061
2045
|
}
|
|
2046
|
+
for (i = 0; i < n; i++) {
|
|
2047
|
+
dtype x, y, z;
|
|
2048
|
+
GET_DATA_STRIDE(p1, s1, dtype, x);
|
|
2049
|
+
GET_DATA_STRIDE(p2, s2, dtype, y);
|
|
2050
|
+
check_intdivzero(y);
|
|
2051
|
+
z = m_copysign(x, y);
|
|
2052
|
+
SET_DATA_STRIDE(p3, s3, dtype, z);
|
|
2053
|
+
}
|
|
2054
|
+
//
|
|
4062
2055
|
}
|
|
2056
|
+
#undef check_intdivzero
|
|
4063
2057
|
|
|
4064
|
-
static VALUE
|
|
4065
|
-
ndfunc_arg_in_t ain[
|
|
4066
|
-
ndfunc_arg_out_t aout[1] = { {
|
|
4067
|
-
ndfunc_t ndf = {
|
|
2058
|
+
static VALUE sfloat_copysign_self(VALUE self, VALUE other) {
|
|
2059
|
+
ndfunc_arg_in_t ain[2] = { { cT, 0 }, { cT, 0 } };
|
|
2060
|
+
ndfunc_arg_out_t aout[1] = { { cT, 0 } };
|
|
2061
|
+
ndfunc_t ndf = { iter_sfloat_copysign, STRIDE_LOOP, 2, 1, ain, aout };
|
|
4068
2062
|
|
|
4069
|
-
return na_ndloop(&ndf,
|
|
2063
|
+
return na_ndloop(&ndf, 2, self, other);
|
|
2064
|
+
}
|
|
2065
|
+
|
|
2066
|
+
static VALUE sfloat_copysign(VALUE self, VALUE other) {
|
|
2067
|
+
|
|
2068
|
+
VALUE klass, v;
|
|
2069
|
+
|
|
2070
|
+
klass = na_upcast(rb_obj_class(self), rb_obj_class(other));
|
|
2071
|
+
if (klass == cT) {
|
|
2072
|
+
return sfloat_copysign_self(self, other);
|
|
2073
|
+
} else {
|
|
2074
|
+
v = rb_funcall(klass, id_cast, 1, self);
|
|
2075
|
+
return rb_funcall(v, id_copysign, 1, other);
|
|
2076
|
+
}
|
|
4070
2077
|
}
|
|
4071
2078
|
|
|
4072
|
-
static void
|
|
2079
|
+
static void iter_sfloat_signbit(na_loop_t* const lp) {
|
|
4073
2080
|
size_t i;
|
|
4074
2081
|
char* p1;
|
|
4075
2082
|
BIT_DIGIT* a2;
|
|
@@ -4084,61 +2091,49 @@ static void iter_sfloat_isneginf(na_loop_t* const lp) {
|
|
|
4084
2091
|
if (idx1) {
|
|
4085
2092
|
for (; i--;) {
|
|
4086
2093
|
GET_DATA_INDEX(p1, idx1, dtype, x);
|
|
4087
|
-
b = (
|
|
2094
|
+
b = (m_signbit(x)) ? 1 : 0;
|
|
4088
2095
|
STORE_BIT(a2, p2, b);
|
|
4089
2096
|
p2 += s2;
|
|
4090
2097
|
}
|
|
4091
2098
|
} else {
|
|
4092
2099
|
for (; i--;) {
|
|
4093
2100
|
GET_DATA_STRIDE(p1, s1, dtype, x);
|
|
4094
|
-
b = (
|
|
2101
|
+
b = (m_signbit(x)) ? 1 : 0;
|
|
4095
2102
|
STORE_BIT(a2, p2, b);
|
|
4096
2103
|
p2 += s2;
|
|
4097
2104
|
}
|
|
4098
2105
|
}
|
|
4099
2106
|
}
|
|
4100
2107
|
|
|
4101
|
-
static VALUE
|
|
2108
|
+
static VALUE sfloat_signbit(VALUE self) {
|
|
4102
2109
|
ndfunc_arg_in_t ain[1] = { { cT, 0 } };
|
|
4103
2110
|
ndfunc_arg_out_t aout[1] = { { numo_cBit, 0 } };
|
|
4104
|
-
ndfunc_t ndf = {
|
|
2111
|
+
ndfunc_t ndf = { iter_sfloat_signbit, FULL_LOOP, 1, 1, ain, aout };
|
|
4105
2112
|
|
|
4106
2113
|
return na_ndloop(&ndf, 1, self);
|
|
4107
2114
|
}
|
|
4108
2115
|
|
|
4109
|
-
static void
|
|
2116
|
+
static void iter_sfloat_modf(na_loop_t* const lp) {
|
|
4110
2117
|
size_t i;
|
|
4111
|
-
char*
|
|
4112
|
-
|
|
4113
|
-
|
|
4114
|
-
ssize_t s1, s2;
|
|
4115
|
-
size_t* idx1;
|
|
4116
|
-
dtype x;
|
|
4117
|
-
BIT_DIGIT b;
|
|
2118
|
+
char *p1, *p2, *p3;
|
|
2119
|
+
ssize_t s1, s2, s3;
|
|
2120
|
+
dtype x, y, z;
|
|
4118
2121
|
INIT_COUNTER(lp, i);
|
|
4119
|
-
|
|
4120
|
-
|
|
4121
|
-
|
|
4122
|
-
|
|
4123
|
-
|
|
4124
|
-
|
|
4125
|
-
|
|
4126
|
-
|
|
4127
|
-
}
|
|
4128
|
-
} else {
|
|
4129
|
-
for (; i--;) {
|
|
4130
|
-
GET_DATA_STRIDE(p1, s1, dtype, x);
|
|
4131
|
-
b = (m_isfinite(x)) ? 1 : 0;
|
|
4132
|
-
STORE_BIT(a2, p2, b);
|
|
4133
|
-
p2 += s2;
|
|
4134
|
-
}
|
|
2122
|
+
INIT_PTR(lp, 0, p1, s1);
|
|
2123
|
+
INIT_PTR(lp, 1, p2, s2);
|
|
2124
|
+
INIT_PTR(lp, 2, p3, s3);
|
|
2125
|
+
for (; i--;) {
|
|
2126
|
+
GET_DATA_STRIDE(p1, s1, dtype, x);
|
|
2127
|
+
m_modf(x, y, z);
|
|
2128
|
+
SET_DATA_STRIDE(p2, s2, dtype, y);
|
|
2129
|
+
SET_DATA_STRIDE(p3, s3, dtype, z);
|
|
4135
2130
|
}
|
|
4136
2131
|
}
|
|
4137
2132
|
|
|
4138
|
-
static VALUE
|
|
2133
|
+
static VALUE sfloat_modf(VALUE self) {
|
|
4139
2134
|
ndfunc_arg_in_t ain[1] = { { cT, 0 } };
|
|
4140
|
-
ndfunc_arg_out_t aout[
|
|
4141
|
-
ndfunc_t ndf = {
|
|
2135
|
+
ndfunc_arg_out_t aout[2] = { { cT, 0 }, { cT, 0 } };
|
|
2136
|
+
ndfunc_t ndf = { iter_sfloat_modf, STRIDE_LOOP, 1, 2, ain, aout };
|
|
4142
2137
|
|
|
4143
2138
|
return na_ndloop(&ndf, 1, self);
|
|
4144
2139
|
}
|