numo-narray-alt 0.9.11 → 0.9.13

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (97)
  1. checksums.yaml +4 -4
  2. data/Gemfile +0 -1
  3. data/README.md +7 -0
  4. data/ext/numo/narray/numo/narray.h +2 -2
  5. data/ext/numo/narray/numo/types/robj_macro.h +1 -1
  6. data/ext/numo/narray/src/mh/bincount.h +233 -0
  7. data/ext/numo/narray/src/mh/bit/and.h +225 -0
  8. data/ext/numo/narray/src/mh/bit/left_shift.h +225 -0
  9. data/ext/numo/narray/src/mh/bit/not.h +173 -0
  10. data/ext/numo/narray/src/mh/bit/or.h +225 -0
  11. data/ext/numo/narray/src/mh/bit/right_shift.h +225 -0
  12. data/ext/numo/narray/src/mh/bit/xor.h +225 -0
  13. data/ext/numo/narray/src/mh/coerce_cast.h +9 -0
  14. data/ext/numo/narray/src/mh/comp/binary_func.h +37 -0
  15. data/ext/numo/narray/src/mh/comp/eq.h +26 -0
  16. data/ext/numo/narray/src/mh/comp/ge.h +26 -0
  17. data/ext/numo/narray/src/mh/comp/gt.h +26 -0
  18. data/ext/numo/narray/src/mh/comp/le.h +26 -0
  19. data/ext/numo/narray/src/mh/comp/lt.h +26 -0
  20. data/ext/numo/narray/src/mh/comp/ne.h +26 -0
  21. data/ext/numo/narray/src/mh/comp/nearly_eq.h +26 -0
  22. data/ext/numo/narray/src/mh/divmod.h +142 -0
  23. data/ext/numo/narray/src/mh/eye.h +1 -1
  24. data/ext/numo/narray/src/mh/fill.h +94 -0
  25. data/ext/numo/narray/src/mh/format.h +108 -0
  26. data/ext/numo/narray/src/mh/format_to_a.h +89 -0
  27. data/ext/numo/narray/src/mh/inspect.h +33 -0
  28. data/ext/numo/narray/src/mh/isfinite.h +42 -0
  29. data/ext/numo/narray/src/mh/isinf.h +42 -0
  30. data/ext/numo/narray/src/mh/isnan.h +42 -0
  31. data/ext/numo/narray/src/mh/isneginf.h +42 -0
  32. data/ext/numo/narray/src/mh/isposinf.h +42 -0
  33. data/ext/numo/narray/src/mh/math/acos.h +2 -2
  34. data/ext/numo/narray/src/mh/math/acosh.h +2 -2
  35. data/ext/numo/narray/src/mh/math/asin.h +2 -2
  36. data/ext/numo/narray/src/mh/math/asinh.h +2 -2
  37. data/ext/numo/narray/src/mh/math/atan.h +2 -2
  38. data/ext/numo/narray/src/mh/math/atan2.h +3 -3
  39. data/ext/numo/narray/src/mh/math/atanh.h +2 -2
  40. data/ext/numo/narray/src/mh/math/cbrt.h +2 -2
  41. data/ext/numo/narray/src/mh/math/cos.h +2 -2
  42. data/ext/numo/narray/src/mh/math/cosh.h +2 -2
  43. data/ext/numo/narray/src/mh/math/erf.h +2 -2
  44. data/ext/numo/narray/src/mh/math/erfc.h +2 -2
  45. data/ext/numo/narray/src/mh/math/exp.h +2 -2
  46. data/ext/numo/narray/src/mh/math/exp10.h +2 -2
  47. data/ext/numo/narray/src/mh/math/exp2.h +2 -2
  48. data/ext/numo/narray/src/mh/math/expm1.h +2 -2
  49. data/ext/numo/narray/src/mh/math/frexp.h +3 -3
  50. data/ext/numo/narray/src/mh/math/hypot.h +3 -3
  51. data/ext/numo/narray/src/mh/math/ldexp.h +3 -3
  52. data/ext/numo/narray/src/mh/math/log.h +2 -2
  53. data/ext/numo/narray/src/mh/math/log10.h +2 -2
  54. data/ext/numo/narray/src/mh/math/log1p.h +2 -2
  55. data/ext/numo/narray/src/mh/math/log2.h +2 -2
  56. data/ext/numo/narray/src/mh/math/sin.h +2 -2
  57. data/ext/numo/narray/src/mh/math/sinc.h +2 -2
  58. data/ext/numo/narray/src/mh/math/sinh.h +2 -2
  59. data/ext/numo/narray/src/mh/math/sqrt.h +8 -8
  60. data/ext/numo/narray/src/mh/math/tan.h +2 -2
  61. data/ext/numo/narray/src/mh/math/tanh.h +2 -2
  62. data/ext/numo/narray/src/mh/math/unary_func.h +3 -3
  63. data/ext/numo/narray/src/mh/minus.h +125 -0
  64. data/ext/numo/narray/src/mh/op/add.h +78 -0
  65. data/ext/numo/narray/src/mh/op/binary_func.h +423 -0
  66. data/ext/numo/narray/src/mh/op/div.h +118 -0
  67. data/ext/numo/narray/src/mh/op/mod.h +108 -0
  68. data/ext/numo/narray/src/mh/op/mul.h +78 -0
  69. data/ext/numo/narray/src/mh/op/sub.h +78 -0
  70. data/ext/numo/narray/src/mh/pow.h +197 -0
  71. data/ext/numo/narray/src/mh/rand.h +2 -2
  72. data/ext/numo/narray/src/mh/rand_norm.h +125 -0
  73. data/ext/numo/narray/src/mh/reciprocal.h +125 -0
  74. data/ext/numo/narray/src/mh/round/ceil.h +11 -0
  75. data/ext/numo/narray/src/mh/round/floor.h +11 -0
  76. data/ext/numo/narray/src/mh/round/rint.h +9 -0
  77. data/ext/numo/narray/src/mh/round/round.h +11 -0
  78. data/ext/numo/narray/src/mh/round/trunc.h +11 -0
  79. data/ext/numo/narray/src/mh/round/unary_func.h +127 -0
  80. data/ext/numo/narray/src/mh/sign.h +125 -0
  81. data/ext/numo/narray/src/mh/square.h +125 -0
  82. data/ext/numo/narray/src/mh/to_a.h +78 -0
  83. data/ext/numo/narray/src/t_bit.c +45 -234
  84. data/ext/numo/narray/src/t_dcomplex.c +457 -2075
  85. data/ext/numo/narray/src/t_dfloat.c +154 -2560
  86. data/ext/numo/narray/src/t_int16.c +408 -2542
  87. data/ext/numo/narray/src/t_int32.c +408 -2542
  88. data/ext/numo/narray/src/t_int64.c +408 -2542
  89. data/ext/numo/narray/src/t_int8.c +409 -2138
  90. data/ext/numo/narray/src/t_robject.c +376 -2161
  91. data/ext/numo/narray/src/t_scomplex.c +435 -2053
  92. data/ext/numo/narray/src/t_sfloat.c +149 -2557
  93. data/ext/numo/narray/src/t_uint16.c +407 -2537
  94. data/ext/numo/narray/src/t_uint32.c +407 -2537
  95. data/ext/numo/narray/src/t_uint64.c +407 -2537
  96. data/ext/numo/narray/src/t_uint8.c +407 -2132
  97. metadata +47 -2
@@ -42,7 +42,41 @@ static ID id_to_a;
  VALUE cT;
  extern VALUE cRT;
 
+ #include "mh/coerce_cast.h"
+ #include "mh/to_a.h"
+ #include "mh/fill.h"
+ #include "mh/format.h"
+ #include "mh/format_to_a.h"
+ #include "mh/inspect.h"
+ #include "mh/op/add.h"
+ #include "mh/op/sub.h"
+ #include "mh/op/mul.h"
+ #include "mh/op/div.h"
+ #include "mh/op/mod.h"
+ #include "mh/divmod.h"
+ #include "mh/pow.h"
+ #include "mh/minus.h"
+ #include "mh/reciprocal.h"
+ #include "mh/sign.h"
+ #include "mh/square.h"
+ #include "mh/round/floor.h"
+ #include "mh/round/round.h"
+ #include "mh/round/ceil.h"
+ #include "mh/round/trunc.h"
+ #include "mh/round/rint.h"
+ #include "mh/comp/eq.h"
+ #include "mh/comp/ne.h"
+ #include "mh/comp/nearly_eq.h"
+ #include "mh/comp/gt.h"
+ #include "mh/comp/ge.h"
+ #include "mh/comp/lt.h"
+ #include "mh/comp/le.h"
  #include "mh/clip.h"
+ #include "mh/isnan.h"
+ #include "mh/isinf.h"
+ #include "mh/isposinf.h"
+ #include "mh/isneginf.h"
+ #include "mh/isfinite.h"
  #include "mh/sum.h"
  #include "mh/prod.h"
  #include "mh/mean.h"
@@ -66,6 +100,7 @@ extern VALUE cRT;
  #include "mh/logseq.h"
  #include "mh/eye.h"
  #include "mh/rand.h"
+ #include "mh/rand_norm.h"
  #include "mh/math/sqrt.h"
  #include "mh/math/cbrt.h"
  #include "mh/math/log.h"
@@ -98,7 +133,48 @@ extern VALUE cRT;
 
  typedef double dfloat; // Type aliases for shorter notation
  // following the codebase naming convention.
+ DEF_NARRAY_COERCE_CAST_METHOD_FUNC(dfloat)
+ DEF_NARRAY_TO_A_METHOD_FUNC(dfloat)
+ DEF_NARRAY_FILL_METHOD_FUNC(dfloat)
+ DEF_NARRAY_FORMAT_METHOD_FUNC(dfloat)
+ DEF_NARRAY_FORMAT_TO_A_METHOD_FUNC(dfloat)
+ DEF_NARRAY_INSPECT_METHOD_FUNC(dfloat)
+ #ifdef __SSE2__
+ DEF_NARRAY_DFLT_ADD_SSE2_METHOD_FUNC()
+ DEF_NARRAY_DFLT_SUB_SSE2_METHOD_FUNC()
+ DEF_NARRAY_DFLT_MUL_SSE2_METHOD_FUNC()
+ DEF_NARRAY_DFLT_DIV_SSE2_METHOD_FUNC()
+ #else
+ DEF_NARRAY_ADD_METHOD_FUNC(dfloat, numo_cDFloat)
+ DEF_NARRAY_SUB_METHOD_FUNC(dfloat, numo_cDFloat)
+ DEF_NARRAY_MUL_METHOD_FUNC(dfloat, numo_cDFloat)
+ DEF_NARRAY_FLT_DIV_METHOD_FUNC(dfloat, numo_cDFloat)
+ #endif
+ DEF_NARRAY_FLT_MOD_METHOD_FUNC(dfloat, numo_cDFloat)
+ DEF_NARRAY_FLT_DIVMOD_METHOD_FUNC(dfloat, numo_cDFloat)
+ DEF_NARRAY_POW_METHOD_FUNC(dfloat, numo_cDFloat)
+ DEF_NARRAY_MINUS_METHOD_FUNC(dfloat, numo_cDFloat)
+ DEF_NARRAY_RECIPROCAL_METHOD_FUNC(dfloat, numo_cDFloat)
+ DEF_NARRAY_SIGN_METHOD_FUNC(dfloat, numo_cDFloat)
+ DEF_NARRAY_SQUARE_METHOD_FUNC(dfloat, numo_cDFloat)
+ DEF_NARRAY_FLT_FLOOR_METHOD_FUNC(dfloat, numo_cDFloat)
+ DEF_NARRAY_FLT_ROUND_METHOD_FUNC(dfloat, numo_cDFloat)
+ DEF_NARRAY_FLT_CEIL_METHOD_FUNC(dfloat, numo_cDFloat)
+ DEF_NARRAY_FLT_TRUNC_METHOD_FUNC(dfloat, numo_cDFloat)
+ DEF_NARRAY_FLT_RINT_METHOD_FUNC(dfloat, numo_cDFloat)
+ DEF_NARRAY_EQ_METHOD_FUNC(dfloat, numo_cDFloat)
+ DEF_NARRAY_NE_METHOD_FUNC(dfloat, numo_cDFloat)
+ DEF_NARRAY_NEARLY_EQ_METHOD_FUNC(dfloat, numo_cDFloat)
+ DEF_NARRAY_GT_METHOD_FUNC(dfloat, numo_cDFloat)
+ DEF_NARRAY_GE_METHOD_FUNC(dfloat, numo_cDFloat)
+ DEF_NARRAY_LT_METHOD_FUNC(dfloat, numo_cDFloat)
+ DEF_NARRAY_LE_METHOD_FUNC(dfloat, numo_cDFloat)
  DEF_NARRAY_CLIP_METHOD_FUNC(dfloat, numo_cDFloat)
+ DEF_NARRAY_FLT_ISNAN_METHOD_FUNC(dfloat, numo_cDFloat)
+ DEF_NARRAY_FLT_ISINF_METHOD_FUNC(dfloat, numo_cDFloat)
+ DEF_NARRAY_FLT_ISPOSINF_METHOD_FUNC(dfloat, numo_cDFloat)
+ DEF_NARRAY_FLT_ISNEGINF_METHOD_FUNC(dfloat, numo_cDFloat)
+ DEF_NARRAY_FLT_ISFINITE_METHOD_FUNC(dfloat, numo_cDFloat)
  DEF_NARRAY_FLT_SUM_METHOD_FUNC(dfloat, numo_cDFloat)
  DEF_NARRAY_FLT_PROD_METHOD_FUNC(dfloat, numo_cDFloat)
  DEF_NARRAY_FLT_MEAN_METHOD_FUNC(dfloat, numo_cDFloat, double, numo_cDFloat)
@@ -122,6 +198,7 @@ DEF_NARRAY_FLT_SEQ_METHOD_FUNC(dfloat)
  DEF_NARRAY_FLT_LOGSEQ_METHOD_FUNC(dfloat)
  DEF_NARRAY_EYE_METHOD_FUNC(dfloat)
  DEF_NARRAY_FLT_RAND_METHOD_FUNC(dfloat)
+ DEF_NARRAY_FLT_RAND_NORM_METHOD_FUNC(dfloat)
  #ifdef __SSE2__
  DEF_NARRAY_FLT_SQRT_SSE2_DBL_METHOD_FUNC(dfloat, numo_cDFloat)
  #else
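The DEF_NARRAY_*_METHOD_FUNC invocations above come from the new mh/*.h headers and replace the hand-written per-type function bodies that the hunks below delete. As a rough sketch only (not the actual contents of mh/sign.h), such a macro can expand to the same iterator-plus-wrapper pair that t_dfloat.c previously spelled out inline; it is modeled on the removed iter_dfloat_sign/dfloat_sign shown later in this diff.

/* Rough sketch only -- not the real mh/sign.h. tNAry is the element type
 * (e.g. dfloat), cNAry the corresponding NArray class object. */
#define DEF_NARRAY_SIGN_METHOD_FUNC(tNAry, cNAry)                          \
  static void iter_##tNAry##_sign(na_loop_t* const lp) {                   \
    size_t i, n;                                                           \
    char *p1, *p2;                                                         \
    ssize_t s1, s2;                                                        \
    tNAry x;                                                               \
    INIT_COUNTER(lp, n);                                                   \
    INIT_PTR(lp, 0, p1, s1);                                               \
    INIT_PTR(lp, 1, p2, s2);                                               \
    for (i = 0; i < n; i++) {                                              \
      GET_DATA_STRIDE(p1, s1, tNAry, x);                                   \
      x = m_sign(x);                                                       \
      SET_DATA_STRIDE(p2, s2, tNAry, x);                                   \
    }                                                                      \
  }                                                                        \
  static VALUE tNAry##_sign(VALUE self) {                                  \
    ndfunc_arg_in_t ain[1] = { { cNAry, 0 } };                             \
    ndfunc_arg_out_t aout[1] = { { cNAry, 0 } };                           \
    ndfunc_t ndf = { iter_##tNAry##_sign, FULL_LOOP, 1, 1, ain, aout };    \
    return na_ndloop(&ndf, 1, self);                                       \
  }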
@@ -1275,171 +1352,6 @@ static VALUE dfloat_aset(int argc, VALUE* argv, VALUE self) {
  return argv[argc];
  }
 
- static VALUE dfloat_coerce_cast(VALUE self, VALUE type) {
- return Qnil;
- }
-
- static void iter_dfloat_to_a(na_loop_t* const lp) {
- size_t i, s1;
- char* p1;
- size_t* idx1;
- dtype x;
- volatile VALUE a, y;
-
- INIT_COUNTER(lp, i);
- INIT_PTR_IDX(lp, 0, p1, s1, idx1);
- a = rb_ary_new2(i);
- rb_ary_push(lp->args[1].value, a);
- if (idx1) {
- for (; i--;) {
- GET_DATA_INDEX(p1, idx1, dtype, x);
- y = m_data_to_num(x);
- rb_ary_push(a, y);
- }
- } else {
- for (; i--;) {
- GET_DATA_STRIDE(p1, s1, dtype, x);
- y = m_data_to_num(x);
- rb_ary_push(a, y);
- }
- }
- }
-
- static VALUE dfloat_to_a(VALUE self) {
- ndfunc_arg_in_t ain[3] = { { Qnil, 0 }, { sym_loop_opt }, { sym_option } };
- ndfunc_arg_out_t aout[1] = { { rb_cArray, 0 } }; // dummy?
- ndfunc_t ndf = { iter_dfloat_to_a, FULL_LOOP_NIP, 3, 1, ain, aout };
- return na_ndloop_cast_narray_to_rarray(&ndf, self, Qnil);
- }
-
- static void iter_dfloat_fill(na_loop_t* const lp) {
- size_t i;
- char* p1;
- ssize_t s1;
- size_t* idx1;
- VALUE x = lp->option;
- dtype y;
- INIT_COUNTER(lp, i);
- INIT_PTR_IDX(lp, 0, p1, s1, idx1);
- y = m_num_to_data(x);
- if (idx1) {
- for (; i--;) {
- SET_DATA_INDEX(p1, idx1, dtype, y);
- }
- } else {
- for (; i--;) {
- SET_DATA_STRIDE(p1, s1, dtype, y);
- }
- }
- }
-
- static VALUE dfloat_fill(VALUE self, VALUE val) {
- ndfunc_arg_in_t ain[2] = { { OVERWRITE, 0 }, { sym_option } };
- ndfunc_t ndf = { iter_dfloat_fill, FULL_LOOP, 2, 0, ain, 0 };
-
- na_ndloop(&ndf, 2, self, val);
- return self;
- }
-
- static VALUE format_dfloat(VALUE fmt, dtype* x) {
- // fix-me
- char s[48];
- int n;
-
- if (NIL_P(fmt)) {
- n = m_sprintf(s, *x);
- return rb_str_new(s, n);
- }
- return rb_funcall(fmt, '%', 1, m_data_to_num(*x));
- }
-
- static void iter_dfloat_format(na_loop_t* const lp) {
- size_t i;
- char *p1, *p2;
- ssize_t s1, s2;
- size_t* idx1;
- dtype* x;
- VALUE y;
- VALUE fmt = lp->option;
- INIT_COUNTER(lp, i);
- INIT_PTR_IDX(lp, 0, p1, s1, idx1);
- INIT_PTR(lp, 1, p2, s2);
- if (idx1) {
- for (; i--;) {
- x = (dtype*)(p1 + *idx1);
- idx1++;
- y = format_dfloat(fmt, x);
- SET_DATA_STRIDE(p2, s2, VALUE, y);
- }
- } else {
- for (; i--;) {
- x = (dtype*)p1;
- p1 += s1;
- y = format_dfloat(fmt, x);
- SET_DATA_STRIDE(p2, s2, VALUE, y);
- }
- }
- }
-
- static VALUE dfloat_format(int argc, VALUE* argv, VALUE self) {
- VALUE fmt = Qnil;
-
- ndfunc_arg_in_t ain[2] = { { Qnil, 0 }, { sym_option } };
- ndfunc_arg_out_t aout[1] = { { numo_cRObject, 0 } };
- ndfunc_t ndf = { iter_dfloat_format, FULL_LOOP_NIP, 2, 1, ain, aout };
-
- rb_scan_args(argc, argv, "01", &fmt);
- return na_ndloop(&ndf, 2, self, fmt);
- }
-
- static void iter_dfloat_format_to_a(na_loop_t* const lp) {
- size_t i;
- char* p1;
- ssize_t s1;
- size_t* idx1;
- dtype* x;
- VALUE y;
- volatile VALUE a;
- VALUE fmt = lp->option;
- INIT_COUNTER(lp, i);
- INIT_PTR_IDX(lp, 0, p1, s1, idx1);
- a = rb_ary_new2(i);
- rb_ary_push(lp->args[1].value, a);
- if (idx1) {
- for (; i--;) {
- x = (dtype*)(p1 + *idx1);
- idx1++;
- y = format_dfloat(fmt, x);
- rb_ary_push(a, y);
- }
- } else {
- for (; i--;) {
- x = (dtype*)p1;
- p1 += s1;
- y = format_dfloat(fmt, x);
- rb_ary_push(a, y);
- }
- }
- }
-
- static VALUE dfloat_format_to_a(int argc, VALUE* argv, VALUE self) {
- VALUE fmt = Qnil;
- ndfunc_arg_in_t ain[3] = { { Qnil, 0 }, { sym_loop_opt }, { sym_option } };
- ndfunc_arg_out_t aout[1] = { { rb_cArray, 0 } }; // dummy?
- ndfunc_t ndf = { iter_dfloat_format_to_a, FULL_LOOP_NIP, 3, 1, ain, aout };
-
- rb_scan_args(argc, argv, "01", &fmt);
- return na_ndloop_cast_narray_to_rarray(&ndf, self, fmt);
- }
-
- static VALUE iter_dfloat_inspect(char* ptr, size_t pos, VALUE fmt) {
- return format_dfloat(fmt, (dtype*)(ptr + pos));
- }
-
- static VALUE dfloat_inspect(VALUE ary) {
- return na_ndloop_inspect(ary, iter_dfloat_inspect, Qnil);
- }
-
  static void iter_dfloat_each(na_loop_t* const lp) {
  size_t i, s1;
  char* p1;
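The deleted definitions above are now produced by the macros included at the top of the file, and the generated functions keep their old names. As a sketch only, assuming the usual Ruby C-extension registration pattern (the binding code itself is not part of this diff, and example_bind_dfloat_methods is a hypothetical name), the Ruby-visible methods therefore stay unchanged:

/* Sketch, not the gem's init code: the macro-generated functions are bound
 * to DFloat methods exactly as the removed inline versions were. */
void example_bind_dfloat_methods(VALUE cDFloat) {
  rb_define_method(cDFloat, "fill", dfloat_fill, 1);
  rb_define_method(cDFloat, "format", dfloat_format, -1);
  rb_define_method(cDFloat, "format_to_a", dfloat_format_to_a, -1);
  rb_define_method(cDFloat, "inspect", dfloat_inspect, 0);
  rb_define_method(cDFloat, "to_a", dfloat_to_a, 0);
}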
@@ -1725,22 +1637,12 @@ static VALUE dfloat_abs(VALUE self) {
  #define check_intdivzero(y) \
  {}
 
- static void iter_dfloat_add(na_loop_t* const lp) {
+ static void iter_dfloat_copysign(na_loop_t* const lp) {
  size_t i = 0;
  size_t n;
  char *p1, *p2, *p3;
  ssize_t s1, s2, s3;
 
- #ifdef __SSE2__
- size_t cnt;
- size_t cnt_simd_loop = -1;
-
- __m128d a;
- __m128d b;
-
- size_t num_pack; // Number of elements packed for SIMD.
- num_pack = SIMD_ALIGNMENT_SIZE / sizeof(dtype);
- #endif
  INIT_COUNTER(lp, n);
  INIT_PTR(lp, 0, p1, s1);
  INIT_PTR(lp, 1, p2, s2);
@@ -1751,73 +1653,17 @@ static void iter_dfloat_add(na_loop_t* const lp) {
  is_aligned(p3, sizeof(dtype))) {
 
  if (s1 == sizeof(dtype) && s2 == sizeof(dtype) && s3 == sizeof(dtype)) {
- #ifdef __SSE2__
- // Check number of elements. & Check same alignment.
- if ((n >= num_pack) &&
- is_same_aligned3(
- &((dtype*)p1)[i], &((dtype*)p2)[i], &((dtype*)p3)[i], SIMD_ALIGNMENT_SIZE
- )) {
- // Calculate up to the position just before the start of SIMD computation.
- cnt = get_count_of_elements_not_aligned_to_simd_size(
- &((dtype*)p1)[i], SIMD_ALIGNMENT_SIZE, sizeof(dtype)
- );
- #endif
- if (p1 == p3) { // inplace case
- #ifdef __SSE2__
- for (; i < cnt; i++) {
- #else
+ if (p1 == p3) { // inplace case
  for (; i < n; i++) {
  check_intdivzero(((dtype*)p2)[i]);
- #endif
- ((dtype*)p1)[i] = m_add(((dtype*)p1)[i], ((dtype*)p2)[i]);
- }
- } else {
- #ifdef __SSE2__
- for (; i < cnt; i++) {
- #else
+ ((dtype*)p1)[i] = m_copysign(((dtype*)p1)[i], ((dtype*)p2)[i]);
+ }
+ } else {
  for (; i < n; i++) {
  check_intdivzero(((dtype*)p2)[i]);
- #endif
- ((dtype*)p3)[i] = m_add(((dtype*)p1)[i], ((dtype*)p2)[i]);
- }
- }
- #ifdef __SSE2__
- // Get the count of SIMD computation loops.
- cnt_simd_loop = (n - i) % num_pack;
-
- // SIMD computation.
- if (p1 == p3) { // inplace case
- for (; i < n - cnt_simd_loop; i += num_pack) {
- a = _mm_load_pd(&((dtype*)p1)[i]);
- b = _mm_load_pd(&((dtype*)p2)[i]);
- a = _mm_add_pd(a, b);
- _mm_store_pd(&((dtype*)p1)[i], a);
- }
- } else {
- for (; i < n - cnt_simd_loop; i += num_pack) {
- a = _mm_load_pd(&((dtype*)p1)[i]);
- b = _mm_load_pd(&((dtype*)p2)[i]);
- a = _mm_add_pd(a, b);
- _mm_stream_pd(&((dtype*)p3)[i], a);
- }
- }
- }
-
- // Compute the remainder of the SIMD operation.
- if (cnt_simd_loop != 0) {
- if (p1 == p3) { // inplace case
- for (; i < n; i++) {
- check_intdivzero(((dtype*)p2)[i]);
- ((dtype*)p1)[i] = m_add(((dtype*)p1)[i], ((dtype*)p2)[i]);
- }
- } else {
- for (; i < n; i++) {
- check_intdivzero(((dtype*)p2)[i]);
- ((dtype*)p3)[i] = m_add(((dtype*)p1)[i], ((dtype*)p2)[i]);
- }
+ ((dtype*)p3)[i] = m_copysign(((dtype*)p1)[i], ((dtype*)p2)[i]);
  }
  }
- #endif
  return;
  }
 
@@ -1828,71 +1674,18 @@ static void iter_dfloat_add(na_loop_t* const lp) {
  if (s2 == 0) { // Broadcasting from scalar value.
  check_intdivzero(*(dtype*)p2);
  if (s1 == sizeof(dtype) && s3 == sizeof(dtype)) {
- #ifdef __SSE2__
- // Broadcast a scalar value and use it for SIMD computation.
- b = _mm_load1_pd(&((dtype*)p2)[0]);
-
- // Check number of elements. & Check same alignment.
- if ((n >= num_pack) &&
- is_same_aligned2(&((dtype*)p1)[i], &((dtype*)p3)[i], SIMD_ALIGNMENT_SIZE)) {
- // Calculate up to the position just before the start of SIMD computation.
- cnt = get_count_of_elements_not_aligned_to_simd_size(
- &((dtype*)p1)[i], SIMD_ALIGNMENT_SIZE, sizeof(dtype)
- );
- #endif
- if (p1 == p3) { // inplace case
- #ifdef __SSE2__
- for (; i < cnt; i++) {
- #else
- for (; i < n; i++) {
- #endif
- ((dtype*)p1)[i] = m_add(((dtype*)p1)[i], *(dtype*)p2);
- }
- } else {
- #ifdef __SSE2__
- for (; i < cnt; i++) {
- #else
+ if (p1 == p3) { // inplace case
  for (; i < n; i++) {
- #endif
- ((dtype*)p3)[i] = m_add(((dtype*)p1)[i], *(dtype*)p2);
- }
- }
- #ifdef __SSE2__
- // Get the count of SIMD computation loops.
- cnt_simd_loop = (n - i) % num_pack;
-
- // SIMD computation.
- if (p1 == p3) { // inplace case
- for (; i < n - cnt_simd_loop; i += num_pack) {
- a = _mm_load_pd(&((dtype*)p1)[i]);
- a = _mm_add_pd(a, b);
- _mm_store_pd(&((dtype*)p1)[i], a);
- }
- } else {
- for (; i < n - cnt_simd_loop; i += num_pack) {
- a = _mm_load_pd(&((dtype*)p1)[i]);
- a = _mm_add_pd(a, b);
- _mm_stream_pd(&((dtype*)p3)[i], a);
- }
+ ((dtype*)p1)[i] = m_copysign(((dtype*)p1)[i], *(dtype*)p2);
  }
- }
-
- // Compute the remainder of the SIMD operation.
- if (cnt_simd_loop != 0) {
- if (p1 == p3) { // inplace case
- for (; i < n; i++) {
- ((dtype*)p1)[i] = m_add(((dtype*)p1)[i], *(dtype*)p2);
- }
- } else {
- for (; i < n; i++) {
- ((dtype*)p3)[i] = m_add(((dtype*)p1)[i], *(dtype*)p2);
- }
+ } else {
+ for (; i < n; i++) {
+ ((dtype*)p3)[i] = m_copysign(((dtype*)p1)[i], *(dtype*)p2);
  }
  }
- #endif
  } else {
  for (i = 0; i < n; i++) {
- *(dtype*)p3 = m_add(*(dtype*)p1, *(dtype*)p2);
+ *(dtype*)p3 = m_copysign(*(dtype*)p1, *(dtype*)p2);
  p1 += s1;
  p3 += s3;
  }
@@ -1901,14 +1694,14 @@ static void iter_dfloat_add(na_loop_t* const lp) {
  if (p1 == p3) { // inplace case
  for (i = 0; i < n; i++) {
  check_intdivzero(*(dtype*)p2);
- *(dtype*)p1 = m_add(*(dtype*)p1, *(dtype*)p2);
+ *(dtype*)p1 = m_copysign(*(dtype*)p1, *(dtype*)p2);
  p1 += s1;
  p2 += s2;
  }
  } else {
  for (i = 0; i < n; i++) {
  check_intdivzero(*(dtype*)p2);
- *(dtype*)p3 = m_add(*(dtype*)p1, *(dtype*)p2);
+ *(dtype*)p3 = m_copysign(*(dtype*)p1, *(dtype*)p2);
  p1 += s1;
  p2 += s2;
  p3 += s3;
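The SSE2 fast path stripped out of iter_dfloat_add above (and out of the other arithmetic iterators deleted in the next hunk) always has the same shape: a scalar peel until the data reaches SIMD alignment, a main loop that processes two doubles per 128-bit register, and a scalar remainder. The following is a self-contained sketch of that shape outside the NArray loop machinery; add_double_sse2 is an illustrative name, not a function from the gem.

#include <emmintrin.h>  /* SSE2 intrinsics */
#include <stddef.h>
#include <stdint.h>

/* Sketch of the peel / SIMD / remainder pattern used by the removed SSE2 paths. */
static void add_double_sse2(const double* a, const double* b, double* c, size_t n) {
  size_t i = 0;
  /* Scalar peel until the destination pointer is 16-byte aligned. */
  while (i < n && ((uintptr_t)&c[i] % 16u) != 0) {
    c[i] = a[i] + b[i];
    i++;
  }
  /* Main SSE2 loop: two doubles per iteration. Unaligned loads keep the sketch
   * correct even when a and b are not co-aligned with c. */
  for (; i + 2 <= n; i += 2) {
    __m128d va = _mm_loadu_pd(&a[i]);
    __m128d vb = _mm_loadu_pd(&b[i]);
    _mm_store_pd(&c[i], _mm_add_pd(va, vb));
  }
  /* Scalar remainder for a trailing odd element. */
  for (; i < n; i++) {
    c[i] = a[i] + b[i];
  }
}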
@@ -1925,2226 +1718,100 @@ static void iter_dfloat_add(na_loop_t* const lp) {
1925
1718
  GET_DATA_STRIDE(p1, s1, dtype, x);
1926
1719
  GET_DATA_STRIDE(p2, s2, dtype, y);
1927
1720
  check_intdivzero(y);
1928
- z = m_add(x, y);
1721
+ z = m_copysign(x, y);
1929
1722
  SET_DATA_STRIDE(p3, s3, dtype, z);
1930
1723
  }
1931
1724
  //
1932
1725
  }
1933
1726
  #undef check_intdivzero
1934
1727
 
1935
- static VALUE dfloat_add_self(VALUE self, VALUE other) {
1728
+ static VALUE dfloat_copysign_self(VALUE self, VALUE other) {
1936
1729
  ndfunc_arg_in_t ain[2] = { { cT, 0 }, { cT, 0 } };
1937
1730
  ndfunc_arg_out_t aout[1] = { { cT, 0 } };
1938
- ndfunc_t ndf = { iter_dfloat_add, STRIDE_LOOP, 2, 1, ain, aout };
1731
+ ndfunc_t ndf = { iter_dfloat_copysign, STRIDE_LOOP, 2, 1, ain, aout };
1939
1732
 
1940
1733
  return na_ndloop(&ndf, 2, self, other);
1941
1734
  }
1942
1735
 
1943
- static VALUE dfloat_add(VALUE self, VALUE other) {
1736
+ static VALUE dfloat_copysign(VALUE self, VALUE other) {
1944
1737
 
1945
1738
  VALUE klass, v;
1946
1739
 
1947
1740
  klass = na_upcast(rb_obj_class(self), rb_obj_class(other));
1948
1741
  if (klass == cT) {
1949
- return dfloat_add_self(self, other);
1742
+ return dfloat_copysign_self(self, other);
1950
1743
  } else {
1951
1744
  v = rb_funcall(klass, id_cast, 1, self);
1952
- return rb_funcall(v, '+', 1, other);
1745
+ return rb_funcall(v, id_copysign, 1, other);
1953
1746
  }
1954
1747
  }
1955
1748
 
1956
- #define check_intdivzero(y) \
1957
- {}
1958
-
1959
- static void iter_dfloat_sub(na_loop_t* const lp) {
1960
- size_t i = 0;
1961
- size_t n;
1962
- char *p1, *p2, *p3;
1963
- ssize_t s1, s2, s3;
1749
+ static void iter_dfloat_signbit(na_loop_t* const lp) {
1750
+ size_t i;
1751
+ char* p1;
1752
+ BIT_DIGIT* a2;
1753
+ size_t p2;
1754
+ ssize_t s1, s2;
1755
+ size_t* idx1;
1756
+ dtype x;
1757
+ BIT_DIGIT b;
1758
+ INIT_COUNTER(lp, i);
1759
+ INIT_PTR_IDX(lp, 0, p1, s1, idx1);
1760
+ INIT_PTR_BIT(lp, 1, a2, p2, s2);
1761
+ if (idx1) {
1762
+ for (; i--;) {
1763
+ GET_DATA_INDEX(p1, idx1, dtype, x);
1764
+ b = (m_signbit(x)) ? 1 : 0;
1765
+ STORE_BIT(a2, p2, b);
1766
+ p2 += s2;
1767
+ }
1768
+ } else {
1769
+ for (; i--;) {
1770
+ GET_DATA_STRIDE(p1, s1, dtype, x);
1771
+ b = (m_signbit(x)) ? 1 : 0;
1772
+ STORE_BIT(a2, p2, b);
1773
+ p2 += s2;
1774
+ }
1775
+ }
1776
+ }
1964
1777
 
1965
- #ifdef __SSE2__
1966
- size_t cnt;
1967
- size_t cnt_simd_loop = -1;
1778
+ static VALUE dfloat_signbit(VALUE self) {
1779
+ ndfunc_arg_in_t ain[1] = { { cT, 0 } };
1780
+ ndfunc_arg_out_t aout[1] = { { numo_cBit, 0 } };
1781
+ ndfunc_t ndf = { iter_dfloat_signbit, FULL_LOOP, 1, 1, ain, aout };
1968
1782
 
1969
- __m128d a;
1970
- __m128d b;
1783
+ return na_ndloop(&ndf, 1, self);
1784
+ }
1971
1785
 
1972
- size_t num_pack; // Number of elements packed for SIMD.
1973
- num_pack = SIMD_ALIGNMENT_SIZE / sizeof(dtype);
1974
- #endif
1975
- INIT_COUNTER(lp, n);
1786
+ static void iter_dfloat_modf(na_loop_t* const lp) {
1787
+ size_t i;
1788
+ char *p1, *p2, *p3;
1789
+ ssize_t s1, s2, s3;
1790
+ dtype x, y, z;
1791
+ INIT_COUNTER(lp, i);
1976
1792
  INIT_PTR(lp, 0, p1, s1);
1977
1793
  INIT_PTR(lp, 1, p2, s2);
1978
1794
  INIT_PTR(lp, 2, p3, s3);
1795
+ for (; i--;) {
1796
+ GET_DATA_STRIDE(p1, s1, dtype, x);
1797
+ m_modf(x, y, z);
1798
+ SET_DATA_STRIDE(p2, s2, dtype, y);
1799
+ SET_DATA_STRIDE(p3, s3, dtype, z);
1800
+ }
1801
+ }
1979
1802
 
1980
- //
1981
- if (is_aligned(p1, sizeof(dtype)) && is_aligned(p2, sizeof(dtype)) &&
1982
- is_aligned(p3, sizeof(dtype))) {
1803
+ static VALUE dfloat_modf(VALUE self) {
1804
+ ndfunc_arg_in_t ain[1] = { { cT, 0 } };
1805
+ ndfunc_arg_out_t aout[2] = { { cT, 0 }, { cT, 0 } };
1806
+ ndfunc_t ndf = { iter_dfloat_modf, STRIDE_LOOP, 1, 2, ain, aout };
1983
1807
 
1984
- if (s1 == sizeof(dtype) && s2 == sizeof(dtype) && s3 == sizeof(dtype)) {
1985
- #ifdef __SSE2__
1986
- // Check number of elements. & Check same alignment.
1987
- if ((n >= num_pack) &&
1988
- is_same_aligned3(
1989
- &((dtype*)p1)[i], &((dtype*)p2)[i], &((dtype*)p3)[i], SIMD_ALIGNMENT_SIZE
1990
- )) {
1991
- // Calculate up to the position just before the start of SIMD computation.
1992
- cnt = get_count_of_elements_not_aligned_to_simd_size(
1993
- &((dtype*)p1)[i], SIMD_ALIGNMENT_SIZE, sizeof(dtype)
1994
- );
1995
- #endif
1996
- if (p1 == p3) { // inplace case
1997
- #ifdef __SSE2__
1998
- for (; i < cnt; i++) {
1999
- #else
2000
- for (; i < n; i++) {
2001
- check_intdivzero(((dtype*)p2)[i]);
2002
- #endif
2003
- ((dtype*)p1)[i] = m_sub(((dtype*)p1)[i], ((dtype*)p2)[i]);
2004
- }
2005
- } else {
2006
- #ifdef __SSE2__
2007
- for (; i < cnt; i++) {
2008
- #else
2009
- for (; i < n; i++) {
2010
- check_intdivzero(((dtype*)p2)[i]);
2011
- #endif
2012
- ((dtype*)p3)[i] = m_sub(((dtype*)p1)[i], ((dtype*)p2)[i]);
2013
- }
2014
- }
1808
+ return na_ndloop(&ndf, 1, self);
1809
+ }
2015
1810
 
2016
- #ifdef __SSE2__
2017
- // Get the count of SIMD computation loops.
2018
- cnt_simd_loop = (n - i) % num_pack;
2019
-
2020
- // SIMD computation.
2021
- if (p1 == p3) { // inplace case
2022
- for (; i < n - cnt_simd_loop; i += num_pack) {
2023
- a = _mm_load_pd(&((dtype*)p1)[i]);
2024
- b = _mm_load_pd(&((dtype*)p2)[i]);
2025
- a = _mm_sub_pd(a, b);
2026
- _mm_store_pd(&((dtype*)p1)[i], a);
2027
- }
2028
- } else {
2029
- for (; i < n - cnt_simd_loop; i += num_pack) {
2030
- a = _mm_load_pd(&((dtype*)p1)[i]);
2031
- b = _mm_load_pd(&((dtype*)p2)[i]);
2032
- a = _mm_sub_pd(a, b);
2033
- _mm_stream_pd(&((dtype*)p3)[i], a);
2034
- }
2035
- }
2036
- }
2037
-
2038
- // Compute the remainder of the SIMD operation.
2039
- if (cnt_simd_loop != 0) {
2040
- if (p1 == p3) { // inplace case
2041
- for (; i < n; i++) {
2042
- check_intdivzero(((dtype*)p2)[i]);
2043
- ((dtype*)p1)[i] = m_sub(((dtype*)p1)[i], ((dtype*)p2)[i]);
2044
- }
2045
- } else {
2046
- for (; i < n; i++) {
2047
- check_intdivzero(((dtype*)p2)[i]);
2048
- ((dtype*)p3)[i] = m_sub(((dtype*)p1)[i], ((dtype*)p2)[i]);
2049
- }
2050
- }
2051
- }
2052
- #endif
2053
- return;
2054
- }
2055
-
2056
- if (is_aligned_step(s1, sizeof(dtype)) && is_aligned_step(s2, sizeof(dtype)) &&
2057
- is_aligned_step(s3, sizeof(dtype))) {
2058
- //
2059
-
2060
- if (s2 == 0) { // Broadcasting from scalar value.
2061
- check_intdivzero(*(dtype*)p2);
2062
- if (s1 == sizeof(dtype) && s3 == sizeof(dtype)) {
2063
- #ifdef __SSE2__
2064
- // Broadcast a scalar value and use it for SIMD computation.
2065
- b = _mm_load1_pd(&((dtype*)p2)[0]);
2066
-
2067
- // Check number of elements. & Check same alignment.
2068
- if ((n >= num_pack) &&
2069
- is_same_aligned2(&((dtype*)p1)[i], &((dtype*)p3)[i], SIMD_ALIGNMENT_SIZE)) {
2070
- // Calculate up to the position just before the start of SIMD computation.
2071
- cnt = get_count_of_elements_not_aligned_to_simd_size(
2072
- &((dtype*)p1)[i], SIMD_ALIGNMENT_SIZE, sizeof(dtype)
2073
- );
2074
- #endif
2075
- if (p1 == p3) { // inplace case
2076
- #ifdef __SSE2__
2077
- for (; i < cnt; i++) {
2078
- #else
2079
- for (; i < n; i++) {
2080
- #endif
2081
- ((dtype*)p1)[i] = m_sub(((dtype*)p1)[i], *(dtype*)p2);
2082
- }
2083
- } else {
2084
- #ifdef __SSE2__
2085
- for (; i < cnt; i++) {
2086
- #else
2087
- for (; i < n; i++) {
2088
- #endif
2089
- ((dtype*)p3)[i] = m_sub(((dtype*)p1)[i], *(dtype*)p2);
2090
- }
2091
- }
2092
-
2093
- #ifdef __SSE2__
2094
- // Get the count of SIMD computation loops.
2095
- cnt_simd_loop = (n - i) % num_pack;
2096
-
2097
- // SIMD computation.
2098
- if (p1 == p3) { // inplace case
2099
- for (; i < n - cnt_simd_loop; i += num_pack) {
2100
- a = _mm_load_pd(&((dtype*)p1)[i]);
2101
- a = _mm_sub_pd(a, b);
2102
- _mm_store_pd(&((dtype*)p1)[i], a);
2103
- }
2104
- } else {
2105
- for (; i < n - cnt_simd_loop; i += num_pack) {
2106
- a = _mm_load_pd(&((dtype*)p1)[i]);
2107
- a = _mm_sub_pd(a, b);
2108
- _mm_stream_pd(&((dtype*)p3)[i], a);
2109
- }
2110
- }
2111
- }
2112
-
2113
- // Compute the remainder of the SIMD operation.
2114
- if (cnt_simd_loop != 0) {
2115
- if (p1 == p3) { // inplace case
2116
- for (; i < n; i++) {
2117
- ((dtype*)p1)[i] = m_sub(((dtype*)p1)[i], *(dtype*)p2);
2118
- }
2119
- } else {
2120
- for (; i < n; i++) {
2121
- ((dtype*)p3)[i] = m_sub(((dtype*)p1)[i], *(dtype*)p2);
2122
- }
2123
- }
2124
- }
2125
- #endif
2126
- } else {
2127
- for (i = 0; i < n; i++) {
2128
- *(dtype*)p3 = m_sub(*(dtype*)p1, *(dtype*)p2);
2129
- p1 += s1;
2130
- p3 += s3;
2131
- }
2132
- }
2133
- } else {
2134
- if (p1 == p3) { // inplace case
2135
- for (i = 0; i < n; i++) {
2136
- check_intdivzero(*(dtype*)p2);
2137
- *(dtype*)p1 = m_sub(*(dtype*)p1, *(dtype*)p2);
2138
- p1 += s1;
2139
- p2 += s2;
2140
- }
2141
- } else {
2142
- for (i = 0; i < n; i++) {
2143
- check_intdivzero(*(dtype*)p2);
2144
- *(dtype*)p3 = m_sub(*(dtype*)p1, *(dtype*)p2);
2145
- p1 += s1;
2146
- p2 += s2;
2147
- p3 += s3;
2148
- }
2149
- }
2150
- }
2151
-
2152
- return;
2153
- //
2154
- }
2155
- }
2156
- for (i = 0; i < n; i++) {
2157
- dtype x, y, z;
2158
- GET_DATA_STRIDE(p1, s1, dtype, x);
2159
- GET_DATA_STRIDE(p2, s2, dtype, y);
2160
- check_intdivzero(y);
2161
- z = m_sub(x, y);
2162
- SET_DATA_STRIDE(p3, s3, dtype, z);
2163
- }
2164
- //
2165
- }
2166
- #undef check_intdivzero
2167
-
2168
- static VALUE dfloat_sub_self(VALUE self, VALUE other) {
2169
- ndfunc_arg_in_t ain[2] = { { cT, 0 }, { cT, 0 } };
2170
- ndfunc_arg_out_t aout[1] = { { cT, 0 } };
2171
- ndfunc_t ndf = { iter_dfloat_sub, STRIDE_LOOP, 2, 1, ain, aout };
2172
-
2173
- return na_ndloop(&ndf, 2, self, other);
2174
- }
2175
-
2176
- static VALUE dfloat_sub(VALUE self, VALUE other) {
2177
-
2178
- VALUE klass, v;
2179
-
2180
- klass = na_upcast(rb_obj_class(self), rb_obj_class(other));
2181
- if (klass == cT) {
2182
- return dfloat_sub_self(self, other);
2183
- } else {
2184
- v = rb_funcall(klass, id_cast, 1, self);
2185
- return rb_funcall(v, '-', 1, other);
2186
- }
2187
- }
2188
-
2189
- #define check_intdivzero(y) \
2190
- {}
2191
-
2192
- static void iter_dfloat_mul(na_loop_t* const lp) {
2193
- size_t i = 0;
2194
- size_t n;
2195
- char *p1, *p2, *p3;
2196
- ssize_t s1, s2, s3;
2197
-
2198
- #ifdef __SSE2__
2199
- size_t cnt;
2200
- size_t cnt_simd_loop = -1;
2201
-
2202
- __m128d a;
2203
- __m128d b;
2204
-
2205
- size_t num_pack; // Number of elements packed for SIMD.
2206
- num_pack = SIMD_ALIGNMENT_SIZE / sizeof(dtype);
2207
- #endif
2208
- INIT_COUNTER(lp, n);
2209
- INIT_PTR(lp, 0, p1, s1);
2210
- INIT_PTR(lp, 1, p2, s2);
2211
- INIT_PTR(lp, 2, p3, s3);
2212
-
2213
- //
2214
- if (is_aligned(p1, sizeof(dtype)) && is_aligned(p2, sizeof(dtype)) &&
2215
- is_aligned(p3, sizeof(dtype))) {
2216
-
2217
- if (s1 == sizeof(dtype) && s2 == sizeof(dtype) && s3 == sizeof(dtype)) {
2218
- #ifdef __SSE2__
2219
- // Check number of elements. & Check same alignment.
2220
- if ((n >= num_pack) &&
2221
- is_same_aligned3(
2222
- &((dtype*)p1)[i], &((dtype*)p2)[i], &((dtype*)p3)[i], SIMD_ALIGNMENT_SIZE
2223
- )) {
2224
- // Calculate up to the position just before the start of SIMD computation.
2225
- cnt = get_count_of_elements_not_aligned_to_simd_size(
2226
- &((dtype*)p1)[i], SIMD_ALIGNMENT_SIZE, sizeof(dtype)
2227
- );
2228
- #endif
2229
- if (p1 == p3) { // inplace case
2230
- #ifdef __SSE2__
2231
- for (; i < cnt; i++) {
2232
- #else
2233
- for (; i < n; i++) {
2234
- check_intdivzero(((dtype*)p2)[i]);
2235
- #endif
2236
- ((dtype*)p1)[i] = m_mul(((dtype*)p1)[i], ((dtype*)p2)[i]);
2237
- }
2238
- } else {
2239
- #ifdef __SSE2__
2240
- for (; i < cnt; i++) {
2241
- #else
2242
- for (; i < n; i++) {
2243
- check_intdivzero(((dtype*)p2)[i]);
2244
- #endif
2245
- ((dtype*)p3)[i] = m_mul(((dtype*)p1)[i], ((dtype*)p2)[i]);
2246
- }
2247
- }
2248
-
2249
- #ifdef __SSE2__
2250
- // Get the count of SIMD computation loops.
2251
- cnt_simd_loop = (n - i) % num_pack;
2252
-
2253
- // SIMD computation.
2254
- if (p1 == p3) { // inplace case
2255
- for (; i < n - cnt_simd_loop; i += num_pack) {
2256
- a = _mm_load_pd(&((dtype*)p1)[i]);
2257
- b = _mm_load_pd(&((dtype*)p2)[i]);
2258
- a = _mm_mul_pd(a, b);
2259
- _mm_store_pd(&((dtype*)p1)[i], a);
2260
- }
2261
- } else {
2262
- for (; i < n - cnt_simd_loop; i += num_pack) {
2263
- a = _mm_load_pd(&((dtype*)p1)[i]);
2264
- b = _mm_load_pd(&((dtype*)p2)[i]);
2265
- a = _mm_mul_pd(a, b);
2266
- _mm_stream_pd(&((dtype*)p3)[i], a);
2267
- }
2268
- }
2269
- }
2270
-
2271
- // Compute the remainder of the SIMD operation.
2272
- if (cnt_simd_loop != 0) {
2273
- if (p1 == p3) { // inplace case
2274
- for (; i < n; i++) {
2275
- check_intdivzero(((dtype*)p2)[i]);
2276
- ((dtype*)p1)[i] = m_mul(((dtype*)p1)[i], ((dtype*)p2)[i]);
2277
- }
2278
- } else {
2279
- for (; i < n; i++) {
2280
- check_intdivzero(((dtype*)p2)[i]);
2281
- ((dtype*)p3)[i] = m_mul(((dtype*)p1)[i], ((dtype*)p2)[i]);
2282
- }
2283
- }
2284
- }
2285
- #endif
2286
- return;
2287
- }
2288
-
2289
- if (is_aligned_step(s1, sizeof(dtype)) && is_aligned_step(s2, sizeof(dtype)) &&
2290
- is_aligned_step(s3, sizeof(dtype))) {
2291
- //
2292
-
2293
- if (s2 == 0) { // Broadcasting from scalar value.
2294
- check_intdivzero(*(dtype*)p2);
2295
- if (s1 == sizeof(dtype) && s3 == sizeof(dtype)) {
2296
- #ifdef __SSE2__
2297
- // Broadcast a scalar value and use it for SIMD computation.
2298
- b = _mm_load1_pd(&((dtype*)p2)[0]);
2299
-
2300
- // Check number of elements. & Check same alignment.
2301
- if ((n >= num_pack) &&
2302
- is_same_aligned2(&((dtype*)p1)[i], &((dtype*)p3)[i], SIMD_ALIGNMENT_SIZE)) {
2303
- // Calculate up to the position just before the start of SIMD computation.
2304
- cnt = get_count_of_elements_not_aligned_to_simd_size(
2305
- &((dtype*)p1)[i], SIMD_ALIGNMENT_SIZE, sizeof(dtype)
2306
- );
2307
- #endif
2308
- if (p1 == p3) { // inplace case
2309
- #ifdef __SSE2__
2310
- for (; i < cnt; i++) {
2311
- #else
2312
- for (; i < n; i++) {
2313
- #endif
2314
- ((dtype*)p1)[i] = m_mul(((dtype*)p1)[i], *(dtype*)p2);
2315
- }
2316
- } else {
2317
- #ifdef __SSE2__
2318
- for (; i < cnt; i++) {
2319
- #else
2320
- for (; i < n; i++) {
2321
- #endif
2322
- ((dtype*)p3)[i] = m_mul(((dtype*)p1)[i], *(dtype*)p2);
2323
- }
2324
- }
2325
-
2326
- #ifdef __SSE2__
2327
- // Get the count of SIMD computation loops.
2328
- cnt_simd_loop = (n - i) % num_pack;
2329
-
2330
- // SIMD computation.
2331
- if (p1 == p3) { // inplace case
2332
- for (; i < n - cnt_simd_loop; i += num_pack) {
2333
- a = _mm_load_pd(&((dtype*)p1)[i]);
2334
- a = _mm_mul_pd(a, b);
2335
- _mm_store_pd(&((dtype*)p1)[i], a);
2336
- }
2337
- } else {
2338
- for (; i < n - cnt_simd_loop; i += num_pack) {
2339
- a = _mm_load_pd(&((dtype*)p1)[i]);
2340
- a = _mm_mul_pd(a, b);
2341
- _mm_stream_pd(&((dtype*)p3)[i], a);
2342
- }
2343
- }
2344
- }
2345
-
2346
- // Compute the remainder of the SIMD operation.
2347
- if (cnt_simd_loop != 0) {
2348
- if (p1 == p3) { // inplace case
2349
- for (; i < n; i++) {
2350
- ((dtype*)p1)[i] = m_mul(((dtype*)p1)[i], *(dtype*)p2);
2351
- }
2352
- } else {
2353
- for (; i < n; i++) {
2354
- ((dtype*)p3)[i] = m_mul(((dtype*)p1)[i], *(dtype*)p2);
2355
- }
2356
- }
2357
- }
2358
- #endif
2359
- } else {
2360
- for (i = 0; i < n; i++) {
2361
- *(dtype*)p3 = m_mul(*(dtype*)p1, *(dtype*)p2);
2362
- p1 += s1;
2363
- p3 += s3;
2364
- }
2365
- }
2366
- } else {
2367
- if (p1 == p3) { // inplace case
2368
- for (i = 0; i < n; i++) {
2369
- check_intdivzero(*(dtype*)p2);
2370
- *(dtype*)p1 = m_mul(*(dtype*)p1, *(dtype*)p2);
2371
- p1 += s1;
2372
- p2 += s2;
2373
- }
2374
- } else {
2375
- for (i = 0; i < n; i++) {
2376
- check_intdivzero(*(dtype*)p2);
2377
- *(dtype*)p3 = m_mul(*(dtype*)p1, *(dtype*)p2);
2378
- p1 += s1;
2379
- p2 += s2;
2380
- p3 += s3;
2381
- }
2382
- }
2383
- }
2384
-
2385
- return;
2386
- //
2387
- }
2388
- }
2389
- for (i = 0; i < n; i++) {
2390
- dtype x, y, z;
2391
- GET_DATA_STRIDE(p1, s1, dtype, x);
2392
- GET_DATA_STRIDE(p2, s2, dtype, y);
2393
- check_intdivzero(y);
2394
- z = m_mul(x, y);
2395
- SET_DATA_STRIDE(p3, s3, dtype, z);
2396
- }
2397
- //
2398
- }
2399
- #undef check_intdivzero
2400
-
2401
- static VALUE dfloat_mul_self(VALUE self, VALUE other) {
2402
- ndfunc_arg_in_t ain[2] = { { cT, 0 }, { cT, 0 } };
2403
- ndfunc_arg_out_t aout[1] = { { cT, 0 } };
2404
- ndfunc_t ndf = { iter_dfloat_mul, STRIDE_LOOP, 2, 1, ain, aout };
2405
-
2406
- return na_ndloop(&ndf, 2, self, other);
2407
- }
2408
-
2409
- static VALUE dfloat_mul(VALUE self, VALUE other) {
2410
-
2411
- VALUE klass, v;
2412
-
2413
- klass = na_upcast(rb_obj_class(self), rb_obj_class(other));
2414
- if (klass == cT) {
2415
- return dfloat_mul_self(self, other);
2416
- } else {
2417
- v = rb_funcall(klass, id_cast, 1, self);
2418
- return rb_funcall(v, '*', 1, other);
2419
- }
2420
- }
2421
-
2422
- #define check_intdivzero(y) \
2423
- {}
2424
-
2425
- static void iter_dfloat_div(na_loop_t* const lp) {
2426
- size_t i = 0;
2427
- size_t n;
2428
- char *p1, *p2, *p3;
2429
- ssize_t s1, s2, s3;
2430
-
2431
- #ifdef __SSE2__
2432
- size_t cnt;
2433
- size_t cnt_simd_loop = -1;
2434
-
2435
- __m128d a;
2436
- __m128d b;
2437
-
2438
- size_t num_pack; // Number of elements packed for SIMD.
2439
- num_pack = SIMD_ALIGNMENT_SIZE / sizeof(dtype);
2440
- #endif
2441
- INIT_COUNTER(lp, n);
2442
- INIT_PTR(lp, 0, p1, s1);
2443
- INIT_PTR(lp, 1, p2, s2);
2444
- INIT_PTR(lp, 2, p3, s3);
2445
-
2446
- //
2447
- if (is_aligned(p1, sizeof(dtype)) && is_aligned(p2, sizeof(dtype)) &&
2448
- is_aligned(p3, sizeof(dtype))) {
2449
-
2450
- if (s1 == sizeof(dtype) && s2 == sizeof(dtype) && s3 == sizeof(dtype)) {
2451
- #ifdef __SSE2__
2452
- // Check number of elements. & Check same alignment.
2453
- if ((n >= num_pack) &&
2454
- is_same_aligned3(
2455
- &((dtype*)p1)[i], &((dtype*)p2)[i], &((dtype*)p3)[i], SIMD_ALIGNMENT_SIZE
2456
- )) {
2457
- // Calculate up to the position just before the start of SIMD computation.
2458
- cnt = get_count_of_elements_not_aligned_to_simd_size(
2459
- &((dtype*)p1)[i], SIMD_ALIGNMENT_SIZE, sizeof(dtype)
2460
- );
2461
- #endif
2462
- if (p1 == p3) { // inplace case
2463
- #ifdef __SSE2__
2464
- for (; i < cnt; i++) {
2465
- #else
2466
- for (; i < n; i++) {
2467
- #endif
2468
- ((dtype*)p1)[i] = m_div(((dtype*)p1)[i], ((dtype*)p2)[i]);
2469
- }
2470
- } else {
2471
- #ifdef __SSE2__
2472
- for (; i < cnt; i++) {
2473
- #else
2474
- for (; i < n; i++) {
2475
- #endif
2476
- ((dtype*)p3)[i] = m_div(((dtype*)p1)[i], ((dtype*)p2)[i]);
2477
- }
2478
- }
2479
-
2480
- #ifdef __SSE2__
2481
- // Get the count of SIMD computation loops.
2482
- cnt_simd_loop = (n - i) % num_pack;
2483
-
2484
- // SIMD computation.
2485
- if (p1 == p3) { // inplace case
2486
- for (; i < n - cnt_simd_loop; i += num_pack) {
2487
- a = _mm_load_pd(&((dtype*)p1)[i]);
2488
- b = _mm_load_pd(&((dtype*)p2)[i]);
2489
- a = _mm_div_pd(a, b);
2490
- _mm_store_pd(&((dtype*)p1)[i], a);
2491
- }
2492
- } else {
2493
- for (; i < n - cnt_simd_loop; i += num_pack) {
2494
- a = _mm_load_pd(&((dtype*)p1)[i]);
2495
- b = _mm_load_pd(&((dtype*)p2)[i]);
2496
- a = _mm_div_pd(a, b);
2497
- _mm_stream_pd(&((dtype*)p3)[i], a);
2498
- }
2499
- }
2500
- }
2501
-
2502
- // Compute the remainder of the SIMD operation.
2503
- if (cnt_simd_loop != 0) {
2504
- if (p1 == p3) { // inplace case
2505
- for (; i < n; i++) {
2506
- check_intdivzero(((dtype*)p2)[i]);
2507
- ((dtype*)p1)[i] = m_div(((dtype*)p1)[i], ((dtype*)p2)[i]);
2508
- }
2509
- } else {
2510
- for (; i < n; i++) {
2511
- check_intdivzero(((dtype*)p2)[i]);
2512
- ((dtype*)p3)[i] = m_div(((dtype*)p1)[i], ((dtype*)p2)[i]);
2513
- }
2514
- }
2515
- }
2516
- #endif
2517
- return;
2518
- }
2519
-
2520
- if (is_aligned_step(s1, sizeof(dtype)) && is_aligned_step(s2, sizeof(dtype)) &&
2521
- is_aligned_step(s3, sizeof(dtype))) {
2522
- //
2523
-
2524
- if (s2 == 0) { // Broadcasting from scalar value.
2525
- check_intdivzero(*(dtype*)p2);
2526
- if (s1 == sizeof(dtype) && s3 == sizeof(dtype)) {
2527
- #ifdef __SSE2__
2528
- // Broadcast a scalar value and use it for SIMD computation.
2529
- b = _mm_load1_pd(&((dtype*)p2)[0]);
2530
-
2531
- // Check number of elements. & Check same alignment.
2532
- if ((n >= num_pack) &&
2533
- is_same_aligned2(&((dtype*)p1)[i], &((dtype*)p3)[i], SIMD_ALIGNMENT_SIZE)) {
2534
- // Calculate up to the position just before the start of SIMD computation.
2535
- cnt = get_count_of_elements_not_aligned_to_simd_size(
2536
- &((dtype*)p1)[i], SIMD_ALIGNMENT_SIZE, sizeof(dtype)
2537
- );
2538
- #endif
2539
- if (p1 == p3) { // inplace case
2540
- #ifdef __SSE2__
2541
- for (; i < cnt; i++) {
2542
- #else
2543
- for (; i < n; i++) {
2544
- check_intdivzero(((dtype*)p2)[i]);
2545
- #endif
2546
- ((dtype*)p1)[i] = m_div(((dtype*)p1)[i], *(dtype*)p2);
2547
- }
2548
- } else {
2549
- #ifdef __SSE2__
2550
- for (; i < cnt; i++) {
2551
- #else
2552
- for (; i < n; i++) {
2553
- check_intdivzero(((dtype*)p2)[i]);
2554
- #endif
2555
- ((dtype*)p3)[i] = m_div(((dtype*)p1)[i], *(dtype*)p2);
2556
- }
2557
- }
2558
-
2559
- #ifdef __SSE2__
2560
- // Get the count of SIMD computation loops.
2561
- cnt_simd_loop = (n - i) % num_pack;
2562
-
2563
- // SIMD computation.
2564
- if (p1 == p3) { // inplace case
2565
- for (; i < n - cnt_simd_loop; i += num_pack) {
2566
- a = _mm_load_pd(&((dtype*)p1)[i]);
2567
- a = _mm_div_pd(a, b);
2568
- _mm_store_pd(&((dtype*)p1)[i], a);
2569
- }
2570
- } else {
2571
- for (; i < n - cnt_simd_loop; i += num_pack) {
2572
- a = _mm_load_pd(&((dtype*)p1)[i]);
2573
- a = _mm_div_pd(a, b);
2574
- _mm_stream_pd(&((dtype*)p3)[i], a);
2575
- }
2576
- }
2577
- }
2578
-
2579
- // Compute the remainder of the SIMD operation.
2580
- if (cnt_simd_loop != 0) {
2581
- if (p1 == p3) { // inplace case
2582
- for (; i < n; i++) {
2583
- ((dtype*)p1)[i] = m_div(((dtype*)p1)[i], *(dtype*)p2);
2584
- }
2585
- } else {
2586
- for (; i < n; i++) {
2587
- ((dtype*)p3)[i] = m_div(((dtype*)p1)[i], *(dtype*)p2);
2588
- }
2589
- }
2590
- }
2591
- #endif
2592
- } else {
2593
- for (i = 0; i < n; i++) {
2594
- *(dtype*)p3 = m_div(*(dtype*)p1, *(dtype*)p2);
2595
- p1 += s1;
2596
- p3 += s3;
2597
- }
2598
- }
2599
- } else {
2600
- if (p1 == p3) { // inplace case
2601
- for (i = 0; i < n; i++) {
2602
- check_intdivzero(*(dtype*)p2);
2603
- *(dtype*)p1 = m_div(*(dtype*)p1, *(dtype*)p2);
2604
- p1 += s1;
2605
- p2 += s2;
2606
- }
2607
- } else {
2608
- for (i = 0; i < n; i++) {
2609
- check_intdivzero(*(dtype*)p2);
2610
- *(dtype*)p3 = m_div(*(dtype*)p1, *(dtype*)p2);
2611
- p1 += s1;
2612
- p2 += s2;
2613
- p3 += s3;
2614
- }
2615
- }
2616
- }
2617
-
2618
- return;
2619
- //
2620
- }
2621
- }
2622
- for (i = 0; i < n; i++) {
2623
- dtype x, y, z;
2624
- GET_DATA_STRIDE(p1, s1, dtype, x);
2625
- GET_DATA_STRIDE(p2, s2, dtype, y);
2626
- check_intdivzero(y);
2627
- z = m_div(x, y);
2628
- SET_DATA_STRIDE(p3, s3, dtype, z);
2629
- }
2630
- //
2631
- }
2632
- #undef check_intdivzero
2633
-
2634
- static VALUE dfloat_div_self(VALUE self, VALUE other) {
2635
- ndfunc_arg_in_t ain[2] = { { cT, 0 }, { cT, 0 } };
2636
- ndfunc_arg_out_t aout[1] = { { cT, 0 } };
2637
- ndfunc_t ndf = { iter_dfloat_div, STRIDE_LOOP, 2, 1, ain, aout };
2638
-
2639
- return na_ndloop(&ndf, 2, self, other);
2640
- }
2641
-
2642
- static VALUE dfloat_div(VALUE self, VALUE other) {
2643
-
2644
- VALUE klass, v;
2645
-
2646
- klass = na_upcast(rb_obj_class(self), rb_obj_class(other));
2647
- if (klass == cT) {
2648
- return dfloat_div_self(self, other);
2649
- } else {
2650
- v = rb_funcall(klass, id_cast, 1, self);
2651
- return rb_funcall(v, '/', 1, other);
2652
- }
2653
- }
2654
-
2655
- #define check_intdivzero(y) \
2656
- {}
2657
-
2658
- static void iter_dfloat_mod(na_loop_t* const lp) {
2659
- size_t i = 0;
2660
- size_t n;
2661
- char *p1, *p2, *p3;
2662
- ssize_t s1, s2, s3;
2663
-
2664
- INIT_COUNTER(lp, n);
2665
- INIT_PTR(lp, 0, p1, s1);
2666
- INIT_PTR(lp, 1, p2, s2);
2667
- INIT_PTR(lp, 2, p3, s3);
2668
-
2669
- //
2670
- if (is_aligned(p1, sizeof(dtype)) && is_aligned(p2, sizeof(dtype)) &&
2671
- is_aligned(p3, sizeof(dtype))) {
2672
-
2673
- if (s1 == sizeof(dtype) && s2 == sizeof(dtype) && s3 == sizeof(dtype)) {
2674
- if (p1 == p3) { // inplace case
2675
- for (; i < n; i++) {
2676
- check_intdivzero(((dtype*)p2)[i]);
2677
- ((dtype*)p1)[i] = m_mod(((dtype*)p1)[i], ((dtype*)p2)[i]);
2678
- }
2679
- } else {
2680
- for (; i < n; i++) {
2681
- check_intdivzero(((dtype*)p2)[i]);
2682
- ((dtype*)p3)[i] = m_mod(((dtype*)p1)[i], ((dtype*)p2)[i]);
2683
- }
2684
- }
2685
- return;
2686
- }
2687
-
2688
- if (is_aligned_step(s1, sizeof(dtype)) && is_aligned_step(s2, sizeof(dtype)) &&
2689
- is_aligned_step(s3, sizeof(dtype))) {
2690
- //
2691
-
2692
- if (s2 == 0) { // Broadcasting from scalar value.
2693
- check_intdivzero(*(dtype*)p2);
2694
- if (s1 == sizeof(dtype) && s3 == sizeof(dtype)) {
2695
- if (p1 == p3) { // inplace case
2696
- for (; i < n; i++) {
2697
- ((dtype*)p1)[i] = m_mod(((dtype*)p1)[i], *(dtype*)p2);
2698
- }
2699
- } else {
2700
- for (; i < n; i++) {
2701
- ((dtype*)p3)[i] = m_mod(((dtype*)p1)[i], *(dtype*)p2);
2702
- }
2703
- }
2704
- } else {
2705
- for (i = 0; i < n; i++) {
2706
- *(dtype*)p3 = m_mod(*(dtype*)p1, *(dtype*)p2);
2707
- p1 += s1;
2708
- p3 += s3;
2709
- }
2710
- }
2711
- } else {
2712
- if (p1 == p3) { // inplace case
2713
- for (i = 0; i < n; i++) {
2714
- check_intdivzero(*(dtype*)p2);
2715
- *(dtype*)p1 = m_mod(*(dtype*)p1, *(dtype*)p2);
2716
- p1 += s1;
2717
- p2 += s2;
2718
- }
2719
- } else {
2720
- for (i = 0; i < n; i++) {
2721
- check_intdivzero(*(dtype*)p2);
2722
- *(dtype*)p3 = m_mod(*(dtype*)p1, *(dtype*)p2);
2723
- p1 += s1;
2724
- p2 += s2;
2725
- p3 += s3;
2726
- }
2727
- }
2728
- }
2729
-
2730
- return;
2731
- //
2732
- }
2733
- }
2734
- for (i = 0; i < n; i++) {
2735
- dtype x, y, z;
2736
- GET_DATA_STRIDE(p1, s1, dtype, x);
2737
- GET_DATA_STRIDE(p2, s2, dtype, y);
2738
- check_intdivzero(y);
2739
- z = m_mod(x, y);
2740
- SET_DATA_STRIDE(p3, s3, dtype, z);
2741
- }
2742
- //
2743
- }
2744
- #undef check_intdivzero
2745
-
2746
- static VALUE dfloat_mod_self(VALUE self, VALUE other) {
2747
- ndfunc_arg_in_t ain[2] = { { cT, 0 }, { cT, 0 } };
2748
- ndfunc_arg_out_t aout[1] = { { cT, 0 } };
2749
- ndfunc_t ndf = { iter_dfloat_mod, STRIDE_LOOP, 2, 1, ain, aout };
2750
-
2751
- return na_ndloop(&ndf, 2, self, other);
2752
- }
2753
-
2754
- static VALUE dfloat_mod(VALUE self, VALUE other) {
2755
-
2756
- VALUE klass, v;
2757
-
2758
- klass = na_upcast(rb_obj_class(self), rb_obj_class(other));
2759
- if (klass == cT) {
2760
- return dfloat_mod_self(self, other);
2761
- } else {
2762
- v = rb_funcall(klass, id_cast, 1, self);
2763
- return rb_funcall(v, '%', 1, other);
2764
- }
2765
- }
2766
-
2767
- static void iter_dfloat_divmod(na_loop_t* const lp) {
2768
- size_t i, n;
2769
- char *p1, *p2, *p3, *p4;
2770
- ssize_t s1, s2, s3, s4;
2771
- dtype x, y, a, b;
2772
- INIT_COUNTER(lp, n);
2773
- INIT_PTR(lp, 0, p1, s1);
2774
- INIT_PTR(lp, 1, p2, s2);
2775
- INIT_PTR(lp, 2, p3, s3);
2776
- INIT_PTR(lp, 3, p4, s4);
2777
- for (i = n; i--;) {
2778
- GET_DATA_STRIDE(p1, s1, dtype, x);
2779
- GET_DATA_STRIDE(p2, s2, dtype, y);
2780
- m_divmod(x, y, a, b);
2781
- SET_DATA_STRIDE(p3, s3, dtype, a);
2782
- SET_DATA_STRIDE(p4, s4, dtype, b);
2783
- }
2784
- }
2785
-
2786
- static VALUE dfloat_divmod_self(VALUE self, VALUE other) {
2787
- ndfunc_arg_in_t ain[2] = { { cT, 0 }, { cT, 0 } };
2788
- ndfunc_arg_out_t aout[2] = { { cT, 0 }, { cT, 0 } };
2789
- ndfunc_t ndf = { iter_dfloat_divmod, STRIDE_LOOP, 2, 2, ain, aout };
2790
-
2791
- return na_ndloop(&ndf, 2, self, other);
2792
- }
2793
-
2794
- static VALUE dfloat_divmod(VALUE self, VALUE other) {
2795
-
2796
- VALUE klass, v;
2797
- klass = na_upcast(rb_obj_class(self), rb_obj_class(other));
2798
- if (klass == cT) {
2799
- return dfloat_divmod_self(self, other);
2800
- } else {
2801
- v = rb_funcall(klass, id_cast, 1, self);
2802
- return rb_funcall(v, id_divmod, 1, other);
2803
- }
2804
- }
2805
-
2806
- static void iter_dfloat_pow(na_loop_t* const lp) {
2807
- size_t i;
2808
- char *p1, *p2, *p3;
2809
- ssize_t s1, s2, s3;
2810
- dtype x, y;
2811
- INIT_COUNTER(lp, i);
2812
- INIT_PTR(lp, 0, p1, s1);
2813
- INIT_PTR(lp, 1, p2, s2);
2814
- INIT_PTR(lp, 2, p3, s3);
2815
- for (; i--;) {
2816
- GET_DATA_STRIDE(p1, s1, dtype, x);
2817
- GET_DATA_STRIDE(p2, s2, dtype, y);
2818
- x = m_pow(x, y);
2819
- SET_DATA_STRIDE(p3, s3, dtype, x);
2820
- }
2821
- }
2822
-
2823
- static void iter_dfloat_pow_int32(na_loop_t* const lp) {
2824
- size_t i;
2825
- char *p1, *p2, *p3;
2826
- ssize_t s1, s2, s3;
2827
- dtype x;
2828
- int32_t y;
2829
- INIT_COUNTER(lp, i);
2830
- INIT_PTR(lp, 0, p1, s1);
2831
- INIT_PTR(lp, 1, p2, s2);
2832
- INIT_PTR(lp, 2, p3, s3);
2833
- for (; i--;) {
2834
- GET_DATA_STRIDE(p1, s1, dtype, x);
2835
- GET_DATA_STRIDE(p2, s2, int32_t, y);
2836
- x = m_pow_int(x, y);
2837
- SET_DATA_STRIDE(p3, s3, dtype, x);
2838
- }
2839
- }
2840
-
2841
- static VALUE dfloat_pow_self(VALUE self, VALUE other) {
2842
- ndfunc_arg_in_t ain[2] = { { cT, 0 }, { cT, 0 } };
2843
- ndfunc_arg_in_t ain_i[2] = { { cT, 0 }, { numo_cInt32, 0 } };
2844
- ndfunc_arg_out_t aout[1] = { { cT, 0 } };
2845
- ndfunc_t ndf = { iter_dfloat_pow, STRIDE_LOOP, 2, 1, ain, aout };
2846
- ndfunc_t ndf_i = { iter_dfloat_pow_int32, STRIDE_LOOP, 2, 1, ain_i, aout };
2847
-
2848
- // fixme : use na.integer?
2849
- if (FIXNUM_P(other) || rb_obj_is_kind_of(other, numo_cInt32)) {
2850
- return na_ndloop(&ndf_i, 2, self, other);
2851
- } else {
2852
- return na_ndloop(&ndf, 2, self, other);
2853
- }
2854
- }
2855
-
2856
- static VALUE dfloat_pow(VALUE self, VALUE other) {
2857
-
2858
- VALUE klass, v;
2859
- klass = na_upcast(rb_obj_class(self), rb_obj_class(other));
2860
- if (klass == cT) {
2861
- return dfloat_pow_self(self, other);
2862
- } else {
2863
- v = rb_funcall(klass, id_cast, 1, self);
2864
- return rb_funcall(v, id_pow, 1, other);
2865
- }
2866
- }
2867
-
2868
- static void iter_dfloat_minus(na_loop_t* const lp) {
2869
- size_t i, n;
2870
- char *p1, *p2;
2871
- ssize_t s1, s2;
2872
- size_t *idx1, *idx2;
2873
- dtype x;
2874
-
2875
- INIT_COUNTER(lp, n);
2876
- INIT_PTR_IDX(lp, 0, p1, s1, idx1);
2877
- INIT_PTR_IDX(lp, 1, p2, s2, idx2);
2878
-
2879
- if (idx1) {
2880
- if (idx2) {
2881
- for (i = 0; i < n; i++) {
2882
- GET_DATA_INDEX(p1, idx1, dtype, x);
2883
- x = m_minus(x);
2884
- SET_DATA_INDEX(p2, idx2, dtype, x);
2885
- }
2886
- } else {
2887
- for (i = 0; i < n; i++) {
2888
- GET_DATA_INDEX(p1, idx1, dtype, x);
2889
- x = m_minus(x);
2890
- SET_DATA_STRIDE(p2, s2, dtype, x);
2891
- }
2892
- }
2893
- } else {
2894
- if (idx2) {
2895
- for (i = 0; i < n; i++) {
2896
- GET_DATA_STRIDE(p1, s1, dtype, x);
2897
- x = m_minus(x);
2898
- SET_DATA_INDEX(p2, idx2, dtype, x);
2899
- }
2900
- } else {
2901
- //
2902
- if (is_aligned(p1, sizeof(dtype)) && is_aligned(p2, sizeof(dtype))) {
2903
- if (s1 == sizeof(dtype) && s2 == sizeof(dtype)) {
2904
- for (i = 0; i < n; i++) {
2905
- ((dtype*)p2)[i] = m_minus(((dtype*)p1)[i]);
2906
- }
2907
- return;
2908
- }
2909
- if (is_aligned_step(s1, sizeof(dtype)) && is_aligned_step(s2, sizeof(dtype))) {
2910
- //
2911
- for (i = 0; i < n; i++) {
2912
- *(dtype*)p2 = m_minus(*(dtype*)p1);
2913
- p1 += s1;
2914
- p2 += s2;
2915
- }
2916
- return;
2917
- //
2918
- }
2919
- }
2920
- for (i = 0; i < n; i++) {
2921
- GET_DATA_STRIDE(p1, s1, dtype, x);
2922
- x = m_minus(x);
2923
- SET_DATA_STRIDE(p2, s2, dtype, x);
2924
- }
2925
- //
2926
- }
2927
- }
2928
- }
2929
-
2930
- static VALUE dfloat_minus(VALUE self) {
2931
- ndfunc_arg_in_t ain[1] = { { cT, 0 } };
2932
- ndfunc_arg_out_t aout[1] = { { cT, 0 } };
2933
- ndfunc_t ndf = { iter_dfloat_minus, FULL_LOOP, 1, 1, ain, aout };
2934
-
2935
- return na_ndloop(&ndf, 1, self);
2936
- }
2937
-
2938
- static void iter_dfloat_reciprocal(na_loop_t* const lp) {
2939
- size_t i, n;
2940
- char *p1, *p2;
2941
- ssize_t s1, s2;
2942
- size_t *idx1, *idx2;
2943
- dtype x;
2944
-
2945
- INIT_COUNTER(lp, n);
2946
- INIT_PTR_IDX(lp, 0, p1, s1, idx1);
2947
- INIT_PTR_IDX(lp, 1, p2, s2, idx2);
2948
-
2949
- if (idx1) {
2950
- if (idx2) {
2951
- for (i = 0; i < n; i++) {
2952
- GET_DATA_INDEX(p1, idx1, dtype, x);
2953
- x = m_reciprocal(x);
2954
- SET_DATA_INDEX(p2, idx2, dtype, x);
2955
- }
2956
- } else {
2957
- for (i = 0; i < n; i++) {
2958
- GET_DATA_INDEX(p1, idx1, dtype, x);
2959
- x = m_reciprocal(x);
2960
- SET_DATA_STRIDE(p2, s2, dtype, x);
2961
- }
2962
- }
2963
- } else {
2964
- if (idx2) {
2965
- for (i = 0; i < n; i++) {
2966
- GET_DATA_STRIDE(p1, s1, dtype, x);
2967
- x = m_reciprocal(x);
2968
- SET_DATA_INDEX(p2, idx2, dtype, x);
2969
- }
2970
- } else {
2971
- //
2972
- if (is_aligned(p1, sizeof(dtype)) && is_aligned(p2, sizeof(dtype))) {
2973
- if (s1 == sizeof(dtype) && s2 == sizeof(dtype)) {
2974
- for (i = 0; i < n; i++) {
2975
- ((dtype*)p2)[i] = m_reciprocal(((dtype*)p1)[i]);
2976
- }
2977
- return;
2978
- }
2979
- if (is_aligned_step(s1, sizeof(dtype)) && is_aligned_step(s2, sizeof(dtype))) {
2980
- //
2981
- for (i = 0; i < n; i++) {
2982
- *(dtype*)p2 = m_reciprocal(*(dtype*)p1);
2983
- p1 += s1;
2984
- p2 += s2;
2985
- }
2986
- return;
2987
- //
2988
- }
2989
- }
2990
- for (i = 0; i < n; i++) {
2991
- GET_DATA_STRIDE(p1, s1, dtype, x);
2992
- x = m_reciprocal(x);
2993
- SET_DATA_STRIDE(p2, s2, dtype, x);
2994
- }
2995
- //
2996
- }
2997
- }
2998
- }
2999
-
3000
- static VALUE dfloat_reciprocal(VALUE self) {
3001
- ndfunc_arg_in_t ain[1] = { { cT, 0 } };
3002
- ndfunc_arg_out_t aout[1] = { { cT, 0 } };
3003
- ndfunc_t ndf = { iter_dfloat_reciprocal, FULL_LOOP, 1, 1, ain, aout };
3004
-
3005
- return na_ndloop(&ndf, 1, self);
3006
- }
3007
-
3008
- static void iter_dfloat_sign(na_loop_t* const lp) {
3009
- size_t i, n;
3010
- char *p1, *p2;
3011
- ssize_t s1, s2;
3012
- size_t *idx1, *idx2;
3013
- dtype x;
3014
-
3015
- INIT_COUNTER(lp, n);
3016
- INIT_PTR_IDX(lp, 0, p1, s1, idx1);
3017
- INIT_PTR_IDX(lp, 1, p2, s2, idx2);
3018
-
3019
- if (idx1) {
3020
- if (idx2) {
3021
- for (i = 0; i < n; i++) {
3022
- GET_DATA_INDEX(p1, idx1, dtype, x);
3023
- x = m_sign(x);
3024
- SET_DATA_INDEX(p2, idx2, dtype, x);
3025
- }
3026
- } else {
3027
- for (i = 0; i < n; i++) {
3028
- GET_DATA_INDEX(p1, idx1, dtype, x);
3029
- x = m_sign(x);
3030
- SET_DATA_STRIDE(p2, s2, dtype, x);
3031
- }
3032
- }
3033
- } else {
3034
- if (idx2) {
3035
- for (i = 0; i < n; i++) {
3036
- GET_DATA_STRIDE(p1, s1, dtype, x);
3037
- x = m_sign(x);
3038
- SET_DATA_INDEX(p2, idx2, dtype, x);
3039
- }
3040
- } else {
3041
- //
3042
- if (is_aligned(p1, sizeof(dtype)) && is_aligned(p2, sizeof(dtype))) {
3043
- if (s1 == sizeof(dtype) && s2 == sizeof(dtype)) {
3044
- for (i = 0; i < n; i++) {
3045
- ((dtype*)p2)[i] = m_sign(((dtype*)p1)[i]);
3046
- }
3047
- return;
3048
- }
3049
- if (is_aligned_step(s1, sizeof(dtype)) && is_aligned_step(s2, sizeof(dtype))) {
3050
- //
3051
- for (i = 0; i < n; i++) {
3052
- *(dtype*)p2 = m_sign(*(dtype*)p1);
3053
- p1 += s1;
3054
- p2 += s2;
3055
- }
3056
- return;
3057
- //
3058
- }
3059
- }
3060
- for (i = 0; i < n; i++) {
3061
- GET_DATA_STRIDE(p1, s1, dtype, x);
3062
- x = m_sign(x);
3063
- SET_DATA_STRIDE(p2, s2, dtype, x);
3064
- }
3065
- //
3066
- }
3067
- }
3068
- }
3069
-
3070
- static VALUE dfloat_sign(VALUE self) {
3071
- ndfunc_arg_in_t ain[1] = { { cT, 0 } };
3072
- ndfunc_arg_out_t aout[1] = { { cT, 0 } };
3073
- ndfunc_t ndf = { iter_dfloat_sign, FULL_LOOP, 1, 1, ain, aout };
3074
-
3075
- return na_ndloop(&ndf, 1, self);
3076
- }
3077
-
3078
- static void iter_dfloat_square(na_loop_t* const lp) {
- size_t i, n;
- char *p1, *p2;
- ssize_t s1, s2;
- size_t *idx1, *idx2;
- dtype x;
-
- INIT_COUNTER(lp, n);
- INIT_PTR_IDX(lp, 0, p1, s1, idx1);
- INIT_PTR_IDX(lp, 1, p2, s2, idx2);
-
- if (idx1) {
- if (idx2) {
- for (i = 0; i < n; i++) {
- GET_DATA_INDEX(p1, idx1, dtype, x);
- x = m_square(x);
- SET_DATA_INDEX(p2, idx2, dtype, x);
- }
- } else {
- for (i = 0; i < n; i++) {
- GET_DATA_INDEX(p1, idx1, dtype, x);
- x = m_square(x);
- SET_DATA_STRIDE(p2, s2, dtype, x);
- }
- }
- } else {
- if (idx2) {
- for (i = 0; i < n; i++) {
- GET_DATA_STRIDE(p1, s1, dtype, x);
- x = m_square(x);
- SET_DATA_INDEX(p2, idx2, dtype, x);
- }
- } else {
- //
- if (is_aligned(p1, sizeof(dtype)) && is_aligned(p2, sizeof(dtype))) {
- if (s1 == sizeof(dtype) && s2 == sizeof(dtype)) {
- for (i = 0; i < n; i++) {
- ((dtype*)p2)[i] = m_square(((dtype*)p1)[i]);
- }
- return;
- }
- if (is_aligned_step(s1, sizeof(dtype)) && is_aligned_step(s2, sizeof(dtype))) {
- //
- for (i = 0; i < n; i++) {
- *(dtype*)p2 = m_square(*(dtype*)p1);
- p1 += s1;
- p2 += s2;
- }
- return;
- //
- }
- }
- for (i = 0; i < n; i++) {
- GET_DATA_STRIDE(p1, s1, dtype, x);
- x = m_square(x);
- SET_DATA_STRIDE(p2, s2, dtype, x);
- }
- //
- }
- }
- }
-
- static VALUE dfloat_square(VALUE self) {
- ndfunc_arg_in_t ain[1] = { { cT, 0 } };
- ndfunc_arg_out_t aout[1] = { { cT, 0 } };
- ndfunc_t ndf = { iter_dfloat_square, FULL_LOOP, 1, 1, ain, aout };
-
- return na_ndloop(&ndf, 1, self);
- }
-
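The unary iterators removed above (reciprocal, sign, square, and the floor/round/ceil/trunc/rint family further down) are all instances of one generated pattern: branch on whether either argument carries an index table, otherwise walk both buffers by byte stride, with a fast path when both strides equal sizeof(dtype). A minimal standalone sketch of that dispatch, in plain C with illustrative names (apply_unary, recip) rather than the gem's macros:

    #include <stddef.h>
    #include <stdio.h>

    /* Illustrative stand-in for a generated m_* element operation. */
    static double recip(double x) { return 1.0 / x; }

    /* Apply op element-wise over possibly strided buffers; strides are in
     * bytes, as in the narray loops. Uses a contiguous fast path when both
     * strides equal sizeof(double). */
    static void apply_unary(char *src, ptrdiff_t ss, char *dst, ptrdiff_t ds,
                            size_t n, double (*op)(double)) {
      if (ss == (ptrdiff_t)sizeof(double) && ds == (ptrdiff_t)sizeof(double)) {
        for (size_t i = 0; i < n; i++)          /* contiguous fast path */
          ((double *)dst)[i] = op(((double *)src)[i]);
        return;
      }
      for (size_t i = 0; i < n; i++) {          /* general strided path */
        *(double *)dst = op(*(double *)src);
        src += ss;
        dst += ds;
      }
    }

    int main(void) {
      double in[4] = { 1.0, 2.0, 4.0, 8.0 }, out[4];
      apply_unary((char *)in, sizeof(double), (char *)out, sizeof(double), 4, recip);
      for (int i = 0; i < 4; i++) printf("%g ", out[i]);  /* 1 0.5 0.25 0.125 */
      printf("\n");
      return 0;
    }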
- static void iter_dfloat_eq(na_loop_t* const lp) {
- size_t i;
- char *p1, *p2;
- BIT_DIGIT* a3;
- size_t p3;
- ssize_t s1, s2, s3;
- dtype x, y;
- BIT_DIGIT b;
- INIT_COUNTER(lp, i);
- INIT_PTR(lp, 0, p1, s1);
- INIT_PTR(lp, 1, p2, s2);
- INIT_PTR_BIT(lp, 2, a3, p3, s3);
- for (; i--;) {
- GET_DATA_STRIDE(p1, s1, dtype, x);
- GET_DATA_STRIDE(p2, s2, dtype, y);
- b = (m_eq(x, y)) ? 1 : 0;
- STORE_BIT(a3, p3, b);
- p3 += s3;
- }
- }
-
- static VALUE dfloat_eq_self(VALUE self, VALUE other) {
- ndfunc_arg_in_t ain[2] = { { cT, 0 }, { cT, 0 } };
- ndfunc_arg_out_t aout[1] = { { numo_cBit, 0 } };
- ndfunc_t ndf = { iter_dfloat_eq, STRIDE_LOOP, 2, 1, ain, aout };
-
- return na_ndloop(&ndf, 2, self, other);
- }
-
- static VALUE dfloat_eq(VALUE self, VALUE other) {
-
- VALUE klass, v;
- klass = na_upcast(rb_obj_class(self), rb_obj_class(other));
- if (klass == cT) {
- return dfloat_eq_self(self, other);
- } else {
- v = rb_funcall(klass, id_cast, 1, self);
- return rb_funcall(v, id_eq, 1, other);
- }
- }
-
- static void iter_dfloat_ne(na_loop_t* const lp) {
- size_t i;
- char *p1, *p2;
- BIT_DIGIT* a3;
- size_t p3;
- ssize_t s1, s2, s3;
- dtype x, y;
- BIT_DIGIT b;
- INIT_COUNTER(lp, i);
- INIT_PTR(lp, 0, p1, s1);
- INIT_PTR(lp, 1, p2, s2);
- INIT_PTR_BIT(lp, 2, a3, p3, s3);
- for (; i--;) {
- GET_DATA_STRIDE(p1, s1, dtype, x);
- GET_DATA_STRIDE(p2, s2, dtype, y);
- b = (m_ne(x, y)) ? 1 : 0;
- STORE_BIT(a3, p3, b);
- p3 += s3;
- }
- }
-
- static VALUE dfloat_ne_self(VALUE self, VALUE other) {
- ndfunc_arg_in_t ain[2] = { { cT, 0 }, { cT, 0 } };
- ndfunc_arg_out_t aout[1] = { { numo_cBit, 0 } };
- ndfunc_t ndf = { iter_dfloat_ne, STRIDE_LOOP, 2, 1, ain, aout };
-
- return na_ndloop(&ndf, 2, self, other);
- }
-
- static VALUE dfloat_ne(VALUE self, VALUE other) {
-
- VALUE klass, v;
- klass = na_upcast(rb_obj_class(self), rb_obj_class(other));
- if (klass == cT) {
- return dfloat_ne_self(self, other);
- } else {
- v = rb_funcall(klass, id_cast, 1, self);
- return rb_funcall(v, id_ne, 1, other);
- }
- }
-
- static void iter_dfloat_nearly_eq(na_loop_t* const lp) {
- size_t i;
- char *p1, *p2;
- BIT_DIGIT* a3;
- size_t p3;
- ssize_t s1, s2, s3;
- dtype x, y;
- BIT_DIGIT b;
- INIT_COUNTER(lp, i);
- INIT_PTR(lp, 0, p1, s1);
- INIT_PTR(lp, 1, p2, s2);
- INIT_PTR_BIT(lp, 2, a3, p3, s3);
- for (; i--;) {
- GET_DATA_STRIDE(p1, s1, dtype, x);
- GET_DATA_STRIDE(p2, s2, dtype, y);
- b = (m_nearly_eq(x, y)) ? 1 : 0;
- STORE_BIT(a3, p3, b);
- p3 += s3;
- }
- }
-
- static VALUE dfloat_nearly_eq_self(VALUE self, VALUE other) {
- ndfunc_arg_in_t ain[2] = { { cT, 0 }, { cT, 0 } };
- ndfunc_arg_out_t aout[1] = { { numo_cBit, 0 } };
- ndfunc_t ndf = { iter_dfloat_nearly_eq, STRIDE_LOOP, 2, 1, ain, aout };
-
- return na_ndloop(&ndf, 2, self, other);
- }
-
- static VALUE dfloat_nearly_eq(VALUE self, VALUE other) {
-
- VALUE klass, v;
- klass = na_upcast(rb_obj_class(self), rb_obj_class(other));
- if (klass == cT) {
- return dfloat_nearly_eq_self(self, other);
- } else {
- v = rb_funcall(klass, id_cast, 1, self);
- return rb_funcall(v, id_nearly_eq, 1, other);
- }
- }
-
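The comparison iterators (eq/ne/nearly_eq here, gt/ge/lt/le below) read two dtype operands per element and store a single bit of the result through STORE_BIT, so the Ruby method returns a Numo::Bit mask rather than a DFloat. A rough standalone sketch of packing per-element comparison results into a bit array, with a hypothetical store_bit helper in place of the gem's BIT_DIGIT machinery:

    #include <stddef.h>
    #include <stdint.h>
    #include <stdio.h>

    /* Set bit `pos` of a little-endian bit array to b (0 or 1). */
    static void store_bit(uint64_t *bits, size_t pos, int b) {
      if (b) bits[pos / 64] |=  (UINT64_C(1) << (pos % 64));
      else   bits[pos / 64] &= ~(UINT64_C(1) << (pos % 64));
    }

    int main(void) {
      double x[5] = { 1.0, 2.0, 3.0, 2.0, 5.0 };
      double y[5] = { 1.0, 0.0, 3.0, 9.0, 5.0 };
      uint64_t mask[1] = { 0 };
      for (size_t i = 0; i < 5; i++)
        store_bit(mask, i, x[i] == y[i]);        /* analogous to b = m_eq(x, y) */
      printf("mask = 0x%llx\n", (unsigned long long)mask[0]);  /* 0x15, i.e. 10101 */
      return 0;
    }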
- static void iter_dfloat_floor(na_loop_t* const lp) {
- size_t i, n;
- char *p1, *p2;
- ssize_t s1, s2;
- size_t *idx1, *idx2;
- dtype x;
-
- INIT_COUNTER(lp, n);
- INIT_PTR_IDX(lp, 0, p1, s1, idx1);
- INIT_PTR_IDX(lp, 1, p2, s2, idx2);
-
- if (idx1) {
- if (idx2) {
- for (i = 0; i < n; i++) {
- GET_DATA_INDEX(p1, idx1, dtype, x);
- x = m_floor(x);
- SET_DATA_INDEX(p2, idx2, dtype, x);
- }
- } else {
- for (i = 0; i < n; i++) {
- GET_DATA_INDEX(p1, idx1, dtype, x);
- x = m_floor(x);
- SET_DATA_STRIDE(p2, s2, dtype, x);
- }
- }
- } else {
- if (idx2) {
- for (i = 0; i < n; i++) {
- GET_DATA_STRIDE(p1, s1, dtype, x);
- x = m_floor(x);
- SET_DATA_INDEX(p2, idx2, dtype, x);
- }
- } else {
- //
- if (is_aligned(p1, sizeof(dtype)) && is_aligned(p2, sizeof(dtype))) {
- if (s1 == sizeof(dtype) && s2 == sizeof(dtype)) {
- for (i = 0; i < n; i++) {
- ((dtype*)p2)[i] = m_floor(((dtype*)p1)[i]);
- }
- return;
- }
- if (is_aligned_step(s1, sizeof(dtype)) && is_aligned_step(s2, sizeof(dtype))) {
- //
- for (i = 0; i < n; i++) {
- *(dtype*)p2 = m_floor(*(dtype*)p1);
- p1 += s1;
- p2 += s2;
- }
- return;
- //
- }
- }
- for (i = 0; i < n; i++) {
- GET_DATA_STRIDE(p1, s1, dtype, x);
- x = m_floor(x);
- SET_DATA_STRIDE(p2, s2, dtype, x);
- }
- //
- }
- }
- }
-
- static VALUE dfloat_floor(VALUE self) {
- ndfunc_arg_in_t ain[1] = { { cT, 0 } };
- ndfunc_arg_out_t aout[1] = { { cT, 0 } };
- ndfunc_t ndf = { iter_dfloat_floor, FULL_LOOP, 1, 1, ain, aout };
-
- return na_ndloop(&ndf, 1, self);
- }
-
- static void iter_dfloat_round(na_loop_t* const lp) {
- size_t i, n;
- char *p1, *p2;
- ssize_t s1, s2;
- size_t *idx1, *idx2;
- dtype x;
-
- INIT_COUNTER(lp, n);
- INIT_PTR_IDX(lp, 0, p1, s1, idx1);
- INIT_PTR_IDX(lp, 1, p2, s2, idx2);
-
- if (idx1) {
- if (idx2) {
- for (i = 0; i < n; i++) {
- GET_DATA_INDEX(p1, idx1, dtype, x);
- x = m_round(x);
- SET_DATA_INDEX(p2, idx2, dtype, x);
- }
- } else {
- for (i = 0; i < n; i++) {
- GET_DATA_INDEX(p1, idx1, dtype, x);
- x = m_round(x);
- SET_DATA_STRIDE(p2, s2, dtype, x);
- }
- }
- } else {
- if (idx2) {
- for (i = 0; i < n; i++) {
- GET_DATA_STRIDE(p1, s1, dtype, x);
- x = m_round(x);
- SET_DATA_INDEX(p2, idx2, dtype, x);
- }
- } else {
- //
- if (is_aligned(p1, sizeof(dtype)) && is_aligned(p2, sizeof(dtype))) {
- if (s1 == sizeof(dtype) && s2 == sizeof(dtype)) {
- for (i = 0; i < n; i++) {
- ((dtype*)p2)[i] = m_round(((dtype*)p1)[i]);
- }
- return;
- }
- if (is_aligned_step(s1, sizeof(dtype)) && is_aligned_step(s2, sizeof(dtype))) {
- //
- for (i = 0; i < n; i++) {
- *(dtype*)p2 = m_round(*(dtype*)p1);
- p1 += s1;
- p2 += s2;
- }
- return;
- //
- }
- }
- for (i = 0; i < n; i++) {
- GET_DATA_STRIDE(p1, s1, dtype, x);
- x = m_round(x);
- SET_DATA_STRIDE(p2, s2, dtype, x);
- }
- //
- }
- }
- }
-
- static VALUE dfloat_round(VALUE self) {
- ndfunc_arg_in_t ain[1] = { { cT, 0 } };
- ndfunc_arg_out_t aout[1] = { { cT, 0 } };
- ndfunc_t ndf = { iter_dfloat_round, FULL_LOOP, 1, 1, ain, aout };
-
- return na_ndloop(&ndf, 1, self);
- }
-
- static void iter_dfloat_ceil(na_loop_t* const lp) {
- size_t i, n;
- char *p1, *p2;
- ssize_t s1, s2;
- size_t *idx1, *idx2;
- dtype x;
-
- INIT_COUNTER(lp, n);
- INIT_PTR_IDX(lp, 0, p1, s1, idx1);
- INIT_PTR_IDX(lp, 1, p2, s2, idx2);
-
- if (idx1) {
- if (idx2) {
- for (i = 0; i < n; i++) {
- GET_DATA_INDEX(p1, idx1, dtype, x);
- x = m_ceil(x);
- SET_DATA_INDEX(p2, idx2, dtype, x);
- }
- } else {
- for (i = 0; i < n; i++) {
- GET_DATA_INDEX(p1, idx1, dtype, x);
- x = m_ceil(x);
- SET_DATA_STRIDE(p2, s2, dtype, x);
- }
- }
- } else {
- if (idx2) {
- for (i = 0; i < n; i++) {
- GET_DATA_STRIDE(p1, s1, dtype, x);
- x = m_ceil(x);
- SET_DATA_INDEX(p2, idx2, dtype, x);
- }
- } else {
- //
- if (is_aligned(p1, sizeof(dtype)) && is_aligned(p2, sizeof(dtype))) {
- if (s1 == sizeof(dtype) && s2 == sizeof(dtype)) {
- for (i = 0; i < n; i++) {
- ((dtype*)p2)[i] = m_ceil(((dtype*)p1)[i]);
- }
- return;
- }
- if (is_aligned_step(s1, sizeof(dtype)) && is_aligned_step(s2, sizeof(dtype))) {
- //
- for (i = 0; i < n; i++) {
- *(dtype*)p2 = m_ceil(*(dtype*)p1);
- p1 += s1;
- p2 += s2;
- }
- return;
- //
- }
- }
- for (i = 0; i < n; i++) {
- GET_DATA_STRIDE(p1, s1, dtype, x);
- x = m_ceil(x);
- SET_DATA_STRIDE(p2, s2, dtype, x);
- }
- //
- }
- }
- }
-
- static VALUE dfloat_ceil(VALUE self) {
- ndfunc_arg_in_t ain[1] = { { cT, 0 } };
- ndfunc_arg_out_t aout[1] = { { cT, 0 } };
- ndfunc_t ndf = { iter_dfloat_ceil, FULL_LOOP, 1, 1, ain, aout };
-
- return na_ndloop(&ndf, 1, self);
- }
-
- static void iter_dfloat_trunc(na_loop_t* const lp) {
- size_t i, n;
- char *p1, *p2;
- ssize_t s1, s2;
- size_t *idx1, *idx2;
- dtype x;
-
- INIT_COUNTER(lp, n);
- INIT_PTR_IDX(lp, 0, p1, s1, idx1);
- INIT_PTR_IDX(lp, 1, p2, s2, idx2);
-
- if (idx1) {
- if (idx2) {
- for (i = 0; i < n; i++) {
- GET_DATA_INDEX(p1, idx1, dtype, x);
- x = m_trunc(x);
- SET_DATA_INDEX(p2, idx2, dtype, x);
- }
- } else {
- for (i = 0; i < n; i++) {
- GET_DATA_INDEX(p1, idx1, dtype, x);
- x = m_trunc(x);
- SET_DATA_STRIDE(p2, s2, dtype, x);
- }
- }
- } else {
- if (idx2) {
- for (i = 0; i < n; i++) {
- GET_DATA_STRIDE(p1, s1, dtype, x);
- x = m_trunc(x);
- SET_DATA_INDEX(p2, idx2, dtype, x);
- }
- } else {
- //
- if (is_aligned(p1, sizeof(dtype)) && is_aligned(p2, sizeof(dtype))) {
- if (s1 == sizeof(dtype) && s2 == sizeof(dtype)) {
- for (i = 0; i < n; i++) {
- ((dtype*)p2)[i] = m_trunc(((dtype*)p1)[i]);
- }
- return;
- }
- if (is_aligned_step(s1, sizeof(dtype)) && is_aligned_step(s2, sizeof(dtype))) {
- //
- for (i = 0; i < n; i++) {
- *(dtype*)p2 = m_trunc(*(dtype*)p1);
- p1 += s1;
- p2 += s2;
- }
- return;
- //
- }
- }
- for (i = 0; i < n; i++) {
- GET_DATA_STRIDE(p1, s1, dtype, x);
- x = m_trunc(x);
- SET_DATA_STRIDE(p2, s2, dtype, x);
- }
- //
- }
- }
- }
-
- static VALUE dfloat_trunc(VALUE self) {
- ndfunc_arg_in_t ain[1] = { { cT, 0 } };
- ndfunc_arg_out_t aout[1] = { { cT, 0 } };
- ndfunc_t ndf = { iter_dfloat_trunc, FULL_LOOP, 1, 1, ain, aout };
-
- return na_ndloop(&ndf, 1, self);
- }
-
- static void iter_dfloat_rint(na_loop_t* const lp) {
- size_t i, n;
- char *p1, *p2;
- ssize_t s1, s2;
- size_t *idx1, *idx2;
- dtype x;
-
- INIT_COUNTER(lp, n);
- INIT_PTR_IDX(lp, 0, p1, s1, idx1);
- INIT_PTR_IDX(lp, 1, p2, s2, idx2);
-
- if (idx1) {
- if (idx2) {
- for (i = 0; i < n; i++) {
- GET_DATA_INDEX(p1, idx1, dtype, x);
- x = m_rint(x);
- SET_DATA_INDEX(p2, idx2, dtype, x);
- }
- } else {
- for (i = 0; i < n; i++) {
- GET_DATA_INDEX(p1, idx1, dtype, x);
- x = m_rint(x);
- SET_DATA_STRIDE(p2, s2, dtype, x);
- }
- }
- } else {
- if (idx2) {
- for (i = 0; i < n; i++) {
- GET_DATA_STRIDE(p1, s1, dtype, x);
- x = m_rint(x);
- SET_DATA_INDEX(p2, idx2, dtype, x);
- }
- } else {
- //
- if (is_aligned(p1, sizeof(dtype)) && is_aligned(p2, sizeof(dtype))) {
- if (s1 == sizeof(dtype) && s2 == sizeof(dtype)) {
- for (i = 0; i < n; i++) {
- ((dtype*)p2)[i] = m_rint(((dtype*)p1)[i]);
- }
- return;
- }
- if (is_aligned_step(s1, sizeof(dtype)) && is_aligned_step(s2, sizeof(dtype))) {
- //
- for (i = 0; i < n; i++) {
- *(dtype*)p2 = m_rint(*(dtype*)p1);
- p1 += s1;
- p2 += s2;
- }
- return;
- //
- }
- }
- for (i = 0; i < n; i++) {
- GET_DATA_STRIDE(p1, s1, dtype, x);
- x = m_rint(x);
- SET_DATA_STRIDE(p2, s2, dtype, x);
- }
- //
- }
- }
- }
-
- static VALUE dfloat_rint(VALUE self) {
- ndfunc_arg_in_t ain[1] = { { cT, 0 } };
- ndfunc_arg_out_t aout[1] = { { cT, 0 } };
- ndfunc_t ndf = { iter_dfloat_rint, FULL_LOOP, 1, 1, ain, aout };
-
- return na_ndloop(&ndf, 1, self);
- }
-
- #define check_intdivzero(y) \
- {}
-
- static void iter_dfloat_copysign(na_loop_t* const lp) {
- size_t i = 0;
- size_t n;
- char *p1, *p2, *p3;
- ssize_t s1, s2, s3;
-
- INIT_COUNTER(lp, n);
- INIT_PTR(lp, 0, p1, s1);
- INIT_PTR(lp, 1, p2, s2);
- INIT_PTR(lp, 2, p3, s3);
-
- //
- if (is_aligned(p1, sizeof(dtype)) && is_aligned(p2, sizeof(dtype)) &&
- is_aligned(p3, sizeof(dtype))) {
-
- if (s1 == sizeof(dtype) && s2 == sizeof(dtype) && s3 == sizeof(dtype)) {
- if (p1 == p3) { // inplace case
- for (; i < n; i++) {
- check_intdivzero(((dtype*)p2)[i]);
- ((dtype*)p1)[i] = m_copysign(((dtype*)p1)[i], ((dtype*)p2)[i]);
- }
- } else {
- for (; i < n; i++) {
- check_intdivzero(((dtype*)p2)[i]);
- ((dtype*)p3)[i] = m_copysign(((dtype*)p1)[i], ((dtype*)p2)[i]);
- }
- }
- return;
- }
-
- if (is_aligned_step(s1, sizeof(dtype)) && is_aligned_step(s2, sizeof(dtype)) &&
- is_aligned_step(s3, sizeof(dtype))) {
- //
-
- if (s2 == 0) { // Broadcasting from scalar value.
- check_intdivzero(*(dtype*)p2);
- if (s1 == sizeof(dtype) && s3 == sizeof(dtype)) {
- if (p1 == p3) { // inplace case
- for (; i < n; i++) {
- ((dtype*)p1)[i] = m_copysign(((dtype*)p1)[i], *(dtype*)p2);
- }
- } else {
- for (; i < n; i++) {
- ((dtype*)p3)[i] = m_copysign(((dtype*)p1)[i], *(dtype*)p2);
- }
- }
- } else {
- for (i = 0; i < n; i++) {
- *(dtype*)p3 = m_copysign(*(dtype*)p1, *(dtype*)p2);
- p1 += s1;
- p3 += s3;
- }
- }
- } else {
- if (p1 == p3) { // inplace case
- for (i = 0; i < n; i++) {
- check_intdivzero(*(dtype*)p2);
- *(dtype*)p1 = m_copysign(*(dtype*)p1, *(dtype*)p2);
- p1 += s1;
- p2 += s2;
- }
- } else {
- for (i = 0; i < n; i++) {
- check_intdivzero(*(dtype*)p2);
- *(dtype*)p3 = m_copysign(*(dtype*)p1, *(dtype*)p2);
- p1 += s1;
- p2 += s2;
- p3 += s3;
- }
- }
- }
-
- return;
- //
- }
- }
- for (i = 0; i < n; i++) {
- dtype x, y, z;
- GET_DATA_STRIDE(p1, s1, dtype, x);
- GET_DATA_STRIDE(p2, s2, dtype, y);
- check_intdivzero(y);
- z = m_copysign(x, y);
- SET_DATA_STRIDE(p3, s3, dtype, z);
- }
- //
- }
- #undef check_intdivzero
-
- static VALUE dfloat_copysign_self(VALUE self, VALUE other) {
- ndfunc_arg_in_t ain[2] = { { cT, 0 }, { cT, 0 } };
- ndfunc_arg_out_t aout[1] = { { cT, 0 } };
- ndfunc_t ndf = { iter_dfloat_copysign, STRIDE_LOOP, 2, 1, ain, aout };
-
- return na_ndloop(&ndf, 2, self, other);
- }
-
- static VALUE dfloat_copysign(VALUE self, VALUE other) {
-
- VALUE klass, v;
-
- klass = na_upcast(rb_obj_class(self), rb_obj_class(other));
- if (klass == cT) {
- return dfloat_copysign_self(self, other);
- } else {
- v = rb_funcall(klass, id_cast, 1, self);
- return rb_funcall(v, id_copysign, 1, other);
- }
- }
-
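The copysign kernel above is C99 copysign applied element-wise, with dedicated branches for in-place output (p1 == p3) and for broadcasting a scalar sign source (s2 == 0). A small standalone illustration of the underlying operation using copysign(3) from <math.h> (not the gem's m_copysign macro):

    #include <math.h>
    #include <stdio.h>

    int main(void) {
      double mag[4] = { 1.5, -2.0, 3.25, -0.0 };
      double sign = -1.0;                        /* scalar sign source, like the s2 == 0 branch */
      for (int i = 0; i < 4; i++)
        printf("%g ", copysign(mag[i], sign));   /* -1.5 -2 -3.25 -0 */
      printf("\n");
      return 0;
    }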
- static void iter_dfloat_signbit(na_loop_t* const lp) {
- size_t i;
- char* p1;
- BIT_DIGIT* a2;
- size_t p2;
- ssize_t s1, s2;
- size_t* idx1;
- dtype x;
- BIT_DIGIT b;
- INIT_COUNTER(lp, i);
- INIT_PTR_IDX(lp, 0, p1, s1, idx1);
- INIT_PTR_BIT(lp, 1, a2, p2, s2);
- if (idx1) {
- for (; i--;) {
- GET_DATA_INDEX(p1, idx1, dtype, x);
- b = (m_signbit(x)) ? 1 : 0;
- STORE_BIT(a2, p2, b);
- p2 += s2;
- }
- } else {
- for (; i--;) {
- GET_DATA_STRIDE(p1, s1, dtype, x);
- b = (m_signbit(x)) ? 1 : 0;
- STORE_BIT(a2, p2, b);
- p2 += s2;
- }
- }
- }
-
- static VALUE dfloat_signbit(VALUE self) {
- ndfunc_arg_in_t ain[1] = { { cT, 0 } };
- ndfunc_arg_out_t aout[1] = { { numo_cBit, 0 } };
- ndfunc_t ndf = { iter_dfloat_signbit, FULL_LOOP, 1, 1, ain, aout };
-
- return na_ndloop(&ndf, 1, self);
- }
-
- static void iter_dfloat_modf(na_loop_t* const lp) {
- size_t i;
- char *p1, *p2, *p3;
- ssize_t s1, s2, s3;
- dtype x, y, z;
- INIT_COUNTER(lp, i);
- INIT_PTR(lp, 0, p1, s1);
- INIT_PTR(lp, 1, p2, s2);
- INIT_PTR(lp, 2, p3, s3);
- for (; i--;) {
- GET_DATA_STRIDE(p1, s1, dtype, x);
- m_modf(x, y, z);
- SET_DATA_STRIDE(p2, s2, dtype, y);
- SET_DATA_STRIDE(p3, s3, dtype, z);
- }
- }
-
- static VALUE dfloat_modf(VALUE self) {
- ndfunc_arg_in_t ain[1] = { { cT, 0 } };
- ndfunc_arg_out_t aout[2] = { { cT, 0 }, { cT, 0 } };
- ndfunc_t ndf = { iter_dfloat_modf, STRIDE_LOOP, 1, 2, ain, aout };
-
- return na_ndloop(&ndf, 1, self);
- }
-
- static void iter_dfloat_gt(na_loop_t* const lp) {
- size_t i;
- char *p1, *p2;
- BIT_DIGIT* a3;
- size_t p3;
- ssize_t s1, s2, s3;
- dtype x, y;
- BIT_DIGIT b;
- INIT_COUNTER(lp, i);
- INIT_PTR(lp, 0, p1, s1);
- INIT_PTR(lp, 1, p2, s2);
- INIT_PTR_BIT(lp, 2, a3, p3, s3);
- for (; i--;) {
- GET_DATA_STRIDE(p1, s1, dtype, x);
- GET_DATA_STRIDE(p2, s2, dtype, y);
- b = (m_gt(x, y)) ? 1 : 0;
- STORE_BIT(a3, p3, b);
- p3 += s3;
- }
- }
-
- static VALUE dfloat_gt_self(VALUE self, VALUE other) {
- ndfunc_arg_in_t ain[2] = { { cT, 0 }, { cT, 0 } };
- ndfunc_arg_out_t aout[1] = { { numo_cBit, 0 } };
- ndfunc_t ndf = { iter_dfloat_gt, STRIDE_LOOP, 2, 1, ain, aout };
-
- return na_ndloop(&ndf, 2, self, other);
- }
-
- static VALUE dfloat_gt(VALUE self, VALUE other) {
-
- VALUE klass, v;
- klass = na_upcast(rb_obj_class(self), rb_obj_class(other));
- if (klass == cT) {
- return dfloat_gt_self(self, other);
- } else {
- v = rb_funcall(klass, id_cast, 1, self);
- return rb_funcall(v, id_gt, 1, other);
- }
- }
-
- static void iter_dfloat_ge(na_loop_t* const lp) {
- size_t i;
- char *p1, *p2;
- BIT_DIGIT* a3;
- size_t p3;
- ssize_t s1, s2, s3;
- dtype x, y;
- BIT_DIGIT b;
- INIT_COUNTER(lp, i);
- INIT_PTR(lp, 0, p1, s1);
- INIT_PTR(lp, 1, p2, s2);
- INIT_PTR_BIT(lp, 2, a3, p3, s3);
- for (; i--;) {
- GET_DATA_STRIDE(p1, s1, dtype, x);
- GET_DATA_STRIDE(p2, s2, dtype, y);
- b = (m_ge(x, y)) ? 1 : 0;
- STORE_BIT(a3, p3, b);
- p3 += s3;
- }
- }
-
- static VALUE dfloat_ge_self(VALUE self, VALUE other) {
- ndfunc_arg_in_t ain[2] = { { cT, 0 }, { cT, 0 } };
- ndfunc_arg_out_t aout[1] = { { numo_cBit, 0 } };
- ndfunc_t ndf = { iter_dfloat_ge, STRIDE_LOOP, 2, 1, ain, aout };
-
- return na_ndloop(&ndf, 2, self, other);
- }
-
- static VALUE dfloat_ge(VALUE self, VALUE other) {
-
- VALUE klass, v;
- klass = na_upcast(rb_obj_class(self), rb_obj_class(other));
- if (klass == cT) {
- return dfloat_ge_self(self, other);
- } else {
- v = rb_funcall(klass, id_cast, 1, self);
- return rb_funcall(v, id_ge, 1, other);
- }
- }
-
- static void iter_dfloat_lt(na_loop_t* const lp) {
- size_t i;
- char *p1, *p2;
- BIT_DIGIT* a3;
- size_t p3;
- ssize_t s1, s2, s3;
- dtype x, y;
- BIT_DIGIT b;
- INIT_COUNTER(lp, i);
- INIT_PTR(lp, 0, p1, s1);
- INIT_PTR(lp, 1, p2, s2);
- INIT_PTR_BIT(lp, 2, a3, p3, s3);
- for (; i--;) {
- GET_DATA_STRIDE(p1, s1, dtype, x);
- GET_DATA_STRIDE(p2, s2, dtype, y);
- b = (m_lt(x, y)) ? 1 : 0;
- STORE_BIT(a3, p3, b);
- p3 += s3;
- }
- }
-
- static VALUE dfloat_lt_self(VALUE self, VALUE other) {
- ndfunc_arg_in_t ain[2] = { { cT, 0 }, { cT, 0 } };
- ndfunc_arg_out_t aout[1] = { { numo_cBit, 0 } };
- ndfunc_t ndf = { iter_dfloat_lt, STRIDE_LOOP, 2, 1, ain, aout };
-
- return na_ndloop(&ndf, 2, self, other);
- }
-
- static VALUE dfloat_lt(VALUE self, VALUE other) {
-
- VALUE klass, v;
- klass = na_upcast(rb_obj_class(self), rb_obj_class(other));
- if (klass == cT) {
- return dfloat_lt_self(self, other);
- } else {
- v = rb_funcall(klass, id_cast, 1, self);
- return rb_funcall(v, id_lt, 1, other);
- }
- }
-
- static void iter_dfloat_le(na_loop_t* const lp) {
- size_t i;
- char *p1, *p2;
- BIT_DIGIT* a3;
- size_t p3;
- ssize_t s1, s2, s3;
- dtype x, y;
- BIT_DIGIT b;
- INIT_COUNTER(lp, i);
- INIT_PTR(lp, 0, p1, s1);
- INIT_PTR(lp, 1, p2, s2);
- INIT_PTR_BIT(lp, 2, a3, p3, s3);
- for (; i--;) {
- GET_DATA_STRIDE(p1, s1, dtype, x);
- GET_DATA_STRIDE(p2, s2, dtype, y);
- b = (m_le(x, y)) ? 1 : 0;
- STORE_BIT(a3, p3, b);
- p3 += s3;
- }
- }
-
- static VALUE dfloat_le_self(VALUE self, VALUE other) {
- ndfunc_arg_in_t ain[2] = { { cT, 0 }, { cT, 0 } };
- ndfunc_arg_out_t aout[1] = { { numo_cBit, 0 } };
- ndfunc_t ndf = { iter_dfloat_le, STRIDE_LOOP, 2, 1, ain, aout };
-
- return na_ndloop(&ndf, 2, self, other);
- }
-
- static VALUE dfloat_le(VALUE self, VALUE other) {
-
- VALUE klass, v;
- klass = na_upcast(rb_obj_class(self), rb_obj_class(other));
- if (klass == cT) {
- return dfloat_le_self(self, other);
- } else {
- v = rb_funcall(klass, id_cast, 1, self);
- return rb_funcall(v, id_le, 1, other);
- }
- }
-
- static void iter_dfloat_isnan(na_loop_t* const lp) {
- size_t i;
- char* p1;
- BIT_DIGIT* a2;
- size_t p2;
- ssize_t s1, s2;
- size_t* idx1;
- dtype x;
- BIT_DIGIT b;
- INIT_COUNTER(lp, i);
- INIT_PTR_IDX(lp, 0, p1, s1, idx1);
- INIT_PTR_BIT(lp, 1, a2, p2, s2);
- if (idx1) {
- for (; i--;) {
- GET_DATA_INDEX(p1, idx1, dtype, x);
- b = (m_isnan(x)) ? 1 : 0;
- STORE_BIT(a2, p2, b);
- p2 += s2;
- }
- } else {
- for (; i--;) {
- GET_DATA_STRIDE(p1, s1, dtype, x);
- b = (m_isnan(x)) ? 1 : 0;
- STORE_BIT(a2, p2, b);
- p2 += s2;
- }
- }
- }
-
- static VALUE dfloat_isnan(VALUE self) {
- ndfunc_arg_in_t ain[1] = { { cT, 0 } };
- ndfunc_arg_out_t aout[1] = { { numo_cBit, 0 } };
- ndfunc_t ndf = { iter_dfloat_isnan, FULL_LOOP, 1, 1, ain, aout };
-
- return na_ndloop(&ndf, 1, self);
- }
-
- static void iter_dfloat_isinf(na_loop_t* const lp) {
- size_t i;
- char* p1;
- BIT_DIGIT* a2;
- size_t p2;
- ssize_t s1, s2;
- size_t* idx1;
- dtype x;
- BIT_DIGIT b;
- INIT_COUNTER(lp, i);
- INIT_PTR_IDX(lp, 0, p1, s1, idx1);
- INIT_PTR_BIT(lp, 1, a2, p2, s2);
- if (idx1) {
- for (; i--;) {
- GET_DATA_INDEX(p1, idx1, dtype, x);
- b = (m_isinf(x)) ? 1 : 0;
- STORE_BIT(a2, p2, b);
- p2 += s2;
- }
- } else {
- for (; i--;) {
- GET_DATA_STRIDE(p1, s1, dtype, x);
- b = (m_isinf(x)) ? 1 : 0;
- STORE_BIT(a2, p2, b);
- p2 += s2;
- }
- }
- }
-
- static VALUE dfloat_isinf(VALUE self) {
- ndfunc_arg_in_t ain[1] = { { cT, 0 } };
- ndfunc_arg_out_t aout[1] = { { numo_cBit, 0 } };
- ndfunc_t ndf = { iter_dfloat_isinf, FULL_LOOP, 1, 1, ain, aout };
-
- return na_ndloop(&ndf, 1, self);
- }
-
- static void iter_dfloat_isposinf(na_loop_t* const lp) {
- size_t i;
- char* p1;
- BIT_DIGIT* a2;
- size_t p2;
- ssize_t s1, s2;
- size_t* idx1;
- dtype x;
- BIT_DIGIT b;
- INIT_COUNTER(lp, i);
- INIT_PTR_IDX(lp, 0, p1, s1, idx1);
- INIT_PTR_BIT(lp, 1, a2, p2, s2);
- if (idx1) {
- for (; i--;) {
- GET_DATA_INDEX(p1, idx1, dtype, x);
- b = (m_isposinf(x)) ? 1 : 0;
- STORE_BIT(a2, p2, b);
- p2 += s2;
- }
- } else {
- for (; i--;) {
- GET_DATA_STRIDE(p1, s1, dtype, x);
- b = (m_isposinf(x)) ? 1 : 0;
- STORE_BIT(a2, p2, b);
- p2 += s2;
- }
- }
- }
-
- static VALUE dfloat_isposinf(VALUE self) {
- ndfunc_arg_in_t ain[1] = { { cT, 0 } };
- ndfunc_arg_out_t aout[1] = { { numo_cBit, 0 } };
- ndfunc_t ndf = { iter_dfloat_isposinf, FULL_LOOP, 1, 1, ain, aout };
-
- return na_ndloop(&ndf, 1, self);
- }
-
- static void iter_dfloat_isneginf(na_loop_t* const lp) {
- size_t i;
- char* p1;
- BIT_DIGIT* a2;
- size_t p2;
- ssize_t s1, s2;
- size_t* idx1;
- dtype x;
- BIT_DIGIT b;
- INIT_COUNTER(lp, i);
- INIT_PTR_IDX(lp, 0, p1, s1, idx1);
- INIT_PTR_BIT(lp, 1, a2, p2, s2);
- if (idx1) {
- for (; i--;) {
- GET_DATA_INDEX(p1, idx1, dtype, x);
- b = (m_isneginf(x)) ? 1 : 0;
- STORE_BIT(a2, p2, b);
- p2 += s2;
- }
- } else {
- for (; i--;) {
- GET_DATA_STRIDE(p1, s1, dtype, x);
- b = (m_isneginf(x)) ? 1 : 0;
- STORE_BIT(a2, p2, b);
- p2 += s2;
- }
- }
- }
-
- static VALUE dfloat_isneginf(VALUE self) {
- ndfunc_arg_in_t ain[1] = { { cT, 0 } };
- ndfunc_arg_out_t aout[1] = { { numo_cBit, 0 } };
- ndfunc_t ndf = { iter_dfloat_isneginf, FULL_LOOP, 1, 1, ain, aout };
-
- return na_ndloop(&ndf, 1, self);
- }
-
- static void iter_dfloat_isfinite(na_loop_t* const lp) {
- size_t i;
- char* p1;
- BIT_DIGIT* a2;
- size_t p2;
- ssize_t s1, s2;
- size_t* idx1;
- dtype x;
- BIT_DIGIT b;
- INIT_COUNTER(lp, i);
- INIT_PTR_IDX(lp, 0, p1, s1, idx1);
- INIT_PTR_BIT(lp, 1, a2, p2, s2);
- if (idx1) {
- for (; i--;) {
- GET_DATA_INDEX(p1, idx1, dtype, x);
- b = (m_isfinite(x)) ? 1 : 0;
- STORE_BIT(a2, p2, b);
- p2 += s2;
- }
- } else {
- for (; i--;) {
- GET_DATA_STRIDE(p1, s1, dtype, x);
- b = (m_isfinite(x)) ? 1 : 0;
- STORE_BIT(a2, p2, b);
- p2 += s2;
- }
- }
- }
-
- static VALUE dfloat_isfinite(VALUE self) {
- ndfunc_arg_in_t ain[1] = { { cT, 0 } };
- ndfunc_arg_out_t aout[1] = { { numo_cBit, 0 } };
- ndfunc_t ndf = { iter_dfloat_isfinite, FULL_LOOP, 1, 1, ain, aout };
-
- return na_ndloop(&ndf, 1, self);
- }
-
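The isnan/isinf/isposinf/isneginf/isfinite iterators above are predicate loops of the same shape as signbit: one dtype input and one output bit per element. Their element tests correspond to the C99 floating-point classification macros; a compact standalone check (plain C, not the gem's m_* wrappers, with the posinf/neginf tests spelled out by hand):

    #include <math.h>
    #include <stdio.h>

    int main(void) {
      double v[4] = { 1.0, nan(""), INFINITY, -INFINITY };  /* finite, NaN, +Inf, -Inf */
      for (int i = 0; i < 4; i++)
        printf("x=%g nan=%d inf=%d posinf=%d neginf=%d finite=%d\n",
               v[i],
               isnan(v[i]) != 0,
               isinf(v[i]) != 0,
               (isinf(v[i]) && v[i] > 0),   /* isposinf */
               (isinf(v[i]) && v[i] < 0),   /* isneginf */
               isfinite(v[i]) != 0);
      return 0;
    }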
- static void iter_dfloat_kahan_sum(na_loop_t* const lp) {
- size_t n;
- char *p1, *p2;
- ssize_t s1;
+ static void iter_dfloat_kahan_sum(na_loop_t* const lp) {
+ size_t n;
+ char *p1, *p2;
+ ssize_t s1;

  INIT_COUNTER(lp, n);
  INIT_PTR(lp, 0, p1, s1);
@@ -4177,79 +1844,6 @@ static VALUE dfloat_kahan_sum(int argc, VALUE* argv, VALUE self) {
  return dfloat_extract(v);
  }

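dfloat_kahan_sum, whose tail survives as context above, reduces with compensated (Kahan) summation instead of a plain accumulator, so the low-order bits lost by each addition are carried forward in a correction term. A minimal standalone sketch of the technique (not the gem's reduction loop):

    #include <stdio.h>

    /* Compensated (Kahan) summation: c accumulates the rounding error
     * of each addition and feeds it back into the next one. */
    static double kahan_sum(const double *a, int n) {
      double s = 0.0, c = 0.0;
      for (int i = 0; i < n; i++) {
        double y = a[i] - c;   /* apply the pending correction */
        double t = s + y;      /* low-order bits of y may be lost here... */
        c = (t - s) - y;       /* ...and are recovered into c */
        s = t;
      }
      return s;
    }

    int main(void) {
      double a[3] = { 1e16, 1.0, 1.0 };
      double naive = 0.0;
      for (int i = 0; i < 3; i++) naive += a[i];
      printf("naive = %.0f\n", naive);            /* 10000000000000000: both 1.0s are absorbed */
      printf("kahan = %.0f\n", kahan_sum(a, 3));  /* 10000000000000002 */
      return 0;
    }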
- typedef struct {
- dtype mu;
- rtype sigma;
- } randn_opt_t;
-
- static void iter_dfloat_rand_norm(na_loop_t* const lp) {
- size_t i;
- char* p1;
- ssize_t s1;
- size_t* idx1;
-
- dtype *a0, *a1;
-
- dtype mu;
- rtype sigma;
- randn_opt_t* g;
-
- INIT_COUNTER(lp, i);
- INIT_PTR_IDX(lp, 0, p1, s1, idx1);
- g = (randn_opt_t*)(lp->opt_ptr);
- mu = g->mu;
- sigma = g->sigma;
-
- if (idx1) {
-
- for (; i > 1; i -= 2) {
- a0 = (dtype*)(p1 + *idx1);
- a1 = (dtype*)(p1 + *(idx1 + 1));
- m_rand_norm(mu, sigma, a0, a1);
- idx1 += 2;
- }
- if (i > 0) {
- a0 = (dtype*)(p1 + *idx1);
- m_rand_norm(mu, sigma, a0, 0);
- }
-
- } else {
-
- for (; i > 1; i -= 2) {
- a0 = (dtype*)(p1);
- a1 = (dtype*)(p1 + s1);
- m_rand_norm(mu, sigma, a0, a1);
- p1 += s1 * 2;
- }
- if (i > 0) {
- a0 = (dtype*)(p1);
- m_rand_norm(mu, sigma, a0, 0);
- }
- }
- }
-
- static VALUE dfloat_rand_norm(int argc, VALUE* args, VALUE self) {
- int n;
- randn_opt_t g;
- VALUE v1 = Qnil, v2 = Qnil;
- ndfunc_arg_in_t ain[1] = { { OVERWRITE, 0 } };
- ndfunc_t ndf = { iter_dfloat_rand_norm, FULL_LOOP, 1, 0, ain, 0 };
-
- n = rb_scan_args(argc, args, "02", &v1, &v2);
- if (n == 0) {
- g.mu = m_zero;
- } else {
- g.mu = m_num_to_data(v1);
- }
- if (n == 2) {
- g.sigma = NUM2DBL(v2);
- } else {
- g.sigma = 1;
- }
- na_ndloop3(&ndf, &g, 1, self);
- return self;
- }
-
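iter_dfloat_rand_norm fills output elements two at a time (m_rand_norm writes a0 and a1 per call, with a lone trailing element passed a null a1), which is the shape of a generator that produces normal deviates in pairs. As a rough illustration only, and a guess at the flavor of transform rather than the gem's m_rand_norm, a Box-Muller pair generator looks like:

    #include <math.h>
    #include <stdio.h>
    #include <stdlib.h>

    /* Generate a pair of independent N(mu, sigma^2) deviates from two
     * uniform samples (basic Box-Muller transform). */
    static void rand_norm_pair(double mu, double sigma, double *a0, double *a1) {
      const double two_pi = 6.283185307179586;
      double u1 = (rand() + 1.0) / ((double)RAND_MAX + 1.0);  /* in (0, 1], so log(u1) is finite */
      double u2 = rand() / ((double)RAND_MAX + 1.0);
      double r = sqrt(-2.0 * log(u1));
      *a0 = mu + sigma * r * cos(two_pi * u2);
      if (a1) *a1 = mu + sigma * r * sin(two_pi * u2);  /* a1 may be NULL for an odd trailing element */
    }

    int main(void) {
      double x, y;
      srand(42);
      for (int i = 0; i < 3; i++) {
        rand_norm_pair(0.0, 1.0, &x, &y);
        printf("%f %f\n", x, y);
      }
      return 0;
    }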
  static void iter_dfloat_poly(na_loop_t* const lp) {
  size_t i;
  dtype x, y, a;