numo-narray-alt 0.9.11 → 0.9.12

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (91)
  1. checksums.yaml +4 -4
  2. data/Gemfile +0 -1
  3. data/README.md +7 -0
  4. data/ext/numo/narray/numo/narray.h +2 -2
  5. data/ext/numo/narray/numo/types/robj_macro.h +1 -1
  6. data/ext/numo/narray/src/mh/bincount.h +233 -0
  7. data/ext/numo/narray/src/mh/bit/and.h +225 -0
  8. data/ext/numo/narray/src/mh/bit/left_shift.h +225 -0
  9. data/ext/numo/narray/src/mh/bit/not.h +173 -0
  10. data/ext/numo/narray/src/mh/bit/or.h +225 -0
  11. data/ext/numo/narray/src/mh/bit/right_shift.h +225 -0
  12. data/ext/numo/narray/src/mh/bit/xor.h +225 -0
  13. data/ext/numo/narray/src/mh/coerce_cast.h +9 -0
  14. data/ext/numo/narray/src/mh/comp/binary_func.h +37 -0
  15. data/ext/numo/narray/src/mh/comp/eq.h +26 -0
  16. data/ext/numo/narray/src/mh/comp/ge.h +26 -0
  17. data/ext/numo/narray/src/mh/comp/gt.h +26 -0
  18. data/ext/numo/narray/src/mh/comp/le.h +26 -0
  19. data/ext/numo/narray/src/mh/comp/lt.h +26 -0
  20. data/ext/numo/narray/src/mh/comp/ne.h +26 -0
  21. data/ext/numo/narray/src/mh/comp/nearly_eq.h +26 -0
  22. data/ext/numo/narray/src/mh/divmod.h +142 -0
  23. data/ext/numo/narray/src/mh/eye.h +1 -1
  24. data/ext/numo/narray/src/mh/fill.h +94 -0
  25. data/ext/numo/narray/src/mh/format.h +108 -0
  26. data/ext/numo/narray/src/mh/format_to_a.h +89 -0
  27. data/ext/numo/narray/src/mh/inspect.h +33 -0
  28. data/ext/numo/narray/src/mh/isfinite.h +42 -0
  29. data/ext/numo/narray/src/mh/isinf.h +42 -0
  30. data/ext/numo/narray/src/mh/isnan.h +42 -0
  31. data/ext/numo/narray/src/mh/isneginf.h +42 -0
  32. data/ext/numo/narray/src/mh/isposinf.h +42 -0
  33. data/ext/numo/narray/src/mh/math/acos.h +2 -2
  34. data/ext/numo/narray/src/mh/math/acosh.h +2 -2
  35. data/ext/numo/narray/src/mh/math/asin.h +2 -2
  36. data/ext/numo/narray/src/mh/math/asinh.h +2 -2
  37. data/ext/numo/narray/src/mh/math/atan.h +2 -2
  38. data/ext/numo/narray/src/mh/math/atan2.h +3 -3
  39. data/ext/numo/narray/src/mh/math/atanh.h +2 -2
  40. data/ext/numo/narray/src/mh/math/cbrt.h +2 -2
  41. data/ext/numo/narray/src/mh/math/cos.h +2 -2
  42. data/ext/numo/narray/src/mh/math/cosh.h +2 -2
  43. data/ext/numo/narray/src/mh/math/erf.h +2 -2
  44. data/ext/numo/narray/src/mh/math/erfc.h +2 -2
  45. data/ext/numo/narray/src/mh/math/exp.h +2 -2
  46. data/ext/numo/narray/src/mh/math/exp10.h +2 -2
  47. data/ext/numo/narray/src/mh/math/exp2.h +2 -2
  48. data/ext/numo/narray/src/mh/math/expm1.h +2 -2
  49. data/ext/numo/narray/src/mh/math/frexp.h +3 -3
  50. data/ext/numo/narray/src/mh/math/hypot.h +3 -3
  51. data/ext/numo/narray/src/mh/math/ldexp.h +3 -3
  52. data/ext/numo/narray/src/mh/math/log.h +2 -2
  53. data/ext/numo/narray/src/mh/math/log10.h +2 -2
  54. data/ext/numo/narray/src/mh/math/log1p.h +2 -2
  55. data/ext/numo/narray/src/mh/math/log2.h +2 -2
  56. data/ext/numo/narray/src/mh/math/sin.h +2 -2
  57. data/ext/numo/narray/src/mh/math/sinc.h +2 -2
  58. data/ext/numo/narray/src/mh/math/sinh.h +2 -2
  59. data/ext/numo/narray/src/mh/math/sqrt.h +8 -8
  60. data/ext/numo/narray/src/mh/math/tan.h +2 -2
  61. data/ext/numo/narray/src/mh/math/tanh.h +2 -2
  62. data/ext/numo/narray/src/mh/math/unary_func.h +3 -3
  63. data/ext/numo/narray/src/mh/op/add.h +78 -0
  64. data/ext/numo/narray/src/mh/op/binary_func.h +423 -0
  65. data/ext/numo/narray/src/mh/op/div.h +118 -0
  66. data/ext/numo/narray/src/mh/op/mod.h +108 -0
  67. data/ext/numo/narray/src/mh/op/mul.h +78 -0
  68. data/ext/numo/narray/src/mh/op/sub.h +78 -0
  69. data/ext/numo/narray/src/mh/rand.h +2 -2
  70. data/ext/numo/narray/src/mh/round/ceil.h +11 -0
  71. data/ext/numo/narray/src/mh/round/floor.h +11 -0
  72. data/ext/numo/narray/src/mh/round/rint.h +9 -0
  73. data/ext/numo/narray/src/mh/round/round.h +11 -0
  74. data/ext/numo/narray/src/mh/round/trunc.h +11 -0
  75. data/ext/numo/narray/src/mh/round/unary_func.h +127 -0
  76. data/ext/numo/narray/src/mh/to_a.h +78 -0
  77. data/ext/numo/narray/src/t_bit.c +45 -234
  78. data/ext/numo/narray/src/t_dcomplex.c +584 -1809
  79. data/ext/numo/narray/src/t_dfloat.c +429 -2432
  80. data/ext/numo/narray/src/t_int16.c +481 -2283
  81. data/ext/numo/narray/src/t_int32.c +481 -2283
  82. data/ext/numo/narray/src/t_int64.c +481 -2283
  83. data/ext/numo/narray/src/t_int8.c +408 -1873
  84. data/ext/numo/narray/src/t_robject.c +448 -1977
  85. data/ext/numo/narray/src/t_scomplex.c +584 -1809
  86. data/ext/numo/narray/src/t_sfloat.c +429 -2434
  87. data/ext/numo/narray/src/t_uint16.c +480 -2278
  88. data/ext/numo/narray/src/t_uint32.c +480 -2278
  89. data/ext/numo/narray/src/t_uint64.c +480 -2278
  90. data/ext/numo/narray/src/t_uint8.c +407 -1868
  91. metadata +41 -2
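
The counts above trace the shape of the change: every per-type source under data/ext/numo/narray/src (t_dfloat.c, t_int32.c, and so on) loses on the order of two thousand lines, while the shared kernels move into the new src/mh/*.h headers, which each type source then instantiates for its own element type. A minimal, self-contained sketch of that instantiate-by-macro pattern is shown below; the macro, type, and function names are illustrative stand-ins, not the gem's actual API.

    #include <stdio.h>

    /* Hypothetical illustration of a header-style definition macro: one
     * shared body expands into a separate function per element type. */
    #define DEF_SUM_METHOD_FUNC(tDtype)                              \
        static tDtype tDtype##_sum(const tDtype* p, size_t n) {      \
            tDtype acc = 0;                                          \
            for (size_t i = 0; i < n; i++) {                         \
                acc += p[i];                                         \
            }                                                        \
            return acc;                                              \
        }

    typedef double dfloat;
    typedef int int32;

    DEF_SUM_METHOD_FUNC(dfloat) /* defines dfloat_sum() */
    DEF_SUM_METHOD_FUNC(int32)  /* defines int32_sum()  */

    int main(void) {
        dfloat dv[] = { 1.5, 2.5 };
        int32 iv[] = { 1, 2, 3 };
        printf("%f %d\n", dfloat_sum(dv, 2), int32_sum(iv, 3));
        return 0;
    }
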
@@ -42,7 +42,36 @@ static ID id_to_a;
  VALUE cT;
  extern VALUE cRT;
 
+ #include "mh/coerce_cast.h"
+ #include "mh/to_a.h"
+ #include "mh/fill.h"
+ #include "mh/format.h"
+ #include "mh/format_to_a.h"
+ #include "mh/inspect.h"
+ #include "mh/op/add.h"
+ #include "mh/op/sub.h"
+ #include "mh/op/mul.h"
+ #include "mh/op/div.h"
+ #include "mh/op/mod.h"
+ #include "mh/divmod.h"
+ #include "mh/round/floor.h"
+ #include "mh/round/round.h"
+ #include "mh/round/ceil.h"
+ #include "mh/round/trunc.h"
+ #include "mh/round/rint.h"
+ #include "mh/comp/eq.h"
+ #include "mh/comp/ne.h"
+ #include "mh/comp/nearly_eq.h"
+ #include "mh/comp/gt.h"
+ #include "mh/comp/ge.h"
+ #include "mh/comp/lt.h"
+ #include "mh/comp/le.h"
  #include "mh/clip.h"
+ #include "mh/isnan.h"
+ #include "mh/isinf.h"
+ #include "mh/isposinf.h"
+ #include "mh/isneginf.h"
+ #include "mh/isfinite.h"
  #include "mh/sum.h"
  #include "mh/prod.h"
  #include "mh/mean.h"
@@ -98,7 +127,43 @@ extern VALUE cRT;
 
  typedef double dfloat; // Type aliases for shorter notation
  // following the codebase naming convention.
+ DEF_NARRAY_COERCE_CAST_METHOD_FUNC(dfloat)
+ DEF_NARRAY_TO_A_METHOD_FUNC(dfloat)
+ DEF_NARRAY_FILL_METHOD_FUNC(dfloat)
+ DEF_NARRAY_FORMAT_METHOD_FUNC(dfloat)
+ DEF_NARRAY_FORMAT_TO_A_METHOD_FUNC(dfloat)
+ DEF_NARRAY_INSPECT_METHOD_FUNC(dfloat)
+ #ifdef __SSE2__
+ DEF_NARRAY_DFLT_ADD_SSE2_METHOD_FUNC()
+ DEF_NARRAY_DFLT_SUB_SSE2_METHOD_FUNC()
+ DEF_NARRAY_DFLT_MUL_SSE2_METHOD_FUNC()
+ DEF_NARRAY_DFLT_DIV_SSE2_METHOD_FUNC()
+ #else
+ DEF_NARRAY_ADD_METHOD_FUNC(dfloat, numo_cDFloat)
+ DEF_NARRAY_SUB_METHOD_FUNC(dfloat, numo_cDFloat)
+ DEF_NARRAY_MUL_METHOD_FUNC(dfloat, numo_cDFloat)
+ DEF_NARRAY_FLT_DIV_METHOD_FUNC(dfloat, numo_cDFloat)
+ #endif
+ DEF_NARRAY_FLT_MOD_METHOD_FUNC(dfloat, numo_cDFloat)
+ DEF_NARRAY_FLT_DIVMOD_METHOD_FUNC(dfloat, numo_cDFloat)
+ DEF_NARRAY_FLT_FLOOR_METHOD_FUNC(dfloat, numo_cDFloat)
+ DEF_NARRAY_FLT_ROUND_METHOD_FUNC(dfloat, numo_cDFloat)
+ DEF_NARRAY_FLT_CEIL_METHOD_FUNC(dfloat, numo_cDFloat)
+ DEF_NARRAY_FLT_TRUNC_METHOD_FUNC(dfloat, numo_cDFloat)
+ DEF_NARRAY_FLT_RINT_METHOD_FUNC(dfloat, numo_cDFloat)
+ DEF_NARRAY_EQ_METHOD_FUNC(dfloat, numo_cDFloat)
+ DEF_NARRAY_NE_METHOD_FUNC(dfloat, numo_cDFloat)
+ DEF_NARRAY_NEARLY_EQ_METHOD_FUNC(dfloat, numo_cDFloat)
+ DEF_NARRAY_GT_METHOD_FUNC(dfloat, numo_cDFloat)
+ DEF_NARRAY_GE_METHOD_FUNC(dfloat, numo_cDFloat)
+ DEF_NARRAY_LT_METHOD_FUNC(dfloat, numo_cDFloat)
+ DEF_NARRAY_LE_METHOD_FUNC(dfloat, numo_cDFloat)
  DEF_NARRAY_CLIP_METHOD_FUNC(dfloat, numo_cDFloat)
+ DEF_NARRAY_FLT_ISNAN_METHOD_FUNC(dfloat, numo_cDFloat)
+ DEF_NARRAY_FLT_ISINF_METHOD_FUNC(dfloat, numo_cDFloat)
+ DEF_NARRAY_FLT_ISPOSINF_METHOD_FUNC(dfloat, numo_cDFloat)
+ DEF_NARRAY_FLT_ISNEGINF_METHOD_FUNC(dfloat, numo_cDFloat)
+ DEF_NARRAY_FLT_ISFINITE_METHOD_FUNC(dfloat, numo_cDFloat)
  DEF_NARRAY_FLT_SUM_METHOD_FUNC(dfloat, numo_cDFloat)
  DEF_NARRAY_FLT_PROD_METHOD_FUNC(dfloat, numo_cDFloat)
  DEF_NARRAY_FLT_MEAN_METHOD_FUNC(dfloat, numo_cDFloat, double, numo_cDFloat)
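
For DFloat, the hunk above selects a hand-vectorized definition when the compiler advertises SSE2 and falls back to the generic scalar definition otherwise. A standalone sketch of that compile-time dispatch follows, using the same __SSE2__ guard and standard SSE2 intrinsics from <emmintrin.h>; the function name and the unaligned load/store choice are illustrative assumptions, not the gem's implementation (which, as the removed code later in this diff shows, also handles alignment and in-place output).

    #include <stdio.h>
    #ifdef __SSE2__
    #include <emmintrin.h>
    #endif

    /* Add two double buffers: two-wide packed adds under SSE2,
     * a plain scalar loop (and the scalar tail) otherwise. */
    static void add_dfloat(const double* a, const double* b, double* out, size_t n) {
        size_t i = 0;
    #ifdef __SSE2__
        for (; i + 2 <= n; i += 2) {
            __m128d va = _mm_loadu_pd(&a[i]);
            __m128d vb = _mm_loadu_pd(&b[i]);
            _mm_storeu_pd(&out[i], _mm_add_pd(va, vb));
        }
    #endif
        for (; i < n; i++) {
            out[i] = a[i] + b[i];
        }
    }

    int main(void) {
        double a[] = { 1.0, 2.0, 3.0 };
        double b[] = { 0.5, 0.5, 0.5 };
        double out[3];
        add_dfloat(a, b, out, 3);
        printf("%f %f %f\n", out[0], out[1], out[2]);
        return 0;
    }
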
@@ -1275,171 +1340,6 @@ static VALUE dfloat_aset(int argc, VALUE* argv, VALUE self) {
  return argv[argc];
  }
 
- static VALUE dfloat_coerce_cast(VALUE self, VALUE type) {
- return Qnil;
- }
-
- static void iter_dfloat_to_a(na_loop_t* const lp) {
- size_t i, s1;
- char* p1;
- size_t* idx1;
- dtype x;
- volatile VALUE a, y;
-
- INIT_COUNTER(lp, i);
- INIT_PTR_IDX(lp, 0, p1, s1, idx1);
- a = rb_ary_new2(i);
- rb_ary_push(lp->args[1].value, a);
- if (idx1) {
- for (; i--;) {
- GET_DATA_INDEX(p1, idx1, dtype, x);
- y = m_data_to_num(x);
- rb_ary_push(a, y);
- }
- } else {
- for (; i--;) {
- GET_DATA_STRIDE(p1, s1, dtype, x);
- y = m_data_to_num(x);
- rb_ary_push(a, y);
- }
- }
- }
-
- static VALUE dfloat_to_a(VALUE self) {
- ndfunc_arg_in_t ain[3] = { { Qnil, 0 }, { sym_loop_opt }, { sym_option } };
- ndfunc_arg_out_t aout[1] = { { rb_cArray, 0 } }; // dummy?
- ndfunc_t ndf = { iter_dfloat_to_a, FULL_LOOP_NIP, 3, 1, ain, aout };
- return na_ndloop_cast_narray_to_rarray(&ndf, self, Qnil);
- }
-
- static void iter_dfloat_fill(na_loop_t* const lp) {
- size_t i;
- char* p1;
- ssize_t s1;
- size_t* idx1;
- VALUE x = lp->option;
- dtype y;
- INIT_COUNTER(lp, i);
- INIT_PTR_IDX(lp, 0, p1, s1, idx1);
- y = m_num_to_data(x);
- if (idx1) {
- for (; i--;) {
- SET_DATA_INDEX(p1, idx1, dtype, y);
- }
- } else {
- for (; i--;) {
- SET_DATA_STRIDE(p1, s1, dtype, y);
- }
- }
- }
-
- static VALUE dfloat_fill(VALUE self, VALUE val) {
- ndfunc_arg_in_t ain[2] = { { OVERWRITE, 0 }, { sym_option } };
- ndfunc_t ndf = { iter_dfloat_fill, FULL_LOOP, 2, 0, ain, 0 };
-
- na_ndloop(&ndf, 2, self, val);
- return self;
- }
-
- static VALUE format_dfloat(VALUE fmt, dtype* x) {
- // fix-me
- char s[48];
- int n;
-
- if (NIL_P(fmt)) {
- n = m_sprintf(s, *x);
- return rb_str_new(s, n);
- }
- return rb_funcall(fmt, '%', 1, m_data_to_num(*x));
- }
-
- static void iter_dfloat_format(na_loop_t* const lp) {
- size_t i;
- char *p1, *p2;
- ssize_t s1, s2;
- size_t* idx1;
- dtype* x;
- VALUE y;
- VALUE fmt = lp->option;
- INIT_COUNTER(lp, i);
- INIT_PTR_IDX(lp, 0, p1, s1, idx1);
- INIT_PTR(lp, 1, p2, s2);
- if (idx1) {
- for (; i--;) {
- x = (dtype*)(p1 + *idx1);
- idx1++;
- y = format_dfloat(fmt, x);
- SET_DATA_STRIDE(p2, s2, VALUE, y);
- }
- } else {
- for (; i--;) {
- x = (dtype*)p1;
- p1 += s1;
- y = format_dfloat(fmt, x);
- SET_DATA_STRIDE(p2, s2, VALUE, y);
- }
- }
- }
-
- static VALUE dfloat_format(int argc, VALUE* argv, VALUE self) {
- VALUE fmt = Qnil;
-
- ndfunc_arg_in_t ain[2] = { { Qnil, 0 }, { sym_option } };
- ndfunc_arg_out_t aout[1] = { { numo_cRObject, 0 } };
- ndfunc_t ndf = { iter_dfloat_format, FULL_LOOP_NIP, 2, 1, ain, aout };
-
- rb_scan_args(argc, argv, "01", &fmt);
- return na_ndloop(&ndf, 2, self, fmt);
- }
-
- static void iter_dfloat_format_to_a(na_loop_t* const lp) {
- size_t i;
- char* p1;
- ssize_t s1;
- size_t* idx1;
- dtype* x;
- VALUE y;
- volatile VALUE a;
- VALUE fmt = lp->option;
- INIT_COUNTER(lp, i);
- INIT_PTR_IDX(lp, 0, p1, s1, idx1);
- a = rb_ary_new2(i);
- rb_ary_push(lp->args[1].value, a);
- if (idx1) {
- for (; i--;) {
- x = (dtype*)(p1 + *idx1);
- idx1++;
- y = format_dfloat(fmt, x);
- rb_ary_push(a, y);
- }
- } else {
- for (; i--;) {
- x = (dtype*)p1;
- p1 += s1;
- y = format_dfloat(fmt, x);
- rb_ary_push(a, y);
- }
- }
- }
-
- static VALUE dfloat_format_to_a(int argc, VALUE* argv, VALUE self) {
- VALUE fmt = Qnil;
- ndfunc_arg_in_t ain[3] = { { Qnil, 0 }, { sym_loop_opt }, { sym_option } };
- ndfunc_arg_out_t aout[1] = { { rb_cArray, 0 } }; // dummy?
- ndfunc_t ndf = { iter_dfloat_format_to_a, FULL_LOOP_NIP, 3, 1, ain, aout };
-
- rb_scan_args(argc, argv, "01", &fmt);
- return na_ndloop_cast_narray_to_rarray(&ndf, self, fmt);
- }
-
- static VALUE iter_dfloat_inspect(char* ptr, size_t pos, VALUE fmt) {
- return format_dfloat(fmt, (dtype*)(ptr + pos));
- }
-
- static VALUE dfloat_inspect(VALUE ary) {
- return na_ndloop_inspect(ary, iter_dfloat_inspect, Qnil);
- }
-
  static void iter_dfloat_each(na_loop_t* const lp) {
  size_t i, s1;
  char* p1;
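
Each helper removed above follows the same na_ndloop iterator shape: read the element count, then walk every argument either through an index table (GET_DATA_INDEX / SET_DATA_INDEX) or by a byte stride (GET_DATA_STRIDE / SET_DATA_STRIDE). A stripped-down, standalone sketch of that access split is below; the struct and names are illustrative, not Numo's actual na_loop_t API.

    #include <stdio.h>
    #include <stddef.h>

    /* Illustrative stand-in for the pointer / stride / index triple the
     * removed iterators pull out of na_loop_t with INIT_PTR_IDX. */
    typedef struct {
        char* ptr;      /* base address of the argument's data          */
        ptrdiff_t step; /* byte stride between consecutive elements     */
        size_t* idx;    /* optional byte-offset table (NULL if strided) */
    } arg_view;

    /* Sum n doubles through the index table when one is present,
     * otherwise by advancing the pointer one stride per element. */
    static double sum_view(arg_view v, size_t n) {
        double acc = 0.0;
        if (v.idx) {
            for (size_t i = 0; i < n; i++)
                acc += *(double*)(v.ptr + v.idx[i]);
        } else {
            for (size_t i = 0; i < n; i++, v.ptr += v.step)
                acc += *(double*)v.ptr;
        }
        return acc;
    }

    int main(void) {
        double data[4] = { 1.0, 2.0, 3.0, 4.0 };
        size_t offsets[2] = { 0, 3 * sizeof(double) };
        arg_view strided = { (char*)data, sizeof(double), NULL };
        arg_view indexed = { (char*)data, 0, offsets };
        printf("%f %f\n", sum_view(strided, 4), sum_view(indexed, 2));
        return 0;
    }
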
@@ -1722,2352 +1622,461 @@ static VALUE dfloat_abs(VALUE self) {
1722
1622
  return na_ndloop(&ndf, 1, self);
1723
1623
  }
1724
1624
 
1725
- #define check_intdivzero(y) \
1726
- {}
1727
-
1728
- static void iter_dfloat_add(na_loop_t* const lp) {
1729
- size_t i = 0;
1730
- size_t n;
1625
+ static void iter_dfloat_pow(na_loop_t* const lp) {
1626
+ size_t i;
1731
1627
  char *p1, *p2, *p3;
1732
1628
  ssize_t s1, s2, s3;
1733
-
1734
- #ifdef __SSE2__
1735
- size_t cnt;
1736
- size_t cnt_simd_loop = -1;
1737
-
1738
- __m128d a;
1739
- __m128d b;
1740
-
1741
- size_t num_pack; // Number of elements packed for SIMD.
1742
- num_pack = SIMD_ALIGNMENT_SIZE / sizeof(dtype);
1743
- #endif
1744
- INIT_COUNTER(lp, n);
1629
+ dtype x, y;
1630
+ INIT_COUNTER(lp, i);
1745
1631
  INIT_PTR(lp, 0, p1, s1);
1746
1632
  INIT_PTR(lp, 1, p2, s2);
1747
1633
  INIT_PTR(lp, 2, p3, s3);
1748
-
1749
- //
1750
- if (is_aligned(p1, sizeof(dtype)) && is_aligned(p2, sizeof(dtype)) &&
1751
- is_aligned(p3, sizeof(dtype))) {
1752
-
1753
- if (s1 == sizeof(dtype) && s2 == sizeof(dtype) && s3 == sizeof(dtype)) {
1754
- #ifdef __SSE2__
1755
- // Check number of elements. & Check same alignment.
1756
- if ((n >= num_pack) &&
1757
- is_same_aligned3(
1758
- &((dtype*)p1)[i], &((dtype*)p2)[i], &((dtype*)p3)[i], SIMD_ALIGNMENT_SIZE
1759
- )) {
1760
- // Calculate up to the position just before the start of SIMD computation.
1761
- cnt = get_count_of_elements_not_aligned_to_simd_size(
1762
- &((dtype*)p1)[i], SIMD_ALIGNMENT_SIZE, sizeof(dtype)
1763
- );
1764
- #endif
1765
- if (p1 == p3) { // inplace case
1766
- #ifdef __SSE2__
1767
- for (; i < cnt; i++) {
1768
- #else
1769
- for (; i < n; i++) {
1770
- check_intdivzero(((dtype*)p2)[i]);
1771
- #endif
1772
- ((dtype*)p1)[i] = m_add(((dtype*)p1)[i], ((dtype*)p2)[i]);
1773
- }
1774
- } else {
1775
- #ifdef __SSE2__
1776
- for (; i < cnt; i++) {
1777
- #else
1778
- for (; i < n; i++) {
1779
- check_intdivzero(((dtype*)p2)[i]);
1780
- #endif
1781
- ((dtype*)p3)[i] = m_add(((dtype*)p1)[i], ((dtype*)p2)[i]);
1782
- }
1783
- }
1784
- #ifdef __SSE2__
1785
- // Get the count of SIMD computation loops.
1786
- cnt_simd_loop = (n - i) % num_pack;
1787
-
1788
- // SIMD computation.
1789
- if (p1 == p3) { // inplace case
1790
- for (; i < n - cnt_simd_loop; i += num_pack) {
1791
- a = _mm_load_pd(&((dtype*)p1)[i]);
1792
- b = _mm_load_pd(&((dtype*)p2)[i]);
1793
- a = _mm_add_pd(a, b);
1794
- _mm_store_pd(&((dtype*)p1)[i], a);
1795
- }
1796
- } else {
1797
- for (; i < n - cnt_simd_loop; i += num_pack) {
1798
- a = _mm_load_pd(&((dtype*)p1)[i]);
1799
- b = _mm_load_pd(&((dtype*)p2)[i]);
1800
- a = _mm_add_pd(a, b);
1801
- _mm_stream_pd(&((dtype*)p3)[i], a);
1802
- }
1803
- }
1804
- }
1805
-
1806
- // Compute the remainder of the SIMD operation.
1807
- if (cnt_simd_loop != 0) {
1808
- if (p1 == p3) { // inplace case
1809
- for (; i < n; i++) {
1810
- check_intdivzero(((dtype*)p2)[i]);
1811
- ((dtype*)p1)[i] = m_add(((dtype*)p1)[i], ((dtype*)p2)[i]);
1812
- }
1813
- } else {
1814
- for (; i < n; i++) {
1815
- check_intdivzero(((dtype*)p2)[i]);
1816
- ((dtype*)p3)[i] = m_add(((dtype*)p1)[i], ((dtype*)p2)[i]);
1817
- }
1818
- }
1819
- }
1820
- #endif
1821
- return;
1822
- }
1823
-
1824
- if (is_aligned_step(s1, sizeof(dtype)) && is_aligned_step(s2, sizeof(dtype)) &&
1825
- is_aligned_step(s3, sizeof(dtype))) {
1826
- //
1827
-
1828
- if (s2 == 0) { // Broadcasting from scalar value.
1829
- check_intdivzero(*(dtype*)p2);
1830
- if (s1 == sizeof(dtype) && s3 == sizeof(dtype)) {
1831
- #ifdef __SSE2__
1832
- // Broadcast a scalar value and use it for SIMD computation.
1833
- b = _mm_load1_pd(&((dtype*)p2)[0]);
1834
-
1835
- // Check number of elements. & Check same alignment.
1836
- if ((n >= num_pack) &&
1837
- is_same_aligned2(&((dtype*)p1)[i], &((dtype*)p3)[i], SIMD_ALIGNMENT_SIZE)) {
1838
- // Calculate up to the position just before the start of SIMD computation.
1839
- cnt = get_count_of_elements_not_aligned_to_simd_size(
1840
- &((dtype*)p1)[i], SIMD_ALIGNMENT_SIZE, sizeof(dtype)
1841
- );
1842
- #endif
1843
- if (p1 == p3) { // inplace case
1844
- #ifdef __SSE2__
1845
- for (; i < cnt; i++) {
1846
- #else
1847
- for (; i < n; i++) {
1848
- #endif
1849
- ((dtype*)p1)[i] = m_add(((dtype*)p1)[i], *(dtype*)p2);
1850
- }
1851
- } else {
1852
- #ifdef __SSE2__
1853
- for (; i < cnt; i++) {
1854
- #else
1855
- for (; i < n; i++) {
1856
- #endif
1857
- ((dtype*)p3)[i] = m_add(((dtype*)p1)[i], *(dtype*)p2);
1858
- }
1859
- }
1860
- #ifdef __SSE2__
1861
- // Get the count of SIMD computation loops.
1862
- cnt_simd_loop = (n - i) % num_pack;
1863
-
1864
- // SIMD computation.
1865
- if (p1 == p3) { // inplace case
1866
- for (; i < n - cnt_simd_loop; i += num_pack) {
1867
- a = _mm_load_pd(&((dtype*)p1)[i]);
1868
- a = _mm_add_pd(a, b);
1869
- _mm_store_pd(&((dtype*)p1)[i], a);
1870
- }
1871
- } else {
1872
- for (; i < n - cnt_simd_loop; i += num_pack) {
1873
- a = _mm_load_pd(&((dtype*)p1)[i]);
1874
- a = _mm_add_pd(a, b);
1875
- _mm_stream_pd(&((dtype*)p3)[i], a);
1876
- }
1877
- }
1878
- }
1879
-
1880
- // Compute the remainder of the SIMD operation.
1881
- if (cnt_simd_loop != 0) {
1882
- if (p1 == p3) { // inplace case
1883
- for (; i < n; i++) {
1884
- ((dtype*)p1)[i] = m_add(((dtype*)p1)[i], *(dtype*)p2);
1885
- }
1886
- } else {
1887
- for (; i < n; i++) {
1888
- ((dtype*)p3)[i] = m_add(((dtype*)p1)[i], *(dtype*)p2);
1889
- }
1890
- }
1891
- }
1892
- #endif
1893
- } else {
1894
- for (i = 0; i < n; i++) {
1895
- *(dtype*)p3 = m_add(*(dtype*)p1, *(dtype*)p2);
1896
- p1 += s1;
1897
- p3 += s3;
1898
- }
1899
- }
1900
- } else {
1901
- if (p1 == p3) { // inplace case
1902
- for (i = 0; i < n; i++) {
1903
- check_intdivzero(*(dtype*)p2);
1904
- *(dtype*)p1 = m_add(*(dtype*)p1, *(dtype*)p2);
1905
- p1 += s1;
1906
- p2 += s2;
1907
- }
1908
- } else {
1909
- for (i = 0; i < n; i++) {
1910
- check_intdivzero(*(dtype*)p2);
1911
- *(dtype*)p3 = m_add(*(dtype*)p1, *(dtype*)p2);
1912
- p1 += s1;
1913
- p2 += s2;
1914
- p3 += s3;
1915
- }
1916
- }
1917
- }
1918
-
1919
- return;
1920
- //
1921
- }
1922
- }
1923
- for (i = 0; i < n; i++) {
1924
- dtype x, y, z;
1634
+ for (; i--;) {
1925
1635
  GET_DATA_STRIDE(p1, s1, dtype, x);
1926
1636
  GET_DATA_STRIDE(p2, s2, dtype, y);
1927
- check_intdivzero(y);
1928
- z = m_add(x, y);
1929
- SET_DATA_STRIDE(p3, s3, dtype, z);
1637
+ x = m_pow(x, y);
1638
+ SET_DATA_STRIDE(p3, s3, dtype, x);
1639
+ }
1640
+ }
1641
+
1642
+ static void iter_dfloat_pow_int32(na_loop_t* const lp) {
1643
+ size_t i;
1644
+ char *p1, *p2, *p3;
1645
+ ssize_t s1, s2, s3;
1646
+ dtype x;
1647
+ int32_t y;
1648
+ INIT_COUNTER(lp, i);
1649
+ INIT_PTR(lp, 0, p1, s1);
1650
+ INIT_PTR(lp, 1, p2, s2);
1651
+ INIT_PTR(lp, 2, p3, s3);
1652
+ for (; i--;) {
1653
+ GET_DATA_STRIDE(p1, s1, dtype, x);
1654
+ GET_DATA_STRIDE(p2, s2, int32_t, y);
1655
+ x = m_pow_int(x, y);
1656
+ SET_DATA_STRIDE(p3, s3, dtype, x);
1930
1657
  }
1931
- //
1932
1658
  }
1933
- #undef check_intdivzero
1934
1659
 
1935
- static VALUE dfloat_add_self(VALUE self, VALUE other) {
1660
+ static VALUE dfloat_pow_self(VALUE self, VALUE other) {
1936
1661
  ndfunc_arg_in_t ain[2] = { { cT, 0 }, { cT, 0 } };
1662
+ ndfunc_arg_in_t ain_i[2] = { { cT, 0 }, { numo_cInt32, 0 } };
1937
1663
  ndfunc_arg_out_t aout[1] = { { cT, 0 } };
1938
- ndfunc_t ndf = { iter_dfloat_add, STRIDE_LOOP, 2, 1, ain, aout };
1664
+ ndfunc_t ndf = { iter_dfloat_pow, STRIDE_LOOP, 2, 1, ain, aout };
1665
+ ndfunc_t ndf_i = { iter_dfloat_pow_int32, STRIDE_LOOP, 2, 1, ain_i, aout };
1939
1666
 
1940
- return na_ndloop(&ndf, 2, self, other);
1667
+ // fixme : use na.integer?
1668
+ if (FIXNUM_P(other) || rb_obj_is_kind_of(other, numo_cInt32)) {
1669
+ return na_ndloop(&ndf_i, 2, self, other);
1670
+ } else {
1671
+ return na_ndloop(&ndf, 2, self, other);
1672
+ }
1941
1673
  }
1942
1674
 
1943
- static VALUE dfloat_add(VALUE self, VALUE other) {
1675
+ static VALUE dfloat_pow(VALUE self, VALUE other) {
1944
1676
 
1945
1677
  VALUE klass, v;
1946
-
1947
1678
  klass = na_upcast(rb_obj_class(self), rb_obj_class(other));
1948
1679
  if (klass == cT) {
1949
- return dfloat_add_self(self, other);
1680
+ return dfloat_pow_self(self, other);
1950
1681
  } else {
1951
1682
  v = rb_funcall(klass, id_cast, 1, self);
1952
- return rb_funcall(v, '+', 1, other);
1683
+ return rb_funcall(v, id_pow, 1, other);
1953
1684
  }
1954
1685
  }
1955
1686
 
1956
- #define check_intdivzero(y) \
1957
- {}
1687
+ static void iter_dfloat_minus(na_loop_t* const lp) {
1688
+ size_t i, n;
1689
+ char *p1, *p2;
1690
+ ssize_t s1, s2;
1691
+ size_t *idx1, *idx2;
1692
+ dtype x;
1958
1693
 
1959
- static void iter_dfloat_sub(na_loop_t* const lp) {
1960
- size_t i = 0;
1961
- size_t n;
1962
- char *p1, *p2, *p3;
1963
- ssize_t s1, s2, s3;
1964
-
1965
- #ifdef __SSE2__
1966
- size_t cnt;
1967
- size_t cnt_simd_loop = -1;
1968
-
1969
- __m128d a;
1970
- __m128d b;
1971
-
1972
- size_t num_pack; // Number of elements packed for SIMD.
1973
- num_pack = SIMD_ALIGNMENT_SIZE / sizeof(dtype);
1974
- #endif
1975
1694
  INIT_COUNTER(lp, n);
1976
- INIT_PTR(lp, 0, p1, s1);
1977
- INIT_PTR(lp, 1, p2, s2);
1978
- INIT_PTR(lp, 2, p3, s3);
1979
-
1980
- //
1981
- if (is_aligned(p1, sizeof(dtype)) && is_aligned(p2, sizeof(dtype)) &&
1982
- is_aligned(p3, sizeof(dtype))) {
1983
-
1984
- if (s1 == sizeof(dtype) && s2 == sizeof(dtype) && s3 == sizeof(dtype)) {
1985
- #ifdef __SSE2__
1986
- // Check number of elements. & Check same alignment.
1987
- if ((n >= num_pack) &&
1988
- is_same_aligned3(
1989
- &((dtype*)p1)[i], &((dtype*)p2)[i], &((dtype*)p3)[i], SIMD_ALIGNMENT_SIZE
1990
- )) {
1991
- // Calculate up to the position just before the start of SIMD computation.
1992
- cnt = get_count_of_elements_not_aligned_to_simd_size(
1993
- &((dtype*)p1)[i], SIMD_ALIGNMENT_SIZE, sizeof(dtype)
1994
- );
1995
- #endif
1996
- if (p1 == p3) { // inplace case
1997
- #ifdef __SSE2__
1998
- for (; i < cnt; i++) {
1999
- #else
2000
- for (; i < n; i++) {
2001
- check_intdivzero(((dtype*)p2)[i]);
2002
- #endif
2003
- ((dtype*)p1)[i] = m_sub(((dtype*)p1)[i], ((dtype*)p2)[i]);
2004
- }
2005
- } else {
2006
- #ifdef __SSE2__
2007
- for (; i < cnt; i++) {
2008
- #else
2009
- for (; i < n; i++) {
2010
- check_intdivzero(((dtype*)p2)[i]);
2011
- #endif
2012
- ((dtype*)p3)[i] = m_sub(((dtype*)p1)[i], ((dtype*)p2)[i]);
2013
- }
2014
- }
2015
-
2016
- #ifdef __SSE2__
2017
- // Get the count of SIMD computation loops.
2018
- cnt_simd_loop = (n - i) % num_pack;
1695
+ INIT_PTR_IDX(lp, 0, p1, s1, idx1);
1696
+ INIT_PTR_IDX(lp, 1, p2, s2, idx2);
2019
1697
 
2020
- // SIMD computation.
2021
- if (p1 == p3) { // inplace case
2022
- for (; i < n - cnt_simd_loop; i += num_pack) {
2023
- a = _mm_load_pd(&((dtype*)p1)[i]);
2024
- b = _mm_load_pd(&((dtype*)p2)[i]);
2025
- a = _mm_sub_pd(a, b);
2026
- _mm_store_pd(&((dtype*)p1)[i], a);
2027
- }
2028
- } else {
2029
- for (; i < n - cnt_simd_loop; i += num_pack) {
2030
- a = _mm_load_pd(&((dtype*)p1)[i]);
2031
- b = _mm_load_pd(&((dtype*)p2)[i]);
2032
- a = _mm_sub_pd(a, b);
2033
- _mm_stream_pd(&((dtype*)p3)[i], a);
2034
- }
2035
- }
1698
+ if (idx1) {
1699
+ if (idx2) {
1700
+ for (i = 0; i < n; i++) {
1701
+ GET_DATA_INDEX(p1, idx1, dtype, x);
1702
+ x = m_minus(x);
1703
+ SET_DATA_INDEX(p2, idx2, dtype, x);
2036
1704
  }
2037
-
2038
- // Compute the remainder of the SIMD operation.
2039
- if (cnt_simd_loop != 0) {
2040
- if (p1 == p3) { // inplace case
2041
- for (; i < n; i++) {
2042
- check_intdivzero(((dtype*)p2)[i]);
2043
- ((dtype*)p1)[i] = m_sub(((dtype*)p1)[i], ((dtype*)p2)[i]);
2044
- }
2045
- } else {
2046
- for (; i < n; i++) {
2047
- check_intdivzero(((dtype*)p2)[i]);
2048
- ((dtype*)p3)[i] = m_sub(((dtype*)p1)[i], ((dtype*)p2)[i]);
2049
- }
2050
- }
1705
+ } else {
1706
+ for (i = 0; i < n; i++) {
1707
+ GET_DATA_INDEX(p1, idx1, dtype, x);
1708
+ x = m_minus(x);
1709
+ SET_DATA_STRIDE(p2, s2, dtype, x);
2051
1710
  }
2052
- #endif
2053
- return;
2054
1711
  }
2055
-
2056
- if (is_aligned_step(s1, sizeof(dtype)) && is_aligned_step(s2, sizeof(dtype)) &&
2057
- is_aligned_step(s3, sizeof(dtype))) {
1712
+ } else {
1713
+ if (idx2) {
1714
+ for (i = 0; i < n; i++) {
1715
+ GET_DATA_STRIDE(p1, s1, dtype, x);
1716
+ x = m_minus(x);
1717
+ SET_DATA_INDEX(p2, idx2, dtype, x);
1718
+ }
1719
+ } else {
2058
1720
  //
2059
-
2060
- if (s2 == 0) { // Broadcasting from scalar value.
2061
- check_intdivzero(*(dtype*)p2);
2062
- if (s1 == sizeof(dtype) && s3 == sizeof(dtype)) {
2063
- #ifdef __SSE2__
2064
- // Broadcast a scalar value and use it for SIMD computation.
2065
- b = _mm_load1_pd(&((dtype*)p2)[0]);
2066
-
2067
- // Check number of elements. & Check same alignment.
2068
- if ((n >= num_pack) &&
2069
- is_same_aligned2(&((dtype*)p1)[i], &((dtype*)p3)[i], SIMD_ALIGNMENT_SIZE)) {
2070
- // Calculate up to the position just before the start of SIMD computation.
2071
- cnt = get_count_of_elements_not_aligned_to_simd_size(
2072
- &((dtype*)p1)[i], SIMD_ALIGNMENT_SIZE, sizeof(dtype)
2073
- );
2074
- #endif
2075
- if (p1 == p3) { // inplace case
2076
- #ifdef __SSE2__
2077
- for (; i < cnt; i++) {
2078
- #else
2079
- for (; i < n; i++) {
2080
- #endif
2081
- ((dtype*)p1)[i] = m_sub(((dtype*)p1)[i], *(dtype*)p2);
2082
- }
2083
- } else {
2084
- #ifdef __SSE2__
2085
- for (; i < cnt; i++) {
2086
- #else
2087
- for (; i < n; i++) {
2088
- #endif
2089
- ((dtype*)p3)[i] = m_sub(((dtype*)p1)[i], *(dtype*)p2);
2090
- }
2091
- }
2092
-
2093
- #ifdef __SSE2__
2094
- // Get the count of SIMD computation loops.
2095
- cnt_simd_loop = (n - i) % num_pack;
2096
-
2097
- // SIMD computation.
2098
- if (p1 == p3) { // inplace case
2099
- for (; i < n - cnt_simd_loop; i += num_pack) {
2100
- a = _mm_load_pd(&((dtype*)p1)[i]);
2101
- a = _mm_sub_pd(a, b);
2102
- _mm_store_pd(&((dtype*)p1)[i], a);
2103
- }
2104
- } else {
2105
- for (; i < n - cnt_simd_loop; i += num_pack) {
2106
- a = _mm_load_pd(&((dtype*)p1)[i]);
2107
- a = _mm_sub_pd(a, b);
2108
- _mm_stream_pd(&((dtype*)p3)[i], a);
2109
- }
2110
- }
2111
- }
2112
-
2113
- // Compute the remainder of the SIMD operation.
2114
- if (cnt_simd_loop != 0) {
2115
- if (p1 == p3) { // inplace case
2116
- for (; i < n; i++) {
2117
- ((dtype*)p1)[i] = m_sub(((dtype*)p1)[i], *(dtype*)p2);
2118
- }
2119
- } else {
2120
- for (; i < n; i++) {
2121
- ((dtype*)p3)[i] = m_sub(((dtype*)p1)[i], *(dtype*)p2);
2122
- }
2123
- }
2124
- }
2125
- #endif
2126
- } else {
1721
+ if (is_aligned(p1, sizeof(dtype)) && is_aligned(p2, sizeof(dtype))) {
1722
+ if (s1 == sizeof(dtype) && s2 == sizeof(dtype)) {
2127
1723
  for (i = 0; i < n; i++) {
2128
- *(dtype*)p3 = m_sub(*(dtype*)p1, *(dtype*)p2);
2129
- p1 += s1;
2130
- p3 += s3;
1724
+ ((dtype*)p2)[i] = m_minus(((dtype*)p1)[i]);
2131
1725
  }
1726
+ return;
2132
1727
  }
2133
- } else {
2134
- if (p1 == p3) { // inplace case
2135
- for (i = 0; i < n; i++) {
2136
- check_intdivzero(*(dtype*)p2);
2137
- *(dtype*)p1 = m_sub(*(dtype*)p1, *(dtype*)p2);
2138
- p1 += s1;
2139
- p2 += s2;
2140
- }
2141
- } else {
1728
+ if (is_aligned_step(s1, sizeof(dtype)) && is_aligned_step(s2, sizeof(dtype))) {
1729
+ //
2142
1730
  for (i = 0; i < n; i++) {
2143
- check_intdivzero(*(dtype*)p2);
2144
- *(dtype*)p3 = m_sub(*(dtype*)p1, *(dtype*)p2);
1731
+ *(dtype*)p2 = m_minus(*(dtype*)p1);
2145
1732
  p1 += s1;
2146
1733
  p2 += s2;
2147
- p3 += s3;
2148
1734
  }
1735
+ return;
1736
+ //
2149
1737
  }
2150
1738
  }
2151
-
2152
- return;
1739
+ for (i = 0; i < n; i++) {
1740
+ GET_DATA_STRIDE(p1, s1, dtype, x);
1741
+ x = m_minus(x);
1742
+ SET_DATA_STRIDE(p2, s2, dtype, x);
1743
+ }
2153
1744
  //
2154
1745
  }
2155
1746
  }
2156
- for (i = 0; i < n; i++) {
2157
- dtype x, y, z;
2158
- GET_DATA_STRIDE(p1, s1, dtype, x);
2159
- GET_DATA_STRIDE(p2, s2, dtype, y);
2160
- check_intdivzero(y);
2161
- z = m_sub(x, y);
2162
- SET_DATA_STRIDE(p3, s3, dtype, z);
2163
- }
2164
- //
2165
1747
  }
2166
- #undef check_intdivzero
2167
1748
 
2168
- static VALUE dfloat_sub_self(VALUE self, VALUE other) {
2169
- ndfunc_arg_in_t ain[2] = { { cT, 0 }, { cT, 0 } };
1749
+ static VALUE dfloat_minus(VALUE self) {
1750
+ ndfunc_arg_in_t ain[1] = { { cT, 0 } };
2170
1751
  ndfunc_arg_out_t aout[1] = { { cT, 0 } };
2171
- ndfunc_t ndf = { iter_dfloat_sub, STRIDE_LOOP, 2, 1, ain, aout };
2172
-
2173
- return na_ndloop(&ndf, 2, self, other);
2174
- }
2175
-
2176
- static VALUE dfloat_sub(VALUE self, VALUE other) {
2177
-
2178
- VALUE klass, v;
1752
+ ndfunc_t ndf = { iter_dfloat_minus, FULL_LOOP, 1, 1, ain, aout };
2179
1753
 
2180
- klass = na_upcast(rb_obj_class(self), rb_obj_class(other));
2181
- if (klass == cT) {
2182
- return dfloat_sub_self(self, other);
2183
- } else {
2184
- v = rb_funcall(klass, id_cast, 1, self);
2185
- return rb_funcall(v, '-', 1, other);
2186
- }
1754
+ return na_ndloop(&ndf, 1, self);
2187
1755
  }
2188
1756
 
2189
- #define check_intdivzero(y) \
2190
- {}
2191
-
2192
- static void iter_dfloat_mul(na_loop_t* const lp) {
2193
- size_t i = 0;
2194
- size_t n;
2195
- char *p1, *p2, *p3;
2196
- ssize_t s1, s2, s3;
2197
-
2198
- #ifdef __SSE2__
2199
- size_t cnt;
2200
- size_t cnt_simd_loop = -1;
2201
-
2202
- __m128d a;
2203
- __m128d b;
1757
+ static void iter_dfloat_reciprocal(na_loop_t* const lp) {
1758
+ size_t i, n;
1759
+ char *p1, *p2;
1760
+ ssize_t s1, s2;
1761
+ size_t *idx1, *idx2;
1762
+ dtype x;
2204
1763
 
2205
- size_t num_pack; // Number of elements packed for SIMD.
2206
- num_pack = SIMD_ALIGNMENT_SIZE / sizeof(dtype);
2207
- #endif
2208
1764
  INIT_COUNTER(lp, n);
2209
- INIT_PTR(lp, 0, p1, s1);
2210
- INIT_PTR(lp, 1, p2, s2);
2211
- INIT_PTR(lp, 2, p3, s3);
2212
-
2213
- //
2214
- if (is_aligned(p1, sizeof(dtype)) && is_aligned(p2, sizeof(dtype)) &&
2215
- is_aligned(p3, sizeof(dtype))) {
1765
+ INIT_PTR_IDX(lp, 0, p1, s1, idx1);
1766
+ INIT_PTR_IDX(lp, 1, p2, s2, idx2);
2216
1767
 
2217
- if (s1 == sizeof(dtype) && s2 == sizeof(dtype) && s3 == sizeof(dtype)) {
2218
- #ifdef __SSE2__
2219
- // Check number of elements. & Check same alignment.
2220
- if ((n >= num_pack) &&
2221
- is_same_aligned3(
2222
- &((dtype*)p1)[i], &((dtype*)p2)[i], &((dtype*)p3)[i], SIMD_ALIGNMENT_SIZE
2223
- )) {
2224
- // Calculate up to the position just before the start of SIMD computation.
2225
- cnt = get_count_of_elements_not_aligned_to_simd_size(
2226
- &((dtype*)p1)[i], SIMD_ALIGNMENT_SIZE, sizeof(dtype)
2227
- );
2228
- #endif
2229
- if (p1 == p3) { // inplace case
2230
- #ifdef __SSE2__
2231
- for (; i < cnt; i++) {
2232
- #else
2233
- for (; i < n; i++) {
2234
- check_intdivzero(((dtype*)p2)[i]);
2235
- #endif
2236
- ((dtype*)p1)[i] = m_mul(((dtype*)p1)[i], ((dtype*)p2)[i]);
2237
- }
2238
- } else {
2239
- #ifdef __SSE2__
2240
- for (; i < cnt; i++) {
2241
- #else
2242
- for (; i < n; i++) {
2243
- check_intdivzero(((dtype*)p2)[i]);
2244
- #endif
2245
- ((dtype*)p3)[i] = m_mul(((dtype*)p1)[i], ((dtype*)p2)[i]);
1768
+ if (idx1) {
1769
+ if (idx2) {
1770
+ for (i = 0; i < n; i++) {
1771
+ GET_DATA_INDEX(p1, idx1, dtype, x);
1772
+ x = m_reciprocal(x);
1773
+ SET_DATA_INDEX(p2, idx2, dtype, x);
1774
+ }
1775
+ } else {
1776
+ for (i = 0; i < n; i++) {
1777
+ GET_DATA_INDEX(p1, idx1, dtype, x);
1778
+ x = m_reciprocal(x);
1779
+ SET_DATA_STRIDE(p2, s2, dtype, x);
1780
+ }
1781
+ }
1782
+ } else {
1783
+ if (idx2) {
1784
+ for (i = 0; i < n; i++) {
1785
+ GET_DATA_STRIDE(p1, s1, dtype, x);
1786
+ x = m_reciprocal(x);
1787
+ SET_DATA_INDEX(p2, idx2, dtype, x);
1788
+ }
1789
+ } else {
1790
+ //
1791
+ if (is_aligned(p1, sizeof(dtype)) && is_aligned(p2, sizeof(dtype))) {
1792
+ if (s1 == sizeof(dtype) && s2 == sizeof(dtype)) {
1793
+ for (i = 0; i < n; i++) {
1794
+ ((dtype*)p2)[i] = m_reciprocal(((dtype*)p1)[i]);
2246
1795
  }
1796
+ return;
2247
1797
  }
2248
-
2249
- #ifdef __SSE2__
2250
- // Get the count of SIMD computation loops.
2251
- cnt_simd_loop = (n - i) % num_pack;
2252
-
2253
- // SIMD computation.
2254
- if (p1 == p3) { // inplace case
2255
- for (; i < n - cnt_simd_loop; i += num_pack) {
2256
- a = _mm_load_pd(&((dtype*)p1)[i]);
2257
- b = _mm_load_pd(&((dtype*)p2)[i]);
2258
- a = _mm_mul_pd(a, b);
2259
- _mm_store_pd(&((dtype*)p1)[i], a);
2260
- }
2261
- } else {
2262
- for (; i < n - cnt_simd_loop; i += num_pack) {
2263
- a = _mm_load_pd(&((dtype*)p1)[i]);
2264
- b = _mm_load_pd(&((dtype*)p2)[i]);
2265
- a = _mm_mul_pd(a, b);
2266
- _mm_stream_pd(&((dtype*)p3)[i], a);
1798
+ if (is_aligned_step(s1, sizeof(dtype)) && is_aligned_step(s2, sizeof(dtype))) {
1799
+ //
1800
+ for (i = 0; i < n; i++) {
1801
+ *(dtype*)p2 = m_reciprocal(*(dtype*)p1);
1802
+ p1 += s1;
1803
+ p2 += s2;
2267
1804
  }
1805
+ return;
1806
+ //
2268
1807
  }
2269
1808
  }
2270
-
2271
- // Compute the remainder of the SIMD operation.
2272
- if (cnt_simd_loop != 0) {
2273
- if (p1 == p3) { // inplace case
2274
- for (; i < n; i++) {
2275
- check_intdivzero(((dtype*)p2)[i]);
2276
- ((dtype*)p1)[i] = m_mul(((dtype*)p1)[i], ((dtype*)p2)[i]);
2277
- }
2278
- } else {
2279
- for (; i < n; i++) {
2280
- check_intdivzero(((dtype*)p2)[i]);
2281
- ((dtype*)p3)[i] = m_mul(((dtype*)p1)[i], ((dtype*)p2)[i]);
2282
- }
2283
- }
1809
+ for (i = 0; i < n; i++) {
1810
+ GET_DATA_STRIDE(p1, s1, dtype, x);
1811
+ x = m_reciprocal(x);
1812
+ SET_DATA_STRIDE(p2, s2, dtype, x);
2284
1813
  }
2285
- #endif
2286
- return;
2287
- }
2288
-
2289
- if (is_aligned_step(s1, sizeof(dtype)) && is_aligned_step(s2, sizeof(dtype)) &&
2290
- is_aligned_step(s3, sizeof(dtype))) {
2291
1814
  //
1815
+ }
1816
+ }
1817
+ }
2292
1818
 
2293
- if (s2 == 0) { // Broadcasting from scalar value.
2294
- check_intdivzero(*(dtype*)p2);
2295
- if (s1 == sizeof(dtype) && s3 == sizeof(dtype)) {
2296
- #ifdef __SSE2__
2297
- // Broadcast a scalar value and use it for SIMD computation.
2298
- b = _mm_load1_pd(&((dtype*)p2)[0]);
2299
-
2300
- // Check number of elements. & Check same alignment.
2301
- if ((n >= num_pack) &&
2302
- is_same_aligned2(&((dtype*)p1)[i], &((dtype*)p3)[i], SIMD_ALIGNMENT_SIZE)) {
2303
- // Calculate up to the position just before the start of SIMD computation.
2304
- cnt = get_count_of_elements_not_aligned_to_simd_size(
2305
- &((dtype*)p1)[i], SIMD_ALIGNMENT_SIZE, sizeof(dtype)
2306
- );
2307
- #endif
2308
- if (p1 == p3) { // inplace case
2309
- #ifdef __SSE2__
2310
- for (; i < cnt; i++) {
2311
- #else
2312
- for (; i < n; i++) {
2313
- #endif
2314
- ((dtype*)p1)[i] = m_mul(((dtype*)p1)[i], *(dtype*)p2);
2315
- }
2316
- } else {
2317
- #ifdef __SSE2__
2318
- for (; i < cnt; i++) {
2319
- #else
2320
- for (; i < n; i++) {
2321
- #endif
2322
- ((dtype*)p3)[i] = m_mul(((dtype*)p1)[i], *(dtype*)p2);
2323
- }
2324
- }
2325
-
2326
- #ifdef __SSE2__
2327
- // Get the count of SIMD computation loops.
2328
- cnt_simd_loop = (n - i) % num_pack;
2329
-
2330
- // SIMD computation.
2331
- if (p1 == p3) { // inplace case
2332
- for (; i < n - cnt_simd_loop; i += num_pack) {
2333
- a = _mm_load_pd(&((dtype*)p1)[i]);
2334
- a = _mm_mul_pd(a, b);
2335
- _mm_store_pd(&((dtype*)p1)[i], a);
2336
- }
2337
- } else {
2338
- for (; i < n - cnt_simd_loop; i += num_pack) {
2339
- a = _mm_load_pd(&((dtype*)p1)[i]);
2340
- a = _mm_mul_pd(a, b);
2341
- _mm_stream_pd(&((dtype*)p3)[i], a);
2342
- }
2343
- }
2344
- }
2345
-
2346
- // Compute the remainder of the SIMD operation.
2347
- if (cnt_simd_loop != 0) {
2348
- if (p1 == p3) { // inplace case
2349
- for (; i < n; i++) {
2350
- ((dtype*)p1)[i] = m_mul(((dtype*)p1)[i], *(dtype*)p2);
2351
- }
2352
- } else {
2353
- for (; i < n; i++) {
2354
- ((dtype*)p3)[i] = m_mul(((dtype*)p1)[i], *(dtype*)p2);
2355
- }
2356
- }
2357
- }
2358
- #endif
2359
- } else {
2360
- for (i = 0; i < n; i++) {
2361
- *(dtype*)p3 = m_mul(*(dtype*)p1, *(dtype*)p2);
2362
- p1 += s1;
2363
- p3 += s3;
2364
- }
2365
- }
2366
- } else {
2367
- if (p1 == p3) { // inplace case
2368
- for (i = 0; i < n; i++) {
2369
- check_intdivzero(*(dtype*)p2);
2370
- *(dtype*)p1 = m_mul(*(dtype*)p1, *(dtype*)p2);
2371
- p1 += s1;
2372
- p2 += s2;
2373
- }
2374
- } else {
2375
- for (i = 0; i < n; i++) {
2376
- check_intdivzero(*(dtype*)p2);
2377
- *(dtype*)p3 = m_mul(*(dtype*)p1, *(dtype*)p2);
2378
- p1 += s1;
2379
- p2 += s2;
2380
- p3 += s3;
2381
- }
2382
- }
2383
- }
2384
-
2385
- return;
2386
- //
2387
- }
2388
- }
2389
- for (i = 0; i < n; i++) {
2390
- dtype x, y, z;
2391
- GET_DATA_STRIDE(p1, s1, dtype, x);
2392
- GET_DATA_STRIDE(p2, s2, dtype, y);
2393
- check_intdivzero(y);
2394
- z = m_mul(x, y);
2395
- SET_DATA_STRIDE(p3, s3, dtype, z);
2396
- }
2397
- //
2398
- }
2399
- #undef check_intdivzero
2400
-
2401
- static VALUE dfloat_mul_self(VALUE self, VALUE other) {
2402
- ndfunc_arg_in_t ain[2] = { { cT, 0 }, { cT, 0 } };
1819
+ static VALUE dfloat_reciprocal(VALUE self) {
1820
+ ndfunc_arg_in_t ain[1] = { { cT, 0 } };
2403
1821
  ndfunc_arg_out_t aout[1] = { { cT, 0 } };
2404
- ndfunc_t ndf = { iter_dfloat_mul, STRIDE_LOOP, 2, 1, ain, aout };
2405
-
2406
- return na_ndloop(&ndf, 2, self, other);
2407
- }
2408
-
2409
- static VALUE dfloat_mul(VALUE self, VALUE other) {
2410
-
2411
- VALUE klass, v;
1822
+ ndfunc_t ndf = { iter_dfloat_reciprocal, FULL_LOOP, 1, 1, ain, aout };
2412
1823
 
2413
- klass = na_upcast(rb_obj_class(self), rb_obj_class(other));
2414
- if (klass == cT) {
2415
- return dfloat_mul_self(self, other);
2416
- } else {
2417
- v = rb_funcall(klass, id_cast, 1, self);
2418
- return rb_funcall(v, '*', 1, other);
2419
- }
1824
+ return na_ndloop(&ndf, 1, self);
2420
1825
  }
2421
1826
 
2422
- #define check_intdivzero(y) \
2423
- {}
2424
-
2425
- static void iter_dfloat_div(na_loop_t* const lp) {
2426
- size_t i = 0;
2427
- size_t n;
2428
- char *p1, *p2, *p3;
2429
- ssize_t s1, s2, s3;
2430
-
2431
- #ifdef __SSE2__
2432
- size_t cnt;
2433
- size_t cnt_simd_loop = -1;
2434
-
2435
- __m128d a;
2436
- __m128d b;
1827
+ static void iter_dfloat_sign(na_loop_t* const lp) {
1828
+ size_t i, n;
1829
+ char *p1, *p2;
1830
+ ssize_t s1, s2;
1831
+ size_t *idx1, *idx2;
1832
+ dtype x;
2437
1833
 
2438
- size_t num_pack; // Number of elements packed for SIMD.
2439
- num_pack = SIMD_ALIGNMENT_SIZE / sizeof(dtype);
2440
- #endif
2441
1834
  INIT_COUNTER(lp, n);
2442
- INIT_PTR(lp, 0, p1, s1);
2443
- INIT_PTR(lp, 1, p2, s2);
2444
- INIT_PTR(lp, 2, p3, s3);
2445
-
2446
- //
2447
- if (is_aligned(p1, sizeof(dtype)) && is_aligned(p2, sizeof(dtype)) &&
2448
- is_aligned(p3, sizeof(dtype))) {
2449
-
2450
- if (s1 == sizeof(dtype) && s2 == sizeof(dtype) && s3 == sizeof(dtype)) {
2451
- #ifdef __SSE2__
2452
- // Check number of elements. & Check same alignment.
2453
- if ((n >= num_pack) &&
2454
- is_same_aligned3(
2455
- &((dtype*)p1)[i], &((dtype*)p2)[i], &((dtype*)p3)[i], SIMD_ALIGNMENT_SIZE
2456
- )) {
2457
- // Calculate up to the position just before the start of SIMD computation.
2458
- cnt = get_count_of_elements_not_aligned_to_simd_size(
2459
- &((dtype*)p1)[i], SIMD_ALIGNMENT_SIZE, sizeof(dtype)
2460
- );
2461
- #endif
2462
- if (p1 == p3) { // inplace case
2463
- #ifdef __SSE2__
2464
- for (; i < cnt; i++) {
2465
- #else
2466
- for (; i < n; i++) {
2467
- #endif
2468
- ((dtype*)p1)[i] = m_div(((dtype*)p1)[i], ((dtype*)p2)[i]);
2469
- }
2470
- } else {
2471
- #ifdef __SSE2__
2472
- for (; i < cnt; i++) {
2473
- #else
2474
- for (; i < n; i++) {
2475
- #endif
2476
- ((dtype*)p3)[i] = m_div(((dtype*)p1)[i], ((dtype*)p2)[i]);
2477
- }
2478
- }
2479
-
2480
- #ifdef __SSE2__
2481
- // Get the count of SIMD computation loops.
2482
- cnt_simd_loop = (n - i) % num_pack;
1835
+ INIT_PTR_IDX(lp, 0, p1, s1, idx1);
1836
+ INIT_PTR_IDX(lp, 1, p2, s2, idx2);
2483
1837
 
2484
- // SIMD computation.
2485
- if (p1 == p3) { // inplace case
2486
- for (; i < n - cnt_simd_loop; i += num_pack) {
2487
- a = _mm_load_pd(&((dtype*)p1)[i]);
2488
- b = _mm_load_pd(&((dtype*)p2)[i]);
2489
- a = _mm_div_pd(a, b);
2490
- _mm_store_pd(&((dtype*)p1)[i], a);
2491
- }
2492
- } else {
2493
- for (; i < n - cnt_simd_loop; i += num_pack) {
2494
- a = _mm_load_pd(&((dtype*)p1)[i]);
2495
- b = _mm_load_pd(&((dtype*)p2)[i]);
2496
- a = _mm_div_pd(a, b);
2497
- _mm_stream_pd(&((dtype*)p3)[i], a);
2498
- }
2499
- }
1838
+ if (idx1) {
1839
+ if (idx2) {
1840
+ for (i = 0; i < n; i++) {
1841
+ GET_DATA_INDEX(p1, idx1, dtype, x);
1842
+ x = m_sign(x);
1843
+ SET_DATA_INDEX(p2, idx2, dtype, x);
2500
1844
  }
2501
-
2502
- // Compute the remainder of the SIMD operation.
2503
- if (cnt_simd_loop != 0) {
2504
- if (p1 == p3) { // inplace case
2505
- for (; i < n; i++) {
2506
- check_intdivzero(((dtype*)p2)[i]);
2507
- ((dtype*)p1)[i] = m_div(((dtype*)p1)[i], ((dtype*)p2)[i]);
2508
- }
2509
- } else {
2510
- for (; i < n; i++) {
2511
- check_intdivzero(((dtype*)p2)[i]);
2512
- ((dtype*)p3)[i] = m_div(((dtype*)p1)[i], ((dtype*)p2)[i]);
2513
- }
2514
- }
1845
+ } else {
1846
+ for (i = 0; i < n; i++) {
1847
+ GET_DATA_INDEX(p1, idx1, dtype, x);
1848
+ x = m_sign(x);
1849
+ SET_DATA_STRIDE(p2, s2, dtype, x);
2515
1850
  }
2516
- #endif
2517
- return;
2518
1851
  }
2519
-
2520
- if (is_aligned_step(s1, sizeof(dtype)) && is_aligned_step(s2, sizeof(dtype)) &&
2521
- is_aligned_step(s3, sizeof(dtype))) {
1852
+ } else {
1853
+ if (idx2) {
1854
+ for (i = 0; i < n; i++) {
1855
+ GET_DATA_STRIDE(p1, s1, dtype, x);
1856
+ x = m_sign(x);
1857
+ SET_DATA_INDEX(p2, idx2, dtype, x);
1858
+ }
1859
+ } else {
2522
1860
  //
2523
-
2524
- if (s2 == 0) { // Broadcasting from scalar value.
2525
- check_intdivzero(*(dtype*)p2);
2526
- if (s1 == sizeof(dtype) && s3 == sizeof(dtype)) {
2527
- #ifdef __SSE2__
2528
- // Broadcast a scalar value and use it for SIMD computation.
2529
- b = _mm_load1_pd(&((dtype*)p2)[0]);
2530
-
2531
- // Check number of elements. & Check same alignment.
2532
- if ((n >= num_pack) &&
2533
- is_same_aligned2(&((dtype*)p1)[i], &((dtype*)p3)[i], SIMD_ALIGNMENT_SIZE)) {
2534
- // Calculate up to the position just before the start of SIMD computation.
2535
- cnt = get_count_of_elements_not_aligned_to_simd_size(
2536
- &((dtype*)p1)[i], SIMD_ALIGNMENT_SIZE, sizeof(dtype)
2537
- );
2538
- #endif
2539
- if (p1 == p3) { // inplace case
2540
- #ifdef __SSE2__
2541
- for (; i < cnt; i++) {
2542
- #else
2543
- for (; i < n; i++) {
2544
- check_intdivzero(((dtype*)p2)[i]);
2545
- #endif
2546
- ((dtype*)p1)[i] = m_div(((dtype*)p1)[i], *(dtype*)p2);
2547
- }
2548
- } else {
2549
- #ifdef __SSE2__
2550
- for (; i < cnt; i++) {
2551
- #else
2552
- for (; i < n; i++) {
2553
- check_intdivzero(((dtype*)p2)[i]);
2554
- #endif
2555
- ((dtype*)p3)[i] = m_div(((dtype*)p1)[i], *(dtype*)p2);
2556
- }
2557
- }
2558
-
2559
- #ifdef __SSE2__
2560
- // Get the count of SIMD computation loops.
2561
- cnt_simd_loop = (n - i) % num_pack;
2562
-
2563
- // SIMD computation.
2564
- if (p1 == p3) { // inplace case
2565
- for (; i < n - cnt_simd_loop; i += num_pack) {
2566
- a = _mm_load_pd(&((dtype*)p1)[i]);
2567
- a = _mm_div_pd(a, b);
2568
- _mm_store_pd(&((dtype*)p1)[i], a);
2569
- }
2570
- } else {
2571
- for (; i < n - cnt_simd_loop; i += num_pack) {
2572
- a = _mm_load_pd(&((dtype*)p1)[i]);
2573
- a = _mm_div_pd(a, b);
2574
- _mm_stream_pd(&((dtype*)p3)[i], a);
2575
- }
2576
- }
2577
- }
2578
-
2579
- // Compute the remainder of the SIMD operation.
2580
- if (cnt_simd_loop != 0) {
2581
- if (p1 == p3) { // inplace case
2582
- for (; i < n; i++) {
2583
- ((dtype*)p1)[i] = m_div(((dtype*)p1)[i], *(dtype*)p2);
2584
- }
2585
- } else {
2586
- for (; i < n; i++) {
2587
- ((dtype*)p3)[i] = m_div(((dtype*)p1)[i], *(dtype*)p2);
2588
- }
2589
- }
2590
- }
2591
- #endif
2592
- } else {
1861
+ if (is_aligned(p1, sizeof(dtype)) && is_aligned(p2, sizeof(dtype))) {
1862
+ if (s1 == sizeof(dtype) && s2 == sizeof(dtype)) {
2593
1863
  for (i = 0; i < n; i++) {
2594
- *(dtype*)p3 = m_div(*(dtype*)p1, *(dtype*)p2);
2595
- p1 += s1;
2596
- p3 += s3;
1864
+ ((dtype*)p2)[i] = m_sign(((dtype*)p1)[i]);
2597
1865
  }
1866
+ return;
2598
1867
  }
2599
- } else {
2600
- if (p1 == p3) { // inplace case
2601
- for (i = 0; i < n; i++) {
2602
- check_intdivzero(*(dtype*)p2);
2603
- *(dtype*)p1 = m_div(*(dtype*)p1, *(dtype*)p2);
2604
- p1 += s1;
2605
- p2 += s2;
2606
- }
2607
- } else {
1868
+ if (is_aligned_step(s1, sizeof(dtype)) && is_aligned_step(s2, sizeof(dtype))) {
1869
+ //
2608
1870
  for (i = 0; i < n; i++) {
2609
- check_intdivzero(*(dtype*)p2);
2610
- *(dtype*)p3 = m_div(*(dtype*)p1, *(dtype*)p2);
1871
+ *(dtype*)p2 = m_sign(*(dtype*)p1);
2611
1872
  p1 += s1;
2612
1873
  p2 += s2;
2613
- p3 += s3;
2614
1874
  }
2615
- }
2616
- }
2617
-
2618
- return;
2619
- //
2620
- }
2621
- }
2622
- for (i = 0; i < n; i++) {
2623
- dtype x, y, z;
2624
- GET_DATA_STRIDE(p1, s1, dtype, x);
2625
- GET_DATA_STRIDE(p2, s2, dtype, y);
2626
- check_intdivzero(y);
2627
- z = m_div(x, y);
2628
- SET_DATA_STRIDE(p3, s3, dtype, z);
2629
- }
2630
- //
2631
- }
2632
- #undef check_intdivzero
2633
-
2634
- static VALUE dfloat_div_self(VALUE self, VALUE other) {
2635
- ndfunc_arg_in_t ain[2] = { { cT, 0 }, { cT, 0 } };
2636
- ndfunc_arg_out_t aout[1] = { { cT, 0 } };
2637
- ndfunc_t ndf = { iter_dfloat_div, STRIDE_LOOP, 2, 1, ain, aout };
2638
-
2639
- return na_ndloop(&ndf, 2, self, other);
2640
- }
2641
-
2642
- static VALUE dfloat_div(VALUE self, VALUE other) {
2643
-
2644
- VALUE klass, v;
2645
-
2646
- klass = na_upcast(rb_obj_class(self), rb_obj_class(other));
2647
- if (klass == cT) {
2648
- return dfloat_div_self(self, other);
2649
- } else {
2650
- v = rb_funcall(klass, id_cast, 1, self);
2651
- return rb_funcall(v, '/', 1, other);
2652
- }
2653
- }
2654
-
2655
- #define check_intdivzero(y) \
2656
- {}
2657
-
2658
- static void iter_dfloat_mod(na_loop_t* const lp) {
2659
- size_t i = 0;
2660
- size_t n;
2661
- char *p1, *p2, *p3;
2662
- ssize_t s1, s2, s3;
2663
-
2664
- INIT_COUNTER(lp, n);
2665
- INIT_PTR(lp, 0, p1, s1);
2666
- INIT_PTR(lp, 1, p2, s2);
2667
- INIT_PTR(lp, 2, p3, s3);
2668
-
2669
- //
2670
- if (is_aligned(p1, sizeof(dtype)) && is_aligned(p2, sizeof(dtype)) &&
2671
- is_aligned(p3, sizeof(dtype))) {
2672
-
2673
- if (s1 == sizeof(dtype) && s2 == sizeof(dtype) && s3 == sizeof(dtype)) {
2674
- if (p1 == p3) { // inplace case
2675
- for (; i < n; i++) {
2676
- check_intdivzero(((dtype*)p2)[i]);
2677
- ((dtype*)p1)[i] = m_mod(((dtype*)p1)[i], ((dtype*)p2)[i]);
2678
- }
2679
- } else {
2680
- for (; i < n; i++) {
2681
- check_intdivzero(((dtype*)p2)[i]);
2682
- ((dtype*)p3)[i] = m_mod(((dtype*)p1)[i], ((dtype*)p2)[i]);
2683
- }
2684
- }
2685
- return;
2686
- }
2687
-
2688
- if (is_aligned_step(s1, sizeof(dtype)) && is_aligned_step(s2, sizeof(dtype)) &&
2689
- is_aligned_step(s3, sizeof(dtype))) {
2690
- //
2691
-
2692
- if (s2 == 0) { // Broadcasting from scalar value.
2693
- check_intdivzero(*(dtype*)p2);
2694
- if (s1 == sizeof(dtype) && s3 == sizeof(dtype)) {
2695
- if (p1 == p3) { // inplace case
2696
- for (; i < n; i++) {
2697
- ((dtype*)p1)[i] = m_mod(((dtype*)p1)[i], *(dtype*)p2);
2698
- }
2699
- } else {
2700
- for (; i < n; i++) {
2701
- ((dtype*)p3)[i] = m_mod(((dtype*)p1)[i], *(dtype*)p2);
2702
- }
2703
- }
2704
- } else {
2705
- for (i = 0; i < n; i++) {
2706
- *(dtype*)p3 = m_mod(*(dtype*)p1, *(dtype*)p2);
2707
- p1 += s1;
2708
- p3 += s3;
2709
- }
2710
- }
2711
- } else {
2712
- if (p1 == p3) { // inplace case
2713
- for (i = 0; i < n; i++) {
2714
- check_intdivzero(*(dtype*)p2);
2715
- *(dtype*)p1 = m_mod(*(dtype*)p1, *(dtype*)p2);
2716
- p1 += s1;
2717
- p2 += s2;
2718
- }
2719
- } else {
2720
- for (i = 0; i < n; i++) {
2721
- check_intdivzero(*(dtype*)p2);
2722
- *(dtype*)p3 = m_mod(*(dtype*)p1, *(dtype*)p2);
2723
- p1 += s1;
2724
- p2 += s2;
2725
- p3 += s3;
2726
- }
2727
- }
2728
- }
2729
-
2730
- return;
2731
- //
2732
- }
2733
- }
2734
- for (i = 0; i < n; i++) {
2735
- dtype x, y, z;
2736
- GET_DATA_STRIDE(p1, s1, dtype, x);
2737
- GET_DATA_STRIDE(p2, s2, dtype, y);
2738
- check_intdivzero(y);
2739
- z = m_mod(x, y);
2740
- SET_DATA_STRIDE(p3, s3, dtype, z);
2741
- }
2742
- //
2743
- }
2744
- #undef check_intdivzero
2745
-
2746
- static VALUE dfloat_mod_self(VALUE self, VALUE other) {
2747
- ndfunc_arg_in_t ain[2] = { { cT, 0 }, { cT, 0 } };
2748
- ndfunc_arg_out_t aout[1] = { { cT, 0 } };
2749
- ndfunc_t ndf = { iter_dfloat_mod, STRIDE_LOOP, 2, 1, ain, aout };
2750
-
2751
- return na_ndloop(&ndf, 2, self, other);
2752
- }
2753
-
2754
- static VALUE dfloat_mod(VALUE self, VALUE other) {
2755
-
2756
- VALUE klass, v;
2757
-
2758
- klass = na_upcast(rb_obj_class(self), rb_obj_class(other));
2759
- if (klass == cT) {
2760
- return dfloat_mod_self(self, other);
2761
- } else {
2762
- v = rb_funcall(klass, id_cast, 1, self);
2763
- return rb_funcall(v, '%', 1, other);
2764
- }
2765
- }
2766
-
2767
- static void iter_dfloat_divmod(na_loop_t* const lp) {
2768
- size_t i, n;
2769
- char *p1, *p2, *p3, *p4;
2770
- ssize_t s1, s2, s3, s4;
2771
- dtype x, y, a, b;
2772
- INIT_COUNTER(lp, n);
2773
- INIT_PTR(lp, 0, p1, s1);
2774
- INIT_PTR(lp, 1, p2, s2);
2775
- INIT_PTR(lp, 2, p3, s3);
2776
- INIT_PTR(lp, 3, p4, s4);
2777
- for (i = n; i--;) {
2778
- GET_DATA_STRIDE(p1, s1, dtype, x);
2779
- GET_DATA_STRIDE(p2, s2, dtype, y);
2780
- m_divmod(x, y, a, b);
2781
- SET_DATA_STRIDE(p3, s3, dtype, a);
2782
- SET_DATA_STRIDE(p4, s4, dtype, b);
2783
- }
2784
- }
2785
-
2786
- static VALUE dfloat_divmod_self(VALUE self, VALUE other) {
2787
- ndfunc_arg_in_t ain[2] = { { cT, 0 }, { cT, 0 } };
2788
- ndfunc_arg_out_t aout[2] = { { cT, 0 }, { cT, 0 } };
2789
- ndfunc_t ndf = { iter_dfloat_divmod, STRIDE_LOOP, 2, 2, ain, aout };
2790
-
2791
- return na_ndloop(&ndf, 2, self, other);
2792
- }
2793
-
2794
- static VALUE dfloat_divmod(VALUE self, VALUE other) {
2795
-
2796
- VALUE klass, v;
2797
- klass = na_upcast(rb_obj_class(self), rb_obj_class(other));
2798
- if (klass == cT) {
2799
- return dfloat_divmod_self(self, other);
2800
- } else {
2801
- v = rb_funcall(klass, id_cast, 1, self);
2802
- return rb_funcall(v, id_divmod, 1, other);
2803
- }
2804
- }
2805
-
2806
- static void iter_dfloat_pow(na_loop_t* const lp) {
2807
- size_t i;
2808
- char *p1, *p2, *p3;
2809
- ssize_t s1, s2, s3;
2810
- dtype x, y;
2811
- INIT_COUNTER(lp, i);
2812
- INIT_PTR(lp, 0, p1, s1);
2813
- INIT_PTR(lp, 1, p2, s2);
2814
- INIT_PTR(lp, 2, p3, s3);
2815
- for (; i--;) {
2816
- GET_DATA_STRIDE(p1, s1, dtype, x);
2817
- GET_DATA_STRIDE(p2, s2, dtype, y);
2818
- x = m_pow(x, y);
2819
- SET_DATA_STRIDE(p3, s3, dtype, x);
2820
- }
2821
- }
2822
-
2823
- static void iter_dfloat_pow_int32(na_loop_t* const lp) {
2824
- size_t i;
2825
- char *p1, *p2, *p3;
2826
- ssize_t s1, s2, s3;
2827
- dtype x;
2828
- int32_t y;
2829
- INIT_COUNTER(lp, i);
2830
- INIT_PTR(lp, 0, p1, s1);
2831
- INIT_PTR(lp, 1, p2, s2);
2832
- INIT_PTR(lp, 2, p3, s3);
2833
- for (; i--;) {
2834
- GET_DATA_STRIDE(p1, s1, dtype, x);
2835
- GET_DATA_STRIDE(p2, s2, int32_t, y);
2836
- x = m_pow_int(x, y);
2837
- SET_DATA_STRIDE(p3, s3, dtype, x);
2838
- }
2839
- }
2840
-
2841
- static VALUE dfloat_pow_self(VALUE self, VALUE other) {
2842
- ndfunc_arg_in_t ain[2] = { { cT, 0 }, { cT, 0 } };
2843
- ndfunc_arg_in_t ain_i[2] = { { cT, 0 }, { numo_cInt32, 0 } };
2844
- ndfunc_arg_out_t aout[1] = { { cT, 0 } };
2845
- ndfunc_t ndf = { iter_dfloat_pow, STRIDE_LOOP, 2, 1, ain, aout };
2846
- ndfunc_t ndf_i = { iter_dfloat_pow_int32, STRIDE_LOOP, 2, 1, ain_i, aout };
2847
-
2848
- // fixme : use na.integer?
2849
- if (FIXNUM_P(other) || rb_obj_is_kind_of(other, numo_cInt32)) {
2850
- return na_ndloop(&ndf_i, 2, self, other);
2851
- } else {
2852
- return na_ndloop(&ndf, 2, self, other);
2853
- }
2854
- }
2855
-
2856
- static VALUE dfloat_pow(VALUE self, VALUE other) {
2857
-
2858
- VALUE klass, v;
2859
- klass = na_upcast(rb_obj_class(self), rb_obj_class(other));
2860
- if (klass == cT) {
2861
- return dfloat_pow_self(self, other);
2862
- } else {
2863
- v = rb_funcall(klass, id_cast, 1, self);
2864
- return rb_funcall(v, id_pow, 1, other);
2865
- }
2866
- }
2867
-
2868
- static void iter_dfloat_minus(na_loop_t* const lp) {
2869
- size_t i, n;
2870
- char *p1, *p2;
2871
- ssize_t s1, s2;
2872
- size_t *idx1, *idx2;
2873
- dtype x;
2874
-
2875
- INIT_COUNTER(lp, n);
2876
- INIT_PTR_IDX(lp, 0, p1, s1, idx1);
2877
- INIT_PTR_IDX(lp, 1, p2, s2, idx2);
2878
-
2879
- if (idx1) {
2880
- if (idx2) {
2881
- for (i = 0; i < n; i++) {
2882
- GET_DATA_INDEX(p1, idx1, dtype, x);
2883
- x = m_minus(x);
2884
- SET_DATA_INDEX(p2, idx2, dtype, x);
2885
- }
2886
- } else {
2887
- for (i = 0; i < n; i++) {
2888
- GET_DATA_INDEX(p1, idx1, dtype, x);
2889
- x = m_minus(x);
2890
- SET_DATA_STRIDE(p2, s2, dtype, x);
2891
- }
2892
- }
2893
- } else {
2894
- if (idx2) {
2895
- for (i = 0; i < n; i++) {
2896
- GET_DATA_STRIDE(p1, s1, dtype, x);
2897
- x = m_minus(x);
2898
- SET_DATA_INDEX(p2, idx2, dtype, x);
2899
- }
2900
- } else {
2901
- //
2902
- if (is_aligned(p1, sizeof(dtype)) && is_aligned(p2, sizeof(dtype))) {
2903
- if (s1 == sizeof(dtype) && s2 == sizeof(dtype)) {
2904
- for (i = 0; i < n; i++) {
2905
- ((dtype*)p2)[i] = m_minus(((dtype*)p1)[i]);
2906
- }
2907
- return;
2908
- }
2909
- if (is_aligned_step(s1, sizeof(dtype)) && is_aligned_step(s2, sizeof(dtype))) {
2910
- //
2911
- for (i = 0; i < n; i++) {
2912
- *(dtype*)p2 = m_minus(*(dtype*)p1);
2913
- p1 += s1;
2914
- p2 += s2;
2915
- }
2916
- return;
2917
- //
2918
- }
2919
- }
2920
- for (i = 0; i < n; i++) {
2921
- GET_DATA_STRIDE(p1, s1, dtype, x);
2922
- x = m_minus(x);
2923
- SET_DATA_STRIDE(p2, s2, dtype, x);
2924
- }
2925
- //
2926
- }
2927
- }
2928
- }
2929
-
2930
- static VALUE dfloat_minus(VALUE self) {
2931
- ndfunc_arg_in_t ain[1] = { { cT, 0 } };
2932
- ndfunc_arg_out_t aout[1] = { { cT, 0 } };
2933
- ndfunc_t ndf = { iter_dfloat_minus, FULL_LOOP, 1, 1, ain, aout };
2934
-
2935
- return na_ndloop(&ndf, 1, self);
2936
- }
2937
-
2938
- static void iter_dfloat_reciprocal(na_loop_t* const lp) {
2939
- size_t i, n;
2940
- char *p1, *p2;
2941
- ssize_t s1, s2;
2942
- size_t *idx1, *idx2;
2943
- dtype x;
2944
-
2945
- INIT_COUNTER(lp, n);
2946
- INIT_PTR_IDX(lp, 0, p1, s1, idx1);
2947
- INIT_PTR_IDX(lp, 1, p2, s2, idx2);
2948
-
2949
- if (idx1) {
2950
- if (idx2) {
2951
- for (i = 0; i < n; i++) {
2952
- GET_DATA_INDEX(p1, idx1, dtype, x);
2953
- x = m_reciprocal(x);
2954
- SET_DATA_INDEX(p2, idx2, dtype, x);
2955
- }
2956
- } else {
2957
- for (i = 0; i < n; i++) {
2958
- GET_DATA_INDEX(p1, idx1, dtype, x);
2959
- x = m_reciprocal(x);
2960
- SET_DATA_STRIDE(p2, s2, dtype, x);
2961
- }
2962
- }
2963
- } else {
2964
- if (idx2) {
2965
- for (i = 0; i < n; i++) {
2966
- GET_DATA_STRIDE(p1, s1, dtype, x);
2967
- x = m_reciprocal(x);
2968
- SET_DATA_INDEX(p2, idx2, dtype, x);
2969
- }
2970
- } else {
2971
- //
2972
- if (is_aligned(p1, sizeof(dtype)) && is_aligned(p2, sizeof(dtype))) {
2973
- if (s1 == sizeof(dtype) && s2 == sizeof(dtype)) {
2974
- for (i = 0; i < n; i++) {
2975
- ((dtype*)p2)[i] = m_reciprocal(((dtype*)p1)[i]);
2976
- }
2977
- return;
2978
- }
2979
- if (is_aligned_step(s1, sizeof(dtype)) && is_aligned_step(s2, sizeof(dtype))) {
2980
- //
2981
- for (i = 0; i < n; i++) {
2982
- *(dtype*)p2 = m_reciprocal(*(dtype*)p1);
2983
- p1 += s1;
2984
- p2 += s2;
2985
- }
2986
- return;
2987
- //
2988
- }
2989
- }
2990
- for (i = 0; i < n; i++) {
2991
- GET_DATA_STRIDE(p1, s1, dtype, x);
2992
- x = m_reciprocal(x);
2993
- SET_DATA_STRIDE(p2, s2, dtype, x);
2994
- }
2995
- //
2996
- }
2997
- }
2998
- }
2999
-
3000
- static VALUE dfloat_reciprocal(VALUE self) {
- ndfunc_arg_in_t ain[1] = { { cT, 0 } };
- ndfunc_arg_out_t aout[1] = { { cT, 0 } };
- ndfunc_t ndf = { iter_dfloat_reciprocal, FULL_LOOP, 1, 1, ain, aout };
-
- return na_ndloop(&ndf, 1, self);
- }
-
- static void iter_dfloat_sign(na_loop_t* const lp) {
3009
- size_t i, n;
3010
- char *p1, *p2;
3011
- ssize_t s1, s2;
3012
- size_t *idx1, *idx2;
3013
- dtype x;
3014
-
3015
- INIT_COUNTER(lp, n);
3016
- INIT_PTR_IDX(lp, 0, p1, s1, idx1);
3017
- INIT_PTR_IDX(lp, 1, p2, s2, idx2);
3018
-
3019
- if (idx1) {
3020
- if (idx2) {
3021
- for (i = 0; i < n; i++) {
3022
- GET_DATA_INDEX(p1, idx1, dtype, x);
3023
- x = m_sign(x);
3024
- SET_DATA_INDEX(p2, idx2, dtype, x);
3025
- }
3026
- } else {
3027
- for (i = 0; i < n; i++) {
3028
- GET_DATA_INDEX(p1, idx1, dtype, x);
3029
- x = m_sign(x);
3030
- SET_DATA_STRIDE(p2, s2, dtype, x);
3031
- }
3032
- }
3033
- } else {
3034
- if (idx2) {
3035
- for (i = 0; i < n; i++) {
3036
- GET_DATA_STRIDE(p1, s1, dtype, x);
3037
- x = m_sign(x);
3038
- SET_DATA_INDEX(p2, idx2, dtype, x);
3039
- }
3040
- } else {
3041
- //
3042
- if (is_aligned(p1, sizeof(dtype)) && is_aligned(p2, sizeof(dtype))) {
3043
- if (s1 == sizeof(dtype) && s2 == sizeof(dtype)) {
3044
- for (i = 0; i < n; i++) {
3045
- ((dtype*)p2)[i] = m_sign(((dtype*)p1)[i]);
3046
- }
3047
- return;
3048
- }
3049
- if (is_aligned_step(s1, sizeof(dtype)) && is_aligned_step(s2, sizeof(dtype))) {
3050
- //
3051
- for (i = 0; i < n; i++) {
3052
- *(dtype*)p2 = m_sign(*(dtype*)p1);
3053
- p1 += s1;
3054
- p2 += s2;
3055
- }
3056
- return;
3057
- //
3058
- }
3059
- }
3060
- for (i = 0; i < n; i++) {
3061
- GET_DATA_STRIDE(p1, s1, dtype, x);
3062
- x = m_sign(x);
3063
- SET_DATA_STRIDE(p2, s2, dtype, x);
3064
- }
3065
- //
3066
- }
3067
- }
3068
- }
3069
-
3070
- static VALUE dfloat_sign(VALUE self) {
- ndfunc_arg_in_t ain[1] = { { cT, 0 } };
- ndfunc_arg_out_t aout[1] = { { cT, 0 } };
- ndfunc_t ndf = { iter_dfloat_sign, FULL_LOOP, 1, 1, ain, aout };
-
- return na_ndloop(&ndf, 1, self);
- }
-
- static void iter_dfloat_square(na_loop_t* const lp) {
3079
- size_t i, n;
3080
- char *p1, *p2;
3081
- ssize_t s1, s2;
3082
- size_t *idx1, *idx2;
3083
- dtype x;
3084
-
3085
- INIT_COUNTER(lp, n);
3086
- INIT_PTR_IDX(lp, 0, p1, s1, idx1);
3087
- INIT_PTR_IDX(lp, 1, p2, s2, idx2);
3088
-
3089
- if (idx1) {
3090
- if (idx2) {
3091
- for (i = 0; i < n; i++) {
3092
- GET_DATA_INDEX(p1, idx1, dtype, x);
3093
- x = m_square(x);
3094
- SET_DATA_INDEX(p2, idx2, dtype, x);
3095
- }
3096
- } else {
3097
- for (i = 0; i < n; i++) {
3098
- GET_DATA_INDEX(p1, idx1, dtype, x);
3099
- x = m_square(x);
3100
- SET_DATA_STRIDE(p2, s2, dtype, x);
3101
- }
3102
- }
3103
- } else {
3104
- if (idx2) {
3105
- for (i = 0; i < n; i++) {
3106
- GET_DATA_STRIDE(p1, s1, dtype, x);
3107
- x = m_square(x);
3108
- SET_DATA_INDEX(p2, idx2, dtype, x);
3109
- }
3110
- } else {
3111
- //
3112
- if (is_aligned(p1, sizeof(dtype)) && is_aligned(p2, sizeof(dtype))) {
3113
- if (s1 == sizeof(dtype) && s2 == sizeof(dtype)) {
3114
- for (i = 0; i < n; i++) {
3115
- ((dtype*)p2)[i] = m_square(((dtype*)p1)[i]);
3116
- }
3117
- return;
3118
- }
3119
- if (is_aligned_step(s1, sizeof(dtype)) && is_aligned_step(s2, sizeof(dtype))) {
3120
- //
3121
- for (i = 0; i < n; i++) {
3122
- *(dtype*)p2 = m_square(*(dtype*)p1);
3123
- p1 += s1;
3124
- p2 += s2;
3125
- }
3126
- return;
3127
- //
3128
- }
3129
- }
3130
- for (i = 0; i < n; i++) {
3131
- GET_DATA_STRIDE(p1, s1, dtype, x);
3132
- x = m_square(x);
3133
- SET_DATA_STRIDE(p2, s2, dtype, x);
3134
- }
3135
- //
3136
- }
3137
- }
3138
- }
3139
-
3140
- static VALUE dfloat_square(VALUE self) {
- ndfunc_arg_in_t ain[1] = { { cT, 0 } };
- ndfunc_arg_out_t aout[1] = { { cT, 0 } };
- ndfunc_t ndf = { iter_dfloat_square, FULL_LOOP, 1, 1, ain, aout };
-
- return na_ndloop(&ndf, 1, self);
- }
-
- static void iter_dfloat_eq(na_loop_t* const lp) {
- size_t i;
- char *p1, *p2;
- BIT_DIGIT* a3;
- size_t p3;
- ssize_t s1, s2, s3;
- dtype x, y;
- BIT_DIGIT b;
- INIT_COUNTER(lp, i);
- INIT_PTR(lp, 0, p1, s1);
- INIT_PTR(lp, 1, p2, s2);
- INIT_PTR_BIT(lp, 2, a3, p3, s3);
- for (; i--;) {
- GET_DATA_STRIDE(p1, s1, dtype, x);
- GET_DATA_STRIDE(p2, s2, dtype, y);
- b = (m_eq(x, y)) ? 1 : 0;
- STORE_BIT(a3, p3, b);
- p3 += s3;
- }
- }
-
- static VALUE dfloat_eq_self(VALUE self, VALUE other) {
- ndfunc_arg_in_t ain[2] = { { cT, 0 }, { cT, 0 } };
- ndfunc_arg_out_t aout[1] = { { numo_cBit, 0 } };
- ndfunc_t ndf = { iter_dfloat_eq, STRIDE_LOOP, 2, 1, ain, aout };
-
- return na_ndloop(&ndf, 2, self, other);
- }
-
- static VALUE dfloat_eq(VALUE self, VALUE other) {
-
- VALUE klass, v;
- klass = na_upcast(rb_obj_class(self), rb_obj_class(other));
- if (klass == cT) {
- return dfloat_eq_self(self, other);
- } else {
- v = rb_funcall(klass, id_cast, 1, self);
- return rb_funcall(v, id_eq, 1, other);
- }
- }
-
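The comparison iterators above write one bit per element pair into a Numo::Bit result (the aout entry is numo_cBit), and the public wrappers upcast mixed operands via na_upcast before dispatching. A minimal usage sketch, assuming these generated functions keep the element-wise method names used by upstream numo-narray (eq, ne, Bit#count_true, boolean-mask indexing):

    require "numo/narray"

    a = Numo::DFloat[1.0, 2.0, 3.0]
    b = Numo::DFloat[1.0, 0.0, 3.0]

    mask = a.eq(b)       # element-wise equality, one bit per element => Numo::Bit[1, 0, 1]
    mask.count_true      # => 2
    a[a.ne(b)]           # boolean-mask indexing => Numo::DFloat[2.0]

The Bit output is why these iterators pack results with STORE_BIT instead of SET_DATA_STRIDE.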
- static void iter_dfloat_ne(na_loop_t* const lp) {
3190
- size_t i;
3191
- char *p1, *p2;
3192
- BIT_DIGIT* a3;
3193
- size_t p3;
3194
- ssize_t s1, s2, s3;
3195
- dtype x, y;
3196
- BIT_DIGIT b;
3197
- INIT_COUNTER(lp, i);
3198
- INIT_PTR(lp, 0, p1, s1);
3199
- INIT_PTR(lp, 1, p2, s2);
3200
- INIT_PTR_BIT(lp, 2, a3, p3, s3);
3201
- for (; i--;) {
3202
- GET_DATA_STRIDE(p1, s1, dtype, x);
3203
- GET_DATA_STRIDE(p2, s2, dtype, y);
3204
- b = (m_ne(x, y)) ? 1 : 0;
3205
- STORE_BIT(a3, p3, b);
3206
- p3 += s3;
3207
- }
3208
- }
3209
-
3210
- static VALUE dfloat_ne_self(VALUE self, VALUE other) {
3211
- ndfunc_arg_in_t ain[2] = { { cT, 0 }, { cT, 0 } };
3212
- ndfunc_arg_out_t aout[1] = { { numo_cBit, 0 } };
3213
- ndfunc_t ndf = { iter_dfloat_ne, STRIDE_LOOP, 2, 1, ain, aout };
3214
-
3215
- return na_ndloop(&ndf, 2, self, other);
3216
- }
3217
-
3218
- static VALUE dfloat_ne(VALUE self, VALUE other) {
3219
-
3220
- VALUE klass, v;
3221
- klass = na_upcast(rb_obj_class(self), rb_obj_class(other));
3222
- if (klass == cT) {
3223
- return dfloat_ne_self(self, other);
3224
- } else {
3225
- v = rb_funcall(klass, id_cast, 1, self);
3226
- return rb_funcall(v, id_ne, 1, other);
3227
- }
3228
- }
3229
-
3230
- static void iter_dfloat_nearly_eq(na_loop_t* const lp) {
3231
- size_t i;
3232
- char *p1, *p2;
3233
- BIT_DIGIT* a3;
3234
- size_t p3;
3235
- ssize_t s1, s2, s3;
3236
- dtype x, y;
3237
- BIT_DIGIT b;
3238
- INIT_COUNTER(lp, i);
3239
- INIT_PTR(lp, 0, p1, s1);
3240
- INIT_PTR(lp, 1, p2, s2);
3241
- INIT_PTR_BIT(lp, 2, a3, p3, s3);
3242
- for (; i--;) {
3243
- GET_DATA_STRIDE(p1, s1, dtype, x);
3244
- GET_DATA_STRIDE(p2, s2, dtype, y);
3245
- b = (m_nearly_eq(x, y)) ? 1 : 0;
3246
- STORE_BIT(a3, p3, b);
3247
- p3 += s3;
3248
- }
3249
- }
3250
-
3251
- static VALUE dfloat_nearly_eq_self(VALUE self, VALUE other) {
3252
- ndfunc_arg_in_t ain[2] = { { cT, 0 }, { cT, 0 } };
3253
- ndfunc_arg_out_t aout[1] = { { numo_cBit, 0 } };
3254
- ndfunc_t ndf = { iter_dfloat_nearly_eq, STRIDE_LOOP, 2, 1, ain, aout };
3255
-
3256
- return na_ndloop(&ndf, 2, self, other);
3257
- }
3258
-
3259
- static VALUE dfloat_nearly_eq(VALUE self, VALUE other) {
3260
-
3261
- VALUE klass, v;
3262
- klass = na_upcast(rb_obj_class(self), rb_obj_class(other));
3263
- if (klass == cT) {
3264
- return dfloat_nearly_eq_self(self, other);
3265
- } else {
3266
- v = rb_funcall(klass, id_cast, 1, self);
3267
- return rb_funcall(v, id_nearly_eq, 1, other);
3268
- }
3269
- }
3270
-
3271
- static void iter_dfloat_floor(na_loop_t* const lp) {
3272
- size_t i, n;
3273
- char *p1, *p2;
3274
- ssize_t s1, s2;
3275
- size_t *idx1, *idx2;
3276
- dtype x;
3277
-
3278
- INIT_COUNTER(lp, n);
3279
- INIT_PTR_IDX(lp, 0, p1, s1, idx1);
3280
- INIT_PTR_IDX(lp, 1, p2, s2, idx2);
3281
-
3282
- if (idx1) {
3283
- if (idx2) {
3284
- for (i = 0; i < n; i++) {
3285
- GET_DATA_INDEX(p1, idx1, dtype, x);
3286
- x = m_floor(x);
3287
- SET_DATA_INDEX(p2, idx2, dtype, x);
3288
- }
3289
- } else {
3290
- for (i = 0; i < n; i++) {
3291
- GET_DATA_INDEX(p1, idx1, dtype, x);
3292
- x = m_floor(x);
3293
- SET_DATA_STRIDE(p2, s2, dtype, x);
3294
- }
3295
- }
3296
- } else {
3297
- if (idx2) {
3298
- for (i = 0; i < n; i++) {
3299
- GET_DATA_STRIDE(p1, s1, dtype, x);
3300
- x = m_floor(x);
3301
- SET_DATA_INDEX(p2, idx2, dtype, x);
3302
- }
3303
- } else {
3304
- //
3305
- if (is_aligned(p1, sizeof(dtype)) && is_aligned(p2, sizeof(dtype))) {
3306
- if (s1 == sizeof(dtype) && s2 == sizeof(dtype)) {
3307
- for (i = 0; i < n; i++) {
3308
- ((dtype*)p2)[i] = m_floor(((dtype*)p1)[i]);
3309
- }
3310
- return;
3311
- }
3312
- if (is_aligned_step(s1, sizeof(dtype)) && is_aligned_step(s2, sizeof(dtype))) {
3313
- //
3314
- for (i = 0; i < n; i++) {
3315
- *(dtype*)p2 = m_floor(*(dtype*)p1);
3316
- p1 += s1;
3317
- p2 += s2;
3318
- }
3319
- return;
3320
- //
3321
- }
3322
- }
3323
- for (i = 0; i < n; i++) {
3324
- GET_DATA_STRIDE(p1, s1, dtype, x);
3325
- x = m_floor(x);
3326
- SET_DATA_STRIDE(p2, s2, dtype, x);
3327
- }
3328
- //
3329
- }
3330
- }
3331
- }
3332
-
3333
- static VALUE dfloat_floor(VALUE self) {
- ndfunc_arg_in_t ain[1] = { { cT, 0 } };
- ndfunc_arg_out_t aout[1] = { { cT, 0 } };
- ndfunc_t ndf = { iter_dfloat_floor, FULL_LOOP, 1, 1, ain, aout };
-
- return na_ndloop(&ndf, 1, self);
- }
-
- static void iter_dfloat_round(na_loop_t* const lp) {
3342
- size_t i, n;
3343
- char *p1, *p2;
3344
- ssize_t s1, s2;
3345
- size_t *idx1, *idx2;
3346
- dtype x;
3347
-
3348
- INIT_COUNTER(lp, n);
3349
- INIT_PTR_IDX(lp, 0, p1, s1, idx1);
3350
- INIT_PTR_IDX(lp, 1, p2, s2, idx2);
3351
-
3352
- if (idx1) {
3353
- if (idx2) {
3354
- for (i = 0; i < n; i++) {
3355
- GET_DATA_INDEX(p1, idx1, dtype, x);
3356
- x = m_round(x);
3357
- SET_DATA_INDEX(p2, idx2, dtype, x);
3358
- }
3359
- } else {
3360
- for (i = 0; i < n; i++) {
3361
- GET_DATA_INDEX(p1, idx1, dtype, x);
3362
- x = m_round(x);
3363
- SET_DATA_STRIDE(p2, s2, dtype, x);
3364
- }
3365
- }
3366
- } else {
3367
- if (idx2) {
3368
- for (i = 0; i < n; i++) {
3369
- GET_DATA_STRIDE(p1, s1, dtype, x);
3370
- x = m_round(x);
3371
- SET_DATA_INDEX(p2, idx2, dtype, x);
3372
- }
3373
- } else {
3374
- //
3375
- if (is_aligned(p1, sizeof(dtype)) && is_aligned(p2, sizeof(dtype))) {
3376
- if (s1 == sizeof(dtype) && s2 == sizeof(dtype)) {
3377
- for (i = 0; i < n; i++) {
3378
- ((dtype*)p2)[i] = m_round(((dtype*)p1)[i]);
3379
- }
3380
- return;
3381
- }
3382
- if (is_aligned_step(s1, sizeof(dtype)) && is_aligned_step(s2, sizeof(dtype))) {
3383
- //
3384
- for (i = 0; i < n; i++) {
3385
- *(dtype*)p2 = m_round(*(dtype*)p1);
3386
- p1 += s1;
3387
- p2 += s2;
3388
- }
3389
- return;
3390
- //
3391
- }
3392
- }
3393
- for (i = 0; i < n; i++) {
3394
- GET_DATA_STRIDE(p1, s1, dtype, x);
3395
- x = m_round(x);
3396
- SET_DATA_STRIDE(p2, s2, dtype, x);
3397
- }
3398
- //
3399
- }
3400
- }
3401
- }
3402
-
3403
- static VALUE dfloat_round(VALUE self) {
- ndfunc_arg_in_t ain[1] = { { cT, 0 } };
- ndfunc_arg_out_t aout[1] = { { cT, 0 } };
- ndfunc_t ndf = { iter_dfloat_round, FULL_LOOP, 1, 1, ain, aout };
-
- return na_ndloop(&ndf, 1, self);
- }
-
- static void iter_dfloat_ceil(na_loop_t* const lp) {
3412
- size_t i, n;
3413
- char *p1, *p2;
3414
- ssize_t s1, s2;
3415
- size_t *idx1, *idx2;
3416
- dtype x;
3417
-
3418
- INIT_COUNTER(lp, n);
3419
- INIT_PTR_IDX(lp, 0, p1, s1, idx1);
3420
- INIT_PTR_IDX(lp, 1, p2, s2, idx2);
3421
-
3422
- if (idx1) {
3423
- if (idx2) {
3424
- for (i = 0; i < n; i++) {
3425
- GET_DATA_INDEX(p1, idx1, dtype, x);
3426
- x = m_ceil(x);
3427
- SET_DATA_INDEX(p2, idx2, dtype, x);
3428
- }
3429
- } else {
3430
- for (i = 0; i < n; i++) {
3431
- GET_DATA_INDEX(p1, idx1, dtype, x);
3432
- x = m_ceil(x);
3433
- SET_DATA_STRIDE(p2, s2, dtype, x);
3434
- }
3435
- }
3436
- } else {
3437
- if (idx2) {
3438
- for (i = 0; i < n; i++) {
3439
- GET_DATA_STRIDE(p1, s1, dtype, x);
3440
- x = m_ceil(x);
3441
- SET_DATA_INDEX(p2, idx2, dtype, x);
3442
- }
3443
- } else {
3444
- //
3445
- if (is_aligned(p1, sizeof(dtype)) && is_aligned(p2, sizeof(dtype))) {
3446
- if (s1 == sizeof(dtype) && s2 == sizeof(dtype)) {
3447
- for (i = 0; i < n; i++) {
3448
- ((dtype*)p2)[i] = m_ceil(((dtype*)p1)[i]);
3449
- }
3450
- return;
3451
- }
3452
- if (is_aligned_step(s1, sizeof(dtype)) && is_aligned_step(s2, sizeof(dtype))) {
3453
- //
3454
- for (i = 0; i < n; i++) {
3455
- *(dtype*)p2 = m_ceil(*(dtype*)p1);
3456
- p1 += s1;
3457
- p2 += s2;
3458
- }
3459
- return;
3460
- //
3461
- }
3462
- }
3463
- for (i = 0; i < n; i++) {
3464
- GET_DATA_STRIDE(p1, s1, dtype, x);
3465
- x = m_ceil(x);
3466
- SET_DATA_STRIDE(p2, s2, dtype, x);
3467
- }
3468
- //
3469
- }
3470
- }
3471
- }
3472
-
3473
- static VALUE dfloat_ceil(VALUE self) {
- ndfunc_arg_in_t ain[1] = { { cT, 0 } };
- ndfunc_arg_out_t aout[1] = { { cT, 0 } };
- ndfunc_t ndf = { iter_dfloat_ceil, FULL_LOOP, 1, 1, ain, aout };
-
- return na_ndloop(&ndf, 1, self);
- }
-
- static void iter_dfloat_trunc(na_loop_t* const lp) {
3482
- size_t i, n;
3483
- char *p1, *p2;
3484
- ssize_t s1, s2;
3485
- size_t *idx1, *idx2;
3486
- dtype x;
3487
-
3488
- INIT_COUNTER(lp, n);
3489
- INIT_PTR_IDX(lp, 0, p1, s1, idx1);
3490
- INIT_PTR_IDX(lp, 1, p2, s2, idx2);
3491
-
3492
- if (idx1) {
3493
- if (idx2) {
3494
- for (i = 0; i < n; i++) {
3495
- GET_DATA_INDEX(p1, idx1, dtype, x);
3496
- x = m_trunc(x);
3497
- SET_DATA_INDEX(p2, idx2, dtype, x);
3498
- }
3499
- } else {
3500
- for (i = 0; i < n; i++) {
3501
- GET_DATA_INDEX(p1, idx1, dtype, x);
3502
- x = m_trunc(x);
3503
- SET_DATA_STRIDE(p2, s2, dtype, x);
3504
- }
3505
- }
3506
- } else {
3507
- if (idx2) {
3508
- for (i = 0; i < n; i++) {
3509
- GET_DATA_STRIDE(p1, s1, dtype, x);
3510
- x = m_trunc(x);
3511
- SET_DATA_INDEX(p2, idx2, dtype, x);
3512
- }
3513
- } else {
3514
- //
3515
- if (is_aligned(p1, sizeof(dtype)) && is_aligned(p2, sizeof(dtype))) {
3516
- if (s1 == sizeof(dtype) && s2 == sizeof(dtype)) {
3517
- for (i = 0; i < n; i++) {
3518
- ((dtype*)p2)[i] = m_trunc(((dtype*)p1)[i]);
3519
- }
3520
- return;
3521
- }
3522
- if (is_aligned_step(s1, sizeof(dtype)) && is_aligned_step(s2, sizeof(dtype))) {
3523
- //
3524
- for (i = 0; i < n; i++) {
3525
- *(dtype*)p2 = m_trunc(*(dtype*)p1);
3526
- p1 += s1;
3527
- p2 += s2;
3528
- }
3529
- return;
3530
- //
3531
- }
3532
- }
3533
- for (i = 0; i < n; i++) {
3534
- GET_DATA_STRIDE(p1, s1, dtype, x);
3535
- x = m_trunc(x);
3536
- SET_DATA_STRIDE(p2, s2, dtype, x);
3537
- }
3538
- //
3539
- }
3540
- }
3541
- }
3542
-
3543
- static VALUE dfloat_trunc(VALUE self) {
- ndfunc_arg_in_t ain[1] = { { cT, 0 } };
- ndfunc_arg_out_t aout[1] = { { cT, 0 } };
- ndfunc_t ndf = { iter_dfloat_trunc, FULL_LOOP, 1, 1, ain, aout };
-
- return na_ndloop(&ndf, 1, self);
- }
-
- static void iter_dfloat_rint(na_loop_t* const lp) {
3552
- size_t i, n;
3553
- char *p1, *p2;
3554
- ssize_t s1, s2;
3555
- size_t *idx1, *idx2;
3556
- dtype x;
3557
-
3558
- INIT_COUNTER(lp, n);
3559
- INIT_PTR_IDX(lp, 0, p1, s1, idx1);
3560
- INIT_PTR_IDX(lp, 1, p2, s2, idx2);
3561
-
3562
- if (idx1) {
3563
- if (idx2) {
3564
- for (i = 0; i < n; i++) {
3565
- GET_DATA_INDEX(p1, idx1, dtype, x);
3566
- x = m_rint(x);
3567
- SET_DATA_INDEX(p2, idx2, dtype, x);
3568
- }
3569
- } else {
3570
- for (i = 0; i < n; i++) {
3571
- GET_DATA_INDEX(p1, idx1, dtype, x);
3572
- x = m_rint(x);
3573
- SET_DATA_STRIDE(p2, s2, dtype, x);
3574
- }
3575
- }
3576
- } else {
3577
- if (idx2) {
3578
- for (i = 0; i < n; i++) {
3579
- GET_DATA_STRIDE(p1, s1, dtype, x);
3580
- x = m_rint(x);
3581
- SET_DATA_INDEX(p2, idx2, dtype, x);
3582
- }
3583
- } else {
3584
- //
3585
- if (is_aligned(p1, sizeof(dtype)) && is_aligned(p2, sizeof(dtype))) {
3586
- if (s1 == sizeof(dtype) && s2 == sizeof(dtype)) {
3587
- for (i = 0; i < n; i++) {
3588
- ((dtype*)p2)[i] = m_rint(((dtype*)p1)[i]);
3589
- }
3590
- return;
3591
- }
3592
- if (is_aligned_step(s1, sizeof(dtype)) && is_aligned_step(s2, sizeof(dtype))) {
3593
- //
3594
- for (i = 0; i < n; i++) {
3595
- *(dtype*)p2 = m_rint(*(dtype*)p1);
3596
- p1 += s1;
3597
- p2 += s2;
3598
- }
3599
- return;
3600
- //
3601
- }
3602
- }
3603
- for (i = 0; i < n; i++) {
3604
- GET_DATA_STRIDE(p1, s1, dtype, x);
3605
- x = m_rint(x);
3606
- SET_DATA_STRIDE(p2, s2, dtype, x);
3607
- }
3608
- //
3609
- }
3610
- }
3611
- }
3612
-
3613
- static VALUE dfloat_rint(VALUE self) {
- ndfunc_arg_in_t ain[1] = { { cT, 0 } };
- ndfunc_arg_out_t aout[1] = { { cT, 0 } };
- ndfunc_t ndf = { iter_dfloat_rint, FULL_LOOP, 1, 1, ain, aout };
-
- return na_ndloop(&ndf, 1, self);
- }
-
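The five rounding iterators in this hunk (floor, round, ceil, trunc, rint) share the same loop skeleton and differ only in the scalar kernel (m_floor through m_rint). A rough sketch of how they differ at the Ruby level, assuming the usual Numo::DFloat method bindings, with m_round mapping to C round() (ties away from zero) and m_rint to C rint() (ties to even under the default rounding mode):

    require "numo/narray"

    x = Numo::DFloat[-1.5, -0.5, 0.5, 1.5, 2.7]

    x.floor   # => [-2, -1, 0, 1, 2]   toward -Infinity
    x.ceil    # => [-1,  0, 1, 2, 3]   toward +Infinity
    x.trunc   # => [-1,  0, 0, 1, 2]   toward zero
    x.round   # => [-2, -1, 1, 2, 3]   ties away from zero
    x.rint    # => [-2,  0, 0, 2, 3]   ties to even (results stay DFloat; -0.0 shown as 0)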
- #define check_intdivzero(y) \
- {}
-
- static void iter_dfloat_copysign(na_loop_t* const lp) {
3625
- size_t i = 0;
3626
- size_t n;
3627
- char *p1, *p2, *p3;
3628
- ssize_t s1, s2, s3;
3629
-
3630
- INIT_COUNTER(lp, n);
3631
- INIT_PTR(lp, 0, p1, s1);
3632
- INIT_PTR(lp, 1, p2, s2);
3633
- INIT_PTR(lp, 2, p3, s3);
3634
-
3635
- //
3636
- if (is_aligned(p1, sizeof(dtype)) && is_aligned(p2, sizeof(dtype)) &&
3637
- is_aligned(p3, sizeof(dtype))) {
3638
-
3639
- if (s1 == sizeof(dtype) && s2 == sizeof(dtype) && s3 == sizeof(dtype)) {
3640
- if (p1 == p3) { // inplace case
3641
- for (; i < n; i++) {
3642
- check_intdivzero(((dtype*)p2)[i]);
3643
- ((dtype*)p1)[i] = m_copysign(((dtype*)p1)[i], ((dtype*)p2)[i]);
3644
- }
3645
- } else {
3646
- for (; i < n; i++) {
3647
- check_intdivzero(((dtype*)p2)[i]);
3648
- ((dtype*)p3)[i] = m_copysign(((dtype*)p1)[i], ((dtype*)p2)[i]);
3649
- }
3650
- }
3651
- return;
3652
- }
3653
-
3654
- if (is_aligned_step(s1, sizeof(dtype)) && is_aligned_step(s2, sizeof(dtype)) &&
3655
- is_aligned_step(s3, sizeof(dtype))) {
3656
- //
3657
-
3658
- if (s2 == 0) { // Broadcasting from scalar value.
3659
- check_intdivzero(*(dtype*)p2);
3660
- if (s1 == sizeof(dtype) && s3 == sizeof(dtype)) {
3661
- if (p1 == p3) { // inplace case
3662
- for (; i < n; i++) {
3663
- ((dtype*)p1)[i] = m_copysign(((dtype*)p1)[i], *(dtype*)p2);
3664
- }
3665
- } else {
3666
- for (; i < n; i++) {
3667
- ((dtype*)p3)[i] = m_copysign(((dtype*)p1)[i], *(dtype*)p2);
3668
- }
3669
- }
3670
- } else {
3671
- for (i = 0; i < n; i++) {
3672
- *(dtype*)p3 = m_copysign(*(dtype*)p1, *(dtype*)p2);
3673
- p1 += s1;
3674
- p3 += s3;
3675
- }
3676
- }
3677
- } else {
3678
- if (p1 == p3) { // inplace case
3679
- for (i = 0; i < n; i++) {
3680
- check_intdivzero(*(dtype*)p2);
3681
- *(dtype*)p1 = m_copysign(*(dtype*)p1, *(dtype*)p2);
3682
- p1 += s1;
3683
- p2 += s2;
3684
- }
3685
- } else {
3686
- for (i = 0; i < n; i++) {
3687
- check_intdivzero(*(dtype*)p2);
3688
- *(dtype*)p3 = m_copysign(*(dtype*)p1, *(dtype*)p2);
3689
- p1 += s1;
3690
- p2 += s2;
3691
- p3 += s3;
3692
- }
3693
- }
3694
- }
3695
-
3696
- return;
3697
- //
3698
- }
3699
- }
3700
- for (i = 0; i < n; i++) {
3701
- dtype x, y, z;
3702
- GET_DATA_STRIDE(p1, s1, dtype, x);
3703
- GET_DATA_STRIDE(p2, s2, dtype, y);
3704
- check_intdivzero(y);
3705
- z = m_copysign(x, y);
3706
- SET_DATA_STRIDE(p3, s3, dtype, z);
3707
- }
3708
- //
3709
- }
3710
- #undef check_intdivzero
3711
-
3712
- static VALUE dfloat_copysign_self(VALUE self, VALUE other) {
3713
- ndfunc_arg_in_t ain[2] = { { cT, 0 }, { cT, 0 } };
3714
- ndfunc_arg_out_t aout[1] = { { cT, 0 } };
3715
- ndfunc_t ndf = { iter_dfloat_copysign, STRIDE_LOOP, 2, 1, ain, aout };
3716
-
3717
- return na_ndloop(&ndf, 2, self, other);
3718
- }
3719
-
3720
- static VALUE dfloat_copysign(VALUE self, VALUE other) {
3721
-
3722
- VALUE klass, v;
3723
-
3724
- klass = na_upcast(rb_obj_class(self), rb_obj_class(other));
3725
- if (klass == cT) {
3726
- return dfloat_copysign_self(self, other);
3727
- } else {
3728
- v = rb_funcall(klass, id_cast, 1, self);
3729
- return rb_funcall(v, id_copysign, 1, other);
3730
- }
3731
- }
3732
-
3733
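iter_dfloat_copysign above carries three code paths (fully contiguous, scalar broadcast when s2 == 0, and generic strided), and check_intdivzero is deliberately a no-op here because division-by-zero guards only matter for the integer dtypes generated from the same template. A hedged usage sketch, assuming Numo::DFloat#copysign keeps the upstream binding and accepts a plain Ruby Numeric for the broadcast case:

    require "numo/narray"

    mag = Numo::DFloat[1.0, 2.0, 3.0]
    sgn = Numo::DFloat[-1.0, 1.0, -0.0]

    mag.copysign(sgn)   # => [-1.0, 2.0, -3.0]  magnitude from mag, sign from sgn
    mag.copysign(-1)    # scalar operand exercises the s2 == 0 broadcast path => [-1.0, -2.0, -3.0]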
- static void iter_dfloat_signbit(na_loop_t* const lp) {
3734
- size_t i;
3735
- char* p1;
3736
- BIT_DIGIT* a2;
3737
- size_t p2;
3738
- ssize_t s1, s2;
3739
- size_t* idx1;
3740
- dtype x;
3741
- BIT_DIGIT b;
3742
- INIT_COUNTER(lp, i);
3743
- INIT_PTR_IDX(lp, 0, p1, s1, idx1);
3744
- INIT_PTR_BIT(lp, 1, a2, p2, s2);
3745
- if (idx1) {
3746
- for (; i--;) {
3747
- GET_DATA_INDEX(p1, idx1, dtype, x);
3748
- b = (m_signbit(x)) ? 1 : 0;
3749
- STORE_BIT(a2, p2, b);
3750
- p2 += s2;
3751
- }
3752
- } else {
3753
- for (; i--;) {
3754
- GET_DATA_STRIDE(p1, s1, dtype, x);
3755
- b = (m_signbit(x)) ? 1 : 0;
3756
- STORE_BIT(a2, p2, b);
3757
- p2 += s2;
3758
- }
3759
- }
3760
- }
3761
-
3762
- static VALUE dfloat_signbit(VALUE self) {
- ndfunc_arg_in_t ain[1] = { { cT, 0 } };
- ndfunc_arg_out_t aout[1] = { { numo_cBit, 0 } };
- ndfunc_t ndf = { iter_dfloat_signbit, FULL_LOOP, 1, 1, ain, aout };
-
- return na_ndloop(&ndf, 1, self);
- }
-
- static void iter_dfloat_modf(na_loop_t* const lp) {
- size_t i;
- char *p1, *p2, *p3;
- ssize_t s1, s2, s3;
- dtype x, y, z;
- INIT_COUNTER(lp, i);
- INIT_PTR(lp, 0, p1, s1);
- INIT_PTR(lp, 1, p2, s2);
- INIT_PTR(lp, 2, p3, s3);
- for (; i--;) {
- GET_DATA_STRIDE(p1, s1, dtype, x);
- m_modf(x, y, z);
- SET_DATA_STRIDE(p2, s2, dtype, y);
- SET_DATA_STRIDE(p3, s3, dtype, z);
- }
- }
-
- static VALUE dfloat_modf(VALUE self) {
- ndfunc_arg_in_t ain[1] = { { cT, 0 } };
- ndfunc_arg_out_t aout[2] = { { cT, 0 }, { cT, 0 } };
- ndfunc_t ndf = { iter_dfloat_modf, STRIDE_LOOP, 1, 2, ain, aout };
-
- return na_ndloop(&ndf, 1, self);
- }
-
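dfloat_modf is the one iterator in this hunk with two outputs (aout lists cT twice), so na_ndloop returns a pair of arrays; following C modf, the first output should be the fractional part and the second the integral part. dfloat_signbit returns a Numo::Bit mask instead. A minimal sketch under those assumptions:

    require "numo/narray"

    x = Numo::DFloat[-2.25, 0.0, 3.5]

    frac, whole = x.modf   # => frac  = [-0.25, 0.0, 0.5]
                           #    whole = [-2.0,  0.0, 3.0]
    x.signbit              # => Numo::Bit[1, 0, 0]  (set only where the IEEE sign bit is 1)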
- static void iter_dfloat_gt(na_loop_t* const lp) {
3796
- size_t i;
3797
- char *p1, *p2;
3798
- BIT_DIGIT* a3;
3799
- size_t p3;
3800
- ssize_t s1, s2, s3;
3801
- dtype x, y;
3802
- BIT_DIGIT b;
3803
- INIT_COUNTER(lp, i);
3804
- INIT_PTR(lp, 0, p1, s1);
3805
- INIT_PTR(lp, 1, p2, s2);
3806
- INIT_PTR_BIT(lp, 2, a3, p3, s3);
3807
- for (; i--;) {
3808
- GET_DATA_STRIDE(p1, s1, dtype, x);
3809
- GET_DATA_STRIDE(p2, s2, dtype, y);
3810
- b = (m_gt(x, y)) ? 1 : 0;
3811
- STORE_BIT(a3, p3, b);
3812
- p3 += s3;
3813
- }
3814
- }
3815
-
3816
- static VALUE dfloat_gt_self(VALUE self, VALUE other) {
3817
- ndfunc_arg_in_t ain[2] = { { cT, 0 }, { cT, 0 } };
3818
- ndfunc_arg_out_t aout[1] = { { numo_cBit, 0 } };
3819
- ndfunc_t ndf = { iter_dfloat_gt, STRIDE_LOOP, 2, 1, ain, aout };
3820
-
3821
- return na_ndloop(&ndf, 2, self, other);
3822
- }
3823
-
3824
- static VALUE dfloat_gt(VALUE self, VALUE other) {
3825
-
3826
- VALUE klass, v;
3827
- klass = na_upcast(rb_obj_class(self), rb_obj_class(other));
3828
- if (klass == cT) {
3829
- return dfloat_gt_self(self, other);
3830
- } else {
3831
- v = rb_funcall(klass, id_cast, 1, self);
3832
- return rb_funcall(v, id_gt, 1, other);
3833
- }
3834
- }
3835
-
3836
- static void iter_dfloat_ge(na_loop_t* const lp) {
3837
- size_t i;
3838
- char *p1, *p2;
3839
- BIT_DIGIT* a3;
3840
- size_t p3;
3841
- ssize_t s1, s2, s3;
3842
- dtype x, y;
3843
- BIT_DIGIT b;
3844
- INIT_COUNTER(lp, i);
3845
- INIT_PTR(lp, 0, p1, s1);
3846
- INIT_PTR(lp, 1, p2, s2);
3847
- INIT_PTR_BIT(lp, 2, a3, p3, s3);
3848
- for (; i--;) {
3849
- GET_DATA_STRIDE(p1, s1, dtype, x);
3850
- GET_DATA_STRIDE(p2, s2, dtype, y);
3851
- b = (m_ge(x, y)) ? 1 : 0;
3852
- STORE_BIT(a3, p3, b);
3853
- p3 += s3;
3854
- }
3855
- }
3856
-
3857
- static VALUE dfloat_ge_self(VALUE self, VALUE other) {
3858
- ndfunc_arg_in_t ain[2] = { { cT, 0 }, { cT, 0 } };
3859
- ndfunc_arg_out_t aout[1] = { { numo_cBit, 0 } };
3860
- ndfunc_t ndf = { iter_dfloat_ge, STRIDE_LOOP, 2, 1, ain, aout };
3861
-
3862
- return na_ndloop(&ndf, 2, self, other);
3863
- }
3864
-
3865
- static VALUE dfloat_ge(VALUE self, VALUE other) {
3866
-
3867
- VALUE klass, v;
3868
- klass = na_upcast(rb_obj_class(self), rb_obj_class(other));
3869
- if (klass == cT) {
3870
- return dfloat_ge_self(self, other);
3871
- } else {
3872
- v = rb_funcall(klass, id_cast, 1, self);
3873
- return rb_funcall(v, id_ge, 1, other);
3874
- }
3875
- }
3876
-
3877
- static void iter_dfloat_lt(na_loop_t* const lp) {
3878
- size_t i;
3879
- char *p1, *p2;
3880
- BIT_DIGIT* a3;
3881
- size_t p3;
3882
- ssize_t s1, s2, s3;
3883
- dtype x, y;
3884
- BIT_DIGIT b;
3885
- INIT_COUNTER(lp, i);
3886
- INIT_PTR(lp, 0, p1, s1);
3887
- INIT_PTR(lp, 1, p2, s2);
3888
- INIT_PTR_BIT(lp, 2, a3, p3, s3);
3889
- for (; i--;) {
3890
- GET_DATA_STRIDE(p1, s1, dtype, x);
3891
- GET_DATA_STRIDE(p2, s2, dtype, y);
3892
- b = (m_lt(x, y)) ? 1 : 0;
3893
- STORE_BIT(a3, p3, b);
3894
- p3 += s3;
3895
- }
3896
- }
3897
-
3898
- static VALUE dfloat_lt_self(VALUE self, VALUE other) {
3899
- ndfunc_arg_in_t ain[2] = { { cT, 0 }, { cT, 0 } };
3900
- ndfunc_arg_out_t aout[1] = { { numo_cBit, 0 } };
3901
- ndfunc_t ndf = { iter_dfloat_lt, STRIDE_LOOP, 2, 1, ain, aout };
3902
-
3903
- return na_ndloop(&ndf, 2, self, other);
3904
- }
3905
-
3906
- static VALUE dfloat_lt(VALUE self, VALUE other) {
3907
-
3908
- VALUE klass, v;
3909
- klass = na_upcast(rb_obj_class(self), rb_obj_class(other));
3910
- if (klass == cT) {
3911
- return dfloat_lt_self(self, other);
3912
- } else {
3913
- v = rb_funcall(klass, id_cast, 1, self);
3914
- return rb_funcall(v, id_lt, 1, other);
3915
- }
3916
- }
3917
-
3918
- static void iter_dfloat_le(na_loop_t* const lp) {
3919
- size_t i;
3920
- char *p1, *p2;
3921
- BIT_DIGIT* a3;
3922
- size_t p3;
3923
- ssize_t s1, s2, s3;
3924
- dtype x, y;
3925
- BIT_DIGIT b;
3926
- INIT_COUNTER(lp, i);
3927
- INIT_PTR(lp, 0, p1, s1);
3928
- INIT_PTR(lp, 1, p2, s2);
3929
- INIT_PTR_BIT(lp, 2, a3, p3, s3);
3930
- for (; i--;) {
3931
- GET_DATA_STRIDE(p1, s1, dtype, x);
3932
- GET_DATA_STRIDE(p2, s2, dtype, y);
3933
- b = (m_le(x, y)) ? 1 : 0;
3934
- STORE_BIT(a3, p3, b);
3935
- p3 += s3;
3936
- }
3937
- }
3938
-
3939
- static VALUE dfloat_le_self(VALUE self, VALUE other) {
3940
- ndfunc_arg_in_t ain[2] = { { cT, 0 }, { cT, 0 } };
3941
- ndfunc_arg_out_t aout[1] = { { numo_cBit, 0 } };
3942
- ndfunc_t ndf = { iter_dfloat_le, STRIDE_LOOP, 2, 1, ain, aout };
3943
-
3944
- return na_ndloop(&ndf, 2, self, other);
3945
- }
3946
-
3947
- static VALUE dfloat_le(VALUE self, VALUE other) {
3948
-
3949
- VALUE klass, v;
3950
- klass = na_upcast(rb_obj_class(self), rb_obj_class(other));
3951
- if (klass == cT) {
3952
- return dfloat_le_self(self, other);
3953
- } else {
3954
- v = rb_funcall(klass, id_cast, 1, self);
3955
- return rb_funcall(v, id_le, 1, other);
3956
- }
3957
- }
3958
-
3959
- static void iter_dfloat_isnan(na_loop_t* const lp) {
3960
- size_t i;
3961
- char* p1;
3962
- BIT_DIGIT* a2;
3963
- size_t p2;
3964
- ssize_t s1, s2;
3965
- size_t* idx1;
3966
- dtype x;
3967
- BIT_DIGIT b;
3968
- INIT_COUNTER(lp, i);
3969
- INIT_PTR_IDX(lp, 0, p1, s1, idx1);
3970
- INIT_PTR_BIT(lp, 1, a2, p2, s2);
3971
- if (idx1) {
3972
- for (; i--;) {
3973
- GET_DATA_INDEX(p1, idx1, dtype, x);
3974
- b = (m_isnan(x)) ? 1 : 0;
3975
- STORE_BIT(a2, p2, b);
3976
- p2 += s2;
3977
- }
3978
- } else {
3979
- for (; i--;) {
3980
- GET_DATA_STRIDE(p1, s1, dtype, x);
3981
- b = (m_isnan(x)) ? 1 : 0;
3982
- STORE_BIT(a2, p2, b);
3983
- p2 += s2;
1875
+ return;
1876
+ //
1877
+ }
1878
+ }
1879
+ for (i = 0; i < n; i++) {
1880
+ GET_DATA_STRIDE(p1, s1, dtype, x);
1881
+ x = m_sign(x);
1882
+ SET_DATA_STRIDE(p2, s2, dtype, x);
1883
+ }
1884
+ //
3984
1885
  }
3985
1886
  }
3986
1887
  }
3987
1888
 
3988
- static VALUE dfloat_isnan(VALUE self) {
1889
+ static VALUE dfloat_sign(VALUE self) {
3989
1890
  ndfunc_arg_in_t ain[1] = { { cT, 0 } };
3990
- ndfunc_arg_out_t aout[1] = { { numo_cBit, 0 } };
3991
- ndfunc_t ndf = { iter_dfloat_isnan, FULL_LOOP, 1, 1, ain, aout };
1891
+ ndfunc_arg_out_t aout[1] = { { cT, 0 } };
1892
+ ndfunc_t ndf = { iter_dfloat_sign, FULL_LOOP, 1, 1, ain, aout };
3992
1893
 
3993
1894
  return na_ndloop(&ndf, 1, self);
3994
1895
  }
3995
1896
 
3996
- static void iter_dfloat_isinf(na_loop_t* const lp) {
3997
- size_t i;
3998
- char* p1;
3999
- BIT_DIGIT* a2;
4000
- size_t p2;
1897
+ static void iter_dfloat_square(na_loop_t* const lp) {
1898
+ size_t i, n;
1899
+ char *p1, *p2;
4001
1900
  ssize_t s1, s2;
4002
- size_t* idx1;
1901
+ size_t *idx1, *idx2;
4003
1902
  dtype x;
4004
- BIT_DIGIT b;
4005
- INIT_COUNTER(lp, i);
1903
+
1904
+ INIT_COUNTER(lp, n);
4006
1905
  INIT_PTR_IDX(lp, 0, p1, s1, idx1);
4007
- INIT_PTR_BIT(lp, 1, a2, p2, s2);
1906
+ INIT_PTR_IDX(lp, 1, p2, s2, idx2);
1907
+
4008
1908
  if (idx1) {
4009
- for (; i--;) {
4010
- GET_DATA_INDEX(p1, idx1, dtype, x);
4011
- b = (m_isinf(x)) ? 1 : 0;
4012
- STORE_BIT(a2, p2, b);
4013
- p2 += s2;
1909
+ if (idx2) {
1910
+ for (i = 0; i < n; i++) {
1911
+ GET_DATA_INDEX(p1, idx1, dtype, x);
1912
+ x = m_square(x);
1913
+ SET_DATA_INDEX(p2, idx2, dtype, x);
1914
+ }
1915
+ } else {
1916
+ for (i = 0; i < n; i++) {
1917
+ GET_DATA_INDEX(p1, idx1, dtype, x);
1918
+ x = m_square(x);
1919
+ SET_DATA_STRIDE(p2, s2, dtype, x);
1920
+ }
4014
1921
  }
4015
1922
  } else {
4016
- for (; i--;) {
4017
- GET_DATA_STRIDE(p1, s1, dtype, x);
4018
- b = (m_isinf(x)) ? 1 : 0;
4019
- STORE_BIT(a2, p2, b);
4020
- p2 += s2;
1923
+ if (idx2) {
1924
+ for (i = 0; i < n; i++) {
1925
+ GET_DATA_STRIDE(p1, s1, dtype, x);
1926
+ x = m_square(x);
1927
+ SET_DATA_INDEX(p2, idx2, dtype, x);
1928
+ }
1929
+ } else {
1930
+ //
1931
+ if (is_aligned(p1, sizeof(dtype)) && is_aligned(p2, sizeof(dtype))) {
1932
+ if (s1 == sizeof(dtype) && s2 == sizeof(dtype)) {
1933
+ for (i = 0; i < n; i++) {
1934
+ ((dtype*)p2)[i] = m_square(((dtype*)p1)[i]);
1935
+ }
1936
+ return;
1937
+ }
1938
+ if (is_aligned_step(s1, sizeof(dtype)) && is_aligned_step(s2, sizeof(dtype))) {
1939
+ //
1940
+ for (i = 0; i < n; i++) {
1941
+ *(dtype*)p2 = m_square(*(dtype*)p1);
1942
+ p1 += s1;
1943
+ p2 += s2;
1944
+ }
1945
+ return;
1946
+ //
1947
+ }
1948
+ }
1949
+ for (i = 0; i < n; i++) {
1950
+ GET_DATA_STRIDE(p1, s1, dtype, x);
1951
+ x = m_square(x);
1952
+ SET_DATA_STRIDE(p2, s2, dtype, x);
1953
+ }
1954
+ //
4021
1955
  }
4022
1956
  }
4023
1957
  }
4024
1958
 
4025
- static VALUE dfloat_isinf(VALUE self) {
1959
+ static VALUE dfloat_square(VALUE self) {
4026
1960
  ndfunc_arg_in_t ain[1] = { { cT, 0 } };
4027
- ndfunc_arg_out_t aout[1] = { { numo_cBit, 0 } };
4028
- ndfunc_t ndf = { iter_dfloat_isinf, FULL_LOOP, 1, 1, ain, aout };
1961
+ ndfunc_arg_out_t aout[1] = { { cT, 0 } };
1962
+ ndfunc_t ndf = { iter_dfloat_square, FULL_LOOP, 1, 1, ain, aout };
4029
1963
 
4030
1964
  return na_ndloop(&ndf, 1, self);
4031
1965
  }
4032
1966
 
4033
- static void iter_dfloat_isposinf(na_loop_t* const lp) {
4034
- size_t i;
4035
- char* p1;
4036
- BIT_DIGIT* a2;
4037
- size_t p2;
4038
- ssize_t s1, s2;
4039
- size_t* idx1;
4040
- dtype x;
4041
- BIT_DIGIT b;
4042
- INIT_COUNTER(lp, i);
4043
- INIT_PTR_IDX(lp, 0, p1, s1, idx1);
4044
- INIT_PTR_BIT(lp, 1, a2, p2, s2);
4045
- if (idx1) {
4046
- for (; i--;) {
4047
- GET_DATA_INDEX(p1, idx1, dtype, x);
4048
- b = (m_isposinf(x)) ? 1 : 0;
4049
- STORE_BIT(a2, p2, b);
4050
- p2 += s2;
1967
+ #define check_intdivzero(y) \
1968
+ {}
1969
+
1970
+ static void iter_dfloat_copysign(na_loop_t* const lp) {
1971
+ size_t i = 0;
1972
+ size_t n;
1973
+ char *p1, *p2, *p3;
1974
+ ssize_t s1, s2, s3;
1975
+
1976
+ INIT_COUNTER(lp, n);
1977
+ INIT_PTR(lp, 0, p1, s1);
1978
+ INIT_PTR(lp, 1, p2, s2);
1979
+ INIT_PTR(lp, 2, p3, s3);
1980
+
1981
+ //
1982
+ if (is_aligned(p1, sizeof(dtype)) && is_aligned(p2, sizeof(dtype)) &&
1983
+ is_aligned(p3, sizeof(dtype))) {
1984
+
1985
+ if (s1 == sizeof(dtype) && s2 == sizeof(dtype) && s3 == sizeof(dtype)) {
1986
+ if (p1 == p3) { // inplace case
1987
+ for (; i < n; i++) {
1988
+ check_intdivzero(((dtype*)p2)[i]);
1989
+ ((dtype*)p1)[i] = m_copysign(((dtype*)p1)[i], ((dtype*)p2)[i]);
1990
+ }
1991
+ } else {
1992
+ for (; i < n; i++) {
1993
+ check_intdivzero(((dtype*)p2)[i]);
1994
+ ((dtype*)p3)[i] = m_copysign(((dtype*)p1)[i], ((dtype*)p2)[i]);
1995
+ }
1996
+ }
1997
+ return;
4051
1998
  }
4052
- } else {
4053
- for (; i--;) {
4054
- GET_DATA_STRIDE(p1, s1, dtype, x);
4055
- b = (m_isposinf(x)) ? 1 : 0;
4056
- STORE_BIT(a2, p2, b);
4057
- p2 += s2;
1999
+
2000
+ if (is_aligned_step(s1, sizeof(dtype)) && is_aligned_step(s2, sizeof(dtype)) &&
2001
+ is_aligned_step(s3, sizeof(dtype))) {
2002
+ //
2003
+
2004
+ if (s2 == 0) { // Broadcasting from scalar value.
2005
+ check_intdivzero(*(dtype*)p2);
2006
+ if (s1 == sizeof(dtype) && s3 == sizeof(dtype)) {
2007
+ if (p1 == p3) { // inplace case
2008
+ for (; i < n; i++) {
2009
+ ((dtype*)p1)[i] = m_copysign(((dtype*)p1)[i], *(dtype*)p2);
2010
+ }
2011
+ } else {
2012
+ for (; i < n; i++) {
2013
+ ((dtype*)p3)[i] = m_copysign(((dtype*)p1)[i], *(dtype*)p2);
2014
+ }
2015
+ }
2016
+ } else {
2017
+ for (i = 0; i < n; i++) {
2018
+ *(dtype*)p3 = m_copysign(*(dtype*)p1, *(dtype*)p2);
2019
+ p1 += s1;
2020
+ p3 += s3;
2021
+ }
2022
+ }
2023
+ } else {
2024
+ if (p1 == p3) { // inplace case
2025
+ for (i = 0; i < n; i++) {
2026
+ check_intdivzero(*(dtype*)p2);
2027
+ *(dtype*)p1 = m_copysign(*(dtype*)p1, *(dtype*)p2);
2028
+ p1 += s1;
2029
+ p2 += s2;
2030
+ }
2031
+ } else {
2032
+ for (i = 0; i < n; i++) {
2033
+ check_intdivzero(*(dtype*)p2);
2034
+ *(dtype*)p3 = m_copysign(*(dtype*)p1, *(dtype*)p2);
2035
+ p1 += s1;
2036
+ p2 += s2;
2037
+ p3 += s3;
2038
+ }
2039
+ }
2040
+ }
2041
+
2042
+ return;
2043
+ //
4058
2044
  }
4059
2045
  }
2046
+ for (i = 0; i < n; i++) {
2047
+ dtype x, y, z;
2048
+ GET_DATA_STRIDE(p1, s1, dtype, x);
2049
+ GET_DATA_STRIDE(p2, s2, dtype, y);
2050
+ check_intdivzero(y);
2051
+ z = m_copysign(x, y);
2052
+ SET_DATA_STRIDE(p3, s3, dtype, z);
2053
+ }
2054
+ //
4060
2055
  }
2056
+ #undef check_intdivzero
4061
2057
 
4062
- static VALUE dfloat_isposinf(VALUE self) {
4063
- ndfunc_arg_in_t ain[1] = { { cT, 0 } };
4064
- ndfunc_arg_out_t aout[1] = { { numo_cBit, 0 } };
4065
- ndfunc_t ndf = { iter_dfloat_isposinf, FULL_LOOP, 1, 1, ain, aout };
2058
+ static VALUE dfloat_copysign_self(VALUE self, VALUE other) {
2059
+ ndfunc_arg_in_t ain[2] = { { cT, 0 }, { cT, 0 } };
2060
+ ndfunc_arg_out_t aout[1] = { { cT, 0 } };
2061
+ ndfunc_t ndf = { iter_dfloat_copysign, STRIDE_LOOP, 2, 1, ain, aout };
4066
2062
 
4067
- return na_ndloop(&ndf, 1, self);
2063
+ return na_ndloop(&ndf, 2, self, other);
2064
+ }
2065
+
2066
+ static VALUE dfloat_copysign(VALUE self, VALUE other) {
2067
+
2068
+ VALUE klass, v;
2069
+
2070
+ klass = na_upcast(rb_obj_class(self), rb_obj_class(other));
2071
+ if (klass == cT) {
2072
+ return dfloat_copysign_self(self, other);
2073
+ } else {
2074
+ v = rb_funcall(klass, id_cast, 1, self);
2075
+ return rb_funcall(v, id_copysign, 1, other);
2076
+ }
4068
2077
  }
4069
2078
 
4070
- static void iter_dfloat_isneginf(na_loop_t* const lp) {
2079
+ static void iter_dfloat_signbit(na_loop_t* const lp) {
4071
2080
  size_t i;
4072
2081
  char* p1;
4073
2082
  BIT_DIGIT* a2;
@@ -4082,61 +2091,49 @@ static void iter_dfloat_isneginf(na_loop_t* const lp) {
4082
2091
  if (idx1) {
4083
2092
  for (; i--;) {
4084
2093
  GET_DATA_INDEX(p1, idx1, dtype, x);
4085
- b = (m_isneginf(x)) ? 1 : 0;
2094
+ b = (m_signbit(x)) ? 1 : 0;
4086
2095
  STORE_BIT(a2, p2, b);
4087
2096
  p2 += s2;
4088
2097
  }
4089
2098
  } else {
4090
2099
  for (; i--;) {
4091
2100
  GET_DATA_STRIDE(p1, s1, dtype, x);
4092
- b = (m_isneginf(x)) ? 1 : 0;
2101
+ b = (m_signbit(x)) ? 1 : 0;
4093
2102
  STORE_BIT(a2, p2, b);
4094
2103
  p2 += s2;
4095
2104
  }
4096
2105
  }
4097
2106
  }
4098
2107
 
4099
- static VALUE dfloat_isneginf(VALUE self) {
2108
+ static VALUE dfloat_signbit(VALUE self) {
4100
2109
  ndfunc_arg_in_t ain[1] = { { cT, 0 } };
4101
2110
  ndfunc_arg_out_t aout[1] = { { numo_cBit, 0 } };
4102
- ndfunc_t ndf = { iter_dfloat_isneginf, FULL_LOOP, 1, 1, ain, aout };
2111
+ ndfunc_t ndf = { iter_dfloat_signbit, FULL_LOOP, 1, 1, ain, aout };
4103
2112
 
4104
2113
  return na_ndloop(&ndf, 1, self);
4105
2114
  }
4106
2115
 
4107
- static void iter_dfloat_isfinite(na_loop_t* const lp) {
2116
+ static void iter_dfloat_modf(na_loop_t* const lp) {
4108
2117
  size_t i;
4109
- char* p1;
4110
- BIT_DIGIT* a2;
4111
- size_t p2;
4112
- ssize_t s1, s2;
4113
- size_t* idx1;
4114
- dtype x;
4115
- BIT_DIGIT b;
2118
+ char *p1, *p2, *p3;
2119
+ ssize_t s1, s2, s3;
2120
+ dtype x, y, z;
4116
2121
  INIT_COUNTER(lp, i);
4117
- INIT_PTR_IDX(lp, 0, p1, s1, idx1);
4118
- INIT_PTR_BIT(lp, 1, a2, p2, s2);
4119
- if (idx1) {
4120
- for (; i--;) {
4121
- GET_DATA_INDEX(p1, idx1, dtype, x);
4122
- b = (m_isfinite(x)) ? 1 : 0;
4123
- STORE_BIT(a2, p2, b);
4124
- p2 += s2;
4125
- }
4126
- } else {
4127
- for (; i--;) {
4128
- GET_DATA_STRIDE(p1, s1, dtype, x);
4129
- b = (m_isfinite(x)) ? 1 : 0;
4130
- STORE_BIT(a2, p2, b);
4131
- p2 += s2;
4132
- }
2122
+ INIT_PTR(lp, 0, p1, s1);
2123
+ INIT_PTR(lp, 1, p2, s2);
2124
+ INIT_PTR(lp, 2, p3, s3);
2125
+ for (; i--;) {
2126
+ GET_DATA_STRIDE(p1, s1, dtype, x);
2127
+ m_modf(x, y, z);
2128
+ SET_DATA_STRIDE(p2, s2, dtype, y);
2129
+ SET_DATA_STRIDE(p3, s3, dtype, z);
4133
2130
  }
4134
2131
  }
4135
2132
 
4136
- static VALUE dfloat_isfinite(VALUE self) {
+ static VALUE dfloat_modf(VALUE self) {
  ndfunc_arg_in_t ain[1] = { { cT, 0 } };
- ndfunc_arg_out_t aout[1] = { { numo_cBit, 0 } };
- ndfunc_t ndf = { iter_dfloat_isfinite, FULL_LOOP, 1, 1, ain, aout };
+ ndfunc_arg_out_t aout[2] = { { cT, 0 }, { cT, 0 } };
+ ndfunc_t ndf = { iter_dfloat_modf, STRIDE_LOOP, 1, 2, ain, aout };

  return na_ndloop(&ndf, 1, self);
  }