numo-narray-alt 0.9.11 → 0.9.12

This diff shows the changes between publicly released versions of the package as they appear in its public registry. It is provided for informational purposes only.
Files changed (91)
  1. checksums.yaml +4 -4
  2. data/Gemfile +0 -1
  3. data/README.md +7 -0
  4. data/ext/numo/narray/numo/narray.h +2 -2
  5. data/ext/numo/narray/numo/types/robj_macro.h +1 -1
  6. data/ext/numo/narray/src/mh/bincount.h +233 -0
  7. data/ext/numo/narray/src/mh/bit/and.h +225 -0
  8. data/ext/numo/narray/src/mh/bit/left_shift.h +225 -0
  9. data/ext/numo/narray/src/mh/bit/not.h +173 -0
  10. data/ext/numo/narray/src/mh/bit/or.h +225 -0
  11. data/ext/numo/narray/src/mh/bit/right_shift.h +225 -0
  12. data/ext/numo/narray/src/mh/bit/xor.h +225 -0
  13. data/ext/numo/narray/src/mh/coerce_cast.h +9 -0
  14. data/ext/numo/narray/src/mh/comp/binary_func.h +37 -0
  15. data/ext/numo/narray/src/mh/comp/eq.h +26 -0
  16. data/ext/numo/narray/src/mh/comp/ge.h +26 -0
  17. data/ext/numo/narray/src/mh/comp/gt.h +26 -0
  18. data/ext/numo/narray/src/mh/comp/le.h +26 -0
  19. data/ext/numo/narray/src/mh/comp/lt.h +26 -0
  20. data/ext/numo/narray/src/mh/comp/ne.h +26 -0
  21. data/ext/numo/narray/src/mh/comp/nearly_eq.h +26 -0
  22. data/ext/numo/narray/src/mh/divmod.h +142 -0
  23. data/ext/numo/narray/src/mh/eye.h +1 -1
  24. data/ext/numo/narray/src/mh/fill.h +94 -0
  25. data/ext/numo/narray/src/mh/format.h +108 -0
  26. data/ext/numo/narray/src/mh/format_to_a.h +89 -0
  27. data/ext/numo/narray/src/mh/inspect.h +33 -0
  28. data/ext/numo/narray/src/mh/isfinite.h +42 -0
  29. data/ext/numo/narray/src/mh/isinf.h +42 -0
  30. data/ext/numo/narray/src/mh/isnan.h +42 -0
  31. data/ext/numo/narray/src/mh/isneginf.h +42 -0
  32. data/ext/numo/narray/src/mh/isposinf.h +42 -0
  33. data/ext/numo/narray/src/mh/math/acos.h +2 -2
  34. data/ext/numo/narray/src/mh/math/acosh.h +2 -2
  35. data/ext/numo/narray/src/mh/math/asin.h +2 -2
  36. data/ext/numo/narray/src/mh/math/asinh.h +2 -2
  37. data/ext/numo/narray/src/mh/math/atan.h +2 -2
  38. data/ext/numo/narray/src/mh/math/atan2.h +3 -3
  39. data/ext/numo/narray/src/mh/math/atanh.h +2 -2
  40. data/ext/numo/narray/src/mh/math/cbrt.h +2 -2
  41. data/ext/numo/narray/src/mh/math/cos.h +2 -2
  42. data/ext/numo/narray/src/mh/math/cosh.h +2 -2
  43. data/ext/numo/narray/src/mh/math/erf.h +2 -2
  44. data/ext/numo/narray/src/mh/math/erfc.h +2 -2
  45. data/ext/numo/narray/src/mh/math/exp.h +2 -2
  46. data/ext/numo/narray/src/mh/math/exp10.h +2 -2
  47. data/ext/numo/narray/src/mh/math/exp2.h +2 -2
  48. data/ext/numo/narray/src/mh/math/expm1.h +2 -2
  49. data/ext/numo/narray/src/mh/math/frexp.h +3 -3
  50. data/ext/numo/narray/src/mh/math/hypot.h +3 -3
  51. data/ext/numo/narray/src/mh/math/ldexp.h +3 -3
  52. data/ext/numo/narray/src/mh/math/log.h +2 -2
  53. data/ext/numo/narray/src/mh/math/log10.h +2 -2
  54. data/ext/numo/narray/src/mh/math/log1p.h +2 -2
  55. data/ext/numo/narray/src/mh/math/log2.h +2 -2
  56. data/ext/numo/narray/src/mh/math/sin.h +2 -2
  57. data/ext/numo/narray/src/mh/math/sinc.h +2 -2
  58. data/ext/numo/narray/src/mh/math/sinh.h +2 -2
  59. data/ext/numo/narray/src/mh/math/sqrt.h +8 -8
  60. data/ext/numo/narray/src/mh/math/tan.h +2 -2
  61. data/ext/numo/narray/src/mh/math/tanh.h +2 -2
  62. data/ext/numo/narray/src/mh/math/unary_func.h +3 -3
  63. data/ext/numo/narray/src/mh/op/add.h +78 -0
  64. data/ext/numo/narray/src/mh/op/binary_func.h +423 -0
  65. data/ext/numo/narray/src/mh/op/div.h +118 -0
  66. data/ext/numo/narray/src/mh/op/mod.h +108 -0
  67. data/ext/numo/narray/src/mh/op/mul.h +78 -0
  68. data/ext/numo/narray/src/mh/op/sub.h +78 -0
  69. data/ext/numo/narray/src/mh/rand.h +2 -2
  70. data/ext/numo/narray/src/mh/round/ceil.h +11 -0
  71. data/ext/numo/narray/src/mh/round/floor.h +11 -0
  72. data/ext/numo/narray/src/mh/round/rint.h +9 -0
  73. data/ext/numo/narray/src/mh/round/round.h +11 -0
  74. data/ext/numo/narray/src/mh/round/trunc.h +11 -0
  75. data/ext/numo/narray/src/mh/round/unary_func.h +127 -0
  76. data/ext/numo/narray/src/mh/to_a.h +78 -0
  77. data/ext/numo/narray/src/t_bit.c +45 -234
  78. data/ext/numo/narray/src/t_dcomplex.c +584 -1809
  79. data/ext/numo/narray/src/t_dfloat.c +429 -2432
  80. data/ext/numo/narray/src/t_int16.c +481 -2283
  81. data/ext/numo/narray/src/t_int32.c +481 -2283
  82. data/ext/numo/narray/src/t_int64.c +481 -2283
  83. data/ext/numo/narray/src/t_int8.c +408 -1873
  84. data/ext/numo/narray/src/t_robject.c +448 -1977
  85. data/ext/numo/narray/src/t_scomplex.c +584 -1809
  86. data/ext/numo/narray/src/t_sfloat.c +429 -2434
  87. data/ext/numo/narray/src/t_uint16.c +480 -2278
  88. data/ext/numo/narray/src/t_uint32.c +480 -2278
  89. data/ext/numo/narray/src/t_uint64.c +480 -2278
  90. data/ext/numo/narray/src/t_uint8.c +407 -1868
  91. metadata +41 -2
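
The new headers under data/ext/numo/narray/src/mh/ factor out the per-type method bodies (arithmetic, comparison, rounding, bit operations, bincount, and the is* predicates) that each generated t_*.c previously carried inline, which is why the t_*.c files above each lose hundreds to thousands of lines. A minimal usage sketch of a few of the corresponding Ruby-level methods, assuming this fork keeps the upstream Numo::NArray API (method names are taken from the added header names; the values and printed results are illustrative only):

    require "numo/narray"

    a = Numo::SFloat[1.0, Float::NAN, Float::INFINITY, -2.5]

    # Float predicates backed by mh/isnan.h, mh/isinf.h, mh/isfinite.h
    p a.isnan     # => Numo::Bit[0, 1, 0, 0]
    p a.isfinite  # => Numo::Bit[1, 0, 0, 1]

    # Element-wise comparison backed by the mh/comp/ headers (NaN compares false)
    p a.gt(0)     # => Numo::Bit[1, 0, 1, 0]

    # Integer-only methods backed by mh/bincount.h and the mh/bit/ headers
    i = Numo::Int32[0, 1, 1, 3]
    p i.bincount  # counts occurrences of each non-negative value: [1, 2, 0, 1]
    p i & 1       # element-wise bitwise AND: [0, 1, 1, 1]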
@@ -42,7 +42,36 @@ static ID id_to_a;
  VALUE cT;
  extern VALUE cRT;
 
+ #include "mh/coerce_cast.h"
+ #include "mh/to_a.h"
+ #include "mh/fill.h"
+ #include "mh/format.h"
+ #include "mh/format_to_a.h"
+ #include "mh/inspect.h"
+ #include "mh/op/add.h"
+ #include "mh/op/sub.h"
+ #include "mh/op/mul.h"
+ #include "mh/op/div.h"
+ #include "mh/op/mod.h"
+ #include "mh/divmod.h"
+ #include "mh/round/floor.h"
+ #include "mh/round/round.h"
+ #include "mh/round/ceil.h"
+ #include "mh/round/trunc.h"
+ #include "mh/round/rint.h"
+ #include "mh/comp/eq.h"
+ #include "mh/comp/ne.h"
+ #include "mh/comp/nearly_eq.h"
+ #include "mh/comp/gt.h"
+ #include "mh/comp/ge.h"
+ #include "mh/comp/lt.h"
+ #include "mh/comp/le.h"
  #include "mh/clip.h"
+ #include "mh/isnan.h"
+ #include "mh/isinf.h"
+ #include "mh/isposinf.h"
+ #include "mh/isneginf.h"
+ #include "mh/isfinite.h"
  #include "mh/sum.h"
  #include "mh/prod.h"
  #include "mh/mean.h"
@@ -98,7 +127,43 @@ extern VALUE cRT;
 
  typedef float sfloat; // Type aliases for shorter notation
  // following the codebase naming convention.
+ DEF_NARRAY_COERCE_CAST_METHOD_FUNC(sfloat)
+ DEF_NARRAY_TO_A_METHOD_FUNC(sfloat)
+ DEF_NARRAY_FILL_METHOD_FUNC(sfloat)
+ DEF_NARRAY_FORMAT_METHOD_FUNC(sfloat)
+ DEF_NARRAY_FORMAT_TO_A_METHOD_FUNC(sfloat)
+ DEF_NARRAY_INSPECT_METHOD_FUNC(sfloat)
+ #ifdef __SSE2__
+ DEF_NARRAY_SFLT_ADD_SSE2_METHOD_FUNC()
+ DEF_NARRAY_SFLT_SUB_SSE2_METHOD_FUNC()
+ DEF_NARRAY_SFLT_MUL_SSE2_METHOD_FUNC()
+ DEF_NARRAY_SFLT_DIV_SSE2_METHOD_FUNC()
+ #else
+ DEF_NARRAY_ADD_METHOD_FUNC(sfloat, numo_cSFloat)
+ DEF_NARRAY_SUB_METHOD_FUNC(sfloat, numo_cSFloat)
+ DEF_NARRAY_MUL_METHOD_FUNC(sfloat, numo_cSFloat)
+ DEF_NARRAY_FLT_DIV_METHOD_FUNC(sfloat, numo_cSFloat)
+ #endif
+ DEF_NARRAY_FLT_MOD_METHOD_FUNC(sfloat, numo_cSFloat)
+ DEF_NARRAY_FLT_DIVMOD_METHOD_FUNC(sfloat, numo_cSFloat)
+ DEF_NARRAY_FLT_FLOOR_METHOD_FUNC(sfloat, numo_cSFloat)
+ DEF_NARRAY_FLT_ROUND_METHOD_FUNC(sfloat, numo_cSFloat)
+ DEF_NARRAY_FLT_CEIL_METHOD_FUNC(sfloat, numo_cSFloat)
+ DEF_NARRAY_FLT_TRUNC_METHOD_FUNC(sfloat, numo_cSFloat)
+ DEF_NARRAY_FLT_RINT_METHOD_FUNC(sfloat, numo_cSFloat)
+ DEF_NARRAY_EQ_METHOD_FUNC(sfloat, numo_cSFloat)
+ DEF_NARRAY_NE_METHOD_FUNC(sfloat, numo_cSFloat)
+ DEF_NARRAY_NEARLY_EQ_METHOD_FUNC(sfloat, numo_cSFloat)
+ DEF_NARRAY_GT_METHOD_FUNC(sfloat, numo_cSFloat)
+ DEF_NARRAY_GE_METHOD_FUNC(sfloat, numo_cSFloat)
+ DEF_NARRAY_LT_METHOD_FUNC(sfloat, numo_cSFloat)
+ DEF_NARRAY_LE_METHOD_FUNC(sfloat, numo_cSFloat)
  DEF_NARRAY_CLIP_METHOD_FUNC(sfloat, numo_cSFloat)
+ DEF_NARRAY_FLT_ISNAN_METHOD_FUNC(sfloat, numo_cSFloat)
+ DEF_NARRAY_FLT_ISINF_METHOD_FUNC(sfloat, numo_cSFloat)
+ DEF_NARRAY_FLT_ISPOSINF_METHOD_FUNC(sfloat, numo_cSFloat)
+ DEF_NARRAY_FLT_ISNEGINF_METHOD_FUNC(sfloat, numo_cSFloat)
+ DEF_NARRAY_FLT_ISFINITE_METHOD_FUNC(sfloat, numo_cSFloat)
  DEF_NARRAY_FLT_SUM_METHOD_FUNC(sfloat, numo_cSFloat)
  DEF_NARRAY_FLT_PROD_METHOD_FUNC(sfloat, numo_cSFloat)
  DEF_NARRAY_FLT_MEAN_METHOD_FUNC(sfloat, numo_cSFloat, float, numo_cSFloat)
@@ -1275,171 +1340,6 @@ static VALUE sfloat_aset(int argc, VALUE* argv, VALUE self) {
  return argv[argc];
  }
 
- static VALUE sfloat_coerce_cast(VALUE self, VALUE type) {
- return Qnil;
- }
-
- static void iter_sfloat_to_a(na_loop_t* const lp) {
- size_t i, s1;
- char* p1;
- size_t* idx1;
- dtype x;
- volatile VALUE a, y;
-
- INIT_COUNTER(lp, i);
- INIT_PTR_IDX(lp, 0, p1, s1, idx1);
- a = rb_ary_new2(i);
- rb_ary_push(lp->args[1].value, a);
- if (idx1) {
- for (; i--;) {
- GET_DATA_INDEX(p1, idx1, dtype, x);
- y = m_data_to_num(x);
- rb_ary_push(a, y);
- }
- } else {
- for (; i--;) {
- GET_DATA_STRIDE(p1, s1, dtype, x);
- y = m_data_to_num(x);
- rb_ary_push(a, y);
- }
- }
- }
-
- static VALUE sfloat_to_a(VALUE self) {
- ndfunc_arg_in_t ain[3] = { { Qnil, 0 }, { sym_loop_opt }, { sym_option } };
- ndfunc_arg_out_t aout[1] = { { rb_cArray, 0 } }; // dummy?
- ndfunc_t ndf = { iter_sfloat_to_a, FULL_LOOP_NIP, 3, 1, ain, aout };
- return na_ndloop_cast_narray_to_rarray(&ndf, self, Qnil);
- }
-
- static void iter_sfloat_fill(na_loop_t* const lp) {
- size_t i;
- char* p1;
- ssize_t s1;
- size_t* idx1;
- VALUE x = lp->option;
- dtype y;
- INIT_COUNTER(lp, i);
- INIT_PTR_IDX(lp, 0, p1, s1, idx1);
- y = m_num_to_data(x);
- if (idx1) {
- for (; i--;) {
- SET_DATA_INDEX(p1, idx1, dtype, y);
- }
- } else {
- for (; i--;) {
- SET_DATA_STRIDE(p1, s1, dtype, y);
- }
- }
- }
-
- static VALUE sfloat_fill(VALUE self, VALUE val) {
- ndfunc_arg_in_t ain[2] = { { OVERWRITE, 0 }, { sym_option } };
- ndfunc_t ndf = { iter_sfloat_fill, FULL_LOOP, 2, 0, ain, 0 };
-
- na_ndloop(&ndf, 2, self, val);
- return self;
- }
-
- static VALUE format_sfloat(VALUE fmt, dtype* x) {
- // fix-me
- char s[48];
- int n;
-
- if (NIL_P(fmt)) {
- n = m_sprintf(s, *x);
- return rb_str_new(s, n);
- }
- return rb_funcall(fmt, '%', 1, m_data_to_num(*x));
- }
-
- static void iter_sfloat_format(na_loop_t* const lp) {
- size_t i;
- char *p1, *p2;
- ssize_t s1, s2;
- size_t* idx1;
- dtype* x;
- VALUE y;
- VALUE fmt = lp->option;
- INIT_COUNTER(lp, i);
- INIT_PTR_IDX(lp, 0, p1, s1, idx1);
- INIT_PTR(lp, 1, p2, s2);
- if (idx1) {
- for (; i--;) {
- x = (dtype*)(p1 + *idx1);
- idx1++;
- y = format_sfloat(fmt, x);
- SET_DATA_STRIDE(p2, s2, VALUE, y);
- }
- } else {
- for (; i--;) {
- x = (dtype*)p1;
- p1 += s1;
- y = format_sfloat(fmt, x);
- SET_DATA_STRIDE(p2, s2, VALUE, y);
- }
- }
- }
-
- static VALUE sfloat_format(int argc, VALUE* argv, VALUE self) {
- VALUE fmt = Qnil;
-
- ndfunc_arg_in_t ain[2] = { { Qnil, 0 }, { sym_option } };
- ndfunc_arg_out_t aout[1] = { { numo_cRObject, 0 } };
- ndfunc_t ndf = { iter_sfloat_format, FULL_LOOP_NIP, 2, 1, ain, aout };
-
- rb_scan_args(argc, argv, "01", &fmt);
- return na_ndloop(&ndf, 2, self, fmt);
- }
-
- static void iter_sfloat_format_to_a(na_loop_t* const lp) {
- size_t i;
- char* p1;
- ssize_t s1;
- size_t* idx1;
- dtype* x;
- VALUE y;
- volatile VALUE a;
- VALUE fmt = lp->option;
- INIT_COUNTER(lp, i);
- INIT_PTR_IDX(lp, 0, p1, s1, idx1);
- a = rb_ary_new2(i);
- rb_ary_push(lp->args[1].value, a);
- if (idx1) {
- for (; i--;) {
- x = (dtype*)(p1 + *idx1);
- idx1++;
- y = format_sfloat(fmt, x);
- rb_ary_push(a, y);
- }
- } else {
- for (; i--;) {
- x = (dtype*)p1;
- p1 += s1;
- y = format_sfloat(fmt, x);
- rb_ary_push(a, y);
- }
- }
- }
-
- static VALUE sfloat_format_to_a(int argc, VALUE* argv, VALUE self) {
- VALUE fmt = Qnil;
- ndfunc_arg_in_t ain[3] = { { Qnil, 0 }, { sym_loop_opt }, { sym_option } };
- ndfunc_arg_out_t aout[1] = { { rb_cArray, 0 } }; // dummy?
- ndfunc_t ndf = { iter_sfloat_format_to_a, FULL_LOOP_NIP, 3, 1, ain, aout };
-
- rb_scan_args(argc, argv, "01", &fmt);
- return na_ndloop_cast_narray_to_rarray(&ndf, self, fmt);
- }
-
- static VALUE iter_sfloat_inspect(char* ptr, size_t pos, VALUE fmt) {
- return format_sfloat(fmt, (dtype*)(ptr + pos));
- }
-
- static VALUE sfloat_inspect(VALUE ary) {
- return na_ndloop_inspect(ary, iter_sfloat_inspect, Qnil);
- }
-
  static void iter_sfloat_each(na_loop_t* const lp) {
  size_t i, s1;
  char* p1;
@@ -1722,2354 +1622,461 @@ static VALUE sfloat_abs(VALUE self) {
  return na_ndloop(&ndf, 1, self);
  }
 
- #define check_intdivzero(y) \
- {}
-
- static void iter_sfloat_add(na_loop_t* const lp) {
- size_t i = 0;
- size_t n;
+ static void iter_sfloat_pow(na_loop_t* const lp) {
+ size_t i;
  char *p1, *p2, *p3;
  ssize_t s1, s2, s3;
-
- #ifdef __SSE2__
- size_t cnt;
- size_t cnt_simd_loop = -1;
-
- __m128 a;
- __m128 b;
-
- size_t num_pack; // Number of elements packed for SIMD.
- num_pack = SIMD_ALIGNMENT_SIZE / sizeof(dtype);
- #endif
- INIT_COUNTER(lp, n);
+ dtype x, y;
+ INIT_COUNTER(lp, i);
  INIT_PTR(lp, 0, p1, s1);
  INIT_PTR(lp, 1, p2, s2);
  INIT_PTR(lp, 2, p3, s3);
-
- //
- if (is_aligned(p1, sizeof(dtype)) && is_aligned(p2, sizeof(dtype)) &&
- is_aligned(p3, sizeof(dtype))) {
-
- if (s1 == sizeof(dtype) && s2 == sizeof(dtype) && s3 == sizeof(dtype)) {
- #ifdef __SSE2__
- // Check number of elements. & Check same alignment.
- if ((n >= num_pack) &&
- is_same_aligned3(
- &((dtype*)p1)[i], &((dtype*)p2)[i], &((dtype*)p3)[i], SIMD_ALIGNMENT_SIZE
- )) {
- // Calculate up to the position just before the start of SIMD computation.
- cnt = get_count_of_elements_not_aligned_to_simd_size(
- &((dtype*)p1)[i], SIMD_ALIGNMENT_SIZE, sizeof(dtype)
- );
- #endif
- if (p1 == p3) { // inplace case
- #ifdef __SSE2__
- for (; i < cnt; i++) {
- #else
- for (; i < n; i++) {
- check_intdivzero(((dtype*)p2)[i]);
- #endif
- ((dtype*)p1)[i] = m_add(((dtype*)p1)[i], ((dtype*)p2)[i]);
- }
- } else {
- #ifdef __SSE2__
- for (; i < cnt; i++) {
- #else
- for (; i < n; i++) {
- check_intdivzero(((dtype*)p2)[i]);
- #endif
- ((dtype*)p3)[i] = m_add(((dtype*)p1)[i], ((dtype*)p2)[i]);
- }
- }
-
- #ifdef __SSE2__
- // Get the count of SIMD computation loops.
- cnt_simd_loop = (n - i) % num_pack;
-
- // SIMD computation.
- if (p1 == p3) { // inplace case
- for (; i < n - cnt_simd_loop; i += num_pack) {
- a = _mm_load_ps(&((dtype*)p1)[i]);
- b = _mm_load_ps(&((dtype*)p2)[i]);
- a = _mm_add_ps(a, b);
- _mm_store_ps(&((dtype*)p1)[i], a);
- }
- } else {
- for (; i < n - cnt_simd_loop; i += num_pack) {
- a = _mm_load_ps(&((dtype*)p1)[i]);
- b = _mm_load_ps(&((dtype*)p2)[i]);
- a = _mm_add_ps(a, b);
- _mm_stream_ps(&((dtype*)p3)[i], a);
- }
- }
- }
-
- // Compute the remainder of the SIMD operation.
- if (cnt_simd_loop != 0) {
- if (p1 == p3) { // inplace case
- for (; i < n; i++) {
- check_intdivzero(((dtype*)p2)[i]);
- ((dtype*)p1)[i] = m_add(((dtype*)p1)[i], ((dtype*)p2)[i]);
- }
- } else {
- for (; i < n; i++) {
- check_intdivzero(((dtype*)p2)[i]);
- ((dtype*)p3)[i] = m_add(((dtype*)p1)[i], ((dtype*)p2)[i]);
- }
- }
- }
- #endif
- return;
- }
-
- if (is_aligned_step(s1, sizeof(dtype)) && is_aligned_step(s2, sizeof(dtype)) &&
- is_aligned_step(s3, sizeof(dtype))) {
- //
-
- if (s2 == 0) { // Broadcasting from scalar value.
- check_intdivzero(*(dtype*)p2);
- if (s1 == sizeof(dtype) && s3 == sizeof(dtype)) {
- #ifdef __SSE2__
- // Broadcast a scalar value and use it for SIMD computation.
- b = _mm_load1_ps(&((dtype*)p2)[0]);
-
- // Check number of elements. & Check same alignment.
- if ((n >= num_pack) &&
- is_same_aligned2(&((dtype*)p1)[i], &((dtype*)p3)[i], SIMD_ALIGNMENT_SIZE)) {
- // Calculate up to the position just before the start of SIMD computation.
- cnt = get_count_of_elements_not_aligned_to_simd_size(
- &((dtype*)p1)[i], SIMD_ALIGNMENT_SIZE, sizeof(dtype)
- );
- #endif
- if (p1 == p3) { // inplace case
- #ifdef __SSE2__
- for (; i < cnt; i++) {
- #else
- for (; i < n; i++) {
- #endif
- ((dtype*)p1)[i] = m_add(((dtype*)p1)[i], *(dtype*)p2);
- }
- } else {
- #ifdef __SSE2__
- for (; i < cnt; i++) {
- #else
- for (; i < n; i++) {
- #endif
- ((dtype*)p3)[i] = m_add(((dtype*)p1)[i], *(dtype*)p2);
- }
- }
-
- #ifdef __SSE2__
- // Get the count of SIMD computation loops.
- cnt_simd_loop = (n - i) % num_pack;
-
- // SIMD computation.
- if (p1 == p3) { // inplace case
- for (; i < n - cnt_simd_loop; i += num_pack) {
- a = _mm_load_ps(&((dtype*)p1)[i]);
- a = _mm_add_ps(a, b);
- _mm_store_ps(&((dtype*)p1)[i], a);
- }
- } else {
- for (; i < n - cnt_simd_loop; i += num_pack) {
- a = _mm_load_ps(&((dtype*)p1)[i]);
- a = _mm_add_ps(a, b);
- _mm_stream_ps(&((dtype*)p3)[i], a);
- }
- }
- }
-
- // Compute the remainder of the SIMD operation.
- if (cnt_simd_loop != 0) {
- if (p1 == p3) { // inplace case
- for (; i < n; i++) {
- ((dtype*)p1)[i] = m_add(((dtype*)p1)[i], *(dtype*)p2);
- }
- } else {
- for (; i < n; i++) {
- ((dtype*)p3)[i] = m_add(((dtype*)p1)[i], *(dtype*)p2);
- }
- }
- }
- #endif
- } else {
- for (i = 0; i < n; i++) {
- *(dtype*)p3 = m_add(*(dtype*)p1, *(dtype*)p2);
- p1 += s1;
- p3 += s3;
- }
- }
- } else {
- if (p1 == p3) { // inplace case
- for (i = 0; i < n; i++) {
- check_intdivzero(*(dtype*)p2);
- *(dtype*)p1 = m_add(*(dtype*)p1, *(dtype*)p2);
- p1 += s1;
- p2 += s2;
- }
- } else {
- for (i = 0; i < n; i++) {
- check_intdivzero(*(dtype*)p2);
- *(dtype*)p3 = m_add(*(dtype*)p1, *(dtype*)p2);
- p1 += s1;
- p2 += s2;
- p3 += s3;
- }
- }
- }
-
- return;
- //
- }
- }
- for (i = 0; i < n; i++) {
- dtype x, y, z;
+ for (; i--;) {
  GET_DATA_STRIDE(p1, s1, dtype, x);
  GET_DATA_STRIDE(p2, s2, dtype, y);
- check_intdivzero(y);
- z = m_add(x, y);
- SET_DATA_STRIDE(p3, s3, dtype, z);
+ x = m_pow(x, y);
+ SET_DATA_STRIDE(p3, s3, dtype, x);
+ }
+ }
+
+ static void iter_sfloat_pow_int32(na_loop_t* const lp) {
+ size_t i;
+ char *p1, *p2, *p3;
+ ssize_t s1, s2, s3;
+ dtype x;
+ int32_t y;
+ INIT_COUNTER(lp, i);
+ INIT_PTR(lp, 0, p1, s1);
+ INIT_PTR(lp, 1, p2, s2);
+ INIT_PTR(lp, 2, p3, s3);
+ for (; i--;) {
+ GET_DATA_STRIDE(p1, s1, dtype, x);
+ GET_DATA_STRIDE(p2, s2, int32_t, y);
+ x = m_pow_int(x, y);
+ SET_DATA_STRIDE(p3, s3, dtype, x);
  }
- //
  }
- #undef check_intdivzero
 
- static VALUE sfloat_add_self(VALUE self, VALUE other) {
+ static VALUE sfloat_pow_self(VALUE self, VALUE other) {
  ndfunc_arg_in_t ain[2] = { { cT, 0 }, { cT, 0 } };
+ ndfunc_arg_in_t ain_i[2] = { { cT, 0 }, { numo_cInt32, 0 } };
  ndfunc_arg_out_t aout[1] = { { cT, 0 } };
- ndfunc_t ndf = { iter_sfloat_add, STRIDE_LOOP, 2, 1, ain, aout };
+ ndfunc_t ndf = { iter_sfloat_pow, STRIDE_LOOP, 2, 1, ain, aout };
+ ndfunc_t ndf_i = { iter_sfloat_pow_int32, STRIDE_LOOP, 2, 1, ain_i, aout };
 
- return na_ndloop(&ndf, 2, self, other);
+ // fixme : use na.integer?
+ if (FIXNUM_P(other) || rb_obj_is_kind_of(other, numo_cInt32)) {
+ return na_ndloop(&ndf_i, 2, self, other);
+ } else {
+ return na_ndloop(&ndf, 2, self, other);
+ }
  }
 
- static VALUE sfloat_add(VALUE self, VALUE other) {
+ static VALUE sfloat_pow(VALUE self, VALUE other) {
 
  VALUE klass, v;
-
  klass = na_upcast(rb_obj_class(self), rb_obj_class(other));
  if (klass == cT) {
- return sfloat_add_self(self, other);
+ return sfloat_pow_self(self, other);
  } else {
  v = rb_funcall(klass, id_cast, 1, self);
- return rb_funcall(v, '+', 1, other);
+ return rb_funcall(v, id_pow, 1, other);
  }
  }
 
1958
- #define check_intdivzero(y) \
1959
- {}
1960
-
1961
- static void iter_sfloat_sub(na_loop_t* const lp) {
1962
- size_t i = 0;
1963
- size_t n;
1964
- char *p1, *p2, *p3;
1965
- ssize_t s1, s2, s3;
1966
-
1967
- #ifdef __SSE2__
1968
- size_t cnt;
1969
- size_t cnt_simd_loop = -1;
1970
-
1971
- __m128 a;
1972
- __m128 b;
1687
+ static void iter_sfloat_minus(na_loop_t* const lp) {
1688
+ size_t i, n;
1689
+ char *p1, *p2;
1690
+ ssize_t s1, s2;
1691
+ size_t *idx1, *idx2;
1692
+ dtype x;
1973
1693
 
1974
- size_t num_pack; // Number of elements packed for SIMD.
1975
- num_pack = SIMD_ALIGNMENT_SIZE / sizeof(dtype);
1976
- #endif
1977
1694
  INIT_COUNTER(lp, n);
1978
- INIT_PTR(lp, 0, p1, s1);
1979
- INIT_PTR(lp, 1, p2, s2);
1980
- INIT_PTR(lp, 2, p3, s3);
1981
-
1982
- //
1983
- if (is_aligned(p1, sizeof(dtype)) && is_aligned(p2, sizeof(dtype)) &&
1984
- is_aligned(p3, sizeof(dtype))) {
1985
-
1986
- if (s1 == sizeof(dtype) && s2 == sizeof(dtype) && s3 == sizeof(dtype)) {
1987
- #ifdef __SSE2__
1988
- // Check number of elements. & Check same alignment.
1989
- if ((n >= num_pack) &&
1990
- is_same_aligned3(
1991
- &((dtype*)p1)[i], &((dtype*)p2)[i], &((dtype*)p3)[i], SIMD_ALIGNMENT_SIZE
1992
- )) {
1993
- // Calculate up to the position just before the start of SIMD computation.
1994
- cnt = get_count_of_elements_not_aligned_to_simd_size(
1995
- &((dtype*)p1)[i], SIMD_ALIGNMENT_SIZE, sizeof(dtype)
1996
- );
1997
- #endif
1998
- if (p1 == p3) { // inplace case
1999
- #ifdef __SSE2__
2000
- for (; i < cnt; i++) {
2001
- #else
2002
- for (; i < n; i++) {
2003
- check_intdivzero(((dtype*)p2)[i]);
2004
- #endif
2005
- ((dtype*)p1)[i] = m_sub(((dtype*)p1)[i], ((dtype*)p2)[i]);
2006
- }
2007
- } else {
2008
- #ifdef __SSE2__
2009
- for (; i < cnt; i++) {
2010
- #else
2011
- for (; i < n; i++) {
2012
- check_intdivzero(((dtype*)p2)[i]);
2013
- #endif
2014
- ((dtype*)p3)[i] = m_sub(((dtype*)p1)[i], ((dtype*)p2)[i]);
2015
- }
2016
- }
2017
-
2018
- #ifdef __SSE2__
2019
- // Get the count of SIMD computation loops.
2020
- cnt_simd_loop = (n - i) % num_pack;
1695
+ INIT_PTR_IDX(lp, 0, p1, s1, idx1);
1696
+ INIT_PTR_IDX(lp, 1, p2, s2, idx2);
2021
1697
 
2022
- // SIMD computation.
2023
- if (p1 == p3) { // inplace case
2024
- for (; i < n - cnt_simd_loop; i += num_pack) {
2025
- a = _mm_load_ps(&((dtype*)p1)[i]);
2026
- b = _mm_load_ps(&((dtype*)p2)[i]);
2027
- a = _mm_sub_ps(a, b);
2028
- _mm_store_ps(&((dtype*)p1)[i], a);
2029
- }
2030
- } else {
2031
- for (; i < n - cnt_simd_loop; i += num_pack) {
2032
- a = _mm_load_ps(&((dtype*)p1)[i]);
2033
- b = _mm_load_ps(&((dtype*)p2)[i]);
2034
- a = _mm_sub_ps(a, b);
2035
- _mm_stream_ps(&((dtype*)p3)[i], a);
2036
- }
2037
- }
1698
+ if (idx1) {
1699
+ if (idx2) {
1700
+ for (i = 0; i < n; i++) {
1701
+ GET_DATA_INDEX(p1, idx1, dtype, x);
1702
+ x = m_minus(x);
1703
+ SET_DATA_INDEX(p2, idx2, dtype, x);
2038
1704
  }
2039
-
2040
- // Compute the remainder of the SIMD operation.
2041
- if (cnt_simd_loop != 0) {
2042
- if (p1 == p3) { // inplace case
2043
- for (; i < n; i++) {
2044
- check_intdivzero(((dtype*)p2)[i]);
2045
- ((dtype*)p1)[i] = m_sub(((dtype*)p1)[i], ((dtype*)p2)[i]);
2046
- }
2047
- } else {
2048
- for (; i < n; i++) {
2049
- check_intdivzero(((dtype*)p2)[i]);
2050
- ((dtype*)p3)[i] = m_sub(((dtype*)p1)[i], ((dtype*)p2)[i]);
2051
- }
2052
- }
1705
+ } else {
1706
+ for (i = 0; i < n; i++) {
1707
+ GET_DATA_INDEX(p1, idx1, dtype, x);
1708
+ x = m_minus(x);
1709
+ SET_DATA_STRIDE(p2, s2, dtype, x);
2053
1710
  }
2054
- #endif
2055
- return;
2056
1711
  }
2057
-
2058
- if (is_aligned_step(s1, sizeof(dtype)) && is_aligned_step(s2, sizeof(dtype)) &&
2059
- is_aligned_step(s3, sizeof(dtype))) {
1712
+ } else {
1713
+ if (idx2) {
1714
+ for (i = 0; i < n; i++) {
1715
+ GET_DATA_STRIDE(p1, s1, dtype, x);
1716
+ x = m_minus(x);
1717
+ SET_DATA_INDEX(p2, idx2, dtype, x);
1718
+ }
1719
+ } else {
2060
1720
  //
2061
-
2062
- if (s2 == 0) { // Broadcasting from scalar value.
2063
- check_intdivzero(*(dtype*)p2);
2064
- if (s1 == sizeof(dtype) && s3 == sizeof(dtype)) {
2065
- #ifdef __SSE2__
2066
- // Broadcast a scalar value and use it for SIMD computation.
2067
- b = _mm_load1_ps(&((dtype*)p2)[0]);
2068
-
2069
- // Check number of elements. & Check same alignment.
2070
- if ((n >= num_pack) &&
2071
- is_same_aligned2(&((dtype*)p1)[i], &((dtype*)p3)[i], SIMD_ALIGNMENT_SIZE)) {
2072
- // Calculate up to the position just before the start of SIMD computation.
2073
- cnt = get_count_of_elements_not_aligned_to_simd_size(
2074
- &((dtype*)p1)[i], SIMD_ALIGNMENT_SIZE, sizeof(dtype)
2075
- );
2076
- #endif
2077
- if (p1 == p3) { // inplace case
2078
- #ifdef __SSE2__
2079
- for (; i < cnt; i++) {
2080
- #else
2081
- for (; i < n; i++) {
2082
- #endif
2083
- ((dtype*)p1)[i] = m_sub(((dtype*)p1)[i], *(dtype*)p2);
2084
- }
2085
- } else {
2086
- #ifdef __SSE2__
2087
- for (; i < cnt; i++) {
2088
- #else
2089
- for (; i < n; i++) {
2090
- #endif
2091
- ((dtype*)p3)[i] = m_sub(((dtype*)p1)[i], *(dtype*)p2);
2092
- }
2093
- }
2094
-
2095
- #ifdef __SSE2__
2096
- // Get the count of SIMD computation loops.
2097
- cnt_simd_loop = (n - i) % num_pack;
2098
-
2099
- // SIMD computation.
2100
- if (p1 == p3) { // inplace case
2101
- for (; i < n - cnt_simd_loop; i += num_pack) {
2102
- a = _mm_load_ps(&((dtype*)p1)[i]);
2103
- a = _mm_sub_ps(a, b);
2104
- _mm_store_ps(&((dtype*)p1)[i], a);
2105
- }
2106
- } else {
2107
- for (; i < n - cnt_simd_loop; i += num_pack) {
2108
- a = _mm_load_ps(&((dtype*)p1)[i]);
2109
- a = _mm_sub_ps(a, b);
2110
- _mm_stream_ps(&((dtype*)p3)[i], a);
2111
- }
2112
- }
2113
- }
2114
-
2115
- // Compute the remainder of the SIMD operation.
2116
- if (cnt_simd_loop != 0) {
2117
- if (p1 == p3) { // inplace case
2118
- for (; i < n; i++) {
2119
- ((dtype*)p1)[i] = m_sub(((dtype*)p1)[i], *(dtype*)p2);
2120
- }
2121
- } else {
2122
- for (; i < n; i++) {
2123
- ((dtype*)p3)[i] = m_sub(((dtype*)p1)[i], *(dtype*)p2);
2124
- }
2125
- }
2126
- }
2127
- #endif
2128
- } else {
1721
+ if (is_aligned(p1, sizeof(dtype)) && is_aligned(p2, sizeof(dtype))) {
1722
+ if (s1 == sizeof(dtype) && s2 == sizeof(dtype)) {
2129
1723
  for (i = 0; i < n; i++) {
2130
- *(dtype*)p3 = m_sub(*(dtype*)p1, *(dtype*)p2);
2131
- p1 += s1;
2132
- p3 += s3;
1724
+ ((dtype*)p2)[i] = m_minus(((dtype*)p1)[i]);
2133
1725
  }
1726
+ return;
2134
1727
  }
2135
- } else {
2136
- if (p1 == p3) { // inplace case
2137
- for (i = 0; i < n; i++) {
2138
- check_intdivzero(*(dtype*)p2);
2139
- *(dtype*)p1 = m_sub(*(dtype*)p1, *(dtype*)p2);
2140
- p1 += s1;
2141
- p2 += s2;
2142
- }
2143
- } else {
1728
+ if (is_aligned_step(s1, sizeof(dtype)) && is_aligned_step(s2, sizeof(dtype))) {
1729
+ //
2144
1730
  for (i = 0; i < n; i++) {
2145
- check_intdivzero(*(dtype*)p2);
2146
- *(dtype*)p3 = m_sub(*(dtype*)p1, *(dtype*)p2);
1731
+ *(dtype*)p2 = m_minus(*(dtype*)p1);
2147
1732
  p1 += s1;
2148
1733
  p2 += s2;
2149
- p3 += s3;
2150
1734
  }
1735
+ return;
1736
+ //
2151
1737
  }
2152
1738
  }
2153
-
2154
- return;
1739
+ for (i = 0; i < n; i++) {
1740
+ GET_DATA_STRIDE(p1, s1, dtype, x);
1741
+ x = m_minus(x);
1742
+ SET_DATA_STRIDE(p2, s2, dtype, x);
1743
+ }
2155
1744
  //
2156
1745
  }
2157
1746
  }
2158
- for (i = 0; i < n; i++) {
2159
- dtype x, y, z;
2160
- GET_DATA_STRIDE(p1, s1, dtype, x);
2161
- GET_DATA_STRIDE(p2, s2, dtype, y);
2162
- check_intdivzero(y);
2163
- z = m_sub(x, y);
2164
- SET_DATA_STRIDE(p3, s3, dtype, z);
2165
- }
2166
- //
2167
1747
  }
2168
- #undef check_intdivzero
2169
1748
 
2170
- static VALUE sfloat_sub_self(VALUE self, VALUE other) {
2171
- ndfunc_arg_in_t ain[2] = { { cT, 0 }, { cT, 0 } };
1749
+ static VALUE sfloat_minus(VALUE self) {
1750
+ ndfunc_arg_in_t ain[1] = { { cT, 0 } };
2172
1751
  ndfunc_arg_out_t aout[1] = { { cT, 0 } };
2173
- ndfunc_t ndf = { iter_sfloat_sub, STRIDE_LOOP, 2, 1, ain, aout };
2174
-
2175
- return na_ndloop(&ndf, 2, self, other);
2176
- }
2177
-
2178
- static VALUE sfloat_sub(VALUE self, VALUE other) {
2179
-
2180
- VALUE klass, v;
1752
+ ndfunc_t ndf = { iter_sfloat_minus, FULL_LOOP, 1, 1, ain, aout };
2181
1753
 
2182
- klass = na_upcast(rb_obj_class(self), rb_obj_class(other));
2183
- if (klass == cT) {
2184
- return sfloat_sub_self(self, other);
2185
- } else {
2186
- v = rb_funcall(klass, id_cast, 1, self);
2187
- return rb_funcall(v, '-', 1, other);
2188
- }
1754
+ return na_ndloop(&ndf, 1, self);
2189
1755
  }
2190
1756
 
2191
- #define check_intdivzero(y) \
2192
- {}
2193
-
2194
- static void iter_sfloat_mul(na_loop_t* const lp) {
2195
- size_t i = 0;
2196
- size_t n;
2197
- char *p1, *p2, *p3;
2198
- ssize_t s1, s2, s3;
2199
-
2200
- #ifdef __SSE2__
2201
- size_t cnt;
2202
- size_t cnt_simd_loop = -1;
2203
-
2204
- __m128 a;
2205
- __m128 b;
1757
+ static void iter_sfloat_reciprocal(na_loop_t* const lp) {
1758
+ size_t i, n;
1759
+ char *p1, *p2;
1760
+ ssize_t s1, s2;
1761
+ size_t *idx1, *idx2;
1762
+ dtype x;
2206
1763
 
2207
- size_t num_pack; // Number of elements packed for SIMD.
2208
- num_pack = SIMD_ALIGNMENT_SIZE / sizeof(dtype);
2209
- #endif
2210
1764
  INIT_COUNTER(lp, n);
2211
- INIT_PTR(lp, 0, p1, s1);
2212
- INIT_PTR(lp, 1, p2, s2);
2213
- INIT_PTR(lp, 2, p3, s3);
2214
-
2215
- //
2216
- if (is_aligned(p1, sizeof(dtype)) && is_aligned(p2, sizeof(dtype)) &&
2217
- is_aligned(p3, sizeof(dtype))) {
1765
+ INIT_PTR_IDX(lp, 0, p1, s1, idx1);
1766
+ INIT_PTR_IDX(lp, 1, p2, s2, idx2);
2218
1767
 
2219
- if (s1 == sizeof(dtype) && s2 == sizeof(dtype) && s3 == sizeof(dtype)) {
2220
- #ifdef __SSE2__
2221
- // Check number of elements. & Check same alignment.
2222
- if ((n >= num_pack) &&
2223
- is_same_aligned3(
2224
- &((dtype*)p1)[i], &((dtype*)p2)[i], &((dtype*)p3)[i], SIMD_ALIGNMENT_SIZE
2225
- )) {
2226
- // Calculate up to the position just before the start of SIMD computation.
2227
- cnt = get_count_of_elements_not_aligned_to_simd_size(
2228
- &((dtype*)p1)[i], SIMD_ALIGNMENT_SIZE, sizeof(dtype)
2229
- );
2230
- #endif
2231
- if (p1 == p3) { // inplace case
2232
- #ifdef __SSE2__
2233
- for (; i < cnt; i++) {
2234
- #else
2235
- for (; i < n; i++) {
2236
- check_intdivzero(((dtype*)p2)[i]);
2237
- #endif
2238
- ((dtype*)p1)[i] = m_mul(((dtype*)p1)[i], ((dtype*)p2)[i]);
2239
- }
2240
- } else {
2241
- #ifdef __SSE2__
2242
- for (; i < cnt; i++) {
2243
- #else
2244
- for (; i < n; i++) {
2245
- check_intdivzero(((dtype*)p2)[i]);
2246
- #endif
2247
- ((dtype*)p3)[i] = m_mul(((dtype*)p1)[i], ((dtype*)p2)[i]);
1768
+ if (idx1) {
1769
+ if (idx2) {
1770
+ for (i = 0; i < n; i++) {
1771
+ GET_DATA_INDEX(p1, idx1, dtype, x);
1772
+ x = m_reciprocal(x);
1773
+ SET_DATA_INDEX(p2, idx2, dtype, x);
1774
+ }
1775
+ } else {
1776
+ for (i = 0; i < n; i++) {
1777
+ GET_DATA_INDEX(p1, idx1, dtype, x);
1778
+ x = m_reciprocal(x);
1779
+ SET_DATA_STRIDE(p2, s2, dtype, x);
1780
+ }
1781
+ }
1782
+ } else {
1783
+ if (idx2) {
1784
+ for (i = 0; i < n; i++) {
1785
+ GET_DATA_STRIDE(p1, s1, dtype, x);
1786
+ x = m_reciprocal(x);
1787
+ SET_DATA_INDEX(p2, idx2, dtype, x);
1788
+ }
1789
+ } else {
1790
+ //
1791
+ if (is_aligned(p1, sizeof(dtype)) && is_aligned(p2, sizeof(dtype))) {
1792
+ if (s1 == sizeof(dtype) && s2 == sizeof(dtype)) {
1793
+ for (i = 0; i < n; i++) {
1794
+ ((dtype*)p2)[i] = m_reciprocal(((dtype*)p1)[i]);
2248
1795
  }
1796
+ return;
2249
1797
  }
2250
-
2251
- #ifdef __SSE2__
2252
- // Get the count of SIMD computation loops.
2253
- cnt_simd_loop = (n - i) % num_pack;
2254
-
2255
- // SIMD computation.
2256
- if (p1 == p3) { // inplace case
2257
- for (; i < n - cnt_simd_loop; i += num_pack) {
2258
- a = _mm_load_ps(&((dtype*)p1)[i]);
2259
- b = _mm_load_ps(&((dtype*)p2)[i]);
2260
- a = _mm_mul_ps(a, b);
2261
- _mm_store_ps(&((dtype*)p1)[i], a);
2262
- }
2263
- } else {
2264
- for (; i < n - cnt_simd_loop; i += num_pack) {
2265
- a = _mm_load_ps(&((dtype*)p1)[i]);
2266
- b = _mm_load_ps(&((dtype*)p2)[i]);
2267
- a = _mm_mul_ps(a, b);
2268
- _mm_stream_ps(&((dtype*)p3)[i], a);
1798
+ if (is_aligned_step(s1, sizeof(dtype)) && is_aligned_step(s2, sizeof(dtype))) {
1799
+ //
1800
+ for (i = 0; i < n; i++) {
1801
+ *(dtype*)p2 = m_reciprocal(*(dtype*)p1);
1802
+ p1 += s1;
1803
+ p2 += s2;
2269
1804
  }
1805
+ return;
1806
+ //
2270
1807
  }
2271
1808
  }
2272
-
2273
- // Compute the remainder of the SIMD operation.
2274
- if (cnt_simd_loop != 0) {
2275
- if (p1 == p3) { // inplace case
2276
- for (; i < n; i++) {
2277
- check_intdivzero(((dtype*)p2)[i]);
2278
- ((dtype*)p1)[i] = m_mul(((dtype*)p1)[i], ((dtype*)p2)[i]);
2279
- }
2280
- } else {
2281
- for (; i < n; i++) {
2282
- check_intdivzero(((dtype*)p2)[i]);
2283
- ((dtype*)p3)[i] = m_mul(((dtype*)p1)[i], ((dtype*)p2)[i]);
2284
- }
2285
- }
1809
+ for (i = 0; i < n; i++) {
1810
+ GET_DATA_STRIDE(p1, s1, dtype, x);
1811
+ x = m_reciprocal(x);
1812
+ SET_DATA_STRIDE(p2, s2, dtype, x);
2286
1813
  }
2287
- #endif
2288
- return;
2289
- }
2290
-
2291
- if (is_aligned_step(s1, sizeof(dtype)) && is_aligned_step(s2, sizeof(dtype)) &&
2292
- is_aligned_step(s3, sizeof(dtype))) {
2293
1814
  //
1815
+ }
1816
+ }
1817
+ }
2294
1818
 
2295
- if (s2 == 0) { // Broadcasting from scalar value.
2296
- check_intdivzero(*(dtype*)p2);
2297
- if (s1 == sizeof(dtype) && s3 == sizeof(dtype)) {
2298
- #ifdef __SSE2__
2299
- // Broadcast a scalar value and use it for SIMD computation.
2300
- b = _mm_load1_ps(&((dtype*)p2)[0]);
2301
-
2302
- // Check number of elements. & Check same alignment.
2303
- if ((n >= num_pack) &&
2304
- is_same_aligned2(&((dtype*)p1)[i], &((dtype*)p3)[i], SIMD_ALIGNMENT_SIZE)) {
2305
- // Calculate up to the position just before the start of SIMD computation.
2306
- cnt = get_count_of_elements_not_aligned_to_simd_size(
2307
- &((dtype*)p1)[i], SIMD_ALIGNMENT_SIZE, sizeof(dtype)
2308
- );
2309
- #endif
2310
- if (p1 == p3) { // inplace case
2311
- #ifdef __SSE2__
2312
- for (; i < cnt; i++) {
2313
- #else
2314
- for (; i < n; i++) {
2315
- #endif
2316
- ((dtype*)p1)[i] = m_mul(((dtype*)p1)[i], *(dtype*)p2);
2317
- }
2318
- } else {
2319
- #ifdef __SSE2__
2320
- for (; i < cnt; i++) {
2321
- #else
2322
- for (; i < n; i++) {
2323
- #endif
2324
- ((dtype*)p3)[i] = m_mul(((dtype*)p1)[i], *(dtype*)p2);
2325
- }
2326
- }
2327
-
2328
- #ifdef __SSE2__
2329
- // Get the count of SIMD computation loops.
2330
- cnt_simd_loop = (n - i) % num_pack;
2331
-
2332
- // SIMD computation.
2333
- if (p1 == p3) { // inplace case
2334
- for (; i < n - cnt_simd_loop; i += num_pack) {
2335
- a = _mm_load_ps(&((dtype*)p1)[i]);
2336
- a = _mm_mul_ps(a, b);
2337
- _mm_store_ps(&((dtype*)p1)[i], a);
2338
- }
2339
- } else {
2340
- for (; i < n - cnt_simd_loop; i += num_pack) {
2341
- a = _mm_load_ps(&((dtype*)p1)[i]);
2342
- a = _mm_mul_ps(a, b);
2343
- _mm_stream_ps(&((dtype*)p3)[i], a);
2344
- }
2345
- }
2346
- }
2347
-
2348
- // Compute the remainder of the SIMD operation.
2349
- if (cnt_simd_loop != 0) {
2350
- if (p1 == p3) { // inplace case
2351
- for (; i < n; i++) {
2352
- ((dtype*)p1)[i] = m_mul(((dtype*)p1)[i], *(dtype*)p2);
2353
- }
2354
- } else {
2355
- for (; i < n; i++) {
2356
- ((dtype*)p3)[i] = m_mul(((dtype*)p1)[i], *(dtype*)p2);
2357
- }
2358
- }
2359
- }
2360
- #endif
2361
- } else {
2362
- for (i = 0; i < n; i++) {
2363
- *(dtype*)p3 = m_mul(*(dtype*)p1, *(dtype*)p2);
2364
- p1 += s1;
2365
- p3 += s3;
2366
- }
2367
- }
2368
- } else {
2369
- if (p1 == p3) { // inplace case
2370
- for (i = 0; i < n; i++) {
2371
- check_intdivzero(*(dtype*)p2);
2372
- *(dtype*)p1 = m_mul(*(dtype*)p1, *(dtype*)p2);
2373
- p1 += s1;
2374
- p2 += s2;
2375
- }
2376
- } else {
2377
- for (i = 0; i < n; i++) {
2378
- check_intdivzero(*(dtype*)p2);
2379
- *(dtype*)p3 = m_mul(*(dtype*)p1, *(dtype*)p2);
2380
- p1 += s1;
2381
- p2 += s2;
2382
- p3 += s3;
2383
- }
2384
- }
2385
- }
2386
-
2387
- return;
2388
- //
2389
- }
2390
- }
2391
- for (i = 0; i < n; i++) {
2392
- dtype x, y, z;
2393
- GET_DATA_STRIDE(p1, s1, dtype, x);
2394
- GET_DATA_STRIDE(p2, s2, dtype, y);
2395
- check_intdivzero(y);
2396
- z = m_mul(x, y);
2397
- SET_DATA_STRIDE(p3, s3, dtype, z);
2398
- }
2399
- //
2400
- }
2401
- #undef check_intdivzero
2402
-
2403
- static VALUE sfloat_mul_self(VALUE self, VALUE other) {
2404
- ndfunc_arg_in_t ain[2] = { { cT, 0 }, { cT, 0 } };
1819
+ static VALUE sfloat_reciprocal(VALUE self) {
1820
+ ndfunc_arg_in_t ain[1] = { { cT, 0 } };
2405
1821
  ndfunc_arg_out_t aout[1] = { { cT, 0 } };
2406
- ndfunc_t ndf = { iter_sfloat_mul, STRIDE_LOOP, 2, 1, ain, aout };
2407
-
2408
- return na_ndloop(&ndf, 2, self, other);
2409
- }
2410
-
2411
- static VALUE sfloat_mul(VALUE self, VALUE other) {
2412
-
2413
- VALUE klass, v;
1822
+ ndfunc_t ndf = { iter_sfloat_reciprocal, FULL_LOOP, 1, 1, ain, aout };
2414
1823
 
2415
- klass = na_upcast(rb_obj_class(self), rb_obj_class(other));
2416
- if (klass == cT) {
2417
- return sfloat_mul_self(self, other);
2418
- } else {
2419
- v = rb_funcall(klass, id_cast, 1, self);
2420
- return rb_funcall(v, '*', 1, other);
2421
- }
1824
+ return na_ndloop(&ndf, 1, self);
2422
1825
  }
2423
1826
 
2424
- #define check_intdivzero(y) \
2425
- {}
2426
-
2427
- static void iter_sfloat_div(na_loop_t* const lp) {
2428
- size_t i = 0;
2429
- size_t n;
2430
- char *p1, *p2, *p3;
2431
- ssize_t s1, s2, s3;
2432
-
2433
- #ifdef __SSE2__
2434
- size_t cnt;
2435
- size_t cnt_simd_loop = -1;
2436
-
2437
- __m128 a;
2438
- __m128 b;
1827
+ static void iter_sfloat_sign(na_loop_t* const lp) {
1828
+ size_t i, n;
1829
+ char *p1, *p2;
1830
+ ssize_t s1, s2;
1831
+ size_t *idx1, *idx2;
1832
+ dtype x;
2439
1833
 
2440
- size_t num_pack; // Number of elements packed for SIMD.
2441
- num_pack = SIMD_ALIGNMENT_SIZE / sizeof(dtype);
2442
- #endif
2443
1834
  INIT_COUNTER(lp, n);
2444
- INIT_PTR(lp, 0, p1, s1);
2445
- INIT_PTR(lp, 1, p2, s2);
2446
- INIT_PTR(lp, 2, p3, s3);
2447
-
2448
- //
2449
- if (is_aligned(p1, sizeof(dtype)) && is_aligned(p2, sizeof(dtype)) &&
2450
- is_aligned(p3, sizeof(dtype))) {
2451
-
2452
- if (s1 == sizeof(dtype) && s2 == sizeof(dtype) && s3 == sizeof(dtype)) {
2453
- #ifdef __SSE2__
2454
- // Check number of elements. & Check same alignment.
2455
- if ((n >= num_pack) &&
2456
- is_same_aligned3(
2457
- &((dtype*)p1)[i], &((dtype*)p2)[i], &((dtype*)p3)[i], SIMD_ALIGNMENT_SIZE
2458
- )) {
2459
- // Calculate up to the position just before the start of SIMD computation.
2460
- cnt = get_count_of_elements_not_aligned_to_simd_size(
2461
- &((dtype*)p1)[i], SIMD_ALIGNMENT_SIZE, sizeof(dtype)
2462
- );
2463
- #endif
2464
- if (p1 == p3) { // inplace case
2465
- #ifdef __SSE2__
2466
- for (; i < cnt; i++) {
2467
- #else
2468
- for (; i < n; i++) {
2469
- check_intdivzero(((dtype*)p2)[i]);
2470
- #endif
2471
- ((dtype*)p1)[i] = m_div(((dtype*)p1)[i], ((dtype*)p2)[i]);
2472
- }
2473
- } else {
2474
- #ifdef __SSE2__
2475
- for (; i < cnt; i++) {
2476
- #else
2477
- for (; i < n; i++) {
2478
- check_intdivzero(((dtype*)p2)[i]);
2479
- #endif
2480
- ((dtype*)p3)[i] = m_div(((dtype*)p1)[i], ((dtype*)p2)[i]);
2481
- }
2482
- }
2483
-
2484
- #ifdef __SSE2__
2485
- // Get the count of SIMD computation loops.
2486
- cnt_simd_loop = (n - i) % num_pack;
1835
+ INIT_PTR_IDX(lp, 0, p1, s1, idx1);
1836
+ INIT_PTR_IDX(lp, 1, p2, s2, idx2);
2487
1837
 
2488
- // SIMD computation.
2489
- if (p1 == p3) { // inplace case
2490
- for (; i < n - cnt_simd_loop; i += num_pack) {
2491
- a = _mm_load_ps(&((dtype*)p1)[i]);
2492
- b = _mm_load_ps(&((dtype*)p2)[i]);
2493
- a = _mm_div_ps(a, b);
2494
- _mm_store_ps(&((dtype*)p1)[i], a);
2495
- }
2496
- } else {
2497
- for (; i < n - cnt_simd_loop; i += num_pack) {
2498
- a = _mm_load_ps(&((dtype*)p1)[i]);
2499
- b = _mm_load_ps(&((dtype*)p2)[i]);
2500
- a = _mm_div_ps(a, b);
2501
- _mm_stream_ps(&((dtype*)p3)[i], a);
2502
- }
2503
- }
1838
+ if (idx1) {
1839
+ if (idx2) {
1840
+ for (i = 0; i < n; i++) {
1841
+ GET_DATA_INDEX(p1, idx1, dtype, x);
1842
+ x = m_sign(x);
1843
+ SET_DATA_INDEX(p2, idx2, dtype, x);
2504
1844
  }
2505
-
2506
- // Compute the remainder of the SIMD operation.
2507
- if (cnt_simd_loop != 0) {
2508
- if (p1 == p3) { // inplace case
2509
- for (; i < n; i++) {
2510
- check_intdivzero(((dtype*)p2)[i]);
2511
- ((dtype*)p1)[i] = m_div(((dtype*)p1)[i], ((dtype*)p2)[i]);
2512
- }
2513
- } else {
2514
- for (; i < n; i++) {
2515
- check_intdivzero(((dtype*)p2)[i]);
2516
- ((dtype*)p3)[i] = m_div(((dtype*)p1)[i], ((dtype*)p2)[i]);
2517
- }
2518
- }
1845
+ } else {
1846
+ for (i = 0; i < n; i++) {
1847
+ GET_DATA_INDEX(p1, idx1, dtype, x);
1848
+ x = m_sign(x);
1849
+ SET_DATA_STRIDE(p2, s2, dtype, x);
2519
1850
  }
2520
- #endif
2521
- return;
2522
1851
  }
2523
-
2524
- if (is_aligned_step(s1, sizeof(dtype)) && is_aligned_step(s2, sizeof(dtype)) &&
2525
- is_aligned_step(s3, sizeof(dtype))) {
1852
+ } else {
1853
+ if (idx2) {
1854
+ for (i = 0; i < n; i++) {
1855
+ GET_DATA_STRIDE(p1, s1, dtype, x);
1856
+ x = m_sign(x);
1857
+ SET_DATA_INDEX(p2, idx2, dtype, x);
1858
+ }
1859
+ } else {
2526
1860
  //
2527
-
2528
- if (s2 == 0) { // Broadcasting from scalar value.
2529
- check_intdivzero(*(dtype*)p2);
2530
- if (s1 == sizeof(dtype) && s3 == sizeof(dtype)) {
2531
- #ifdef __SSE2__
2532
- // Broadcast a scalar value and use it for SIMD computation.
2533
- b = _mm_load1_ps(&((dtype*)p2)[0]);
2534
-
2535
- // Check number of elements. & Check same alignment.
2536
- if ((n >= num_pack) &&
2537
- is_same_aligned2(&((dtype*)p1)[i], &((dtype*)p3)[i], SIMD_ALIGNMENT_SIZE)) {
2538
- // Calculate up to the position just before the start of SIMD computation.
2539
- cnt = get_count_of_elements_not_aligned_to_simd_size(
2540
- &((dtype*)p1)[i], SIMD_ALIGNMENT_SIZE, sizeof(dtype)
2541
- );
2542
- #endif
2543
- if (p1 == p3) { // inplace case
2544
- #ifdef __SSE2__
2545
- for (; i < cnt; i++) {
2546
- #else
2547
- for (; i < n; i++) {
2548
- #endif
2549
- ((dtype*)p1)[i] = m_div(((dtype*)p1)[i], *(dtype*)p2);
2550
- }
2551
- } else {
2552
- #ifdef __SSE2__
2553
- for (; i < cnt; i++) {
2554
- #else
2555
- for (; i < n; i++) {
2556
- #endif
2557
- ((dtype*)p3)[i] = m_div(((dtype*)p1)[i], *(dtype*)p2);
2558
- }
2559
- }
2560
-
2561
- #ifdef __SSE2__
2562
- // Get the count of SIMD computation loops.
2563
- cnt_simd_loop = (n - i) % num_pack;
2564
-
2565
- // SIMD computation.
2566
- if (p1 == p3) { // inplace case
2567
- for (; i < n - cnt_simd_loop; i += num_pack) {
2568
- a = _mm_load_ps(&((dtype*)p1)[i]);
2569
- a = _mm_div_ps(a, b);
2570
- _mm_store_ps(&((dtype*)p1)[i], a);
2571
- }
2572
- } else {
2573
- for (; i < n - cnt_simd_loop; i += num_pack) {
2574
- a = _mm_load_ps(&((dtype*)p1)[i]);
2575
- a = _mm_div_ps(a, b);
2576
- _mm_stream_ps(&((dtype*)p3)[i], a);
2577
- }
2578
- }
2579
- }
2580
-
2581
- // Compute the remainder of the SIMD operation.
2582
- if (cnt_simd_loop != 0) {
2583
- if (p1 == p3) { // inplace case
2584
- for (; i < n; i++) {
2585
- ((dtype*)p1)[i] = m_div(((dtype*)p1)[i], *(dtype*)p2);
2586
- }
2587
- } else {
2588
- for (; i < n; i++) {
2589
- ((dtype*)p3)[i] = m_div(((dtype*)p1)[i], *(dtype*)p2);
2590
- }
2591
- }
2592
- }
2593
- #endif
2594
- } else {
1861
+ if (is_aligned(p1, sizeof(dtype)) && is_aligned(p2, sizeof(dtype))) {
1862
+ if (s1 == sizeof(dtype) && s2 == sizeof(dtype)) {
2595
1863
  for (i = 0; i < n; i++) {
2596
- *(dtype*)p3 = m_div(*(dtype*)p1, *(dtype*)p2);
2597
- p1 += s1;
2598
- p3 += s3;
1864
+ ((dtype*)p2)[i] = m_sign(((dtype*)p1)[i]);
2599
1865
  }
1866
+ return;
2600
1867
  }
2601
- } else {
2602
- if (p1 == p3) { // inplace case
2603
- for (i = 0; i < n; i++) {
2604
- check_intdivzero(*(dtype*)p2);
2605
- *(dtype*)p1 = m_div(*(dtype*)p1, *(dtype*)p2);
2606
- p1 += s1;
2607
- p2 += s2;
2608
- }
2609
- } else {
1868
+ if (is_aligned_step(s1, sizeof(dtype)) && is_aligned_step(s2, sizeof(dtype))) {
1869
+ //
2610
1870
  for (i = 0; i < n; i++) {
2611
- check_intdivzero(*(dtype*)p2);
2612
- *(dtype*)p3 = m_div(*(dtype*)p1, *(dtype*)p2);
1871
+ *(dtype*)p2 = m_sign(*(dtype*)p1);
2613
1872
  p1 += s1;
2614
1873
  p2 += s2;
2615
- p3 += s3;
2616
1874
  }
2617
- }
2618
- }
2619
-
2620
- return;
2621
- //
2622
- }
2623
- }
2624
- for (i = 0; i < n; i++) {
2625
- dtype x, y, z;
2626
- GET_DATA_STRIDE(p1, s1, dtype, x);
2627
- GET_DATA_STRIDE(p2, s2, dtype, y);
2628
- check_intdivzero(y);
2629
- z = m_div(x, y);
2630
- SET_DATA_STRIDE(p3, s3, dtype, z);
2631
- }
2632
- //
2633
- }
2634
- #undef check_intdivzero
2635
-
2636
- static VALUE sfloat_div_self(VALUE self, VALUE other) {
2637
- ndfunc_arg_in_t ain[2] = { { cT, 0 }, { cT, 0 } };
2638
- ndfunc_arg_out_t aout[1] = { { cT, 0 } };
2639
- ndfunc_t ndf = { iter_sfloat_div, STRIDE_LOOP, 2, 1, ain, aout };
2640
-
2641
- return na_ndloop(&ndf, 2, self, other);
2642
- }
2643
-
2644
- static VALUE sfloat_div(VALUE self, VALUE other) {
2645
-
2646
- VALUE klass, v;
2647
-
2648
- klass = na_upcast(rb_obj_class(self), rb_obj_class(other));
2649
- if (klass == cT) {
2650
- return sfloat_div_self(self, other);
2651
- } else {
2652
- v = rb_funcall(klass, id_cast, 1, self);
2653
- return rb_funcall(v, '/', 1, other);
2654
- }
2655
- }
2656
-
2657
- #define check_intdivzero(y) \
2658
- {}
2659
-
2660
- static void iter_sfloat_mod(na_loop_t* const lp) {
2661
- size_t i = 0;
2662
- size_t n;
2663
- char *p1, *p2, *p3;
2664
- ssize_t s1, s2, s3;
2665
-
2666
- INIT_COUNTER(lp, n);
2667
- INIT_PTR(lp, 0, p1, s1);
2668
- INIT_PTR(lp, 1, p2, s2);
2669
- INIT_PTR(lp, 2, p3, s3);
2670
-
2671
- //
2672
- if (is_aligned(p1, sizeof(dtype)) && is_aligned(p2, sizeof(dtype)) &&
2673
- is_aligned(p3, sizeof(dtype))) {
2674
-
2675
- if (s1 == sizeof(dtype) && s2 == sizeof(dtype) && s3 == sizeof(dtype)) {
2676
- if (p1 == p3) { // inplace case
2677
- for (; i < n; i++) {
2678
- check_intdivzero(((dtype*)p2)[i]);
2679
- ((dtype*)p1)[i] = m_mod(((dtype*)p1)[i], ((dtype*)p2)[i]);
2680
- }
2681
- } else {
2682
- for (; i < n; i++) {
2683
- check_intdivzero(((dtype*)p2)[i]);
2684
- ((dtype*)p3)[i] = m_mod(((dtype*)p1)[i], ((dtype*)p2)[i]);
2685
- }
2686
- }
2687
- return;
2688
- }
2689
-
2690
- if (is_aligned_step(s1, sizeof(dtype)) && is_aligned_step(s2, sizeof(dtype)) &&
2691
- is_aligned_step(s3, sizeof(dtype))) {
2692
- //
2693
-
2694
- if (s2 == 0) { // Broadcasting from scalar value.
2695
- check_intdivzero(*(dtype*)p2);
2696
- if (s1 == sizeof(dtype) && s3 == sizeof(dtype)) {
2697
- if (p1 == p3) { // inplace case
2698
- for (; i < n; i++) {
2699
- ((dtype*)p1)[i] = m_mod(((dtype*)p1)[i], *(dtype*)p2);
2700
- }
2701
- } else {
2702
- for (; i < n; i++) {
2703
- ((dtype*)p3)[i] = m_mod(((dtype*)p1)[i], *(dtype*)p2);
2704
- }
2705
- }
2706
- } else {
2707
- for (i = 0; i < n; i++) {
2708
- *(dtype*)p3 = m_mod(*(dtype*)p1, *(dtype*)p2);
2709
- p1 += s1;
2710
- p3 += s3;
2711
- }
2712
- }
2713
- } else {
2714
- if (p1 == p3) { // inplace case
2715
- for (i = 0; i < n; i++) {
2716
- check_intdivzero(*(dtype*)p2);
2717
- *(dtype*)p1 = m_mod(*(dtype*)p1, *(dtype*)p2);
2718
- p1 += s1;
2719
- p2 += s2;
2720
- }
2721
- } else {
2722
- for (i = 0; i < n; i++) {
2723
- check_intdivzero(*(dtype*)p2);
2724
- *(dtype*)p3 = m_mod(*(dtype*)p1, *(dtype*)p2);
2725
- p1 += s1;
2726
- p2 += s2;
2727
- p3 += s3;
2728
- }
2729
- }
2730
- }
2731
-
2732
- return;
2733
- //
2734
- }
2735
- }
2736
- for (i = 0; i < n; i++) {
2737
- dtype x, y, z;
2738
- GET_DATA_STRIDE(p1, s1, dtype, x);
2739
- GET_DATA_STRIDE(p2, s2, dtype, y);
2740
- check_intdivzero(y);
2741
- z = m_mod(x, y);
2742
- SET_DATA_STRIDE(p3, s3, dtype, z);
2743
- }
2744
- //
2745
- }
2746
- #undef check_intdivzero
2747
-
2748
- static VALUE sfloat_mod_self(VALUE self, VALUE other) {
2749
- ndfunc_arg_in_t ain[2] = { { cT, 0 }, { cT, 0 } };
2750
- ndfunc_arg_out_t aout[1] = { { cT, 0 } };
2751
- ndfunc_t ndf = { iter_sfloat_mod, STRIDE_LOOP, 2, 1, ain, aout };
2752
-
2753
- return na_ndloop(&ndf, 2, self, other);
2754
- }
2755
-
2756
- static VALUE sfloat_mod(VALUE self, VALUE other) {
2757
-
2758
- VALUE klass, v;
2759
-
2760
- klass = na_upcast(rb_obj_class(self), rb_obj_class(other));
2761
- if (klass == cT) {
2762
- return sfloat_mod_self(self, other);
2763
- } else {
2764
- v = rb_funcall(klass, id_cast, 1, self);
2765
- return rb_funcall(v, '%', 1, other);
2766
- }
2767
- }
2768
-
2769
- static void iter_sfloat_divmod(na_loop_t* const lp) {
2770
- size_t i, n;
2771
- char *p1, *p2, *p3, *p4;
2772
- ssize_t s1, s2, s3, s4;
2773
- dtype x, y, a, b;
2774
- INIT_COUNTER(lp, n);
2775
- INIT_PTR(lp, 0, p1, s1);
2776
- INIT_PTR(lp, 1, p2, s2);
2777
- INIT_PTR(lp, 2, p3, s3);
2778
- INIT_PTR(lp, 3, p4, s4);
2779
- for (i = n; i--;) {
2780
- GET_DATA_STRIDE(p1, s1, dtype, x);
2781
- GET_DATA_STRIDE(p2, s2, dtype, y);
2782
- m_divmod(x, y, a, b);
2783
- SET_DATA_STRIDE(p3, s3, dtype, a);
2784
- SET_DATA_STRIDE(p4, s4, dtype, b);
2785
- }
2786
- }
2787
-
2788
- static VALUE sfloat_divmod_self(VALUE self, VALUE other) {
2789
- ndfunc_arg_in_t ain[2] = { { cT, 0 }, { cT, 0 } };
2790
- ndfunc_arg_out_t aout[2] = { { cT, 0 }, { cT, 0 } };
2791
- ndfunc_t ndf = { iter_sfloat_divmod, STRIDE_LOOP, 2, 2, ain, aout };
2792
-
2793
- return na_ndloop(&ndf, 2, self, other);
2794
- }
2795
-
2796
- static VALUE sfloat_divmod(VALUE self, VALUE other) {
2797
-
2798
- VALUE klass, v;
2799
- klass = na_upcast(rb_obj_class(self), rb_obj_class(other));
2800
- if (klass == cT) {
2801
- return sfloat_divmod_self(self, other);
2802
- } else {
2803
- v = rb_funcall(klass, id_cast, 1, self);
2804
- return rb_funcall(v, id_divmod, 1, other);
2805
- }
2806
- }
2807
-
2808
- static void iter_sfloat_pow(na_loop_t* const lp) {
2809
- size_t i;
2810
- char *p1, *p2, *p3;
2811
- ssize_t s1, s2, s3;
2812
- dtype x, y;
2813
- INIT_COUNTER(lp, i);
2814
- INIT_PTR(lp, 0, p1, s1);
2815
- INIT_PTR(lp, 1, p2, s2);
2816
- INIT_PTR(lp, 2, p3, s3);
2817
- for (; i--;) {
2818
- GET_DATA_STRIDE(p1, s1, dtype, x);
2819
- GET_DATA_STRIDE(p2, s2, dtype, y);
2820
- x = m_pow(x, y);
2821
- SET_DATA_STRIDE(p3, s3, dtype, x);
2822
- }
2823
- }
2824
-
2825
- static void iter_sfloat_pow_int32(na_loop_t* const lp) {
2826
- size_t i;
2827
- char *p1, *p2, *p3;
2828
- ssize_t s1, s2, s3;
2829
- dtype x;
2830
- int32_t y;
2831
- INIT_COUNTER(lp, i);
2832
- INIT_PTR(lp, 0, p1, s1);
2833
- INIT_PTR(lp, 1, p2, s2);
2834
- INIT_PTR(lp, 2, p3, s3);
2835
- for (; i--;) {
2836
- GET_DATA_STRIDE(p1, s1, dtype, x);
2837
- GET_DATA_STRIDE(p2, s2, int32_t, y);
2838
- x = m_pow_int(x, y);
2839
- SET_DATA_STRIDE(p3, s3, dtype, x);
2840
- }
2841
- }
2842
-
2843
- static VALUE sfloat_pow_self(VALUE self, VALUE other) {
2844
- ndfunc_arg_in_t ain[2] = { { cT, 0 }, { cT, 0 } };
2845
- ndfunc_arg_in_t ain_i[2] = { { cT, 0 }, { numo_cInt32, 0 } };
2846
- ndfunc_arg_out_t aout[1] = { { cT, 0 } };
2847
- ndfunc_t ndf = { iter_sfloat_pow, STRIDE_LOOP, 2, 1, ain, aout };
2848
- ndfunc_t ndf_i = { iter_sfloat_pow_int32, STRIDE_LOOP, 2, 1, ain_i, aout };
2849
-
2850
- // fixme : use na.integer?
2851
- if (FIXNUM_P(other) || rb_obj_is_kind_of(other, numo_cInt32)) {
2852
- return na_ndloop(&ndf_i, 2, self, other);
2853
- } else {
2854
- return na_ndloop(&ndf, 2, self, other);
2855
- }
2856
- }
2857
-
2858
- static VALUE sfloat_pow(VALUE self, VALUE other) {
2859
-
2860
- VALUE klass, v;
2861
- klass = na_upcast(rb_obj_class(self), rb_obj_class(other));
2862
- if (klass == cT) {
2863
- return sfloat_pow_self(self, other);
2864
- } else {
2865
- v = rb_funcall(klass, id_cast, 1, self);
2866
- return rb_funcall(v, id_pow, 1, other);
2867
- }
2868
- }
2869
-
2870
- static void iter_sfloat_minus(na_loop_t* const lp) {
2871
- size_t i, n;
2872
- char *p1, *p2;
2873
- ssize_t s1, s2;
2874
- size_t *idx1, *idx2;
2875
- dtype x;
2876
-
2877
- INIT_COUNTER(lp, n);
2878
- INIT_PTR_IDX(lp, 0, p1, s1, idx1);
2879
- INIT_PTR_IDX(lp, 1, p2, s2, idx2);
2880
-
2881
- if (idx1) {
2882
- if (idx2) {
2883
- for (i = 0; i < n; i++) {
2884
- GET_DATA_INDEX(p1, idx1, dtype, x);
2885
- x = m_minus(x);
2886
- SET_DATA_INDEX(p2, idx2, dtype, x);
2887
- }
2888
- } else {
2889
- for (i = 0; i < n; i++) {
2890
- GET_DATA_INDEX(p1, idx1, dtype, x);
2891
- x = m_minus(x);
2892
- SET_DATA_STRIDE(p2, s2, dtype, x);
2893
- }
2894
- }
2895
- } else {
2896
- if (idx2) {
2897
- for (i = 0; i < n; i++) {
2898
- GET_DATA_STRIDE(p1, s1, dtype, x);
2899
- x = m_minus(x);
2900
- SET_DATA_INDEX(p2, idx2, dtype, x);
2901
- }
2902
- } else {
2903
- //
2904
- if (is_aligned(p1, sizeof(dtype)) && is_aligned(p2, sizeof(dtype))) {
2905
- if (s1 == sizeof(dtype) && s2 == sizeof(dtype)) {
2906
- for (i = 0; i < n; i++) {
2907
- ((dtype*)p2)[i] = m_minus(((dtype*)p1)[i]);
2908
- }
2909
- return;
2910
- }
2911
- if (is_aligned_step(s1, sizeof(dtype)) && is_aligned_step(s2, sizeof(dtype))) {
2912
- //
2913
- for (i = 0; i < n; i++) {
2914
- *(dtype*)p2 = m_minus(*(dtype*)p1);
2915
- p1 += s1;
2916
- p2 += s2;
2917
- }
2918
- return;
2919
- //
2920
- }
2921
- }
2922
- for (i = 0; i < n; i++) {
2923
- GET_DATA_STRIDE(p1, s1, dtype, x);
2924
- x = m_minus(x);
2925
- SET_DATA_STRIDE(p2, s2, dtype, x);
2926
- }
2927
- //
2928
- }
2929
- }
2930
- }
2931
-
2932
- static VALUE sfloat_minus(VALUE self) {
2933
- ndfunc_arg_in_t ain[1] = { { cT, 0 } };
2934
- ndfunc_arg_out_t aout[1] = { { cT, 0 } };
2935
- ndfunc_t ndf = { iter_sfloat_minus, FULL_LOOP, 1, 1, ain, aout };
2936
-
2937
- return na_ndloop(&ndf, 1, self);
2938
- }
2939
-
2940
- static void iter_sfloat_reciprocal(na_loop_t* const lp) {
2941
- size_t i, n;
2942
- char *p1, *p2;
2943
- ssize_t s1, s2;
2944
- size_t *idx1, *idx2;
2945
- dtype x;
2946
-
2947
- INIT_COUNTER(lp, n);
2948
- INIT_PTR_IDX(lp, 0, p1, s1, idx1);
2949
- INIT_PTR_IDX(lp, 1, p2, s2, idx2);
2950
-
2951
- if (idx1) {
2952
- if (idx2) {
2953
- for (i = 0; i < n; i++) {
2954
- GET_DATA_INDEX(p1, idx1, dtype, x);
2955
- x = m_reciprocal(x);
2956
- SET_DATA_INDEX(p2, idx2, dtype, x);
2957
- }
2958
- } else {
2959
- for (i = 0; i < n; i++) {
2960
- GET_DATA_INDEX(p1, idx1, dtype, x);
2961
- x = m_reciprocal(x);
2962
- SET_DATA_STRIDE(p2, s2, dtype, x);
2963
- }
2964
- }
2965
- } else {
2966
- if (idx2) {
2967
- for (i = 0; i < n; i++) {
2968
- GET_DATA_STRIDE(p1, s1, dtype, x);
2969
- x = m_reciprocal(x);
2970
- SET_DATA_INDEX(p2, idx2, dtype, x);
2971
- }
2972
- } else {
2973
- //
2974
- if (is_aligned(p1, sizeof(dtype)) && is_aligned(p2, sizeof(dtype))) {
2975
- if (s1 == sizeof(dtype) && s2 == sizeof(dtype)) {
2976
- for (i = 0; i < n; i++) {
2977
- ((dtype*)p2)[i] = m_reciprocal(((dtype*)p1)[i]);
2978
- }
2979
- return;
2980
- }
2981
- if (is_aligned_step(s1, sizeof(dtype)) && is_aligned_step(s2, sizeof(dtype))) {
2982
- //
2983
- for (i = 0; i < n; i++) {
2984
- *(dtype*)p2 = m_reciprocal(*(dtype*)p1);
2985
- p1 += s1;
2986
- p2 += s2;
2987
- }
2988
- return;
2989
- //
2990
- }
2991
- }
2992
- for (i = 0; i < n; i++) {
2993
- GET_DATA_STRIDE(p1, s1, dtype, x);
2994
- x = m_reciprocal(x);
2995
- SET_DATA_STRIDE(p2, s2, dtype, x);
2996
- }
2997
- //
2998
- }
2999
- }
3000
- }
3001
-
3002
- static VALUE sfloat_reciprocal(VALUE self) {
3003
- ndfunc_arg_in_t ain[1] = { { cT, 0 } };
3004
- ndfunc_arg_out_t aout[1] = { { cT, 0 } };
3005
- ndfunc_t ndf = { iter_sfloat_reciprocal, FULL_LOOP, 1, 1, ain, aout };
3006
-
3007
- return na_ndloop(&ndf, 1, self);
3008
- }
3009
-
3010
- static void iter_sfloat_sign(na_loop_t* const lp) {
3011
- size_t i, n;
3012
- char *p1, *p2;
3013
- ssize_t s1, s2;
3014
- size_t *idx1, *idx2;
3015
- dtype x;
3016
-
3017
- INIT_COUNTER(lp, n);
3018
- INIT_PTR_IDX(lp, 0, p1, s1, idx1);
3019
- INIT_PTR_IDX(lp, 1, p2, s2, idx2);
3020
-
3021
- if (idx1) {
3022
- if (idx2) {
3023
- for (i = 0; i < n; i++) {
3024
- GET_DATA_INDEX(p1, idx1, dtype, x);
3025
- x = m_sign(x);
3026
- SET_DATA_INDEX(p2, idx2, dtype, x);
3027
- }
3028
- } else {
3029
- for (i = 0; i < n; i++) {
3030
- GET_DATA_INDEX(p1, idx1, dtype, x);
3031
- x = m_sign(x);
3032
- SET_DATA_STRIDE(p2, s2, dtype, x);
3033
- }
3034
- }
3035
- } else {
3036
- if (idx2) {
3037
- for (i = 0; i < n; i++) {
3038
- GET_DATA_STRIDE(p1, s1, dtype, x);
3039
- x = m_sign(x);
3040
- SET_DATA_INDEX(p2, idx2, dtype, x);
3041
- }
3042
- } else {
3043
- //
3044
- if (is_aligned(p1, sizeof(dtype)) && is_aligned(p2, sizeof(dtype))) {
3045
- if (s1 == sizeof(dtype) && s2 == sizeof(dtype)) {
3046
- for (i = 0; i < n; i++) {
3047
- ((dtype*)p2)[i] = m_sign(((dtype*)p1)[i]);
3048
- }
3049
- return;
3050
- }
3051
- if (is_aligned_step(s1, sizeof(dtype)) && is_aligned_step(s2, sizeof(dtype))) {
3052
- //
3053
- for (i = 0; i < n; i++) {
3054
- *(dtype*)p2 = m_sign(*(dtype*)p1);
3055
- p1 += s1;
3056
- p2 += s2;
3057
- }
3058
- return;
3059
- //
3060
- }
3061
- }
3062
- for (i = 0; i < n; i++) {
3063
- GET_DATA_STRIDE(p1, s1, dtype, x);
3064
- x = m_sign(x);
3065
- SET_DATA_STRIDE(p2, s2, dtype, x);
3066
- }
3067
- //
3068
- }
3069
- }
3070
- }
3071
-
3072
- static VALUE sfloat_sign(VALUE self) {
3073
- ndfunc_arg_in_t ain[1] = { { cT, 0 } };
3074
- ndfunc_arg_out_t aout[1] = { { cT, 0 } };
3075
- ndfunc_t ndf = { iter_sfloat_sign, FULL_LOOP, 1, 1, ain, aout };
3076
-
3077
- return na_ndloop(&ndf, 1, self);
3078
- }
3079
-
3080
- static void iter_sfloat_square(na_loop_t* const lp) {
3081
- size_t i, n;
3082
- char *p1, *p2;
3083
- ssize_t s1, s2;
3084
- size_t *idx1, *idx2;
3085
- dtype x;
3086
-
3087
- INIT_COUNTER(lp, n);
3088
- INIT_PTR_IDX(lp, 0, p1, s1, idx1);
3089
- INIT_PTR_IDX(lp, 1, p2, s2, idx2);
3090
-
3091
- if (idx1) {
3092
- if (idx2) {
3093
- for (i = 0; i < n; i++) {
3094
- GET_DATA_INDEX(p1, idx1, dtype, x);
3095
- x = m_square(x);
3096
- SET_DATA_INDEX(p2, idx2, dtype, x);
3097
- }
3098
- } else {
3099
- for (i = 0; i < n; i++) {
3100
- GET_DATA_INDEX(p1, idx1, dtype, x);
3101
- x = m_square(x);
3102
- SET_DATA_STRIDE(p2, s2, dtype, x);
3103
- }
3104
- }
3105
- } else {
3106
- if (idx2) {
3107
- for (i = 0; i < n; i++) {
3108
- GET_DATA_STRIDE(p1, s1, dtype, x);
3109
- x = m_square(x);
3110
- SET_DATA_INDEX(p2, idx2, dtype, x);
3111
- }
3112
- } else {
3113
- //
3114
- if (is_aligned(p1, sizeof(dtype)) && is_aligned(p2, sizeof(dtype))) {
3115
- if (s1 == sizeof(dtype) && s2 == sizeof(dtype)) {
3116
- for (i = 0; i < n; i++) {
3117
- ((dtype*)p2)[i] = m_square(((dtype*)p1)[i]);
3118
- }
3119
- return;
3120
- }
3121
- if (is_aligned_step(s1, sizeof(dtype)) && is_aligned_step(s2, sizeof(dtype))) {
3122
- //
3123
- for (i = 0; i < n; i++) {
3124
- *(dtype*)p2 = m_square(*(dtype*)p1);
3125
- p1 += s1;
3126
- p2 += s2;
3127
- }
3128
- return;
3129
- //
3130
- }
3131
- }
3132
- for (i = 0; i < n; i++) {
3133
- GET_DATA_STRIDE(p1, s1, dtype, x);
3134
- x = m_square(x);
3135
- SET_DATA_STRIDE(p2, s2, dtype, x);
3136
- }
3137
- //
3138
- }
3139
- }
3140
- }
3141
-
3142
- static VALUE sfloat_square(VALUE self) {
3143
- ndfunc_arg_in_t ain[1] = { { cT, 0 } };
3144
- ndfunc_arg_out_t aout[1] = { { cT, 0 } };
3145
- ndfunc_t ndf = { iter_sfloat_square, FULL_LOOP, 1, 1, ain, aout };
3146
-
3147
- return na_ndloop(&ndf, 1, self);
3148
- }
3149
-
3150
- static void iter_sfloat_eq(na_loop_t* const lp) {
3151
- size_t i;
3152
- char *p1, *p2;
3153
- BIT_DIGIT* a3;
3154
- size_t p3;
3155
- ssize_t s1, s2, s3;
3156
- dtype x, y;
3157
- BIT_DIGIT b;
3158
- INIT_COUNTER(lp, i);
3159
- INIT_PTR(lp, 0, p1, s1);
3160
- INIT_PTR(lp, 1, p2, s2);
3161
- INIT_PTR_BIT(lp, 2, a3, p3, s3);
3162
- for (; i--;) {
3163
- GET_DATA_STRIDE(p1, s1, dtype, x);
3164
- GET_DATA_STRIDE(p2, s2, dtype, y);
3165
- b = (m_eq(x, y)) ? 1 : 0;
3166
- STORE_BIT(a3, p3, b);
3167
- p3 += s3;
3168
- }
3169
- }
3170
-
3171
- static VALUE sfloat_eq_self(VALUE self, VALUE other) {
- ndfunc_arg_in_t ain[2] = { { cT, 0 }, { cT, 0 } };
- ndfunc_arg_out_t aout[1] = { { numo_cBit, 0 } };
- ndfunc_t ndf = { iter_sfloat_eq, STRIDE_LOOP, 2, 1, ain, aout };
-
- return na_ndloop(&ndf, 2, self, other);
- }
-
- static VALUE sfloat_eq(VALUE self, VALUE other) {
-
- VALUE klass, v;
- klass = na_upcast(rb_obj_class(self), rb_obj_class(other));
- if (klass == cT) {
- return sfloat_eq_self(self, other);
- } else {
- v = rb_funcall(klass, id_cast, 1, self);
- return rb_funcall(v, id_eq, 1, other);
- }
- }
-
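For context (not part of the diff itself): the comparison iterators here (eq, ne, nearly_eq, and the gt/ge/lt/le group further down) emit a Numo::Bit result, writing one bit per element through STORE_BIT. A rough plain-C sketch of that packing idea, assuming a flat uint32_t word buffer rather than the gem's BIT_DIGIT layout:

    #include <stdio.h>
    #include <stdint.h>
    #include <stddef.h>

    /* Set bit `pos` of a packed bit buffer to b (0 or 1),
       roughly what STORE_BIT does for the Bit output array. */
    static void store_bit(uint32_t *bits, size_t pos, int b) {
        if (b) bits[pos / 32] |=  (UINT32_C(1) << (pos % 32));
        else   bits[pos / 32] &= ~(UINT32_C(1) << (pos % 32));
    }

    int main(void) {
        float x[5] = { 1, 2, 3, 4, 5 };
        float y[5] = { 1, 0, 3, 9, 5 };
        uint32_t bits[1] = { 0 };
        for (size_t i = 0; i < 5; i++)
            store_bit(bits, i, x[i] == y[i]);       /* elementwise eq -> one bit each */
        printf("mask = 0x%x\n", (unsigned)bits[0]); /* 0x15: elements 0, 2, 4 are equal */
        return 0;
    }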
3191
- static void iter_sfloat_ne(na_loop_t* const lp) {
3192
- size_t i;
3193
- char *p1, *p2;
3194
- BIT_DIGIT* a3;
3195
- size_t p3;
3196
- ssize_t s1, s2, s3;
3197
- dtype x, y;
3198
- BIT_DIGIT b;
3199
- INIT_COUNTER(lp, i);
3200
- INIT_PTR(lp, 0, p1, s1);
3201
- INIT_PTR(lp, 1, p2, s2);
3202
- INIT_PTR_BIT(lp, 2, a3, p3, s3);
3203
- for (; i--;) {
3204
- GET_DATA_STRIDE(p1, s1, dtype, x);
3205
- GET_DATA_STRIDE(p2, s2, dtype, y);
3206
- b = (m_ne(x, y)) ? 1 : 0;
3207
- STORE_BIT(a3, p3, b);
3208
- p3 += s3;
3209
- }
3210
- }
3211
-
3212
- static VALUE sfloat_ne_self(VALUE self, VALUE other) {
3213
- ndfunc_arg_in_t ain[2] = { { cT, 0 }, { cT, 0 } };
3214
- ndfunc_arg_out_t aout[1] = { { numo_cBit, 0 } };
3215
- ndfunc_t ndf = { iter_sfloat_ne, STRIDE_LOOP, 2, 1, ain, aout };
3216
-
3217
- return na_ndloop(&ndf, 2, self, other);
3218
- }
3219
-
3220
- static VALUE sfloat_ne(VALUE self, VALUE other) {
3221
-
3222
- VALUE klass, v;
3223
- klass = na_upcast(rb_obj_class(self), rb_obj_class(other));
3224
- if (klass == cT) {
3225
- return sfloat_ne_self(self, other);
3226
- } else {
3227
- v = rb_funcall(klass, id_cast, 1, self);
3228
- return rb_funcall(v, id_ne, 1, other);
3229
- }
3230
- }
3231
-
3232
- static void iter_sfloat_nearly_eq(na_loop_t* const lp) {
3233
- size_t i;
3234
- char *p1, *p2;
3235
- BIT_DIGIT* a3;
3236
- size_t p3;
3237
- ssize_t s1, s2, s3;
3238
- dtype x, y;
3239
- BIT_DIGIT b;
3240
- INIT_COUNTER(lp, i);
3241
- INIT_PTR(lp, 0, p1, s1);
3242
- INIT_PTR(lp, 1, p2, s2);
3243
- INIT_PTR_BIT(lp, 2, a3, p3, s3);
3244
- for (; i--;) {
3245
- GET_DATA_STRIDE(p1, s1, dtype, x);
3246
- GET_DATA_STRIDE(p2, s2, dtype, y);
3247
- b = (m_nearly_eq(x, y)) ? 1 : 0;
3248
- STORE_BIT(a3, p3, b);
3249
- p3 += s3;
3250
- }
3251
- }
3252
-
3253
- static VALUE sfloat_nearly_eq_self(VALUE self, VALUE other) {
3254
- ndfunc_arg_in_t ain[2] = { { cT, 0 }, { cT, 0 } };
3255
- ndfunc_arg_out_t aout[1] = { { numo_cBit, 0 } };
3256
- ndfunc_t ndf = { iter_sfloat_nearly_eq, STRIDE_LOOP, 2, 1, ain, aout };
3257
-
3258
- return na_ndloop(&ndf, 2, self, other);
3259
- }
3260
-
3261
- static VALUE sfloat_nearly_eq(VALUE self, VALUE other) {
3262
-
3263
- VALUE klass, v;
3264
- klass = na_upcast(rb_obj_class(self), rb_obj_class(other));
3265
- if (klass == cT) {
3266
- return sfloat_nearly_eq_self(self, other);
3267
- } else {
3268
- v = rb_funcall(klass, id_cast, 1, self);
3269
- return rb_funcall(v, id_nearly_eq, 1, other);
3270
- }
3271
- }
3272
-
3273
- static void iter_sfloat_floor(na_loop_t* const lp) {
3274
- size_t i, n;
3275
- char *p1, *p2;
3276
- ssize_t s1, s2;
3277
- size_t *idx1, *idx2;
3278
- dtype x;
3279
-
3280
- INIT_COUNTER(lp, n);
3281
- INIT_PTR_IDX(lp, 0, p1, s1, idx1);
3282
- INIT_PTR_IDX(lp, 1, p2, s2, idx2);
3283
-
3284
- if (idx1) {
3285
- if (idx2) {
3286
- for (i = 0; i < n; i++) {
3287
- GET_DATA_INDEX(p1, idx1, dtype, x);
3288
- x = m_floor(x);
3289
- SET_DATA_INDEX(p2, idx2, dtype, x);
3290
- }
3291
- } else {
3292
- for (i = 0; i < n; i++) {
3293
- GET_DATA_INDEX(p1, idx1, dtype, x);
3294
- x = m_floor(x);
3295
- SET_DATA_STRIDE(p2, s2, dtype, x);
3296
- }
3297
- }
3298
- } else {
3299
- if (idx2) {
3300
- for (i = 0; i < n; i++) {
3301
- GET_DATA_STRIDE(p1, s1, dtype, x);
3302
- x = m_floor(x);
3303
- SET_DATA_INDEX(p2, idx2, dtype, x);
3304
- }
3305
- } else {
3306
- //
3307
- if (is_aligned(p1, sizeof(dtype)) && is_aligned(p2, sizeof(dtype))) {
3308
- if (s1 == sizeof(dtype) && s2 == sizeof(dtype)) {
3309
- for (i = 0; i < n; i++) {
3310
- ((dtype*)p2)[i] = m_floor(((dtype*)p1)[i]);
3311
- }
3312
- return;
3313
- }
3314
- if (is_aligned_step(s1, sizeof(dtype)) && is_aligned_step(s2, sizeof(dtype))) {
3315
- //
3316
- for (i = 0; i < n; i++) {
3317
- *(dtype*)p2 = m_floor(*(dtype*)p1);
3318
- p1 += s1;
3319
- p2 += s2;
3320
- }
3321
- return;
3322
- //
3323
- }
3324
- }
3325
- for (i = 0; i < n; i++) {
3326
- GET_DATA_STRIDE(p1, s1, dtype, x);
3327
- x = m_floor(x);
3328
- SET_DATA_STRIDE(p2, s2, dtype, x);
3329
- }
3330
- //
3331
- }
3332
- }
3333
- }
3334
-
3335
- static VALUE sfloat_floor(VALUE self) {
3336
- ndfunc_arg_in_t ain[1] = { { cT, 0 } };
3337
- ndfunc_arg_out_t aout[1] = { { cT, 0 } };
3338
- ndfunc_t ndf = { iter_sfloat_floor, FULL_LOOP, 1, 1, ain, aout };
3339
-
3340
- return na_ndloop(&ndf, 1, self);
3341
- }
3342
-
3343
- static void iter_sfloat_round(na_loop_t* const lp) {
3344
- size_t i, n;
3345
- char *p1, *p2;
3346
- ssize_t s1, s2;
3347
- size_t *idx1, *idx2;
3348
- dtype x;
3349
-
3350
- INIT_COUNTER(lp, n);
3351
- INIT_PTR_IDX(lp, 0, p1, s1, idx1);
3352
- INIT_PTR_IDX(lp, 1, p2, s2, idx2);
3353
-
3354
- if (idx1) {
3355
- if (idx2) {
3356
- for (i = 0; i < n; i++) {
3357
- GET_DATA_INDEX(p1, idx1, dtype, x);
3358
- x = m_round(x);
3359
- SET_DATA_INDEX(p2, idx2, dtype, x);
3360
- }
3361
- } else {
3362
- for (i = 0; i < n; i++) {
3363
- GET_DATA_INDEX(p1, idx1, dtype, x);
3364
- x = m_round(x);
3365
- SET_DATA_STRIDE(p2, s2, dtype, x);
3366
- }
3367
- }
3368
- } else {
3369
- if (idx2) {
3370
- for (i = 0; i < n; i++) {
3371
- GET_DATA_STRIDE(p1, s1, dtype, x);
3372
- x = m_round(x);
3373
- SET_DATA_INDEX(p2, idx2, dtype, x);
3374
- }
3375
- } else {
3376
- //
3377
- if (is_aligned(p1, sizeof(dtype)) && is_aligned(p2, sizeof(dtype))) {
3378
- if (s1 == sizeof(dtype) && s2 == sizeof(dtype)) {
3379
- for (i = 0; i < n; i++) {
3380
- ((dtype*)p2)[i] = m_round(((dtype*)p1)[i]);
3381
- }
3382
- return;
3383
- }
3384
- if (is_aligned_step(s1, sizeof(dtype)) && is_aligned_step(s2, sizeof(dtype))) {
3385
- //
3386
- for (i = 0; i < n; i++) {
3387
- *(dtype*)p2 = m_round(*(dtype*)p1);
3388
- p1 += s1;
3389
- p2 += s2;
3390
- }
3391
- return;
3392
- //
3393
- }
3394
- }
3395
- for (i = 0; i < n; i++) {
3396
- GET_DATA_STRIDE(p1, s1, dtype, x);
3397
- x = m_round(x);
3398
- SET_DATA_STRIDE(p2, s2, dtype, x);
3399
- }
3400
- //
3401
- }
3402
- }
3403
- }
3404
-
3405
- static VALUE sfloat_round(VALUE self) {
3406
- ndfunc_arg_in_t ain[1] = { { cT, 0 } };
3407
- ndfunc_arg_out_t aout[1] = { { cT, 0 } };
3408
- ndfunc_t ndf = { iter_sfloat_round, FULL_LOOP, 1, 1, ain, aout };
3409
-
3410
- return na_ndloop(&ndf, 1, self);
3411
- }
3412
-
3413
- static void iter_sfloat_ceil(na_loop_t* const lp) {
3414
- size_t i, n;
3415
- char *p1, *p2;
3416
- ssize_t s1, s2;
3417
- size_t *idx1, *idx2;
3418
- dtype x;
3419
-
3420
- INIT_COUNTER(lp, n);
3421
- INIT_PTR_IDX(lp, 0, p1, s1, idx1);
3422
- INIT_PTR_IDX(lp, 1, p2, s2, idx2);
3423
-
3424
- if (idx1) {
3425
- if (idx2) {
3426
- for (i = 0; i < n; i++) {
3427
- GET_DATA_INDEX(p1, idx1, dtype, x);
3428
- x = m_ceil(x);
3429
- SET_DATA_INDEX(p2, idx2, dtype, x);
3430
- }
3431
- } else {
3432
- for (i = 0; i < n; i++) {
3433
- GET_DATA_INDEX(p1, idx1, dtype, x);
3434
- x = m_ceil(x);
3435
- SET_DATA_STRIDE(p2, s2, dtype, x);
3436
- }
3437
- }
3438
- } else {
3439
- if (idx2) {
3440
- for (i = 0; i < n; i++) {
3441
- GET_DATA_STRIDE(p1, s1, dtype, x);
3442
- x = m_ceil(x);
3443
- SET_DATA_INDEX(p2, idx2, dtype, x);
3444
- }
3445
- } else {
3446
- //
3447
- if (is_aligned(p1, sizeof(dtype)) && is_aligned(p2, sizeof(dtype))) {
3448
- if (s1 == sizeof(dtype) && s2 == sizeof(dtype)) {
3449
- for (i = 0; i < n; i++) {
3450
- ((dtype*)p2)[i] = m_ceil(((dtype*)p1)[i]);
3451
- }
3452
- return;
3453
- }
3454
- if (is_aligned_step(s1, sizeof(dtype)) && is_aligned_step(s2, sizeof(dtype))) {
3455
- //
3456
- for (i = 0; i < n; i++) {
3457
- *(dtype*)p2 = m_ceil(*(dtype*)p1);
3458
- p1 += s1;
3459
- p2 += s2;
3460
- }
3461
- return;
3462
- //
3463
- }
3464
- }
3465
- for (i = 0; i < n; i++) {
3466
- GET_DATA_STRIDE(p1, s1, dtype, x);
3467
- x = m_ceil(x);
3468
- SET_DATA_STRIDE(p2, s2, dtype, x);
3469
- }
3470
- //
3471
- }
3472
- }
3473
- }
3474
-
3475
- static VALUE sfloat_ceil(VALUE self) {
3476
- ndfunc_arg_in_t ain[1] = { { cT, 0 } };
3477
- ndfunc_arg_out_t aout[1] = { { cT, 0 } };
3478
- ndfunc_t ndf = { iter_sfloat_ceil, FULL_LOOP, 1, 1, ain, aout };
3479
-
3480
- return na_ndloop(&ndf, 1, self);
3481
- }
3482
-
3483
- static void iter_sfloat_trunc(na_loop_t* const lp) {
3484
- size_t i, n;
3485
- char *p1, *p2;
3486
- ssize_t s1, s2;
3487
- size_t *idx1, *idx2;
3488
- dtype x;
3489
-
3490
- INIT_COUNTER(lp, n);
3491
- INIT_PTR_IDX(lp, 0, p1, s1, idx1);
3492
- INIT_PTR_IDX(lp, 1, p2, s2, idx2);
3493
-
3494
- if (idx1) {
3495
- if (idx2) {
3496
- for (i = 0; i < n; i++) {
3497
- GET_DATA_INDEX(p1, idx1, dtype, x);
3498
- x = m_trunc(x);
3499
- SET_DATA_INDEX(p2, idx2, dtype, x);
3500
- }
3501
- } else {
3502
- for (i = 0; i < n; i++) {
3503
- GET_DATA_INDEX(p1, idx1, dtype, x);
3504
- x = m_trunc(x);
3505
- SET_DATA_STRIDE(p2, s2, dtype, x);
3506
- }
3507
- }
3508
- } else {
3509
- if (idx2) {
3510
- for (i = 0; i < n; i++) {
3511
- GET_DATA_STRIDE(p1, s1, dtype, x);
3512
- x = m_trunc(x);
3513
- SET_DATA_INDEX(p2, idx2, dtype, x);
3514
- }
3515
- } else {
3516
- //
3517
- if (is_aligned(p1, sizeof(dtype)) && is_aligned(p2, sizeof(dtype))) {
3518
- if (s1 == sizeof(dtype) && s2 == sizeof(dtype)) {
3519
- for (i = 0; i < n; i++) {
3520
- ((dtype*)p2)[i] = m_trunc(((dtype*)p1)[i]);
3521
- }
3522
- return;
3523
- }
3524
- if (is_aligned_step(s1, sizeof(dtype)) && is_aligned_step(s2, sizeof(dtype))) {
3525
- //
3526
- for (i = 0; i < n; i++) {
3527
- *(dtype*)p2 = m_trunc(*(dtype*)p1);
3528
- p1 += s1;
3529
- p2 += s2;
3530
- }
3531
- return;
3532
- //
3533
- }
3534
- }
3535
- for (i = 0; i < n; i++) {
3536
- GET_DATA_STRIDE(p1, s1, dtype, x);
3537
- x = m_trunc(x);
3538
- SET_DATA_STRIDE(p2, s2, dtype, x);
3539
- }
3540
- //
3541
- }
3542
- }
3543
- }
3544
-
3545
- static VALUE sfloat_trunc(VALUE self) {
3546
- ndfunc_arg_in_t ain[1] = { { cT, 0 } };
3547
- ndfunc_arg_out_t aout[1] = { { cT, 0 } };
3548
- ndfunc_t ndf = { iter_sfloat_trunc, FULL_LOOP, 1, 1, ain, aout };
3549
-
3550
- return na_ndloop(&ndf, 1, self);
3551
- }
3552
-
3553
- static void iter_sfloat_rint(na_loop_t* const lp) {
3554
- size_t i, n;
3555
- char *p1, *p2;
3556
- ssize_t s1, s2;
3557
- size_t *idx1, *idx2;
3558
- dtype x;
3559
-
3560
- INIT_COUNTER(lp, n);
3561
- INIT_PTR_IDX(lp, 0, p1, s1, idx1);
3562
- INIT_PTR_IDX(lp, 1, p2, s2, idx2);
3563
-
3564
- if (idx1) {
3565
- if (idx2) {
3566
- for (i = 0; i < n; i++) {
3567
- GET_DATA_INDEX(p1, idx1, dtype, x);
3568
- x = m_rint(x);
3569
- SET_DATA_INDEX(p2, idx2, dtype, x);
3570
- }
3571
- } else {
3572
- for (i = 0; i < n; i++) {
3573
- GET_DATA_INDEX(p1, idx1, dtype, x);
3574
- x = m_rint(x);
3575
- SET_DATA_STRIDE(p2, s2, dtype, x);
3576
- }
3577
- }
3578
- } else {
3579
- if (idx2) {
3580
- for (i = 0; i < n; i++) {
3581
- GET_DATA_STRIDE(p1, s1, dtype, x);
3582
- x = m_rint(x);
3583
- SET_DATA_INDEX(p2, idx2, dtype, x);
3584
- }
3585
- } else {
3586
- //
3587
- if (is_aligned(p1, sizeof(dtype)) && is_aligned(p2, sizeof(dtype))) {
3588
- if (s1 == sizeof(dtype) && s2 == sizeof(dtype)) {
3589
- for (i = 0; i < n; i++) {
3590
- ((dtype*)p2)[i] = m_rint(((dtype*)p1)[i]);
3591
- }
3592
- return;
3593
- }
3594
- if (is_aligned_step(s1, sizeof(dtype)) && is_aligned_step(s2, sizeof(dtype))) {
3595
- //
3596
- for (i = 0; i < n; i++) {
3597
- *(dtype*)p2 = m_rint(*(dtype*)p1);
3598
- p1 += s1;
3599
- p2 += s2;
3600
- }
3601
- return;
3602
- //
3603
- }
3604
- }
3605
- for (i = 0; i < n; i++) {
3606
- GET_DATA_STRIDE(p1, s1, dtype, x);
3607
- x = m_rint(x);
3608
- SET_DATA_STRIDE(p2, s2, dtype, x);
3609
- }
3610
- //
3611
- }
3612
- }
3613
- }
3614
-
3615
- static VALUE sfloat_rint(VALUE self) {
- ndfunc_arg_in_t ain[1] = { { cT, 0 } };
- ndfunc_arg_out_t aout[1] = { { cT, 0 } };
- ndfunc_t ndf = { iter_sfloat_rint, FULL_LOOP, 1, 1, ain, aout };
-
- return na_ndloop(&ndf, 1, self);
- }
-
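For context (not part of the diff itself): floor, round, ceil, trunc and rint above each reduce a value to an integer-valued float; the C math functions of the same names differ only in how fractions and ties are resolved. A small standalone comparison (rintf assumes the default round-to-nearest-even rounding mode):

    #include <stdio.h>
    #include <math.h>

    int main(void) {
        const float v[4] = { 2.5f, -2.5f, 1.5f, -0.7f };
        for (int i = 0; i < 4; i++) {
            float x = v[i];
            /* roundf rounds halves away from zero; rintf follows the current
               rounding mode, round-to-nearest-even by default. */
            printf("x=%5.2f floor=%5.1f ceil=%5.1f trunc=%5.1f round=%5.1f rint=%5.1f\n",
                   x, floorf(x), ceilf(x), truncf(x), roundf(x), rintf(x));
        }
        return 0;
    }

(Link with -lm where needed.)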
3623
- #define check_intdivzero(y) \
- {}
-
3626
- static void iter_sfloat_copysign(na_loop_t* const lp) {
3627
- size_t i = 0;
3628
- size_t n;
3629
- char *p1, *p2, *p3;
3630
- ssize_t s1, s2, s3;
3631
-
3632
- INIT_COUNTER(lp, n);
3633
- INIT_PTR(lp, 0, p1, s1);
3634
- INIT_PTR(lp, 1, p2, s2);
3635
- INIT_PTR(lp, 2, p3, s3);
3636
-
3637
- //
3638
- if (is_aligned(p1, sizeof(dtype)) && is_aligned(p2, sizeof(dtype)) &&
3639
- is_aligned(p3, sizeof(dtype))) {
3640
-
3641
- if (s1 == sizeof(dtype) && s2 == sizeof(dtype) && s3 == sizeof(dtype)) {
3642
- if (p1 == p3) { // inplace case
3643
- for (; i < n; i++) {
3644
- check_intdivzero(((dtype*)p2)[i]);
3645
- ((dtype*)p1)[i] = m_copysign(((dtype*)p1)[i], ((dtype*)p2)[i]);
3646
- }
3647
- } else {
3648
- for (; i < n; i++) {
3649
- check_intdivzero(((dtype*)p2)[i]);
3650
- ((dtype*)p3)[i] = m_copysign(((dtype*)p1)[i], ((dtype*)p2)[i]);
3651
- }
3652
- }
3653
- return;
3654
- }
3655
-
3656
- if (is_aligned_step(s1, sizeof(dtype)) && is_aligned_step(s2, sizeof(dtype)) &&
3657
- is_aligned_step(s3, sizeof(dtype))) {
3658
- //
3659
-
3660
- if (s2 == 0) { // Broadcasting from scalar value.
3661
- check_intdivzero(*(dtype*)p2);
3662
- if (s1 == sizeof(dtype) && s3 == sizeof(dtype)) {
3663
- if (p1 == p3) { // inplace case
3664
- for (; i < n; i++) {
3665
- ((dtype*)p1)[i] = m_copysign(((dtype*)p1)[i], *(dtype*)p2);
3666
- }
3667
- } else {
3668
- for (; i < n; i++) {
3669
- ((dtype*)p3)[i] = m_copysign(((dtype*)p1)[i], *(dtype*)p2);
3670
- }
3671
- }
3672
- } else {
3673
- for (i = 0; i < n; i++) {
3674
- *(dtype*)p3 = m_copysign(*(dtype*)p1, *(dtype*)p2);
3675
- p1 += s1;
3676
- p3 += s3;
3677
- }
3678
- }
3679
- } else {
3680
- if (p1 == p3) { // inplace case
3681
- for (i = 0; i < n; i++) {
3682
- check_intdivzero(*(dtype*)p2);
3683
- *(dtype*)p1 = m_copysign(*(dtype*)p1, *(dtype*)p2);
3684
- p1 += s1;
3685
- p2 += s2;
3686
- }
3687
- } else {
3688
- for (i = 0; i < n; i++) {
3689
- check_intdivzero(*(dtype*)p2);
3690
- *(dtype*)p3 = m_copysign(*(dtype*)p1, *(dtype*)p2);
3691
- p1 += s1;
3692
- p2 += s2;
3693
- p3 += s3;
3694
- }
3695
- }
3696
- }
3697
-
3698
- return;
3699
- //
3700
- }
3701
- }
3702
- for (i = 0; i < n; i++) {
3703
- dtype x, y, z;
3704
- GET_DATA_STRIDE(p1, s1, dtype, x);
3705
- GET_DATA_STRIDE(p2, s2, dtype, y);
3706
- check_intdivzero(y);
3707
- z = m_copysign(x, y);
3708
- SET_DATA_STRIDE(p3, s3, dtype, z);
3709
- }
3710
- //
3711
- }
3712
- #undef check_intdivzero
3713
-
3714
- static VALUE sfloat_copysign_self(VALUE self, VALUE other) {
- ndfunc_arg_in_t ain[2] = { { cT, 0 }, { cT, 0 } };
- ndfunc_arg_out_t aout[1] = { { cT, 0 } };
- ndfunc_t ndf = { iter_sfloat_copysign, STRIDE_LOOP, 2, 1, ain, aout };
-
- return na_ndloop(&ndf, 2, self, other);
- }
-
- static VALUE sfloat_copysign(VALUE self, VALUE other) {
-
- VALUE klass, v;
-
- klass = na_upcast(rb_obj_class(self), rb_obj_class(other));
- if (klass == cT) {
- return sfloat_copysign_self(self, other);
- } else {
- v = rb_funcall(klass, id_cast, 1, self);
- return rb_funcall(v, id_copysign, 1, other);
- }
- }
-
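For context (not part of the diff itself): in the copysign iterator above, a second input with stride 0 (the s2 == 0 branch, commented "Broadcasting from scalar value") is a scalar broadcast across the array, so the value at p2 is read in place and only p1/p3 advance. A plain-C sketch of that stride-0 broadcast, using float buffers and copysignf in place of the gem's m_copysign:

    #include <stdio.h>
    #include <stddef.h>
    #include <math.h>

    /* Binary elementwise loop over byte strides; s2 == 0 broadcasts the
       second operand, which is read in place and never advanced. */
    static void bcast_copysign(char *p1, ptrdiff_t s1,
                               char *p2, ptrdiff_t s2,
                               char *p3, ptrdiff_t s3, size_t n) {
        for (size_t i = 0; i < n; i++) {
            *(float *)p3 = copysignf(*(float *)p1, *(float *)p2);
            p1 += s1;
            p2 += s2;   /* stays put when s2 == 0 */
            p3 += s3;
        }
    }

    int main(void) {
        float a[4] = { 1.0f, -2.0f, 3.0f, -4.0f }, sign = -1.0f, out[4];
        bcast_copysign((char *)a, sizeof(float), (char *)&sign, 0,
                       (char *)out, sizeof(float), 4);
        for (int i = 0; i < 4; i++) printf("%g ", out[i]);   /* -1 -2 -3 -4 */
        printf("\n");
        return 0;
    }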
3735
- static void iter_sfloat_signbit(na_loop_t* const lp) {
3736
- size_t i;
3737
- char* p1;
3738
- BIT_DIGIT* a2;
3739
- size_t p2;
3740
- ssize_t s1, s2;
3741
- size_t* idx1;
3742
- dtype x;
3743
- BIT_DIGIT b;
3744
- INIT_COUNTER(lp, i);
3745
- INIT_PTR_IDX(lp, 0, p1, s1, idx1);
3746
- INIT_PTR_BIT(lp, 1, a2, p2, s2);
3747
- if (idx1) {
3748
- for (; i--;) {
3749
- GET_DATA_INDEX(p1, idx1, dtype, x);
3750
- b = (m_signbit(x)) ? 1 : 0;
3751
- STORE_BIT(a2, p2, b);
3752
- p2 += s2;
3753
- }
3754
- } else {
3755
- for (; i--;) {
3756
- GET_DATA_STRIDE(p1, s1, dtype, x);
3757
- b = (m_signbit(x)) ? 1 : 0;
3758
- STORE_BIT(a2, p2, b);
3759
- p2 += s2;
3760
- }
3761
- }
3762
- }
3763
-
3764
- static VALUE sfloat_signbit(VALUE self) {
- ndfunc_arg_in_t ain[1] = { { cT, 0 } };
- ndfunc_arg_out_t aout[1] = { { numo_cBit, 0 } };
- ndfunc_t ndf = { iter_sfloat_signbit, FULL_LOOP, 1, 1, ain, aout };
-
- return na_ndloop(&ndf, 1, self);
- }
-
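For context (not part of the diff itself): m_signbit in the iterator above presumably maps to C's signbit(), i.e. a test of the IEEE sign bit rather than an x < 0 comparison; the difference shows up for negative zero. A short standalone check:

    #include <stdio.h>
    #include <math.h>

    int main(void) {
        const float v[4] = { -0.0f, 0.0f, -3.0f, 3.0f };
        for (int i = 0; i < 4; i++) {
            float x = v[i];
            /* signbit() inspects the sign bit directly, so -0.0f reports 1
               even though (x < 0.0f) is false. */
            printf("x=%g  signbit=%d  (x<0)=%d\n", x, signbit(x) ? 1 : 0, x < 0.0f ? 1 : 0);
        }
        return 0;
    }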
3772
- static void iter_sfloat_modf(na_loop_t* const lp) {
3773
- size_t i;
3774
- char *p1, *p2, *p3;
3775
- ssize_t s1, s2, s3;
3776
- dtype x, y, z;
3777
- INIT_COUNTER(lp, i);
3778
- INIT_PTR(lp, 0, p1, s1);
3779
- INIT_PTR(lp, 1, p2, s2);
3780
- INIT_PTR(lp, 2, p3, s3);
3781
- for (; i--;) {
3782
- GET_DATA_STRIDE(p1, s1, dtype, x);
3783
- m_modf(x, y, z);
3784
- SET_DATA_STRIDE(p2, s2, dtype, y);
3785
- SET_DATA_STRIDE(p3, s3, dtype, z);
3786
- }
3787
- }
3788
-
3789
- static VALUE sfloat_modf(VALUE self) {
- ndfunc_arg_in_t ain[1] = { { cT, 0 } };
- ndfunc_arg_out_t aout[2] = { { cT, 0 }, { cT, 0 } };
- ndfunc_t ndf = { iter_sfloat_modf, STRIDE_LOOP, 1, 2, ain, aout };
-
- return na_ndloop(&ndf, 1, self);
- }
-
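For context (not part of the diff itself): sfloat_modf above is the one-input, two-output case: the ndfunc is declared with 1 input and 2 outputs, and the iterator stores two values per element via m_modf. C's modff performs the equivalent per-element split; which part lands in which output slot is up to the gem's m_modf definition, so the sketch below only shows the split itself:

    #include <stdio.h>
    #include <math.h>

    int main(void) {
        const float v[3] = { 3.75f, -1.25f, 10.0f };
        for (int i = 0; i < 3; i++) {
            float ipart;                       /* integral part */
            float frac = modff(v[i], &ipart);  /* fractional part, same sign as input */
            printf("x=%6.2f  frac=%6.2f  int=%6.2f\n", v[i], frac, ipart);
        }
        return 0;
    }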
3797
- static void iter_sfloat_gt(na_loop_t* const lp) {
3798
- size_t i;
3799
- char *p1, *p2;
3800
- BIT_DIGIT* a3;
3801
- size_t p3;
3802
- ssize_t s1, s2, s3;
3803
- dtype x, y;
3804
- BIT_DIGIT b;
3805
- INIT_COUNTER(lp, i);
3806
- INIT_PTR(lp, 0, p1, s1);
3807
- INIT_PTR(lp, 1, p2, s2);
3808
- INIT_PTR_BIT(lp, 2, a3, p3, s3);
3809
- for (; i--;) {
3810
- GET_DATA_STRIDE(p1, s1, dtype, x);
3811
- GET_DATA_STRIDE(p2, s2, dtype, y);
3812
- b = (m_gt(x, y)) ? 1 : 0;
3813
- STORE_BIT(a3, p3, b);
3814
- p3 += s3;
3815
- }
3816
- }
3817
-
3818
- static VALUE sfloat_gt_self(VALUE self, VALUE other) {
3819
- ndfunc_arg_in_t ain[2] = { { cT, 0 }, { cT, 0 } };
3820
- ndfunc_arg_out_t aout[1] = { { numo_cBit, 0 } };
3821
- ndfunc_t ndf = { iter_sfloat_gt, STRIDE_LOOP, 2, 1, ain, aout };
3822
-
3823
- return na_ndloop(&ndf, 2, self, other);
3824
- }
3825
-
3826
- static VALUE sfloat_gt(VALUE self, VALUE other) {
3827
-
3828
- VALUE klass, v;
3829
- klass = na_upcast(rb_obj_class(self), rb_obj_class(other));
3830
- if (klass == cT) {
3831
- return sfloat_gt_self(self, other);
3832
- } else {
3833
- v = rb_funcall(klass, id_cast, 1, self);
3834
- return rb_funcall(v, id_gt, 1, other);
3835
- }
3836
- }
3837
-
3838
- static void iter_sfloat_ge(na_loop_t* const lp) {
3839
- size_t i;
3840
- char *p1, *p2;
3841
- BIT_DIGIT* a3;
3842
- size_t p3;
3843
- ssize_t s1, s2, s3;
3844
- dtype x, y;
3845
- BIT_DIGIT b;
3846
- INIT_COUNTER(lp, i);
3847
- INIT_PTR(lp, 0, p1, s1);
3848
- INIT_PTR(lp, 1, p2, s2);
3849
- INIT_PTR_BIT(lp, 2, a3, p3, s3);
3850
- for (; i--;) {
3851
- GET_DATA_STRIDE(p1, s1, dtype, x);
3852
- GET_DATA_STRIDE(p2, s2, dtype, y);
3853
- b = (m_ge(x, y)) ? 1 : 0;
3854
- STORE_BIT(a3, p3, b);
3855
- p3 += s3;
3856
- }
3857
- }
3858
-
3859
- static VALUE sfloat_ge_self(VALUE self, VALUE other) {
3860
- ndfunc_arg_in_t ain[2] = { { cT, 0 }, { cT, 0 } };
3861
- ndfunc_arg_out_t aout[1] = { { numo_cBit, 0 } };
3862
- ndfunc_t ndf = { iter_sfloat_ge, STRIDE_LOOP, 2, 1, ain, aout };
3863
-
3864
- return na_ndloop(&ndf, 2, self, other);
3865
- }
3866
-
3867
- static VALUE sfloat_ge(VALUE self, VALUE other) {
3868
-
3869
- VALUE klass, v;
3870
- klass = na_upcast(rb_obj_class(self), rb_obj_class(other));
3871
- if (klass == cT) {
3872
- return sfloat_ge_self(self, other);
3873
- } else {
3874
- v = rb_funcall(klass, id_cast, 1, self);
3875
- return rb_funcall(v, id_ge, 1, other);
3876
- }
3877
- }
3878
-
3879
- static void iter_sfloat_lt(na_loop_t* const lp) {
3880
- size_t i;
3881
- char *p1, *p2;
3882
- BIT_DIGIT* a3;
3883
- size_t p3;
3884
- ssize_t s1, s2, s3;
3885
- dtype x, y;
3886
- BIT_DIGIT b;
3887
- INIT_COUNTER(lp, i);
3888
- INIT_PTR(lp, 0, p1, s1);
3889
- INIT_PTR(lp, 1, p2, s2);
3890
- INIT_PTR_BIT(lp, 2, a3, p3, s3);
3891
- for (; i--;) {
3892
- GET_DATA_STRIDE(p1, s1, dtype, x);
3893
- GET_DATA_STRIDE(p2, s2, dtype, y);
3894
- b = (m_lt(x, y)) ? 1 : 0;
3895
- STORE_BIT(a3, p3, b);
3896
- p3 += s3;
3897
- }
3898
- }
3899
-
3900
- static VALUE sfloat_lt_self(VALUE self, VALUE other) {
3901
- ndfunc_arg_in_t ain[2] = { { cT, 0 }, { cT, 0 } };
3902
- ndfunc_arg_out_t aout[1] = { { numo_cBit, 0 } };
3903
- ndfunc_t ndf = { iter_sfloat_lt, STRIDE_LOOP, 2, 1, ain, aout };
3904
-
3905
- return na_ndloop(&ndf, 2, self, other);
3906
- }
3907
-
3908
- static VALUE sfloat_lt(VALUE self, VALUE other) {
3909
-
3910
- VALUE klass, v;
3911
- klass = na_upcast(rb_obj_class(self), rb_obj_class(other));
3912
- if (klass == cT) {
3913
- return sfloat_lt_self(self, other);
3914
- } else {
3915
- v = rb_funcall(klass, id_cast, 1, self);
3916
- return rb_funcall(v, id_lt, 1, other);
3917
- }
3918
- }
3919
-
3920
- static void iter_sfloat_le(na_loop_t* const lp) {
3921
- size_t i;
3922
- char *p1, *p2;
3923
- BIT_DIGIT* a3;
3924
- size_t p3;
3925
- ssize_t s1, s2, s3;
3926
- dtype x, y;
3927
- BIT_DIGIT b;
3928
- INIT_COUNTER(lp, i);
3929
- INIT_PTR(lp, 0, p1, s1);
3930
- INIT_PTR(lp, 1, p2, s2);
3931
- INIT_PTR_BIT(lp, 2, a3, p3, s3);
3932
- for (; i--;) {
3933
- GET_DATA_STRIDE(p1, s1, dtype, x);
3934
- GET_DATA_STRIDE(p2, s2, dtype, y);
3935
- b = (m_le(x, y)) ? 1 : 0;
3936
- STORE_BIT(a3, p3, b);
3937
- p3 += s3;
3938
- }
3939
- }
3940
-
3941
- static VALUE sfloat_le_self(VALUE self, VALUE other) {
3942
- ndfunc_arg_in_t ain[2] = { { cT, 0 }, { cT, 0 } };
3943
- ndfunc_arg_out_t aout[1] = { { numo_cBit, 0 } };
3944
- ndfunc_t ndf = { iter_sfloat_le, STRIDE_LOOP, 2, 1, ain, aout };
3945
-
3946
- return na_ndloop(&ndf, 2, self, other);
3947
- }
3948
-
3949
- static VALUE sfloat_le(VALUE self, VALUE other) {
3950
-
3951
- VALUE klass, v;
3952
- klass = na_upcast(rb_obj_class(self), rb_obj_class(other));
3953
- if (klass == cT) {
3954
- return sfloat_le_self(self, other);
3955
- } else {
3956
- v = rb_funcall(klass, id_cast, 1, self);
3957
- return rb_funcall(v, id_le, 1, other);
3958
- }
3959
- }
3960
-
3961
- static void iter_sfloat_isnan(na_loop_t* const lp) {
3962
- size_t i;
3963
- char* p1;
3964
- BIT_DIGIT* a2;
3965
- size_t p2;
3966
- ssize_t s1, s2;
3967
- size_t* idx1;
3968
- dtype x;
3969
- BIT_DIGIT b;
3970
- INIT_COUNTER(lp, i);
3971
- INIT_PTR_IDX(lp, 0, p1, s1, idx1);
3972
- INIT_PTR_BIT(lp, 1, a2, p2, s2);
3973
- if (idx1) {
3974
- for (; i--;) {
3975
- GET_DATA_INDEX(p1, idx1, dtype, x);
3976
- b = (m_isnan(x)) ? 1 : 0;
3977
- STORE_BIT(a2, p2, b);
3978
- p2 += s2;
3979
- }
3980
- } else {
3981
- for (; i--;) {
3982
- GET_DATA_STRIDE(p1, s1, dtype, x);
3983
- b = (m_isnan(x)) ? 1 : 0;
3984
- STORE_BIT(a2, p2, b);
3985
- p2 += s2;
+ return;
+ //
+ }
+ }
+ for (i = 0; i < n; i++) {
+ GET_DATA_STRIDE(p1, s1, dtype, x);
+ x = m_sign(x);
+ SET_DATA_STRIDE(p2, s2, dtype, x);
+ }
+ //
  }
  }
  }

- static VALUE sfloat_isnan(VALUE self) {
+ static VALUE sfloat_sign(VALUE self) {
  ndfunc_arg_in_t ain[1] = { { cT, 0 } };
- ndfunc_arg_out_t aout[1] = { { numo_cBit, 0 } };
- ndfunc_t ndf = { iter_sfloat_isnan, FULL_LOOP, 1, 1, ain, aout };
+ ndfunc_arg_out_t aout[1] = { { cT, 0 } };
+ ndfunc_t ndf = { iter_sfloat_sign, FULL_LOOP, 1, 1, ain, aout };

  return na_ndloop(&ndf, 1, self);
  }

3998
- static void iter_sfloat_isinf(na_loop_t* const lp) {
3999
- size_t i;
4000
- char* p1;
4001
- BIT_DIGIT* a2;
4002
- size_t p2;
1897
+ static void iter_sfloat_square(na_loop_t* const lp) {
1898
+ size_t i, n;
1899
+ char *p1, *p2;
4003
1900
  ssize_t s1, s2;
4004
- size_t* idx1;
1901
+ size_t *idx1, *idx2;
4005
1902
  dtype x;
4006
- BIT_DIGIT b;
4007
- INIT_COUNTER(lp, i);
1903
+
1904
+ INIT_COUNTER(lp, n);
4008
1905
  INIT_PTR_IDX(lp, 0, p1, s1, idx1);
4009
- INIT_PTR_BIT(lp, 1, a2, p2, s2);
1906
+ INIT_PTR_IDX(lp, 1, p2, s2, idx2);
1907
+
4010
1908
  if (idx1) {
4011
- for (; i--;) {
4012
- GET_DATA_INDEX(p1, idx1, dtype, x);
4013
- b = (m_isinf(x)) ? 1 : 0;
4014
- STORE_BIT(a2, p2, b);
4015
- p2 += s2;
1909
+ if (idx2) {
1910
+ for (i = 0; i < n; i++) {
1911
+ GET_DATA_INDEX(p1, idx1, dtype, x);
1912
+ x = m_square(x);
1913
+ SET_DATA_INDEX(p2, idx2, dtype, x);
1914
+ }
1915
+ } else {
1916
+ for (i = 0; i < n; i++) {
1917
+ GET_DATA_INDEX(p1, idx1, dtype, x);
1918
+ x = m_square(x);
1919
+ SET_DATA_STRIDE(p2, s2, dtype, x);
1920
+ }
4016
1921
  }
4017
1922
  } else {
4018
- for (; i--;) {
4019
- GET_DATA_STRIDE(p1, s1, dtype, x);
4020
- b = (m_isinf(x)) ? 1 : 0;
4021
- STORE_BIT(a2, p2, b);
4022
- p2 += s2;
1923
+ if (idx2) {
1924
+ for (i = 0; i < n; i++) {
1925
+ GET_DATA_STRIDE(p1, s1, dtype, x);
1926
+ x = m_square(x);
1927
+ SET_DATA_INDEX(p2, idx2, dtype, x);
1928
+ }
1929
+ } else {
1930
+ //
1931
+ if (is_aligned(p1, sizeof(dtype)) && is_aligned(p2, sizeof(dtype))) {
1932
+ if (s1 == sizeof(dtype) && s2 == sizeof(dtype)) {
1933
+ for (i = 0; i < n; i++) {
1934
+ ((dtype*)p2)[i] = m_square(((dtype*)p1)[i]);
1935
+ }
1936
+ return;
1937
+ }
1938
+ if (is_aligned_step(s1, sizeof(dtype)) && is_aligned_step(s2, sizeof(dtype))) {
1939
+ //
1940
+ for (i = 0; i < n; i++) {
1941
+ *(dtype*)p2 = m_square(*(dtype*)p1);
1942
+ p1 += s1;
1943
+ p2 += s2;
1944
+ }
1945
+ return;
1946
+ //
1947
+ }
1948
+ }
1949
+ for (i = 0; i < n; i++) {
1950
+ GET_DATA_STRIDE(p1, s1, dtype, x);
1951
+ x = m_square(x);
1952
+ SET_DATA_STRIDE(p2, s2, dtype, x);
1953
+ }
1954
+ //
4023
1955
  }
4024
1956
  }
4025
1957
  }
4026
1958
 
4027
- static VALUE sfloat_isinf(VALUE self) {
1959
+ static VALUE sfloat_square(VALUE self) {
4028
1960
  ndfunc_arg_in_t ain[1] = { { cT, 0 } };
4029
- ndfunc_arg_out_t aout[1] = { { numo_cBit, 0 } };
4030
- ndfunc_t ndf = { iter_sfloat_isinf, FULL_LOOP, 1, 1, ain, aout };
1961
+ ndfunc_arg_out_t aout[1] = { { cT, 0 } };
1962
+ ndfunc_t ndf = { iter_sfloat_square, FULL_LOOP, 1, 1, ain, aout };
4031
1963
 
4032
1964
  return na_ndloop(&ndf, 1, self);
4033
1965
  }
4034
1966
 
4035
- static void iter_sfloat_isposinf(na_loop_t* const lp) {
4036
- size_t i;
4037
- char* p1;
4038
- BIT_DIGIT* a2;
4039
- size_t p2;
4040
- ssize_t s1, s2;
4041
- size_t* idx1;
4042
- dtype x;
4043
- BIT_DIGIT b;
4044
- INIT_COUNTER(lp, i);
4045
- INIT_PTR_IDX(lp, 0, p1, s1, idx1);
4046
- INIT_PTR_BIT(lp, 1, a2, p2, s2);
4047
- if (idx1) {
4048
- for (; i--;) {
4049
- GET_DATA_INDEX(p1, idx1, dtype, x);
4050
- b = (m_isposinf(x)) ? 1 : 0;
4051
- STORE_BIT(a2, p2, b);
4052
- p2 += s2;
1967
+ #define check_intdivzero(y) \
1968
+ {}
1969
+
1970
+ static void iter_sfloat_copysign(na_loop_t* const lp) {
1971
+ size_t i = 0;
1972
+ size_t n;
1973
+ char *p1, *p2, *p3;
1974
+ ssize_t s1, s2, s3;
1975
+
1976
+ INIT_COUNTER(lp, n);
1977
+ INIT_PTR(lp, 0, p1, s1);
1978
+ INIT_PTR(lp, 1, p2, s2);
1979
+ INIT_PTR(lp, 2, p3, s3);
1980
+
1981
+ //
1982
+ if (is_aligned(p1, sizeof(dtype)) && is_aligned(p2, sizeof(dtype)) &&
1983
+ is_aligned(p3, sizeof(dtype))) {
1984
+
1985
+ if (s1 == sizeof(dtype) && s2 == sizeof(dtype) && s3 == sizeof(dtype)) {
1986
+ if (p1 == p3) { // inplace case
1987
+ for (; i < n; i++) {
1988
+ check_intdivzero(((dtype*)p2)[i]);
1989
+ ((dtype*)p1)[i] = m_copysign(((dtype*)p1)[i], ((dtype*)p2)[i]);
1990
+ }
1991
+ } else {
1992
+ for (; i < n; i++) {
1993
+ check_intdivzero(((dtype*)p2)[i]);
1994
+ ((dtype*)p3)[i] = m_copysign(((dtype*)p1)[i], ((dtype*)p2)[i]);
1995
+ }
1996
+ }
1997
+ return;
4053
1998
  }
4054
- } else {
4055
- for (; i--;) {
4056
- GET_DATA_STRIDE(p1, s1, dtype, x);
4057
- b = (m_isposinf(x)) ? 1 : 0;
4058
- STORE_BIT(a2, p2, b);
4059
- p2 += s2;
1999
+
2000
+ if (is_aligned_step(s1, sizeof(dtype)) && is_aligned_step(s2, sizeof(dtype)) &&
2001
+ is_aligned_step(s3, sizeof(dtype))) {
2002
+ //
2003
+
2004
+ if (s2 == 0) { // Broadcasting from scalar value.
2005
+ check_intdivzero(*(dtype*)p2);
2006
+ if (s1 == sizeof(dtype) && s3 == sizeof(dtype)) {
2007
+ if (p1 == p3) { // inplace case
2008
+ for (; i < n; i++) {
2009
+ ((dtype*)p1)[i] = m_copysign(((dtype*)p1)[i], *(dtype*)p2);
2010
+ }
2011
+ } else {
2012
+ for (; i < n; i++) {
2013
+ ((dtype*)p3)[i] = m_copysign(((dtype*)p1)[i], *(dtype*)p2);
2014
+ }
2015
+ }
2016
+ } else {
2017
+ for (i = 0; i < n; i++) {
2018
+ *(dtype*)p3 = m_copysign(*(dtype*)p1, *(dtype*)p2);
2019
+ p1 += s1;
2020
+ p3 += s3;
2021
+ }
2022
+ }
2023
+ } else {
2024
+ if (p1 == p3) { // inplace case
2025
+ for (i = 0; i < n; i++) {
2026
+ check_intdivzero(*(dtype*)p2);
2027
+ *(dtype*)p1 = m_copysign(*(dtype*)p1, *(dtype*)p2);
2028
+ p1 += s1;
2029
+ p2 += s2;
2030
+ }
2031
+ } else {
2032
+ for (i = 0; i < n; i++) {
2033
+ check_intdivzero(*(dtype*)p2);
2034
+ *(dtype*)p3 = m_copysign(*(dtype*)p1, *(dtype*)p2);
2035
+ p1 += s1;
2036
+ p2 += s2;
2037
+ p3 += s3;
2038
+ }
2039
+ }
2040
+ }
2041
+
2042
+ return;
2043
+ //
4060
2044
  }
4061
2045
  }
2046
+ for (i = 0; i < n; i++) {
2047
+ dtype x, y, z;
2048
+ GET_DATA_STRIDE(p1, s1, dtype, x);
2049
+ GET_DATA_STRIDE(p2, s2, dtype, y);
2050
+ check_intdivzero(y);
2051
+ z = m_copysign(x, y);
2052
+ SET_DATA_STRIDE(p3, s3, dtype, z);
2053
+ }
2054
+ //
4062
2055
  }
2056
+ #undef check_intdivzero
4063
2057
 
4064
- static VALUE sfloat_isposinf(VALUE self) {
4065
- ndfunc_arg_in_t ain[1] = { { cT, 0 } };
4066
- ndfunc_arg_out_t aout[1] = { { numo_cBit, 0 } };
4067
- ndfunc_t ndf = { iter_sfloat_isposinf, FULL_LOOP, 1, 1, ain, aout };
2058
+ static VALUE sfloat_copysign_self(VALUE self, VALUE other) {
2059
+ ndfunc_arg_in_t ain[2] = { { cT, 0 }, { cT, 0 } };
2060
+ ndfunc_arg_out_t aout[1] = { { cT, 0 } };
2061
+ ndfunc_t ndf = { iter_sfloat_copysign, STRIDE_LOOP, 2, 1, ain, aout };
4068
2062
 
4069
- return na_ndloop(&ndf, 1, self);
2063
+ return na_ndloop(&ndf, 2, self, other);
2064
+ }
2065
+
2066
+ static VALUE sfloat_copysign(VALUE self, VALUE other) {
2067
+
2068
+ VALUE klass, v;
2069
+
2070
+ klass = na_upcast(rb_obj_class(self), rb_obj_class(other));
2071
+ if (klass == cT) {
2072
+ return sfloat_copysign_self(self, other);
2073
+ } else {
2074
+ v = rb_funcall(klass, id_cast, 1, self);
2075
+ return rb_funcall(v, id_copysign, 1, other);
2076
+ }
4070
2077
  }
4071
2078
 
4072
- static void iter_sfloat_isneginf(na_loop_t* const lp) {
2079
+ static void iter_sfloat_signbit(na_loop_t* const lp) {
4073
2080
  size_t i;
4074
2081
  char* p1;
4075
2082
  BIT_DIGIT* a2;
@@ -4084,61 +2091,49 @@ static void iter_sfloat_isneginf(na_loop_t* const lp) {
4084
2091
  if (idx1) {
4085
2092
  for (; i--;) {
4086
2093
  GET_DATA_INDEX(p1, idx1, dtype, x);
4087
- b = (m_isneginf(x)) ? 1 : 0;
2094
+ b = (m_signbit(x)) ? 1 : 0;
4088
2095
  STORE_BIT(a2, p2, b);
4089
2096
  p2 += s2;
4090
2097
  }
4091
2098
  } else {
4092
2099
  for (; i--;) {
4093
2100
  GET_DATA_STRIDE(p1, s1, dtype, x);
4094
- b = (m_isneginf(x)) ? 1 : 0;
2101
+ b = (m_signbit(x)) ? 1 : 0;
4095
2102
  STORE_BIT(a2, p2, b);
4096
2103
  p2 += s2;
4097
2104
  }
4098
2105
  }
4099
2106
  }
4100
2107
 
4101
- static VALUE sfloat_isneginf(VALUE self) {
2108
+ static VALUE sfloat_signbit(VALUE self) {
4102
2109
  ndfunc_arg_in_t ain[1] = { { cT, 0 } };
4103
2110
  ndfunc_arg_out_t aout[1] = { { numo_cBit, 0 } };
4104
- ndfunc_t ndf = { iter_sfloat_isneginf, FULL_LOOP, 1, 1, ain, aout };
2111
+ ndfunc_t ndf = { iter_sfloat_signbit, FULL_LOOP, 1, 1, ain, aout };
4105
2112
 
4106
2113
  return na_ndloop(&ndf, 1, self);
4107
2114
  }
4108
2115
 
4109
- static void iter_sfloat_isfinite(na_loop_t* const lp) {
2116
+ static void iter_sfloat_modf(na_loop_t* const lp) {
4110
2117
  size_t i;
4111
- char* p1;
4112
- BIT_DIGIT* a2;
4113
- size_t p2;
4114
- ssize_t s1, s2;
4115
- size_t* idx1;
4116
- dtype x;
4117
- BIT_DIGIT b;
2118
+ char *p1, *p2, *p3;
2119
+ ssize_t s1, s2, s3;
2120
+ dtype x, y, z;
4118
2121
  INIT_COUNTER(lp, i);
4119
- INIT_PTR_IDX(lp, 0, p1, s1, idx1);
4120
- INIT_PTR_BIT(lp, 1, a2, p2, s2);
4121
- if (idx1) {
4122
- for (; i--;) {
4123
- GET_DATA_INDEX(p1, idx1, dtype, x);
4124
- b = (m_isfinite(x)) ? 1 : 0;
4125
- STORE_BIT(a2, p2, b);
4126
- p2 += s2;
4127
- }
4128
- } else {
4129
- for (; i--;) {
4130
- GET_DATA_STRIDE(p1, s1, dtype, x);
4131
- b = (m_isfinite(x)) ? 1 : 0;
4132
- STORE_BIT(a2, p2, b);
4133
- p2 += s2;
4134
- }
2122
+ INIT_PTR(lp, 0, p1, s1);
2123
+ INIT_PTR(lp, 1, p2, s2);
2124
+ INIT_PTR(lp, 2, p3, s3);
2125
+ for (; i--;) {
2126
+ GET_DATA_STRIDE(p1, s1, dtype, x);
2127
+ m_modf(x, y, z);
2128
+ SET_DATA_STRIDE(p2, s2, dtype, y);
2129
+ SET_DATA_STRIDE(p3, s3, dtype, z);
4135
2130
  }
4136
2131
  }
4137
2132
 
4138
- static VALUE sfloat_isfinite(VALUE self) {
2133
+ static VALUE sfloat_modf(VALUE self) {
4139
2134
  ndfunc_arg_in_t ain[1] = { { cT, 0 } };
4140
- ndfunc_arg_out_t aout[1] = { { numo_cBit, 0 } };
4141
- ndfunc_t ndf = { iter_sfloat_isfinite, FULL_LOOP, 1, 1, ain, aout };
2135
+ ndfunc_arg_out_t aout[2] = { { cT, 0 }, { cT, 0 } };
2136
+ ndfunc_t ndf = { iter_sfloat_modf, STRIDE_LOOP, 1, 2, ain, aout };
4142
2137
 
4143
2138
  return na_ndloop(&ndf, 1, self);
4144
2139
  }