numo-narray-alt 0.9.11 → 0.9.13

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (97)
  1. checksums.yaml +4 -4
  2. data/Gemfile +0 -1
  3. data/README.md +7 -0
  4. data/ext/numo/narray/numo/narray.h +2 -2
  5. data/ext/numo/narray/numo/types/robj_macro.h +1 -1
  6. data/ext/numo/narray/src/mh/bincount.h +233 -0
  7. data/ext/numo/narray/src/mh/bit/and.h +225 -0
  8. data/ext/numo/narray/src/mh/bit/left_shift.h +225 -0
  9. data/ext/numo/narray/src/mh/bit/not.h +173 -0
  10. data/ext/numo/narray/src/mh/bit/or.h +225 -0
  11. data/ext/numo/narray/src/mh/bit/right_shift.h +225 -0
  12. data/ext/numo/narray/src/mh/bit/xor.h +225 -0
  13. data/ext/numo/narray/src/mh/coerce_cast.h +9 -0
  14. data/ext/numo/narray/src/mh/comp/binary_func.h +37 -0
  15. data/ext/numo/narray/src/mh/comp/eq.h +26 -0
  16. data/ext/numo/narray/src/mh/comp/ge.h +26 -0
  17. data/ext/numo/narray/src/mh/comp/gt.h +26 -0
  18. data/ext/numo/narray/src/mh/comp/le.h +26 -0
  19. data/ext/numo/narray/src/mh/comp/lt.h +26 -0
  20. data/ext/numo/narray/src/mh/comp/ne.h +26 -0
  21. data/ext/numo/narray/src/mh/comp/nearly_eq.h +26 -0
  22. data/ext/numo/narray/src/mh/divmod.h +142 -0
  23. data/ext/numo/narray/src/mh/eye.h +1 -1
  24. data/ext/numo/narray/src/mh/fill.h +94 -0
  25. data/ext/numo/narray/src/mh/format.h +108 -0
  26. data/ext/numo/narray/src/mh/format_to_a.h +89 -0
  27. data/ext/numo/narray/src/mh/inspect.h +33 -0
  28. data/ext/numo/narray/src/mh/isfinite.h +42 -0
  29. data/ext/numo/narray/src/mh/isinf.h +42 -0
  30. data/ext/numo/narray/src/mh/isnan.h +42 -0
  31. data/ext/numo/narray/src/mh/isneginf.h +42 -0
  32. data/ext/numo/narray/src/mh/isposinf.h +42 -0
  33. data/ext/numo/narray/src/mh/math/acos.h +2 -2
  34. data/ext/numo/narray/src/mh/math/acosh.h +2 -2
  35. data/ext/numo/narray/src/mh/math/asin.h +2 -2
  36. data/ext/numo/narray/src/mh/math/asinh.h +2 -2
  37. data/ext/numo/narray/src/mh/math/atan.h +2 -2
  38. data/ext/numo/narray/src/mh/math/atan2.h +3 -3
  39. data/ext/numo/narray/src/mh/math/atanh.h +2 -2
  40. data/ext/numo/narray/src/mh/math/cbrt.h +2 -2
  41. data/ext/numo/narray/src/mh/math/cos.h +2 -2
  42. data/ext/numo/narray/src/mh/math/cosh.h +2 -2
  43. data/ext/numo/narray/src/mh/math/erf.h +2 -2
  44. data/ext/numo/narray/src/mh/math/erfc.h +2 -2
  45. data/ext/numo/narray/src/mh/math/exp.h +2 -2
  46. data/ext/numo/narray/src/mh/math/exp10.h +2 -2
  47. data/ext/numo/narray/src/mh/math/exp2.h +2 -2
  48. data/ext/numo/narray/src/mh/math/expm1.h +2 -2
  49. data/ext/numo/narray/src/mh/math/frexp.h +3 -3
  50. data/ext/numo/narray/src/mh/math/hypot.h +3 -3
  51. data/ext/numo/narray/src/mh/math/ldexp.h +3 -3
  52. data/ext/numo/narray/src/mh/math/log.h +2 -2
  53. data/ext/numo/narray/src/mh/math/log10.h +2 -2
  54. data/ext/numo/narray/src/mh/math/log1p.h +2 -2
  55. data/ext/numo/narray/src/mh/math/log2.h +2 -2
  56. data/ext/numo/narray/src/mh/math/sin.h +2 -2
  57. data/ext/numo/narray/src/mh/math/sinc.h +2 -2
  58. data/ext/numo/narray/src/mh/math/sinh.h +2 -2
  59. data/ext/numo/narray/src/mh/math/sqrt.h +8 -8
  60. data/ext/numo/narray/src/mh/math/tan.h +2 -2
  61. data/ext/numo/narray/src/mh/math/tanh.h +2 -2
  62. data/ext/numo/narray/src/mh/math/unary_func.h +3 -3
  63. data/ext/numo/narray/src/mh/minus.h +125 -0
  64. data/ext/numo/narray/src/mh/op/add.h +78 -0
  65. data/ext/numo/narray/src/mh/op/binary_func.h +423 -0
  66. data/ext/numo/narray/src/mh/op/div.h +118 -0
  67. data/ext/numo/narray/src/mh/op/mod.h +108 -0
  68. data/ext/numo/narray/src/mh/op/mul.h +78 -0
  69. data/ext/numo/narray/src/mh/op/sub.h +78 -0
  70. data/ext/numo/narray/src/mh/pow.h +197 -0
  71. data/ext/numo/narray/src/mh/rand.h +2 -2
  72. data/ext/numo/narray/src/mh/rand_norm.h +125 -0
  73. data/ext/numo/narray/src/mh/reciprocal.h +125 -0
  74. data/ext/numo/narray/src/mh/round/ceil.h +11 -0
  75. data/ext/numo/narray/src/mh/round/floor.h +11 -0
  76. data/ext/numo/narray/src/mh/round/rint.h +9 -0
  77. data/ext/numo/narray/src/mh/round/round.h +11 -0
  78. data/ext/numo/narray/src/mh/round/trunc.h +11 -0
  79. data/ext/numo/narray/src/mh/round/unary_func.h +127 -0
  80. data/ext/numo/narray/src/mh/sign.h +125 -0
  81. data/ext/numo/narray/src/mh/square.h +125 -0
  82. data/ext/numo/narray/src/mh/to_a.h +78 -0
  83. data/ext/numo/narray/src/t_bit.c +45 -234
  84. data/ext/numo/narray/src/t_dcomplex.c +457 -2075
  85. data/ext/numo/narray/src/t_dfloat.c +154 -2560
  86. data/ext/numo/narray/src/t_int16.c +408 -2542
  87. data/ext/numo/narray/src/t_int32.c +408 -2542
  88. data/ext/numo/narray/src/t_int64.c +408 -2542
  89. data/ext/numo/narray/src/t_int8.c +409 -2138
  90. data/ext/numo/narray/src/t_robject.c +376 -2161
  91. data/ext/numo/narray/src/t_scomplex.c +435 -2053
  92. data/ext/numo/narray/src/t_sfloat.c +149 -2557
  93. data/ext/numo/narray/src/t_uint16.c +407 -2537
  94. data/ext/numo/narray/src/t_uint32.c +407 -2537
  95. data/ext/numo/narray/src/t_uint64.c +407 -2537
  96. data/ext/numo/narray/src/t_uint8.c +407 -2132
  97. metadata +47 -2
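
The numbers above summarize this release: most per-type sources under data/ext/numo/narray/src/ (t_sfloat.c, t_int32.c, and so on) shed on the order of two thousand lines each, while a new family of shared "method header" templates appears under src/mh/ (bincount, the bit operations, the comparison operators, divmod, pow, rand_norm, the isnan/isinf family, and more). The hunks below, taken from t_sfloat.c, show the mechanism: the file now includes the mh/ headers and expands one DEF_NARRAY_*_METHOD_FUNC macro per method instead of carrying a hand-written copy. As orientation only, here is a self-contained sketch of that generate-from-a-macro technique; the names in the sketch (DEF_FILL_FUNC, fill_sfloat) are hypothetical stand-ins, not the gem's actual API.

    #include <stdio.h>

    /* One macro generates a typed function body; each element type expands it
     * once.  This is the shape the new mh/ headers appear to follow, with the
     * real macros taking the element type and the Ruby class as parameters. */
    #define DEF_FILL_FUNC(tp)                              \
        static void fill_##tp(tp* p, size_t n, tp value) { \
            for (size_t i = 0; i < n; i++) {               \
                p[i] = value;                              \
            }                                              \
        }

    typedef float sfloat; /* same alias convention as t_sfloat.c */
    typedef double dfloat;

    DEF_FILL_FUNC(sfloat) /* expands to fill_sfloat() */
    DEF_FILL_FUNC(dfloat) /* expands to fill_dfloat() */

    int main(void) {
        sfloat a[4];
        fill_sfloat(a, 4, 1.5f);
        printf("%g %g %g %g\n", a[0], a[1], a[2], a[3]);
        return 0;
    }
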
@@ -42,7 +42,41 @@ static ID id_to_a;
  VALUE cT;
  extern VALUE cRT;

+ #include "mh/coerce_cast.h"
+ #include "mh/to_a.h"
+ #include "mh/fill.h"
+ #include "mh/format.h"
+ #include "mh/format_to_a.h"
+ #include "mh/inspect.h"
+ #include "mh/op/add.h"
+ #include "mh/op/sub.h"
+ #include "mh/op/mul.h"
+ #include "mh/op/div.h"
+ #include "mh/op/mod.h"
+ #include "mh/divmod.h"
+ #include "mh/pow.h"
+ #include "mh/minus.h"
+ #include "mh/reciprocal.h"
+ #include "mh/sign.h"
+ #include "mh/square.h"
+ #include "mh/round/floor.h"
+ #include "mh/round/round.h"
+ #include "mh/round/ceil.h"
+ #include "mh/round/trunc.h"
+ #include "mh/round/rint.h"
+ #include "mh/comp/eq.h"
+ #include "mh/comp/ne.h"
+ #include "mh/comp/nearly_eq.h"
+ #include "mh/comp/gt.h"
+ #include "mh/comp/ge.h"
+ #include "mh/comp/lt.h"
+ #include "mh/comp/le.h"
  #include "mh/clip.h"
+ #include "mh/isnan.h"
+ #include "mh/isinf.h"
+ #include "mh/isposinf.h"
+ #include "mh/isneginf.h"
+ #include "mh/isfinite.h"
  #include "mh/sum.h"
  #include "mh/prod.h"
  #include "mh/mean.h"
@@ -66,6 +100,7 @@ extern VALUE cRT;
  #include "mh/logseq.h"
  #include "mh/eye.h"
  #include "mh/rand.h"
+ #include "mh/rand_norm.h"
  #include "mh/math/sqrt.h"
  #include "mh/math/cbrt.h"
  #include "mh/math/log.h"
@@ -98,7 +133,48 @@ extern VALUE cRT;

  typedef float sfloat; // Type aliases for shorter notation
  // following the codebase naming convention.
+ DEF_NARRAY_COERCE_CAST_METHOD_FUNC(sfloat)
+ DEF_NARRAY_TO_A_METHOD_FUNC(sfloat)
+ DEF_NARRAY_FILL_METHOD_FUNC(sfloat)
+ DEF_NARRAY_FORMAT_METHOD_FUNC(sfloat)
+ DEF_NARRAY_FORMAT_TO_A_METHOD_FUNC(sfloat)
+ DEF_NARRAY_INSPECT_METHOD_FUNC(sfloat)
+ #ifdef __SSE2__
+ DEF_NARRAY_SFLT_ADD_SSE2_METHOD_FUNC()
+ DEF_NARRAY_SFLT_SUB_SSE2_METHOD_FUNC()
+ DEF_NARRAY_SFLT_MUL_SSE2_METHOD_FUNC()
+ DEF_NARRAY_SFLT_DIV_SSE2_METHOD_FUNC()
+ #else
+ DEF_NARRAY_ADD_METHOD_FUNC(sfloat, numo_cSFloat)
+ DEF_NARRAY_SUB_METHOD_FUNC(sfloat, numo_cSFloat)
+ DEF_NARRAY_MUL_METHOD_FUNC(sfloat, numo_cSFloat)
+ DEF_NARRAY_FLT_DIV_METHOD_FUNC(sfloat, numo_cSFloat)
+ #endif
+ DEF_NARRAY_FLT_MOD_METHOD_FUNC(sfloat, numo_cSFloat)
+ DEF_NARRAY_FLT_DIVMOD_METHOD_FUNC(sfloat, numo_cSFloat)
+ DEF_NARRAY_POW_METHOD_FUNC(sfloat, numo_cSFloat)
+ DEF_NARRAY_MINUS_METHOD_FUNC(sfloat, numo_cSFloat)
+ DEF_NARRAY_RECIPROCAL_METHOD_FUNC(sfloat, numo_cSFloat)
+ DEF_NARRAY_SIGN_METHOD_FUNC(sfloat, numo_cSFloat)
+ DEF_NARRAY_SQUARE_METHOD_FUNC(sfloat, numo_cSFloat)
+ DEF_NARRAY_FLT_FLOOR_METHOD_FUNC(sfloat, numo_cSFloat)
+ DEF_NARRAY_FLT_ROUND_METHOD_FUNC(sfloat, numo_cSFloat)
+ DEF_NARRAY_FLT_CEIL_METHOD_FUNC(sfloat, numo_cSFloat)
+ DEF_NARRAY_FLT_TRUNC_METHOD_FUNC(sfloat, numo_cSFloat)
+ DEF_NARRAY_FLT_RINT_METHOD_FUNC(sfloat, numo_cSFloat)
+ DEF_NARRAY_EQ_METHOD_FUNC(sfloat, numo_cSFloat)
+ DEF_NARRAY_NE_METHOD_FUNC(sfloat, numo_cSFloat)
+ DEF_NARRAY_NEARLY_EQ_METHOD_FUNC(sfloat, numo_cSFloat)
+ DEF_NARRAY_GT_METHOD_FUNC(sfloat, numo_cSFloat)
+ DEF_NARRAY_GE_METHOD_FUNC(sfloat, numo_cSFloat)
+ DEF_NARRAY_LT_METHOD_FUNC(sfloat, numo_cSFloat)
+ DEF_NARRAY_LE_METHOD_FUNC(sfloat, numo_cSFloat)
  DEF_NARRAY_CLIP_METHOD_FUNC(sfloat, numo_cSFloat)
+ DEF_NARRAY_FLT_ISNAN_METHOD_FUNC(sfloat, numo_cSFloat)
+ DEF_NARRAY_FLT_ISINF_METHOD_FUNC(sfloat, numo_cSFloat)
+ DEF_NARRAY_FLT_ISPOSINF_METHOD_FUNC(sfloat, numo_cSFloat)
+ DEF_NARRAY_FLT_ISNEGINF_METHOD_FUNC(sfloat, numo_cSFloat)
+ DEF_NARRAY_FLT_ISFINITE_METHOD_FUNC(sfloat, numo_cSFloat)
  DEF_NARRAY_FLT_SUM_METHOD_FUNC(sfloat, numo_cSFloat)
  DEF_NARRAY_FLT_PROD_METHOD_FUNC(sfloat, numo_cSFloat)
  DEF_NARRAY_FLT_MEAN_METHOD_FUNC(sfloat, numo_cSFloat, float, numo_cSFloat)
@@ -122,6 +198,7 @@ DEF_NARRAY_FLT_SEQ_METHOD_FUNC(sfloat)
  DEF_NARRAY_FLT_LOGSEQ_METHOD_FUNC(sfloat)
  DEF_NARRAY_EYE_METHOD_FUNC(sfloat)
  DEF_NARRAY_FLT_RAND_METHOD_FUNC(sfloat)
+ DEF_NARRAY_FLT_RAND_NORM_METHOD_FUNC(sfloat)
  #ifdef __SSE2__
  DEF_NARRAY_FLT_SQRT_SSE2_SGL_METHOD_FUNC(sfloat, numo_cSFloat)
  #else
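
For a sense of what these macro invocations replace: the hunks further down delete the hand-written sfloat implementations, so DEF_NARRAY_FILL_METHOD_FUNC(sfloat) presumably expands to code equivalent to the removed iter_sfloat_fill/sfloat_fill pair. A sketch reconstructed from the removed lines in this same diff follows; the actual expansion lives in mh/fill.h and may differ in detail.

    /* Iterator: write one converted Ruby value into every element. */
    static void iter_sfloat_fill(na_loop_t* const lp) {
        size_t i;
        char* p1;
        ssize_t s1;
        size_t* idx1;
        VALUE x = lp->option;
        dtype y;
        INIT_COUNTER(lp, i);
        INIT_PTR_IDX(lp, 0, p1, s1, idx1);
        y = m_num_to_data(x);              /* convert the Ruby value once */
        if (idx1) {
            for (; i--;) {
                SET_DATA_INDEX(p1, idx1, dtype, y);
            }
        } else {
            for (; i--;) {
                SET_DATA_STRIDE(p1, s1, dtype, y);
            }
        }
    }

    /* Ruby-visible method: run the iterator over self in-place. */
    static VALUE sfloat_fill(VALUE self, VALUE val) {
        ndfunc_arg_in_t ain[2] = { { OVERWRITE, 0 }, { sym_option } };
        ndfunc_t ndf = { iter_sfloat_fill, FULL_LOOP, 2, 0, ain, 0 };
        na_ndloop(&ndf, 2, self, val);
        return self;
    }
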
@@ -1275,171 +1352,6 @@ static VALUE sfloat_aset(int argc, VALUE* argv, VALUE self) {
1275
1352
  return argv[argc];
1276
1353
  }
1277
1354
 
1278
- static VALUE sfloat_coerce_cast(VALUE self, VALUE type) {
1279
- return Qnil;
1280
- }
1281
-
1282
- static void iter_sfloat_to_a(na_loop_t* const lp) {
1283
- size_t i, s1;
1284
- char* p1;
1285
- size_t* idx1;
1286
- dtype x;
1287
- volatile VALUE a, y;
1288
-
1289
- INIT_COUNTER(lp, i);
1290
- INIT_PTR_IDX(lp, 0, p1, s1, idx1);
1291
- a = rb_ary_new2(i);
1292
- rb_ary_push(lp->args[1].value, a);
1293
- if (idx1) {
1294
- for (; i--;) {
1295
- GET_DATA_INDEX(p1, idx1, dtype, x);
1296
- y = m_data_to_num(x);
1297
- rb_ary_push(a, y);
1298
- }
1299
- } else {
1300
- for (; i--;) {
1301
- GET_DATA_STRIDE(p1, s1, dtype, x);
1302
- y = m_data_to_num(x);
1303
- rb_ary_push(a, y);
1304
- }
1305
- }
1306
- }
1307
-
1308
- static VALUE sfloat_to_a(VALUE self) {
1309
- ndfunc_arg_in_t ain[3] = { { Qnil, 0 }, { sym_loop_opt }, { sym_option } };
1310
- ndfunc_arg_out_t aout[1] = { { rb_cArray, 0 } }; // dummy?
1311
- ndfunc_t ndf = { iter_sfloat_to_a, FULL_LOOP_NIP, 3, 1, ain, aout };
1312
- return na_ndloop_cast_narray_to_rarray(&ndf, self, Qnil);
1313
- }
1314
-
1315
- static void iter_sfloat_fill(na_loop_t* const lp) {
1316
- size_t i;
1317
- char* p1;
1318
- ssize_t s1;
1319
- size_t* idx1;
1320
- VALUE x = lp->option;
1321
- dtype y;
1322
- INIT_COUNTER(lp, i);
1323
- INIT_PTR_IDX(lp, 0, p1, s1, idx1);
1324
- y = m_num_to_data(x);
1325
- if (idx1) {
1326
- for (; i--;) {
1327
- SET_DATA_INDEX(p1, idx1, dtype, y);
1328
- }
1329
- } else {
1330
- for (; i--;) {
1331
- SET_DATA_STRIDE(p1, s1, dtype, y);
1332
- }
1333
- }
1334
- }
1335
-
1336
- static VALUE sfloat_fill(VALUE self, VALUE val) {
1337
- ndfunc_arg_in_t ain[2] = { { OVERWRITE, 0 }, { sym_option } };
1338
- ndfunc_t ndf = { iter_sfloat_fill, FULL_LOOP, 2, 0, ain, 0 };
1339
-
1340
- na_ndloop(&ndf, 2, self, val);
1341
- return self;
1342
- }
1343
-
1344
- static VALUE format_sfloat(VALUE fmt, dtype* x) {
1345
- // fix-me
1346
- char s[48];
1347
- int n;
1348
-
1349
- if (NIL_P(fmt)) {
1350
- n = m_sprintf(s, *x);
1351
- return rb_str_new(s, n);
1352
- }
1353
- return rb_funcall(fmt, '%', 1, m_data_to_num(*x));
1354
- }
1355
-
1356
- static void iter_sfloat_format(na_loop_t* const lp) {
1357
- size_t i;
1358
- char *p1, *p2;
1359
- ssize_t s1, s2;
1360
- size_t* idx1;
1361
- dtype* x;
1362
- VALUE y;
1363
- VALUE fmt = lp->option;
1364
- INIT_COUNTER(lp, i);
1365
- INIT_PTR_IDX(lp, 0, p1, s1, idx1);
1366
- INIT_PTR(lp, 1, p2, s2);
1367
- if (idx1) {
1368
- for (; i--;) {
1369
- x = (dtype*)(p1 + *idx1);
1370
- idx1++;
1371
- y = format_sfloat(fmt, x);
1372
- SET_DATA_STRIDE(p2, s2, VALUE, y);
1373
- }
1374
- } else {
1375
- for (; i--;) {
1376
- x = (dtype*)p1;
1377
- p1 += s1;
1378
- y = format_sfloat(fmt, x);
1379
- SET_DATA_STRIDE(p2, s2, VALUE, y);
1380
- }
1381
- }
1382
- }
1383
-
1384
- static VALUE sfloat_format(int argc, VALUE* argv, VALUE self) {
1385
- VALUE fmt = Qnil;
1386
-
1387
- ndfunc_arg_in_t ain[2] = { { Qnil, 0 }, { sym_option } };
1388
- ndfunc_arg_out_t aout[1] = { { numo_cRObject, 0 } };
1389
- ndfunc_t ndf = { iter_sfloat_format, FULL_LOOP_NIP, 2, 1, ain, aout };
1390
-
1391
- rb_scan_args(argc, argv, "01", &fmt);
1392
- return na_ndloop(&ndf, 2, self, fmt);
1393
- }
1394
-
1395
- static void iter_sfloat_format_to_a(na_loop_t* const lp) {
1396
- size_t i;
1397
- char* p1;
1398
- ssize_t s1;
1399
- size_t* idx1;
1400
- dtype* x;
1401
- VALUE y;
1402
- volatile VALUE a;
1403
- VALUE fmt = lp->option;
1404
- INIT_COUNTER(lp, i);
1405
- INIT_PTR_IDX(lp, 0, p1, s1, idx1);
1406
- a = rb_ary_new2(i);
1407
- rb_ary_push(lp->args[1].value, a);
1408
- if (idx1) {
1409
- for (; i--;) {
1410
- x = (dtype*)(p1 + *idx1);
1411
- idx1++;
1412
- y = format_sfloat(fmt, x);
1413
- rb_ary_push(a, y);
1414
- }
1415
- } else {
1416
- for (; i--;) {
1417
- x = (dtype*)p1;
1418
- p1 += s1;
1419
- y = format_sfloat(fmt, x);
1420
- rb_ary_push(a, y);
1421
- }
1422
- }
1423
- }
1424
-
1425
- static VALUE sfloat_format_to_a(int argc, VALUE* argv, VALUE self) {
1426
- VALUE fmt = Qnil;
1427
- ndfunc_arg_in_t ain[3] = { { Qnil, 0 }, { sym_loop_opt }, { sym_option } };
1428
- ndfunc_arg_out_t aout[1] = { { rb_cArray, 0 } }; // dummy?
1429
- ndfunc_t ndf = { iter_sfloat_format_to_a, FULL_LOOP_NIP, 3, 1, ain, aout };
1430
-
1431
- rb_scan_args(argc, argv, "01", &fmt);
1432
- return na_ndloop_cast_narray_to_rarray(&ndf, self, fmt);
1433
- }
1434
-
1435
- static VALUE iter_sfloat_inspect(char* ptr, size_t pos, VALUE fmt) {
1436
- return format_sfloat(fmt, (dtype*)(ptr + pos));
1437
- }
1438
-
1439
- static VALUE sfloat_inspect(VALUE ary) {
1440
- return na_ndloop_inspect(ary, iter_sfloat_inspect, Qnil);
1441
- }
1442
-
1443
1355
  static void iter_sfloat_each(na_loop_t* const lp) {
1444
1356
  size_t i, s1;
1445
1357
  char* p1;
@@ -1725,22 +1637,12 @@ static VALUE sfloat_abs(VALUE self) {
  #define check_intdivzero(y) \
  {}

- static void iter_sfloat_add(na_loop_t* const lp) {
+ static void iter_sfloat_copysign(na_loop_t* const lp) {
  size_t i = 0;
  size_t n;
  char *p1, *p2, *p3;
  ssize_t s1, s2, s3;

- #ifdef __SSE2__
- size_t cnt;
- size_t cnt_simd_loop = -1;
-
- __m128 a;
- __m128 b;
-
- size_t num_pack; // Number of elements packed for SIMD.
- num_pack = SIMD_ALIGNMENT_SIZE / sizeof(dtype);
- #endif
  INIT_COUNTER(lp, n);
  INIT_PTR(lp, 0, p1, s1);
  INIT_PTR(lp, 1, p2, s2);
@@ -1751,74 +1653,17 @@ static void iter_sfloat_add(na_loop_t* const lp) {
1751
1653
  is_aligned(p3, sizeof(dtype))) {
1752
1654
 
1753
1655
  if (s1 == sizeof(dtype) && s2 == sizeof(dtype) && s3 == sizeof(dtype)) {
1754
- #ifdef __SSE2__
1755
- // Check number of elements. & Check same alignment.
1756
- if ((n >= num_pack) &&
1757
- is_same_aligned3(
1758
- &((dtype*)p1)[i], &((dtype*)p2)[i], &((dtype*)p3)[i], SIMD_ALIGNMENT_SIZE
1759
- )) {
1760
- // Calculate up to the position just before the start of SIMD computation.
1761
- cnt = get_count_of_elements_not_aligned_to_simd_size(
1762
- &((dtype*)p1)[i], SIMD_ALIGNMENT_SIZE, sizeof(dtype)
1763
- );
1764
- #endif
1765
- if (p1 == p3) { // inplace case
1766
- #ifdef __SSE2__
1767
- for (; i < cnt; i++) {
1768
- #else
1656
+ if (p1 == p3) { // inplace case
1769
1657
  for (; i < n; i++) {
1770
1658
  check_intdivzero(((dtype*)p2)[i]);
1771
- #endif
1772
- ((dtype*)p1)[i] = m_add(((dtype*)p1)[i], ((dtype*)p2)[i]);
1773
- }
1774
- } else {
1775
- #ifdef __SSE2__
1776
- for (; i < cnt; i++) {
1777
- #else
1659
+ ((dtype*)p1)[i] = m_copysign(((dtype*)p1)[i], ((dtype*)p2)[i]);
1660
+ }
1661
+ } else {
1778
1662
  for (; i < n; i++) {
1779
1663
  check_intdivzero(((dtype*)p2)[i]);
1780
- #endif
1781
- ((dtype*)p3)[i] = m_add(((dtype*)p1)[i], ((dtype*)p2)[i]);
1782
- }
1783
- }
1784
-
1785
- #ifdef __SSE2__
1786
- // Get the count of SIMD computation loops.
1787
- cnt_simd_loop = (n - i) % num_pack;
1788
-
1789
- // SIMD computation.
1790
- if (p1 == p3) { // inplace case
1791
- for (; i < n - cnt_simd_loop; i += num_pack) {
1792
- a = _mm_load_ps(&((dtype*)p1)[i]);
1793
- b = _mm_load_ps(&((dtype*)p2)[i]);
1794
- a = _mm_add_ps(a, b);
1795
- _mm_store_ps(&((dtype*)p1)[i], a);
1796
- }
1797
- } else {
1798
- for (; i < n - cnt_simd_loop; i += num_pack) {
1799
- a = _mm_load_ps(&((dtype*)p1)[i]);
1800
- b = _mm_load_ps(&((dtype*)p2)[i]);
1801
- a = _mm_add_ps(a, b);
1802
- _mm_stream_ps(&((dtype*)p3)[i], a);
1803
- }
1804
- }
1805
- }
1806
-
1807
- // Compute the remainder of the SIMD operation.
1808
- if (cnt_simd_loop != 0) {
1809
- if (p1 == p3) { // inplace case
1810
- for (; i < n; i++) {
1811
- check_intdivzero(((dtype*)p2)[i]);
1812
- ((dtype*)p1)[i] = m_add(((dtype*)p1)[i], ((dtype*)p2)[i]);
1813
- }
1814
- } else {
1815
- for (; i < n; i++) {
1816
- check_intdivzero(((dtype*)p2)[i]);
1817
- ((dtype*)p3)[i] = m_add(((dtype*)p1)[i], ((dtype*)p2)[i]);
1818
- }
1664
+ ((dtype*)p3)[i] = m_copysign(((dtype*)p1)[i], ((dtype*)p2)[i]);
1819
1665
  }
1820
1666
  }
1821
- #endif
1822
1667
  return;
1823
1668
  }
1824
1669
 
@@ -1829,72 +1674,18 @@ static void iter_sfloat_add(na_loop_t* const lp) {
1829
1674
  if (s2 == 0) { // Broadcasting from scalar value.
1830
1675
  check_intdivzero(*(dtype*)p2);
1831
1676
  if (s1 == sizeof(dtype) && s3 == sizeof(dtype)) {
1832
- #ifdef __SSE2__
1833
- // Broadcast a scalar value and use it for SIMD computation.
1834
- b = _mm_load1_ps(&((dtype*)p2)[0]);
1835
-
1836
- // Check number of elements. & Check same alignment.
1837
- if ((n >= num_pack) &&
1838
- is_same_aligned2(&((dtype*)p1)[i], &((dtype*)p3)[i], SIMD_ALIGNMENT_SIZE)) {
1839
- // Calculate up to the position just before the start of SIMD computation.
1840
- cnt = get_count_of_elements_not_aligned_to_simd_size(
1841
- &((dtype*)p1)[i], SIMD_ALIGNMENT_SIZE, sizeof(dtype)
1842
- );
1843
- #endif
1844
- if (p1 == p3) { // inplace case
1845
- #ifdef __SSE2__
1846
- for (; i < cnt; i++) {
1847
- #else
1848
- for (; i < n; i++) {
1849
- #endif
1850
- ((dtype*)p1)[i] = m_add(((dtype*)p1)[i], *(dtype*)p2);
1851
- }
1852
- } else {
1853
- #ifdef __SSE2__
1854
- for (; i < cnt; i++) {
1855
- #else
1677
+ if (p1 == p3) { // inplace case
1856
1678
  for (; i < n; i++) {
1857
- #endif
1858
- ((dtype*)p3)[i] = m_add(((dtype*)p1)[i], *(dtype*)p2);
1859
- }
1860
- }
1861
-
1862
- #ifdef __SSE2__
1863
- // Get the count of SIMD computation loops.
1864
- cnt_simd_loop = (n - i) % num_pack;
1865
-
1866
- // SIMD computation.
1867
- if (p1 == p3) { // inplace case
1868
- for (; i < n - cnt_simd_loop; i += num_pack) {
1869
- a = _mm_load_ps(&((dtype*)p1)[i]);
1870
- a = _mm_add_ps(a, b);
1871
- _mm_store_ps(&((dtype*)p1)[i], a);
1872
- }
1873
- } else {
1874
- for (; i < n - cnt_simd_loop; i += num_pack) {
1875
- a = _mm_load_ps(&((dtype*)p1)[i]);
1876
- a = _mm_add_ps(a, b);
1877
- _mm_stream_ps(&((dtype*)p3)[i], a);
1878
- }
1679
+ ((dtype*)p1)[i] = m_copysign(((dtype*)p1)[i], *(dtype*)p2);
1879
1680
  }
1880
- }
1881
-
1882
- // Compute the remainder of the SIMD operation.
1883
- if (cnt_simd_loop != 0) {
1884
- if (p1 == p3) { // inplace case
1885
- for (; i < n; i++) {
1886
- ((dtype*)p1)[i] = m_add(((dtype*)p1)[i], *(dtype*)p2);
1887
- }
1888
- } else {
1889
- for (; i < n; i++) {
1890
- ((dtype*)p3)[i] = m_add(((dtype*)p1)[i], *(dtype*)p2);
1891
- }
1681
+ } else {
1682
+ for (; i < n; i++) {
1683
+ ((dtype*)p3)[i] = m_copysign(((dtype*)p1)[i], *(dtype*)p2);
1892
1684
  }
1893
1685
  }
1894
- #endif
1895
1686
  } else {
1896
1687
  for (i = 0; i < n; i++) {
1897
- *(dtype*)p3 = m_add(*(dtype*)p1, *(dtype*)p2);
1688
+ *(dtype*)p3 = m_copysign(*(dtype*)p1, *(dtype*)p2);
1898
1689
  p1 += s1;
1899
1690
  p3 += s3;
1900
1691
  }
@@ -1903,14 +1694,14 @@ static void iter_sfloat_add(na_loop_t* const lp) {
1903
1694
  if (p1 == p3) { // inplace case
1904
1695
  for (i = 0; i < n; i++) {
1905
1696
  check_intdivzero(*(dtype*)p2);
1906
- *(dtype*)p1 = m_add(*(dtype*)p1, *(dtype*)p2);
1697
+ *(dtype*)p1 = m_copysign(*(dtype*)p1, *(dtype*)p2);
1907
1698
  p1 += s1;
1908
1699
  p2 += s2;
1909
1700
  }
1910
1701
  } else {
1911
1702
  for (i = 0; i < n; i++) {
1912
1703
  check_intdivzero(*(dtype*)p2);
1913
- *(dtype*)p3 = m_add(*(dtype*)p1, *(dtype*)p2);
1704
+ *(dtype*)p3 = m_copysign(*(dtype*)p1, *(dtype*)p2);
1914
1705
  p1 += s1;
1915
1706
  p2 += s2;
1916
1707
  p3 += s3;
@@ -1927,2295 +1718,96 @@ static void iter_sfloat_add(na_loop_t* const lp) {
1927
1718
  GET_DATA_STRIDE(p1, s1, dtype, x);
1928
1719
  GET_DATA_STRIDE(p2, s2, dtype, y);
1929
1720
  check_intdivzero(y);
1930
- z = m_add(x, y);
1721
+ z = m_copysign(x, y);
1931
1722
  SET_DATA_STRIDE(p3, s3, dtype, z);
1932
1723
  }
1933
1724
  //
1934
1725
  }
1935
1726
  #undef check_intdivzero
1936
1727
 
1937
- static VALUE sfloat_add_self(VALUE self, VALUE other) {
1728
+ static VALUE sfloat_copysign_self(VALUE self, VALUE other) {
1938
1729
  ndfunc_arg_in_t ain[2] = { { cT, 0 }, { cT, 0 } };
1939
1730
  ndfunc_arg_out_t aout[1] = { { cT, 0 } };
1940
- ndfunc_t ndf = { iter_sfloat_add, STRIDE_LOOP, 2, 1, ain, aout };
1731
+ ndfunc_t ndf = { iter_sfloat_copysign, STRIDE_LOOP, 2, 1, ain, aout };
1941
1732
 
1942
1733
  return na_ndloop(&ndf, 2, self, other);
1943
1734
  }
1944
1735
 
1945
- static VALUE sfloat_add(VALUE self, VALUE other) {
1736
+ static VALUE sfloat_copysign(VALUE self, VALUE other) {
1946
1737
 
1947
1738
  VALUE klass, v;
1948
1739
 
1949
1740
  klass = na_upcast(rb_obj_class(self), rb_obj_class(other));
1950
1741
  if (klass == cT) {
1951
- return sfloat_add_self(self, other);
1742
+ return sfloat_copysign_self(self, other);
1952
1743
  } else {
1953
1744
  v = rb_funcall(klass, id_cast, 1, self);
1954
- return rb_funcall(v, '+', 1, other);
1745
+ return rb_funcall(v, id_copysign, 1, other);
1955
1746
  }
1956
1747
  }
1957
1748
 
1958
- #define check_intdivzero(y) \
1959
- {}
1960
-
1961
- static void iter_sfloat_sub(na_loop_t* const lp) {
1962
- size_t i = 0;
1963
- size_t n;
1964
- char *p1, *p2, *p3;
1965
- ssize_t s1, s2, s3;
1749
+ static void iter_sfloat_signbit(na_loop_t* const lp) {
1750
+ size_t i;
1751
+ char* p1;
1752
+ BIT_DIGIT* a2;
1753
+ size_t p2;
1754
+ ssize_t s1, s2;
1755
+ size_t* idx1;
1756
+ dtype x;
1757
+ BIT_DIGIT b;
1758
+ INIT_COUNTER(lp, i);
1759
+ INIT_PTR_IDX(lp, 0, p1, s1, idx1);
1760
+ INIT_PTR_BIT(lp, 1, a2, p2, s2);
1761
+ if (idx1) {
1762
+ for (; i--;) {
1763
+ GET_DATA_INDEX(p1, idx1, dtype, x);
1764
+ b = (m_signbit(x)) ? 1 : 0;
1765
+ STORE_BIT(a2, p2, b);
1766
+ p2 += s2;
1767
+ }
1768
+ } else {
1769
+ for (; i--;) {
1770
+ GET_DATA_STRIDE(p1, s1, dtype, x);
1771
+ b = (m_signbit(x)) ? 1 : 0;
1772
+ STORE_BIT(a2, p2, b);
1773
+ p2 += s2;
1774
+ }
1775
+ }
1776
+ }
1966
1777
 
1967
- #ifdef __SSE2__
1968
- size_t cnt;
1969
- size_t cnt_simd_loop = -1;
1778
+ static VALUE sfloat_signbit(VALUE self) {
1779
+ ndfunc_arg_in_t ain[1] = { { cT, 0 } };
1780
+ ndfunc_arg_out_t aout[1] = { { numo_cBit, 0 } };
1781
+ ndfunc_t ndf = { iter_sfloat_signbit, FULL_LOOP, 1, 1, ain, aout };
1970
1782
 
1971
- __m128 a;
1972
- __m128 b;
1783
+ return na_ndloop(&ndf, 1, self);
1784
+ }
1973
1785
 
1974
- size_t num_pack; // Number of elements packed for SIMD.
1975
- num_pack = SIMD_ALIGNMENT_SIZE / sizeof(dtype);
1976
- #endif
1977
- INIT_COUNTER(lp, n);
1786
+ static void iter_sfloat_modf(na_loop_t* const lp) {
1787
+ size_t i;
1788
+ char *p1, *p2, *p3;
1789
+ ssize_t s1, s2, s3;
1790
+ dtype x, y, z;
1791
+ INIT_COUNTER(lp, i);
1978
1792
  INIT_PTR(lp, 0, p1, s1);
1979
1793
  INIT_PTR(lp, 1, p2, s2);
1980
1794
  INIT_PTR(lp, 2, p3, s3);
1795
+ for (; i--;) {
1796
+ GET_DATA_STRIDE(p1, s1, dtype, x);
1797
+ m_modf(x, y, z);
1798
+ SET_DATA_STRIDE(p2, s2, dtype, y);
1799
+ SET_DATA_STRIDE(p3, s3, dtype, z);
1800
+ }
1801
+ }
1981
1802
 
1982
- //
1983
- if (is_aligned(p1, sizeof(dtype)) && is_aligned(p2, sizeof(dtype)) &&
1984
- is_aligned(p3, sizeof(dtype))) {
1985
-
1986
- if (s1 == sizeof(dtype) && s2 == sizeof(dtype) && s3 == sizeof(dtype)) {
1987
- #ifdef __SSE2__
1988
- // Check number of elements. & Check same alignment.
1989
- if ((n >= num_pack) &&
1990
- is_same_aligned3(
1991
- &((dtype*)p1)[i], &((dtype*)p2)[i], &((dtype*)p3)[i], SIMD_ALIGNMENT_SIZE
1992
- )) {
1993
- // Calculate up to the position just before the start of SIMD computation.
1994
- cnt = get_count_of_elements_not_aligned_to_simd_size(
1995
- &((dtype*)p1)[i], SIMD_ALIGNMENT_SIZE, sizeof(dtype)
1996
- );
1997
- #endif
1998
- if (p1 == p3) { // inplace case
1999
- #ifdef __SSE2__
2000
- for (; i < cnt; i++) {
2001
- #else
2002
- for (; i < n; i++) {
2003
- check_intdivzero(((dtype*)p2)[i]);
2004
- #endif
2005
- ((dtype*)p1)[i] = m_sub(((dtype*)p1)[i], ((dtype*)p2)[i]);
2006
- }
2007
- } else {
2008
- #ifdef __SSE2__
2009
- for (; i < cnt; i++) {
2010
- #else
2011
- for (; i < n; i++) {
2012
- check_intdivzero(((dtype*)p2)[i]);
2013
- #endif
2014
- ((dtype*)p3)[i] = m_sub(((dtype*)p1)[i], ((dtype*)p2)[i]);
2015
- }
2016
- }
2017
-
2018
- #ifdef __SSE2__
2019
- // Get the count of SIMD computation loops.
2020
- cnt_simd_loop = (n - i) % num_pack;
2021
-
2022
- // SIMD computation.
2023
- if (p1 == p3) { // inplace case
2024
- for (; i < n - cnt_simd_loop; i += num_pack) {
2025
- a = _mm_load_ps(&((dtype*)p1)[i]);
2026
- b = _mm_load_ps(&((dtype*)p2)[i]);
2027
- a = _mm_sub_ps(a, b);
2028
- _mm_store_ps(&((dtype*)p1)[i], a);
2029
- }
2030
- } else {
2031
- for (; i < n - cnt_simd_loop; i += num_pack) {
2032
- a = _mm_load_ps(&((dtype*)p1)[i]);
2033
- b = _mm_load_ps(&((dtype*)p2)[i]);
2034
- a = _mm_sub_ps(a, b);
2035
- _mm_stream_ps(&((dtype*)p3)[i], a);
2036
- }
2037
- }
2038
- }
2039
-
2040
- // Compute the remainder of the SIMD operation.
2041
- if (cnt_simd_loop != 0) {
2042
- if (p1 == p3) { // inplace case
2043
- for (; i < n; i++) {
2044
- check_intdivzero(((dtype*)p2)[i]);
2045
- ((dtype*)p1)[i] = m_sub(((dtype*)p1)[i], ((dtype*)p2)[i]);
2046
- }
2047
- } else {
2048
- for (; i < n; i++) {
2049
- check_intdivzero(((dtype*)p2)[i]);
2050
- ((dtype*)p3)[i] = m_sub(((dtype*)p1)[i], ((dtype*)p2)[i]);
2051
- }
2052
- }
2053
- }
2054
- #endif
2055
- return;
2056
- }
2057
-
2058
- if (is_aligned_step(s1, sizeof(dtype)) && is_aligned_step(s2, sizeof(dtype)) &&
2059
- is_aligned_step(s3, sizeof(dtype))) {
2060
- //
2061
-
2062
- if (s2 == 0) { // Broadcasting from scalar value.
2063
- check_intdivzero(*(dtype*)p2);
2064
- if (s1 == sizeof(dtype) && s3 == sizeof(dtype)) {
2065
- #ifdef __SSE2__
2066
- // Broadcast a scalar value and use it for SIMD computation.
2067
- b = _mm_load1_ps(&((dtype*)p2)[0]);
2068
-
2069
- // Check number of elements. & Check same alignment.
2070
- if ((n >= num_pack) &&
2071
- is_same_aligned2(&((dtype*)p1)[i], &((dtype*)p3)[i], SIMD_ALIGNMENT_SIZE)) {
2072
- // Calculate up to the position just before the start of SIMD computation.
2073
- cnt = get_count_of_elements_not_aligned_to_simd_size(
2074
- &((dtype*)p1)[i], SIMD_ALIGNMENT_SIZE, sizeof(dtype)
2075
- );
2076
- #endif
2077
- if (p1 == p3) { // inplace case
2078
- #ifdef __SSE2__
2079
- for (; i < cnt; i++) {
2080
- #else
2081
- for (; i < n; i++) {
2082
- #endif
2083
- ((dtype*)p1)[i] = m_sub(((dtype*)p1)[i], *(dtype*)p2);
2084
- }
2085
- } else {
2086
- #ifdef __SSE2__
2087
- for (; i < cnt; i++) {
2088
- #else
2089
- for (; i < n; i++) {
2090
- #endif
2091
- ((dtype*)p3)[i] = m_sub(((dtype*)p1)[i], *(dtype*)p2);
2092
- }
2093
- }
2094
-
2095
- #ifdef __SSE2__
2096
- // Get the count of SIMD computation loops.
2097
- cnt_simd_loop = (n - i) % num_pack;
2098
-
2099
- // SIMD computation.
2100
- if (p1 == p3) { // inplace case
2101
- for (; i < n - cnt_simd_loop; i += num_pack) {
2102
- a = _mm_load_ps(&((dtype*)p1)[i]);
2103
- a = _mm_sub_ps(a, b);
2104
- _mm_store_ps(&((dtype*)p1)[i], a);
2105
- }
2106
- } else {
2107
- for (; i < n - cnt_simd_loop; i += num_pack) {
2108
- a = _mm_load_ps(&((dtype*)p1)[i]);
2109
- a = _mm_sub_ps(a, b);
2110
- _mm_stream_ps(&((dtype*)p3)[i], a);
2111
- }
2112
- }
2113
- }
2114
-
2115
- // Compute the remainder of the SIMD operation.
2116
- if (cnt_simd_loop != 0) {
2117
- if (p1 == p3) { // inplace case
2118
- for (; i < n; i++) {
2119
- ((dtype*)p1)[i] = m_sub(((dtype*)p1)[i], *(dtype*)p2);
2120
- }
2121
- } else {
2122
- for (; i < n; i++) {
2123
- ((dtype*)p3)[i] = m_sub(((dtype*)p1)[i], *(dtype*)p2);
2124
- }
2125
- }
2126
- }
2127
- #endif
2128
- } else {
2129
- for (i = 0; i < n; i++) {
2130
- *(dtype*)p3 = m_sub(*(dtype*)p1, *(dtype*)p2);
2131
- p1 += s1;
2132
- p3 += s3;
2133
- }
2134
- }
2135
- } else {
2136
- if (p1 == p3) { // inplace case
2137
- for (i = 0; i < n; i++) {
2138
- check_intdivzero(*(dtype*)p2);
2139
- *(dtype*)p1 = m_sub(*(dtype*)p1, *(dtype*)p2);
2140
- p1 += s1;
2141
- p2 += s2;
2142
- }
2143
- } else {
2144
- for (i = 0; i < n; i++) {
2145
- check_intdivzero(*(dtype*)p2);
2146
- *(dtype*)p3 = m_sub(*(dtype*)p1, *(dtype*)p2);
2147
- p1 += s1;
2148
- p2 += s2;
2149
- p3 += s3;
2150
- }
2151
- }
2152
- }
2153
-
2154
- return;
2155
- //
2156
- }
2157
- }
2158
- for (i = 0; i < n; i++) {
2159
- dtype x, y, z;
2160
- GET_DATA_STRIDE(p1, s1, dtype, x);
2161
- GET_DATA_STRIDE(p2, s2, dtype, y);
2162
- check_intdivzero(y);
2163
- z = m_sub(x, y);
2164
- SET_DATA_STRIDE(p3, s3, dtype, z);
2165
- }
2166
- //
2167
- }
2168
- #undef check_intdivzero
2169
-
2170
- static VALUE sfloat_sub_self(VALUE self, VALUE other) {
2171
- ndfunc_arg_in_t ain[2] = { { cT, 0 }, { cT, 0 } };
2172
- ndfunc_arg_out_t aout[1] = { { cT, 0 } };
2173
- ndfunc_t ndf = { iter_sfloat_sub, STRIDE_LOOP, 2, 1, ain, aout };
2174
-
2175
- return na_ndloop(&ndf, 2, self, other);
2176
- }
2177
-
2178
- static VALUE sfloat_sub(VALUE self, VALUE other) {
2179
-
2180
- VALUE klass, v;
2181
-
2182
- klass = na_upcast(rb_obj_class(self), rb_obj_class(other));
2183
- if (klass == cT) {
2184
- return sfloat_sub_self(self, other);
2185
- } else {
2186
- v = rb_funcall(klass, id_cast, 1, self);
2187
- return rb_funcall(v, '-', 1, other);
2188
- }
2189
- }
2190
-
2191
- #define check_intdivzero(y) \
2192
- {}
2193
-
2194
- static void iter_sfloat_mul(na_loop_t* const lp) {
2195
- size_t i = 0;
2196
- size_t n;
2197
- char *p1, *p2, *p3;
2198
- ssize_t s1, s2, s3;
2199
-
2200
- #ifdef __SSE2__
2201
- size_t cnt;
2202
- size_t cnt_simd_loop = -1;
2203
-
2204
- __m128 a;
2205
- __m128 b;
2206
-
2207
- size_t num_pack; // Number of elements packed for SIMD.
2208
- num_pack = SIMD_ALIGNMENT_SIZE / sizeof(dtype);
2209
- #endif
2210
- INIT_COUNTER(lp, n);
2211
- INIT_PTR(lp, 0, p1, s1);
2212
- INIT_PTR(lp, 1, p2, s2);
2213
- INIT_PTR(lp, 2, p3, s3);
2214
-
2215
- //
2216
- if (is_aligned(p1, sizeof(dtype)) && is_aligned(p2, sizeof(dtype)) &&
2217
- is_aligned(p3, sizeof(dtype))) {
2218
-
2219
- if (s1 == sizeof(dtype) && s2 == sizeof(dtype) && s3 == sizeof(dtype)) {
2220
- #ifdef __SSE2__
2221
- // Check number of elements. & Check same alignment.
2222
- if ((n >= num_pack) &&
2223
- is_same_aligned3(
2224
- &((dtype*)p1)[i], &((dtype*)p2)[i], &((dtype*)p3)[i], SIMD_ALIGNMENT_SIZE
2225
- )) {
2226
- // Calculate up to the position just before the start of SIMD computation.
2227
- cnt = get_count_of_elements_not_aligned_to_simd_size(
2228
- &((dtype*)p1)[i], SIMD_ALIGNMENT_SIZE, sizeof(dtype)
2229
- );
2230
- #endif
2231
- if (p1 == p3) { // inplace case
2232
- #ifdef __SSE2__
2233
- for (; i < cnt; i++) {
2234
- #else
2235
- for (; i < n; i++) {
2236
- check_intdivzero(((dtype*)p2)[i]);
2237
- #endif
2238
- ((dtype*)p1)[i] = m_mul(((dtype*)p1)[i], ((dtype*)p2)[i]);
2239
- }
2240
- } else {
2241
- #ifdef __SSE2__
2242
- for (; i < cnt; i++) {
2243
- #else
2244
- for (; i < n; i++) {
2245
- check_intdivzero(((dtype*)p2)[i]);
2246
- #endif
2247
- ((dtype*)p3)[i] = m_mul(((dtype*)p1)[i], ((dtype*)p2)[i]);
2248
- }
2249
- }
2250
-
2251
- #ifdef __SSE2__
2252
- // Get the count of SIMD computation loops.
2253
- cnt_simd_loop = (n - i) % num_pack;
2254
-
2255
- // SIMD computation.
2256
- if (p1 == p3) { // inplace case
2257
- for (; i < n - cnt_simd_loop; i += num_pack) {
2258
- a = _mm_load_ps(&((dtype*)p1)[i]);
2259
- b = _mm_load_ps(&((dtype*)p2)[i]);
2260
- a = _mm_mul_ps(a, b);
2261
- _mm_store_ps(&((dtype*)p1)[i], a);
2262
- }
2263
- } else {
2264
- for (; i < n - cnt_simd_loop; i += num_pack) {
2265
- a = _mm_load_ps(&((dtype*)p1)[i]);
2266
- b = _mm_load_ps(&((dtype*)p2)[i]);
2267
- a = _mm_mul_ps(a, b);
2268
- _mm_stream_ps(&((dtype*)p3)[i], a);
2269
- }
2270
- }
2271
- }
2272
-
2273
- // Compute the remainder of the SIMD operation.
2274
- if (cnt_simd_loop != 0) {
2275
- if (p1 == p3) { // inplace case
2276
- for (; i < n; i++) {
2277
- check_intdivzero(((dtype*)p2)[i]);
2278
- ((dtype*)p1)[i] = m_mul(((dtype*)p1)[i], ((dtype*)p2)[i]);
2279
- }
2280
- } else {
2281
- for (; i < n; i++) {
2282
- check_intdivzero(((dtype*)p2)[i]);
2283
- ((dtype*)p3)[i] = m_mul(((dtype*)p1)[i], ((dtype*)p2)[i]);
2284
- }
2285
- }
2286
- }
2287
- #endif
2288
- return;
2289
- }
2290
-
2291
- if (is_aligned_step(s1, sizeof(dtype)) && is_aligned_step(s2, sizeof(dtype)) &&
2292
- is_aligned_step(s3, sizeof(dtype))) {
2293
- //
2294
-
2295
- if (s2 == 0) { // Broadcasting from scalar value.
2296
- check_intdivzero(*(dtype*)p2);
2297
- if (s1 == sizeof(dtype) && s3 == sizeof(dtype)) {
2298
- #ifdef __SSE2__
2299
- // Broadcast a scalar value and use it for SIMD computation.
2300
- b = _mm_load1_ps(&((dtype*)p2)[0]);
2301
-
2302
- // Check number of elements. & Check same alignment.
2303
- if ((n >= num_pack) &&
2304
- is_same_aligned2(&((dtype*)p1)[i], &((dtype*)p3)[i], SIMD_ALIGNMENT_SIZE)) {
2305
- // Calculate up to the position just before the start of SIMD computation.
2306
- cnt = get_count_of_elements_not_aligned_to_simd_size(
2307
- &((dtype*)p1)[i], SIMD_ALIGNMENT_SIZE, sizeof(dtype)
2308
- );
2309
- #endif
2310
- if (p1 == p3) { // inplace case
2311
- #ifdef __SSE2__
2312
- for (; i < cnt; i++) {
2313
- #else
2314
- for (; i < n; i++) {
2315
- #endif
2316
- ((dtype*)p1)[i] = m_mul(((dtype*)p1)[i], *(dtype*)p2);
2317
- }
2318
- } else {
2319
- #ifdef __SSE2__
2320
- for (; i < cnt; i++) {
2321
- #else
2322
- for (; i < n; i++) {
2323
- #endif
2324
- ((dtype*)p3)[i] = m_mul(((dtype*)p1)[i], *(dtype*)p2);
2325
- }
2326
- }
2327
-
2328
- #ifdef __SSE2__
2329
- // Get the count of SIMD computation loops.
2330
- cnt_simd_loop = (n - i) % num_pack;
2331
-
2332
- // SIMD computation.
2333
- if (p1 == p3) { // inplace case
2334
- for (; i < n - cnt_simd_loop; i += num_pack) {
2335
- a = _mm_load_ps(&((dtype*)p1)[i]);
2336
- a = _mm_mul_ps(a, b);
2337
- _mm_store_ps(&((dtype*)p1)[i], a);
2338
- }
2339
- } else {
2340
- for (; i < n - cnt_simd_loop; i += num_pack) {
2341
- a = _mm_load_ps(&((dtype*)p1)[i]);
2342
- a = _mm_mul_ps(a, b);
2343
- _mm_stream_ps(&((dtype*)p3)[i], a);
2344
- }
2345
- }
2346
- }
2347
-
2348
- // Compute the remainder of the SIMD operation.
2349
- if (cnt_simd_loop != 0) {
2350
- if (p1 == p3) { // inplace case
2351
- for (; i < n; i++) {
2352
- ((dtype*)p1)[i] = m_mul(((dtype*)p1)[i], *(dtype*)p2);
2353
- }
2354
- } else {
2355
- for (; i < n; i++) {
2356
- ((dtype*)p3)[i] = m_mul(((dtype*)p1)[i], *(dtype*)p2);
2357
- }
2358
- }
2359
- }
2360
- #endif
2361
- } else {
2362
- for (i = 0; i < n; i++) {
2363
- *(dtype*)p3 = m_mul(*(dtype*)p1, *(dtype*)p2);
2364
- p1 += s1;
2365
- p3 += s3;
2366
- }
2367
- }
2368
- } else {
2369
- if (p1 == p3) { // inplace case
2370
- for (i = 0; i < n; i++) {
2371
- check_intdivzero(*(dtype*)p2);
2372
- *(dtype*)p1 = m_mul(*(dtype*)p1, *(dtype*)p2);
2373
- p1 += s1;
2374
- p2 += s2;
2375
- }
2376
- } else {
2377
- for (i = 0; i < n; i++) {
2378
- check_intdivzero(*(dtype*)p2);
2379
- *(dtype*)p3 = m_mul(*(dtype*)p1, *(dtype*)p2);
2380
- p1 += s1;
2381
- p2 += s2;
2382
- p3 += s3;
2383
- }
2384
- }
2385
- }
2386
-
2387
- return;
2388
- //
2389
- }
2390
- }
2391
- for (i = 0; i < n; i++) {
2392
- dtype x, y, z;
2393
- GET_DATA_STRIDE(p1, s1, dtype, x);
2394
- GET_DATA_STRIDE(p2, s2, dtype, y);
2395
- check_intdivzero(y);
2396
- z = m_mul(x, y);
2397
- SET_DATA_STRIDE(p3, s3, dtype, z);
2398
- }
2399
- //
2400
- }
2401
- #undef check_intdivzero
2402
-
2403
- static VALUE sfloat_mul_self(VALUE self, VALUE other) {
2404
- ndfunc_arg_in_t ain[2] = { { cT, 0 }, { cT, 0 } };
2405
- ndfunc_arg_out_t aout[1] = { { cT, 0 } };
2406
- ndfunc_t ndf = { iter_sfloat_mul, STRIDE_LOOP, 2, 1, ain, aout };
2407
-
2408
- return na_ndloop(&ndf, 2, self, other);
2409
- }
2410
-
2411
- static VALUE sfloat_mul(VALUE self, VALUE other) {
2412
-
2413
- VALUE klass, v;
2414
-
2415
- klass = na_upcast(rb_obj_class(self), rb_obj_class(other));
2416
- if (klass == cT) {
2417
- return sfloat_mul_self(self, other);
2418
- } else {
2419
- v = rb_funcall(klass, id_cast, 1, self);
2420
- return rb_funcall(v, '*', 1, other);
2421
- }
2422
- }
2423
-
2424
- #define check_intdivzero(y) \
2425
- {}
2426
-
2427
- static void iter_sfloat_div(na_loop_t* const lp) {
2428
- size_t i = 0;
2429
- size_t n;
2430
- char *p1, *p2, *p3;
2431
- ssize_t s1, s2, s3;
2432
-
2433
- #ifdef __SSE2__
2434
- size_t cnt;
2435
- size_t cnt_simd_loop = -1;
2436
-
2437
- __m128 a;
2438
- __m128 b;
2439
-
2440
- size_t num_pack; // Number of elements packed for SIMD.
2441
- num_pack = SIMD_ALIGNMENT_SIZE / sizeof(dtype);
2442
- #endif
2443
- INIT_COUNTER(lp, n);
2444
- INIT_PTR(lp, 0, p1, s1);
2445
- INIT_PTR(lp, 1, p2, s2);
2446
- INIT_PTR(lp, 2, p3, s3);
2447
-
2448
- //
2449
- if (is_aligned(p1, sizeof(dtype)) && is_aligned(p2, sizeof(dtype)) &&
2450
- is_aligned(p3, sizeof(dtype))) {
2451
-
2452
- if (s1 == sizeof(dtype) && s2 == sizeof(dtype) && s3 == sizeof(dtype)) {
2453
- #ifdef __SSE2__
2454
- // Check number of elements. & Check same alignment.
2455
- if ((n >= num_pack) &&
2456
- is_same_aligned3(
2457
- &((dtype*)p1)[i], &((dtype*)p2)[i], &((dtype*)p3)[i], SIMD_ALIGNMENT_SIZE
2458
- )) {
2459
- // Calculate up to the position just before the start of SIMD computation.
2460
- cnt = get_count_of_elements_not_aligned_to_simd_size(
2461
- &((dtype*)p1)[i], SIMD_ALIGNMENT_SIZE, sizeof(dtype)
2462
- );
2463
- #endif
2464
- if (p1 == p3) { // inplace case
2465
- #ifdef __SSE2__
2466
- for (; i < cnt; i++) {
2467
- #else
2468
- for (; i < n; i++) {
2469
- check_intdivzero(((dtype*)p2)[i]);
2470
- #endif
2471
- ((dtype*)p1)[i] = m_div(((dtype*)p1)[i], ((dtype*)p2)[i]);
2472
- }
2473
- } else {
2474
- #ifdef __SSE2__
2475
- for (; i < cnt; i++) {
2476
- #else
2477
- for (; i < n; i++) {
2478
- check_intdivzero(((dtype*)p2)[i]);
2479
- #endif
2480
- ((dtype*)p3)[i] = m_div(((dtype*)p1)[i], ((dtype*)p2)[i]);
2481
- }
2482
- }
2483
-
2484
- #ifdef __SSE2__
2485
- // Get the count of SIMD computation loops.
2486
- cnt_simd_loop = (n - i) % num_pack;
2487
-
2488
- // SIMD computation.
2489
- if (p1 == p3) { // inplace case
2490
- for (; i < n - cnt_simd_loop; i += num_pack) {
2491
- a = _mm_load_ps(&((dtype*)p1)[i]);
2492
- b = _mm_load_ps(&((dtype*)p2)[i]);
2493
- a = _mm_div_ps(a, b);
2494
- _mm_store_ps(&((dtype*)p1)[i], a);
2495
- }
2496
- } else {
2497
- for (; i < n - cnt_simd_loop; i += num_pack) {
2498
- a = _mm_load_ps(&((dtype*)p1)[i]);
2499
- b = _mm_load_ps(&((dtype*)p2)[i]);
2500
- a = _mm_div_ps(a, b);
2501
- _mm_stream_ps(&((dtype*)p3)[i], a);
2502
- }
2503
- }
2504
- }
2505
-
2506
- // Compute the remainder of the SIMD operation.
2507
- if (cnt_simd_loop != 0) {
2508
- if (p1 == p3) { // inplace case
2509
- for (; i < n; i++) {
2510
- check_intdivzero(((dtype*)p2)[i]);
2511
- ((dtype*)p1)[i] = m_div(((dtype*)p1)[i], ((dtype*)p2)[i]);
2512
- }
2513
- } else {
2514
- for (; i < n; i++) {
2515
- check_intdivzero(((dtype*)p2)[i]);
2516
- ((dtype*)p3)[i] = m_div(((dtype*)p1)[i], ((dtype*)p2)[i]);
2517
- }
2518
- }
2519
- }
2520
- #endif
2521
- return;
2522
- }
2523
-
2524
- if (is_aligned_step(s1, sizeof(dtype)) && is_aligned_step(s2, sizeof(dtype)) &&
2525
- is_aligned_step(s3, sizeof(dtype))) {
2526
- //
2527
-
2528
- if (s2 == 0) { // Broadcasting from scalar value.
2529
- check_intdivzero(*(dtype*)p2);
2530
- if (s1 == sizeof(dtype) && s3 == sizeof(dtype)) {
2531
- #ifdef __SSE2__
2532
- // Broadcast a scalar value and use it for SIMD computation.
2533
- b = _mm_load1_ps(&((dtype*)p2)[0]);
2534
-
2535
- // Check number of elements. & Check same alignment.
2536
- if ((n >= num_pack) &&
2537
- is_same_aligned2(&((dtype*)p1)[i], &((dtype*)p3)[i], SIMD_ALIGNMENT_SIZE)) {
2538
- // Calculate up to the position just before the start of SIMD computation.
2539
- cnt = get_count_of_elements_not_aligned_to_simd_size(
2540
- &((dtype*)p1)[i], SIMD_ALIGNMENT_SIZE, sizeof(dtype)
2541
- );
2542
- #endif
2543
- if (p1 == p3) { // inplace case
2544
- #ifdef __SSE2__
2545
- for (; i < cnt; i++) {
2546
- #else
2547
- for (; i < n; i++) {
2548
- #endif
2549
- ((dtype*)p1)[i] = m_div(((dtype*)p1)[i], *(dtype*)p2);
2550
- }
2551
- } else {
2552
- #ifdef __SSE2__
2553
- for (; i < cnt; i++) {
2554
- #else
2555
- for (; i < n; i++) {
2556
- #endif
2557
- ((dtype*)p3)[i] = m_div(((dtype*)p1)[i], *(dtype*)p2);
2558
- }
2559
- }
2560
-
2561
- #ifdef __SSE2__
2562
- // Get the count of SIMD computation loops.
2563
- cnt_simd_loop = (n - i) % num_pack;
2564
-
2565
- // SIMD computation.
2566
- if (p1 == p3) { // inplace case
2567
- for (; i < n - cnt_simd_loop; i += num_pack) {
2568
- a = _mm_load_ps(&((dtype*)p1)[i]);
2569
- a = _mm_div_ps(a, b);
2570
- _mm_store_ps(&((dtype*)p1)[i], a);
2571
- }
2572
- } else {
2573
- for (; i < n - cnt_simd_loop; i += num_pack) {
2574
- a = _mm_load_ps(&((dtype*)p1)[i]);
2575
- a = _mm_div_ps(a, b);
2576
- _mm_stream_ps(&((dtype*)p3)[i], a);
2577
- }
2578
- }
2579
- }
2580
-
2581
- // Compute the remainder of the SIMD operation.
2582
- if (cnt_simd_loop != 0) {
2583
- if (p1 == p3) { // inplace case
2584
- for (; i < n; i++) {
2585
- ((dtype*)p1)[i] = m_div(((dtype*)p1)[i], *(dtype*)p2);
2586
- }
2587
- } else {
2588
- for (; i < n; i++) {
2589
- ((dtype*)p3)[i] = m_div(((dtype*)p1)[i], *(dtype*)p2);
2590
- }
2591
- }
2592
- }
2593
- #endif
2594
- } else {
2595
- for (i = 0; i < n; i++) {
2596
- *(dtype*)p3 = m_div(*(dtype*)p1, *(dtype*)p2);
2597
- p1 += s1;
2598
- p3 += s3;
2599
- }
2600
- }
2601
- } else {
2602
- if (p1 == p3) { // inplace case
2603
- for (i = 0; i < n; i++) {
2604
- check_intdivzero(*(dtype*)p2);
2605
- *(dtype*)p1 = m_div(*(dtype*)p1, *(dtype*)p2);
2606
- p1 += s1;
2607
- p2 += s2;
2608
- }
2609
- } else {
2610
- for (i = 0; i < n; i++) {
2611
- check_intdivzero(*(dtype*)p2);
2612
- *(dtype*)p3 = m_div(*(dtype*)p1, *(dtype*)p2);
2613
- p1 += s1;
2614
- p2 += s2;
2615
- p3 += s3;
2616
- }
2617
- }
2618
- }
2619
-
2620
- return;
2621
- //
2622
- }
2623
- }
2624
- for (i = 0; i < n; i++) {
2625
- dtype x, y, z;
2626
- GET_DATA_STRIDE(p1, s1, dtype, x);
2627
- GET_DATA_STRIDE(p2, s2, dtype, y);
2628
- check_intdivzero(y);
2629
- z = m_div(x, y);
2630
- SET_DATA_STRIDE(p3, s3, dtype, z);
2631
- }
2632
- //
2633
- }
2634
- #undef check_intdivzero
2635
-
2636
- static VALUE sfloat_div_self(VALUE self, VALUE other) {
2637
- ndfunc_arg_in_t ain[2] = { { cT, 0 }, { cT, 0 } };
2638
- ndfunc_arg_out_t aout[1] = { { cT, 0 } };
2639
- ndfunc_t ndf = { iter_sfloat_div, STRIDE_LOOP, 2, 1, ain, aout };
2640
-
2641
- return na_ndloop(&ndf, 2, self, other);
2642
- }
2643
-
2644
- static VALUE sfloat_div(VALUE self, VALUE other) {
2645
-
2646
- VALUE klass, v;
2647
-
2648
- klass = na_upcast(rb_obj_class(self), rb_obj_class(other));
2649
- if (klass == cT) {
2650
- return sfloat_div_self(self, other);
2651
- } else {
2652
- v = rb_funcall(klass, id_cast, 1, self);
2653
- return rb_funcall(v, '/', 1, other);
2654
- }
2655
- }
2656
-
2657
- #define check_intdivzero(y) \
2658
- {}
2659
-
2660
- static void iter_sfloat_mod(na_loop_t* const lp) {
2661
- size_t i = 0;
2662
- size_t n;
2663
- char *p1, *p2, *p3;
2664
- ssize_t s1, s2, s3;
2665
-
2666
- INIT_COUNTER(lp, n);
2667
- INIT_PTR(lp, 0, p1, s1);
2668
- INIT_PTR(lp, 1, p2, s2);
2669
- INIT_PTR(lp, 2, p3, s3);
2670
-
2671
- //
2672
- if (is_aligned(p1, sizeof(dtype)) && is_aligned(p2, sizeof(dtype)) &&
2673
- is_aligned(p3, sizeof(dtype))) {
2674
-
2675
- if (s1 == sizeof(dtype) && s2 == sizeof(dtype) && s3 == sizeof(dtype)) {
2676
- if (p1 == p3) { // inplace case
2677
- for (; i < n; i++) {
2678
- check_intdivzero(((dtype*)p2)[i]);
2679
- ((dtype*)p1)[i] = m_mod(((dtype*)p1)[i], ((dtype*)p2)[i]);
2680
- }
2681
- } else {
2682
- for (; i < n; i++) {
2683
- check_intdivzero(((dtype*)p2)[i]);
2684
- ((dtype*)p3)[i] = m_mod(((dtype*)p1)[i], ((dtype*)p2)[i]);
2685
- }
2686
- }
2687
- return;
2688
- }
2689
-
2690
- if (is_aligned_step(s1, sizeof(dtype)) && is_aligned_step(s2, sizeof(dtype)) &&
2691
- is_aligned_step(s3, sizeof(dtype))) {
2692
- //
2693
-
2694
- if (s2 == 0) { // Broadcasting from scalar value.
2695
- check_intdivzero(*(dtype*)p2);
2696
- if (s1 == sizeof(dtype) && s3 == sizeof(dtype)) {
2697
- if (p1 == p3) { // inplace case
2698
- for (; i < n; i++) {
2699
- ((dtype*)p1)[i] = m_mod(((dtype*)p1)[i], *(dtype*)p2);
2700
- }
2701
- } else {
2702
- for (; i < n; i++) {
2703
- ((dtype*)p3)[i] = m_mod(((dtype*)p1)[i], *(dtype*)p2);
2704
- }
2705
- }
2706
- } else {
2707
- for (i = 0; i < n; i++) {
2708
- *(dtype*)p3 = m_mod(*(dtype*)p1, *(dtype*)p2);
2709
- p1 += s1;
2710
- p3 += s3;
2711
- }
2712
- }
2713
- } else {
2714
- if (p1 == p3) { // inplace case
2715
- for (i = 0; i < n; i++) {
2716
- check_intdivzero(*(dtype*)p2);
2717
- *(dtype*)p1 = m_mod(*(dtype*)p1, *(dtype*)p2);
2718
- p1 += s1;
2719
- p2 += s2;
2720
- }
2721
- } else {
2722
- for (i = 0; i < n; i++) {
2723
- check_intdivzero(*(dtype*)p2);
2724
- *(dtype*)p3 = m_mod(*(dtype*)p1, *(dtype*)p2);
2725
- p1 += s1;
2726
- p2 += s2;
2727
- p3 += s3;
2728
- }
2729
- }
2730
- }
2731
-
2732
- return;
2733
- //
2734
- }
2735
- }
2736
- for (i = 0; i < n; i++) {
2737
- dtype x, y, z;
2738
- GET_DATA_STRIDE(p1, s1, dtype, x);
2739
- GET_DATA_STRIDE(p2, s2, dtype, y);
2740
- check_intdivzero(y);
2741
- z = m_mod(x, y);
2742
- SET_DATA_STRIDE(p3, s3, dtype, z);
2743
- }
2744
- //
2745
- }
2746
- #undef check_intdivzero
2747
-
2748
- static VALUE sfloat_mod_self(VALUE self, VALUE other) {
2749
- ndfunc_arg_in_t ain[2] = { { cT, 0 }, { cT, 0 } };
2750
- ndfunc_arg_out_t aout[1] = { { cT, 0 } };
2751
- ndfunc_t ndf = { iter_sfloat_mod, STRIDE_LOOP, 2, 1, ain, aout };
2752
-
2753
- return na_ndloop(&ndf, 2, self, other);
2754
- }
2755
-
2756
- static VALUE sfloat_mod(VALUE self, VALUE other) {
2757
-
2758
- VALUE klass, v;
2759
-
2760
- klass = na_upcast(rb_obj_class(self), rb_obj_class(other));
2761
- if (klass == cT) {
2762
- return sfloat_mod_self(self, other);
2763
- } else {
2764
- v = rb_funcall(klass, id_cast, 1, self);
2765
- return rb_funcall(v, '%', 1, other);
2766
- }
2767
- }
2768
-
2769
- static void iter_sfloat_divmod(na_loop_t* const lp) {
2770
- size_t i, n;
2771
- char *p1, *p2, *p3, *p4;
2772
- ssize_t s1, s2, s3, s4;
2773
- dtype x, y, a, b;
2774
- INIT_COUNTER(lp, n);
2775
- INIT_PTR(lp, 0, p1, s1);
2776
- INIT_PTR(lp, 1, p2, s2);
2777
- INIT_PTR(lp, 2, p3, s3);
2778
- INIT_PTR(lp, 3, p4, s4);
2779
- for (i = n; i--;) {
2780
- GET_DATA_STRIDE(p1, s1, dtype, x);
2781
- GET_DATA_STRIDE(p2, s2, dtype, y);
2782
- m_divmod(x, y, a, b);
2783
- SET_DATA_STRIDE(p3, s3, dtype, a);
2784
- SET_DATA_STRIDE(p4, s4, dtype, b);
2785
- }
2786
- }
2787
-
2788
- static VALUE sfloat_divmod_self(VALUE self, VALUE other) {
2789
- ndfunc_arg_in_t ain[2] = { { cT, 0 }, { cT, 0 } };
2790
- ndfunc_arg_out_t aout[2] = { { cT, 0 }, { cT, 0 } };
2791
- ndfunc_t ndf = { iter_sfloat_divmod, STRIDE_LOOP, 2, 2, ain, aout };
2792
-
2793
- return na_ndloop(&ndf, 2, self, other);
2794
- }
2795
-
2796
- static VALUE sfloat_divmod(VALUE self, VALUE other) {
2797
-
2798
- VALUE klass, v;
2799
- klass = na_upcast(rb_obj_class(self), rb_obj_class(other));
2800
- if (klass == cT) {
2801
- return sfloat_divmod_self(self, other);
2802
- } else {
2803
- v = rb_funcall(klass, id_cast, 1, self);
2804
- return rb_funcall(v, id_divmod, 1, other);
2805
- }
2806
- }
2807
-
2808
- static void iter_sfloat_pow(na_loop_t* const lp) {
2809
- size_t i;
2810
- char *p1, *p2, *p3;
2811
- ssize_t s1, s2, s3;
2812
- dtype x, y;
2813
- INIT_COUNTER(lp, i);
2814
- INIT_PTR(lp, 0, p1, s1);
2815
- INIT_PTR(lp, 1, p2, s2);
2816
- INIT_PTR(lp, 2, p3, s3);
2817
- for (; i--;) {
2818
- GET_DATA_STRIDE(p1, s1, dtype, x);
2819
- GET_DATA_STRIDE(p2, s2, dtype, y);
2820
- x = m_pow(x, y);
2821
- SET_DATA_STRIDE(p3, s3, dtype, x);
2822
- }
2823
- }
2824
-
2825
- static void iter_sfloat_pow_int32(na_loop_t* const lp) {
2826
- size_t i;
2827
- char *p1, *p2, *p3;
2828
- ssize_t s1, s2, s3;
2829
- dtype x;
2830
- int32_t y;
2831
- INIT_COUNTER(lp, i);
2832
- INIT_PTR(lp, 0, p1, s1);
2833
- INIT_PTR(lp, 1, p2, s2);
2834
- INIT_PTR(lp, 2, p3, s3);
2835
- for (; i--;) {
2836
- GET_DATA_STRIDE(p1, s1, dtype, x);
2837
- GET_DATA_STRIDE(p2, s2, int32_t, y);
2838
- x = m_pow_int(x, y);
2839
- SET_DATA_STRIDE(p3, s3, dtype, x);
2840
- }
2841
- }
2842
-
2843
- static VALUE sfloat_pow_self(VALUE self, VALUE other) {
2844
- ndfunc_arg_in_t ain[2] = { { cT, 0 }, { cT, 0 } };
2845
- ndfunc_arg_in_t ain_i[2] = { { cT, 0 }, { numo_cInt32, 0 } };
2846
- ndfunc_arg_out_t aout[1] = { { cT, 0 } };
2847
- ndfunc_t ndf = { iter_sfloat_pow, STRIDE_LOOP, 2, 1, ain, aout };
2848
- ndfunc_t ndf_i = { iter_sfloat_pow_int32, STRIDE_LOOP, 2, 1, ain_i, aout };
2849
-
2850
- // fixme : use na.integer?
2851
- if (FIXNUM_P(other) || rb_obj_is_kind_of(other, numo_cInt32)) {
2852
- return na_ndloop(&ndf_i, 2, self, other);
2853
- } else {
2854
- return na_ndloop(&ndf, 2, self, other);
2855
- }
2856
- }
2857
-
2858
- static VALUE sfloat_pow(VALUE self, VALUE other) {
2859
-
2860
- VALUE klass, v;
2861
- klass = na_upcast(rb_obj_class(self), rb_obj_class(other));
2862
- if (klass == cT) {
2863
- return sfloat_pow_self(self, other);
2864
- } else {
2865
- v = rb_funcall(klass, id_cast, 1, self);
2866
- return rb_funcall(v, id_pow, 1, other);
2867
- }
2868
- }
2869
-
2870
- static void iter_sfloat_minus(na_loop_t* const lp) {
2871
- size_t i, n;
2872
- char *p1, *p2;
2873
- ssize_t s1, s2;
2874
- size_t *idx1, *idx2;
2875
- dtype x;
2876
-
2877
- INIT_COUNTER(lp, n);
2878
- INIT_PTR_IDX(lp, 0, p1, s1, idx1);
2879
- INIT_PTR_IDX(lp, 1, p2, s2, idx2);
2880
-
2881
- if (idx1) {
2882
- if (idx2) {
2883
- for (i = 0; i < n; i++) {
2884
- GET_DATA_INDEX(p1, idx1, dtype, x);
2885
- x = m_minus(x);
2886
- SET_DATA_INDEX(p2, idx2, dtype, x);
2887
- }
2888
- } else {
2889
- for (i = 0; i < n; i++) {
2890
- GET_DATA_INDEX(p1, idx1, dtype, x);
2891
- x = m_minus(x);
2892
- SET_DATA_STRIDE(p2, s2, dtype, x);
2893
- }
2894
- }
2895
- } else {
2896
- if (idx2) {
2897
- for (i = 0; i < n; i++) {
2898
- GET_DATA_STRIDE(p1, s1, dtype, x);
2899
- x = m_minus(x);
2900
- SET_DATA_INDEX(p2, idx2, dtype, x);
2901
- }
2902
- } else {
2903
- //
2904
- if (is_aligned(p1, sizeof(dtype)) && is_aligned(p2, sizeof(dtype))) {
2905
- if (s1 == sizeof(dtype) && s2 == sizeof(dtype)) {
2906
- for (i = 0; i < n; i++) {
2907
- ((dtype*)p2)[i] = m_minus(((dtype*)p1)[i]);
2908
- }
2909
- return;
2910
- }
2911
- if (is_aligned_step(s1, sizeof(dtype)) && is_aligned_step(s2, sizeof(dtype))) {
2912
- //
2913
- for (i = 0; i < n; i++) {
2914
- *(dtype*)p2 = m_minus(*(dtype*)p1);
2915
- p1 += s1;
2916
- p2 += s2;
2917
- }
2918
- return;
2919
- //
2920
- }
2921
- }
2922
- for (i = 0; i < n; i++) {
2923
- GET_DATA_STRIDE(p1, s1, dtype, x);
2924
- x = m_minus(x);
2925
- SET_DATA_STRIDE(p2, s2, dtype, x);
2926
- }
2927
- //
2928
- }
2929
- }
2930
- }
2931
-
2932
- static VALUE sfloat_minus(VALUE self) {
2933
- ndfunc_arg_in_t ain[1] = { { cT, 0 } };
2934
- ndfunc_arg_out_t aout[1] = { { cT, 0 } };
2935
- ndfunc_t ndf = { iter_sfloat_minus, FULL_LOOP, 1, 1, ain, aout };
2936
-
2937
- return na_ndloop(&ndf, 1, self);
2938
- }
2939
-
2940
- static void iter_sfloat_reciprocal(na_loop_t* const lp) {
2941
- size_t i, n;
2942
- char *p1, *p2;
2943
- ssize_t s1, s2;
2944
- size_t *idx1, *idx2;
2945
- dtype x;
2946
-
2947
- INIT_COUNTER(lp, n);
2948
- INIT_PTR_IDX(lp, 0, p1, s1, idx1);
2949
- INIT_PTR_IDX(lp, 1, p2, s2, idx2);
2950
-
2951
- if (idx1) {
2952
- if (idx2) {
2953
- for (i = 0; i < n; i++) {
2954
- GET_DATA_INDEX(p1, idx1, dtype, x);
2955
- x = m_reciprocal(x);
2956
- SET_DATA_INDEX(p2, idx2, dtype, x);
2957
- }
2958
- } else {
2959
- for (i = 0; i < n; i++) {
2960
- GET_DATA_INDEX(p1, idx1, dtype, x);
2961
- x = m_reciprocal(x);
2962
- SET_DATA_STRIDE(p2, s2, dtype, x);
2963
- }
2964
- }
2965
- } else {
2966
- if (idx2) {
2967
- for (i = 0; i < n; i++) {
2968
- GET_DATA_STRIDE(p1, s1, dtype, x);
2969
- x = m_reciprocal(x);
2970
- SET_DATA_INDEX(p2, idx2, dtype, x);
2971
- }
2972
- } else {
2973
- //
2974
- if (is_aligned(p1, sizeof(dtype)) && is_aligned(p2, sizeof(dtype))) {
2975
- if (s1 == sizeof(dtype) && s2 == sizeof(dtype)) {
2976
- for (i = 0; i < n; i++) {
2977
- ((dtype*)p2)[i] = m_reciprocal(((dtype*)p1)[i]);
2978
- }
2979
- return;
2980
- }
2981
- if (is_aligned_step(s1, sizeof(dtype)) && is_aligned_step(s2, sizeof(dtype))) {
2982
- //
2983
- for (i = 0; i < n; i++) {
2984
- *(dtype*)p2 = m_reciprocal(*(dtype*)p1);
2985
- p1 += s1;
2986
- p2 += s2;
2987
- }
2988
- return;
2989
- //
2990
- }
2991
- }
2992
- for (i = 0; i < n; i++) {
2993
- GET_DATA_STRIDE(p1, s1, dtype, x);
2994
- x = m_reciprocal(x);
2995
- SET_DATA_STRIDE(p2, s2, dtype, x);
2996
- }
2997
- //
2998
- }
2999
- }
3000
- }
3001
-
3002
- static VALUE sfloat_reciprocal(VALUE self) {
3003
- ndfunc_arg_in_t ain[1] = { { cT, 0 } };
3004
- ndfunc_arg_out_t aout[1] = { { cT, 0 } };
3005
- ndfunc_t ndf = { iter_sfloat_reciprocal, FULL_LOOP, 1, 1, ain, aout };
3006
-
3007
- return na_ndloop(&ndf, 1, self);
3008
- }
3009
-
3010
- static void iter_sfloat_sign(na_loop_t* const lp) {
3011
- size_t i, n;
3012
- char *p1, *p2;
3013
- ssize_t s1, s2;
3014
- size_t *idx1, *idx2;
3015
- dtype x;
3016
-
3017
- INIT_COUNTER(lp, n);
3018
- INIT_PTR_IDX(lp, 0, p1, s1, idx1);
3019
- INIT_PTR_IDX(lp, 1, p2, s2, idx2);
3020
-
3021
- if (idx1) {
3022
- if (idx2) {
3023
- for (i = 0; i < n; i++) {
3024
- GET_DATA_INDEX(p1, idx1, dtype, x);
3025
- x = m_sign(x);
3026
- SET_DATA_INDEX(p2, idx2, dtype, x);
3027
- }
3028
- } else {
3029
- for (i = 0; i < n; i++) {
3030
- GET_DATA_INDEX(p1, idx1, dtype, x);
3031
- x = m_sign(x);
3032
- SET_DATA_STRIDE(p2, s2, dtype, x);
3033
- }
3034
- }
3035
- } else {
3036
- if (idx2) {
3037
- for (i = 0; i < n; i++) {
3038
- GET_DATA_STRIDE(p1, s1, dtype, x);
3039
- x = m_sign(x);
3040
- SET_DATA_INDEX(p2, idx2, dtype, x);
3041
- }
3042
- } else {
3043
- //
3044
- if (is_aligned(p1, sizeof(dtype)) && is_aligned(p2, sizeof(dtype))) {
3045
- if (s1 == sizeof(dtype) && s2 == sizeof(dtype)) {
3046
- for (i = 0; i < n; i++) {
3047
- ((dtype*)p2)[i] = m_sign(((dtype*)p1)[i]);
3048
- }
3049
- return;
3050
- }
3051
- if (is_aligned_step(s1, sizeof(dtype)) && is_aligned_step(s2, sizeof(dtype))) {
3052
- //
3053
- for (i = 0; i < n; i++) {
3054
- *(dtype*)p2 = m_sign(*(dtype*)p1);
3055
- p1 += s1;
3056
- p2 += s2;
3057
- }
3058
- return;
3059
- //
3060
- }
3061
- }
3062
- for (i = 0; i < n; i++) {
3063
- GET_DATA_STRIDE(p1, s1, dtype, x);
3064
- x = m_sign(x);
3065
- SET_DATA_STRIDE(p2, s2, dtype, x);
3066
- }
3067
- //
3068
- }
3069
- }
3070
- }
3071
-
3072
- static VALUE sfloat_sign(VALUE self) {
3073
- ndfunc_arg_in_t ain[1] = { { cT, 0 } };
3074
- ndfunc_arg_out_t aout[1] = { { cT, 0 } };
3075
- ndfunc_t ndf = { iter_sfloat_sign, FULL_LOOP, 1, 1, ain, aout };
3076
-
3077
- return na_ndloop(&ndf, 1, self);
3078
- }
3079
-
3080
- static void iter_sfloat_square(na_loop_t* const lp) {
3081
- size_t i, n;
3082
- char *p1, *p2;
3083
- ssize_t s1, s2;
3084
- size_t *idx1, *idx2;
3085
- dtype x;
3086
-
3087
- INIT_COUNTER(lp, n);
3088
- INIT_PTR_IDX(lp, 0, p1, s1, idx1);
3089
- INIT_PTR_IDX(lp, 1, p2, s2, idx2);
3090
-
3091
- if (idx1) {
3092
- if (idx2) {
3093
- for (i = 0; i < n; i++) {
3094
- GET_DATA_INDEX(p1, idx1, dtype, x);
3095
- x = m_square(x);
3096
- SET_DATA_INDEX(p2, idx2, dtype, x);
3097
- }
3098
- } else {
3099
- for (i = 0; i < n; i++) {
3100
- GET_DATA_INDEX(p1, idx1, dtype, x);
3101
- x = m_square(x);
3102
- SET_DATA_STRIDE(p2, s2, dtype, x);
3103
- }
3104
- }
3105
- } else {
3106
- if (idx2) {
3107
- for (i = 0; i < n; i++) {
3108
- GET_DATA_STRIDE(p1, s1, dtype, x);
3109
- x = m_square(x);
3110
- SET_DATA_INDEX(p2, idx2, dtype, x);
3111
- }
3112
- } else {
3113
- //
3114
- if (is_aligned(p1, sizeof(dtype)) && is_aligned(p2, sizeof(dtype))) {
3115
- if (s1 == sizeof(dtype) && s2 == sizeof(dtype)) {
3116
- for (i = 0; i < n; i++) {
3117
- ((dtype*)p2)[i] = m_square(((dtype*)p1)[i]);
3118
- }
3119
- return;
3120
- }
3121
- if (is_aligned_step(s1, sizeof(dtype)) && is_aligned_step(s2, sizeof(dtype))) {
3122
- //
3123
- for (i = 0; i < n; i++) {
3124
- *(dtype*)p2 = m_square(*(dtype*)p1);
3125
- p1 += s1;
3126
- p2 += s2;
3127
- }
3128
- return;
3129
- //
3130
- }
3131
- }
3132
- for (i = 0; i < n; i++) {
3133
- GET_DATA_STRIDE(p1, s1, dtype, x);
3134
- x = m_square(x);
3135
- SET_DATA_STRIDE(p2, s2, dtype, x);
3136
- }
3137
- //
3138
- }
3139
- }
3140
- }
3141
-
3142
- static VALUE sfloat_square(VALUE self) {
3143
- ndfunc_arg_in_t ain[1] = { { cT, 0 } };
3144
- ndfunc_arg_out_t aout[1] = { { cT, 0 } };
3145
- ndfunc_t ndf = { iter_sfloat_square, FULL_LOOP, 1, 1, ain, aout };
3146
-
3147
- return na_ndloop(&ndf, 1, self);
3148
- }
3149
-
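The three removed pairs above (reciprocal, sign, square) are stamped out from the same unary template: an iter_sfloat_* kernel that walks the input and output buffers through the index, strided, or contiguous fast paths, plus a thin sfloat_* wrapper that registers the kernel with na_ndloop as a one-in/one-out FULL_LOOP ndfunc. As a rough Ruby-level sketch, assuming the gem is installed and these kernels are exposed as instance methods on Numo::SFloat in the usual way:

    require "numo/narray"

    a = Numo::SFloat[0.5, -2.0, 4.0]
    a.reciprocal   # element-wise 1/x  -> [2.0, -0.5, 0.25]
    a.sign         # element-wise sign -> [1.0, -1.0, 1.0]
    a.square       # element-wise x*x  -> [0.25, 4.0, 16.0]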
3150
- static void iter_sfloat_eq(na_loop_t* const lp) {
3151
- size_t i;
3152
- char *p1, *p2;
3153
- BIT_DIGIT* a3;
3154
- size_t p3;
3155
- ssize_t s1, s2, s3;
3156
- dtype x, y;
3157
- BIT_DIGIT b;
3158
- INIT_COUNTER(lp, i);
3159
- INIT_PTR(lp, 0, p1, s1);
3160
- INIT_PTR(lp, 1, p2, s2);
3161
- INIT_PTR_BIT(lp, 2, a3, p3, s3);
3162
- for (; i--;) {
3163
- GET_DATA_STRIDE(p1, s1, dtype, x);
3164
- GET_DATA_STRIDE(p2, s2, dtype, y);
3165
- b = (m_eq(x, y)) ? 1 : 0;
3166
- STORE_BIT(a3, p3, b);
3167
- p3 += s3;
3168
- }
3169
- }
3170
-
3171
- static VALUE sfloat_eq_self(VALUE self, VALUE other) {
3172
- ndfunc_arg_in_t ain[2] = { { cT, 0 }, { cT, 0 } };
3173
- ndfunc_arg_out_t aout[1] = { { numo_cBit, 0 } };
3174
- ndfunc_t ndf = { iter_sfloat_eq, STRIDE_LOOP, 2, 1, ain, aout };
3175
-
3176
- return na_ndloop(&ndf, 2, self, other);
3177
- }
3178
-
3179
- static VALUE sfloat_eq(VALUE self, VALUE other) {
3180
-
3181
- VALUE klass, v;
3182
- klass = na_upcast(rb_obj_class(self), rb_obj_class(other));
3183
- if (klass == cT) {
3184
- return sfloat_eq_self(self, other);
3185
- } else {
3186
- v = rb_funcall(klass, id_cast, 1, self);
3187
- return rb_funcall(v, id_eq, 1, other);
3188
- }
3189
- }
3190
-
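sfloat_eq above also shows the dispatch convention used by every binary method in this file: na_upcast resolves the common class of the two operands; if that class is Numo::SFloat the typed kernel runs and produces a Numo::Bit mask, otherwise self is cast to the wider class and the same method is re-sent to the cast result. A short illustration, assuming standard Numo::NArray semantics:

    require "numo/narray"

    a = Numo::SFloat[1, 2, 3]
    a.eq(2)                      # scalar broadcast        -> Numo::Bit [0, 1, 0]
    a.eq(Numo::DFloat[1, 0, 3])  # upcast to DFloat first  -> Numo::Bit [1, 0, 1]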
3191
- static void iter_sfloat_ne(na_loop_t* const lp) {
3192
- size_t i;
3193
- char *p1, *p2;
3194
- BIT_DIGIT* a3;
3195
- size_t p3;
3196
- ssize_t s1, s2, s3;
3197
- dtype x, y;
3198
- BIT_DIGIT b;
3199
- INIT_COUNTER(lp, i);
3200
- INIT_PTR(lp, 0, p1, s1);
3201
- INIT_PTR(lp, 1, p2, s2);
3202
- INIT_PTR_BIT(lp, 2, a3, p3, s3);
3203
- for (; i--;) {
3204
- GET_DATA_STRIDE(p1, s1, dtype, x);
3205
- GET_DATA_STRIDE(p2, s2, dtype, y);
3206
- b = (m_ne(x, y)) ? 1 : 0;
3207
- STORE_BIT(a3, p3, b);
3208
- p3 += s3;
3209
- }
3210
- }
3211
-
3212
- static VALUE sfloat_ne_self(VALUE self, VALUE other) {
3213
- ndfunc_arg_in_t ain[2] = { { cT, 0 }, { cT, 0 } };
3214
- ndfunc_arg_out_t aout[1] = { { numo_cBit, 0 } };
3215
- ndfunc_t ndf = { iter_sfloat_ne, STRIDE_LOOP, 2, 1, ain, aout };
3216
-
3217
- return na_ndloop(&ndf, 2, self, other);
3218
- }
3219
-
3220
- static VALUE sfloat_ne(VALUE self, VALUE other) {
3221
-
3222
- VALUE klass, v;
3223
- klass = na_upcast(rb_obj_class(self), rb_obj_class(other));
3224
- if (klass == cT) {
3225
- return sfloat_ne_self(self, other);
3226
- } else {
3227
- v = rb_funcall(klass, id_cast, 1, self);
3228
- return rb_funcall(v, id_ne, 1, other);
3229
- }
3230
- }
3231
-
3232
- static void iter_sfloat_nearly_eq(na_loop_t* const lp) {
3233
- size_t i;
3234
- char *p1, *p2;
3235
- BIT_DIGIT* a3;
3236
- size_t p3;
3237
- ssize_t s1, s2, s3;
3238
- dtype x, y;
3239
- BIT_DIGIT b;
3240
- INIT_COUNTER(lp, i);
3241
- INIT_PTR(lp, 0, p1, s1);
3242
- INIT_PTR(lp, 1, p2, s2);
3243
- INIT_PTR_BIT(lp, 2, a3, p3, s3);
3244
- for (; i--;) {
3245
- GET_DATA_STRIDE(p1, s1, dtype, x);
3246
- GET_DATA_STRIDE(p2, s2, dtype, y);
3247
- b = (m_nearly_eq(x, y)) ? 1 : 0;
3248
- STORE_BIT(a3, p3, b);
3249
- p3 += s3;
3250
- }
3251
- }
3252
-
3253
- static VALUE sfloat_nearly_eq_self(VALUE self, VALUE other) {
3254
- ndfunc_arg_in_t ain[2] = { { cT, 0 }, { cT, 0 } };
3255
- ndfunc_arg_out_t aout[1] = { { numo_cBit, 0 } };
3256
- ndfunc_t ndf = { iter_sfloat_nearly_eq, STRIDE_LOOP, 2, 1, ain, aout };
3257
-
3258
- return na_ndloop(&ndf, 2, self, other);
3259
- }
3260
-
3261
- static VALUE sfloat_nearly_eq(VALUE self, VALUE other) {
3262
-
3263
- VALUE klass, v;
3264
- klass = na_upcast(rb_obj_class(self), rb_obj_class(other));
3265
- if (klass == cT) {
3266
- return sfloat_nearly_eq_self(self, other);
3267
- } else {
3268
- v = rb_funcall(klass, id_cast, 1, self);
3269
- return rb_funcall(v, id_nearly_eq, 1, other);
3270
- }
3271
- }
3272
-
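nearly_eq is the tolerant counterpart of eq: m_nearly_eq treats two elements as equal when they differ by no more than a small relative tolerance (on the order of a couple of single-precision ULPs) rather than requiring bit-exact equality. A hedged sketch of the intended behaviour; the exact tolerance is type-dependent:

    require "numo/narray"

    a = Numo::SFloat[1.0, 2.0]
    b = Numo::SFloat[1.0000002, 2.0]  # about 2 ULP away from 1.0 in single precision
    a.eq(b)          # exact comparison    -> Numo::Bit [0, 1]
    a.nearly_eq(b)   # tolerant comparison -> Numo::Bit [1, 1]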
3273
- static void iter_sfloat_floor(na_loop_t* const lp) {
3274
- size_t i, n;
3275
- char *p1, *p2;
3276
- ssize_t s1, s2;
3277
- size_t *idx1, *idx2;
3278
- dtype x;
3279
-
3280
- INIT_COUNTER(lp, n);
3281
- INIT_PTR_IDX(lp, 0, p1, s1, idx1);
3282
- INIT_PTR_IDX(lp, 1, p2, s2, idx2);
3283
-
3284
- if (idx1) {
3285
- if (idx2) {
3286
- for (i = 0; i < n; i++) {
3287
- GET_DATA_INDEX(p1, idx1, dtype, x);
3288
- x = m_floor(x);
3289
- SET_DATA_INDEX(p2, idx2, dtype, x);
3290
- }
3291
- } else {
3292
- for (i = 0; i < n; i++) {
3293
- GET_DATA_INDEX(p1, idx1, dtype, x);
3294
- x = m_floor(x);
3295
- SET_DATA_STRIDE(p2, s2, dtype, x);
3296
- }
3297
- }
3298
- } else {
3299
- if (idx2) {
3300
- for (i = 0; i < n; i++) {
3301
- GET_DATA_STRIDE(p1, s1, dtype, x);
3302
- x = m_floor(x);
3303
- SET_DATA_INDEX(p2, idx2, dtype, x);
3304
- }
3305
- } else {
3306
- //
3307
- if (is_aligned(p1, sizeof(dtype)) && is_aligned(p2, sizeof(dtype))) {
3308
- if (s1 == sizeof(dtype) && s2 == sizeof(dtype)) {
3309
- for (i = 0; i < n; i++) {
3310
- ((dtype*)p2)[i] = m_floor(((dtype*)p1)[i]);
3311
- }
3312
- return;
3313
- }
3314
- if (is_aligned_step(s1, sizeof(dtype)) && is_aligned_step(s2, sizeof(dtype))) {
3315
- //
3316
- for (i = 0; i < n; i++) {
3317
- *(dtype*)p2 = m_floor(*(dtype*)p1);
3318
- p1 += s1;
3319
- p2 += s2;
3320
- }
3321
- return;
3322
- //
3323
- }
3324
- }
3325
- for (i = 0; i < n; i++) {
3326
- GET_DATA_STRIDE(p1, s1, dtype, x);
3327
- x = m_floor(x);
3328
- SET_DATA_STRIDE(p2, s2, dtype, x);
3329
- }
3330
- //
3331
- }
3332
- }
3333
- }
3334
-
3335
- static VALUE sfloat_floor(VALUE self) {
3336
- ndfunc_arg_in_t ain[1] = { { cT, 0 } };
3337
- ndfunc_arg_out_t aout[1] = { { cT, 0 } };
3338
- ndfunc_t ndf = { iter_sfloat_floor, FULL_LOOP, 1, 1, ain, aout };
3339
-
3340
- return na_ndloop(&ndf, 1, self);
3341
- }
3342
-
3343
- static void iter_sfloat_round(na_loop_t* const lp) {
3344
- size_t i, n;
3345
- char *p1, *p2;
3346
- ssize_t s1, s2;
3347
- size_t *idx1, *idx2;
3348
- dtype x;
3349
-
3350
- INIT_COUNTER(lp, n);
3351
- INIT_PTR_IDX(lp, 0, p1, s1, idx1);
3352
- INIT_PTR_IDX(lp, 1, p2, s2, idx2);
3353
-
3354
- if (idx1) {
3355
- if (idx2) {
3356
- for (i = 0; i < n; i++) {
3357
- GET_DATA_INDEX(p1, idx1, dtype, x);
3358
- x = m_round(x);
3359
- SET_DATA_INDEX(p2, idx2, dtype, x);
3360
- }
3361
- } else {
3362
- for (i = 0; i < n; i++) {
3363
- GET_DATA_INDEX(p1, idx1, dtype, x);
3364
- x = m_round(x);
3365
- SET_DATA_STRIDE(p2, s2, dtype, x);
3366
- }
3367
- }
3368
- } else {
3369
- if (idx2) {
3370
- for (i = 0; i < n; i++) {
3371
- GET_DATA_STRIDE(p1, s1, dtype, x);
3372
- x = m_round(x);
3373
- SET_DATA_INDEX(p2, idx2, dtype, x);
3374
- }
3375
- } else {
3376
- //
3377
- if (is_aligned(p1, sizeof(dtype)) && is_aligned(p2, sizeof(dtype))) {
3378
- if (s1 == sizeof(dtype) && s2 == sizeof(dtype)) {
3379
- for (i = 0; i < n; i++) {
3380
- ((dtype*)p2)[i] = m_round(((dtype*)p1)[i]);
3381
- }
3382
- return;
3383
- }
3384
- if (is_aligned_step(s1, sizeof(dtype)) && is_aligned_step(s2, sizeof(dtype))) {
3385
- //
3386
- for (i = 0; i < n; i++) {
3387
- *(dtype*)p2 = m_round(*(dtype*)p1);
3388
- p1 += s1;
3389
- p2 += s2;
3390
- }
3391
- return;
3392
- //
3393
- }
3394
- }
3395
- for (i = 0; i < n; i++) {
3396
- GET_DATA_STRIDE(p1, s1, dtype, x);
3397
- x = m_round(x);
3398
- SET_DATA_STRIDE(p2, s2, dtype, x);
3399
- }
3400
- //
3401
- }
3402
- }
3403
- }
3404
-
3405
- static VALUE sfloat_round(VALUE self) {
3406
- ndfunc_arg_in_t ain[1] = { { cT, 0 } };
3407
- ndfunc_arg_out_t aout[1] = { { cT, 0 } };
3408
- ndfunc_t ndf = { iter_sfloat_round, FULL_LOOP, 1, 1, ain, aout };
3409
-
3410
- return na_ndloop(&ndf, 1, self);
3411
- }
3412
-
3413
- static void iter_sfloat_ceil(na_loop_t* const lp) {
3414
- size_t i, n;
3415
- char *p1, *p2;
3416
- ssize_t s1, s2;
3417
- size_t *idx1, *idx2;
3418
- dtype x;
3419
-
3420
- INIT_COUNTER(lp, n);
3421
- INIT_PTR_IDX(lp, 0, p1, s1, idx1);
3422
- INIT_PTR_IDX(lp, 1, p2, s2, idx2);
3423
-
3424
- if (idx1) {
3425
- if (idx2) {
3426
- for (i = 0; i < n; i++) {
3427
- GET_DATA_INDEX(p1, idx1, dtype, x);
3428
- x = m_ceil(x);
3429
- SET_DATA_INDEX(p2, idx2, dtype, x);
3430
- }
3431
- } else {
3432
- for (i = 0; i < n; i++) {
3433
- GET_DATA_INDEX(p1, idx1, dtype, x);
3434
- x = m_ceil(x);
3435
- SET_DATA_STRIDE(p2, s2, dtype, x);
3436
- }
3437
- }
3438
- } else {
3439
- if (idx2) {
3440
- for (i = 0; i < n; i++) {
3441
- GET_DATA_STRIDE(p1, s1, dtype, x);
3442
- x = m_ceil(x);
3443
- SET_DATA_INDEX(p2, idx2, dtype, x);
3444
- }
3445
- } else {
3446
- //
3447
- if (is_aligned(p1, sizeof(dtype)) && is_aligned(p2, sizeof(dtype))) {
3448
- if (s1 == sizeof(dtype) && s2 == sizeof(dtype)) {
3449
- for (i = 0; i < n; i++) {
3450
- ((dtype*)p2)[i] = m_ceil(((dtype*)p1)[i]);
3451
- }
3452
- return;
3453
- }
3454
- if (is_aligned_step(s1, sizeof(dtype)) && is_aligned_step(s2, sizeof(dtype))) {
3455
- //
3456
- for (i = 0; i < n; i++) {
3457
- *(dtype*)p2 = m_ceil(*(dtype*)p1);
3458
- p1 += s1;
3459
- p2 += s2;
3460
- }
3461
- return;
3462
- //
3463
- }
3464
- }
3465
- for (i = 0; i < n; i++) {
3466
- GET_DATA_STRIDE(p1, s1, dtype, x);
3467
- x = m_ceil(x);
3468
- SET_DATA_STRIDE(p2, s2, dtype, x);
3469
- }
3470
- //
3471
- }
3472
- }
3473
- }
3474
-
3475
- static VALUE sfloat_ceil(VALUE self) {
3476
- ndfunc_arg_in_t ain[1] = { { cT, 0 } };
3477
- ndfunc_arg_out_t aout[1] = { { cT, 0 } };
3478
- ndfunc_t ndf = { iter_sfloat_ceil, FULL_LOOP, 1, 1, ain, aout };
3479
-
3480
- return na_ndloop(&ndf, 1, self);
3481
- }
3482
-
3483
- static void iter_sfloat_trunc(na_loop_t* const lp) {
3484
- size_t i, n;
3485
- char *p1, *p2;
3486
- ssize_t s1, s2;
3487
- size_t *idx1, *idx2;
3488
- dtype x;
3489
-
3490
- INIT_COUNTER(lp, n);
3491
- INIT_PTR_IDX(lp, 0, p1, s1, idx1);
3492
- INIT_PTR_IDX(lp, 1, p2, s2, idx2);
3493
-
3494
- if (idx1) {
3495
- if (idx2) {
3496
- for (i = 0; i < n; i++) {
3497
- GET_DATA_INDEX(p1, idx1, dtype, x);
3498
- x = m_trunc(x);
3499
- SET_DATA_INDEX(p2, idx2, dtype, x);
3500
- }
3501
- } else {
3502
- for (i = 0; i < n; i++) {
3503
- GET_DATA_INDEX(p1, idx1, dtype, x);
3504
- x = m_trunc(x);
3505
- SET_DATA_STRIDE(p2, s2, dtype, x);
3506
- }
3507
- }
3508
- } else {
3509
- if (idx2) {
3510
- for (i = 0; i < n; i++) {
3511
- GET_DATA_STRIDE(p1, s1, dtype, x);
3512
- x = m_trunc(x);
3513
- SET_DATA_INDEX(p2, idx2, dtype, x);
3514
- }
3515
- } else {
3516
- //
3517
- if (is_aligned(p1, sizeof(dtype)) && is_aligned(p2, sizeof(dtype))) {
3518
- if (s1 == sizeof(dtype) && s2 == sizeof(dtype)) {
3519
- for (i = 0; i < n; i++) {
3520
- ((dtype*)p2)[i] = m_trunc(((dtype*)p1)[i]);
3521
- }
3522
- return;
3523
- }
3524
- if (is_aligned_step(s1, sizeof(dtype)) && is_aligned_step(s2, sizeof(dtype))) {
3525
- //
3526
- for (i = 0; i < n; i++) {
3527
- *(dtype*)p2 = m_trunc(*(dtype*)p1);
3528
- p1 += s1;
3529
- p2 += s2;
3530
- }
3531
- return;
3532
- //
3533
- }
3534
- }
3535
- for (i = 0; i < n; i++) {
3536
- GET_DATA_STRIDE(p1, s1, dtype, x);
3537
- x = m_trunc(x);
3538
- SET_DATA_STRIDE(p2, s2, dtype, x);
3539
- }
3540
- //
3541
- }
3542
- }
3543
- }
3544
-
3545
- static VALUE sfloat_trunc(VALUE self) {
3546
- ndfunc_arg_in_t ain[1] = { { cT, 0 } };
3547
- ndfunc_arg_out_t aout[1] = { { cT, 0 } };
3548
- ndfunc_t ndf = { iter_sfloat_trunc, FULL_LOOP, 1, 1, ain, aout };
3549
-
3550
- return na_ndloop(&ndf, 1, self);
3551
- }
3552
-
3553
- static void iter_sfloat_rint(na_loop_t* const lp) {
3554
- size_t i, n;
3555
- char *p1, *p2;
3556
- ssize_t s1, s2;
3557
- size_t *idx1, *idx2;
3558
- dtype x;
3559
-
3560
- INIT_COUNTER(lp, n);
3561
- INIT_PTR_IDX(lp, 0, p1, s1, idx1);
3562
- INIT_PTR_IDX(lp, 1, p2, s2, idx2);
3563
-
3564
- if (idx1) {
3565
- if (idx2) {
3566
- for (i = 0; i < n; i++) {
3567
- GET_DATA_INDEX(p1, idx1, dtype, x);
3568
- x = m_rint(x);
3569
- SET_DATA_INDEX(p2, idx2, dtype, x);
3570
- }
3571
- } else {
3572
- for (i = 0; i < n; i++) {
3573
- GET_DATA_INDEX(p1, idx1, dtype, x);
3574
- x = m_rint(x);
3575
- SET_DATA_STRIDE(p2, s2, dtype, x);
3576
- }
3577
- }
3578
- } else {
3579
- if (idx2) {
3580
- for (i = 0; i < n; i++) {
3581
- GET_DATA_STRIDE(p1, s1, dtype, x);
3582
- x = m_rint(x);
3583
- SET_DATA_INDEX(p2, idx2, dtype, x);
3584
- }
3585
- } else {
3586
- //
3587
- if (is_aligned(p1, sizeof(dtype)) && is_aligned(p2, sizeof(dtype))) {
3588
- if (s1 == sizeof(dtype) && s2 == sizeof(dtype)) {
3589
- for (i = 0; i < n; i++) {
3590
- ((dtype*)p2)[i] = m_rint(((dtype*)p1)[i]);
3591
- }
3592
- return;
3593
- }
3594
- if (is_aligned_step(s1, sizeof(dtype)) && is_aligned_step(s2, sizeof(dtype))) {
3595
- //
3596
- for (i = 0; i < n; i++) {
3597
- *(dtype*)p2 = m_rint(*(dtype*)p1);
3598
- p1 += s1;
3599
- p2 += s2;
3600
- }
3601
- return;
3602
- //
3603
- }
3604
- }
3605
- for (i = 0; i < n; i++) {
3606
- GET_DATA_STRIDE(p1, s1, dtype, x);
3607
- x = m_rint(x);
3608
- SET_DATA_STRIDE(p2, s2, dtype, x);
3609
- }
3610
- //
3611
- }
3612
- }
3613
- }
3614
-
3615
- static VALUE sfloat_rint(VALUE self) {
3616
- ndfunc_arg_in_t ain[1] = { { cT, 0 } };
3617
- ndfunc_arg_out_t aout[1] = { { cT, 0 } };
3618
- ndfunc_t ndf = { iter_sfloat_rint, FULL_LOOP, 1, 1, ain, aout };
3619
-
3620
- return na_ndloop(&ndf, 1, self);
3621
- }
3622
-
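floor, round, ceil, trunc and rint above differ only in the m_* kernel each iterator applies; everything else (the index/stride/contiguous fast paths and the FULL_LOOP registration) is identical boilerplate. Expected Ruby-level results, assuming the methods map one-to-one onto the C kernels (round rounds halves away from zero, rint rounds halves to even):

    require "numo/narray"

    x = Numo::SFloat[-1.5, -0.5, 0.5, 1.5]
    x.floor   # -> [-2.0, -1.0, 0.0, 1.0]
    x.ceil    # -> [-1.0, -0.0, 1.0, 2.0]
    x.trunc   # -> [-1.0, -0.0, 0.0, 1.0]
    x.round   # -> [-2.0, -1.0, 1.0, 2.0]
    x.rint    # -> [-2.0, -0.0, 0.0, 2.0]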
3623
- #define check_intdivzero(y) \
3624
- {}
3625
-
3626
- static void iter_sfloat_copysign(na_loop_t* const lp) {
3627
- size_t i = 0;
3628
- size_t n;
3629
- char *p1, *p2, *p3;
3630
- ssize_t s1, s2, s3;
3631
-
3632
- INIT_COUNTER(lp, n);
3633
- INIT_PTR(lp, 0, p1, s1);
3634
- INIT_PTR(lp, 1, p2, s2);
3635
- INIT_PTR(lp, 2, p3, s3);
3636
-
3637
- //
3638
- if (is_aligned(p1, sizeof(dtype)) && is_aligned(p2, sizeof(dtype)) &&
3639
- is_aligned(p3, sizeof(dtype))) {
3640
-
3641
- if (s1 == sizeof(dtype) && s2 == sizeof(dtype) && s3 == sizeof(dtype)) {
3642
- if (p1 == p3) { // inplace case
3643
- for (; i < n; i++) {
3644
- check_intdivzero(((dtype*)p2)[i]);
3645
- ((dtype*)p1)[i] = m_copysign(((dtype*)p1)[i], ((dtype*)p2)[i]);
3646
- }
3647
- } else {
3648
- for (; i < n; i++) {
3649
- check_intdivzero(((dtype*)p2)[i]);
3650
- ((dtype*)p3)[i] = m_copysign(((dtype*)p1)[i], ((dtype*)p2)[i]);
3651
- }
3652
- }
3653
- return;
3654
- }
3655
-
3656
- if (is_aligned_step(s1, sizeof(dtype)) && is_aligned_step(s2, sizeof(dtype)) &&
3657
- is_aligned_step(s3, sizeof(dtype))) {
3658
- //
3659
-
3660
- if (s2 == 0) { // Broadcasting from scalar value.
3661
- check_intdivzero(*(dtype*)p2);
3662
- if (s1 == sizeof(dtype) && s3 == sizeof(dtype)) {
3663
- if (p1 == p3) { // inplace case
3664
- for (; i < n; i++) {
3665
- ((dtype*)p1)[i] = m_copysign(((dtype*)p1)[i], *(dtype*)p2);
3666
- }
3667
- } else {
3668
- for (; i < n; i++) {
3669
- ((dtype*)p3)[i] = m_copysign(((dtype*)p1)[i], *(dtype*)p2);
3670
- }
3671
- }
3672
- } else {
3673
- for (i = 0; i < n; i++) {
3674
- *(dtype*)p3 = m_copysign(*(dtype*)p1, *(dtype*)p2);
3675
- p1 += s1;
3676
- p3 += s3;
3677
- }
3678
- }
3679
- } else {
3680
- if (p1 == p3) { // inplace case
3681
- for (i = 0; i < n; i++) {
3682
- check_intdivzero(*(dtype*)p2);
3683
- *(dtype*)p1 = m_copysign(*(dtype*)p1, *(dtype*)p2);
3684
- p1 += s1;
3685
- p2 += s2;
3686
- }
3687
- } else {
3688
- for (i = 0; i < n; i++) {
3689
- check_intdivzero(*(dtype*)p2);
3690
- *(dtype*)p3 = m_copysign(*(dtype*)p1, *(dtype*)p2);
3691
- p1 += s1;
3692
- p2 += s2;
3693
- p3 += s3;
3694
- }
3695
- }
3696
- }
3697
-
3698
- return;
3699
- //
3700
- }
3701
- }
3702
- for (i = 0; i < n; i++) {
3703
- dtype x, y, z;
3704
- GET_DATA_STRIDE(p1, s1, dtype, x);
3705
- GET_DATA_STRIDE(p2, s2, dtype, y);
3706
- check_intdivzero(y);
3707
- z = m_copysign(x, y);
3708
- SET_DATA_STRIDE(p3, s3, dtype, z);
3709
- }
3710
- //
3711
- }
3712
- #undef check_intdivzero
3713
-
3714
- static VALUE sfloat_copysign_self(VALUE self, VALUE other) {
3715
- ndfunc_arg_in_t ain[2] = { { cT, 0 }, { cT, 0 } };
3716
- ndfunc_arg_out_t aout[1] = { { cT, 0 } };
3717
- ndfunc_t ndf = { iter_sfloat_copysign, STRIDE_LOOP, 2, 1, ain, aout };
3718
-
3719
- return na_ndloop(&ndf, 2, self, other);
3720
- }
3721
-
3722
- static VALUE sfloat_copysign(VALUE self, VALUE other) {
3723
-
3724
- VALUE klass, v;
3725
-
3726
- klass = na_upcast(rb_obj_class(self), rb_obj_class(other));
3727
- if (klass == cT) {
3728
- return sfloat_copysign_self(self, other);
3729
- } else {
3730
- v = rb_funcall(klass, id_cast, 1, self);
3731
- return rb_funcall(v, id_copysign, 1, other);
3732
- }
3733
- }
3734
-
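The copysign iterator is emitted from the binary-operator template, which is why it carries the extra branches for an in-place destination (p1 == p3), a scalar second operand broadcast via s2 == 0, and the generic strided fallback; the check_intdivzero guard expands to an empty block for a float type. From Ruby, as a sketch:

    require "numo/narray"

    x = Numo::SFloat[1.0, -2.0, 3.0]
    x.copysign(-1)                      # scalar broadcast -> [-1.0, -2.0, -3.0]
    x.copysign(Numo::SFloat[1, -1, 1])  # element-wise     -> [ 1.0,  2.0,  3.0]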
3735
- static void iter_sfloat_signbit(na_loop_t* const lp) {
3736
- size_t i;
3737
- char* p1;
3738
- BIT_DIGIT* a2;
3739
- size_t p2;
3740
- ssize_t s1, s2;
3741
- size_t* idx1;
3742
- dtype x;
3743
- BIT_DIGIT b;
3744
- INIT_COUNTER(lp, i);
3745
- INIT_PTR_IDX(lp, 0, p1, s1, idx1);
3746
- INIT_PTR_BIT(lp, 1, a2, p2, s2);
3747
- if (idx1) {
3748
- for (; i--;) {
3749
- GET_DATA_INDEX(p1, idx1, dtype, x);
3750
- b = (m_signbit(x)) ? 1 : 0;
3751
- STORE_BIT(a2, p2, b);
3752
- p2 += s2;
3753
- }
3754
- } else {
3755
- for (; i--;) {
3756
- GET_DATA_STRIDE(p1, s1, dtype, x);
3757
- b = (m_signbit(x)) ? 1 : 0;
3758
- STORE_BIT(a2, p2, b);
3759
- p2 += s2;
3760
- }
3761
- }
3762
- }
3763
-
3764
- static VALUE sfloat_signbit(VALUE self) {
3765
- ndfunc_arg_in_t ain[1] = { { cT, 0 } };
3766
- ndfunc_arg_out_t aout[1] = { { numo_cBit, 0 } };
3767
- ndfunc_t ndf = { iter_sfloat_signbit, FULL_LOOP, 1, 1, ain, aout };
3768
-
3769
- return na_ndloop(&ndf, 1, self);
3770
- }
3771
-
3772
- static void iter_sfloat_modf(na_loop_t* const lp) {
3773
- size_t i;
3774
- char *p1, *p2, *p3;
3775
- ssize_t s1, s2, s3;
3776
- dtype x, y, z;
3777
- INIT_COUNTER(lp, i);
3778
- INIT_PTR(lp, 0, p1, s1);
3779
- INIT_PTR(lp, 1, p2, s2);
3780
- INIT_PTR(lp, 2, p3, s3);
3781
- for (; i--;) {
3782
- GET_DATA_STRIDE(p1, s1, dtype, x);
3783
- m_modf(x, y, z);
3784
- SET_DATA_STRIDE(p2, s2, dtype, y);
3785
- SET_DATA_STRIDE(p3, s3, dtype, z);
3786
- }
3787
- }
3788
-
3789
- static VALUE sfloat_modf(VALUE self) {
3790
- ndfunc_arg_in_t ain[1] = { { cT, 0 } };
3791
- ndfunc_arg_out_t aout[2] = { { cT, 0 }, { cT, 0 } };
3792
- ndfunc_t ndf = { iter_sfloat_modf, STRIDE_LOOP, 1, 2, ain, aout };
3793
-
3794
- return na_ndloop(&ndf, 1, self);
3795
- }
3796
-
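modf is a one-input, two-output ndfunc: m_modf(x, y, z) splits each element into its fractional part y and integral part z, and the two aout entries become the two returned arrays. A sketch, assuming the usual convention that multi-output ndloops return a Ruby Array of narrays in aout order:

    require "numo/narray"

    frac, int_part = Numo::SFloat[2.75, -1.25].modf
    frac       # -> [0.75, -0.25]
    int_part   # -> [2.0, -1.0]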
3797
- static void iter_sfloat_gt(na_loop_t* const lp) {
3798
- size_t i;
3799
- char *p1, *p2;
3800
- BIT_DIGIT* a3;
3801
- size_t p3;
3802
- ssize_t s1, s2, s3;
3803
- dtype x, y;
3804
- BIT_DIGIT b;
3805
- INIT_COUNTER(lp, i);
3806
- INIT_PTR(lp, 0, p1, s1);
3807
- INIT_PTR(lp, 1, p2, s2);
3808
- INIT_PTR_BIT(lp, 2, a3, p3, s3);
3809
- for (; i--;) {
3810
- GET_DATA_STRIDE(p1, s1, dtype, x);
3811
- GET_DATA_STRIDE(p2, s2, dtype, y);
3812
- b = (m_gt(x, y)) ? 1 : 0;
3813
- STORE_BIT(a3, p3, b);
3814
- p3 += s3;
3815
- }
3816
- }
3817
-
3818
- static VALUE sfloat_gt_self(VALUE self, VALUE other) {
3819
- ndfunc_arg_in_t ain[2] = { { cT, 0 }, { cT, 0 } };
3820
- ndfunc_arg_out_t aout[1] = { { numo_cBit, 0 } };
3821
- ndfunc_t ndf = { iter_sfloat_gt, STRIDE_LOOP, 2, 1, ain, aout };
3822
-
3823
- return na_ndloop(&ndf, 2, self, other);
3824
- }
3825
-
3826
- static VALUE sfloat_gt(VALUE self, VALUE other) {
3827
-
3828
- VALUE klass, v;
3829
- klass = na_upcast(rb_obj_class(self), rb_obj_class(other));
3830
- if (klass == cT) {
3831
- return sfloat_gt_self(self, other);
3832
- } else {
3833
- v = rb_funcall(klass, id_cast, 1, self);
3834
- return rb_funcall(v, id_gt, 1, other);
3835
- }
3836
- }
3837
-
3838
- static void iter_sfloat_ge(na_loop_t* const lp) {
3839
- size_t i;
3840
- char *p1, *p2;
3841
- BIT_DIGIT* a3;
3842
- size_t p3;
3843
- ssize_t s1, s2, s3;
3844
- dtype x, y;
3845
- BIT_DIGIT b;
3846
- INIT_COUNTER(lp, i);
3847
- INIT_PTR(lp, 0, p1, s1);
3848
- INIT_PTR(lp, 1, p2, s2);
3849
- INIT_PTR_BIT(lp, 2, a3, p3, s3);
3850
- for (; i--;) {
3851
- GET_DATA_STRIDE(p1, s1, dtype, x);
3852
- GET_DATA_STRIDE(p2, s2, dtype, y);
3853
- b = (m_ge(x, y)) ? 1 : 0;
3854
- STORE_BIT(a3, p3, b);
3855
- p3 += s3;
3856
- }
3857
- }
3858
-
3859
- static VALUE sfloat_ge_self(VALUE self, VALUE other) {
3860
- ndfunc_arg_in_t ain[2] = { { cT, 0 }, { cT, 0 } };
3861
- ndfunc_arg_out_t aout[1] = { { numo_cBit, 0 } };
3862
- ndfunc_t ndf = { iter_sfloat_ge, STRIDE_LOOP, 2, 1, ain, aout };
3863
-
3864
- return na_ndloop(&ndf, 2, self, other);
3865
- }
3866
-
3867
- static VALUE sfloat_ge(VALUE self, VALUE other) {
3868
-
3869
- VALUE klass, v;
3870
- klass = na_upcast(rb_obj_class(self), rb_obj_class(other));
3871
- if (klass == cT) {
3872
- return sfloat_ge_self(self, other);
3873
- } else {
3874
- v = rb_funcall(klass, id_cast, 1, self);
3875
- return rb_funcall(v, id_ge, 1, other);
3876
- }
3877
- }
3878
-
3879
- static void iter_sfloat_lt(na_loop_t* const lp) {
3880
- size_t i;
3881
- char *p1, *p2;
3882
- BIT_DIGIT* a3;
3883
- size_t p3;
3884
- ssize_t s1, s2, s3;
3885
- dtype x, y;
3886
- BIT_DIGIT b;
3887
- INIT_COUNTER(lp, i);
3888
- INIT_PTR(lp, 0, p1, s1);
3889
- INIT_PTR(lp, 1, p2, s2);
3890
- INIT_PTR_BIT(lp, 2, a3, p3, s3);
3891
- for (; i--;) {
3892
- GET_DATA_STRIDE(p1, s1, dtype, x);
3893
- GET_DATA_STRIDE(p2, s2, dtype, y);
3894
- b = (m_lt(x, y)) ? 1 : 0;
3895
- STORE_BIT(a3, p3, b);
3896
- p3 += s3;
3897
- }
3898
- }
3899
-
3900
- static VALUE sfloat_lt_self(VALUE self, VALUE other) {
3901
- ndfunc_arg_in_t ain[2] = { { cT, 0 }, { cT, 0 } };
3902
- ndfunc_arg_out_t aout[1] = { { numo_cBit, 0 } };
3903
- ndfunc_t ndf = { iter_sfloat_lt, STRIDE_LOOP, 2, 1, ain, aout };
3904
-
3905
- return na_ndloop(&ndf, 2, self, other);
3906
- }
3907
-
3908
- static VALUE sfloat_lt(VALUE self, VALUE other) {
3909
-
3910
- VALUE klass, v;
3911
- klass = na_upcast(rb_obj_class(self), rb_obj_class(other));
3912
- if (klass == cT) {
3913
- return sfloat_lt_self(self, other);
3914
- } else {
3915
- v = rb_funcall(klass, id_cast, 1, self);
3916
- return rb_funcall(v, id_lt, 1, other);
3917
- }
3918
- }
3919
-
3920
- static void iter_sfloat_le(na_loop_t* const lp) {
3921
- size_t i;
3922
- char *p1, *p2;
3923
- BIT_DIGIT* a3;
3924
- size_t p3;
3925
- ssize_t s1, s2, s3;
3926
- dtype x, y;
3927
- BIT_DIGIT b;
3928
- INIT_COUNTER(lp, i);
3929
- INIT_PTR(lp, 0, p1, s1);
3930
- INIT_PTR(lp, 1, p2, s2);
3931
- INIT_PTR_BIT(lp, 2, a3, p3, s3);
3932
- for (; i--;) {
3933
- GET_DATA_STRIDE(p1, s1, dtype, x);
3934
- GET_DATA_STRIDE(p2, s2, dtype, y);
3935
- b = (m_le(x, y)) ? 1 : 0;
3936
- STORE_BIT(a3, p3, b);
3937
- p3 += s3;
3938
- }
3939
- }
3940
-
3941
- static VALUE sfloat_le_self(VALUE self, VALUE other) {
3942
- ndfunc_arg_in_t ain[2] = { { cT, 0 }, { cT, 0 } };
3943
- ndfunc_arg_out_t aout[1] = { { numo_cBit, 0 } };
3944
- ndfunc_t ndf = { iter_sfloat_le, STRIDE_LOOP, 2, 1, ain, aout };
3945
-
3946
- return na_ndloop(&ndf, 2, self, other);
3947
- }
3948
-
3949
- static VALUE sfloat_le(VALUE self, VALUE other) {
3950
-
3951
- VALUE klass, v;
3952
- klass = na_upcast(rb_obj_class(self), rb_obj_class(other));
3953
- if (klass == cT) {
3954
- return sfloat_le_self(self, other);
3955
- } else {
3956
- v = rb_funcall(klass, id_cast, 1, self);
3957
- return rb_funcall(v, id_le, 1, other);
3958
- }
3959
- }
3960
-
3961
- static void iter_sfloat_isnan(na_loop_t* const lp) {
3962
- size_t i;
3963
- char* p1;
3964
- BIT_DIGIT* a2;
3965
- size_t p2;
3966
- ssize_t s1, s2;
3967
- size_t* idx1;
3968
- dtype x;
3969
- BIT_DIGIT b;
3970
- INIT_COUNTER(lp, i);
3971
- INIT_PTR_IDX(lp, 0, p1, s1, idx1);
3972
- INIT_PTR_BIT(lp, 1, a2, p2, s2);
3973
- if (idx1) {
3974
- for (; i--;) {
3975
- GET_DATA_INDEX(p1, idx1, dtype, x);
3976
- b = (m_isnan(x)) ? 1 : 0;
3977
- STORE_BIT(a2, p2, b);
3978
- p2 += s2;
3979
- }
3980
- } else {
3981
- for (; i--;) {
3982
- GET_DATA_STRIDE(p1, s1, dtype, x);
3983
- b = (m_isnan(x)) ? 1 : 0;
3984
- STORE_BIT(a2, p2, b);
3985
- p2 += s2;
3986
- }
3987
- }
3988
- }
3989
-
3990
- static VALUE sfloat_isnan(VALUE self) {
3991
- ndfunc_arg_in_t ain[1] = { { cT, 0 } };
3992
- ndfunc_arg_out_t aout[1] = { { numo_cBit, 0 } };
3993
- ndfunc_t ndf = { iter_sfloat_isnan, FULL_LOOP, 1, 1, ain, aout };
3994
-
3995
- return na_ndloop(&ndf, 1, self);
3996
- }
3997
-
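The predicate iterators (isnan here, and isinf, isposinf, isneginf, isfinite below) all follow the same shape: they read elements through the index or strided path and write a Numo::Bit mask one bit at a time with STORE_BIT instead of storing a dtype value. Ruby-level result, for illustration:

    require "numo/narray"

    x = Numo::SFloat[1.0, Float::NAN, Float::INFINITY, -Float::INFINITY]
    x.isnan      # -> Numo::Bit [0, 1, 0, 0]
    x.isinf      # -> Numo::Bit [0, 0, 1, 1]
    x.isfinite   # -> Numo::Bit [1, 0, 0, 0]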
3998
- static void iter_sfloat_isinf(na_loop_t* const lp) {
3999
- size_t i;
4000
- char* p1;
4001
- BIT_DIGIT* a2;
4002
- size_t p2;
4003
- ssize_t s1, s2;
4004
- size_t* idx1;
4005
- dtype x;
4006
- BIT_DIGIT b;
4007
- INIT_COUNTER(lp, i);
4008
- INIT_PTR_IDX(lp, 0, p1, s1, idx1);
4009
- INIT_PTR_BIT(lp, 1, a2, p2, s2);
4010
- if (idx1) {
4011
- for (; i--;) {
4012
- GET_DATA_INDEX(p1, idx1, dtype, x);
4013
- b = (m_isinf(x)) ? 1 : 0;
4014
- STORE_BIT(a2, p2, b);
4015
- p2 += s2;
4016
- }
4017
- } else {
4018
- for (; i--;) {
4019
- GET_DATA_STRIDE(p1, s1, dtype, x);
4020
- b = (m_isinf(x)) ? 1 : 0;
4021
- STORE_BIT(a2, p2, b);
4022
- p2 += s2;
4023
- }
4024
- }
4025
- }
4026
-
4027
- static VALUE sfloat_isinf(VALUE self) {
4028
- ndfunc_arg_in_t ain[1] = { { cT, 0 } };
4029
- ndfunc_arg_out_t aout[1] = { { numo_cBit, 0 } };
4030
- ndfunc_t ndf = { iter_sfloat_isinf, FULL_LOOP, 1, 1, ain, aout };
4031
-
4032
- return na_ndloop(&ndf, 1, self);
4033
- }
4034
-
4035
- static void iter_sfloat_isposinf(na_loop_t* const lp) {
4036
- size_t i;
4037
- char* p1;
4038
- BIT_DIGIT* a2;
4039
- size_t p2;
4040
- ssize_t s1, s2;
4041
- size_t* idx1;
4042
- dtype x;
4043
- BIT_DIGIT b;
4044
- INIT_COUNTER(lp, i);
4045
- INIT_PTR_IDX(lp, 0, p1, s1, idx1);
4046
- INIT_PTR_BIT(lp, 1, a2, p2, s2);
4047
- if (idx1) {
4048
- for (; i--;) {
4049
- GET_DATA_INDEX(p1, idx1, dtype, x);
4050
- b = (m_isposinf(x)) ? 1 : 0;
4051
- STORE_BIT(a2, p2, b);
4052
- p2 += s2;
4053
- }
4054
- } else {
4055
- for (; i--;) {
4056
- GET_DATA_STRIDE(p1, s1, dtype, x);
4057
- b = (m_isposinf(x)) ? 1 : 0;
4058
- STORE_BIT(a2, p2, b);
4059
- p2 += s2;
4060
- }
4061
- }
4062
- }
4063
-
4064
- static VALUE sfloat_isposinf(VALUE self) {
4065
- ndfunc_arg_in_t ain[1] = { { cT, 0 } };
4066
- ndfunc_arg_out_t aout[1] = { { numo_cBit, 0 } };
4067
- ndfunc_t ndf = { iter_sfloat_isposinf, FULL_LOOP, 1, 1, ain, aout };
4068
-
4069
- return na_ndloop(&ndf, 1, self);
4070
- }
4071
-
4072
- static void iter_sfloat_isneginf(na_loop_t* const lp) {
4073
- size_t i;
4074
- char* p1;
4075
- BIT_DIGIT* a2;
4076
- size_t p2;
4077
- ssize_t s1, s2;
4078
- size_t* idx1;
4079
- dtype x;
4080
- BIT_DIGIT b;
4081
- INIT_COUNTER(lp, i);
4082
- INIT_PTR_IDX(lp, 0, p1, s1, idx1);
4083
- INIT_PTR_BIT(lp, 1, a2, p2, s2);
4084
- if (idx1) {
4085
- for (; i--;) {
4086
- GET_DATA_INDEX(p1, idx1, dtype, x);
4087
- b = (m_isneginf(x)) ? 1 : 0;
4088
- STORE_BIT(a2, p2, b);
4089
- p2 += s2;
4090
- }
4091
- } else {
4092
- for (; i--;) {
4093
- GET_DATA_STRIDE(p1, s1, dtype, x);
4094
- b = (m_isneginf(x)) ? 1 : 0;
4095
- STORE_BIT(a2, p2, b);
4096
- p2 += s2;
4097
- }
4098
- }
4099
- }
4100
-
4101
- static VALUE sfloat_isneginf(VALUE self) {
4102
- ndfunc_arg_in_t ain[1] = { { cT, 0 } };
4103
- ndfunc_arg_out_t aout[1] = { { numo_cBit, 0 } };
4104
- ndfunc_t ndf = { iter_sfloat_isneginf, FULL_LOOP, 1, 1, ain, aout };
4105
-
4106
- return na_ndloop(&ndf, 1, self);
4107
- }
4108
-
4109
- static void iter_sfloat_isfinite(na_loop_t* const lp) {
4110
- size_t i;
4111
- char* p1;
4112
- BIT_DIGIT* a2;
4113
- size_t p2;
4114
- ssize_t s1, s2;
4115
- size_t* idx1;
4116
- dtype x;
4117
- BIT_DIGIT b;
4118
- INIT_COUNTER(lp, i);
4119
- INIT_PTR_IDX(lp, 0, p1, s1, idx1);
4120
- INIT_PTR_BIT(lp, 1, a2, p2, s2);
4121
- if (idx1) {
4122
- for (; i--;) {
4123
- GET_DATA_INDEX(p1, idx1, dtype, x);
4124
- b = (m_isfinite(x)) ? 1 : 0;
4125
- STORE_BIT(a2, p2, b);
4126
- p2 += s2;
4127
- }
4128
- } else {
4129
- for (; i--;) {
4130
- GET_DATA_STRIDE(p1, s1, dtype, x);
4131
- b = (m_isfinite(x)) ? 1 : 0;
4132
- STORE_BIT(a2, p2, b);
4133
- p2 += s2;
4134
- }
4135
- }
4136
- }
4137
-
4138
- static VALUE sfloat_isfinite(VALUE self) {
4139
- ndfunc_arg_in_t ain[1] = { { cT, 0 } };
4140
- ndfunc_arg_out_t aout[1] = { { numo_cBit, 0 } };
4141
- ndfunc_t ndf = { iter_sfloat_isfinite, FULL_LOOP, 1, 1, ain, aout };
1803
+ static VALUE sfloat_modf(VALUE self) {
1804
+ ndfunc_arg_in_t ain[1] = { { cT, 0 } };
1805
+ ndfunc_arg_out_t aout[2] = { { cT, 0 }, { cT, 0 } };
1806
+ ndfunc_t ndf = { iter_sfloat_modf, STRIDE_LOOP, 1, 2, ain, aout };
4142
1807
 
4143
1808
  return na_ndloop(&ndf, 1, self);
4144
1809
  }
4145
1810
 
4146
- typedef struct {
4147
- dtype mu;
4148
- rtype sigma;
4149
- } randn_opt_t;
4150
-
4151
- static void iter_sfloat_rand_norm(na_loop_t* const lp) {
4152
- size_t i;
4153
- char* p1;
4154
- ssize_t s1;
4155
- size_t* idx1;
4156
-
4157
- dtype *a0, *a1;
4158
-
4159
- dtype mu;
4160
- rtype sigma;
4161
- randn_opt_t* g;
4162
-
4163
- INIT_COUNTER(lp, i);
4164
- INIT_PTR_IDX(lp, 0, p1, s1, idx1);
4165
- g = (randn_opt_t*)(lp->opt_ptr);
4166
- mu = g->mu;
4167
- sigma = g->sigma;
4168
-
4169
- if (idx1) {
4170
-
4171
- for (; i > 1; i -= 2) {
4172
- a0 = (dtype*)(p1 + *idx1);
4173
- a1 = (dtype*)(p1 + *(idx1 + 1));
4174
- m_rand_norm(mu, sigma, a0, a1);
4175
- idx1 += 2;
4176
- }
4177
- if (i > 0) {
4178
- a0 = (dtype*)(p1 + *idx1);
4179
- m_rand_norm(mu, sigma, a0, 0);
4180
- }
4181
-
4182
- } else {
4183
-
4184
- for (; i > 1; i -= 2) {
4185
- a0 = (dtype*)(p1);
4186
- a1 = (dtype*)(p1 + s1);
4187
- m_rand_norm(mu, sigma, a0, a1);
4188
- p1 += s1 * 2;
4189
- }
4190
- if (i > 0) {
4191
- a0 = (dtype*)(p1);
4192
- m_rand_norm(mu, sigma, a0, 0);
4193
- }
4194
- }
4195
- }
4196
-
4197
- static VALUE sfloat_rand_norm(int argc, VALUE* args, VALUE self) {
4198
- int n;
4199
- randn_opt_t g;
4200
- VALUE v1 = Qnil, v2 = Qnil;
4201
- ndfunc_arg_in_t ain[1] = { { OVERWRITE, 0 } };
4202
- ndfunc_t ndf = { iter_sfloat_rand_norm, FULL_LOOP, 1, 0, ain, 0 };
4203
-
4204
- n = rb_scan_args(argc, args, "02", &v1, &v2);
4205
- if (n == 0) {
4206
- g.mu = m_zero;
4207
- } else {
4208
- g.mu = m_num_to_data(v1);
4209
- }
4210
- if (n == 2) {
4211
- g.sigma = NUM2DBL(v2);
4212
- } else {
4213
- g.sigma = 1;
4214
- }
4215
- na_ndloop3(&ndf, &g, 1, self);
4216
- return self;
4217
- }
4218
-
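rand_norm fills the receiver in place with Gaussian samples: the kernel consumes elements two at a time because each Box-Muller draw yields a pair of independent normals, and rb_scan_args(..., "02", ...) supplies the optional mu (default 0) and sigma (default 1) that the retained documentation below describes. A usage sketch:

    require "numo/narray"

    a = Numo::SFloat.new(5).rand_norm           # mean 0, sigma 1
    b = Numo::SFloat.new(5).rand_norm(10, 0.5)  # mean 10, sigma 0.5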
4219
1811
  static void iter_sfloat_poly(na_loop_t* const lp) {
4220
1812
  size_t i;
4221
1813
  dtype x, y, a;
@@ -5875,7 +3467,7 @@ void Init_numo_sfloat(void) {
5875
3467
  rb_define_method(cT, "rand", sfloat_rand, -1);
5876
3468
  /**
5877
3469
  * Generates random numbers from the normal distribution on self narray
5878
- * using Box-Muller Transformation.
3470
+ * using Box-Muller Transformation.
5879
3471
  * @overload rand_norm([mu,[sigma]])
5880
3472
  * @param [Numeric] mu mean of normal distribution. (default=0)
5881
3473
  * @param [Numeric] sigma standard deviation of normal distribution. (default=1)