numo-narray-alt 0.9.10 → 0.9.12

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (112)
  1. checksums.yaml +4 -4
  2. data/Gemfile +0 -1
  3. data/LICENSE +1 -1
  4. data/README.md +7 -0
  5. data/ext/numo/narray/numo/narray.h +2 -2
  6. data/ext/numo/narray/numo/types/robj_macro.h +1 -1
  7. data/ext/numo/narray/numo/types/robject.h +1 -1
  8. data/ext/numo/narray/src/mh/argmax.h +154 -0
  9. data/ext/numo/narray/src/mh/argmin.h +154 -0
  10. data/ext/numo/narray/src/mh/bincount.h +233 -0
  11. data/ext/numo/narray/src/mh/bit/and.h +225 -0
  12. data/ext/numo/narray/src/mh/bit/left_shift.h +225 -0
  13. data/ext/numo/narray/src/mh/bit/not.h +173 -0
  14. data/ext/numo/narray/src/mh/bit/or.h +225 -0
  15. data/ext/numo/narray/src/mh/bit/right_shift.h +225 -0
  16. data/ext/numo/narray/src/mh/bit/xor.h +225 -0
  17. data/ext/numo/narray/src/mh/clip.h +115 -0
  18. data/ext/numo/narray/src/mh/coerce_cast.h +9 -0
  19. data/ext/numo/narray/src/mh/comp/binary_func.h +37 -0
  20. data/ext/numo/narray/src/mh/comp/eq.h +26 -0
  21. data/ext/numo/narray/src/mh/comp/ge.h +26 -0
  22. data/ext/numo/narray/src/mh/comp/gt.h +26 -0
  23. data/ext/numo/narray/src/mh/comp/le.h +26 -0
  24. data/ext/numo/narray/src/mh/comp/lt.h +26 -0
  25. data/ext/numo/narray/src/mh/comp/ne.h +26 -0
  26. data/ext/numo/narray/src/mh/comp/nearly_eq.h +26 -0
  27. data/ext/numo/narray/src/mh/cumprod.h +98 -0
  28. data/ext/numo/narray/src/mh/cumsum.h +98 -0
  29. data/ext/numo/narray/src/mh/divmod.h +142 -0
  30. data/ext/numo/narray/src/mh/eye.h +82 -0
  31. data/ext/numo/narray/src/mh/fill.h +94 -0
  32. data/ext/numo/narray/src/mh/format.h +108 -0
  33. data/ext/numo/narray/src/mh/format_to_a.h +89 -0
  34. data/ext/numo/narray/src/mh/inspect.h +33 -0
  35. data/ext/numo/narray/src/mh/isfinite.h +42 -0
  36. data/ext/numo/narray/src/mh/isinf.h +42 -0
  37. data/ext/numo/narray/src/mh/isnan.h +42 -0
  38. data/ext/numo/narray/src/mh/isneginf.h +42 -0
  39. data/ext/numo/narray/src/mh/isposinf.h +42 -0
  40. data/ext/numo/narray/src/mh/logseq.h +69 -0
  41. data/ext/numo/narray/src/mh/math/acos.h +2 -2
  42. data/ext/numo/narray/src/mh/math/acosh.h +2 -2
  43. data/ext/numo/narray/src/mh/math/asin.h +2 -2
  44. data/ext/numo/narray/src/mh/math/asinh.h +2 -2
  45. data/ext/numo/narray/src/mh/math/atan.h +2 -2
  46. data/ext/numo/narray/src/mh/math/atan2.h +3 -3
  47. data/ext/numo/narray/src/mh/math/atanh.h +2 -2
  48. data/ext/numo/narray/src/mh/math/cbrt.h +2 -2
  49. data/ext/numo/narray/src/mh/math/cos.h +2 -2
  50. data/ext/numo/narray/src/mh/math/cosh.h +2 -2
  51. data/ext/numo/narray/src/mh/math/erf.h +2 -2
  52. data/ext/numo/narray/src/mh/math/erfc.h +2 -2
  53. data/ext/numo/narray/src/mh/math/exp.h +2 -2
  54. data/ext/numo/narray/src/mh/math/exp10.h +2 -2
  55. data/ext/numo/narray/src/mh/math/exp2.h +2 -2
  56. data/ext/numo/narray/src/mh/math/expm1.h +2 -2
  57. data/ext/numo/narray/src/mh/math/frexp.h +3 -3
  58. data/ext/numo/narray/src/mh/math/hypot.h +3 -3
  59. data/ext/numo/narray/src/mh/math/ldexp.h +3 -3
  60. data/ext/numo/narray/src/mh/math/log.h +2 -2
  61. data/ext/numo/narray/src/mh/math/log10.h +2 -2
  62. data/ext/numo/narray/src/mh/math/log1p.h +2 -2
  63. data/ext/numo/narray/src/mh/math/log2.h +2 -2
  64. data/ext/numo/narray/src/mh/math/sin.h +2 -2
  65. data/ext/numo/narray/src/mh/math/sinc.h +2 -2
  66. data/ext/numo/narray/src/mh/math/sinh.h +2 -2
  67. data/ext/numo/narray/src/mh/math/sqrt.h +8 -8
  68. data/ext/numo/narray/src/mh/math/tan.h +2 -2
  69. data/ext/numo/narray/src/mh/math/tanh.h +2 -2
  70. data/ext/numo/narray/src/mh/math/unary_func.h +3 -3
  71. data/ext/numo/narray/src/mh/max.h +69 -0
  72. data/ext/numo/narray/src/mh/max_index.h +184 -0
  73. data/ext/numo/narray/src/mh/maximum.h +116 -0
  74. data/ext/numo/narray/src/mh/min.h +69 -0
  75. data/ext/numo/narray/src/mh/min_index.h +184 -0
  76. data/ext/numo/narray/src/mh/minimum.h +116 -0
  77. data/ext/numo/narray/src/mh/minmax.h +77 -0
  78. data/ext/numo/narray/src/mh/mulsum.h +185 -0
  79. data/ext/numo/narray/src/mh/op/add.h +78 -0
  80. data/ext/numo/narray/src/mh/op/binary_func.h +423 -0
  81. data/ext/numo/narray/src/mh/op/div.h +118 -0
  82. data/ext/numo/narray/src/mh/op/mod.h +108 -0
  83. data/ext/numo/narray/src/mh/op/mul.h +78 -0
  84. data/ext/numo/narray/src/mh/op/sub.h +78 -0
  85. data/ext/numo/narray/src/mh/prod.h +69 -0
  86. data/ext/numo/narray/src/mh/ptp.h +69 -0
  87. data/ext/numo/narray/src/mh/rand.h +315 -0
  88. data/ext/numo/narray/src/mh/round/ceil.h +11 -0
  89. data/ext/numo/narray/src/mh/round/floor.h +11 -0
  90. data/ext/numo/narray/src/mh/round/rint.h +9 -0
  91. data/ext/numo/narray/src/mh/round/round.h +11 -0
  92. data/ext/numo/narray/src/mh/round/trunc.h +11 -0
  93. data/ext/numo/narray/src/mh/round/unary_func.h +127 -0
  94. data/ext/numo/narray/src/mh/seq.h +130 -0
  95. data/ext/numo/narray/src/mh/sum.h +69 -0
  96. data/ext/numo/narray/src/mh/to_a.h +78 -0
  97. data/ext/numo/narray/src/t_bit.c +45 -234
  98. data/ext/numo/narray/src/t_dcomplex.c +608 -2369
  99. data/ext/numo/narray/src/t_dfloat.c +485 -3736
  100. data/ext/numo/narray/src/t_int16.c +743 -3444
  101. data/ext/numo/narray/src/t_int32.c +745 -3445
  102. data/ext/numo/narray/src/t_int64.c +743 -3446
  103. data/ext/numo/narray/src/t_int8.c +678 -3040
  104. data/ext/numo/narray/src/t_robject.c +771 -3548
  105. data/ext/numo/narray/src/t_scomplex.c +607 -2368
  106. data/ext/numo/narray/src/t_sfloat.c +440 -3693
  107. data/ext/numo/narray/src/t_uint16.c +743 -3440
  108. data/ext/numo/narray/src/t_uint32.c +743 -3440
  109. data/ext/numo/narray/src/t_uint64.c +743 -3442
  110. data/ext/numo/narray/src/t_uint8.c +678 -3038
  111. data/lib/numo/narray.rb +2 -3
  112. metadata +62 -3
@@ -42,10 +42,59 @@ static ID id_to_a;
42
42
  VALUE cT;
43
43
  extern VALUE cRT;
44
44
 
45
+ #include "mh/coerce_cast.h"
46
+ #include "mh/to_a.h"
47
+ #include "mh/fill.h"
48
+ #include "mh/format.h"
49
+ #include "mh/format_to_a.h"
50
+ #include "mh/inspect.h"
51
+ #include "mh/op/add.h"
52
+ #include "mh/op/sub.h"
53
+ #include "mh/op/mul.h"
54
+ #include "mh/op/div.h"
55
+ #include "mh/op/mod.h"
56
+ #include "mh/divmod.h"
57
+ #include "mh/round/floor.h"
58
+ #include "mh/round/round.h"
59
+ #include "mh/round/ceil.h"
60
+ #include "mh/round/trunc.h"
61
+ #include "mh/round/rint.h"
62
+ #include "mh/comp/eq.h"
63
+ #include "mh/comp/ne.h"
64
+ #include "mh/comp/nearly_eq.h"
65
+ #include "mh/comp/gt.h"
66
+ #include "mh/comp/ge.h"
67
+ #include "mh/comp/lt.h"
68
+ #include "mh/comp/le.h"
69
+ #include "mh/clip.h"
70
+ #include "mh/isnan.h"
71
+ #include "mh/isinf.h"
72
+ #include "mh/isposinf.h"
73
+ #include "mh/isneginf.h"
74
+ #include "mh/isfinite.h"
75
+ #include "mh/sum.h"
76
+ #include "mh/prod.h"
45
77
  #include "mh/mean.h"
46
78
  #include "mh/var.h"
47
79
  #include "mh/stddev.h"
48
80
  #include "mh/rms.h"
81
+ #include "mh/min.h"
82
+ #include "mh/max.h"
83
+ #include "mh/ptp.h"
84
+ #include "mh/max_index.h"
85
+ #include "mh/min_index.h"
86
+ #include "mh/argmax.h"
87
+ #include "mh/argmin.h"
88
+ #include "mh/maximum.h"
89
+ #include "mh/minimum.h"
90
+ #include "mh/minmax.h"
91
+ #include "mh/cumsum.h"
92
+ #include "mh/cumprod.h"
93
+ #include "mh/mulsum.h"
94
+ #include "mh/seq.h"
95
+ #include "mh/logseq.h"
96
+ #include "mh/eye.h"
97
+ #include "mh/rand.h"
49
98
  #include "mh/math/sqrt.h"
50
99
  #include "mh/math/cbrt.h"
51
100
  #include "mh/math/log.h"
@@ -78,10 +127,66 @@ extern VALUE cRT;
78
127
 
79
128
  typedef float sfloat; // Type aliases for shorter notation
80
129
  // following the codebase naming convention.
130
+ DEF_NARRAY_COERCE_CAST_METHOD_FUNC(sfloat)
131
+ DEF_NARRAY_TO_A_METHOD_FUNC(sfloat)
132
+ DEF_NARRAY_FILL_METHOD_FUNC(sfloat)
133
+ DEF_NARRAY_FORMAT_METHOD_FUNC(sfloat)
134
+ DEF_NARRAY_FORMAT_TO_A_METHOD_FUNC(sfloat)
135
+ DEF_NARRAY_INSPECT_METHOD_FUNC(sfloat)
136
+ #ifdef __SSE2__
137
+ DEF_NARRAY_SFLT_ADD_SSE2_METHOD_FUNC()
138
+ DEF_NARRAY_SFLT_SUB_SSE2_METHOD_FUNC()
139
+ DEF_NARRAY_SFLT_MUL_SSE2_METHOD_FUNC()
140
+ DEF_NARRAY_SFLT_DIV_SSE2_METHOD_FUNC()
141
+ #else
142
+ DEF_NARRAY_ADD_METHOD_FUNC(sfloat, numo_cSFloat)
143
+ DEF_NARRAY_SUB_METHOD_FUNC(sfloat, numo_cSFloat)
144
+ DEF_NARRAY_MUL_METHOD_FUNC(sfloat, numo_cSFloat)
145
+ DEF_NARRAY_FLT_DIV_METHOD_FUNC(sfloat, numo_cSFloat)
146
+ #endif
147
+ DEF_NARRAY_FLT_MOD_METHOD_FUNC(sfloat, numo_cSFloat)
148
+ DEF_NARRAY_FLT_DIVMOD_METHOD_FUNC(sfloat, numo_cSFloat)
149
+ DEF_NARRAY_FLT_FLOOR_METHOD_FUNC(sfloat, numo_cSFloat)
150
+ DEF_NARRAY_FLT_ROUND_METHOD_FUNC(sfloat, numo_cSFloat)
151
+ DEF_NARRAY_FLT_CEIL_METHOD_FUNC(sfloat, numo_cSFloat)
152
+ DEF_NARRAY_FLT_TRUNC_METHOD_FUNC(sfloat, numo_cSFloat)
153
+ DEF_NARRAY_FLT_RINT_METHOD_FUNC(sfloat, numo_cSFloat)
154
+ DEF_NARRAY_EQ_METHOD_FUNC(sfloat, numo_cSFloat)
155
+ DEF_NARRAY_NE_METHOD_FUNC(sfloat, numo_cSFloat)
156
+ DEF_NARRAY_NEARLY_EQ_METHOD_FUNC(sfloat, numo_cSFloat)
157
+ DEF_NARRAY_GT_METHOD_FUNC(sfloat, numo_cSFloat)
158
+ DEF_NARRAY_GE_METHOD_FUNC(sfloat, numo_cSFloat)
159
+ DEF_NARRAY_LT_METHOD_FUNC(sfloat, numo_cSFloat)
160
+ DEF_NARRAY_LE_METHOD_FUNC(sfloat, numo_cSFloat)
161
+ DEF_NARRAY_CLIP_METHOD_FUNC(sfloat, numo_cSFloat)
162
+ DEF_NARRAY_FLT_ISNAN_METHOD_FUNC(sfloat, numo_cSFloat)
163
+ DEF_NARRAY_FLT_ISINF_METHOD_FUNC(sfloat, numo_cSFloat)
164
+ DEF_NARRAY_FLT_ISPOSINF_METHOD_FUNC(sfloat, numo_cSFloat)
165
+ DEF_NARRAY_FLT_ISNEGINF_METHOD_FUNC(sfloat, numo_cSFloat)
166
+ DEF_NARRAY_FLT_ISFINITE_METHOD_FUNC(sfloat, numo_cSFloat)
167
+ DEF_NARRAY_FLT_SUM_METHOD_FUNC(sfloat, numo_cSFloat)
168
+ DEF_NARRAY_FLT_PROD_METHOD_FUNC(sfloat, numo_cSFloat)
81
169
  DEF_NARRAY_FLT_MEAN_METHOD_FUNC(sfloat, numo_cSFloat, float, numo_cSFloat)
82
170
  DEF_NARRAY_FLT_VAR_METHOD_FUNC(sfloat, numo_cSFloat, float, numo_cSFloat)
83
171
  DEF_NARRAY_FLT_STDDEV_METHOD_FUNC(sfloat, numo_cSFloat, float, numo_cSFloat)
84
172
  DEF_NARRAY_FLT_RMS_METHOD_FUNC(sfloat, numo_cSFloat, float, numo_cSFloat)
173
+ DEF_NARRAY_FLT_MIN_METHOD_FUNC(sfloat, numo_cSFloat)
174
+ DEF_NARRAY_FLT_MAX_METHOD_FUNC(sfloat, numo_cSFloat)
175
+ DEF_NARRAY_FLT_PTP_METHOD_FUNC(sfloat, numo_cSFloat)
176
+ DEF_NARRAY_FLT_MAX_INDEX_METHOD_FUNC(sfloat)
177
+ DEF_NARRAY_FLT_MIN_INDEX_METHOD_FUNC(sfloat)
178
+ DEF_NARRAY_FLT_ARGMAX_METHOD_FUNC(sfloat)
179
+ DEF_NARRAY_FLT_ARGMIN_METHOD_FUNC(sfloat)
180
+ DEF_NARRAY_FLT_MAXIMUM_METHOD_FUNC(sfloat, numo_cSFloat)
181
+ DEF_NARRAY_FLT_MINIMUM_METHOD_FUNC(sfloat, numo_cSFloat)
182
+ DEF_NARRAY_FLT_MINMAX_METHOD_FUNC(sfloat, numo_cSFloat)
183
+ DEF_NARRAY_FLT_CUMSUM_METHOD_FUNC(sfloat, numo_cSFloat)
184
+ DEF_NARRAY_FLT_CUMPROD_METHOD_FUNC(sfloat, numo_cSFloat)
185
+ DEF_NARRAY_FLT_MULSUM_METHOD_FUNC(sfloat, numo_cSFloat)
186
+ DEF_NARRAY_FLT_SEQ_METHOD_FUNC(sfloat)
187
+ DEF_NARRAY_FLT_LOGSEQ_METHOD_FUNC(sfloat)
188
+ DEF_NARRAY_EYE_METHOD_FUNC(sfloat)
189
+ DEF_NARRAY_FLT_RAND_METHOD_FUNC(sfloat)
85
190
  #ifdef __SSE2__
86
191
  DEF_NARRAY_FLT_SQRT_SSE2_SGL_METHOD_FUNC(sfloat, numo_cSFloat)
87
192
  #else
@@ -1235,171 +1340,6 @@ static VALUE sfloat_aset(int argc, VALUE* argv, VALUE self) {
1235
1340
  return argv[argc];
1236
1341
  }
1237
1342
 
1238
- static VALUE sfloat_coerce_cast(VALUE self, VALUE type) {
1239
- return Qnil;
1240
- }
1241
-
1242
- static void iter_sfloat_to_a(na_loop_t* const lp) {
1243
- size_t i, s1;
1244
- char* p1;
1245
- size_t* idx1;
1246
- dtype x;
1247
- volatile VALUE a, y;
1248
-
1249
- INIT_COUNTER(lp, i);
1250
- INIT_PTR_IDX(lp, 0, p1, s1, idx1);
1251
- a = rb_ary_new2(i);
1252
- rb_ary_push(lp->args[1].value, a);
1253
- if (idx1) {
1254
- for (; i--;) {
1255
- GET_DATA_INDEX(p1, idx1, dtype, x);
1256
- y = m_data_to_num(x);
1257
- rb_ary_push(a, y);
1258
- }
1259
- } else {
1260
- for (; i--;) {
1261
- GET_DATA_STRIDE(p1, s1, dtype, x);
1262
- y = m_data_to_num(x);
1263
- rb_ary_push(a, y);
1264
- }
1265
- }
1266
- }
1267
-
1268
- static VALUE sfloat_to_a(VALUE self) {
1269
- ndfunc_arg_in_t ain[3] = { { Qnil, 0 }, { sym_loop_opt }, { sym_option } };
1270
- ndfunc_arg_out_t aout[1] = { { rb_cArray, 0 } }; // dummy?
1271
- ndfunc_t ndf = { iter_sfloat_to_a, FULL_LOOP_NIP, 3, 1, ain, aout };
1272
- return na_ndloop_cast_narray_to_rarray(&ndf, self, Qnil);
1273
- }
1274
-
1275
- static void iter_sfloat_fill(na_loop_t* const lp) {
1276
- size_t i;
1277
- char* p1;
1278
- ssize_t s1;
1279
- size_t* idx1;
1280
- VALUE x = lp->option;
1281
- dtype y;
1282
- INIT_COUNTER(lp, i);
1283
- INIT_PTR_IDX(lp, 0, p1, s1, idx1);
1284
- y = m_num_to_data(x);
1285
- if (idx1) {
1286
- for (; i--;) {
1287
- SET_DATA_INDEX(p1, idx1, dtype, y);
1288
- }
1289
- } else {
1290
- for (; i--;) {
1291
- SET_DATA_STRIDE(p1, s1, dtype, y);
1292
- }
1293
- }
1294
- }
1295
-
1296
- static VALUE sfloat_fill(VALUE self, VALUE val) {
1297
- ndfunc_arg_in_t ain[2] = { { OVERWRITE, 0 }, { sym_option } };
1298
- ndfunc_t ndf = { iter_sfloat_fill, FULL_LOOP, 2, 0, ain, 0 };
1299
-
1300
- na_ndloop(&ndf, 2, self, val);
1301
- return self;
1302
- }
1303
-
1304
- static VALUE format_sfloat(VALUE fmt, dtype* x) {
1305
- // fix-me
1306
- char s[48];
1307
- int n;
1308
-
1309
- if (NIL_P(fmt)) {
1310
- n = m_sprintf(s, *x);
1311
- return rb_str_new(s, n);
1312
- }
1313
- return rb_funcall(fmt, '%', 1, m_data_to_num(*x));
1314
- }
1315
-
1316
- static void iter_sfloat_format(na_loop_t* const lp) {
1317
- size_t i;
1318
- char *p1, *p2;
1319
- ssize_t s1, s2;
1320
- size_t* idx1;
1321
- dtype* x;
1322
- VALUE y;
1323
- VALUE fmt = lp->option;
1324
- INIT_COUNTER(lp, i);
1325
- INIT_PTR_IDX(lp, 0, p1, s1, idx1);
1326
- INIT_PTR(lp, 1, p2, s2);
1327
- if (idx1) {
1328
- for (; i--;) {
1329
- x = (dtype*)(p1 + *idx1);
1330
- idx1++;
1331
- y = format_sfloat(fmt, x);
1332
- SET_DATA_STRIDE(p2, s2, VALUE, y);
1333
- }
1334
- } else {
1335
- for (; i--;) {
1336
- x = (dtype*)p1;
1337
- p1 += s1;
1338
- y = format_sfloat(fmt, x);
1339
- SET_DATA_STRIDE(p2, s2, VALUE, y);
1340
- }
1341
- }
1342
- }
1343
-
1344
- static VALUE sfloat_format(int argc, VALUE* argv, VALUE self) {
1345
- VALUE fmt = Qnil;
1346
-
1347
- ndfunc_arg_in_t ain[2] = { { Qnil, 0 }, { sym_option } };
1348
- ndfunc_arg_out_t aout[1] = { { numo_cRObject, 0 } };
1349
- ndfunc_t ndf = { iter_sfloat_format, FULL_LOOP_NIP, 2, 1, ain, aout };
1350
-
1351
- rb_scan_args(argc, argv, "01", &fmt);
1352
- return na_ndloop(&ndf, 2, self, fmt);
1353
- }
1354
-
1355
- static void iter_sfloat_format_to_a(na_loop_t* const lp) {
1356
- size_t i;
1357
- char* p1;
1358
- ssize_t s1;
1359
- size_t* idx1;
1360
- dtype* x;
1361
- VALUE y;
1362
- volatile VALUE a;
1363
- VALUE fmt = lp->option;
1364
- INIT_COUNTER(lp, i);
1365
- INIT_PTR_IDX(lp, 0, p1, s1, idx1);
1366
- a = rb_ary_new2(i);
1367
- rb_ary_push(lp->args[1].value, a);
1368
- if (idx1) {
1369
- for (; i--;) {
1370
- x = (dtype*)(p1 + *idx1);
1371
- idx1++;
1372
- y = format_sfloat(fmt, x);
1373
- rb_ary_push(a, y);
1374
- }
1375
- } else {
1376
- for (; i--;) {
1377
- x = (dtype*)p1;
1378
- p1 += s1;
1379
- y = format_sfloat(fmt, x);
1380
- rb_ary_push(a, y);
1381
- }
1382
- }
1383
- }
1384
-
1385
- static VALUE sfloat_format_to_a(int argc, VALUE* argv, VALUE self) {
1386
- VALUE fmt = Qnil;
1387
- ndfunc_arg_in_t ain[3] = { { Qnil, 0 }, { sym_loop_opt }, { sym_option } };
1388
- ndfunc_arg_out_t aout[1] = { { rb_cArray, 0 } }; // dummy?
1389
- ndfunc_t ndf = { iter_sfloat_format_to_a, FULL_LOOP_NIP, 3, 1, ain, aout };
1390
-
1391
- rb_scan_args(argc, argv, "01", &fmt);
1392
- return na_ndloop_cast_narray_to_rarray(&ndf, self, fmt);
1393
- }
1394
-
1395
- static VALUE iter_sfloat_inspect(char* ptr, size_t pos, VALUE fmt) {
1396
- return format_sfloat(fmt, (dtype*)(ptr + pos));
1397
- }
1398
-
1399
- static VALUE sfloat_inspect(VALUE ary) {
1400
- return na_ndloop_inspect(ary, iter_sfloat_inspect, Qnil);
1401
- }
1402
-
1403
1343
  static void iter_sfloat_each(na_loop_t* const lp) {
1404
1344
  size_t i, s1;
1405
1345
  char* p1;
@@ -1682,1173 +1622,300 @@ static VALUE sfloat_abs(VALUE self) {
1682
1622
  return na_ndloop(&ndf, 1, self);
1683
1623
  }
1684
1624
 
1685
- #define check_intdivzero(y) \
1686
- {}
1687
-
1688
- static void iter_sfloat_add(na_loop_t* const lp) {
1689
- size_t i = 0;
1690
- size_t n;
1625
+ static void iter_sfloat_pow(na_loop_t* const lp) {
1626
+ size_t i;
1691
1627
  char *p1, *p2, *p3;
1692
1628
  ssize_t s1, s2, s3;
1693
-
1694
- #ifdef __SSE2__
1695
- size_t cnt;
1696
- size_t cnt_simd_loop = -1;
1697
-
1698
- __m128 a;
1699
- __m128 b;
1700
-
1701
- size_t num_pack; // Number of elements packed for SIMD.
1702
- num_pack = SIMD_ALIGNMENT_SIZE / sizeof(dtype);
1703
- #endif
1704
- INIT_COUNTER(lp, n);
1629
+ dtype x, y;
1630
+ INIT_COUNTER(lp, i);
1705
1631
  INIT_PTR(lp, 0, p1, s1);
1706
1632
  INIT_PTR(lp, 1, p2, s2);
1707
1633
  INIT_PTR(lp, 2, p3, s3);
1708
-
1709
- //
1710
- if (is_aligned(p1, sizeof(dtype)) && is_aligned(p2, sizeof(dtype)) &&
1711
- is_aligned(p3, sizeof(dtype))) {
1712
-
1713
- if (s1 == sizeof(dtype) && s2 == sizeof(dtype) && s3 == sizeof(dtype)) {
1714
- #ifdef __SSE2__
1715
- // Check number of elements. & Check same alignment.
1716
- if ((n >= num_pack) &&
1717
- is_same_aligned3(
1718
- &((dtype*)p1)[i], &((dtype*)p2)[i], &((dtype*)p3)[i], SIMD_ALIGNMENT_SIZE
1719
- )) {
1720
- // Calculate up to the position just before the start of SIMD computation.
1721
- cnt = get_count_of_elements_not_aligned_to_simd_size(
1722
- &((dtype*)p1)[i], SIMD_ALIGNMENT_SIZE, sizeof(dtype)
1723
- );
1724
- #endif
1725
- if (p1 == p3) { // inplace case
1726
- #ifdef __SSE2__
1727
- for (; i < cnt; i++) {
1728
- #else
1729
- for (; i < n; i++) {
1730
- check_intdivzero(((dtype*)p2)[i]);
1731
- #endif
1732
- ((dtype*)p1)[i] = m_add(((dtype*)p1)[i], ((dtype*)p2)[i]);
1733
- }
1734
- } else {
1735
- #ifdef __SSE2__
1736
- for (; i < cnt; i++) {
1737
- #else
1738
- for (; i < n; i++) {
1739
- check_intdivzero(((dtype*)p2)[i]);
1740
- #endif
1741
- ((dtype*)p3)[i] = m_add(((dtype*)p1)[i], ((dtype*)p2)[i]);
1742
- }
1743
- }
1744
-
1745
- #ifdef __SSE2__
1746
- // Get the count of SIMD computation loops.
1747
- cnt_simd_loop = (n - i) % num_pack;
1748
-
1749
- // SIMD computation.
1750
- if (p1 == p3) { // inplace case
1751
- for (; i < n - cnt_simd_loop; i += num_pack) {
1752
- a = _mm_load_ps(&((dtype*)p1)[i]);
1753
- b = _mm_load_ps(&((dtype*)p2)[i]);
1754
- a = _mm_add_ps(a, b);
1755
- _mm_store_ps(&((dtype*)p1)[i], a);
1756
- }
1757
- } else {
1758
- for (; i < n - cnt_simd_loop; i += num_pack) {
1759
- a = _mm_load_ps(&((dtype*)p1)[i]);
1760
- b = _mm_load_ps(&((dtype*)p2)[i]);
1761
- a = _mm_add_ps(a, b);
1762
- _mm_stream_ps(&((dtype*)p3)[i], a);
1763
- }
1764
- }
1765
- }
1766
-
1767
- // Compute the remainder of the SIMD operation.
1768
- if (cnt_simd_loop != 0) {
1769
- if (p1 == p3) { // inplace case
1770
- for (; i < n; i++) {
1771
- check_intdivzero(((dtype*)p2)[i]);
1772
- ((dtype*)p1)[i] = m_add(((dtype*)p1)[i], ((dtype*)p2)[i]);
1773
- }
1774
- } else {
1775
- for (; i < n; i++) {
1776
- check_intdivzero(((dtype*)p2)[i]);
1777
- ((dtype*)p3)[i] = m_add(((dtype*)p1)[i], ((dtype*)p2)[i]);
1778
- }
1779
- }
1780
- }
1781
- #endif
1782
- return;
1783
- }
1784
-
1785
- if (is_aligned_step(s1, sizeof(dtype)) && is_aligned_step(s2, sizeof(dtype)) &&
1786
- is_aligned_step(s3, sizeof(dtype))) {
1787
- //
1788
-
1789
- if (s2 == 0) { // Broadcasting from scalar value.
1790
- check_intdivzero(*(dtype*)p2);
1791
- if (s1 == sizeof(dtype) && s3 == sizeof(dtype)) {
1792
- #ifdef __SSE2__
1793
- // Broadcast a scalar value and use it for SIMD computation.
1794
- b = _mm_load1_ps(&((dtype*)p2)[0]);
1795
-
1796
- // Check number of elements. & Check same alignment.
1797
- if ((n >= num_pack) &&
1798
- is_same_aligned2(&((dtype*)p1)[i], &((dtype*)p3)[i], SIMD_ALIGNMENT_SIZE)) {
1799
- // Calculate up to the position just before the start of SIMD computation.
1800
- cnt = get_count_of_elements_not_aligned_to_simd_size(
1801
- &((dtype*)p1)[i], SIMD_ALIGNMENT_SIZE, sizeof(dtype)
1802
- );
1803
- #endif
1804
- if (p1 == p3) { // inplace case
1805
- #ifdef __SSE2__
1806
- for (; i < cnt; i++) {
1807
- #else
1808
- for (; i < n; i++) {
1809
- #endif
1810
- ((dtype*)p1)[i] = m_add(((dtype*)p1)[i], *(dtype*)p2);
1811
- }
1812
- } else {
1813
- #ifdef __SSE2__
1814
- for (; i < cnt; i++) {
1815
- #else
1816
- for (; i < n; i++) {
1817
- #endif
1818
- ((dtype*)p3)[i] = m_add(((dtype*)p1)[i], *(dtype*)p2);
1819
- }
1820
- }
1821
-
1822
- #ifdef __SSE2__
1823
- // Get the count of SIMD computation loops.
1824
- cnt_simd_loop = (n - i) % num_pack;
1825
-
1826
- // SIMD computation.
1827
- if (p1 == p3) { // inplace case
1828
- for (; i < n - cnt_simd_loop; i += num_pack) {
1829
- a = _mm_load_ps(&((dtype*)p1)[i]);
1830
- a = _mm_add_ps(a, b);
1831
- _mm_store_ps(&((dtype*)p1)[i], a);
1832
- }
1833
- } else {
1834
- for (; i < n - cnt_simd_loop; i += num_pack) {
1835
- a = _mm_load_ps(&((dtype*)p1)[i]);
1836
- a = _mm_add_ps(a, b);
1837
- _mm_stream_ps(&((dtype*)p3)[i], a);
1838
- }
1839
- }
1840
- }
1841
-
1842
- // Compute the remainder of the SIMD operation.
1843
- if (cnt_simd_loop != 0) {
1844
- if (p1 == p3) { // inplace case
1845
- for (; i < n; i++) {
1846
- ((dtype*)p1)[i] = m_add(((dtype*)p1)[i], *(dtype*)p2);
1847
- }
1848
- } else {
1849
- for (; i < n; i++) {
1850
- ((dtype*)p3)[i] = m_add(((dtype*)p1)[i], *(dtype*)p2);
1851
- }
1852
- }
1853
- }
1854
- #endif
1855
- } else {
1856
- for (i = 0; i < n; i++) {
1857
- *(dtype*)p3 = m_add(*(dtype*)p1, *(dtype*)p2);
1858
- p1 += s1;
1859
- p3 += s3;
1860
- }
1861
- }
1862
- } else {
1863
- if (p1 == p3) { // inplace case
1864
- for (i = 0; i < n; i++) {
1865
- check_intdivzero(*(dtype*)p2);
1866
- *(dtype*)p1 = m_add(*(dtype*)p1, *(dtype*)p2);
1867
- p1 += s1;
1868
- p2 += s2;
1869
- }
1870
- } else {
1871
- for (i = 0; i < n; i++) {
1872
- check_intdivzero(*(dtype*)p2);
1873
- *(dtype*)p3 = m_add(*(dtype*)p1, *(dtype*)p2);
1874
- p1 += s1;
1875
- p2 += s2;
1876
- p3 += s3;
1877
- }
1878
- }
1879
- }
1880
-
1881
- return;
1882
- //
1883
- }
1884
- }
1885
- for (i = 0; i < n; i++) {
1886
- dtype x, y, z;
1634
+ for (; i--;) {
1887
1635
  GET_DATA_STRIDE(p1, s1, dtype, x);
1888
1636
  GET_DATA_STRIDE(p2, s2, dtype, y);
1889
- check_intdivzero(y);
1890
- z = m_add(x, y);
1891
- SET_DATA_STRIDE(p3, s3, dtype, z);
1637
+ x = m_pow(x, y);
1638
+ SET_DATA_STRIDE(p3, s3, dtype, x);
1639
+ }
1640
+ }
1641
+
1642
+ static void iter_sfloat_pow_int32(na_loop_t* const lp) {
1643
+ size_t i;
1644
+ char *p1, *p2, *p3;
1645
+ ssize_t s1, s2, s3;
1646
+ dtype x;
1647
+ int32_t y;
1648
+ INIT_COUNTER(lp, i);
1649
+ INIT_PTR(lp, 0, p1, s1);
1650
+ INIT_PTR(lp, 1, p2, s2);
1651
+ INIT_PTR(lp, 2, p3, s3);
1652
+ for (; i--;) {
1653
+ GET_DATA_STRIDE(p1, s1, dtype, x);
1654
+ GET_DATA_STRIDE(p2, s2, int32_t, y);
1655
+ x = m_pow_int(x, y);
1656
+ SET_DATA_STRIDE(p3, s3, dtype, x);
1892
1657
  }
1893
- //
1894
1658
  }
1895
- #undef check_intdivzero
1896
1659
 
1897
- static VALUE sfloat_add_self(VALUE self, VALUE other) {
1660
+ static VALUE sfloat_pow_self(VALUE self, VALUE other) {
1898
1661
  ndfunc_arg_in_t ain[2] = { { cT, 0 }, { cT, 0 } };
1662
+ ndfunc_arg_in_t ain_i[2] = { { cT, 0 }, { numo_cInt32, 0 } };
1899
1663
  ndfunc_arg_out_t aout[1] = { { cT, 0 } };
1900
- ndfunc_t ndf = { iter_sfloat_add, STRIDE_LOOP, 2, 1, ain, aout };
1664
+ ndfunc_t ndf = { iter_sfloat_pow, STRIDE_LOOP, 2, 1, ain, aout };
1665
+ ndfunc_t ndf_i = { iter_sfloat_pow_int32, STRIDE_LOOP, 2, 1, ain_i, aout };
1901
1666
 
1902
- return na_ndloop(&ndf, 2, self, other);
1667
+ // fixme : use na.integer?
1668
+ if (FIXNUM_P(other) || rb_obj_is_kind_of(other, numo_cInt32)) {
1669
+ return na_ndloop(&ndf_i, 2, self, other);
1670
+ } else {
1671
+ return na_ndloop(&ndf, 2, self, other);
1672
+ }
1903
1673
  }
1904
1674
 
1905
- static VALUE sfloat_add(VALUE self, VALUE other) {
1675
+ static VALUE sfloat_pow(VALUE self, VALUE other) {
1906
1676
 
1907
1677
  VALUE klass, v;
1908
-
1909
1678
  klass = na_upcast(rb_obj_class(self), rb_obj_class(other));
1910
1679
  if (klass == cT) {
1911
- return sfloat_add_self(self, other);
1680
+ return sfloat_pow_self(self, other);
1912
1681
  } else {
1913
1682
  v = rb_funcall(klass, id_cast, 1, self);
1914
- return rb_funcall(v, '+', 1, other);
1683
+ return rb_funcall(v, id_pow, 1, other);
1915
1684
  }
1916
1685
  }
1917
1686
 
1918
- #define check_intdivzero(y) \
1919
- {}
1920
-
1921
- static void iter_sfloat_sub(na_loop_t* const lp) {
1922
- size_t i = 0;
1923
- size_t n;
1924
- char *p1, *p2, *p3;
1925
- ssize_t s1, s2, s3;
1926
-
1927
- #ifdef __SSE2__
1928
- size_t cnt;
1929
- size_t cnt_simd_loop = -1;
1930
-
1931
- __m128 a;
1932
- __m128 b;
1687
+ static void iter_sfloat_minus(na_loop_t* const lp) {
1688
+ size_t i, n;
1689
+ char *p1, *p2;
1690
+ ssize_t s1, s2;
1691
+ size_t *idx1, *idx2;
1692
+ dtype x;
1933
1693
 
1934
- size_t num_pack; // Number of elements packed for SIMD.
1935
- num_pack = SIMD_ALIGNMENT_SIZE / sizeof(dtype);
1936
- #endif
1937
1694
  INIT_COUNTER(lp, n);
1938
- INIT_PTR(lp, 0, p1, s1);
1939
- INIT_PTR(lp, 1, p2, s2);
1940
- INIT_PTR(lp, 2, p3, s3);
1941
-
1942
- //
1943
- if (is_aligned(p1, sizeof(dtype)) && is_aligned(p2, sizeof(dtype)) &&
1944
- is_aligned(p3, sizeof(dtype))) {
1945
-
1946
- if (s1 == sizeof(dtype) && s2 == sizeof(dtype) && s3 == sizeof(dtype)) {
1947
- #ifdef __SSE2__
1948
- // Check number of elements. & Check same alignment.
1949
- if ((n >= num_pack) &&
1950
- is_same_aligned3(
1951
- &((dtype*)p1)[i], &((dtype*)p2)[i], &((dtype*)p3)[i], SIMD_ALIGNMENT_SIZE
1952
- )) {
1953
- // Calculate up to the position just before the start of SIMD computation.
1954
- cnt = get_count_of_elements_not_aligned_to_simd_size(
1955
- &((dtype*)p1)[i], SIMD_ALIGNMENT_SIZE, sizeof(dtype)
1956
- );
1957
- #endif
1958
- if (p1 == p3) { // inplace case
1959
- #ifdef __SSE2__
1960
- for (; i < cnt; i++) {
1961
- #else
1962
- for (; i < n; i++) {
1963
- check_intdivzero(((dtype*)p2)[i]);
1964
- #endif
1965
- ((dtype*)p1)[i] = m_sub(((dtype*)p1)[i], ((dtype*)p2)[i]);
1966
- }
1967
- } else {
1968
- #ifdef __SSE2__
1969
- for (; i < cnt; i++) {
1970
- #else
1971
- for (; i < n; i++) {
1972
- check_intdivzero(((dtype*)p2)[i]);
1973
- #endif
1974
- ((dtype*)p3)[i] = m_sub(((dtype*)p1)[i], ((dtype*)p2)[i]);
1975
- }
1976
- }
1977
-
1978
- #ifdef __SSE2__
1979
- // Get the count of SIMD computation loops.
1980
- cnt_simd_loop = (n - i) % num_pack;
1695
+ INIT_PTR_IDX(lp, 0, p1, s1, idx1);
1696
+ INIT_PTR_IDX(lp, 1, p2, s2, idx2);
1981
1697
 
1982
- // SIMD computation.
1983
- if (p1 == p3) { // inplace case
1984
- for (; i < n - cnt_simd_loop; i += num_pack) {
1985
- a = _mm_load_ps(&((dtype*)p1)[i]);
1986
- b = _mm_load_ps(&((dtype*)p2)[i]);
1987
- a = _mm_sub_ps(a, b);
1988
- _mm_store_ps(&((dtype*)p1)[i], a);
1989
- }
1990
- } else {
1991
- for (; i < n - cnt_simd_loop; i += num_pack) {
1992
- a = _mm_load_ps(&((dtype*)p1)[i]);
1993
- b = _mm_load_ps(&((dtype*)p2)[i]);
1994
- a = _mm_sub_ps(a, b);
1995
- _mm_stream_ps(&((dtype*)p3)[i], a);
1996
- }
1997
- }
1698
+ if (idx1) {
1699
+ if (idx2) {
1700
+ for (i = 0; i < n; i++) {
1701
+ GET_DATA_INDEX(p1, idx1, dtype, x);
1702
+ x = m_minus(x);
1703
+ SET_DATA_INDEX(p2, idx2, dtype, x);
1998
1704
  }
1999
-
2000
- // Compute the remainder of the SIMD operation.
2001
- if (cnt_simd_loop != 0) {
2002
- if (p1 == p3) { // inplace case
2003
- for (; i < n; i++) {
2004
- check_intdivzero(((dtype*)p2)[i]);
2005
- ((dtype*)p1)[i] = m_sub(((dtype*)p1)[i], ((dtype*)p2)[i]);
2006
- }
2007
- } else {
2008
- for (; i < n; i++) {
2009
- check_intdivzero(((dtype*)p2)[i]);
2010
- ((dtype*)p3)[i] = m_sub(((dtype*)p1)[i], ((dtype*)p2)[i]);
2011
- }
2012
- }
1705
+ } else {
1706
+ for (i = 0; i < n; i++) {
1707
+ GET_DATA_INDEX(p1, idx1, dtype, x);
1708
+ x = m_minus(x);
1709
+ SET_DATA_STRIDE(p2, s2, dtype, x);
2013
1710
  }
2014
- #endif
2015
- return;
2016
1711
  }
2017
-
2018
- if (is_aligned_step(s1, sizeof(dtype)) && is_aligned_step(s2, sizeof(dtype)) &&
2019
- is_aligned_step(s3, sizeof(dtype))) {
1712
+ } else {
1713
+ if (idx2) {
1714
+ for (i = 0; i < n; i++) {
1715
+ GET_DATA_STRIDE(p1, s1, dtype, x);
1716
+ x = m_minus(x);
1717
+ SET_DATA_INDEX(p2, idx2, dtype, x);
1718
+ }
1719
+ } else {
2020
1720
  //
2021
-
2022
- if (s2 == 0) { // Broadcasting from scalar value.
2023
- check_intdivzero(*(dtype*)p2);
2024
- if (s1 == sizeof(dtype) && s3 == sizeof(dtype)) {
2025
- #ifdef __SSE2__
2026
- // Broadcast a scalar value and use it for SIMD computation.
2027
- b = _mm_load1_ps(&((dtype*)p2)[0]);
2028
-
2029
- // Check number of elements. & Check same alignment.
2030
- if ((n >= num_pack) &&
2031
- is_same_aligned2(&((dtype*)p1)[i], &((dtype*)p3)[i], SIMD_ALIGNMENT_SIZE)) {
2032
- // Calculate up to the position just before the start of SIMD computation.
2033
- cnt = get_count_of_elements_not_aligned_to_simd_size(
2034
- &((dtype*)p1)[i], SIMD_ALIGNMENT_SIZE, sizeof(dtype)
2035
- );
2036
- #endif
2037
- if (p1 == p3) { // inplace case
2038
- #ifdef __SSE2__
2039
- for (; i < cnt; i++) {
2040
- #else
2041
- for (; i < n; i++) {
2042
- #endif
2043
- ((dtype*)p1)[i] = m_sub(((dtype*)p1)[i], *(dtype*)p2);
2044
- }
2045
- } else {
2046
- #ifdef __SSE2__
2047
- for (; i < cnt; i++) {
2048
- #else
2049
- for (; i < n; i++) {
2050
- #endif
2051
- ((dtype*)p3)[i] = m_sub(((dtype*)p1)[i], *(dtype*)p2);
2052
- }
2053
- }
2054
-
2055
- #ifdef __SSE2__
2056
- // Get the count of SIMD computation loops.
2057
- cnt_simd_loop = (n - i) % num_pack;
2058
-
2059
- // SIMD computation.
2060
- if (p1 == p3) { // inplace case
2061
- for (; i < n - cnt_simd_loop; i += num_pack) {
2062
- a = _mm_load_ps(&((dtype*)p1)[i]);
2063
- a = _mm_sub_ps(a, b);
2064
- _mm_store_ps(&((dtype*)p1)[i], a);
2065
- }
2066
- } else {
2067
- for (; i < n - cnt_simd_loop; i += num_pack) {
2068
- a = _mm_load_ps(&((dtype*)p1)[i]);
2069
- a = _mm_sub_ps(a, b);
2070
- _mm_stream_ps(&((dtype*)p3)[i], a);
2071
- }
2072
- }
2073
- }
2074
-
2075
- // Compute the remainder of the SIMD operation.
2076
- if (cnt_simd_loop != 0) {
2077
- if (p1 == p3) { // inplace case
2078
- for (; i < n; i++) {
2079
- ((dtype*)p1)[i] = m_sub(((dtype*)p1)[i], *(dtype*)p2);
2080
- }
2081
- } else {
2082
- for (; i < n; i++) {
2083
- ((dtype*)p3)[i] = m_sub(((dtype*)p1)[i], *(dtype*)p2);
2084
- }
2085
- }
2086
- }
2087
- #endif
2088
- } else {
1721
+ if (is_aligned(p1, sizeof(dtype)) && is_aligned(p2, sizeof(dtype))) {
1722
+ if (s1 == sizeof(dtype) && s2 == sizeof(dtype)) {
2089
1723
  for (i = 0; i < n; i++) {
2090
- *(dtype*)p3 = m_sub(*(dtype*)p1, *(dtype*)p2);
2091
- p1 += s1;
2092
- p3 += s3;
1724
+ ((dtype*)p2)[i] = m_minus(((dtype*)p1)[i]);
2093
1725
  }
1726
+ return;
2094
1727
  }
2095
- } else {
2096
- if (p1 == p3) { // inplace case
2097
- for (i = 0; i < n; i++) {
2098
- check_intdivzero(*(dtype*)p2);
2099
- *(dtype*)p1 = m_sub(*(dtype*)p1, *(dtype*)p2);
2100
- p1 += s1;
2101
- p2 += s2;
2102
- }
2103
- } else {
1728
+ if (is_aligned_step(s1, sizeof(dtype)) && is_aligned_step(s2, sizeof(dtype))) {
1729
+ //
2104
1730
  for (i = 0; i < n; i++) {
2105
- check_intdivzero(*(dtype*)p2);
2106
- *(dtype*)p3 = m_sub(*(dtype*)p1, *(dtype*)p2);
1731
+ *(dtype*)p2 = m_minus(*(dtype*)p1);
2107
1732
  p1 += s1;
2108
1733
  p2 += s2;
2109
- p3 += s3;
2110
1734
  }
1735
+ return;
1736
+ //
2111
1737
  }
2112
1738
  }
2113
-
2114
- return;
1739
+ for (i = 0; i < n; i++) {
1740
+ GET_DATA_STRIDE(p1, s1, dtype, x);
1741
+ x = m_minus(x);
1742
+ SET_DATA_STRIDE(p2, s2, dtype, x);
1743
+ }
2115
1744
  //
2116
1745
  }
2117
1746
  }
2118
- for (i = 0; i < n; i++) {
2119
- dtype x, y, z;
2120
- GET_DATA_STRIDE(p1, s1, dtype, x);
2121
- GET_DATA_STRIDE(p2, s2, dtype, y);
2122
- check_intdivzero(y);
2123
- z = m_sub(x, y);
2124
- SET_DATA_STRIDE(p3, s3, dtype, z);
2125
- }
2126
- //
2127
1747
  }
2128
- #undef check_intdivzero
2129
1748
 
2130
- static VALUE sfloat_sub_self(VALUE self, VALUE other) {
2131
- ndfunc_arg_in_t ain[2] = { { cT, 0 }, { cT, 0 } };
1749
+ static VALUE sfloat_minus(VALUE self) {
1750
+ ndfunc_arg_in_t ain[1] = { { cT, 0 } };
2132
1751
  ndfunc_arg_out_t aout[1] = { { cT, 0 } };
2133
- ndfunc_t ndf = { iter_sfloat_sub, STRIDE_LOOP, 2, 1, ain, aout };
2134
-
2135
- return na_ndloop(&ndf, 2, self, other);
2136
- }
2137
-
2138
- static VALUE sfloat_sub(VALUE self, VALUE other) {
2139
-
2140
- VALUE klass, v;
1752
+ ndfunc_t ndf = { iter_sfloat_minus, FULL_LOOP, 1, 1, ain, aout };
2141
1753
 
2142
- klass = na_upcast(rb_obj_class(self), rb_obj_class(other));
2143
- if (klass == cT) {
2144
- return sfloat_sub_self(self, other);
2145
- } else {
2146
- v = rb_funcall(klass, id_cast, 1, self);
2147
- return rb_funcall(v, '-', 1, other);
2148
- }
1754
+ return na_ndloop(&ndf, 1, self);
2149
1755
  }
2150
1756
 
2151
- #define check_intdivzero(y) \
2152
- {}
2153
-
2154
- static void iter_sfloat_mul(na_loop_t* const lp) {
2155
- size_t i = 0;
2156
- size_t n;
2157
- char *p1, *p2, *p3;
2158
- ssize_t s1, s2, s3;
2159
-
2160
- #ifdef __SSE2__
2161
- size_t cnt;
2162
- size_t cnt_simd_loop = -1;
2163
-
2164
- __m128 a;
2165
- __m128 b;
1757
+ static void iter_sfloat_reciprocal(na_loop_t* const lp) {
1758
+ size_t i, n;
1759
+ char *p1, *p2;
1760
+ ssize_t s1, s2;
1761
+ size_t *idx1, *idx2;
1762
+ dtype x;
2166
1763
 
2167
- size_t num_pack; // Number of elements packed for SIMD.
2168
- num_pack = SIMD_ALIGNMENT_SIZE / sizeof(dtype);
2169
- #endif
2170
1764
  INIT_COUNTER(lp, n);
2171
- INIT_PTR(lp, 0, p1, s1);
2172
- INIT_PTR(lp, 1, p2, s2);
2173
- INIT_PTR(lp, 2, p3, s3);
2174
-
2175
- //
2176
- if (is_aligned(p1, sizeof(dtype)) && is_aligned(p2, sizeof(dtype)) &&
2177
- is_aligned(p3, sizeof(dtype))) {
1765
+ INIT_PTR_IDX(lp, 0, p1, s1, idx1);
1766
+ INIT_PTR_IDX(lp, 1, p2, s2, idx2);
2178
1767
 
2179
- if (s1 == sizeof(dtype) && s2 == sizeof(dtype) && s3 == sizeof(dtype)) {
2180
- #ifdef __SSE2__
2181
- // Check number of elements. & Check same alignment.
2182
- if ((n >= num_pack) &&
2183
- is_same_aligned3(
2184
- &((dtype*)p1)[i], &((dtype*)p2)[i], &((dtype*)p3)[i], SIMD_ALIGNMENT_SIZE
2185
- )) {
2186
- // Calculate up to the position just before the start of SIMD computation.
2187
- cnt = get_count_of_elements_not_aligned_to_simd_size(
2188
- &((dtype*)p1)[i], SIMD_ALIGNMENT_SIZE, sizeof(dtype)
2189
- );
2190
- #endif
2191
- if (p1 == p3) { // inplace case
2192
- #ifdef __SSE2__
2193
- for (; i < cnt; i++) {
2194
- #else
2195
- for (; i < n; i++) {
2196
- check_intdivzero(((dtype*)p2)[i]);
2197
- #endif
2198
- ((dtype*)p1)[i] = m_mul(((dtype*)p1)[i], ((dtype*)p2)[i]);
2199
- }
2200
- } else {
2201
- #ifdef __SSE2__
2202
- for (; i < cnt; i++) {
2203
- #else
2204
- for (; i < n; i++) {
2205
- check_intdivzero(((dtype*)p2)[i]);
2206
- #endif
2207
- ((dtype*)p3)[i] = m_mul(((dtype*)p1)[i], ((dtype*)p2)[i]);
1768
+ if (idx1) {
1769
+ if (idx2) {
1770
+ for (i = 0; i < n; i++) {
1771
+ GET_DATA_INDEX(p1, idx1, dtype, x);
1772
+ x = m_reciprocal(x);
1773
+ SET_DATA_INDEX(p2, idx2, dtype, x);
1774
+ }
1775
+ } else {
1776
+ for (i = 0; i < n; i++) {
1777
+ GET_DATA_INDEX(p1, idx1, dtype, x);
1778
+ x = m_reciprocal(x);
1779
+ SET_DATA_STRIDE(p2, s2, dtype, x);
1780
+ }
1781
+ }
1782
+ } else {
1783
+ if (idx2) {
1784
+ for (i = 0; i < n; i++) {
1785
+ GET_DATA_STRIDE(p1, s1, dtype, x);
1786
+ x = m_reciprocal(x);
1787
+ SET_DATA_INDEX(p2, idx2, dtype, x);
1788
+ }
1789
+ } else {
1790
+ //
1791
+ if (is_aligned(p1, sizeof(dtype)) && is_aligned(p2, sizeof(dtype))) {
1792
+ if (s1 == sizeof(dtype) && s2 == sizeof(dtype)) {
1793
+ for (i = 0; i < n; i++) {
1794
+ ((dtype*)p2)[i] = m_reciprocal(((dtype*)p1)[i]);
2208
1795
  }
1796
+ return;
2209
1797
  }
2210
-
2211
- #ifdef __SSE2__
2212
- // Get the count of SIMD computation loops.
2213
- cnt_simd_loop = (n - i) % num_pack;
2214
-
2215
- // SIMD computation.
2216
- if (p1 == p3) { // inplace case
2217
- for (; i < n - cnt_simd_loop; i += num_pack) {
2218
- a = _mm_load_ps(&((dtype*)p1)[i]);
2219
- b = _mm_load_ps(&((dtype*)p2)[i]);
2220
- a = _mm_mul_ps(a, b);
2221
- _mm_store_ps(&((dtype*)p1)[i], a);
2222
- }
2223
- } else {
2224
- for (; i < n - cnt_simd_loop; i += num_pack) {
2225
- a = _mm_load_ps(&((dtype*)p1)[i]);
2226
- b = _mm_load_ps(&((dtype*)p2)[i]);
2227
- a = _mm_mul_ps(a, b);
2228
- _mm_stream_ps(&((dtype*)p3)[i], a);
1798
+ if (is_aligned_step(s1, sizeof(dtype)) && is_aligned_step(s2, sizeof(dtype))) {
1799
+ //
1800
+ for (i = 0; i < n; i++) {
1801
+ *(dtype*)p2 = m_reciprocal(*(dtype*)p1);
1802
+ p1 += s1;
1803
+ p2 += s2;
2229
1804
  }
1805
+ return;
1806
+ //
2230
1807
  }
2231
1808
  }
2232
-
2233
- // Compute the remainder of the SIMD operation.
2234
- if (cnt_simd_loop != 0) {
2235
- if (p1 == p3) { // inplace case
2236
- for (; i < n; i++) {
2237
- check_intdivzero(((dtype*)p2)[i]);
2238
- ((dtype*)p1)[i] = m_mul(((dtype*)p1)[i], ((dtype*)p2)[i]);
2239
- }
2240
- } else {
2241
- for (; i < n; i++) {
2242
- check_intdivzero(((dtype*)p2)[i]);
2243
- ((dtype*)p3)[i] = m_mul(((dtype*)p1)[i], ((dtype*)p2)[i]);
2244
- }
2245
- }
1809
+ for (i = 0; i < n; i++) {
1810
+ GET_DATA_STRIDE(p1, s1, dtype, x);
1811
+ x = m_reciprocal(x);
1812
+ SET_DATA_STRIDE(p2, s2, dtype, x);
2246
1813
  }
2247
- #endif
2248
- return;
2249
- }
2250
-
2251
- if (is_aligned_step(s1, sizeof(dtype)) && is_aligned_step(s2, sizeof(dtype)) &&
2252
- is_aligned_step(s3, sizeof(dtype))) {
2253
1814
  //
1815
+ }
1816
+ }
1817
+ }
2254
1818
 
2255
- if (s2 == 0) { // Broadcasting from scalar value.
2256
- check_intdivzero(*(dtype*)p2);
2257
- if (s1 == sizeof(dtype) && s3 == sizeof(dtype)) {
2258
- #ifdef __SSE2__
2259
- // Broadcast a scalar value and use it for SIMD computation.
2260
- b = _mm_load1_ps(&((dtype*)p2)[0]);
2261
-
2262
- // Check number of elements. & Check same alignment.
2263
- if ((n >= num_pack) &&
2264
- is_same_aligned2(&((dtype*)p1)[i], &((dtype*)p3)[i], SIMD_ALIGNMENT_SIZE)) {
2265
- // Calculate up to the position just before the start of SIMD computation.
2266
- cnt = get_count_of_elements_not_aligned_to_simd_size(
2267
- &((dtype*)p1)[i], SIMD_ALIGNMENT_SIZE, sizeof(dtype)
2268
- );
2269
- #endif
2270
- if (p1 == p3) { // inplace case
2271
- #ifdef __SSE2__
2272
- for (; i < cnt; i++) {
2273
- #else
2274
- for (; i < n; i++) {
2275
- #endif
2276
- ((dtype*)p1)[i] = m_mul(((dtype*)p1)[i], *(dtype*)p2);
2277
- }
2278
- } else {
2279
- #ifdef __SSE2__
2280
- for (; i < cnt; i++) {
2281
- #else
2282
- for (; i < n; i++) {
2283
- #endif
2284
- ((dtype*)p3)[i] = m_mul(((dtype*)p1)[i], *(dtype*)p2);
2285
- }
2286
- }
2287
-
2288
- #ifdef __SSE2__
2289
- // Get the count of SIMD computation loops.
2290
- cnt_simd_loop = (n - i) % num_pack;
2291
-
2292
- // SIMD computation.
2293
- if (p1 == p3) { // inplace case
2294
- for (; i < n - cnt_simd_loop; i += num_pack) {
2295
- a = _mm_load_ps(&((dtype*)p1)[i]);
2296
- a = _mm_mul_ps(a, b);
2297
- _mm_store_ps(&((dtype*)p1)[i], a);
2298
- }
2299
- } else {
2300
- for (; i < n - cnt_simd_loop; i += num_pack) {
2301
- a = _mm_load_ps(&((dtype*)p1)[i]);
2302
- a = _mm_mul_ps(a, b);
2303
- _mm_stream_ps(&((dtype*)p3)[i], a);
2304
- }
2305
- }
2306
- }
2307
-
2308
- // Compute the remainder of the SIMD operation.
2309
- if (cnt_simd_loop != 0) {
2310
- if (p1 == p3) { // inplace case
2311
- for (; i < n; i++) {
2312
- ((dtype*)p1)[i] = m_mul(((dtype*)p1)[i], *(dtype*)p2);
2313
- }
2314
- } else {
2315
- for (; i < n; i++) {
2316
- ((dtype*)p3)[i] = m_mul(((dtype*)p1)[i], *(dtype*)p2);
2317
- }
2318
- }
2319
- }
2320
- #endif
2321
- } else {
2322
- for (i = 0; i < n; i++) {
2323
- *(dtype*)p3 = m_mul(*(dtype*)p1, *(dtype*)p2);
2324
- p1 += s1;
2325
- p3 += s3;
2326
- }
2327
- }
2328
- } else {
2329
- if (p1 == p3) { // inplace case
2330
- for (i = 0; i < n; i++) {
2331
- check_intdivzero(*(dtype*)p2);
2332
- *(dtype*)p1 = m_mul(*(dtype*)p1, *(dtype*)p2);
2333
- p1 += s1;
2334
- p2 += s2;
2335
- }
2336
- } else {
2337
- for (i = 0; i < n; i++) {
2338
- check_intdivzero(*(dtype*)p2);
2339
- *(dtype*)p3 = m_mul(*(dtype*)p1, *(dtype*)p2);
2340
- p1 += s1;
2341
- p2 += s2;
2342
- p3 += s3;
2343
- }
2344
- }
2345
- }
2346
-
2347
- return;
2348
- //
2349
- }
2350
- }
2351
- for (i = 0; i < n; i++) {
2352
- dtype x, y, z;
2353
- GET_DATA_STRIDE(p1, s1, dtype, x);
2354
- GET_DATA_STRIDE(p2, s2, dtype, y);
2355
- check_intdivzero(y);
2356
- z = m_mul(x, y);
2357
- SET_DATA_STRIDE(p3, s3, dtype, z);
2358
- }
2359
- //
2360
- }
2361
- #undef check_intdivzero
2362
-
2363
- static VALUE sfloat_mul_self(VALUE self, VALUE other) {
2364
- ndfunc_arg_in_t ain[2] = { { cT, 0 }, { cT, 0 } };
1819
+ static VALUE sfloat_reciprocal(VALUE self) {
1820
+ ndfunc_arg_in_t ain[1] = { { cT, 0 } };
2365
1821
  ndfunc_arg_out_t aout[1] = { { cT, 0 } };
2366
- ndfunc_t ndf = { iter_sfloat_mul, STRIDE_LOOP, 2, 1, ain, aout };
2367
-
2368
- return na_ndloop(&ndf, 2, self, other);
2369
- }
2370
-
2371
- static VALUE sfloat_mul(VALUE self, VALUE other) {
2372
-
2373
- VALUE klass, v;
1822
+ ndfunc_t ndf = { iter_sfloat_reciprocal, FULL_LOOP, 1, 1, ain, aout };
2374
1823
 
2375
- klass = na_upcast(rb_obj_class(self), rb_obj_class(other));
2376
- if (klass == cT) {
2377
- return sfloat_mul_self(self, other);
2378
- } else {
2379
- v = rb_funcall(klass, id_cast, 1, self);
2380
- return rb_funcall(v, '*', 1, other);
2381
- }
1824
+ return na_ndloop(&ndf, 1, self);
2382
1825
  }
2383
1826
 
2384
- #define check_intdivzero(y) \
2385
- {}
2386
-
2387
- static void iter_sfloat_div(na_loop_t* const lp) {
2388
- size_t i = 0;
2389
- size_t n;
2390
- char *p1, *p2, *p3;
2391
- ssize_t s1, s2, s3;
2392
-
2393
- #ifdef __SSE2__
2394
- size_t cnt;
2395
- size_t cnt_simd_loop = -1;
2396
-
2397
- __m128 a;
2398
- __m128 b;
1827
+ static void iter_sfloat_sign(na_loop_t* const lp) {
1828
+ size_t i, n;
1829
+ char *p1, *p2;
1830
+ ssize_t s1, s2;
1831
+ size_t *idx1, *idx2;
1832
+ dtype x;
2399
1833
 
2400
- size_t num_pack; // Number of elements packed for SIMD.
2401
- num_pack = SIMD_ALIGNMENT_SIZE / sizeof(dtype);
2402
- #endif
2403
1834
  INIT_COUNTER(lp, n);
2404
- INIT_PTR(lp, 0, p1, s1);
2405
- INIT_PTR(lp, 1, p2, s2);
2406
- INIT_PTR(lp, 2, p3, s3);
2407
-
2408
- //
2409
- if (is_aligned(p1, sizeof(dtype)) && is_aligned(p2, sizeof(dtype)) &&
2410
- is_aligned(p3, sizeof(dtype))) {
2411
-
2412
- if (s1 == sizeof(dtype) && s2 == sizeof(dtype) && s3 == sizeof(dtype)) {
2413
- #ifdef __SSE2__
2414
- // Check number of elements. & Check same alignment.
2415
- if ((n >= num_pack) &&
2416
- is_same_aligned3(
2417
- &((dtype*)p1)[i], &((dtype*)p2)[i], &((dtype*)p3)[i], SIMD_ALIGNMENT_SIZE
2418
- )) {
2419
- // Calculate up to the position just before the start of SIMD computation.
2420
- cnt = get_count_of_elements_not_aligned_to_simd_size(
2421
- &((dtype*)p1)[i], SIMD_ALIGNMENT_SIZE, sizeof(dtype)
2422
- );
2423
- #endif
2424
- if (p1 == p3) { // inplace case
2425
- #ifdef __SSE2__
2426
- for (; i < cnt; i++) {
2427
- #else
2428
- for (; i < n; i++) {
2429
- check_intdivzero(((dtype*)p2)[i]);
2430
- #endif
2431
- ((dtype*)p1)[i] = m_div(((dtype*)p1)[i], ((dtype*)p2)[i]);
2432
- }
2433
- } else {
2434
- #ifdef __SSE2__
2435
- for (; i < cnt; i++) {
2436
- #else
2437
- for (; i < n; i++) {
2438
- check_intdivzero(((dtype*)p2)[i]);
2439
- #endif
2440
- ((dtype*)p3)[i] = m_div(((dtype*)p1)[i], ((dtype*)p2)[i]);
2441
- }
2442
- }
2443
-
2444
- #ifdef __SSE2__
2445
- // Get the count of SIMD computation loops.
2446
- cnt_simd_loop = (n - i) % num_pack;
1835
+ INIT_PTR_IDX(lp, 0, p1, s1, idx1);
1836
+ INIT_PTR_IDX(lp, 1, p2, s2, idx2);
2447
1837
 
2448
- // SIMD computation.
2449
- if (p1 == p3) { // inplace case
2450
- for (; i < n - cnt_simd_loop; i += num_pack) {
2451
- a = _mm_load_ps(&((dtype*)p1)[i]);
2452
- b = _mm_load_ps(&((dtype*)p2)[i]);
2453
- a = _mm_div_ps(a, b);
2454
- _mm_store_ps(&((dtype*)p1)[i], a);
2455
- }
2456
- } else {
2457
- for (; i < n - cnt_simd_loop; i += num_pack) {
2458
- a = _mm_load_ps(&((dtype*)p1)[i]);
2459
- b = _mm_load_ps(&((dtype*)p2)[i]);
2460
- a = _mm_div_ps(a, b);
2461
- _mm_stream_ps(&((dtype*)p3)[i], a);
2462
- }
2463
- }
1838
+ if (idx1) {
1839
+ if (idx2) {
1840
+ for (i = 0; i < n; i++) {
1841
+ GET_DATA_INDEX(p1, idx1, dtype, x);
1842
+ x = m_sign(x);
1843
+ SET_DATA_INDEX(p2, idx2, dtype, x);
2464
1844
  }
2465
-
2466
- // Compute the remainder of the SIMD operation.
2467
- if (cnt_simd_loop != 0) {
2468
- if (p1 == p3) { // inplace case
2469
- for (; i < n; i++) {
2470
- check_intdivzero(((dtype*)p2)[i]);
2471
- ((dtype*)p1)[i] = m_div(((dtype*)p1)[i], ((dtype*)p2)[i]);
2472
- }
2473
- } else {
2474
- for (; i < n; i++) {
2475
- check_intdivzero(((dtype*)p2)[i]);
2476
- ((dtype*)p3)[i] = m_div(((dtype*)p1)[i], ((dtype*)p2)[i]);
2477
- }
2478
- }
1845
+ } else {
1846
+ for (i = 0; i < n; i++) {
1847
+ GET_DATA_INDEX(p1, idx1, dtype, x);
1848
+ x = m_sign(x);
1849
+ SET_DATA_STRIDE(p2, s2, dtype, x);
2479
1850
  }
2480
- #endif
2481
- return;
2482
1851
  }
2483
-
2484
- if (is_aligned_step(s1, sizeof(dtype)) && is_aligned_step(s2, sizeof(dtype)) &&
2485
- is_aligned_step(s3, sizeof(dtype))) {
1852
+ } else {
1853
+ if (idx2) {
1854
+ for (i = 0; i < n; i++) {
1855
+ GET_DATA_STRIDE(p1, s1, dtype, x);
1856
+ x = m_sign(x);
1857
+ SET_DATA_INDEX(p2, idx2, dtype, x);
1858
+ }
1859
+ } else {
2486
1860
  //
2487
-
2488
- if (s2 == 0) { // Broadcasting from scalar value.
2489
- check_intdivzero(*(dtype*)p2);
2490
- if (s1 == sizeof(dtype) && s3 == sizeof(dtype)) {
2491
- #ifdef __SSE2__
2492
- // Broadcast a scalar value and use it for SIMD computation.
2493
- b = _mm_load1_ps(&((dtype*)p2)[0]);
2494
-
2495
- // Check number of elements. & Check same alignment.
2496
- if ((n >= num_pack) &&
2497
- is_same_aligned2(&((dtype*)p1)[i], &((dtype*)p3)[i], SIMD_ALIGNMENT_SIZE)) {
2498
- // Calculate up to the position just before the start of SIMD computation.
2499
- cnt = get_count_of_elements_not_aligned_to_simd_size(
2500
- &((dtype*)p1)[i], SIMD_ALIGNMENT_SIZE, sizeof(dtype)
2501
- );
2502
- #endif
2503
- if (p1 == p3) { // inplace case
2504
- #ifdef __SSE2__
2505
- for (; i < cnt; i++) {
2506
- #else
2507
- for (; i < n; i++) {
2508
- #endif
2509
- ((dtype*)p1)[i] = m_div(((dtype*)p1)[i], *(dtype*)p2);
2510
- }
2511
- } else {
2512
- #ifdef __SSE2__
2513
- for (; i < cnt; i++) {
2514
- #else
2515
- for (; i < n; i++) {
2516
- #endif
2517
- ((dtype*)p3)[i] = m_div(((dtype*)p1)[i], *(dtype*)p2);
2518
- }
2519
- }
2520
-
2521
- #ifdef __SSE2__
2522
- // Get the count of SIMD computation loops.
2523
- cnt_simd_loop = (n - i) % num_pack;
2524
-
2525
- // SIMD computation.
2526
- if (p1 == p3) { // inplace case
2527
- for (; i < n - cnt_simd_loop; i += num_pack) {
2528
- a = _mm_load_ps(&((dtype*)p1)[i]);
2529
- a = _mm_div_ps(a, b);
2530
- _mm_store_ps(&((dtype*)p1)[i], a);
2531
- }
2532
- } else {
2533
- for (; i < n - cnt_simd_loop; i += num_pack) {
2534
- a = _mm_load_ps(&((dtype*)p1)[i]);
2535
- a = _mm_div_ps(a, b);
2536
- _mm_stream_ps(&((dtype*)p3)[i], a);
2537
- }
2538
- }
2539
- }
2540
-
2541
- // Compute the remainder of the SIMD operation.
2542
- if (cnt_simd_loop != 0) {
2543
- if (p1 == p3) { // inplace case
2544
- for (; i < n; i++) {
2545
- ((dtype*)p1)[i] = m_div(((dtype*)p1)[i], *(dtype*)p2);
2546
- }
2547
- } else {
2548
- for (; i < n; i++) {
2549
- ((dtype*)p3)[i] = m_div(((dtype*)p1)[i], *(dtype*)p2);
2550
- }
2551
- }
2552
- }
2553
- #endif
2554
- } else {
1861
+ if (is_aligned(p1, sizeof(dtype)) && is_aligned(p2, sizeof(dtype))) {
1862
+ if (s1 == sizeof(dtype) && s2 == sizeof(dtype)) {
2555
1863
  for (i = 0; i < n; i++) {
2556
- *(dtype*)p3 = m_div(*(dtype*)p1, *(dtype*)p2);
2557
- p1 += s1;
2558
- p3 += s3;
1864
+ ((dtype*)p2)[i] = m_sign(((dtype*)p1)[i]);
2559
1865
  }
1866
+ return;
2560
1867
  }
2561
- } else {
2562
- if (p1 == p3) { // inplace case
2563
- for (i = 0; i < n; i++) {
2564
- check_intdivzero(*(dtype*)p2);
2565
- *(dtype*)p1 = m_div(*(dtype*)p1, *(dtype*)p2);
2566
- p1 += s1;
2567
- p2 += s2;
2568
- }
2569
- } else {
1868
+ if (is_aligned_step(s1, sizeof(dtype)) && is_aligned_step(s2, sizeof(dtype))) {
1869
+ //
2570
1870
  for (i = 0; i < n; i++) {
2571
- check_intdivzero(*(dtype*)p2);
2572
- *(dtype*)p3 = m_div(*(dtype*)p1, *(dtype*)p2);
1871
+ *(dtype*)p2 = m_sign(*(dtype*)p1);
2573
1872
  p1 += s1;
2574
1873
  p2 += s2;
2575
- p3 += s3;
2576
1874
  }
1875
+ return;
1876
+ //
2577
1877
  }
2578
1878
  }
2579
-
2580
- return;
1879
+ for (i = 0; i < n; i++) {
1880
+ GET_DATA_STRIDE(p1, s1, dtype, x);
1881
+ x = m_sign(x);
1882
+ SET_DATA_STRIDE(p2, s2, dtype, x);
1883
+ }
2581
1884
  //
2582
1885
  }
2583
1886
  }
2584
- for (i = 0; i < n; i++) {
2585
- dtype x, y, z;
2586
- GET_DATA_STRIDE(p1, s1, dtype, x);
2587
- GET_DATA_STRIDE(p2, s2, dtype, y);
2588
- check_intdivzero(y);
2589
- z = m_div(x, y);
2590
- SET_DATA_STRIDE(p3, s3, dtype, z);
2591
- }
2592
- //
2593
1887
  }
2594
- #undef check_intdivzero
2595
1888
 
2596
- static VALUE sfloat_div_self(VALUE self, VALUE other) {
2597
- ndfunc_arg_in_t ain[2] = { { cT, 0 }, { cT, 0 } };
1889
+ static VALUE sfloat_sign(VALUE self) {
1890
+ ndfunc_arg_in_t ain[1] = { { cT, 0 } };
2598
1891
  ndfunc_arg_out_t aout[1] = { { cT, 0 } };
2599
- ndfunc_t ndf = { iter_sfloat_div, STRIDE_LOOP, 2, 1, ain, aout };
2600
-
2601
- return na_ndloop(&ndf, 2, self, other);
2602
- }
2603
-
2604
- static VALUE sfloat_div(VALUE self, VALUE other) {
2605
-
2606
- VALUE klass, v;
1892
+ ndfunc_t ndf = { iter_sfloat_sign, FULL_LOOP, 1, 1, ain, aout };
2607
1893
 
2608
- klass = na_upcast(rb_obj_class(self), rb_obj_class(other));
2609
- if (klass == cT) {
2610
- return sfloat_div_self(self, other);
2611
- } else {
2612
- v = rb_funcall(klass, id_cast, 1, self);
2613
- return rb_funcall(v, '/', 1, other);
2614
- }
1894
+ return na_ndloop(&ndf, 1, self);
2615
1895
  }
2616
1896
 
2617
- #define check_intdivzero(y) \
2618
- {}
2619
-
2620
- static void iter_sfloat_mod(na_loop_t* const lp) {
2621
- size_t i = 0;
2622
- size_t n;
2623
- char *p1, *p2, *p3;
2624
- ssize_t s1, s2, s3;
1897
+ static void iter_sfloat_square(na_loop_t* const lp) {
1898
+ size_t i, n;
1899
+ char *p1, *p2;
1900
+ ssize_t s1, s2;
1901
+ size_t *idx1, *idx2;
1902
+ dtype x;
2625
1903
 
2626
1904
  INIT_COUNTER(lp, n);
2627
- INIT_PTR(lp, 0, p1, s1);
2628
- INIT_PTR(lp, 1, p2, s2);
2629
- INIT_PTR(lp, 2, p3, s3);
2630
-
2631
- //
2632
- if (is_aligned(p1, sizeof(dtype)) && is_aligned(p2, sizeof(dtype)) &&
2633
- is_aligned(p3, sizeof(dtype))) {
2634
-
2635
- if (s1 == sizeof(dtype) && s2 == sizeof(dtype) && s3 == sizeof(dtype)) {
2636
- if (p1 == p3) { // inplace case
2637
- for (; i < n; i++) {
2638
- check_intdivzero(((dtype*)p2)[i]);
2639
- ((dtype*)p1)[i] = m_mod(((dtype*)p1)[i], ((dtype*)p2)[i]);
2640
- }
2641
- } else {
2642
- for (; i < n; i++) {
2643
- check_intdivzero(((dtype*)p2)[i]);
2644
- ((dtype*)p3)[i] = m_mod(((dtype*)p1)[i], ((dtype*)p2)[i]);
2645
- }
2646
- }
2647
- return;
2648
- }
2649
-
2650
- if (is_aligned_step(s1, sizeof(dtype)) && is_aligned_step(s2, sizeof(dtype)) &&
2651
- is_aligned_step(s3, sizeof(dtype))) {
2652
- //
2653
-
2654
- if (s2 == 0) { // Broadcasting from scalar value.
2655
- check_intdivzero(*(dtype*)p2);
2656
- if (s1 == sizeof(dtype) && s3 == sizeof(dtype)) {
2657
- if (p1 == p3) { // inplace case
2658
- for (; i < n; i++) {
2659
- ((dtype*)p1)[i] = m_mod(((dtype*)p1)[i], *(dtype*)p2);
2660
- }
2661
- } else {
2662
- for (; i < n; i++) {
2663
- ((dtype*)p3)[i] = m_mod(((dtype*)p1)[i], *(dtype*)p2);
2664
- }
2665
- }
2666
- } else {
2667
- for (i = 0; i < n; i++) {
2668
- *(dtype*)p3 = m_mod(*(dtype*)p1, *(dtype*)p2);
2669
- p1 += s1;
2670
- p3 += s3;
2671
- }
2672
- }
2673
- } else {
2674
- if (p1 == p3) { // inplace case
2675
- for (i = 0; i < n; i++) {
2676
- check_intdivzero(*(dtype*)p2);
2677
- *(dtype*)p1 = m_mod(*(dtype*)p1, *(dtype*)p2);
2678
- p1 += s1;
2679
- p2 += s2;
2680
- }
2681
- } else {
2682
- for (i = 0; i < n; i++) {
2683
- check_intdivzero(*(dtype*)p2);
2684
- *(dtype*)p3 = m_mod(*(dtype*)p1, *(dtype*)p2);
2685
- p1 += s1;
2686
- p2 += s2;
2687
- p3 += s3;
2688
- }
2689
- }
2690
- }
2691
-
2692
- return;
2693
- //
2694
- }
2695
- }
2696
- for (i = 0; i < n; i++) {
2697
- dtype x, y, z;
2698
- GET_DATA_STRIDE(p1, s1, dtype, x);
2699
- GET_DATA_STRIDE(p2, s2, dtype, y);
2700
- check_intdivzero(y);
2701
- z = m_mod(x, y);
2702
- SET_DATA_STRIDE(p3, s3, dtype, z);
2703
- }
2704
- //
2705
- }
2706
- #undef check_intdivzero
2707
-
2708
- static VALUE sfloat_mod_self(VALUE self, VALUE other) {
2709
- ndfunc_arg_in_t ain[2] = { { cT, 0 }, { cT, 0 } };
2710
- ndfunc_arg_out_t aout[1] = { { cT, 0 } };
2711
- ndfunc_t ndf = { iter_sfloat_mod, STRIDE_LOOP, 2, 1, ain, aout };
2712
-
2713
- return na_ndloop(&ndf, 2, self, other);
2714
- }
2715
-
2716
- static VALUE sfloat_mod(VALUE self, VALUE other) {
2717
-
2718
- VALUE klass, v;
2719
-
2720
- klass = na_upcast(rb_obj_class(self), rb_obj_class(other));
2721
- if (klass == cT) {
2722
- return sfloat_mod_self(self, other);
2723
- } else {
2724
- v = rb_funcall(klass, id_cast, 1, self);
2725
- return rb_funcall(v, '%', 1, other);
2726
- }
2727
- }
2728
-
2729
- static void iter_sfloat_divmod(na_loop_t* const lp) {
2730
- size_t i, n;
2731
- char *p1, *p2, *p3, *p4;
2732
- ssize_t s1, s2, s3, s4;
2733
- dtype x, y, a, b;
2734
- INIT_COUNTER(lp, n);
2735
- INIT_PTR(lp, 0, p1, s1);
2736
- INIT_PTR(lp, 1, p2, s2);
2737
- INIT_PTR(lp, 2, p3, s3);
2738
- INIT_PTR(lp, 3, p4, s4);
2739
- for (i = n; i--;) {
2740
- GET_DATA_STRIDE(p1, s1, dtype, x);
2741
- GET_DATA_STRIDE(p2, s2, dtype, y);
2742
- m_divmod(x, y, a, b);
2743
- SET_DATA_STRIDE(p3, s3, dtype, a);
2744
- SET_DATA_STRIDE(p4, s4, dtype, b);
2745
- }
2746
- }
2747
-
2748
- static VALUE sfloat_divmod_self(VALUE self, VALUE other) {
2749
- ndfunc_arg_in_t ain[2] = { { cT, 0 }, { cT, 0 } };
2750
- ndfunc_arg_out_t aout[2] = { { cT, 0 }, { cT, 0 } };
2751
- ndfunc_t ndf = { iter_sfloat_divmod, STRIDE_LOOP, 2, 2, ain, aout };
2752
-
2753
- return na_ndloop(&ndf, 2, self, other);
2754
- }
2755
-
2756
- static VALUE sfloat_divmod(VALUE self, VALUE other) {
2757
-
2758
- VALUE klass, v;
2759
- klass = na_upcast(rb_obj_class(self), rb_obj_class(other));
2760
- if (klass == cT) {
2761
- return sfloat_divmod_self(self, other);
2762
- } else {
2763
- v = rb_funcall(klass, id_cast, 1, self);
2764
- return rb_funcall(v, id_divmod, 1, other);
2765
- }
2766
- }
2767
-
2768
- static void iter_sfloat_pow(na_loop_t* const lp) {
2769
- size_t i;
2770
- char *p1, *p2, *p3;
2771
- ssize_t s1, s2, s3;
2772
- dtype x, y;
2773
- INIT_COUNTER(lp, i);
2774
- INIT_PTR(lp, 0, p1, s1);
2775
- INIT_PTR(lp, 1, p2, s2);
2776
- INIT_PTR(lp, 2, p3, s3);
2777
- for (; i--;) {
2778
- GET_DATA_STRIDE(p1, s1, dtype, x);
2779
- GET_DATA_STRIDE(p2, s2, dtype, y);
2780
- x = m_pow(x, y);
2781
- SET_DATA_STRIDE(p3, s3, dtype, x);
2782
- }
2783
- }
2784
-
2785
- static void iter_sfloat_pow_int32(na_loop_t* const lp) {
2786
- size_t i;
2787
- char *p1, *p2, *p3;
2788
- ssize_t s1, s2, s3;
2789
- dtype x;
2790
- int32_t y;
2791
- INIT_COUNTER(lp, i);
2792
- INIT_PTR(lp, 0, p1, s1);
2793
- INIT_PTR(lp, 1, p2, s2);
2794
- INIT_PTR(lp, 2, p3, s3);
2795
- for (; i--;) {
2796
- GET_DATA_STRIDE(p1, s1, dtype, x);
2797
- GET_DATA_STRIDE(p2, s2, int32_t, y);
2798
- x = m_pow_int(x, y);
2799
- SET_DATA_STRIDE(p3, s3, dtype, x);
2800
- }
2801
- }
2802
-
2803
- static VALUE sfloat_pow_self(VALUE self, VALUE other) {
2804
- ndfunc_arg_in_t ain[2] = { { cT, 0 }, { cT, 0 } };
2805
- ndfunc_arg_in_t ain_i[2] = { { cT, 0 }, { numo_cInt32, 0 } };
2806
- ndfunc_arg_out_t aout[1] = { { cT, 0 } };
2807
- ndfunc_t ndf = { iter_sfloat_pow, STRIDE_LOOP, 2, 1, ain, aout };
2808
- ndfunc_t ndf_i = { iter_sfloat_pow_int32, STRIDE_LOOP, 2, 1, ain_i, aout };
2809
-
2810
- // fixme : use na.integer?
2811
- if (FIXNUM_P(other) || rb_obj_is_kind_of(other, numo_cInt32)) {
2812
- return na_ndloop(&ndf_i, 2, self, other);
2813
- } else {
2814
- return na_ndloop(&ndf, 2, self, other);
2815
- }
2816
- }
2817
-
2818
- static VALUE sfloat_pow(VALUE self, VALUE other) {
2819
-
2820
- VALUE klass, v;
2821
- klass = na_upcast(rb_obj_class(self), rb_obj_class(other));
2822
- if (klass == cT) {
2823
- return sfloat_pow_self(self, other);
2824
- } else {
2825
- v = rb_funcall(klass, id_cast, 1, self);
2826
- return rb_funcall(v, id_pow, 1, other);
2827
- }
2828
- }
2829
-
2830
- static void iter_sfloat_minus(na_loop_t* const lp) {
2831
- size_t i, n;
2832
- char *p1, *p2;
2833
- ssize_t s1, s2;
2834
- size_t *idx1, *idx2;
2835
- dtype x;
2836
-
2837
- INIT_COUNTER(lp, n);
2838
- INIT_PTR_IDX(lp, 0, p1, s1, idx1);
2839
- INIT_PTR_IDX(lp, 1, p2, s2, idx2);
1905
+ INIT_PTR_IDX(lp, 0, p1, s1, idx1);
1906
+ INIT_PTR_IDX(lp, 1, p2, s2, idx2);
2840
1907
 
2841
1908
  if (idx1) {
2842
1909
  if (idx2) {
2843
1910
  for (i = 0; i < n; i++) {
2844
1911
  GET_DATA_INDEX(p1, idx1, dtype, x);
2845
- x = m_minus(x);
1912
+ x = m_square(x);
2846
1913
  SET_DATA_INDEX(p2, idx2, dtype, x);
2847
1914
  }
2848
1915
  } else {
2849
1916
  for (i = 0; i < n; i++) {
2850
1917
  GET_DATA_INDEX(p1, idx1, dtype, x);
2851
- x = m_minus(x);
1918
+ x = m_square(x);
2852
1919
  SET_DATA_STRIDE(p2, s2, dtype, x);
2853
1920
  }
2854
1921
  }
@@ -2856,7 +1923,7 @@ static void iter_sfloat_minus(na_loop_t* const lp) {
2856
1923
  if (idx2) {
2857
1924
  for (i = 0; i < n; i++) {
2858
1925
  GET_DATA_STRIDE(p1, s1, dtype, x);
2859
- x = m_minus(x);
1926
+ x = m_square(x);
2860
1927
  SET_DATA_INDEX(p2, idx2, dtype, x);
2861
1928
  }
2862
1929
  } else {
@@ -2864,2187 +1931,45 @@ static void iter_sfloat_minus(na_loop_t* const lp) {
2864
1931
  if (is_aligned(p1, sizeof(dtype)) && is_aligned(p2, sizeof(dtype))) {
2865
1932
  if (s1 == sizeof(dtype) && s2 == sizeof(dtype)) {
2866
1933
  for (i = 0; i < n; i++) {
2867
- ((dtype*)p2)[i] = m_minus(((dtype*)p1)[i]);
1934
+ ((dtype*)p2)[i] = m_square(((dtype*)p1)[i]);
2868
1935
  }
2869
1936
  return;
2870
1937
  }
2871
1938
  if (is_aligned_step(s1, sizeof(dtype)) && is_aligned_step(s2, sizeof(dtype))) {
2872
1939
  //
2873
1940
  for (i = 0; i < n; i++) {
2874
- *(dtype*)p2 = m_minus(*(dtype*)p1);
1941
+ *(dtype*)p2 = m_square(*(dtype*)p1);
2875
1942
  p1 += s1;
2876
- p2 += s2;
2877
- }
2878
- return;
2879
- //
2880
- }
2881
- }
2882
- for (i = 0; i < n; i++) {
2883
- GET_DATA_STRIDE(p1, s1, dtype, x);
2884
- x = m_minus(x);
2885
- SET_DATA_STRIDE(p2, s2, dtype, x);
2886
- }
2887
- //
2888
- }
2889
- }
2890
- }
2891
-
2892
- static VALUE sfloat_minus(VALUE self) {
2893
- ndfunc_arg_in_t ain[1] = { { cT, 0 } };
2894
- ndfunc_arg_out_t aout[1] = { { cT, 0 } };
2895
- ndfunc_t ndf = { iter_sfloat_minus, FULL_LOOP, 1, 1, ain, aout };
2896
-
2897
- return na_ndloop(&ndf, 1, self);
2898
- }
2899
-
2900
- static void iter_sfloat_reciprocal(na_loop_t* const lp) {
2901
- size_t i, n;
2902
- char *p1, *p2;
2903
- ssize_t s1, s2;
2904
- size_t *idx1, *idx2;
2905
- dtype x;
2906
-
2907
- INIT_COUNTER(lp, n);
2908
- INIT_PTR_IDX(lp, 0, p1, s1, idx1);
2909
- INIT_PTR_IDX(lp, 1, p2, s2, idx2);
2910
-
2911
- if (idx1) {
2912
- if (idx2) {
2913
- for (i = 0; i < n; i++) {
2914
- GET_DATA_INDEX(p1, idx1, dtype, x);
2915
- x = m_reciprocal(x);
2916
- SET_DATA_INDEX(p2, idx2, dtype, x);
2917
- }
2918
- } else {
2919
- for (i = 0; i < n; i++) {
2920
- GET_DATA_INDEX(p1, idx1, dtype, x);
2921
- x = m_reciprocal(x);
2922
- SET_DATA_STRIDE(p2, s2, dtype, x);
2923
- }
2924
- }
2925
- } else {
2926
- if (idx2) {
2927
- for (i = 0; i < n; i++) {
2928
- GET_DATA_STRIDE(p1, s1, dtype, x);
2929
- x = m_reciprocal(x);
2930
- SET_DATA_INDEX(p2, idx2, dtype, x);
2931
- }
2932
- } else {
2933
- //
2934
- if (is_aligned(p1, sizeof(dtype)) && is_aligned(p2, sizeof(dtype))) {
2935
- if (s1 == sizeof(dtype) && s2 == sizeof(dtype)) {
2936
- for (i = 0; i < n; i++) {
2937
- ((dtype*)p2)[i] = m_reciprocal(((dtype*)p1)[i]);
2938
- }
2939
- return;
2940
- }
2941
- if (is_aligned_step(s1, sizeof(dtype)) && is_aligned_step(s2, sizeof(dtype))) {
2942
- //
2943
- for (i = 0; i < n; i++) {
2944
- *(dtype*)p2 = m_reciprocal(*(dtype*)p1);
2945
- p1 += s1;
2946
- p2 += s2;
2947
- }
2948
- return;
2949
- //
2950
- }
2951
- }
2952
- for (i = 0; i < n; i++) {
2953
- GET_DATA_STRIDE(p1, s1, dtype, x);
2954
- x = m_reciprocal(x);
2955
- SET_DATA_STRIDE(p2, s2, dtype, x);
2956
- }
2957
- //
2958
- }
2959
- }
2960
- }
2961
-
2962
- static VALUE sfloat_reciprocal(VALUE self) {
2963
- ndfunc_arg_in_t ain[1] = { { cT, 0 } };
2964
- ndfunc_arg_out_t aout[1] = { { cT, 0 } };
2965
- ndfunc_t ndf = { iter_sfloat_reciprocal, FULL_LOOP, 1, 1, ain, aout };
2966
-
2967
- return na_ndloop(&ndf, 1, self);
2968
- }
2969
-
2970
- static void iter_sfloat_sign(na_loop_t* const lp) {
2971
- size_t i, n;
2972
- char *p1, *p2;
2973
- ssize_t s1, s2;
2974
- size_t *idx1, *idx2;
2975
- dtype x;
2976
-
2977
- INIT_COUNTER(lp, n);
2978
- INIT_PTR_IDX(lp, 0, p1, s1, idx1);
2979
- INIT_PTR_IDX(lp, 1, p2, s2, idx2);
2980
-
2981
- if (idx1) {
2982
- if (idx2) {
2983
- for (i = 0; i < n; i++) {
2984
- GET_DATA_INDEX(p1, idx1, dtype, x);
2985
- x = m_sign(x);
2986
- SET_DATA_INDEX(p2, idx2, dtype, x);
2987
- }
2988
- } else {
2989
- for (i = 0; i < n; i++) {
2990
- GET_DATA_INDEX(p1, idx1, dtype, x);
2991
- x = m_sign(x);
2992
- SET_DATA_STRIDE(p2, s2, dtype, x);
2993
- }
2994
- }
2995
- } else {
2996
- if (idx2) {
2997
- for (i = 0; i < n; i++) {
2998
- GET_DATA_STRIDE(p1, s1, dtype, x);
2999
- x = m_sign(x);
3000
- SET_DATA_INDEX(p2, idx2, dtype, x);
3001
- }
3002
- } else {
3003
- //
3004
- if (is_aligned(p1, sizeof(dtype)) && is_aligned(p2, sizeof(dtype))) {
3005
- if (s1 == sizeof(dtype) && s2 == sizeof(dtype)) {
3006
- for (i = 0; i < n; i++) {
3007
- ((dtype*)p2)[i] = m_sign(((dtype*)p1)[i]);
3008
- }
3009
- return;
3010
- }
3011
- if (is_aligned_step(s1, sizeof(dtype)) && is_aligned_step(s2, sizeof(dtype))) {
3012
- //
3013
- for (i = 0; i < n; i++) {
3014
- *(dtype*)p2 = m_sign(*(dtype*)p1);
3015
- p1 += s1;
3016
- p2 += s2;
3017
- }
3018
- return;
3019
- //
3020
- }
3021
- }
3022
- for (i = 0; i < n; i++) {
3023
- GET_DATA_STRIDE(p1, s1, dtype, x);
3024
- x = m_sign(x);
3025
- SET_DATA_STRIDE(p2, s2, dtype, x);
3026
- }
3027
- //
3028
- }
3029
- }
3030
- }
3031
-
3032
- static VALUE sfloat_sign(VALUE self) {
3033
- ndfunc_arg_in_t ain[1] = { { cT, 0 } };
3034
- ndfunc_arg_out_t aout[1] = { { cT, 0 } };
3035
- ndfunc_t ndf = { iter_sfloat_sign, FULL_LOOP, 1, 1, ain, aout };
3036
-
3037
- return na_ndloop(&ndf, 1, self);
3038
- }
3039
-
3040
- static void iter_sfloat_square(na_loop_t* const lp) {
3041
- size_t i, n;
3042
- char *p1, *p2;
3043
- ssize_t s1, s2;
3044
- size_t *idx1, *idx2;
3045
- dtype x;
3046
-
3047
- INIT_COUNTER(lp, n);
3048
- INIT_PTR_IDX(lp, 0, p1, s1, idx1);
3049
- INIT_PTR_IDX(lp, 1, p2, s2, idx2);
3050
-
3051
- if (idx1) {
3052
- if (idx2) {
3053
- for (i = 0; i < n; i++) {
3054
- GET_DATA_INDEX(p1, idx1, dtype, x);
3055
- x = m_square(x);
3056
- SET_DATA_INDEX(p2, idx2, dtype, x);
3057
- }
3058
- } else {
3059
- for (i = 0; i < n; i++) {
3060
- GET_DATA_INDEX(p1, idx1, dtype, x);
3061
- x = m_square(x);
3062
- SET_DATA_STRIDE(p2, s2, dtype, x);
3063
- }
3064
- }
3065
- } else {
3066
- if (idx2) {
3067
- for (i = 0; i < n; i++) {
3068
- GET_DATA_STRIDE(p1, s1, dtype, x);
3069
- x = m_square(x);
3070
- SET_DATA_INDEX(p2, idx2, dtype, x);
3071
- }
3072
- } else {
3073
- //
3074
- if (is_aligned(p1, sizeof(dtype)) && is_aligned(p2, sizeof(dtype))) {
3075
- if (s1 == sizeof(dtype) && s2 == sizeof(dtype)) {
3076
- for (i = 0; i < n; i++) {
3077
- ((dtype*)p2)[i] = m_square(((dtype*)p1)[i]);
3078
- }
3079
- return;
3080
- }
3081
- if (is_aligned_step(s1, sizeof(dtype)) && is_aligned_step(s2, sizeof(dtype))) {
3082
- //
3083
- for (i = 0; i < n; i++) {
3084
- *(dtype*)p2 = m_square(*(dtype*)p1);
3085
- p1 += s1;
3086
- p2 += s2;
3087
- }
3088
- return;
3089
- //
3090
- }
3091
- }
3092
- for (i = 0; i < n; i++) {
3093
- GET_DATA_STRIDE(p1, s1, dtype, x);
3094
- x = m_square(x);
3095
- SET_DATA_STRIDE(p2, s2, dtype, x);
3096
- }
3097
- //
3098
- }
3099
- }
3100
- }
3101
-
3102
- static VALUE sfloat_square(VALUE self) {
3103
- ndfunc_arg_in_t ain[1] = { { cT, 0 } };
3104
- ndfunc_arg_out_t aout[1] = { { cT, 0 } };
3105
- ndfunc_t ndf = { iter_sfloat_square, FULL_LOOP, 1, 1, ain, aout };
3106
-
3107
- return na_ndloop(&ndf, 1, self);
3108
- }
3109
-
3110
- static void iter_sfloat_eq(na_loop_t* const lp) {
3111
- size_t i;
3112
- char *p1, *p2;
3113
- BIT_DIGIT* a3;
3114
- size_t p3;
3115
- ssize_t s1, s2, s3;
3116
- dtype x, y;
3117
- BIT_DIGIT b;
3118
- INIT_COUNTER(lp, i);
3119
- INIT_PTR(lp, 0, p1, s1);
3120
- INIT_PTR(lp, 1, p2, s2);
3121
- INIT_PTR_BIT(lp, 2, a3, p3, s3);
3122
- for (; i--;) {
3123
- GET_DATA_STRIDE(p1, s1, dtype, x);
3124
- GET_DATA_STRIDE(p2, s2, dtype, y);
3125
- b = (m_eq(x, y)) ? 1 : 0;
3126
- STORE_BIT(a3, p3, b);
3127
- p3 += s3;
3128
- }
3129
- }
3130
-
3131
- static VALUE sfloat_eq_self(VALUE self, VALUE other) {
3132
- ndfunc_arg_in_t ain[2] = { { cT, 0 }, { cT, 0 } };
3133
- ndfunc_arg_out_t aout[1] = { { numo_cBit, 0 } };
3134
- ndfunc_t ndf = { iter_sfloat_eq, STRIDE_LOOP, 2, 1, ain, aout };
3135
-
3136
- return na_ndloop(&ndf, 2, self, other);
3137
- }
3138
-
3139
- static VALUE sfloat_eq(VALUE self, VALUE other) {
3140
-
3141
- VALUE klass, v;
3142
- klass = na_upcast(rb_obj_class(self), rb_obj_class(other));
3143
- if (klass == cT) {
3144
- return sfloat_eq_self(self, other);
3145
- } else {
3146
- v = rb_funcall(klass, id_cast, 1, self);
3147
- return rb_funcall(v, id_eq, 1, other);
3148
- }
3149
- }
3150
-
3151
- static void iter_sfloat_ne(na_loop_t* const lp) {
3152
- size_t i;
3153
- char *p1, *p2;
3154
- BIT_DIGIT* a3;
3155
- size_t p3;
3156
- ssize_t s1, s2, s3;
3157
- dtype x, y;
3158
- BIT_DIGIT b;
3159
- INIT_COUNTER(lp, i);
3160
- INIT_PTR(lp, 0, p1, s1);
3161
- INIT_PTR(lp, 1, p2, s2);
3162
- INIT_PTR_BIT(lp, 2, a3, p3, s3);
3163
- for (; i--;) {
3164
- GET_DATA_STRIDE(p1, s1, dtype, x);
3165
- GET_DATA_STRIDE(p2, s2, dtype, y);
3166
- b = (m_ne(x, y)) ? 1 : 0;
3167
- STORE_BIT(a3, p3, b);
3168
- p3 += s3;
3169
- }
3170
- }
3171
-
3172
- static VALUE sfloat_ne_self(VALUE self, VALUE other) {
3173
- ndfunc_arg_in_t ain[2] = { { cT, 0 }, { cT, 0 } };
3174
- ndfunc_arg_out_t aout[1] = { { numo_cBit, 0 } };
3175
- ndfunc_t ndf = { iter_sfloat_ne, STRIDE_LOOP, 2, 1, ain, aout };
3176
-
3177
- return na_ndloop(&ndf, 2, self, other);
3178
- }
3179
-
3180
- static VALUE sfloat_ne(VALUE self, VALUE other) {
3181
-
3182
- VALUE klass, v;
3183
- klass = na_upcast(rb_obj_class(self), rb_obj_class(other));
3184
- if (klass == cT) {
3185
- return sfloat_ne_self(self, other);
3186
- } else {
3187
- v = rb_funcall(klass, id_cast, 1, self);
3188
- return rb_funcall(v, id_ne, 1, other);
3189
- }
3190
- }
3191
-
3192
- static void iter_sfloat_nearly_eq(na_loop_t* const lp) {
3193
- size_t i;
3194
- char *p1, *p2;
3195
- BIT_DIGIT* a3;
3196
- size_t p3;
3197
- ssize_t s1, s2, s3;
3198
- dtype x, y;
3199
- BIT_DIGIT b;
3200
- INIT_COUNTER(lp, i);
3201
- INIT_PTR(lp, 0, p1, s1);
3202
- INIT_PTR(lp, 1, p2, s2);
3203
- INIT_PTR_BIT(lp, 2, a3, p3, s3);
3204
- for (; i--;) {
3205
- GET_DATA_STRIDE(p1, s1, dtype, x);
3206
- GET_DATA_STRIDE(p2, s2, dtype, y);
3207
- b = (m_nearly_eq(x, y)) ? 1 : 0;
3208
- STORE_BIT(a3, p3, b);
3209
- p3 += s3;
3210
- }
3211
- }
3212
-
3213
- static VALUE sfloat_nearly_eq_self(VALUE self, VALUE other) {
3214
- ndfunc_arg_in_t ain[2] = { { cT, 0 }, { cT, 0 } };
3215
- ndfunc_arg_out_t aout[1] = { { numo_cBit, 0 } };
3216
- ndfunc_t ndf = { iter_sfloat_nearly_eq, STRIDE_LOOP, 2, 1, ain, aout };
3217
-
3218
- return na_ndloop(&ndf, 2, self, other);
3219
- }
3220
-
3221
- static VALUE sfloat_nearly_eq(VALUE self, VALUE other) {
3222
-
3223
- VALUE klass, v;
3224
- klass = na_upcast(rb_obj_class(self), rb_obj_class(other));
3225
- if (klass == cT) {
3226
- return sfloat_nearly_eq_self(self, other);
3227
- } else {
3228
- v = rb_funcall(klass, id_cast, 1, self);
3229
- return rb_funcall(v, id_nearly_eq, 1, other);
3230
- }
3231
- }
3232
-
3233
- static void iter_sfloat_floor(na_loop_t* const lp) {
3234
- size_t i, n;
3235
- char *p1, *p2;
3236
- ssize_t s1, s2;
3237
- size_t *idx1, *idx2;
3238
- dtype x;
3239
-
3240
- INIT_COUNTER(lp, n);
3241
- INIT_PTR_IDX(lp, 0, p1, s1, idx1);
3242
- INIT_PTR_IDX(lp, 1, p2, s2, idx2);
3243
-
3244
- if (idx1) {
3245
- if (idx2) {
3246
- for (i = 0; i < n; i++) {
3247
- GET_DATA_INDEX(p1, idx1, dtype, x);
3248
- x = m_floor(x);
3249
- SET_DATA_INDEX(p2, idx2, dtype, x);
3250
- }
3251
- } else {
3252
- for (i = 0; i < n; i++) {
3253
- GET_DATA_INDEX(p1, idx1, dtype, x);
3254
- x = m_floor(x);
3255
- SET_DATA_STRIDE(p2, s2, dtype, x);
3256
- }
3257
- }
3258
- } else {
3259
- if (idx2) {
3260
- for (i = 0; i < n; i++) {
3261
- GET_DATA_STRIDE(p1, s1, dtype, x);
3262
- x = m_floor(x);
3263
- SET_DATA_INDEX(p2, idx2, dtype, x);
3264
- }
3265
- } else {
3266
- //
3267
- if (is_aligned(p1, sizeof(dtype)) && is_aligned(p2, sizeof(dtype))) {
3268
- if (s1 == sizeof(dtype) && s2 == sizeof(dtype)) {
3269
- for (i = 0; i < n; i++) {
3270
- ((dtype*)p2)[i] = m_floor(((dtype*)p1)[i]);
3271
- }
3272
- return;
3273
- }
3274
- if (is_aligned_step(s1, sizeof(dtype)) && is_aligned_step(s2, sizeof(dtype))) {
3275
- //
3276
- for (i = 0; i < n; i++) {
3277
- *(dtype*)p2 = m_floor(*(dtype*)p1);
3278
- p1 += s1;
3279
- p2 += s2;
3280
- }
3281
- return;
3282
- //
3283
- }
3284
- }
3285
- for (i = 0; i < n; i++) {
3286
- GET_DATA_STRIDE(p1, s1, dtype, x);
3287
- x = m_floor(x);
3288
- SET_DATA_STRIDE(p2, s2, dtype, x);
3289
- }
3290
- //
3291
- }
3292
- }
3293
- }
3294
-
3295
- static VALUE sfloat_floor(VALUE self) {
3296
- ndfunc_arg_in_t ain[1] = { { cT, 0 } };
3297
- ndfunc_arg_out_t aout[1] = { { cT, 0 } };
3298
- ndfunc_t ndf = { iter_sfloat_floor, FULL_LOOP, 1, 1, ain, aout };
3299
-
3300
- return na_ndloop(&ndf, 1, self);
3301
- }
3302
-
3303
- static void iter_sfloat_round(na_loop_t* const lp) {
3304
- size_t i, n;
3305
- char *p1, *p2;
3306
- ssize_t s1, s2;
3307
- size_t *idx1, *idx2;
3308
- dtype x;
3309
-
3310
- INIT_COUNTER(lp, n);
3311
- INIT_PTR_IDX(lp, 0, p1, s1, idx1);
3312
- INIT_PTR_IDX(lp, 1, p2, s2, idx2);
3313
-
3314
- if (idx1) {
3315
- if (idx2) {
3316
- for (i = 0; i < n; i++) {
3317
- GET_DATA_INDEX(p1, idx1, dtype, x);
3318
- x = m_round(x);
3319
- SET_DATA_INDEX(p2, idx2, dtype, x);
3320
- }
3321
- } else {
3322
- for (i = 0; i < n; i++) {
3323
- GET_DATA_INDEX(p1, idx1, dtype, x);
3324
- x = m_round(x);
3325
- SET_DATA_STRIDE(p2, s2, dtype, x);
3326
- }
3327
- }
3328
- } else {
3329
- if (idx2) {
3330
- for (i = 0; i < n; i++) {
3331
- GET_DATA_STRIDE(p1, s1, dtype, x);
3332
- x = m_round(x);
3333
- SET_DATA_INDEX(p2, idx2, dtype, x);
3334
- }
3335
- } else {
3336
- //
3337
- if (is_aligned(p1, sizeof(dtype)) && is_aligned(p2, sizeof(dtype))) {
3338
- if (s1 == sizeof(dtype) && s2 == sizeof(dtype)) {
3339
- for (i = 0; i < n; i++) {
3340
- ((dtype*)p2)[i] = m_round(((dtype*)p1)[i]);
3341
- }
3342
- return;
3343
- }
3344
- if (is_aligned_step(s1, sizeof(dtype)) && is_aligned_step(s2, sizeof(dtype))) {
3345
- //
3346
- for (i = 0; i < n; i++) {
3347
- *(dtype*)p2 = m_round(*(dtype*)p1);
3348
- p1 += s1;
3349
- p2 += s2;
3350
- }
3351
- return;
3352
- //
3353
- }
3354
- }
3355
- for (i = 0; i < n; i++) {
3356
- GET_DATA_STRIDE(p1, s1, dtype, x);
3357
- x = m_round(x);
3358
- SET_DATA_STRIDE(p2, s2, dtype, x);
3359
- }
3360
- //
3361
- }
3362
- }
3363
- }
3364
-
3365
- static VALUE sfloat_round(VALUE self) {
3366
- ndfunc_arg_in_t ain[1] = { { cT, 0 } };
3367
- ndfunc_arg_out_t aout[1] = { { cT, 0 } };
3368
- ndfunc_t ndf = { iter_sfloat_round, FULL_LOOP, 1, 1, ain, aout };
3369
-
3370
- return na_ndloop(&ndf, 1, self);
3371
- }
3372
-
3373
- static void iter_sfloat_ceil(na_loop_t* const lp) {
3374
- size_t i, n;
3375
- char *p1, *p2;
3376
- ssize_t s1, s2;
3377
- size_t *idx1, *idx2;
3378
- dtype x;
3379
-
3380
- INIT_COUNTER(lp, n);
3381
- INIT_PTR_IDX(lp, 0, p1, s1, idx1);
3382
- INIT_PTR_IDX(lp, 1, p2, s2, idx2);
3383
-
3384
- if (idx1) {
3385
- if (idx2) {
3386
- for (i = 0; i < n; i++) {
3387
- GET_DATA_INDEX(p1, idx1, dtype, x);
3388
- x = m_ceil(x);
3389
- SET_DATA_INDEX(p2, idx2, dtype, x);
3390
- }
3391
- } else {
3392
- for (i = 0; i < n; i++) {
3393
- GET_DATA_INDEX(p1, idx1, dtype, x);
3394
- x = m_ceil(x);
3395
- SET_DATA_STRIDE(p2, s2, dtype, x);
3396
- }
3397
- }
3398
- } else {
3399
- if (idx2) {
3400
- for (i = 0; i < n; i++) {
3401
- GET_DATA_STRIDE(p1, s1, dtype, x);
3402
- x = m_ceil(x);
3403
- SET_DATA_INDEX(p2, idx2, dtype, x);
3404
- }
3405
- } else {
3406
- //
3407
- if (is_aligned(p1, sizeof(dtype)) && is_aligned(p2, sizeof(dtype))) {
3408
- if (s1 == sizeof(dtype) && s2 == sizeof(dtype)) {
3409
- for (i = 0; i < n; i++) {
3410
- ((dtype*)p2)[i] = m_ceil(((dtype*)p1)[i]);
3411
- }
3412
- return;
3413
- }
3414
- if (is_aligned_step(s1, sizeof(dtype)) && is_aligned_step(s2, sizeof(dtype))) {
3415
- //
3416
- for (i = 0; i < n; i++) {
3417
- *(dtype*)p2 = m_ceil(*(dtype*)p1);
3418
- p1 += s1;
3419
- p2 += s2;
3420
- }
3421
- return;
3422
- //
3423
- }
3424
- }
3425
- for (i = 0; i < n; i++) {
3426
- GET_DATA_STRIDE(p1, s1, dtype, x);
3427
- x = m_ceil(x);
3428
- SET_DATA_STRIDE(p2, s2, dtype, x);
3429
- }
3430
- //
3431
- }
3432
- }
3433
- }
3434
-
3435
- static VALUE sfloat_ceil(VALUE self) {
3436
- ndfunc_arg_in_t ain[1] = { { cT, 0 } };
3437
- ndfunc_arg_out_t aout[1] = { { cT, 0 } };
3438
- ndfunc_t ndf = { iter_sfloat_ceil, FULL_LOOP, 1, 1, ain, aout };
3439
-
3440
- return na_ndloop(&ndf, 1, self);
3441
- }
3442
-
3443
- static void iter_sfloat_trunc(na_loop_t* const lp) {
3444
- size_t i, n;
3445
- char *p1, *p2;
3446
- ssize_t s1, s2;
3447
- size_t *idx1, *idx2;
3448
- dtype x;
3449
-
3450
- INIT_COUNTER(lp, n);
3451
- INIT_PTR_IDX(lp, 0, p1, s1, idx1);
3452
- INIT_PTR_IDX(lp, 1, p2, s2, idx2);
3453
-
3454
- if (idx1) {
3455
- if (idx2) {
3456
- for (i = 0; i < n; i++) {
3457
- GET_DATA_INDEX(p1, idx1, dtype, x);
3458
- x = m_trunc(x);
3459
- SET_DATA_INDEX(p2, idx2, dtype, x);
3460
- }
3461
- } else {
3462
- for (i = 0; i < n; i++) {
3463
- GET_DATA_INDEX(p1, idx1, dtype, x);
3464
- x = m_trunc(x);
3465
- SET_DATA_STRIDE(p2, s2, dtype, x);
3466
- }
3467
- }
3468
- } else {
3469
- if (idx2) {
3470
- for (i = 0; i < n; i++) {
3471
- GET_DATA_STRIDE(p1, s1, dtype, x);
3472
- x = m_trunc(x);
3473
- SET_DATA_INDEX(p2, idx2, dtype, x);
3474
- }
3475
- } else {
3476
- //
3477
- if (is_aligned(p1, sizeof(dtype)) && is_aligned(p2, sizeof(dtype))) {
3478
- if (s1 == sizeof(dtype) && s2 == sizeof(dtype)) {
3479
- for (i = 0; i < n; i++) {
3480
- ((dtype*)p2)[i] = m_trunc(((dtype*)p1)[i]);
3481
- }
3482
- return;
3483
- }
3484
- if (is_aligned_step(s1, sizeof(dtype)) && is_aligned_step(s2, sizeof(dtype))) {
3485
- //
3486
- for (i = 0; i < n; i++) {
3487
- *(dtype*)p2 = m_trunc(*(dtype*)p1);
3488
- p1 += s1;
3489
- p2 += s2;
3490
- }
3491
- return;
3492
- //
3493
- }
3494
- }
3495
- for (i = 0; i < n; i++) {
3496
- GET_DATA_STRIDE(p1, s1, dtype, x);
3497
- x = m_trunc(x);
3498
- SET_DATA_STRIDE(p2, s2, dtype, x);
3499
- }
3500
- //
3501
- }
3502
- }
3503
- }
3504
-
3505
- static VALUE sfloat_trunc(VALUE self) {
3506
- ndfunc_arg_in_t ain[1] = { { cT, 0 } };
3507
- ndfunc_arg_out_t aout[1] = { { cT, 0 } };
3508
- ndfunc_t ndf = { iter_sfloat_trunc, FULL_LOOP, 1, 1, ain, aout };
3509
-
3510
- return na_ndloop(&ndf, 1, self);
3511
- }
3512
-
3513
- static void iter_sfloat_rint(na_loop_t* const lp) {
3514
- size_t i, n;
3515
- char *p1, *p2;
3516
- ssize_t s1, s2;
3517
- size_t *idx1, *idx2;
3518
- dtype x;
3519
-
3520
- INIT_COUNTER(lp, n);
3521
- INIT_PTR_IDX(lp, 0, p1, s1, idx1);
3522
- INIT_PTR_IDX(lp, 1, p2, s2, idx2);
3523
-
3524
- if (idx1) {
3525
- if (idx2) {
3526
- for (i = 0; i < n; i++) {
3527
- GET_DATA_INDEX(p1, idx1, dtype, x);
3528
- x = m_rint(x);
3529
- SET_DATA_INDEX(p2, idx2, dtype, x);
3530
- }
3531
- } else {
3532
- for (i = 0; i < n; i++) {
3533
- GET_DATA_INDEX(p1, idx1, dtype, x);
3534
- x = m_rint(x);
3535
- SET_DATA_STRIDE(p2, s2, dtype, x);
3536
- }
3537
- }
3538
- } else {
3539
- if (idx2) {
3540
- for (i = 0; i < n; i++) {
3541
- GET_DATA_STRIDE(p1, s1, dtype, x);
3542
- x = m_rint(x);
3543
- SET_DATA_INDEX(p2, idx2, dtype, x);
3544
- }
3545
- } else {
3546
- //
3547
- if (is_aligned(p1, sizeof(dtype)) && is_aligned(p2, sizeof(dtype))) {
3548
- if (s1 == sizeof(dtype) && s2 == sizeof(dtype)) {
3549
- for (i = 0; i < n; i++) {
3550
- ((dtype*)p2)[i] = m_rint(((dtype*)p1)[i]);
3551
- }
3552
- return;
3553
- }
3554
- if (is_aligned_step(s1, sizeof(dtype)) && is_aligned_step(s2, sizeof(dtype))) {
3555
- //
3556
- for (i = 0; i < n; i++) {
3557
- *(dtype*)p2 = m_rint(*(dtype*)p1);
3558
- p1 += s1;
3559
- p2 += s2;
3560
- }
3561
- return;
3562
- //
3563
- }
3564
- }
3565
- for (i = 0; i < n; i++) {
3566
- GET_DATA_STRIDE(p1, s1, dtype, x);
3567
- x = m_rint(x);
3568
- SET_DATA_STRIDE(p2, s2, dtype, x);
3569
- }
3570
- //
3571
- }
3572
- }
3573
- }
3574
-
3575
- static VALUE sfloat_rint(VALUE self) {
3576
- ndfunc_arg_in_t ain[1] = { { cT, 0 } };
3577
- ndfunc_arg_out_t aout[1] = { { cT, 0 } };
3578
- ndfunc_t ndf = { iter_sfloat_rint, FULL_LOOP, 1, 1, ain, aout };
3579
-
3580
- return na_ndloop(&ndf, 1, self);
3581
- }
3582
-
3583
- #define check_intdivzero(y) \
3584
- {}
3585
-
3586
- static void iter_sfloat_copysign(na_loop_t* const lp) {
3587
- size_t i = 0;
3588
- size_t n;
3589
- char *p1, *p2, *p3;
3590
- ssize_t s1, s2, s3;
3591
-
3592
- INIT_COUNTER(lp, n);
3593
- INIT_PTR(lp, 0, p1, s1);
3594
- INIT_PTR(lp, 1, p2, s2);
3595
- INIT_PTR(lp, 2, p3, s3);
3596
-
3597
- //
3598
- if (is_aligned(p1, sizeof(dtype)) && is_aligned(p2, sizeof(dtype)) &&
3599
- is_aligned(p3, sizeof(dtype))) {
3600
-
3601
- if (s1 == sizeof(dtype) && s2 == sizeof(dtype) && s3 == sizeof(dtype)) {
3602
- if (p1 == p3) { // inplace case
3603
- for (; i < n; i++) {
3604
- check_intdivzero(((dtype*)p2)[i]);
3605
- ((dtype*)p1)[i] = m_copysign(((dtype*)p1)[i], ((dtype*)p2)[i]);
3606
- }
3607
- } else {
3608
- for (; i < n; i++) {
3609
- check_intdivzero(((dtype*)p2)[i]);
3610
- ((dtype*)p3)[i] = m_copysign(((dtype*)p1)[i], ((dtype*)p2)[i]);
3611
- }
3612
- }
3613
- return;
3614
- }
3615
-
3616
- if (is_aligned_step(s1, sizeof(dtype)) && is_aligned_step(s2, sizeof(dtype)) &&
3617
- is_aligned_step(s3, sizeof(dtype))) {
3618
- //
3619
-
3620
- if (s2 == 0) { // Broadcasting from scalar value.
3621
- check_intdivzero(*(dtype*)p2);
3622
- if (s1 == sizeof(dtype) && s3 == sizeof(dtype)) {
3623
- if (p1 == p3) { // inplace case
3624
- for (; i < n; i++) {
3625
- ((dtype*)p1)[i] = m_copysign(((dtype*)p1)[i], *(dtype*)p2);
3626
- }
3627
- } else {
3628
- for (; i < n; i++) {
3629
- ((dtype*)p3)[i] = m_copysign(((dtype*)p1)[i], *(dtype*)p2);
3630
- }
3631
- }
3632
- } else {
3633
- for (i = 0; i < n; i++) {
3634
- *(dtype*)p3 = m_copysign(*(dtype*)p1, *(dtype*)p2);
3635
- p1 += s1;
3636
- p3 += s3;
3637
- }
3638
- }
3639
- } else {
3640
- if (p1 == p3) { // inplace case
3641
- for (i = 0; i < n; i++) {
3642
- check_intdivzero(*(dtype*)p2);
3643
- *(dtype*)p1 = m_copysign(*(dtype*)p1, *(dtype*)p2);
3644
- p1 += s1;
3645
- p2 += s2;
3646
- }
3647
- } else {
3648
- for (i = 0; i < n; i++) {
3649
- check_intdivzero(*(dtype*)p2);
3650
- *(dtype*)p3 = m_copysign(*(dtype*)p1, *(dtype*)p2);
3651
- p1 += s1;
3652
- p2 += s2;
3653
- p3 += s3;
3654
- }
3655
- }
3656
- }
3657
-
3658
- return;
3659
- //
3660
- }
3661
- }
3662
- for (i = 0; i < n; i++) {
3663
- dtype x, y, z;
3664
- GET_DATA_STRIDE(p1, s1, dtype, x);
3665
- GET_DATA_STRIDE(p2, s2, dtype, y);
3666
- check_intdivzero(y);
3667
- z = m_copysign(x, y);
3668
- SET_DATA_STRIDE(p3, s3, dtype, z);
3669
- }
3670
- //
3671
- }
3672
- #undef check_intdivzero
3673
-
3674
- static VALUE sfloat_copysign_self(VALUE self, VALUE other) {
3675
- ndfunc_arg_in_t ain[2] = { { cT, 0 }, { cT, 0 } };
3676
- ndfunc_arg_out_t aout[1] = { { cT, 0 } };
3677
- ndfunc_t ndf = { iter_sfloat_copysign, STRIDE_LOOP, 2, 1, ain, aout };
3678
-
3679
- return na_ndloop(&ndf, 2, self, other);
3680
- }
3681
-
3682
- static VALUE sfloat_copysign(VALUE self, VALUE other) {
3683
-
3684
- VALUE klass, v;
3685
-
3686
- klass = na_upcast(rb_obj_class(self), rb_obj_class(other));
3687
- if (klass == cT) {
3688
- return sfloat_copysign_self(self, other);
3689
- } else {
3690
- v = rb_funcall(klass, id_cast, 1, self);
3691
- return rb_funcall(v, id_copysign, 1, other);
3692
- }
3693
- }
3694
-
3695
- static void iter_sfloat_signbit(na_loop_t* const lp) {
3696
- size_t i;
3697
- char* p1;
3698
- BIT_DIGIT* a2;
3699
- size_t p2;
3700
- ssize_t s1, s2;
3701
- size_t* idx1;
3702
- dtype x;
3703
- BIT_DIGIT b;
3704
- INIT_COUNTER(lp, i);
3705
- INIT_PTR_IDX(lp, 0, p1, s1, idx1);
3706
- INIT_PTR_BIT(lp, 1, a2, p2, s2);
3707
- if (idx1) {
3708
- for (; i--;) {
3709
- GET_DATA_INDEX(p1, idx1, dtype, x);
3710
- b = (m_signbit(x)) ? 1 : 0;
3711
- STORE_BIT(a2, p2, b);
3712
- p2 += s2;
3713
- }
3714
- } else {
3715
- for (; i--;) {
3716
- GET_DATA_STRIDE(p1, s1, dtype, x);
3717
- b = (m_signbit(x)) ? 1 : 0;
3718
- STORE_BIT(a2, p2, b);
3719
- p2 += s2;
3720
- }
3721
- }
3722
- }
3723
-
3724
- static VALUE sfloat_signbit(VALUE self) {
3725
- ndfunc_arg_in_t ain[1] = { { cT, 0 } };
3726
- ndfunc_arg_out_t aout[1] = { { numo_cBit, 0 } };
3727
- ndfunc_t ndf = { iter_sfloat_signbit, FULL_LOOP, 1, 1, ain, aout };
3728
-
3729
- return na_ndloop(&ndf, 1, self);
3730
- }
3731
-
3732
- static void iter_sfloat_modf(na_loop_t* const lp) {
3733
- size_t i;
3734
- char *p1, *p2, *p3;
3735
- ssize_t s1, s2, s3;
3736
- dtype x, y, z;
3737
- INIT_COUNTER(lp, i);
3738
- INIT_PTR(lp, 0, p1, s1);
3739
- INIT_PTR(lp, 1, p2, s2);
3740
- INIT_PTR(lp, 2, p3, s3);
3741
- for (; i--;) {
3742
- GET_DATA_STRIDE(p1, s1, dtype, x);
3743
- m_modf(x, y, z);
3744
- SET_DATA_STRIDE(p2, s2, dtype, y);
3745
- SET_DATA_STRIDE(p3, s3, dtype, z);
3746
- }
3747
- }
3748
-
3749
- static VALUE sfloat_modf(VALUE self) {
3750
- ndfunc_arg_in_t ain[1] = { { cT, 0 } };
3751
- ndfunc_arg_out_t aout[2] = { { cT, 0 }, { cT, 0 } };
3752
- ndfunc_t ndf = { iter_sfloat_modf, STRIDE_LOOP, 1, 2, ain, aout };
3753
-
3754
- return na_ndloop(&ndf, 1, self);
3755
- }
3756
-
3757
- static void iter_sfloat_gt(na_loop_t* const lp) {
3758
- size_t i;
3759
- char *p1, *p2;
3760
- BIT_DIGIT* a3;
3761
- size_t p3;
3762
- ssize_t s1, s2, s3;
3763
- dtype x, y;
3764
- BIT_DIGIT b;
3765
- INIT_COUNTER(lp, i);
3766
- INIT_PTR(lp, 0, p1, s1);
3767
- INIT_PTR(lp, 1, p2, s2);
3768
- INIT_PTR_BIT(lp, 2, a3, p3, s3);
3769
- for (; i--;) {
3770
- GET_DATA_STRIDE(p1, s1, dtype, x);
3771
- GET_DATA_STRIDE(p2, s2, dtype, y);
3772
- b = (m_gt(x, y)) ? 1 : 0;
3773
- STORE_BIT(a3, p3, b);
3774
- p3 += s3;
3775
- }
3776
- }
3777
-
3778
- static VALUE sfloat_gt_self(VALUE self, VALUE other) {
3779
- ndfunc_arg_in_t ain[2] = { { cT, 0 }, { cT, 0 } };
3780
- ndfunc_arg_out_t aout[1] = { { numo_cBit, 0 } };
3781
- ndfunc_t ndf = { iter_sfloat_gt, STRIDE_LOOP, 2, 1, ain, aout };
3782
-
3783
- return na_ndloop(&ndf, 2, self, other);
3784
- }
3785
-
3786
- static VALUE sfloat_gt(VALUE self, VALUE other) {
3787
-
3788
- VALUE klass, v;
3789
- klass = na_upcast(rb_obj_class(self), rb_obj_class(other));
3790
- if (klass == cT) {
3791
- return sfloat_gt_self(self, other);
3792
- } else {
3793
- v = rb_funcall(klass, id_cast, 1, self);
3794
- return rb_funcall(v, id_gt, 1, other);
3795
- }
3796
- }
3797
-
3798
- static void iter_sfloat_ge(na_loop_t* const lp) {
3799
- size_t i;
3800
- char *p1, *p2;
3801
- BIT_DIGIT* a3;
3802
- size_t p3;
3803
- ssize_t s1, s2, s3;
3804
- dtype x, y;
3805
- BIT_DIGIT b;
3806
- INIT_COUNTER(lp, i);
3807
- INIT_PTR(lp, 0, p1, s1);
3808
- INIT_PTR(lp, 1, p2, s2);
3809
- INIT_PTR_BIT(lp, 2, a3, p3, s3);
3810
- for (; i--;) {
3811
- GET_DATA_STRIDE(p1, s1, dtype, x);
3812
- GET_DATA_STRIDE(p2, s2, dtype, y);
3813
- b = (m_ge(x, y)) ? 1 : 0;
3814
- STORE_BIT(a3, p3, b);
3815
- p3 += s3;
3816
- }
3817
- }
3818
-
3819
- static VALUE sfloat_ge_self(VALUE self, VALUE other) {
3820
- ndfunc_arg_in_t ain[2] = { { cT, 0 }, { cT, 0 } };
3821
- ndfunc_arg_out_t aout[1] = { { numo_cBit, 0 } };
3822
- ndfunc_t ndf = { iter_sfloat_ge, STRIDE_LOOP, 2, 1, ain, aout };
3823
-
3824
- return na_ndloop(&ndf, 2, self, other);
3825
- }
3826
-
3827
- static VALUE sfloat_ge(VALUE self, VALUE other) {
3828
-
3829
- VALUE klass, v;
3830
- klass = na_upcast(rb_obj_class(self), rb_obj_class(other));
3831
- if (klass == cT) {
3832
- return sfloat_ge_self(self, other);
3833
- } else {
3834
- v = rb_funcall(klass, id_cast, 1, self);
3835
- return rb_funcall(v, id_ge, 1, other);
3836
- }
3837
- }
3838
-
3839
- static void iter_sfloat_lt(na_loop_t* const lp) {
3840
- size_t i;
3841
- char *p1, *p2;
3842
- BIT_DIGIT* a3;
3843
- size_t p3;
3844
- ssize_t s1, s2, s3;
3845
- dtype x, y;
3846
- BIT_DIGIT b;
3847
- INIT_COUNTER(lp, i);
3848
- INIT_PTR(lp, 0, p1, s1);
3849
- INIT_PTR(lp, 1, p2, s2);
3850
- INIT_PTR_BIT(lp, 2, a3, p3, s3);
3851
- for (; i--;) {
3852
- GET_DATA_STRIDE(p1, s1, dtype, x);
3853
- GET_DATA_STRIDE(p2, s2, dtype, y);
3854
- b = (m_lt(x, y)) ? 1 : 0;
3855
- STORE_BIT(a3, p3, b);
3856
- p3 += s3;
3857
- }
3858
- }
3859
-
3860
- static VALUE sfloat_lt_self(VALUE self, VALUE other) {
3861
- ndfunc_arg_in_t ain[2] = { { cT, 0 }, { cT, 0 } };
3862
- ndfunc_arg_out_t aout[1] = { { numo_cBit, 0 } };
3863
- ndfunc_t ndf = { iter_sfloat_lt, STRIDE_LOOP, 2, 1, ain, aout };
3864
-
3865
- return na_ndloop(&ndf, 2, self, other);
3866
- }
3867
-
3868
- static VALUE sfloat_lt(VALUE self, VALUE other) {
3869
-
3870
- VALUE klass, v;
3871
- klass = na_upcast(rb_obj_class(self), rb_obj_class(other));
3872
- if (klass == cT) {
3873
- return sfloat_lt_self(self, other);
3874
- } else {
3875
- v = rb_funcall(klass, id_cast, 1, self);
3876
- return rb_funcall(v, id_lt, 1, other);
3877
- }
3878
- }
3879
-
3880
- static void iter_sfloat_le(na_loop_t* const lp) {
3881
- size_t i;
3882
- char *p1, *p2;
3883
- BIT_DIGIT* a3;
3884
- size_t p3;
3885
- ssize_t s1, s2, s3;
3886
- dtype x, y;
3887
- BIT_DIGIT b;
3888
- INIT_COUNTER(lp, i);
3889
- INIT_PTR(lp, 0, p1, s1);
3890
- INIT_PTR(lp, 1, p2, s2);
3891
- INIT_PTR_BIT(lp, 2, a3, p3, s3);
3892
- for (; i--;) {
3893
- GET_DATA_STRIDE(p1, s1, dtype, x);
3894
- GET_DATA_STRIDE(p2, s2, dtype, y);
3895
- b = (m_le(x, y)) ? 1 : 0;
3896
- STORE_BIT(a3, p3, b);
3897
- p3 += s3;
3898
- }
3899
- }
3900
-
3901
- static VALUE sfloat_le_self(VALUE self, VALUE other) {
3902
- ndfunc_arg_in_t ain[2] = { { cT, 0 }, { cT, 0 } };
3903
- ndfunc_arg_out_t aout[1] = { { numo_cBit, 0 } };
3904
- ndfunc_t ndf = { iter_sfloat_le, STRIDE_LOOP, 2, 1, ain, aout };
3905
-
3906
- return na_ndloop(&ndf, 2, self, other);
3907
- }
3908
-
3909
- static VALUE sfloat_le(VALUE self, VALUE other) {
3910
-
3911
- VALUE klass, v;
3912
- klass = na_upcast(rb_obj_class(self), rb_obj_class(other));
3913
- if (klass == cT) {
3914
- return sfloat_le_self(self, other);
3915
- } else {
3916
- v = rb_funcall(klass, id_cast, 1, self);
3917
- return rb_funcall(v, id_le, 1, other);
3918
- }
3919
- }
3920
-
3921
- static void iter_sfloat_clip(na_loop_t* const lp) {
3922
- size_t i;
3923
- char *p1, *p2, *p3, *p4;
3924
- ssize_t s1, s2, s3, s4;
3925
- dtype x, min, max;
3926
- INIT_COUNTER(lp, i);
3927
- INIT_PTR(lp, 0, p1, s1);
3928
- INIT_PTR(lp, 1, p2, s2);
3929
- INIT_PTR(lp, 2, p3, s3);
3930
- INIT_PTR(lp, 3, p4, s4);
3931
- for (; i--;) {
3932
- GET_DATA_STRIDE(p1, s1, dtype, x);
3933
- GET_DATA_STRIDE(p2, s2, dtype, min);
3934
- GET_DATA_STRIDE(p3, s3, dtype, max);
3935
- if (m_gt(min, max)) {
3936
- rb_raise(nary_eOperationError, "min is greater than max");
3937
- }
3938
- if (m_lt(x, min)) {
3939
- x = min;
3940
- }
3941
- if (m_gt(x, max)) {
3942
- x = max;
3943
- }
3944
- SET_DATA_STRIDE(p4, s4, dtype, x);
3945
- }
3946
- }
3947
-
3948
- static void iter_sfloat_clip_min(na_loop_t* const lp) {
3949
- size_t i;
3950
- char *p1, *p2, *p3;
3951
- ssize_t s1, s2, s3;
3952
- dtype x, min;
3953
- INIT_COUNTER(lp, i);
3954
- INIT_PTR(lp, 0, p1, s1);
3955
- INIT_PTR(lp, 1, p2, s2);
3956
- INIT_PTR(lp, 2, p3, s3);
3957
- for (; i--;) {
3958
- GET_DATA_STRIDE(p1, s1, dtype, x);
3959
- GET_DATA_STRIDE(p2, s2, dtype, min);
3960
- if (m_lt(x, min)) {
3961
- x = min;
3962
- }
3963
- SET_DATA_STRIDE(p3, s3, dtype, x);
3964
- }
3965
- }
3966
-
3967
- static void iter_sfloat_clip_max(na_loop_t* const lp) {
3968
- size_t i;
3969
- char *p1, *p2, *p3;
3970
- ssize_t s1, s2, s3;
3971
- dtype x, max;
3972
- INIT_COUNTER(lp, i);
3973
- INIT_PTR(lp, 0, p1, s1);
3974
- INIT_PTR(lp, 1, p2, s2);
3975
- INIT_PTR(lp, 2, p3, s3);
3976
- for (; i--;) {
3977
- GET_DATA_STRIDE(p1, s1, dtype, x);
3978
- GET_DATA_STRIDE(p2, s2, dtype, max);
3979
- if (m_gt(x, max)) {
3980
- x = max;
3981
- }
3982
- SET_DATA_STRIDE(p3, s3, dtype, x);
3983
- }
3984
- }
3985
-
3986
- static VALUE sfloat_clip(VALUE self, VALUE min, VALUE max) {
3987
- ndfunc_arg_in_t ain[3] = { { Qnil, 0 }, { cT, 0 }, { cT, 0 } };
3988
- ndfunc_arg_out_t aout[1] = { { cT, 0 } };
3989
- ndfunc_t ndf_min = { iter_sfloat_clip_min, STRIDE_LOOP, 2, 1, ain, aout };
3990
- ndfunc_t ndf_max = { iter_sfloat_clip_max, STRIDE_LOOP, 2, 1, ain, aout };
3991
- ndfunc_t ndf_both = { iter_sfloat_clip, STRIDE_LOOP, 3, 1, ain, aout };
3992
-
3993
- if (RTEST(min)) {
3994
- if (RTEST(max)) {
3995
- return na_ndloop(&ndf_both, 3, self, min, max);
3996
- } else {
3997
- return na_ndloop(&ndf_min, 2, self, min);
3998
- }
3999
- } else {
4000
- if (RTEST(max)) {
4001
- return na_ndloop(&ndf_max, 2, self, max);
4002
- }
4003
- }
4004
- rb_raise(rb_eArgError, "min and max are not given");
4005
- return Qnil;
4006
- }
4007
-
4008
- static void iter_sfloat_isnan(na_loop_t* const lp) {
4009
- size_t i;
4010
- char* p1;
4011
- BIT_DIGIT* a2;
4012
- size_t p2;
4013
- ssize_t s1, s2;
4014
- size_t* idx1;
4015
- dtype x;
4016
- BIT_DIGIT b;
4017
- INIT_COUNTER(lp, i);
4018
- INIT_PTR_IDX(lp, 0, p1, s1, idx1);
4019
- INIT_PTR_BIT(lp, 1, a2, p2, s2);
4020
- if (idx1) {
4021
- for (; i--;) {
4022
- GET_DATA_INDEX(p1, idx1, dtype, x);
4023
- b = (m_isnan(x)) ? 1 : 0;
4024
- STORE_BIT(a2, p2, b);
4025
- p2 += s2;
4026
- }
4027
- } else {
4028
- for (; i--;) {
4029
- GET_DATA_STRIDE(p1, s1, dtype, x);
4030
- b = (m_isnan(x)) ? 1 : 0;
4031
- STORE_BIT(a2, p2, b);
4032
- p2 += s2;
4033
- }
4034
- }
4035
- }
4036
-
4037
- static VALUE sfloat_isnan(VALUE self) {
4038
- ndfunc_arg_in_t ain[1] = { { cT, 0 } };
4039
- ndfunc_arg_out_t aout[1] = { { numo_cBit, 0 } };
4040
- ndfunc_t ndf = { iter_sfloat_isnan, FULL_LOOP, 1, 1, ain, aout };
4041
-
4042
- return na_ndloop(&ndf, 1, self);
4043
- }
4044
-
4045
- static void iter_sfloat_isinf(na_loop_t* const lp) {
4046
- size_t i;
4047
- char* p1;
4048
- BIT_DIGIT* a2;
4049
- size_t p2;
4050
- ssize_t s1, s2;
4051
- size_t* idx1;
4052
- dtype x;
4053
- BIT_DIGIT b;
4054
- INIT_COUNTER(lp, i);
4055
- INIT_PTR_IDX(lp, 0, p1, s1, idx1);
4056
- INIT_PTR_BIT(lp, 1, a2, p2, s2);
4057
- if (idx1) {
4058
- for (; i--;) {
4059
- GET_DATA_INDEX(p1, idx1, dtype, x);
4060
- b = (m_isinf(x)) ? 1 : 0;
4061
- STORE_BIT(a2, p2, b);
4062
- p2 += s2;
4063
- }
4064
- } else {
4065
- for (; i--;) {
4066
- GET_DATA_STRIDE(p1, s1, dtype, x);
4067
- b = (m_isinf(x)) ? 1 : 0;
4068
- STORE_BIT(a2, p2, b);
4069
- p2 += s2;
4070
- }
4071
- }
4072
- }
4073
-
4074
- static VALUE sfloat_isinf(VALUE self) {
4075
- ndfunc_arg_in_t ain[1] = { { cT, 0 } };
4076
- ndfunc_arg_out_t aout[1] = { { numo_cBit, 0 } };
4077
- ndfunc_t ndf = { iter_sfloat_isinf, FULL_LOOP, 1, 1, ain, aout };
4078
-
4079
- return na_ndloop(&ndf, 1, self);
4080
- }
4081
-
4082
- static void iter_sfloat_isposinf(na_loop_t* const lp) {
4083
- size_t i;
4084
- char* p1;
4085
- BIT_DIGIT* a2;
4086
- size_t p2;
4087
- ssize_t s1, s2;
4088
- size_t* idx1;
4089
- dtype x;
4090
- BIT_DIGIT b;
4091
- INIT_COUNTER(lp, i);
4092
- INIT_PTR_IDX(lp, 0, p1, s1, idx1);
4093
- INIT_PTR_BIT(lp, 1, a2, p2, s2);
4094
- if (idx1) {
4095
- for (; i--;) {
4096
- GET_DATA_INDEX(p1, idx1, dtype, x);
4097
- b = (m_isposinf(x)) ? 1 : 0;
4098
- STORE_BIT(a2, p2, b);
4099
- p2 += s2;
4100
- }
4101
- } else {
4102
- for (; i--;) {
4103
- GET_DATA_STRIDE(p1, s1, dtype, x);
4104
- b = (m_isposinf(x)) ? 1 : 0;
4105
- STORE_BIT(a2, p2, b);
4106
- p2 += s2;
4107
- }
4108
- }
4109
- }
4110
-
4111
- static VALUE sfloat_isposinf(VALUE self) {
4112
- ndfunc_arg_in_t ain[1] = { { cT, 0 } };
4113
- ndfunc_arg_out_t aout[1] = { { numo_cBit, 0 } };
4114
- ndfunc_t ndf = { iter_sfloat_isposinf, FULL_LOOP, 1, 1, ain, aout };
4115
-
4116
- return na_ndloop(&ndf, 1, self);
4117
- }
4118
-
4119
- static void iter_sfloat_isneginf(na_loop_t* const lp) {
4120
- size_t i;
4121
- char* p1;
4122
- BIT_DIGIT* a2;
4123
- size_t p2;
4124
- ssize_t s1, s2;
4125
- size_t* idx1;
4126
- dtype x;
4127
- BIT_DIGIT b;
4128
- INIT_COUNTER(lp, i);
4129
- INIT_PTR_IDX(lp, 0, p1, s1, idx1);
4130
- INIT_PTR_BIT(lp, 1, a2, p2, s2);
4131
- if (idx1) {
4132
- for (; i--;) {
4133
- GET_DATA_INDEX(p1, idx1, dtype, x);
4134
- b = (m_isneginf(x)) ? 1 : 0;
4135
- STORE_BIT(a2, p2, b);
4136
- p2 += s2;
4137
- }
4138
- } else {
4139
- for (; i--;) {
4140
- GET_DATA_STRIDE(p1, s1, dtype, x);
4141
- b = (m_isneginf(x)) ? 1 : 0;
4142
- STORE_BIT(a2, p2, b);
4143
- p2 += s2;
4144
- }
4145
- }
4146
- }
4147
-
4148
- static VALUE sfloat_isneginf(VALUE self) {
4149
- ndfunc_arg_in_t ain[1] = { { cT, 0 } };
4150
- ndfunc_arg_out_t aout[1] = { { numo_cBit, 0 } };
4151
- ndfunc_t ndf = { iter_sfloat_isneginf, FULL_LOOP, 1, 1, ain, aout };
4152
-
4153
- return na_ndloop(&ndf, 1, self);
4154
- }
4155
-
4156
- static void iter_sfloat_isfinite(na_loop_t* const lp) {
4157
- size_t i;
4158
- char* p1;
4159
- BIT_DIGIT* a2;
4160
- size_t p2;
4161
- ssize_t s1, s2;
4162
- size_t* idx1;
4163
- dtype x;
4164
- BIT_DIGIT b;
4165
- INIT_COUNTER(lp, i);
4166
- INIT_PTR_IDX(lp, 0, p1, s1, idx1);
4167
- INIT_PTR_BIT(lp, 1, a2, p2, s2);
4168
- if (idx1) {
4169
- for (; i--;) {
4170
- GET_DATA_INDEX(p1, idx1, dtype, x);
4171
- b = (m_isfinite(x)) ? 1 : 0;
4172
- STORE_BIT(a2, p2, b);
4173
- p2 += s2;
4174
- }
4175
- } else {
4176
- for (; i--;) {
4177
- GET_DATA_STRIDE(p1, s1, dtype, x);
4178
- b = (m_isfinite(x)) ? 1 : 0;
4179
- STORE_BIT(a2, p2, b);
4180
- p2 += s2;
4181
- }
4182
- }
4183
- }
4184
-
4185
- static VALUE sfloat_isfinite(VALUE self) {
4186
- ndfunc_arg_in_t ain[1] = { { cT, 0 } };
4187
- ndfunc_arg_out_t aout[1] = { { numo_cBit, 0 } };
4188
- ndfunc_t ndf = { iter_sfloat_isfinite, FULL_LOOP, 1, 1, ain, aout };
4189
-
4190
- return na_ndloop(&ndf, 1, self);
4191
- }
4192
-
4193
- static void iter_sfloat_sum(na_loop_t* const lp) {
4194
- size_t n;
4195
- char *p1, *p2;
4196
- ssize_t s1;
4197
-
4198
- INIT_COUNTER(lp, n);
4199
- INIT_PTR(lp, 0, p1, s1);
4200
- p2 = lp->args[1].ptr + lp->args[1].iter[0].pos;
4201
-
4202
- *(dtype*)p2 = f_sum(n, p1, s1);
4203
- }
4204
- static void iter_sfloat_sum_nan(na_loop_t* const lp) {
4205
- size_t n;
4206
- char *p1, *p2;
4207
- ssize_t s1;
4208
-
4209
- INIT_COUNTER(lp, n);
4210
- INIT_PTR(lp, 0, p1, s1);
4211
- p2 = lp->args[1].ptr + lp->args[1].iter[0].pos;
4212
-
4213
- *(dtype*)p2 = f_sum_nan(n, p1, s1);
4214
- }
4215
-
4216
- static VALUE sfloat_sum(int argc, VALUE* argv, VALUE self) {
4217
- VALUE v, reduce;
4218
- ndfunc_arg_in_t ain[2] = { { cT, 0 }, { sym_reduce, 0 } };
4219
- ndfunc_arg_out_t aout[1] = { { cT, 0 } };
4220
- ndfunc_t ndf = { iter_sfloat_sum, STRIDE_LOOP_NIP | NDF_FLAT_REDUCE, 2, 1, ain, aout };
4221
-
4222
- reduce = na_reduce_dimension(argc, argv, 1, &self, &ndf, iter_sfloat_sum_nan);
4223
-
4224
- v = na_ndloop(&ndf, 2, self, reduce);
4225
-
4226
- return sfloat_extract(v);
4227
- }
4228
-
4229
- static void iter_sfloat_prod(na_loop_t* const lp) {
4230
- size_t n;
4231
- char *p1, *p2;
4232
- ssize_t s1;
4233
-
4234
- INIT_COUNTER(lp, n);
4235
- INIT_PTR(lp, 0, p1, s1);
4236
- p2 = lp->args[1].ptr + lp->args[1].iter[0].pos;
4237
-
4238
- *(dtype*)p2 = f_prod(n, p1, s1);
4239
- }
4240
- static void iter_sfloat_prod_nan(na_loop_t* const lp) {
4241
- size_t n;
4242
- char *p1, *p2;
4243
- ssize_t s1;
4244
-
4245
- INIT_COUNTER(lp, n);
4246
- INIT_PTR(lp, 0, p1, s1);
4247
- p2 = lp->args[1].ptr + lp->args[1].iter[0].pos;
4248
-
4249
- *(dtype*)p2 = f_prod_nan(n, p1, s1);
4250
- }
4251
-
4252
- static VALUE sfloat_prod(int argc, VALUE* argv, VALUE self) {
4253
- VALUE v, reduce;
4254
- ndfunc_arg_in_t ain[2] = { { cT, 0 }, { sym_reduce, 0 } };
4255
- ndfunc_arg_out_t aout[1] = { { cT, 0 } };
4256
- ndfunc_t ndf = { iter_sfloat_prod, STRIDE_LOOP_NIP | NDF_FLAT_REDUCE, 2, 1, ain, aout };
4257
-
4258
- reduce = na_reduce_dimension(argc, argv, 1, &self, &ndf, iter_sfloat_prod_nan);
4259
-
4260
- v = na_ndloop(&ndf, 2, self, reduce);
4261
-
4262
- return sfloat_extract(v);
4263
- }
4264
-
4265
- static void iter_sfloat_min(na_loop_t* const lp) {
4266
- size_t n;
4267
- char *p1, *p2;
4268
- ssize_t s1;
4269
-
4270
- INIT_COUNTER(lp, n);
4271
- INIT_PTR(lp, 0, p1, s1);
4272
- p2 = lp->args[1].ptr + lp->args[1].iter[0].pos;
4273
-
4274
- *(dtype*)p2 = f_min(n, p1, s1);
4275
- }
4276
- static void iter_sfloat_min_nan(na_loop_t* const lp) {
4277
- size_t n;
4278
- char *p1, *p2;
4279
- ssize_t s1;
4280
-
4281
- INIT_COUNTER(lp, n);
4282
- INIT_PTR(lp, 0, p1, s1);
4283
- p2 = lp->args[1].ptr + lp->args[1].iter[0].pos;
4284
-
4285
- *(dtype*)p2 = f_min_nan(n, p1, s1);
4286
- }
4287
-
4288
- static VALUE sfloat_min(int argc, VALUE* argv, VALUE self) {
4289
- VALUE v, reduce;
4290
- ndfunc_arg_in_t ain[2] = { { cT, 0 }, { sym_reduce, 0 } };
4291
- ndfunc_arg_out_t aout[1] = { { cT, 0 } };
4292
- ndfunc_t ndf = { iter_sfloat_min, STRIDE_LOOP_NIP | NDF_FLAT_REDUCE, 2, 1, ain, aout };
4293
-
4294
- reduce = na_reduce_dimension(argc, argv, 1, &self, &ndf, iter_sfloat_min_nan);
4295
-
4296
- v = na_ndloop(&ndf, 2, self, reduce);
4297
-
4298
- return sfloat_extract(v);
4299
- }
4300
-
4301
- static void iter_sfloat_max(na_loop_t* const lp) {
4302
- size_t n;
4303
- char *p1, *p2;
4304
- ssize_t s1;
4305
-
4306
- INIT_COUNTER(lp, n);
4307
- INIT_PTR(lp, 0, p1, s1);
4308
- p2 = lp->args[1].ptr + lp->args[1].iter[0].pos;
4309
-
4310
- *(dtype*)p2 = f_max(n, p1, s1);
4311
- }
4312
- static void iter_sfloat_max_nan(na_loop_t* const lp) {
4313
- size_t n;
4314
- char *p1, *p2;
4315
- ssize_t s1;
4316
-
4317
- INIT_COUNTER(lp, n);
4318
- INIT_PTR(lp, 0, p1, s1);
4319
- p2 = lp->args[1].ptr + lp->args[1].iter[0].pos;
4320
-
4321
- *(dtype*)p2 = f_max_nan(n, p1, s1);
4322
- }
4323
-
4324
- static VALUE sfloat_max(int argc, VALUE* argv, VALUE self) {
4325
- VALUE v, reduce;
4326
- ndfunc_arg_in_t ain[2] = { { cT, 0 }, { sym_reduce, 0 } };
4327
- ndfunc_arg_out_t aout[1] = { { cT, 0 } };
4328
- ndfunc_t ndf = { iter_sfloat_max, STRIDE_LOOP_NIP | NDF_FLAT_REDUCE, 2, 1, ain, aout };
4329
-
4330
- reduce = na_reduce_dimension(argc, argv, 1, &self, &ndf, iter_sfloat_max_nan);
4331
-
4332
- v = na_ndloop(&ndf, 2, self, reduce);
4333
-
4334
- return sfloat_extract(v);
4335
- }
4336
-
4337
- static void iter_sfloat_ptp(na_loop_t* const lp) {
4338
- size_t n;
4339
- char *p1, *p2;
4340
- ssize_t s1;
4341
-
4342
- INIT_COUNTER(lp, n);
4343
- INIT_PTR(lp, 0, p1, s1);
4344
- p2 = lp->args[1].ptr + lp->args[1].iter[0].pos;
4345
-
4346
- *(dtype*)p2 = f_ptp(n, p1, s1);
4347
- }
4348
- static void iter_sfloat_ptp_nan(na_loop_t* const lp) {
4349
- size_t n;
4350
- char *p1, *p2;
4351
- ssize_t s1;
4352
-
4353
- INIT_COUNTER(lp, n);
4354
- INIT_PTR(lp, 0, p1, s1);
4355
- p2 = lp->args[1].ptr + lp->args[1].iter[0].pos;
4356
-
4357
- *(dtype*)p2 = f_ptp_nan(n, p1, s1);
4358
- }
4359
-
4360
- static VALUE sfloat_ptp(int argc, VALUE* argv, VALUE self) {
4361
- VALUE v, reduce;
4362
- ndfunc_arg_in_t ain[2] = { { cT, 0 }, { sym_reduce, 0 } };
4363
- ndfunc_arg_out_t aout[1] = { { cT, 0 } };
4364
- ndfunc_t ndf = { iter_sfloat_ptp, STRIDE_LOOP_NIP | NDF_FLAT_REDUCE, 2, 1, ain, aout };
4365
-
4366
- reduce = na_reduce_dimension(argc, argv, 1, &self, &ndf, iter_sfloat_ptp_nan);
4367
-
4368
- v = na_ndloop(&ndf, 2, self, reduce);
4369
-
4370
- return sfloat_extract(v);
4371
- }
4372
-
4373
- #define idx_t int64_t
4374
- static void iter_sfloat_max_index_index64(na_loop_t* const lp) {
4375
- size_t n, idx;
4376
- char *d_ptr, *i_ptr, *o_ptr;
4377
- ssize_t d_step, i_step;
4378
-
4379
- INIT_COUNTER(lp, n);
4380
- INIT_PTR(lp, 0, d_ptr, d_step);
4381
-
4382
- idx = f_max_index(n, d_ptr, d_step);
4383
-
4384
- INIT_PTR(lp, 1, i_ptr, i_step);
4385
- o_ptr = NDL_PTR(lp, 2);
4386
- *(idx_t*)o_ptr = *(idx_t*)(i_ptr + i_step * idx);
4387
- }
4388
- #undef idx_t
4389
-
4390
- #define idx_t int32_t
4391
- static void iter_sfloat_max_index_index32(na_loop_t* const lp) {
4392
- size_t n, idx;
4393
- char *d_ptr, *i_ptr, *o_ptr;
4394
- ssize_t d_step, i_step;
4395
-
4396
- INIT_COUNTER(lp, n);
4397
- INIT_PTR(lp, 0, d_ptr, d_step);
4398
-
4399
- idx = f_max_index(n, d_ptr, d_step);
4400
-
4401
- INIT_PTR(lp, 1, i_ptr, i_step);
4402
- o_ptr = NDL_PTR(lp, 2);
4403
- *(idx_t*)o_ptr = *(idx_t*)(i_ptr + i_step * idx);
4404
- }
4405
- #undef idx_t
4406
-
4407
- #define idx_t int64_t
4408
- static void iter_sfloat_max_index_index64_nan(na_loop_t* const lp) {
4409
- size_t n, idx;
4410
- char *d_ptr, *i_ptr, *o_ptr;
4411
- ssize_t d_step, i_step;
4412
-
4413
- INIT_COUNTER(lp, n);
4414
- INIT_PTR(lp, 0, d_ptr, d_step);
4415
-
4416
- idx = f_max_index_nan(n, d_ptr, d_step);
4417
-
4418
- INIT_PTR(lp, 1, i_ptr, i_step);
4419
- o_ptr = NDL_PTR(lp, 2);
4420
- *(idx_t*)o_ptr = *(idx_t*)(i_ptr + i_step * idx);
4421
- }
4422
- #undef idx_t
4423
-
4424
- #define idx_t int32_t
4425
- static void iter_sfloat_max_index_index32_nan(na_loop_t* const lp) {
4426
- size_t n, idx;
4427
- char *d_ptr, *i_ptr, *o_ptr;
4428
- ssize_t d_step, i_step;
4429
-
4430
- INIT_COUNTER(lp, n);
4431
- INIT_PTR(lp, 0, d_ptr, d_step);
4432
-
4433
- idx = f_max_index_nan(n, d_ptr, d_step);
4434
-
4435
- INIT_PTR(lp, 1, i_ptr, i_step);
4436
- o_ptr = NDL_PTR(lp, 2);
4437
- *(idx_t*)o_ptr = *(idx_t*)(i_ptr + i_step * idx);
4438
- }
4439
- #undef idx_t
4440
-
4441
- static VALUE sfloat_max_index(int argc, VALUE* argv, VALUE self) {
4442
- narray_t* na;
4443
- VALUE idx, reduce;
4444
- ndfunc_arg_in_t ain[3] = { { Qnil, 0 }, { Qnil, 0 }, { sym_reduce, 0 } };
4445
- ndfunc_arg_out_t aout[1] = { { 0, 0, 0 } };
4446
- ndfunc_t ndf = { 0, STRIDE_LOOP_NIP | NDF_FLAT_REDUCE | NDF_EXTRACT, 3, 1, ain, aout };
4447
-
4448
- GetNArray(self, na);
4449
- if (na->ndim == 0) {
4450
- return INT2FIX(0);
4451
- }
4452
- if (na->size > (~(u_int32_t)0)) {
4453
- aout[0].type = numo_cInt64;
4454
- idx = nary_new(numo_cInt64, na->ndim, na->shape);
4455
- ndf.func = iter_sfloat_max_index_index64;
4456
-
4457
- reduce = na_reduce_dimension(argc, argv, 1, &self, &ndf, iter_sfloat_max_index_index64_nan);
4458
-
4459
- } else {
4460
- aout[0].type = numo_cInt32;
4461
- idx = nary_new(numo_cInt32, na->ndim, na->shape);
4462
- ndf.func = iter_sfloat_max_index_index32;
4463
-
4464
- reduce = na_reduce_dimension(argc, argv, 1, &self, &ndf, iter_sfloat_max_index_index32_nan);
4465
- }
4466
- rb_funcall(idx, rb_intern("seq"), 0);
4467
-
4468
- return na_ndloop(&ndf, 3, self, idx, reduce);
4469
- }
4470
-
4471
- #define idx_t int64_t
4472
- static void iter_sfloat_min_index_index64(na_loop_t* const lp) {
4473
- size_t n, idx;
4474
- char *d_ptr, *i_ptr, *o_ptr;
4475
- ssize_t d_step, i_step;
4476
-
4477
- INIT_COUNTER(lp, n);
4478
- INIT_PTR(lp, 0, d_ptr, d_step);
4479
-
4480
- idx = f_min_index(n, d_ptr, d_step);
4481
-
4482
- INIT_PTR(lp, 1, i_ptr, i_step);
4483
- o_ptr = NDL_PTR(lp, 2);
4484
- *(idx_t*)o_ptr = *(idx_t*)(i_ptr + i_step * idx);
4485
- }
4486
- #undef idx_t
4487
-
4488
- #define idx_t int32_t
4489
- static void iter_sfloat_min_index_index32(na_loop_t* const lp) {
4490
- size_t n, idx;
4491
- char *d_ptr, *i_ptr, *o_ptr;
4492
- ssize_t d_step, i_step;
4493
-
4494
- INIT_COUNTER(lp, n);
4495
- INIT_PTR(lp, 0, d_ptr, d_step);
4496
-
4497
- idx = f_min_index(n, d_ptr, d_step);
4498
-
4499
- INIT_PTR(lp, 1, i_ptr, i_step);
4500
- o_ptr = NDL_PTR(lp, 2);
4501
- *(idx_t*)o_ptr = *(idx_t*)(i_ptr + i_step * idx);
4502
- }
4503
- #undef idx_t
4504
-
4505
- #define idx_t int64_t
4506
- static void iter_sfloat_min_index_index64_nan(na_loop_t* const lp) {
4507
- size_t n, idx;
4508
- char *d_ptr, *i_ptr, *o_ptr;
4509
- ssize_t d_step, i_step;
4510
-
4511
- INIT_COUNTER(lp, n);
4512
- INIT_PTR(lp, 0, d_ptr, d_step);
4513
-
4514
- idx = f_min_index_nan(n, d_ptr, d_step);
4515
-
4516
- INIT_PTR(lp, 1, i_ptr, i_step);
4517
- o_ptr = NDL_PTR(lp, 2);
4518
- *(idx_t*)o_ptr = *(idx_t*)(i_ptr + i_step * idx);
4519
- }
4520
- #undef idx_t
4521
-
4522
- #define idx_t int32_t
4523
- static void iter_sfloat_min_index_index32_nan(na_loop_t* const lp) {
4524
- size_t n, idx;
4525
- char *d_ptr, *i_ptr, *o_ptr;
4526
- ssize_t d_step, i_step;
4527
-
4528
- INIT_COUNTER(lp, n);
4529
- INIT_PTR(lp, 0, d_ptr, d_step);
4530
-
4531
- idx = f_min_index_nan(n, d_ptr, d_step);
4532
-
4533
- INIT_PTR(lp, 1, i_ptr, i_step);
4534
- o_ptr = NDL_PTR(lp, 2);
4535
- *(idx_t*)o_ptr = *(idx_t*)(i_ptr + i_step * idx);
4536
- }
4537
- #undef idx_t
4538
-
4539
- static VALUE sfloat_min_index(int argc, VALUE* argv, VALUE self) {
4540
- narray_t* na;
4541
- VALUE idx, reduce;
4542
- ndfunc_arg_in_t ain[3] = { { Qnil, 0 }, { Qnil, 0 }, { sym_reduce, 0 } };
4543
- ndfunc_arg_out_t aout[1] = { { 0, 0, 0 } };
4544
- ndfunc_t ndf = { 0, STRIDE_LOOP_NIP | NDF_FLAT_REDUCE | NDF_EXTRACT, 3, 1, ain, aout };
4545
-
4546
- GetNArray(self, na);
4547
- if (na->ndim == 0) {
4548
- return INT2FIX(0);
4549
- }
4550
- if (na->size > (~(u_int32_t)0)) {
4551
- aout[0].type = numo_cInt64;
4552
- idx = nary_new(numo_cInt64, na->ndim, na->shape);
4553
- ndf.func = iter_sfloat_min_index_index64;
4554
-
4555
- reduce = na_reduce_dimension(argc, argv, 1, &self, &ndf, iter_sfloat_min_index_index64_nan);
4556
-
4557
- } else {
4558
- aout[0].type = numo_cInt32;
4559
- idx = nary_new(numo_cInt32, na->ndim, na->shape);
4560
- ndf.func = iter_sfloat_min_index_index32;
4561
-
4562
- reduce = na_reduce_dimension(argc, argv, 1, &self, &ndf, iter_sfloat_min_index_index32_nan);
4563
- }
4564
- rb_funcall(idx, rb_intern("seq"), 0);
4565
-
4566
- return na_ndloop(&ndf, 3, self, idx, reduce);
4567
- }
4568
-
4569
- #define idx_t int64_t
4570
- static void iter_sfloat_argmax_arg64(na_loop_t* const lp) {
4571
- size_t n, idx;
4572
- char *d_ptr, *o_ptr;
4573
- ssize_t d_step;
4574
-
4575
- INIT_COUNTER(lp, n);
4576
- INIT_PTR(lp, 0, d_ptr, d_step);
4577
-
4578
- idx = f_max_index(n, d_ptr, d_step);
4579
-
4580
- o_ptr = NDL_PTR(lp, 1);
4581
- *(idx_t*)o_ptr = (idx_t)idx;
4582
- }
4583
- #undef idx_t
4584
-
4585
- #define idx_t int32_t
4586
- static void iter_sfloat_argmax_arg32(na_loop_t* const lp) {
4587
- size_t n, idx;
4588
- char *d_ptr, *o_ptr;
4589
- ssize_t d_step;
4590
-
4591
- INIT_COUNTER(lp, n);
4592
- INIT_PTR(lp, 0, d_ptr, d_step);
4593
-
4594
- idx = f_max_index(n, d_ptr, d_step);
4595
-
4596
- o_ptr = NDL_PTR(lp, 1);
4597
- *(idx_t*)o_ptr = (idx_t)idx;
4598
- }
4599
- #undef idx_t
4600
-
4601
- #define idx_t int64_t
4602
- static void iter_sfloat_argmax_arg64_nan(na_loop_t* const lp) {
4603
- size_t n, idx;
4604
- char *d_ptr, *o_ptr;
4605
- ssize_t d_step;
4606
-
4607
- INIT_COUNTER(lp, n);
4608
- INIT_PTR(lp, 0, d_ptr, d_step);
4609
-
4610
- idx = f_max_index_nan(n, d_ptr, d_step);
4611
-
4612
- o_ptr = NDL_PTR(lp, 1);
4613
- *(idx_t*)o_ptr = (idx_t)idx;
4614
- }
4615
- #undef idx_t
4616
-
4617
- #define idx_t int32_t
4618
- static void iter_sfloat_argmax_arg32_nan(na_loop_t* const lp) {
4619
- size_t n, idx;
4620
- char *d_ptr, *o_ptr;
4621
- ssize_t d_step;
4622
-
4623
- INIT_COUNTER(lp, n);
4624
- INIT_PTR(lp, 0, d_ptr, d_step);
4625
-
4626
- idx = f_max_index_nan(n, d_ptr, d_step);
4627
-
4628
- o_ptr = NDL_PTR(lp, 1);
4629
- *(idx_t*)o_ptr = (idx_t)idx;
4630
- }
4631
- #undef idx_t
4632
-
4633
- static VALUE sfloat_argmax(int argc, VALUE* argv, VALUE self) {
4634
- narray_t* na;
4635
- VALUE reduce;
4636
- ndfunc_arg_in_t ain[2] = { { Qnil, 0 }, { sym_reduce, 0 } };
4637
- ndfunc_arg_out_t aout[1] = { { 0, 0, 0 } };
4638
- ndfunc_t ndf = { 0, STRIDE_LOOP_NIP | NDF_FLAT_REDUCE | NDF_EXTRACT, 2, 1, ain, aout };
4639
-
4640
- GetNArray(self, na);
4641
- if (na->ndim == 0) {
4642
- return INT2FIX(0);
4643
- }
4644
- if (na->size > (~(u_int32_t)0)) {
4645
- aout[0].type = numo_cInt64;
4646
- ndf.func = iter_sfloat_argmax_arg64;
4647
-
4648
- reduce = na_reduce_dimension(argc, argv, 1, &self, &ndf, iter_sfloat_argmax_arg64_nan);
4649
-
4650
- } else {
4651
- aout[0].type = numo_cInt32;
4652
- ndf.func = iter_sfloat_argmax_arg32;
4653
-
4654
- reduce = na_reduce_dimension(argc, argv, 1, &self, &ndf, iter_sfloat_argmax_arg32_nan);
4655
- }
4656
-
4657
- return na_ndloop(&ndf, 2, self, reduce);
4658
- }
4659
-
4660
- #define idx_t int64_t
4661
- static void iter_sfloat_argmin_arg64(na_loop_t* const lp) {
4662
- size_t n, idx;
4663
- char *d_ptr, *o_ptr;
4664
- ssize_t d_step;
4665
-
4666
- INIT_COUNTER(lp, n);
4667
- INIT_PTR(lp, 0, d_ptr, d_step);
4668
-
4669
- idx = f_min_index(n, d_ptr, d_step);
4670
-
4671
- o_ptr = NDL_PTR(lp, 1);
4672
- *(idx_t*)o_ptr = (idx_t)idx;
4673
- }
4674
- #undef idx_t
4675
-
4676
- #define idx_t int32_t
4677
- static void iter_sfloat_argmin_arg32(na_loop_t* const lp) {
4678
- size_t n, idx;
4679
- char *d_ptr, *o_ptr;
4680
- ssize_t d_step;
4681
-
4682
- INIT_COUNTER(lp, n);
4683
- INIT_PTR(lp, 0, d_ptr, d_step);
4684
-
4685
- idx = f_min_index(n, d_ptr, d_step);
4686
-
4687
- o_ptr = NDL_PTR(lp, 1);
4688
- *(idx_t*)o_ptr = (idx_t)idx;
4689
- }
4690
- #undef idx_t
4691
-
4692
- #define idx_t int64_t
4693
- static void iter_sfloat_argmin_arg64_nan(na_loop_t* const lp) {
4694
- size_t n, idx;
4695
- char *d_ptr, *o_ptr;
4696
- ssize_t d_step;
4697
-
4698
- INIT_COUNTER(lp, n);
4699
- INIT_PTR(lp, 0, d_ptr, d_step);
4700
-
4701
- idx = f_min_index_nan(n, d_ptr, d_step);
4702
-
4703
- o_ptr = NDL_PTR(lp, 1);
4704
- *(idx_t*)o_ptr = (idx_t)idx;
4705
- }
4706
- #undef idx_t
4707
-
4708
- #define idx_t int32_t
4709
- static void iter_sfloat_argmin_arg32_nan(na_loop_t* const lp) {
4710
- size_t n, idx;
4711
- char *d_ptr, *o_ptr;
4712
- ssize_t d_step;
4713
-
4714
- INIT_COUNTER(lp, n);
4715
- INIT_PTR(lp, 0, d_ptr, d_step);
4716
-
4717
- idx = f_min_index_nan(n, d_ptr, d_step);
4718
-
4719
- o_ptr = NDL_PTR(lp, 1);
4720
- *(idx_t*)o_ptr = (idx_t)idx;
4721
- }
4722
- #undef idx_t
4723
-
4724
- static VALUE sfloat_argmin(int argc, VALUE* argv, VALUE self) {
4725
- narray_t* na;
4726
- VALUE reduce;
4727
- ndfunc_arg_in_t ain[2] = { { Qnil, 0 }, { sym_reduce, 0 } };
4728
- ndfunc_arg_out_t aout[1] = { { 0, 0, 0 } };
4729
- ndfunc_t ndf = { 0, STRIDE_LOOP_NIP | NDF_FLAT_REDUCE | NDF_EXTRACT, 2, 1, ain, aout };
4730
-
4731
- GetNArray(self, na);
4732
- if (na->ndim == 0) {
4733
- return INT2FIX(0);
4734
- }
4735
- if (na->size > (~(u_int32_t)0)) {
4736
- aout[0].type = numo_cInt64;
4737
- ndf.func = iter_sfloat_argmin_arg64;
4738
-
4739
- reduce = na_reduce_dimension(argc, argv, 1, &self, &ndf, iter_sfloat_argmin_arg64_nan);
4740
-
4741
- } else {
4742
- aout[0].type = numo_cInt32;
4743
- ndf.func = iter_sfloat_argmin_arg32;
4744
-
4745
- reduce = na_reduce_dimension(argc, argv, 1, &self, &ndf, iter_sfloat_argmin_arg32_nan);
4746
- }
4747
-
4748
- return na_ndloop(&ndf, 2, self, reduce);
4749
- }
4750
-
4751
- static void iter_sfloat_minmax(na_loop_t* const lp) {
4752
- size_t n;
4753
- char* p1;
4754
- ssize_t s1;
4755
- dtype xmin, xmax;
4756
-
4757
- INIT_COUNTER(lp, n);
4758
- INIT_PTR(lp, 0, p1, s1);
4759
-
4760
- f_minmax(n, p1, s1, &xmin, &xmax);
4761
-
4762
- *(dtype*)(lp->args[1].ptr + lp->args[1].iter[0].pos) = xmin;
4763
- *(dtype*)(lp->args[2].ptr + lp->args[2].iter[0].pos) = xmax;
4764
- }
4765
- static void iter_sfloat_minmax_nan(na_loop_t* const lp) {
4766
- size_t n;
4767
- char* p1;
4768
- ssize_t s1;
4769
- dtype xmin, xmax;
4770
-
4771
- INIT_COUNTER(lp, n);
4772
- INIT_PTR(lp, 0, p1, s1);
4773
-
4774
- f_minmax_nan(n, p1, s1, &xmin, &xmax);
4775
-
4776
- *(dtype*)(lp->args[1].ptr + lp->args[1].iter[0].pos) = xmin;
4777
- *(dtype*)(lp->args[2].ptr + lp->args[2].iter[0].pos) = xmax;
4778
- }
4779
-
4780
- static VALUE sfloat_minmax(int argc, VALUE* argv, VALUE self) {
4781
- VALUE reduce;
4782
- ndfunc_arg_in_t ain[2] = { { cT, 0 }, { sym_reduce, 0 } };
4783
- ndfunc_arg_out_t aout[2] = { { cT, 0 }, { cT, 0 } };
4784
- ndfunc_t ndf = {
4785
- iter_sfloat_minmax, STRIDE_LOOP_NIP | NDF_FLAT_REDUCE | NDF_EXTRACT, 2, 2, ain, aout
4786
- };
4787
-
4788
- reduce = na_reduce_dimension(argc, argv, 1, &self, &ndf, iter_sfloat_minmax_nan);
4789
-
4790
- return na_ndloop(&ndf, 2, self, reduce);
4791
- }
4792
-
4793
- static void iter_sfloat_s_maximum(na_loop_t* const lp) {
4794
- size_t i, n;
4795
- char *p1, *p2, *p3;
4796
- ssize_t s1, s2, s3;
4797
-
4798
- INIT_COUNTER(lp, n);
4799
- INIT_PTR(lp, 0, p1, s1);
4800
- INIT_PTR(lp, 1, p2, s2);
4801
- INIT_PTR(lp, 2, p3, s3);
4802
-
4803
- for (i = 0; i < n; i++) {
4804
- dtype x, y, z;
4805
- GET_DATA_STRIDE(p1, s1, dtype, x);
4806
- GET_DATA_STRIDE(p2, s2, dtype, y);
4807
- GET_DATA(p3, dtype, z);
4808
- z = f_maximum(x, y);
4809
- SET_DATA_STRIDE(p3, s3, dtype, z);
4810
- }
4811
- }
4812
- static void iter_sfloat_s_maximum_nan(na_loop_t* const lp) {
4813
- size_t i, n;
4814
- char *p1, *p2, *p3;
4815
- ssize_t s1, s2, s3;
4816
-
4817
- INIT_COUNTER(lp, n);
4818
- INIT_PTR(lp, 0, p1, s1);
4819
- INIT_PTR(lp, 1, p2, s2);
4820
- INIT_PTR(lp, 2, p3, s3);
4821
-
4822
- for (i = 0; i < n; i++) {
4823
- dtype x, y, z;
4824
- GET_DATA_STRIDE(p1, s1, dtype, x);
4825
- GET_DATA_STRIDE(p2, s2, dtype, y);
4826
- GET_DATA(p3, dtype, z);
4827
- z = f_maximum_nan(x, y);
4828
- SET_DATA_STRIDE(p3, s3, dtype, z);
4829
- }
4830
- }
4831
-
4832
- static VALUE sfloat_s_maximum(int argc, VALUE* argv, VALUE mod) {
4833
- VALUE a1 = Qnil;
4834
- VALUE a2 = Qnil;
4835
- ndfunc_arg_in_t ain[2] = { { cT, 0 }, { cT, 0 } };
4836
- ndfunc_arg_out_t aout[1] = { { cT, 0 } };
4837
- ndfunc_t ndf = { iter_sfloat_s_maximum, STRIDE_LOOP_NIP, 2, 1, ain, aout };
4838
-
4839
- VALUE kw_hash = Qnil;
4840
- ID kw_table[1] = { id_nan };
4841
- VALUE opts[1] = { Qundef };
4842
-
4843
- rb_scan_args(argc, argv, "20:", &a1, &a2, &kw_hash);
4844
- rb_get_kwargs(kw_hash, kw_table, 0, 1, opts);
4845
- if (opts[0] != Qundef) {
4846
- ndf.func = iter_sfloat_s_maximum_nan;
4847
- }
4848
-
4849
- return na_ndloop(&ndf, 2, a1, a2);
4850
- }
4851
-
4852
- static void iter_sfloat_s_minimum(na_loop_t* const lp) {
4853
- size_t i, n;
4854
- char *p1, *p2, *p3;
4855
- ssize_t s1, s2, s3;
4856
-
4857
- INIT_COUNTER(lp, n);
4858
- INIT_PTR(lp, 0, p1, s1);
4859
- INIT_PTR(lp, 1, p2, s2);
4860
- INIT_PTR(lp, 2, p3, s3);
4861
-
4862
- for (i = 0; i < n; i++) {
4863
- dtype x, y, z;
4864
- GET_DATA_STRIDE(p1, s1, dtype, x);
4865
- GET_DATA_STRIDE(p2, s2, dtype, y);
4866
- GET_DATA(p3, dtype, z);
4867
- z = f_minimum(x, y);
4868
- SET_DATA_STRIDE(p3, s3, dtype, z);
4869
- }
4870
- }
4871
- static void iter_sfloat_s_minimum_nan(na_loop_t* const lp) {
4872
- size_t i, n;
4873
- char *p1, *p2, *p3;
4874
- ssize_t s1, s2, s3;
4875
-
4876
- INIT_COUNTER(lp, n);
4877
- INIT_PTR(lp, 0, p1, s1);
4878
- INIT_PTR(lp, 1, p2, s2);
4879
- INIT_PTR(lp, 2, p3, s3);
4880
-
4881
- for (i = 0; i < n; i++) {
4882
- dtype x, y, z;
4883
- GET_DATA_STRIDE(p1, s1, dtype, x);
4884
- GET_DATA_STRIDE(p2, s2, dtype, y);
4885
- GET_DATA(p3, dtype, z);
4886
- z = f_minimum_nan(x, y);
4887
- SET_DATA_STRIDE(p3, s3, dtype, z);
4888
- }
4889
- }
4890
-
4891
- static VALUE sfloat_s_minimum(int argc, VALUE* argv, VALUE mod) {
4892
- VALUE a1 = Qnil;
4893
- VALUE a2 = Qnil;
4894
- ndfunc_arg_in_t ain[2] = { { cT, 0 }, { cT, 0 } };
4895
- ndfunc_arg_out_t aout[1] = { { cT, 0 } };
4896
- ndfunc_t ndf = { iter_sfloat_s_minimum, STRIDE_LOOP_NIP, 2, 1, ain, aout };
4897
-
4898
- VALUE kw_hash = Qnil;
4899
- ID kw_table[1] = { id_nan };
4900
- VALUE opts[1] = { Qundef };
4901
-
4902
- rb_scan_args(argc, argv, "20:", &a1, &a2, &kw_hash);
4903
- rb_get_kwargs(kw_hash, kw_table, 0, 1, opts);
4904
- if (opts[0] != Qundef) {
4905
- ndf.func = iter_sfloat_s_minimum_nan;
4906
- }
4907
-
4908
- return na_ndloop(&ndf, 2, a1, a2);
4909
- }
4910
-
4911
- static void iter_sfloat_cumsum(na_loop_t* const lp) {
4912
- size_t i;
4913
- char *p1, *p2;
4914
- ssize_t s1, s2;
4915
- dtype x, y;
4916
-
4917
- INIT_COUNTER(lp, i);
4918
- INIT_PTR(lp, 0, p1, s1);
4919
- INIT_PTR(lp, 1, p2, s2);
4920
-
4921
- GET_DATA_STRIDE(p1, s1, dtype, x);
4922
- SET_DATA_STRIDE(p2, s2, dtype, x);
4923
- for (i--; i--;) {
4924
- GET_DATA_STRIDE(p1, s1, dtype, y);
4925
- m_cumsum(x, y);
4926
- SET_DATA_STRIDE(p2, s2, dtype, x);
4927
- }
4928
- }
4929
- static void iter_sfloat_cumsum_nan(na_loop_t* const lp) {
4930
- size_t i;
4931
- char *p1, *p2;
4932
- ssize_t s1, s2;
4933
- dtype x, y;
4934
-
4935
- INIT_COUNTER(lp, i);
4936
- INIT_PTR(lp, 0, p1, s1);
4937
- INIT_PTR(lp, 1, p2, s2);
4938
-
4939
- GET_DATA_STRIDE(p1, s1, dtype, x);
4940
- SET_DATA_STRIDE(p2, s2, dtype, x);
4941
- for (i--; i--;) {
4942
- GET_DATA_STRIDE(p1, s1, dtype, y);
4943
- m_cumsum_nan(x, y);
4944
- SET_DATA_STRIDE(p2, s2, dtype, x);
4945
- }
4946
- }
4947
-
4948
- static VALUE sfloat_cumsum(int argc, VALUE* argv, VALUE self) {
4949
- VALUE reduce;
4950
- ndfunc_arg_in_t ain[2] = { { cT, 0 }, { sym_reduce, 0 } };
4951
- ndfunc_arg_out_t aout[1] = { { cT, 0 } };
4952
- ndfunc_t ndf = {
4953
- iter_sfloat_cumsum, STRIDE_LOOP | NDF_FLAT_REDUCE | NDF_CUM, 2, 1, ain, aout
4954
- };
4955
-
4956
- reduce = na_reduce_dimension(argc, argv, 1, &self, &ndf, iter_sfloat_cumsum_nan);
4957
-
4958
- return na_ndloop(&ndf, 2, self, reduce);
4959
- }
4960
-
4961
- static void iter_sfloat_cumprod(na_loop_t* const lp) {
4962
- size_t i;
4963
- char *p1, *p2;
4964
- ssize_t s1, s2;
4965
- dtype x, y;
4966
-
4967
- INIT_COUNTER(lp, i);
4968
- INIT_PTR(lp, 0, p1, s1);
4969
- INIT_PTR(lp, 1, p2, s2);
4970
-
4971
- GET_DATA_STRIDE(p1, s1, dtype, x);
4972
- SET_DATA_STRIDE(p2, s2, dtype, x);
4973
- for (i--; i--;) {
4974
- GET_DATA_STRIDE(p1, s1, dtype, y);
4975
- m_cumprod(x, y);
4976
- SET_DATA_STRIDE(p2, s2, dtype, x);
4977
- }
4978
- }
4979
- static void iter_sfloat_cumprod_nan(na_loop_t* const lp) {
4980
- size_t i;
4981
- char *p1, *p2;
4982
- ssize_t s1, s2;
4983
- dtype x, y;
4984
-
4985
- INIT_COUNTER(lp, i);
4986
- INIT_PTR(lp, 0, p1, s1);
4987
- INIT_PTR(lp, 1, p2, s2);
4988
-
4989
- GET_DATA_STRIDE(p1, s1, dtype, x);
4990
- SET_DATA_STRIDE(p2, s2, dtype, x);
4991
- for (i--; i--;) {
4992
- GET_DATA_STRIDE(p1, s1, dtype, y);
4993
- m_cumprod_nan(x, y);
4994
- SET_DATA_STRIDE(p2, s2, dtype, x);
1943
+ p2 += s2;
1944
+ }
1945
+ return;
1946
+ //
1947
+ }
1948
+ }
1949
+ for (i = 0; i < n; i++) {
1950
+ GET_DATA_STRIDE(p1, s1, dtype, x);
1951
+ x = m_square(x);
1952
+ SET_DATA_STRIDE(p2, s2, dtype, x);
1953
+ }
1954
+ //
1955
+ }
4995
1956
  }
4996
1957
  }
4997
1958
 
4998
- static VALUE sfloat_cumprod(int argc, VALUE* argv, VALUE self) {
4999
- VALUE reduce;
5000
- ndfunc_arg_in_t ain[2] = { { cT, 0 }, { sym_reduce, 0 } };
1959
+ static VALUE sfloat_square(VALUE self) {
1960
+ ndfunc_arg_in_t ain[1] = { { cT, 0 } };
5001
1961
  ndfunc_arg_out_t aout[1] = { { cT, 0 } };
5002
- ndfunc_t ndf = {
5003
- iter_sfloat_cumprod, STRIDE_LOOP | NDF_FLAT_REDUCE | NDF_CUM, 2, 1, ain, aout
5004
- };
5005
-
5006
- reduce = na_reduce_dimension(argc, argv, 1, &self, &ndf, iter_sfloat_cumprod_nan);
1962
+ ndfunc_t ndf = { iter_sfloat_square, FULL_LOOP, 1, 1, ain, aout };
5007
1963
 
5008
- return na_ndloop(&ndf, 2, self, reduce);
1964
+ return na_ndloop(&ndf, 1, self);
5009
1965
  }
5010
1966
 
5011
- //
5012
- static void iter_sfloat_mulsum(na_loop_t* const lp) {
5013
- size_t i, n;
5014
- char *p1, *p2, *p3;
5015
- ssize_t s1, s2, s3;
5016
-
5017
- INIT_COUNTER(lp, n);
5018
- INIT_PTR(lp, 0, p1, s1);
5019
- INIT_PTR(lp, 1, p2, s2);
5020
- INIT_PTR(lp, 2, p3, s3);
1967
+ #define check_intdivzero(y) \
1968
+ {}
5021
1969
 
5022
- if (s3 == 0) {
5023
- dtype z;
5024
- // Reduce loop
5025
- GET_DATA(p3, dtype, z);
5026
- for (i = 0; i < n; i++) {
5027
- dtype x, y;
5028
- GET_DATA_STRIDE(p1, s1, dtype, x);
5029
- GET_DATA_STRIDE(p2, s2, dtype, y);
5030
- m_mulsum(x, y, z);
5031
- }
5032
- SET_DATA(p3, dtype, z);
5033
- return;
5034
- } else {
5035
- for (i = 0; i < n; i++) {
5036
- dtype x, y, z;
5037
- GET_DATA_STRIDE(p1, s1, dtype, x);
5038
- GET_DATA_STRIDE(p2, s2, dtype, y);
5039
- GET_DATA(p3, dtype, z);
5040
- m_mulsum(x, y, z);
5041
- SET_DATA_STRIDE(p3, s3, dtype, z);
5042
- }
5043
- }
5044
- }
5045
- //
5046
- static void iter_sfloat_mulsum_nan(na_loop_t* const lp) {
5047
- size_t i, n;
1970
+ static void iter_sfloat_copysign(na_loop_t* const lp) {
1971
+ size_t i = 0;
1972
+ size_t n;
5048
1973
  char *p1, *p2, *p3;
5049
1974
  ssize_t s1, s2, s3;
5050
1975
 
@@ -5053,342 +1978,164 @@ static void iter_sfloat_mulsum_nan(na_loop_t* const lp) {
5053
1978
  INIT_PTR(lp, 1, p2, s2);
5054
1979
  INIT_PTR(lp, 2, p3, s3);
5055
1980
 
5056
- if (s3 == 0) {
5057
- dtype z;
5058
- // Reduce loop
5059
- GET_DATA(p3, dtype, z);
5060
- for (i = 0; i < n; i++) {
5061
- dtype x, y;
5062
- GET_DATA_STRIDE(p1, s1, dtype, x);
5063
- GET_DATA_STRIDE(p2, s2, dtype, y);
5064
- m_mulsum_nan(x, y, z);
5065
- }
5066
- SET_DATA(p3, dtype, z);
5067
- return;
5068
- } else {
5069
- for (i = 0; i < n; i++) {
5070
- dtype x, y, z;
5071
- GET_DATA_STRIDE(p1, s1, dtype, x);
5072
- GET_DATA_STRIDE(p2, s2, dtype, y);
5073
- GET_DATA(p3, dtype, z);
5074
- m_mulsum_nan(x, y, z);
5075
- SET_DATA_STRIDE(p3, s3, dtype, z);
5076
- }
5077
- }
5078
- }
5079
- //
1981
+ //
1982
+ if (is_aligned(p1, sizeof(dtype)) && is_aligned(p2, sizeof(dtype)) &&
1983
+ is_aligned(p3, sizeof(dtype))) {
5080
1984
 
5081
- static VALUE sfloat_mulsum_self(int argc, VALUE* argv, VALUE self) {
5082
- VALUE v, reduce;
5083
- VALUE naryv[2];
5084
- ndfunc_arg_in_t ain[4] = { { cT, 0 }, { cT, 0 }, { sym_reduce, 0 }, { sym_init, 0 } };
5085
- ndfunc_arg_out_t aout[1] = { { cT, 0 } };
5086
- ndfunc_t ndf = { iter_sfloat_mulsum, STRIDE_LOOP_NIP, 4, 1, ain, aout };
1985
+ if (s1 == sizeof(dtype) && s2 == sizeof(dtype) && s3 == sizeof(dtype)) {
1986
+ if (p1 == p3) { // inplace case
1987
+ for (; i < n; i++) {
1988
+ check_intdivzero(((dtype*)p2)[i]);
1989
+ ((dtype*)p1)[i] = m_copysign(((dtype*)p1)[i], ((dtype*)p2)[i]);
1990
+ }
1991
+ } else {
1992
+ for (; i < n; i++) {
1993
+ check_intdivzero(((dtype*)p2)[i]);
1994
+ ((dtype*)p3)[i] = m_copysign(((dtype*)p1)[i], ((dtype*)p2)[i]);
1995
+ }
1996
+ }
1997
+ return;
1998
+ }
5087
1999
 
5088
- if (argc < 1) {
5089
- rb_raise(rb_eArgError, "wrong number of arguments (%d for >=1)", argc);
5090
- }
5091
- // should fix below: [self.ndim,other.ndim].max or?
5092
- naryv[0] = self;
5093
- naryv[1] = argv[0];
5094
- //
5095
- reduce = na_reduce_dimension(argc - 1, argv + 1, 2, naryv, &ndf, iter_sfloat_mulsum_nan);
5096
- //
2000
+ if (is_aligned_step(s1, sizeof(dtype)) && is_aligned_step(s2, sizeof(dtype)) &&
2001
+ is_aligned_step(s3, sizeof(dtype))) {
2002
+ //
5097
2003
 
5098
- v = na_ndloop(&ndf, 4, self, argv[0], reduce, m_mulsum_init);
5099
- return sfloat_extract(v);
5100
- }
2004
+ if (s2 == 0) { // Broadcasting from scalar value.
2005
+ check_intdivzero(*(dtype*)p2);
2006
+ if (s1 == sizeof(dtype) && s3 == sizeof(dtype)) {
2007
+ if (p1 == p3) { // inplace case
2008
+ for (; i < n; i++) {
2009
+ ((dtype*)p1)[i] = m_copysign(((dtype*)p1)[i], *(dtype*)p2);
2010
+ }
2011
+ } else {
2012
+ for (; i < n; i++) {
2013
+ ((dtype*)p3)[i] = m_copysign(((dtype*)p1)[i], *(dtype*)p2);
2014
+ }
2015
+ }
2016
+ } else {
2017
+ for (i = 0; i < n; i++) {
2018
+ *(dtype*)p3 = m_copysign(*(dtype*)p1, *(dtype*)p2);
2019
+ p1 += s1;
2020
+ p3 += s3;
2021
+ }
2022
+ }
2023
+ } else {
2024
+ if (p1 == p3) { // inplace case
2025
+ for (i = 0; i < n; i++) {
2026
+ check_intdivzero(*(dtype*)p2);
2027
+ *(dtype*)p1 = m_copysign(*(dtype*)p1, *(dtype*)p2);
2028
+ p1 += s1;
2029
+ p2 += s2;
2030
+ }
2031
+ } else {
2032
+ for (i = 0; i < n; i++) {
2033
+ check_intdivzero(*(dtype*)p2);
2034
+ *(dtype*)p3 = m_copysign(*(dtype*)p1, *(dtype*)p2);
2035
+ p1 += s1;
2036
+ p2 += s2;
2037
+ p3 += s3;
2038
+ }
2039
+ }
2040
+ }
5101
2041
 
5102
- static VALUE sfloat_mulsum(int argc, VALUE* argv, VALUE self) {
5103
- //
5104
- VALUE klass, v;
5105
- //
5106
- if (argc < 1) {
5107
- rb_raise(rb_eArgError, "wrong number of arguments (%d for >=1)", argc);
2042
+ return;
2043
+ //
2044
+ }
5108
2045
  }
5109
- //
5110
- klass = na_upcast(rb_obj_class(self), rb_obj_class(argv[0]));
5111
- if (klass == cT) {
5112
- return sfloat_mulsum_self(argc, argv, self);
5113
- } else {
5114
- v = rb_funcall(klass, id_cast, 1, self);
5115
- //
5116
- return rb_funcallv_kw(v, rb_intern("mulsum"), argc, argv, RB_PASS_CALLED_KEYWORDS);
5117
- //
2046
+ for (i = 0; i < n; i++) {
2047
+ dtype x, y, z;
2048
+ GET_DATA_STRIDE(p1, s1, dtype, x);
2049
+ GET_DATA_STRIDE(p2, s2, dtype, y);
2050
+ check_intdivzero(y);
2051
+ z = m_copysign(x, y);
2052
+ SET_DATA_STRIDE(p3, s3, dtype, z);
5118
2053
  }
5119
2054
  //
5120
2055
  }
2056
+ #undef check_intdivzero
5121
2057
 
5122
- typedef dtype seq_data_t;
2058
+ static VALUE sfloat_copysign_self(VALUE self, VALUE other) {
2059
+ ndfunc_arg_in_t ain[2] = { { cT, 0 }, { cT, 0 } };
2060
+ ndfunc_arg_out_t aout[1] = { { cT, 0 } };
2061
+ ndfunc_t ndf = { iter_sfloat_copysign, STRIDE_LOOP, 2, 1, ain, aout };
5123
2062
 
5124
- typedef double seq_count_t;
2063
+ return na_ndloop(&ndf, 2, self, other);
2064
+ }
5125
2065
 
5126
- typedef struct {
5127
- seq_data_t beg;
5128
- seq_data_t step;
5129
- seq_count_t count;
5130
- } seq_opt_t;
2066
+ static VALUE sfloat_copysign(VALUE self, VALUE other) {
5131
2067
 
5132
- static void iter_sfloat_seq(na_loop_t* const lp) {
5133
- size_t i;
5134
- char* p1;
5135
- ssize_t s1;
5136
- size_t* idx1;
5137
- dtype x;
5138
- seq_data_t beg, step;
5139
- seq_count_t c;
5140
- seq_opt_t* g;
2068
+ VALUE klass, v;
5141
2069
 
5142
- INIT_COUNTER(lp, i);
5143
- INIT_PTR_IDX(lp, 0, p1, s1, idx1);
5144
- g = (seq_opt_t*)(lp->opt_ptr);
5145
- beg = g->beg;
5146
- step = g->step;
5147
- c = g->count;
5148
- if (idx1) {
5149
- for (; i--;) {
5150
- x = f_seq(beg, step, c++);
5151
- *(dtype*)(p1 + *idx1) = x;
5152
- idx1++;
5153
- }
2070
+ klass = na_upcast(rb_obj_class(self), rb_obj_class(other));
2071
+ if (klass == cT) {
2072
+ return sfloat_copysign_self(self, other);
5154
2073
  } else {
5155
- for (; i--;) {
5156
- x = f_seq(beg, step, c++);
5157
- *(dtype*)(p1) = x;
5158
- p1 += s1;
5159
- }
5160
- }
5161
- g->count = c;
5162
- }
5163
-
5164
- static VALUE sfloat_seq(int argc, VALUE* args, VALUE self) {
5165
- seq_opt_t* g;
5166
- VALUE vbeg = Qnil, vstep = Qnil;
5167
- ndfunc_arg_in_t ain[1] = { { OVERWRITE, 0 } };
5168
- ndfunc_t ndf = { iter_sfloat_seq, FULL_LOOP, 1, 0, ain, 0 };
5169
-
5170
- g = ALLOCA_N(seq_opt_t, 1);
5171
- g->beg = m_zero;
5172
- g->step = m_one;
5173
- g->count = 0;
5174
- rb_scan_args(argc, args, "02", &vbeg, &vstep);
5175
- if (vbeg != Qnil) {
5176
- g->beg = m_num_to_data(vbeg);
5177
- }
5178
- if (vstep != Qnil) {
5179
- g->step = m_num_to_data(vstep);
2074
+ v = rb_funcall(klass, id_cast, 1, self);
2075
+ return rb_funcall(v, id_copysign, 1, other);
5180
2076
  }
5181
-
5182
- na_ndloop3(&ndf, g, 1, self);
5183
- return self;
5184
2077
  }
5185
2078
 
5186
- typedef struct {
5187
- seq_data_t beg;
5188
- seq_data_t step;
5189
- seq_data_t base;
5190
- seq_count_t count;
5191
- } logseq_opt_t;
5192
-
5193
- static void iter_sfloat_logseq(na_loop_t* const lp) {
2079
+ static void iter_sfloat_signbit(na_loop_t* const lp) {
5194
2080
  size_t i;
5195
2081
  char* p1;
5196
- ssize_t s1;
2082
+ BIT_DIGIT* a2;
2083
+ size_t p2;
2084
+ ssize_t s1, s2;
5197
2085
  size_t* idx1;
5198
2086
  dtype x;
5199
- seq_data_t beg, step, base;
5200
- seq_count_t c;
5201
- logseq_opt_t* g;
5202
-
2087
+ BIT_DIGIT b;
5203
2088
  INIT_COUNTER(lp, i);
5204
2089
  INIT_PTR_IDX(lp, 0, p1, s1, idx1);
5205
- g = (logseq_opt_t*)(lp->opt_ptr);
5206
- beg = g->beg;
5207
- step = g->step;
5208
- base = g->base;
5209
- c = g->count;
2090
+ INIT_PTR_BIT(lp, 1, a2, p2, s2);
5210
2091
  if (idx1) {
5211
2092
  for (; i--;) {
5212
- x = f_seq(beg, step, c++);
5213
- *(dtype*)(p1 + *idx1) = m_pow(base, x);
5214
- idx1++;
2093
+ GET_DATA_INDEX(p1, idx1, dtype, x);
2094
+ b = (m_signbit(x)) ? 1 : 0;
2095
+ STORE_BIT(a2, p2, b);
2096
+ p2 += s2;
5215
2097
  }
5216
2098
  } else {
5217
2099
  for (; i--;) {
5218
- x = f_seq(beg, step, c++);
5219
- *(dtype*)(p1) = m_pow(base, x);
5220
- p1 += s1;
5221
- }
5222
- }
5223
- g->count = c;
5224
- }
5225
-
5226
- static VALUE sfloat_logseq(int argc, VALUE* args, VALUE self) {
5227
- logseq_opt_t* g;
5228
- VALUE vbeg, vstep, vbase;
5229
- ndfunc_arg_in_t ain[1] = { { OVERWRITE, 0 } };
5230
- ndfunc_t ndf = { iter_sfloat_logseq, FULL_LOOP, 1, 0, ain, 0 };
5231
-
5232
- g = ALLOCA_N(logseq_opt_t, 1);
5233
- rb_scan_args(argc, args, "21", &vbeg, &vstep, &vbase);
5234
- g->beg = m_num_to_data(vbeg);
5235
- g->step = m_num_to_data(vstep);
5236
- if (vbase == Qnil) {
5237
- g->base = m_from_real(10);
5238
- } else {
5239
- g->base = m_num_to_data(vbase);
5240
- }
5241
- na_ndloop3(&ndf, g, 1, self);
5242
- return self;
5243
- }
5244
-
5245
- static void iter_sfloat_eye(na_loop_t* const lp) {
5246
- size_t n0, n1;
5247
- size_t i0, i1;
5248
- ssize_t s0, s1;
5249
- char *p0, *p1;
5250
- char* g;
5251
- ssize_t kofs;
5252
- dtype data;
5253
-
5254
- g = (char*)(lp->opt_ptr);
5255
- kofs = *(ssize_t*)g;
5256
- data = *(dtype*)(g + sizeof(ssize_t));
5257
-
5258
- n0 = lp->args[0].shape[0];
5259
- n1 = lp->args[0].shape[1];
5260
- s0 = lp->args[0].iter[0].step;
5261
- s1 = lp->args[0].iter[1].step;
5262
- p0 = NDL_PTR(lp, 0);
5263
-
5264
- for (i0 = 0; i0 < n0; i0++) {
5265
- p1 = p0;
5266
- for (i1 = 0; i1 < n1; i1++) {
5267
- *(dtype*)p1 = (i0 + kofs == i1) ? data : m_zero;
5268
- p1 += s1;
2100
+ GET_DATA_STRIDE(p1, s1, dtype, x);
2101
+ b = (m_signbit(x)) ? 1 : 0;
2102
+ STORE_BIT(a2, p2, b);
2103
+ p2 += s2;
5269
2104
  }
5270
- p0 += s0;
5271
2105
  }
5272
2106
  }
5273
2107
 
5274
- static VALUE sfloat_eye(int argc, VALUE* argv, VALUE self) {
5275
- ndfunc_arg_in_t ain[1] = { { OVERWRITE, 2 } };
5276
- ndfunc_t ndf = { iter_sfloat_eye, NO_LOOP, 1, 0, ain, 0 };
5277
- ssize_t kofs;
5278
- dtype data;
5279
- char* g;
5280
- int nd;
5281
- narray_t* na;
5282
-
5283
- // check arguments
5284
- if (argc > 2) {
5285
- rb_raise(rb_eArgError, "too many arguments (%d for 0..2)", argc);
5286
- } else if (argc == 2) {
5287
- data = m_num_to_data(argv[0]);
5288
- kofs = NUM2SSIZET(argv[1]);
5289
- } else if (argc == 1) {
5290
- data = m_num_to_data(argv[0]);
5291
- kofs = 0;
5292
- } else {
5293
- data = m_one;
5294
- kofs = 0;
5295
- }
5296
-
5297
- GetNArray(self, na);
5298
- nd = na->ndim;
5299
- if (nd < 2) {
5300
- rb_raise(nary_eDimensionError, "less than 2-d array");
5301
- }
5302
-
5303
- // Diagonal offset from the main diagonal.
5304
- if (kofs >= 0) {
5305
- if ((size_t)(kofs) >= na->shape[nd - 1]) {
5306
- rb_raise(
5307
- rb_eArgError,
5308
- "invalid diagonal offset(%" SZF "d) for "
5309
- "last dimension size(%" SZF "d)",
5310
- kofs, na->shape[nd - 1]
5311
- );
5312
- }
5313
- } else {
5314
- if ((size_t)(-kofs) >= na->shape[nd - 2]) {
5315
- rb_raise(
5316
- rb_eArgError,
5317
- "invalid diagonal offset(%" SZF "d) for "
5318
- "last-1 dimension size(%" SZF "d)",
5319
- kofs, na->shape[nd - 2]
5320
- );
5321
- }
5322
- }
5323
-
5324
- g = ALLOCA_N(char, sizeof(ssize_t) + sizeof(dtype));
5325
- *(ssize_t*)g = kofs;
5326
- *(dtype*)(g + sizeof(ssize_t)) = data;
2108
+ static VALUE sfloat_signbit(VALUE self) {
2109
+ ndfunc_arg_in_t ain[1] = { { cT, 0 } };
2110
+ ndfunc_arg_out_t aout[1] = { { numo_cBit, 0 } };
2111
+ ndfunc_t ndf = { iter_sfloat_signbit, FULL_LOOP, 1, 1, ain, aout };
5327
2112
 
5328
- na_ndloop3(&ndf, g, 1, self);
5329
- return self;
2113
+ return na_ndloop(&ndf, 1, self);
5330
2114
  }
5331
2115
 
5332
- typedef struct {
5333
- dtype low;
5334
- dtype max;
5335
- } rand_opt_t;
5336
-
5337
- static void iter_sfloat_rand(na_loop_t* const lp) {
2116
+ static void iter_sfloat_modf(na_loop_t* const lp) {
5338
2117
  size_t i;
5339
- char* p1;
5340
- ssize_t s1;
5341
- size_t* idx1;
5342
- dtype x;
5343
- rand_opt_t* g;
5344
- dtype low;
5345
- dtype max;
5346
-
2118
+ char *p1, *p2, *p3;
2119
+ ssize_t s1, s2, s3;
2120
+ dtype x, y, z;
5347
2121
  INIT_COUNTER(lp, i);
5348
- INIT_PTR_IDX(lp, 0, p1, s1, idx1);
5349
- g = (rand_opt_t*)(lp->opt_ptr);
5350
- low = g->low;
5351
- max = g->max;
5352
-
5353
- if (idx1) {
5354
- for (; i--;) {
5355
- x = m_add(m_rand(max), low);
5356
- SET_DATA_INDEX(p1, idx1, dtype, x);
5357
- }
5358
- } else {
5359
- for (; i--;) {
5360
- x = m_add(m_rand(max), low);
5361
- SET_DATA_STRIDE(p1, s1, dtype, x);
5362
- }
2122
+ INIT_PTR(lp, 0, p1, s1);
2123
+ INIT_PTR(lp, 1, p2, s2);
2124
+ INIT_PTR(lp, 2, p3, s3);
2125
+ for (; i--;) {
2126
+ GET_DATA_STRIDE(p1, s1, dtype, x);
2127
+ m_modf(x, y, z);
2128
+ SET_DATA_STRIDE(p2, s2, dtype, y);
2129
+ SET_DATA_STRIDE(p3, s3, dtype, z);
5363
2130
  }
5364
2131
  }
5365
2132
 
5366
- static VALUE sfloat_rand(int argc, VALUE* args, VALUE self) {
5367
- rand_opt_t g;
5368
- VALUE v1 = Qnil, v2 = Qnil;
5369
- dtype high;
5370
- ndfunc_arg_in_t ain[1] = { { OVERWRITE, 0 } };
5371
- ndfunc_t ndf = { iter_sfloat_rand, FULL_LOOP, 1, 0, ain, 0 };
5372
-
5373
- rb_scan_args(argc, args, "02", &v1, &v2);
5374
- if (v2 == Qnil) {
5375
- g.low = m_zero;
5376
- if (v1 == Qnil) {
5377
-
5378
- g.max = high = m_one;
5379
-
5380
- } else {
5381
- g.max = high = m_num_to_data(v1);
5382
- }
5383
-
5384
- } else {
5385
- g.low = m_num_to_data(v1);
5386
- high = m_num_to_data(v2);
5387
- g.max = m_sub(high, g.low);
5388
- }
2133
+ static VALUE sfloat_modf(VALUE self) {
2134
+ ndfunc_arg_in_t ain[1] = { { cT, 0 } };
2135
+ ndfunc_arg_out_t aout[2] = { { cT, 0 }, { cT, 0 } };
2136
+ ndfunc_t ndf = { iter_sfloat_modf, STRIDE_LOOP, 1, 2, ain, aout };
5389
2137
 
5390
- na_ndloop3(&ndf, &g, 1, self);
5391
- return self;
2138
+ return na_ndloop(&ndf, 1, self);
5392
2139
  }
5393
2140
 
5394
2141
  typedef struct {