numo-narray-alt 0.9.10 → 0.9.12

This diff shows the changes between two publicly available versions of a package, each of which has been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
Files changed (112)
  1. checksums.yaml +4 -4
  2. data/Gemfile +0 -1
  3. data/LICENSE +1 -1
  4. data/README.md +7 -0
  5. data/ext/numo/narray/numo/narray.h +2 -2
  6. data/ext/numo/narray/numo/types/robj_macro.h +1 -1
  7. data/ext/numo/narray/numo/types/robject.h +1 -1
  8. data/ext/numo/narray/src/mh/argmax.h +154 -0
  9. data/ext/numo/narray/src/mh/argmin.h +154 -0
  10. data/ext/numo/narray/src/mh/bincount.h +233 -0
  11. data/ext/numo/narray/src/mh/bit/and.h +225 -0
  12. data/ext/numo/narray/src/mh/bit/left_shift.h +225 -0
  13. data/ext/numo/narray/src/mh/bit/not.h +173 -0
  14. data/ext/numo/narray/src/mh/bit/or.h +225 -0
  15. data/ext/numo/narray/src/mh/bit/right_shift.h +225 -0
  16. data/ext/numo/narray/src/mh/bit/xor.h +225 -0
  17. data/ext/numo/narray/src/mh/clip.h +115 -0
  18. data/ext/numo/narray/src/mh/coerce_cast.h +9 -0
  19. data/ext/numo/narray/src/mh/comp/binary_func.h +37 -0
  20. data/ext/numo/narray/src/mh/comp/eq.h +26 -0
  21. data/ext/numo/narray/src/mh/comp/ge.h +26 -0
  22. data/ext/numo/narray/src/mh/comp/gt.h +26 -0
  23. data/ext/numo/narray/src/mh/comp/le.h +26 -0
  24. data/ext/numo/narray/src/mh/comp/lt.h +26 -0
  25. data/ext/numo/narray/src/mh/comp/ne.h +26 -0
  26. data/ext/numo/narray/src/mh/comp/nearly_eq.h +26 -0
  27. data/ext/numo/narray/src/mh/cumprod.h +98 -0
  28. data/ext/numo/narray/src/mh/cumsum.h +98 -0
  29. data/ext/numo/narray/src/mh/divmod.h +142 -0
  30. data/ext/numo/narray/src/mh/eye.h +82 -0
  31. data/ext/numo/narray/src/mh/fill.h +94 -0
  32. data/ext/numo/narray/src/mh/format.h +108 -0
  33. data/ext/numo/narray/src/mh/format_to_a.h +89 -0
  34. data/ext/numo/narray/src/mh/inspect.h +33 -0
  35. data/ext/numo/narray/src/mh/isfinite.h +42 -0
  36. data/ext/numo/narray/src/mh/isinf.h +42 -0
  37. data/ext/numo/narray/src/mh/isnan.h +42 -0
  38. data/ext/numo/narray/src/mh/isneginf.h +42 -0
  39. data/ext/numo/narray/src/mh/isposinf.h +42 -0
  40. data/ext/numo/narray/src/mh/logseq.h +69 -0
  41. data/ext/numo/narray/src/mh/math/acos.h +2 -2
  42. data/ext/numo/narray/src/mh/math/acosh.h +2 -2
  43. data/ext/numo/narray/src/mh/math/asin.h +2 -2
  44. data/ext/numo/narray/src/mh/math/asinh.h +2 -2
  45. data/ext/numo/narray/src/mh/math/atan.h +2 -2
  46. data/ext/numo/narray/src/mh/math/atan2.h +3 -3
  47. data/ext/numo/narray/src/mh/math/atanh.h +2 -2
  48. data/ext/numo/narray/src/mh/math/cbrt.h +2 -2
  49. data/ext/numo/narray/src/mh/math/cos.h +2 -2
  50. data/ext/numo/narray/src/mh/math/cosh.h +2 -2
  51. data/ext/numo/narray/src/mh/math/erf.h +2 -2
  52. data/ext/numo/narray/src/mh/math/erfc.h +2 -2
  53. data/ext/numo/narray/src/mh/math/exp.h +2 -2
  54. data/ext/numo/narray/src/mh/math/exp10.h +2 -2
  55. data/ext/numo/narray/src/mh/math/exp2.h +2 -2
  56. data/ext/numo/narray/src/mh/math/expm1.h +2 -2
  57. data/ext/numo/narray/src/mh/math/frexp.h +3 -3
  58. data/ext/numo/narray/src/mh/math/hypot.h +3 -3
  59. data/ext/numo/narray/src/mh/math/ldexp.h +3 -3
  60. data/ext/numo/narray/src/mh/math/log.h +2 -2
  61. data/ext/numo/narray/src/mh/math/log10.h +2 -2
  62. data/ext/numo/narray/src/mh/math/log1p.h +2 -2
  63. data/ext/numo/narray/src/mh/math/log2.h +2 -2
  64. data/ext/numo/narray/src/mh/math/sin.h +2 -2
  65. data/ext/numo/narray/src/mh/math/sinc.h +2 -2
  66. data/ext/numo/narray/src/mh/math/sinh.h +2 -2
  67. data/ext/numo/narray/src/mh/math/sqrt.h +8 -8
  68. data/ext/numo/narray/src/mh/math/tan.h +2 -2
  69. data/ext/numo/narray/src/mh/math/tanh.h +2 -2
  70. data/ext/numo/narray/src/mh/math/unary_func.h +3 -3
  71. data/ext/numo/narray/src/mh/max.h +69 -0
  72. data/ext/numo/narray/src/mh/max_index.h +184 -0
  73. data/ext/numo/narray/src/mh/maximum.h +116 -0
  74. data/ext/numo/narray/src/mh/min.h +69 -0
  75. data/ext/numo/narray/src/mh/min_index.h +184 -0
  76. data/ext/numo/narray/src/mh/minimum.h +116 -0
  77. data/ext/numo/narray/src/mh/minmax.h +77 -0
  78. data/ext/numo/narray/src/mh/mulsum.h +185 -0
  79. data/ext/numo/narray/src/mh/op/add.h +78 -0
  80. data/ext/numo/narray/src/mh/op/binary_func.h +423 -0
  81. data/ext/numo/narray/src/mh/op/div.h +118 -0
  82. data/ext/numo/narray/src/mh/op/mod.h +108 -0
  83. data/ext/numo/narray/src/mh/op/mul.h +78 -0
  84. data/ext/numo/narray/src/mh/op/sub.h +78 -0
  85. data/ext/numo/narray/src/mh/prod.h +69 -0
  86. data/ext/numo/narray/src/mh/ptp.h +69 -0
  87. data/ext/numo/narray/src/mh/rand.h +315 -0
  88. data/ext/numo/narray/src/mh/round/ceil.h +11 -0
  89. data/ext/numo/narray/src/mh/round/floor.h +11 -0
  90. data/ext/numo/narray/src/mh/round/rint.h +9 -0
  91. data/ext/numo/narray/src/mh/round/round.h +11 -0
  92. data/ext/numo/narray/src/mh/round/trunc.h +11 -0
  93. data/ext/numo/narray/src/mh/round/unary_func.h +127 -0
  94. data/ext/numo/narray/src/mh/seq.h +130 -0
  95. data/ext/numo/narray/src/mh/sum.h +69 -0
  96. data/ext/numo/narray/src/mh/to_a.h +78 -0
  97. data/ext/numo/narray/src/t_bit.c +45 -234
  98. data/ext/numo/narray/src/t_dcomplex.c +608 -2369
  99. data/ext/numo/narray/src/t_dfloat.c +485 -3736
  100. data/ext/numo/narray/src/t_int16.c +743 -3444
  101. data/ext/numo/narray/src/t_int32.c +745 -3445
  102. data/ext/numo/narray/src/t_int64.c +743 -3446
  103. data/ext/numo/narray/src/t_int8.c +678 -3040
  104. data/ext/numo/narray/src/t_robject.c +771 -3548
  105. data/ext/numo/narray/src/t_scomplex.c +607 -2368
  106. data/ext/numo/narray/src/t_sfloat.c +440 -3693
  107. data/ext/numo/narray/src/t_uint16.c +743 -3440
  108. data/ext/numo/narray/src/t_uint32.c +743 -3440
  109. data/ext/numo/narray/src/t_uint64.c +743 -3442
  110. data/ext/numo/narray/src/t_uint8.c +678 -3038
  111. data/lib/numo/narray.rb +2 -3
  112. metadata +62 -3
@@ -42,10 +42,59 @@ static ID id_to_a;
42
42
  VALUE cT;
43
43
  extern VALUE cRT;
44
44
 
45
+ #include "mh/coerce_cast.h"
46
+ #include "mh/to_a.h"
47
+ #include "mh/fill.h"
48
+ #include "mh/format.h"
49
+ #include "mh/format_to_a.h"
50
+ #include "mh/inspect.h"
51
+ #include "mh/op/add.h"
52
+ #include "mh/op/sub.h"
53
+ #include "mh/op/mul.h"
54
+ #include "mh/op/div.h"
55
+ #include "mh/op/mod.h"
56
+ #include "mh/divmod.h"
57
+ #include "mh/round/floor.h"
58
+ #include "mh/round/round.h"
59
+ #include "mh/round/ceil.h"
60
+ #include "mh/round/trunc.h"
61
+ #include "mh/round/rint.h"
62
+ #include "mh/comp/eq.h"
63
+ #include "mh/comp/ne.h"
64
+ #include "mh/comp/nearly_eq.h"
65
+ #include "mh/comp/gt.h"
66
+ #include "mh/comp/ge.h"
67
+ #include "mh/comp/lt.h"
68
+ #include "mh/comp/le.h"
69
+ #include "mh/clip.h"
70
+ #include "mh/isnan.h"
71
+ #include "mh/isinf.h"
72
+ #include "mh/isposinf.h"
73
+ #include "mh/isneginf.h"
74
+ #include "mh/isfinite.h"
75
+ #include "mh/sum.h"
76
+ #include "mh/prod.h"
45
77
  #include "mh/mean.h"
46
78
  #include "mh/var.h"
47
79
  #include "mh/stddev.h"
48
80
  #include "mh/rms.h"
81
+ #include "mh/min.h"
82
+ #include "mh/max.h"
83
+ #include "mh/ptp.h"
84
+ #include "mh/max_index.h"
85
+ #include "mh/min_index.h"
86
+ #include "mh/argmax.h"
87
+ #include "mh/argmin.h"
88
+ #include "mh/maximum.h"
89
+ #include "mh/minimum.h"
90
+ #include "mh/minmax.h"
91
+ #include "mh/cumsum.h"
92
+ #include "mh/cumprod.h"
93
+ #include "mh/mulsum.h"
94
+ #include "mh/seq.h"
95
+ #include "mh/logseq.h"
96
+ #include "mh/eye.h"
97
+ #include "mh/rand.h"
49
98
  #include "mh/math/sqrt.h"
50
99
  #include "mh/math/cbrt.h"
51
100
  #include "mh/math/log.h"
@@ -78,10 +127,66 @@ extern VALUE cRT;
78
127
 
79
128
  typedef double dfloat; // Type aliases for shorter notation
80
129
  // following the codebase naming convention.
130
+ DEF_NARRAY_COERCE_CAST_METHOD_FUNC(dfloat)
131
+ DEF_NARRAY_TO_A_METHOD_FUNC(dfloat)
132
+ DEF_NARRAY_FILL_METHOD_FUNC(dfloat)
133
+ DEF_NARRAY_FORMAT_METHOD_FUNC(dfloat)
134
+ DEF_NARRAY_FORMAT_TO_A_METHOD_FUNC(dfloat)
135
+ DEF_NARRAY_INSPECT_METHOD_FUNC(dfloat)
136
+ #ifdef __SSE2__
137
+ DEF_NARRAY_DFLT_ADD_SSE2_METHOD_FUNC()
138
+ DEF_NARRAY_DFLT_SUB_SSE2_METHOD_FUNC()
139
+ DEF_NARRAY_DFLT_MUL_SSE2_METHOD_FUNC()
140
+ DEF_NARRAY_DFLT_DIV_SSE2_METHOD_FUNC()
141
+ #else
142
+ DEF_NARRAY_ADD_METHOD_FUNC(dfloat, numo_cDFloat)
143
+ DEF_NARRAY_SUB_METHOD_FUNC(dfloat, numo_cDFloat)
144
+ DEF_NARRAY_MUL_METHOD_FUNC(dfloat, numo_cDFloat)
145
+ DEF_NARRAY_FLT_DIV_METHOD_FUNC(dfloat, numo_cDFloat)
146
+ #endif
147
+ DEF_NARRAY_FLT_MOD_METHOD_FUNC(dfloat, numo_cDFloat)
148
+ DEF_NARRAY_FLT_DIVMOD_METHOD_FUNC(dfloat, numo_cDFloat)
149
+ DEF_NARRAY_FLT_FLOOR_METHOD_FUNC(dfloat, numo_cDFloat)
150
+ DEF_NARRAY_FLT_ROUND_METHOD_FUNC(dfloat, numo_cDFloat)
151
+ DEF_NARRAY_FLT_CEIL_METHOD_FUNC(dfloat, numo_cDFloat)
152
+ DEF_NARRAY_FLT_TRUNC_METHOD_FUNC(dfloat, numo_cDFloat)
153
+ DEF_NARRAY_FLT_RINT_METHOD_FUNC(dfloat, numo_cDFloat)
154
+ DEF_NARRAY_EQ_METHOD_FUNC(dfloat, numo_cDFloat)
155
+ DEF_NARRAY_NE_METHOD_FUNC(dfloat, numo_cDFloat)
156
+ DEF_NARRAY_NEARLY_EQ_METHOD_FUNC(dfloat, numo_cDFloat)
157
+ DEF_NARRAY_GT_METHOD_FUNC(dfloat, numo_cDFloat)
158
+ DEF_NARRAY_GE_METHOD_FUNC(dfloat, numo_cDFloat)
159
+ DEF_NARRAY_LT_METHOD_FUNC(dfloat, numo_cDFloat)
160
+ DEF_NARRAY_LE_METHOD_FUNC(dfloat, numo_cDFloat)
161
+ DEF_NARRAY_CLIP_METHOD_FUNC(dfloat, numo_cDFloat)
162
+ DEF_NARRAY_FLT_ISNAN_METHOD_FUNC(dfloat, numo_cDFloat)
163
+ DEF_NARRAY_FLT_ISINF_METHOD_FUNC(dfloat, numo_cDFloat)
164
+ DEF_NARRAY_FLT_ISPOSINF_METHOD_FUNC(dfloat, numo_cDFloat)
165
+ DEF_NARRAY_FLT_ISNEGINF_METHOD_FUNC(dfloat, numo_cDFloat)
166
+ DEF_NARRAY_FLT_ISFINITE_METHOD_FUNC(dfloat, numo_cDFloat)
167
+ DEF_NARRAY_FLT_SUM_METHOD_FUNC(dfloat, numo_cDFloat)
168
+ DEF_NARRAY_FLT_PROD_METHOD_FUNC(dfloat, numo_cDFloat)
81
169
  DEF_NARRAY_FLT_MEAN_METHOD_FUNC(dfloat, numo_cDFloat, double, numo_cDFloat)
82
170
  DEF_NARRAY_FLT_VAR_METHOD_FUNC(dfloat, numo_cDFloat, double, numo_cDFloat)
83
171
  DEF_NARRAY_FLT_STDDEV_METHOD_FUNC(dfloat, numo_cDFloat, double, numo_cDFloat)
84
172
  DEF_NARRAY_FLT_RMS_METHOD_FUNC(dfloat, numo_cDFloat, double, numo_cDFloat)
173
+ DEF_NARRAY_FLT_MIN_METHOD_FUNC(dfloat, numo_cDFloat)
174
+ DEF_NARRAY_FLT_MAX_METHOD_FUNC(dfloat, numo_cDFloat)
175
+ DEF_NARRAY_FLT_PTP_METHOD_FUNC(dfloat, numo_cDFloat)
176
+ DEF_NARRAY_FLT_MAX_INDEX_METHOD_FUNC(dfloat)
177
+ DEF_NARRAY_FLT_MIN_INDEX_METHOD_FUNC(dfloat)
178
+ DEF_NARRAY_FLT_ARGMAX_METHOD_FUNC(dfloat)
179
+ DEF_NARRAY_FLT_ARGMIN_METHOD_FUNC(dfloat)
180
+ DEF_NARRAY_FLT_MAXIMUM_METHOD_FUNC(dfloat, numo_cDFloat)
181
+ DEF_NARRAY_FLT_MINIMUM_METHOD_FUNC(dfloat, numo_cDFloat)
182
+ DEF_NARRAY_FLT_MINMAX_METHOD_FUNC(dfloat, numo_cDFloat)
183
+ DEF_NARRAY_FLT_CUMSUM_METHOD_FUNC(dfloat, numo_cDFloat)
184
+ DEF_NARRAY_FLT_CUMPROD_METHOD_FUNC(dfloat, numo_cDFloat)
185
+ DEF_NARRAY_FLT_MULSUM_METHOD_FUNC(dfloat, numo_cDFloat)
186
+ DEF_NARRAY_FLT_SEQ_METHOD_FUNC(dfloat)
187
+ DEF_NARRAY_FLT_LOGSEQ_METHOD_FUNC(dfloat)
188
+ DEF_NARRAY_EYE_METHOD_FUNC(dfloat)
189
+ DEF_NARRAY_FLT_RAND_METHOD_FUNC(dfloat)
85
190
  #ifdef __SSE2__
86
191
  DEF_NARRAY_FLT_SQRT_SSE2_DBL_METHOD_FUNC(dfloat, numo_cDFloat)
87
192
  #else
@@ -1235,171 +1340,6 @@ static VALUE dfloat_aset(int argc, VALUE* argv, VALUE self) {
1235
1340
  return argv[argc];
1236
1341
  }
1237
1342
 
1238
- static VALUE dfloat_coerce_cast(VALUE self, VALUE type) {
1239
- return Qnil;
1240
- }
1241
-
1242
- static void iter_dfloat_to_a(na_loop_t* const lp) {
1243
- size_t i, s1;
1244
- char* p1;
1245
- size_t* idx1;
1246
- dtype x;
1247
- volatile VALUE a, y;
1248
-
1249
- INIT_COUNTER(lp, i);
1250
- INIT_PTR_IDX(lp, 0, p1, s1, idx1);
1251
- a = rb_ary_new2(i);
1252
- rb_ary_push(lp->args[1].value, a);
1253
- if (idx1) {
1254
- for (; i--;) {
1255
- GET_DATA_INDEX(p1, idx1, dtype, x);
1256
- y = m_data_to_num(x);
1257
- rb_ary_push(a, y);
1258
- }
1259
- } else {
1260
- for (; i--;) {
1261
- GET_DATA_STRIDE(p1, s1, dtype, x);
1262
- y = m_data_to_num(x);
1263
- rb_ary_push(a, y);
1264
- }
1265
- }
1266
- }
1267
-
1268
- static VALUE dfloat_to_a(VALUE self) {
1269
- ndfunc_arg_in_t ain[3] = { { Qnil, 0 }, { sym_loop_opt }, { sym_option } };
1270
- ndfunc_arg_out_t aout[1] = { { rb_cArray, 0 } }; // dummy?
1271
- ndfunc_t ndf = { iter_dfloat_to_a, FULL_LOOP_NIP, 3, 1, ain, aout };
1272
- return na_ndloop_cast_narray_to_rarray(&ndf, self, Qnil);
1273
- }
1274
-
1275
- static void iter_dfloat_fill(na_loop_t* const lp) {
1276
- size_t i;
1277
- char* p1;
1278
- ssize_t s1;
1279
- size_t* idx1;
1280
- VALUE x = lp->option;
1281
- dtype y;
1282
- INIT_COUNTER(lp, i);
1283
- INIT_PTR_IDX(lp, 0, p1, s1, idx1);
1284
- y = m_num_to_data(x);
1285
- if (idx1) {
1286
- for (; i--;) {
1287
- SET_DATA_INDEX(p1, idx1, dtype, y);
1288
- }
1289
- } else {
1290
- for (; i--;) {
1291
- SET_DATA_STRIDE(p1, s1, dtype, y);
1292
- }
1293
- }
1294
- }
1295
-
1296
- static VALUE dfloat_fill(VALUE self, VALUE val) {
1297
- ndfunc_arg_in_t ain[2] = { { OVERWRITE, 0 }, { sym_option } };
1298
- ndfunc_t ndf = { iter_dfloat_fill, FULL_LOOP, 2, 0, ain, 0 };
1299
-
1300
- na_ndloop(&ndf, 2, self, val);
1301
- return self;
1302
- }
1303
-
1304
- static VALUE format_dfloat(VALUE fmt, dtype* x) {
1305
- // fix-me
1306
- char s[48];
1307
- int n;
1308
-
1309
- if (NIL_P(fmt)) {
1310
- n = m_sprintf(s, *x);
1311
- return rb_str_new(s, n);
1312
- }
1313
- return rb_funcall(fmt, '%', 1, m_data_to_num(*x));
1314
- }
1315
-
1316
- static void iter_dfloat_format(na_loop_t* const lp) {
1317
- size_t i;
1318
- char *p1, *p2;
1319
- ssize_t s1, s2;
1320
- size_t* idx1;
1321
- dtype* x;
1322
- VALUE y;
1323
- VALUE fmt = lp->option;
1324
- INIT_COUNTER(lp, i);
1325
- INIT_PTR_IDX(lp, 0, p1, s1, idx1);
1326
- INIT_PTR(lp, 1, p2, s2);
1327
- if (idx1) {
1328
- for (; i--;) {
1329
- x = (dtype*)(p1 + *idx1);
1330
- idx1++;
1331
- y = format_dfloat(fmt, x);
1332
- SET_DATA_STRIDE(p2, s2, VALUE, y);
1333
- }
1334
- } else {
1335
- for (; i--;) {
1336
- x = (dtype*)p1;
1337
- p1 += s1;
1338
- y = format_dfloat(fmt, x);
1339
- SET_DATA_STRIDE(p2, s2, VALUE, y);
1340
- }
1341
- }
1342
- }
1343
-
1344
- static VALUE dfloat_format(int argc, VALUE* argv, VALUE self) {
1345
- VALUE fmt = Qnil;
1346
-
1347
- ndfunc_arg_in_t ain[2] = { { Qnil, 0 }, { sym_option } };
1348
- ndfunc_arg_out_t aout[1] = { { numo_cRObject, 0 } };
1349
- ndfunc_t ndf = { iter_dfloat_format, FULL_LOOP_NIP, 2, 1, ain, aout };
1350
-
1351
- rb_scan_args(argc, argv, "01", &fmt);
1352
- return na_ndloop(&ndf, 2, self, fmt);
1353
- }
1354
-
1355
- static void iter_dfloat_format_to_a(na_loop_t* const lp) {
1356
- size_t i;
1357
- char* p1;
1358
- ssize_t s1;
1359
- size_t* idx1;
1360
- dtype* x;
1361
- VALUE y;
1362
- volatile VALUE a;
1363
- VALUE fmt = lp->option;
1364
- INIT_COUNTER(lp, i);
1365
- INIT_PTR_IDX(lp, 0, p1, s1, idx1);
1366
- a = rb_ary_new2(i);
1367
- rb_ary_push(lp->args[1].value, a);
1368
- if (idx1) {
1369
- for (; i--;) {
1370
- x = (dtype*)(p1 + *idx1);
1371
- idx1++;
1372
- y = format_dfloat(fmt, x);
1373
- rb_ary_push(a, y);
1374
- }
1375
- } else {
1376
- for (; i--;) {
1377
- x = (dtype*)p1;
1378
- p1 += s1;
1379
- y = format_dfloat(fmt, x);
1380
- rb_ary_push(a, y);
1381
- }
1382
- }
1383
- }
1384
-
1385
- static VALUE dfloat_format_to_a(int argc, VALUE* argv, VALUE self) {
1386
- VALUE fmt = Qnil;
1387
- ndfunc_arg_in_t ain[3] = { { Qnil, 0 }, { sym_loop_opt }, { sym_option } };
1388
- ndfunc_arg_out_t aout[1] = { { rb_cArray, 0 } }; // dummy?
1389
- ndfunc_t ndf = { iter_dfloat_format_to_a, FULL_LOOP_NIP, 3, 1, ain, aout };
1390
-
1391
- rb_scan_args(argc, argv, "01", &fmt);
1392
- return na_ndloop_cast_narray_to_rarray(&ndf, self, fmt);
1393
- }
1394
-
1395
- static VALUE iter_dfloat_inspect(char* ptr, size_t pos, VALUE fmt) {
1396
- return format_dfloat(fmt, (dtype*)(ptr + pos));
1397
- }
1398
-
1399
- static VALUE dfloat_inspect(VALUE ary) {
1400
- return na_ndloop_inspect(ary, iter_dfloat_inspect, Qnil);
1401
- }
1402
-
1403
1343
  static void iter_dfloat_each(na_loop_t* const lp) {
1404
1344
  size_t i, s1;
1405
1345
  char* p1;
@@ -1682,3369 +1622,354 @@ static VALUE dfloat_abs(VALUE self) {
1682
1622
  return na_ndloop(&ndf, 1, self);
1683
1623
  }
1684
1624
 
1685
- #define check_intdivzero(y) \
1686
- {}
1687
-
1688
- static void iter_dfloat_add(na_loop_t* const lp) {
1689
- size_t i = 0;
1690
- size_t n;
1625
+ static void iter_dfloat_pow(na_loop_t* const lp) {
1626
+ size_t i;
1691
1627
  char *p1, *p2, *p3;
1692
1628
  ssize_t s1, s2, s3;
1693
-
1694
- #ifdef __SSE2__
1695
- size_t cnt;
1696
- size_t cnt_simd_loop = -1;
1697
-
1698
- __m128d a;
1699
- __m128d b;
1700
-
1701
- size_t num_pack; // Number of elements packed for SIMD.
1702
- num_pack = SIMD_ALIGNMENT_SIZE / sizeof(dtype);
1703
- #endif
1704
- INIT_COUNTER(lp, n);
1629
+ dtype x, y;
1630
+ INIT_COUNTER(lp, i);
1705
1631
  INIT_PTR(lp, 0, p1, s1);
1706
1632
  INIT_PTR(lp, 1, p2, s2);
1707
1633
  INIT_PTR(lp, 2, p3, s3);
1708
-
1709
- //
1710
- if (is_aligned(p1, sizeof(dtype)) && is_aligned(p2, sizeof(dtype)) &&
1711
- is_aligned(p3, sizeof(dtype))) {
1712
-
1713
- if (s1 == sizeof(dtype) && s2 == sizeof(dtype) && s3 == sizeof(dtype)) {
1714
- #ifdef __SSE2__
1715
- // Check number of elements. & Check same alignment.
1716
- if ((n >= num_pack) &&
1717
- is_same_aligned3(
1718
- &((dtype*)p1)[i], &((dtype*)p2)[i], &((dtype*)p3)[i], SIMD_ALIGNMENT_SIZE
1719
- )) {
1720
- // Calculate up to the position just before the start of SIMD computation.
1721
- cnt = get_count_of_elements_not_aligned_to_simd_size(
1722
- &((dtype*)p1)[i], SIMD_ALIGNMENT_SIZE, sizeof(dtype)
1723
- );
1724
- #endif
1725
- if (p1 == p3) { // inplace case
1726
- #ifdef __SSE2__
1727
- for (; i < cnt; i++) {
1728
- #else
1729
- for (; i < n; i++) {
1730
- check_intdivzero(((dtype*)p2)[i]);
1731
- #endif
1732
- ((dtype*)p1)[i] = m_add(((dtype*)p1)[i], ((dtype*)p2)[i]);
1733
- }
1734
- } else {
1735
- #ifdef __SSE2__
1736
- for (; i < cnt; i++) {
1737
- #else
1738
- for (; i < n; i++) {
1739
- check_intdivzero(((dtype*)p2)[i]);
1740
- #endif
1741
- ((dtype*)p3)[i] = m_add(((dtype*)p1)[i], ((dtype*)p2)[i]);
1742
- }
1743
- }
1744
- #ifdef __SSE2__
1745
- // Get the count of SIMD computation loops.
1746
- cnt_simd_loop = (n - i) % num_pack;
1747
-
1748
- // SIMD computation.
1749
- if (p1 == p3) { // inplace case
1750
- for (; i < n - cnt_simd_loop; i += num_pack) {
1751
- a = _mm_load_pd(&((dtype*)p1)[i]);
1752
- b = _mm_load_pd(&((dtype*)p2)[i]);
1753
- a = _mm_add_pd(a, b);
1754
- _mm_store_pd(&((dtype*)p1)[i], a);
1755
- }
1756
- } else {
1757
- for (; i < n - cnt_simd_loop; i += num_pack) {
1758
- a = _mm_load_pd(&((dtype*)p1)[i]);
1759
- b = _mm_load_pd(&((dtype*)p2)[i]);
1760
- a = _mm_add_pd(a, b);
1761
- _mm_stream_pd(&((dtype*)p3)[i], a);
1762
- }
1763
- }
1764
- }
1765
-
1766
- // Compute the remainder of the SIMD operation.
1767
- if (cnt_simd_loop != 0) {
1768
- if (p1 == p3) { // inplace case
1769
- for (; i < n; i++) {
1770
- check_intdivzero(((dtype*)p2)[i]);
1771
- ((dtype*)p1)[i] = m_add(((dtype*)p1)[i], ((dtype*)p2)[i]);
1772
- }
1773
- } else {
1774
- for (; i < n; i++) {
1775
- check_intdivzero(((dtype*)p2)[i]);
1776
- ((dtype*)p3)[i] = m_add(((dtype*)p1)[i], ((dtype*)p2)[i]);
1777
- }
1778
- }
1779
- }
1780
- #endif
1781
- return;
1782
- }
1783
-
1784
- if (is_aligned_step(s1, sizeof(dtype)) && is_aligned_step(s2, sizeof(dtype)) &&
1785
- is_aligned_step(s3, sizeof(dtype))) {
1786
- //
1787
-
1788
- if (s2 == 0) { // Broadcasting from scalar value.
1789
- check_intdivzero(*(dtype*)p2);
1790
- if (s1 == sizeof(dtype) && s3 == sizeof(dtype)) {
1791
- #ifdef __SSE2__
1792
- // Broadcast a scalar value and use it for SIMD computation.
1793
- b = _mm_load1_pd(&((dtype*)p2)[0]);
1794
-
1795
- // Check number of elements. & Check same alignment.
1796
- if ((n >= num_pack) &&
1797
- is_same_aligned2(&((dtype*)p1)[i], &((dtype*)p3)[i], SIMD_ALIGNMENT_SIZE)) {
1798
- // Calculate up to the position just before the start of SIMD computation.
1799
- cnt = get_count_of_elements_not_aligned_to_simd_size(
1800
- &((dtype*)p1)[i], SIMD_ALIGNMENT_SIZE, sizeof(dtype)
1801
- );
1802
- #endif
1803
- if (p1 == p3) { // inplace case
1804
- #ifdef __SSE2__
1805
- for (; i < cnt; i++) {
1806
- #else
1807
- for (; i < n; i++) {
1808
- #endif
1809
- ((dtype*)p1)[i] = m_add(((dtype*)p1)[i], *(dtype*)p2);
1810
- }
1811
- } else {
1812
- #ifdef __SSE2__
1813
- for (; i < cnt; i++) {
1814
- #else
1815
- for (; i < n; i++) {
1816
- #endif
1817
- ((dtype*)p3)[i] = m_add(((dtype*)p1)[i], *(dtype*)p2);
1818
- }
1819
- }
1820
- #ifdef __SSE2__
1821
- // Get the count of SIMD computation loops.
1822
- cnt_simd_loop = (n - i) % num_pack;
1823
-
1824
- // SIMD computation.
1825
- if (p1 == p3) { // inplace case
1826
- for (; i < n - cnt_simd_loop; i += num_pack) {
1827
- a = _mm_load_pd(&((dtype*)p1)[i]);
1828
- a = _mm_add_pd(a, b);
1829
- _mm_store_pd(&((dtype*)p1)[i], a);
1830
- }
1831
- } else {
1832
- for (; i < n - cnt_simd_loop; i += num_pack) {
1833
- a = _mm_load_pd(&((dtype*)p1)[i]);
1834
- a = _mm_add_pd(a, b);
1835
- _mm_stream_pd(&((dtype*)p3)[i], a);
1836
- }
1837
- }
1838
- }
1839
-
1840
- // Compute the remainder of the SIMD operation.
1841
- if (cnt_simd_loop != 0) {
1842
- if (p1 == p3) { // inplace case
1843
- for (; i < n; i++) {
1844
- ((dtype*)p1)[i] = m_add(((dtype*)p1)[i], *(dtype*)p2);
1845
- }
1846
- } else {
1847
- for (; i < n; i++) {
1848
- ((dtype*)p3)[i] = m_add(((dtype*)p1)[i], *(dtype*)p2);
1849
- }
1850
- }
1851
- }
1852
- #endif
1853
- } else {
1854
- for (i = 0; i < n; i++) {
1855
- *(dtype*)p3 = m_add(*(dtype*)p1, *(dtype*)p2);
1856
- p1 += s1;
1857
- p3 += s3;
1858
- }
1859
- }
1860
- } else {
1861
- if (p1 == p3) { // inplace case
1862
- for (i = 0; i < n; i++) {
1863
- check_intdivzero(*(dtype*)p2);
1864
- *(dtype*)p1 = m_add(*(dtype*)p1, *(dtype*)p2);
1865
- p1 += s1;
1866
- p2 += s2;
1867
- }
1868
- } else {
1869
- for (i = 0; i < n; i++) {
1870
- check_intdivzero(*(dtype*)p2);
1871
- *(dtype*)p3 = m_add(*(dtype*)p1, *(dtype*)p2);
1872
- p1 += s1;
1873
- p2 += s2;
1874
- p3 += s3;
1875
- }
1876
- }
1877
- }
1878
-
1879
- return;
1880
- //
1881
- }
1882
- }
1883
- for (i = 0; i < n; i++) {
1884
- dtype x, y, z;
1634
+ for (; i--;) {
1885
1635
  GET_DATA_STRIDE(p1, s1, dtype, x);
1886
1636
  GET_DATA_STRIDE(p2, s2, dtype, y);
1887
- check_intdivzero(y);
1888
- z = m_add(x, y);
1889
- SET_DATA_STRIDE(p3, s3, dtype, z);
1637
+ x = m_pow(x, y);
1638
+ SET_DATA_STRIDE(p3, s3, dtype, x);
1639
+ }
1640
+ }
1641
+
1642
+ static void iter_dfloat_pow_int32(na_loop_t* const lp) {
1643
+ size_t i;
1644
+ char *p1, *p2, *p3;
1645
+ ssize_t s1, s2, s3;
1646
+ dtype x;
1647
+ int32_t y;
1648
+ INIT_COUNTER(lp, i);
1649
+ INIT_PTR(lp, 0, p1, s1);
1650
+ INIT_PTR(lp, 1, p2, s2);
1651
+ INIT_PTR(lp, 2, p3, s3);
1652
+ for (; i--;) {
1653
+ GET_DATA_STRIDE(p1, s1, dtype, x);
1654
+ GET_DATA_STRIDE(p2, s2, int32_t, y);
1655
+ x = m_pow_int(x, y);
1656
+ SET_DATA_STRIDE(p3, s3, dtype, x);
1890
1657
  }
1891
- //
1892
1658
  }
1893
- #undef check_intdivzero
1894
1659
 
1895
- static VALUE dfloat_add_self(VALUE self, VALUE other) {
1660
+ static VALUE dfloat_pow_self(VALUE self, VALUE other) {
1896
1661
  ndfunc_arg_in_t ain[2] = { { cT, 0 }, { cT, 0 } };
1662
+ ndfunc_arg_in_t ain_i[2] = { { cT, 0 }, { numo_cInt32, 0 } };
1897
1663
  ndfunc_arg_out_t aout[1] = { { cT, 0 } };
1898
- ndfunc_t ndf = { iter_dfloat_add, STRIDE_LOOP, 2, 1, ain, aout };
1664
+ ndfunc_t ndf = { iter_dfloat_pow, STRIDE_LOOP, 2, 1, ain, aout };
1665
+ ndfunc_t ndf_i = { iter_dfloat_pow_int32, STRIDE_LOOP, 2, 1, ain_i, aout };
1899
1666
 
1900
- return na_ndloop(&ndf, 2, self, other);
1667
+ // fixme : use na.integer?
1668
+ if (FIXNUM_P(other) || rb_obj_is_kind_of(other, numo_cInt32)) {
1669
+ return na_ndloop(&ndf_i, 2, self, other);
1670
+ } else {
1671
+ return na_ndloop(&ndf, 2, self, other);
1672
+ }
1901
1673
  }
1902
1674
 
1903
- static VALUE dfloat_add(VALUE self, VALUE other) {
1675
+ static VALUE dfloat_pow(VALUE self, VALUE other) {
1904
1676
 
1905
1677
  VALUE klass, v;
1906
-
1907
1678
  klass = na_upcast(rb_obj_class(self), rb_obj_class(other));
1908
1679
  if (klass == cT) {
1909
- return dfloat_add_self(self, other);
1680
+ return dfloat_pow_self(self, other);
1910
1681
  } else {
1911
1682
  v = rb_funcall(klass, id_cast, 1, self);
1912
- return rb_funcall(v, '+', 1, other);
1683
+ return rb_funcall(v, id_pow, 1, other);
1913
1684
  }
1914
1685
  }
1915
1686
 
1916
- #define check_intdivzero(y) \
1917
- {}
1687
+ static void iter_dfloat_minus(na_loop_t* const lp) {
1688
+ size_t i, n;
1689
+ char *p1, *p2;
1690
+ ssize_t s1, s2;
1691
+ size_t *idx1, *idx2;
1692
+ dtype x;
1918
1693
 
1919
- static void iter_dfloat_sub(na_loop_t* const lp) {
1920
- size_t i = 0;
1921
- size_t n;
1922
- char *p1, *p2, *p3;
1923
- ssize_t s1, s2, s3;
1924
-
1925
- #ifdef __SSE2__
1926
- size_t cnt;
1927
- size_t cnt_simd_loop = -1;
1928
-
1929
- __m128d a;
1930
- __m128d b;
1931
-
1932
- size_t num_pack; // Number of elements packed for SIMD.
1933
- num_pack = SIMD_ALIGNMENT_SIZE / sizeof(dtype);
1934
- #endif
1935
1694
  INIT_COUNTER(lp, n);
1936
- INIT_PTR(lp, 0, p1, s1);
1937
- INIT_PTR(lp, 1, p2, s2);
1938
- INIT_PTR(lp, 2, p3, s3);
1939
-
1940
- //
1941
- if (is_aligned(p1, sizeof(dtype)) && is_aligned(p2, sizeof(dtype)) &&
1942
- is_aligned(p3, sizeof(dtype))) {
1943
-
1944
- if (s1 == sizeof(dtype) && s2 == sizeof(dtype) && s3 == sizeof(dtype)) {
1945
- #ifdef __SSE2__
1946
- // Check number of elements. & Check same alignment.
1947
- if ((n >= num_pack) &&
1948
- is_same_aligned3(
1949
- &((dtype*)p1)[i], &((dtype*)p2)[i], &((dtype*)p3)[i], SIMD_ALIGNMENT_SIZE
1950
- )) {
1951
- // Calculate up to the position just before the start of SIMD computation.
1952
- cnt = get_count_of_elements_not_aligned_to_simd_size(
1953
- &((dtype*)p1)[i], SIMD_ALIGNMENT_SIZE, sizeof(dtype)
1954
- );
1955
- #endif
1956
- if (p1 == p3) { // inplace case
1957
- #ifdef __SSE2__
1958
- for (; i < cnt; i++) {
1959
- #else
1960
- for (; i < n; i++) {
1961
- check_intdivzero(((dtype*)p2)[i]);
1962
- #endif
1963
- ((dtype*)p1)[i] = m_sub(((dtype*)p1)[i], ((dtype*)p2)[i]);
1964
- }
1965
- } else {
1966
- #ifdef __SSE2__
1967
- for (; i < cnt; i++) {
1968
- #else
1969
- for (; i < n; i++) {
1970
- check_intdivzero(((dtype*)p2)[i]);
1971
- #endif
1972
- ((dtype*)p3)[i] = m_sub(((dtype*)p1)[i], ((dtype*)p2)[i]);
1973
- }
1974
- }
1975
-
1976
- #ifdef __SSE2__
1977
- // Get the count of SIMD computation loops.
1978
- cnt_simd_loop = (n - i) % num_pack;
1695
+ INIT_PTR_IDX(lp, 0, p1, s1, idx1);
1696
+ INIT_PTR_IDX(lp, 1, p2, s2, idx2);
1979
1697
 
1980
- // SIMD computation.
1981
- if (p1 == p3) { // inplace case
1982
- for (; i < n - cnt_simd_loop; i += num_pack) {
1983
- a = _mm_load_pd(&((dtype*)p1)[i]);
1984
- b = _mm_load_pd(&((dtype*)p2)[i]);
1985
- a = _mm_sub_pd(a, b);
1986
- _mm_store_pd(&((dtype*)p1)[i], a);
1987
- }
1988
- } else {
1989
- for (; i < n - cnt_simd_loop; i += num_pack) {
1990
- a = _mm_load_pd(&((dtype*)p1)[i]);
1991
- b = _mm_load_pd(&((dtype*)p2)[i]);
1992
- a = _mm_sub_pd(a, b);
1993
- _mm_stream_pd(&((dtype*)p3)[i], a);
1994
- }
1995
- }
1698
+ if (idx1) {
1699
+ if (idx2) {
1700
+ for (i = 0; i < n; i++) {
1701
+ GET_DATA_INDEX(p1, idx1, dtype, x);
1702
+ x = m_minus(x);
1703
+ SET_DATA_INDEX(p2, idx2, dtype, x);
1996
1704
  }
1997
-
1998
- // Compute the remainder of the SIMD operation.
1999
- if (cnt_simd_loop != 0) {
2000
- if (p1 == p3) { // inplace case
2001
- for (; i < n; i++) {
2002
- check_intdivzero(((dtype*)p2)[i]);
2003
- ((dtype*)p1)[i] = m_sub(((dtype*)p1)[i], ((dtype*)p2)[i]);
2004
- }
2005
- } else {
2006
- for (; i < n; i++) {
2007
- check_intdivzero(((dtype*)p2)[i]);
2008
- ((dtype*)p3)[i] = m_sub(((dtype*)p1)[i], ((dtype*)p2)[i]);
2009
- }
2010
- }
1705
+ } else {
1706
+ for (i = 0; i < n; i++) {
1707
+ GET_DATA_INDEX(p1, idx1, dtype, x);
1708
+ x = m_minus(x);
1709
+ SET_DATA_STRIDE(p2, s2, dtype, x);
2011
1710
  }
2012
- #endif
2013
- return;
2014
1711
  }
2015
-
2016
- if (is_aligned_step(s1, sizeof(dtype)) && is_aligned_step(s2, sizeof(dtype)) &&
2017
- is_aligned_step(s3, sizeof(dtype))) {
1712
+ } else {
1713
+ if (idx2) {
1714
+ for (i = 0; i < n; i++) {
1715
+ GET_DATA_STRIDE(p1, s1, dtype, x);
1716
+ x = m_minus(x);
1717
+ SET_DATA_INDEX(p2, idx2, dtype, x);
1718
+ }
1719
+ } else {
2018
1720
  //
2019
-
2020
- if (s2 == 0) { // Broadcasting from scalar value.
2021
- check_intdivzero(*(dtype*)p2);
2022
- if (s1 == sizeof(dtype) && s3 == sizeof(dtype)) {
2023
- #ifdef __SSE2__
2024
- // Broadcast a scalar value and use it for SIMD computation.
2025
- b = _mm_load1_pd(&((dtype*)p2)[0]);
2026
-
2027
- // Check number of elements. & Check same alignment.
2028
- if ((n >= num_pack) &&
2029
- is_same_aligned2(&((dtype*)p1)[i], &((dtype*)p3)[i], SIMD_ALIGNMENT_SIZE)) {
2030
- // Calculate up to the position just before the start of SIMD computation.
2031
- cnt = get_count_of_elements_not_aligned_to_simd_size(
2032
- &((dtype*)p1)[i], SIMD_ALIGNMENT_SIZE, sizeof(dtype)
2033
- );
2034
- #endif
2035
- if (p1 == p3) { // inplace case
2036
- #ifdef __SSE2__
2037
- for (; i < cnt; i++) {
2038
- #else
2039
- for (; i < n; i++) {
2040
- #endif
2041
- ((dtype*)p1)[i] = m_sub(((dtype*)p1)[i], *(dtype*)p2);
2042
- }
2043
- } else {
2044
- #ifdef __SSE2__
2045
- for (; i < cnt; i++) {
2046
- #else
2047
- for (; i < n; i++) {
2048
- #endif
2049
- ((dtype*)p3)[i] = m_sub(((dtype*)p1)[i], *(dtype*)p2);
2050
- }
2051
- }
2052
-
2053
- #ifdef __SSE2__
2054
- // Get the count of SIMD computation loops.
2055
- cnt_simd_loop = (n - i) % num_pack;
2056
-
2057
- // SIMD computation.
2058
- if (p1 == p3) { // inplace case
2059
- for (; i < n - cnt_simd_loop; i += num_pack) {
2060
- a = _mm_load_pd(&((dtype*)p1)[i]);
2061
- a = _mm_sub_pd(a, b);
2062
- _mm_store_pd(&((dtype*)p1)[i], a);
2063
- }
2064
- } else {
2065
- for (; i < n - cnt_simd_loop; i += num_pack) {
2066
- a = _mm_load_pd(&((dtype*)p1)[i]);
2067
- a = _mm_sub_pd(a, b);
2068
- _mm_stream_pd(&((dtype*)p3)[i], a);
2069
- }
2070
- }
2071
- }
2072
-
2073
- // Compute the remainder of the SIMD operation.
2074
- if (cnt_simd_loop != 0) {
2075
- if (p1 == p3) { // inplace case
2076
- for (; i < n; i++) {
2077
- ((dtype*)p1)[i] = m_sub(((dtype*)p1)[i], *(dtype*)p2);
2078
- }
2079
- } else {
2080
- for (; i < n; i++) {
2081
- ((dtype*)p3)[i] = m_sub(((dtype*)p1)[i], *(dtype*)p2);
2082
- }
2083
- }
2084
- }
2085
- #endif
2086
- } else {
1721
+ if (is_aligned(p1, sizeof(dtype)) && is_aligned(p2, sizeof(dtype))) {
1722
+ if (s1 == sizeof(dtype) && s2 == sizeof(dtype)) {
2087
1723
  for (i = 0; i < n; i++) {
2088
- *(dtype*)p3 = m_sub(*(dtype*)p1, *(dtype*)p2);
2089
- p1 += s1;
2090
- p3 += s3;
1724
+ ((dtype*)p2)[i] = m_minus(((dtype*)p1)[i]);
2091
1725
  }
1726
+ return;
2092
1727
  }
2093
- } else {
2094
- if (p1 == p3) { // inplace case
2095
- for (i = 0; i < n; i++) {
2096
- check_intdivzero(*(dtype*)p2);
2097
- *(dtype*)p1 = m_sub(*(dtype*)p1, *(dtype*)p2);
2098
- p1 += s1;
2099
- p2 += s2;
2100
- }
2101
- } else {
1728
+ if (is_aligned_step(s1, sizeof(dtype)) && is_aligned_step(s2, sizeof(dtype))) {
1729
+ //
2102
1730
  for (i = 0; i < n; i++) {
2103
- check_intdivzero(*(dtype*)p2);
2104
- *(dtype*)p3 = m_sub(*(dtype*)p1, *(dtype*)p2);
1731
+ *(dtype*)p2 = m_minus(*(dtype*)p1);
2105
1732
  p1 += s1;
2106
1733
  p2 += s2;
2107
- p3 += s3;
2108
1734
  }
1735
+ return;
1736
+ //
2109
1737
  }
2110
1738
  }
2111
-
2112
- return;
1739
+ for (i = 0; i < n; i++) {
1740
+ GET_DATA_STRIDE(p1, s1, dtype, x);
1741
+ x = m_minus(x);
1742
+ SET_DATA_STRIDE(p2, s2, dtype, x);
1743
+ }
2113
1744
  //
2114
1745
  }
2115
1746
  }
2116
- for (i = 0; i < n; i++) {
2117
- dtype x, y, z;
2118
- GET_DATA_STRIDE(p1, s1, dtype, x);
2119
- GET_DATA_STRIDE(p2, s2, dtype, y);
2120
- check_intdivzero(y);
2121
- z = m_sub(x, y);
2122
- SET_DATA_STRIDE(p3, s3, dtype, z);
2123
- }
2124
- //
2125
1747
  }
2126
- #undef check_intdivzero
2127
1748
 
2128
- static VALUE dfloat_sub_self(VALUE self, VALUE other) {
2129
- ndfunc_arg_in_t ain[2] = { { cT, 0 }, { cT, 0 } };
1749
+ static VALUE dfloat_minus(VALUE self) {
1750
+ ndfunc_arg_in_t ain[1] = { { cT, 0 } };
2130
1751
  ndfunc_arg_out_t aout[1] = { { cT, 0 } };
2131
- ndfunc_t ndf = { iter_dfloat_sub, STRIDE_LOOP, 2, 1, ain, aout };
2132
-
2133
- return na_ndloop(&ndf, 2, self, other);
2134
- }
2135
-
2136
- static VALUE dfloat_sub(VALUE self, VALUE other) {
2137
-
2138
- VALUE klass, v;
1752
+ ndfunc_t ndf = { iter_dfloat_minus, FULL_LOOP, 1, 1, ain, aout };
2139
1753
 
2140
- klass = na_upcast(rb_obj_class(self), rb_obj_class(other));
2141
- if (klass == cT) {
2142
- return dfloat_sub_self(self, other);
2143
- } else {
2144
- v = rb_funcall(klass, id_cast, 1, self);
2145
- return rb_funcall(v, '-', 1, other);
2146
- }
1754
+ return na_ndloop(&ndf, 1, self);
2147
1755
  }
2148
1756
 
2149
- #define check_intdivzero(y) \
2150
- {}
2151
-
2152
- static void iter_dfloat_mul(na_loop_t* const lp) {
2153
- size_t i = 0;
2154
- size_t n;
2155
- char *p1, *p2, *p3;
2156
- ssize_t s1, s2, s3;
2157
-
2158
- #ifdef __SSE2__
2159
- size_t cnt;
2160
- size_t cnt_simd_loop = -1;
2161
-
2162
- __m128d a;
2163
- __m128d b;
1757
+ static void iter_dfloat_reciprocal(na_loop_t* const lp) {
1758
+ size_t i, n;
1759
+ char *p1, *p2;
1760
+ ssize_t s1, s2;
1761
+ size_t *idx1, *idx2;
1762
+ dtype x;
2164
1763
 
2165
- size_t num_pack; // Number of elements packed for SIMD.
2166
- num_pack = SIMD_ALIGNMENT_SIZE / sizeof(dtype);
2167
- #endif
2168
1764
  INIT_COUNTER(lp, n);
2169
- INIT_PTR(lp, 0, p1, s1);
2170
- INIT_PTR(lp, 1, p2, s2);
2171
- INIT_PTR(lp, 2, p3, s3);
2172
-
2173
- //
2174
- if (is_aligned(p1, sizeof(dtype)) && is_aligned(p2, sizeof(dtype)) &&
2175
- is_aligned(p3, sizeof(dtype))) {
1765
+ INIT_PTR_IDX(lp, 0, p1, s1, idx1);
1766
+ INIT_PTR_IDX(lp, 1, p2, s2, idx2);
2176
1767
 
2177
- if (s1 == sizeof(dtype) && s2 == sizeof(dtype) && s3 == sizeof(dtype)) {
2178
- #ifdef __SSE2__
2179
- // Check number of elements. & Check same alignment.
2180
- if ((n >= num_pack) &&
2181
- is_same_aligned3(
2182
- &((dtype*)p1)[i], &((dtype*)p2)[i], &((dtype*)p3)[i], SIMD_ALIGNMENT_SIZE
2183
- )) {
2184
- // Calculate up to the position just before the start of SIMD computation.
2185
- cnt = get_count_of_elements_not_aligned_to_simd_size(
2186
- &((dtype*)p1)[i], SIMD_ALIGNMENT_SIZE, sizeof(dtype)
2187
- );
2188
- #endif
2189
- if (p1 == p3) { // inplace case
2190
- #ifdef __SSE2__
2191
- for (; i < cnt; i++) {
2192
- #else
2193
- for (; i < n; i++) {
2194
- check_intdivzero(((dtype*)p2)[i]);
2195
- #endif
2196
- ((dtype*)p1)[i] = m_mul(((dtype*)p1)[i], ((dtype*)p2)[i]);
2197
- }
2198
- } else {
2199
- #ifdef __SSE2__
2200
- for (; i < cnt; i++) {
2201
- #else
2202
- for (; i < n; i++) {
2203
- check_intdivzero(((dtype*)p2)[i]);
2204
- #endif
2205
- ((dtype*)p3)[i] = m_mul(((dtype*)p1)[i], ((dtype*)p2)[i]);
1768
+ if (idx1) {
1769
+ if (idx2) {
1770
+ for (i = 0; i < n; i++) {
1771
+ GET_DATA_INDEX(p1, idx1, dtype, x);
1772
+ x = m_reciprocal(x);
1773
+ SET_DATA_INDEX(p2, idx2, dtype, x);
1774
+ }
1775
+ } else {
1776
+ for (i = 0; i < n; i++) {
1777
+ GET_DATA_INDEX(p1, idx1, dtype, x);
1778
+ x = m_reciprocal(x);
1779
+ SET_DATA_STRIDE(p2, s2, dtype, x);
1780
+ }
1781
+ }
1782
+ } else {
1783
+ if (idx2) {
1784
+ for (i = 0; i < n; i++) {
1785
+ GET_DATA_STRIDE(p1, s1, dtype, x);
1786
+ x = m_reciprocal(x);
1787
+ SET_DATA_INDEX(p2, idx2, dtype, x);
1788
+ }
1789
+ } else {
1790
+ //
1791
+ if (is_aligned(p1, sizeof(dtype)) && is_aligned(p2, sizeof(dtype))) {
1792
+ if (s1 == sizeof(dtype) && s2 == sizeof(dtype)) {
1793
+ for (i = 0; i < n; i++) {
1794
+ ((dtype*)p2)[i] = m_reciprocal(((dtype*)p1)[i]);
2206
1795
  }
1796
+ return;
2207
1797
  }
2208
-
2209
- #ifdef __SSE2__
2210
- // Get the count of SIMD computation loops.
2211
- cnt_simd_loop = (n - i) % num_pack;
2212
-
2213
- // SIMD computation.
2214
- if (p1 == p3) { // inplace case
2215
- for (; i < n - cnt_simd_loop; i += num_pack) {
2216
- a = _mm_load_pd(&((dtype*)p1)[i]);
2217
- b = _mm_load_pd(&((dtype*)p2)[i]);
2218
- a = _mm_mul_pd(a, b);
2219
- _mm_store_pd(&((dtype*)p1)[i], a);
2220
- }
2221
- } else {
2222
- for (; i < n - cnt_simd_loop; i += num_pack) {
2223
- a = _mm_load_pd(&((dtype*)p1)[i]);
2224
- b = _mm_load_pd(&((dtype*)p2)[i]);
2225
- a = _mm_mul_pd(a, b);
2226
- _mm_stream_pd(&((dtype*)p3)[i], a);
1798
+ if (is_aligned_step(s1, sizeof(dtype)) && is_aligned_step(s2, sizeof(dtype))) {
1799
+ //
1800
+ for (i = 0; i < n; i++) {
1801
+ *(dtype*)p2 = m_reciprocal(*(dtype*)p1);
1802
+ p1 += s1;
1803
+ p2 += s2;
2227
1804
  }
1805
+ return;
1806
+ //
2228
1807
  }
2229
1808
  }
2230
-
2231
- // Compute the remainder of the SIMD operation.
2232
- if (cnt_simd_loop != 0) {
2233
- if (p1 == p3) { // inplace case
2234
- for (; i < n; i++) {
2235
- check_intdivzero(((dtype*)p2)[i]);
2236
- ((dtype*)p1)[i] = m_mul(((dtype*)p1)[i], ((dtype*)p2)[i]);
2237
- }
2238
- } else {
2239
- for (; i < n; i++) {
2240
- check_intdivzero(((dtype*)p2)[i]);
2241
- ((dtype*)p3)[i] = m_mul(((dtype*)p1)[i], ((dtype*)p2)[i]);
2242
- }
2243
- }
1809
+ for (i = 0; i < n; i++) {
1810
+ GET_DATA_STRIDE(p1, s1, dtype, x);
1811
+ x = m_reciprocal(x);
1812
+ SET_DATA_STRIDE(p2, s2, dtype, x);
2244
1813
  }
2245
- #endif
2246
- return;
2247
- }
2248
-
2249
- if (is_aligned_step(s1, sizeof(dtype)) && is_aligned_step(s2, sizeof(dtype)) &&
2250
- is_aligned_step(s3, sizeof(dtype))) {
2251
1814
  //
1815
+ }
1816
+ }
1817
+ }
2252
1818
 
2253
- if (s2 == 0) { // Broadcasting from scalar value.
2254
- check_intdivzero(*(dtype*)p2);
2255
- if (s1 == sizeof(dtype) && s3 == sizeof(dtype)) {
2256
- #ifdef __SSE2__
2257
- // Broadcast a scalar value and use it for SIMD computation.
2258
- b = _mm_load1_pd(&((dtype*)p2)[0]);
2259
-
2260
- // Check number of elements. & Check same alignment.
2261
- if ((n >= num_pack) &&
2262
- is_same_aligned2(&((dtype*)p1)[i], &((dtype*)p3)[i], SIMD_ALIGNMENT_SIZE)) {
2263
- // Calculate up to the position just before the start of SIMD computation.
2264
- cnt = get_count_of_elements_not_aligned_to_simd_size(
2265
- &((dtype*)p1)[i], SIMD_ALIGNMENT_SIZE, sizeof(dtype)
2266
- );
2267
- #endif
2268
- if (p1 == p3) { // inplace case
2269
- #ifdef __SSE2__
2270
- for (; i < cnt; i++) {
2271
- #else
2272
- for (; i < n; i++) {
2273
- #endif
2274
- ((dtype*)p1)[i] = m_mul(((dtype*)p1)[i], *(dtype*)p2);
2275
- }
2276
- } else {
2277
- #ifdef __SSE2__
2278
- for (; i < cnt; i++) {
2279
- #else
2280
- for (; i < n; i++) {
2281
- #endif
2282
- ((dtype*)p3)[i] = m_mul(((dtype*)p1)[i], *(dtype*)p2);
2283
- }
2284
- }
1819
+ static VALUE dfloat_reciprocal(VALUE self) {
1820
+ ndfunc_arg_in_t ain[1] = { { cT, 0 } };
1821
+ ndfunc_arg_out_t aout[1] = { { cT, 0 } };
1822
+ ndfunc_t ndf = { iter_dfloat_reciprocal, FULL_LOOP, 1, 1, ain, aout };
2285
1823
 
2286
- #ifdef __SSE2__
2287
- // Get the count of SIMD computation loops.
2288
- cnt_simd_loop = (n - i) % num_pack;
2289
-
2290
- // SIMD computation.
2291
- if (p1 == p3) { // inplace case
2292
- for (; i < n - cnt_simd_loop; i += num_pack) {
2293
- a = _mm_load_pd(&((dtype*)p1)[i]);
2294
- a = _mm_mul_pd(a, b);
2295
- _mm_store_pd(&((dtype*)p1)[i], a);
2296
- }
2297
- } else {
2298
- for (; i < n - cnt_simd_loop; i += num_pack) {
2299
- a = _mm_load_pd(&((dtype*)p1)[i]);
2300
- a = _mm_mul_pd(a, b);
2301
- _mm_stream_pd(&((dtype*)p3)[i], a);
2302
- }
2303
- }
2304
- }
1824
+ return na_ndloop(&ndf, 1, self);
1825
+ }
2305
1826
 
2306
- // Compute the remainder of the SIMD operation.
2307
- if (cnt_simd_loop != 0) {
2308
- if (p1 == p3) { // inplace case
2309
- for (; i < n; i++) {
2310
- ((dtype*)p1)[i] = m_mul(((dtype*)p1)[i], *(dtype*)p2);
2311
- }
2312
- } else {
2313
- for (; i < n; i++) {
2314
- ((dtype*)p3)[i] = m_mul(((dtype*)p1)[i], *(dtype*)p2);
2315
- }
2316
- }
2317
- }
2318
- #endif
2319
- } else {
1827
+ static void iter_dfloat_sign(na_loop_t* const lp) {
1828
+ size_t i, n;
1829
+ char *p1, *p2;
1830
+ ssize_t s1, s2;
1831
+ size_t *idx1, *idx2;
1832
+ dtype x;
1833
+
1834
+ INIT_COUNTER(lp, n);
1835
+ INIT_PTR_IDX(lp, 0, p1, s1, idx1);
1836
+ INIT_PTR_IDX(lp, 1, p2, s2, idx2);
1837
+
1838
+ if (idx1) {
1839
+ if (idx2) {
1840
+ for (i = 0; i < n; i++) {
1841
+ GET_DATA_INDEX(p1, idx1, dtype, x);
1842
+ x = m_sign(x);
1843
+ SET_DATA_INDEX(p2, idx2, dtype, x);
1844
+ }
1845
+ } else {
1846
+ for (i = 0; i < n; i++) {
1847
+ GET_DATA_INDEX(p1, idx1, dtype, x);
1848
+ x = m_sign(x);
1849
+ SET_DATA_STRIDE(p2, s2, dtype, x);
1850
+ }
1851
+ }
1852
+ } else {
1853
+ if (idx2) {
1854
+ for (i = 0; i < n; i++) {
1855
+ GET_DATA_STRIDE(p1, s1, dtype, x);
1856
+ x = m_sign(x);
1857
+ SET_DATA_INDEX(p2, idx2, dtype, x);
1858
+ }
1859
+ } else {
1860
+ //
1861
+ if (is_aligned(p1, sizeof(dtype)) && is_aligned(p2, sizeof(dtype))) {
1862
+ if (s1 == sizeof(dtype) && s2 == sizeof(dtype)) {
2320
1863
  for (i = 0; i < n; i++) {
2321
- *(dtype*)p3 = m_mul(*(dtype*)p1, *(dtype*)p2);
2322
- p1 += s1;
2323
- p3 += s3;
1864
+ ((dtype*)p2)[i] = m_sign(((dtype*)p1)[i]);
2324
1865
  }
1866
+ return;
2325
1867
  }
2326
- } else {
2327
- if (p1 == p3) { // inplace case
2328
- for (i = 0; i < n; i++) {
2329
- check_intdivzero(*(dtype*)p2);
2330
- *(dtype*)p1 = m_mul(*(dtype*)p1, *(dtype*)p2);
2331
- p1 += s1;
2332
- p2 += s2;
2333
- }
2334
- } else {
1868
+ if (is_aligned_step(s1, sizeof(dtype)) && is_aligned_step(s2, sizeof(dtype))) {
1869
+ //
2335
1870
  for (i = 0; i < n; i++) {
2336
- check_intdivzero(*(dtype*)p2);
2337
- *(dtype*)p3 = m_mul(*(dtype*)p1, *(dtype*)p2);
1871
+ *(dtype*)p2 = m_sign(*(dtype*)p1);
2338
1872
  p1 += s1;
2339
1873
  p2 += s2;
2340
- p3 += s3;
2341
1874
  }
1875
+ return;
1876
+ //
2342
1877
  }
2343
1878
  }
2344
-
2345
- return;
1879
+ for (i = 0; i < n; i++) {
1880
+ GET_DATA_STRIDE(p1, s1, dtype, x);
1881
+ x = m_sign(x);
1882
+ SET_DATA_STRIDE(p2, s2, dtype, x);
1883
+ }
2346
1884
  //
2347
1885
  }
2348
1886
  }
2349
- for (i = 0; i < n; i++) {
2350
- dtype x, y, z;
2351
- GET_DATA_STRIDE(p1, s1, dtype, x);
2352
- GET_DATA_STRIDE(p2, s2, dtype, y);
2353
- check_intdivzero(y);
2354
- z = m_mul(x, y);
2355
- SET_DATA_STRIDE(p3, s3, dtype, z);
2356
- }
2357
- //
2358
1887
  }
2359
- #undef check_intdivzero
2360
1888
 
2361
- static VALUE dfloat_mul_self(VALUE self, VALUE other) {
2362
- ndfunc_arg_in_t ain[2] = { { cT, 0 }, { cT, 0 } };
1889
+ static VALUE dfloat_sign(VALUE self) {
1890
+ ndfunc_arg_in_t ain[1] = { { cT, 0 } };
2363
1891
  ndfunc_arg_out_t aout[1] = { { cT, 0 } };
2364
- ndfunc_t ndf = { iter_dfloat_mul, STRIDE_LOOP, 2, 1, ain, aout };
2365
-
2366
- return na_ndloop(&ndf, 2, self, other);
2367
- }
2368
-
2369
- static VALUE dfloat_mul(VALUE self, VALUE other) {
2370
-
2371
- VALUE klass, v;
1892
+ ndfunc_t ndf = { iter_dfloat_sign, FULL_LOOP, 1, 1, ain, aout };
2372
1893
 
2373
- klass = na_upcast(rb_obj_class(self), rb_obj_class(other));
2374
- if (klass == cT) {
2375
- return dfloat_mul_self(self, other);
2376
- } else {
2377
- v = rb_funcall(klass, id_cast, 1, self);
2378
- return rb_funcall(v, '*', 1, other);
2379
- }
1894
+ return na_ndloop(&ndf, 1, self);
2380
1895
  }
2381
1896
 
2382
- #define check_intdivzero(y) \
2383
- {}
2384
-
2385
- static void iter_dfloat_div(na_loop_t* const lp) {
2386
- size_t i = 0;
2387
- size_t n;
2388
- char *p1, *p2, *p3;
2389
- ssize_t s1, s2, s3;
2390
-
2391
- #ifdef __SSE2__
2392
- size_t cnt;
2393
- size_t cnt_simd_loop = -1;
2394
-
2395
- __m128d a;
2396
- __m128d b;
1897
+ static void iter_dfloat_square(na_loop_t* const lp) {
1898
+ size_t i, n;
1899
+ char *p1, *p2;
1900
+ ssize_t s1, s2;
1901
+ size_t *idx1, *idx2;
1902
+ dtype x;
2397
1903
 
2398
- size_t num_pack; // Number of elements packed for SIMD.
2399
- num_pack = SIMD_ALIGNMENT_SIZE / sizeof(dtype);
2400
- #endif
2401
1904
  INIT_COUNTER(lp, n);
2402
- INIT_PTR(lp, 0, p1, s1);
2403
- INIT_PTR(lp, 1, p2, s2);
2404
- INIT_PTR(lp, 2, p3, s3);
2405
-
2406
- //
2407
- if (is_aligned(p1, sizeof(dtype)) && is_aligned(p2, sizeof(dtype)) &&
2408
- is_aligned(p3, sizeof(dtype))) {
2409
-
2410
- if (s1 == sizeof(dtype) && s2 == sizeof(dtype) && s3 == sizeof(dtype)) {
2411
- #ifdef __SSE2__
2412
- // Check number of elements. & Check same alignment.
2413
- if ((n >= num_pack) &&
2414
- is_same_aligned3(
2415
- &((dtype*)p1)[i], &((dtype*)p2)[i], &((dtype*)p3)[i], SIMD_ALIGNMENT_SIZE
2416
- )) {
2417
- // Calculate up to the position just before the start of SIMD computation.
2418
- cnt = get_count_of_elements_not_aligned_to_simd_size(
2419
- &((dtype*)p1)[i], SIMD_ALIGNMENT_SIZE, sizeof(dtype)
2420
- );
2421
- #endif
2422
- if (p1 == p3) { // inplace case
2423
- #ifdef __SSE2__
2424
- for (; i < cnt; i++) {
2425
- #else
2426
- for (; i < n; i++) {
2427
- #endif
2428
- ((dtype*)p1)[i] = m_div(((dtype*)p1)[i], ((dtype*)p2)[i]);
2429
- }
2430
- } else {
2431
- #ifdef __SSE2__
2432
- for (; i < cnt; i++) {
2433
- #else
2434
- for (; i < n; i++) {
2435
- #endif
2436
- ((dtype*)p3)[i] = m_div(((dtype*)p1)[i], ((dtype*)p2)[i]);
2437
- }
2438
- }
2439
-
2440
- #ifdef __SSE2__
2441
- // Get the count of SIMD computation loops.
2442
- cnt_simd_loop = (n - i) % num_pack;
1905
+ INIT_PTR_IDX(lp, 0, p1, s1, idx1);
1906
+ INIT_PTR_IDX(lp, 1, p2, s2, idx2);
2443
1907
 
2444
- // SIMD computation.
2445
- if (p1 == p3) { // inplace case
2446
- for (; i < n - cnt_simd_loop; i += num_pack) {
2447
- a = _mm_load_pd(&((dtype*)p1)[i]);
2448
- b = _mm_load_pd(&((dtype*)p2)[i]);
2449
- a = _mm_div_pd(a, b);
2450
- _mm_store_pd(&((dtype*)p1)[i], a);
2451
- }
2452
- } else {
2453
- for (; i < n - cnt_simd_loop; i += num_pack) {
2454
- a = _mm_load_pd(&((dtype*)p1)[i]);
2455
- b = _mm_load_pd(&((dtype*)p2)[i]);
2456
- a = _mm_div_pd(a, b);
2457
- _mm_stream_pd(&((dtype*)p3)[i], a);
2458
- }
2459
- }
1908
+ if (idx1) {
1909
+ if (idx2) {
1910
+ for (i = 0; i < n; i++) {
1911
+ GET_DATA_INDEX(p1, idx1, dtype, x);
1912
+ x = m_square(x);
1913
+ SET_DATA_INDEX(p2, idx2, dtype, x);
2460
1914
  }
2461
-
2462
- // Compute the remainder of the SIMD operation.
2463
- if (cnt_simd_loop != 0) {
2464
- if (p1 == p3) { // inplace case
2465
- for (; i < n; i++) {
2466
- check_intdivzero(((dtype*)p2)[i]);
2467
- ((dtype*)p1)[i] = m_div(((dtype*)p1)[i], ((dtype*)p2)[i]);
2468
- }
2469
- } else {
2470
- for (; i < n; i++) {
2471
- check_intdivzero(((dtype*)p2)[i]);
2472
- ((dtype*)p3)[i] = m_div(((dtype*)p1)[i], ((dtype*)p2)[i]);
2473
- }
2474
- }
1915
+ } else {
1916
+ for (i = 0; i < n; i++) {
1917
+ GET_DATA_INDEX(p1, idx1, dtype, x);
1918
+ x = m_square(x);
1919
+ SET_DATA_STRIDE(p2, s2, dtype, x);
2475
1920
  }
2476
- #endif
2477
- return;
2478
1921
  }
2479
-
2480
- if (is_aligned_step(s1, sizeof(dtype)) && is_aligned_step(s2, sizeof(dtype)) &&
2481
- is_aligned_step(s3, sizeof(dtype))) {
1922
+ } else {
1923
+ if (idx2) {
1924
+ for (i = 0; i < n; i++) {
1925
+ GET_DATA_STRIDE(p1, s1, dtype, x);
1926
+ x = m_square(x);
1927
+ SET_DATA_INDEX(p2, idx2, dtype, x);
1928
+ }
1929
+ } else {
2482
1930
  //
2483
-
2484
- if (s2 == 0) { // Broadcasting from scalar value.
2485
- check_intdivzero(*(dtype*)p2);
2486
- if (s1 == sizeof(dtype) && s3 == sizeof(dtype)) {
2487
- #ifdef __SSE2__
2488
- // Broadcast a scalar value and use it for SIMD computation.
2489
- b = _mm_load1_pd(&((dtype*)p2)[0]);
2490
-
2491
- // Check number of elements. & Check same alignment.
2492
- if ((n >= num_pack) &&
2493
- is_same_aligned2(&((dtype*)p1)[i], &((dtype*)p3)[i], SIMD_ALIGNMENT_SIZE)) {
2494
- // Calculate up to the position just before the start of SIMD computation.
2495
- cnt = get_count_of_elements_not_aligned_to_simd_size(
2496
- &((dtype*)p1)[i], SIMD_ALIGNMENT_SIZE, sizeof(dtype)
2497
- );
2498
- #endif
2499
- if (p1 == p3) { // inplace case
2500
- #ifdef __SSE2__
2501
- for (; i < cnt; i++) {
2502
- #else
2503
- for (; i < n; i++) {
2504
- check_intdivzero(((dtype*)p2)[i]);
2505
- #endif
2506
- ((dtype*)p1)[i] = m_div(((dtype*)p1)[i], *(dtype*)p2);
2507
- }
2508
- } else {
2509
- #ifdef __SSE2__
2510
- for (; i < cnt; i++) {
2511
- #else
2512
- for (; i < n; i++) {
2513
- check_intdivzero(((dtype*)p2)[i]);
2514
- #endif
2515
- ((dtype*)p3)[i] = m_div(((dtype*)p1)[i], *(dtype*)p2);
2516
- }
2517
- }
2518
-
2519
- #ifdef __SSE2__
2520
- // Get the count of SIMD computation loops.
2521
- cnt_simd_loop = (n - i) % num_pack;
2522
-
2523
- // SIMD computation.
2524
- if (p1 == p3) { // inplace case
2525
- for (; i < n - cnt_simd_loop; i += num_pack) {
2526
- a = _mm_load_pd(&((dtype*)p1)[i]);
2527
- a = _mm_div_pd(a, b);
2528
- _mm_store_pd(&((dtype*)p1)[i], a);
2529
- }
2530
- } else {
2531
- for (; i < n - cnt_simd_loop; i += num_pack) {
2532
- a = _mm_load_pd(&((dtype*)p1)[i]);
2533
- a = _mm_div_pd(a, b);
2534
- _mm_stream_pd(&((dtype*)p3)[i], a);
2535
- }
2536
- }
2537
- }
2538
-
2539
- // Compute the remainder of the SIMD operation.
2540
- if (cnt_simd_loop != 0) {
2541
- if (p1 == p3) { // inplace case
2542
- for (; i < n; i++) {
2543
- ((dtype*)p1)[i] = m_div(((dtype*)p1)[i], *(dtype*)p2);
2544
- }
2545
- } else {
2546
- for (; i < n; i++) {
2547
- ((dtype*)p3)[i] = m_div(((dtype*)p1)[i], *(dtype*)p2);
2548
- }
2549
- }
2550
- }
2551
- #endif
2552
- } else {
1931
+ if (is_aligned(p1, sizeof(dtype)) && is_aligned(p2, sizeof(dtype))) {
1932
+ if (s1 == sizeof(dtype) && s2 == sizeof(dtype)) {
2553
1933
  for (i = 0; i < n; i++) {
2554
- *(dtype*)p3 = m_div(*(dtype*)p1, *(dtype*)p2);
2555
- p1 += s1;
2556
- p3 += s3;
1934
+ ((dtype*)p2)[i] = m_square(((dtype*)p1)[i]);
2557
1935
  }
1936
+ return;
2558
1937
  }
2559
- } else {
2560
- if (p1 == p3) { // inplace case
2561
- for (i = 0; i < n; i++) {
2562
- check_intdivzero(*(dtype*)p2);
2563
- *(dtype*)p1 = m_div(*(dtype*)p1, *(dtype*)p2);
2564
- p1 += s1;
2565
- p2 += s2;
2566
- }
2567
- } else {
1938
+ if (is_aligned_step(s1, sizeof(dtype)) && is_aligned_step(s2, sizeof(dtype))) {
1939
+ //
2568
1940
  for (i = 0; i < n; i++) {
2569
- check_intdivzero(*(dtype*)p2);
2570
- *(dtype*)p3 = m_div(*(dtype*)p1, *(dtype*)p2);
1941
+ *(dtype*)p2 = m_square(*(dtype*)p1);
2571
1942
  p1 += s1;
2572
1943
  p2 += s2;
2573
- p3 += s3;
2574
1944
  }
2575
- }
2576
- }
2577
-
2578
- return;
2579
- //
2580
- }
2581
- }
2582
- for (i = 0; i < n; i++) {
2583
- dtype x, y, z;
2584
- GET_DATA_STRIDE(p1, s1, dtype, x);
2585
- GET_DATA_STRIDE(p2, s2, dtype, y);
2586
- check_intdivzero(y);
2587
- z = m_div(x, y);
2588
- SET_DATA_STRIDE(p3, s3, dtype, z);
2589
- }
2590
- //
2591
- }
2592
- #undef check_intdivzero
2593
-
2594
- static VALUE dfloat_div_self(VALUE self, VALUE other) {
2595
- ndfunc_arg_in_t ain[2] = { { cT, 0 }, { cT, 0 } };
2596
- ndfunc_arg_out_t aout[1] = { { cT, 0 } };
2597
- ndfunc_t ndf = { iter_dfloat_div, STRIDE_LOOP, 2, 1, ain, aout };
2598
-
2599
- return na_ndloop(&ndf, 2, self, other);
2600
- }
2601
-
2602
- static VALUE dfloat_div(VALUE self, VALUE other) {
2603
-
2604
- VALUE klass, v;
2605
-
2606
- klass = na_upcast(rb_obj_class(self), rb_obj_class(other));
2607
- if (klass == cT) {
2608
- return dfloat_div_self(self, other);
2609
- } else {
2610
- v = rb_funcall(klass, id_cast, 1, self);
2611
- return rb_funcall(v, '/', 1, other);
2612
- }
2613
- }
2614
-
2615
- #define check_intdivzero(y) \
2616
- {}
2617
-
2618
- static void iter_dfloat_mod(na_loop_t* const lp) {
2619
- size_t i = 0;
2620
- size_t n;
2621
- char *p1, *p2, *p3;
2622
- ssize_t s1, s2, s3;
2623
-
2624
- INIT_COUNTER(lp, n);
2625
- INIT_PTR(lp, 0, p1, s1);
2626
- INIT_PTR(lp, 1, p2, s2);
2627
- INIT_PTR(lp, 2, p3, s3);
2628
-
2629
- //
2630
- if (is_aligned(p1, sizeof(dtype)) && is_aligned(p2, sizeof(dtype)) &&
2631
- is_aligned(p3, sizeof(dtype))) {
2632
-
2633
- if (s1 == sizeof(dtype) && s2 == sizeof(dtype) && s3 == sizeof(dtype)) {
2634
- if (p1 == p3) { // inplace case
2635
- for (; i < n; i++) {
2636
- check_intdivzero(((dtype*)p2)[i]);
2637
- ((dtype*)p1)[i] = m_mod(((dtype*)p1)[i], ((dtype*)p2)[i]);
2638
- }
2639
- } else {
2640
- for (; i < n; i++) {
2641
- check_intdivzero(((dtype*)p2)[i]);
2642
- ((dtype*)p3)[i] = m_mod(((dtype*)p1)[i], ((dtype*)p2)[i]);
2643
- }
2644
- }
2645
- return;
2646
- }
2647
-
2648
- if (is_aligned_step(s1, sizeof(dtype)) && is_aligned_step(s2, sizeof(dtype)) &&
2649
- is_aligned_step(s3, sizeof(dtype))) {
2650
- //
2651
-
2652
- if (s2 == 0) { // Broadcasting from scalar value.
2653
- check_intdivzero(*(dtype*)p2);
2654
- if (s1 == sizeof(dtype) && s3 == sizeof(dtype)) {
2655
- if (p1 == p3) { // inplace case
2656
- for (; i < n; i++) {
2657
- ((dtype*)p1)[i] = m_mod(((dtype*)p1)[i], *(dtype*)p2);
2658
- }
2659
- } else {
2660
- for (; i < n; i++) {
2661
- ((dtype*)p3)[i] = m_mod(((dtype*)p1)[i], *(dtype*)p2);
2662
- }
2663
- }
2664
- } else {
2665
- for (i = 0; i < n; i++) {
2666
- *(dtype*)p3 = m_mod(*(dtype*)p1, *(dtype*)p2);
2667
- p1 += s1;
2668
- p3 += s3;
2669
- }
2670
- }
2671
- } else {
2672
- if (p1 == p3) { // inplace case
2673
- for (i = 0; i < n; i++) {
2674
- check_intdivzero(*(dtype*)p2);
2675
- *(dtype*)p1 = m_mod(*(dtype*)p1, *(dtype*)p2);
2676
- p1 += s1;
2677
- p2 += s2;
2678
- }
2679
- } else {
2680
- for (i = 0; i < n; i++) {
2681
- check_intdivzero(*(dtype*)p2);
2682
- *(dtype*)p3 = m_mod(*(dtype*)p1, *(dtype*)p2);
2683
- p1 += s1;
2684
- p2 += s2;
2685
- p3 += s3;
2686
- }
2687
- }
2688
- }
2689
-
2690
- return;
2691
- //
2692
- }
2693
- }
2694
- for (i = 0; i < n; i++) {
2695
- dtype x, y, z;
2696
- GET_DATA_STRIDE(p1, s1, dtype, x);
2697
- GET_DATA_STRIDE(p2, s2, dtype, y);
2698
- check_intdivzero(y);
2699
- z = m_mod(x, y);
2700
- SET_DATA_STRIDE(p3, s3, dtype, z);
2701
- }
2702
- //
2703
- }
2704
- #undef check_intdivzero
2705
-
2706
- static VALUE dfloat_mod_self(VALUE self, VALUE other) {
2707
- ndfunc_arg_in_t ain[2] = { { cT, 0 }, { cT, 0 } };
2708
- ndfunc_arg_out_t aout[1] = { { cT, 0 } };
2709
- ndfunc_t ndf = { iter_dfloat_mod, STRIDE_LOOP, 2, 1, ain, aout };
2710
-
2711
- return na_ndloop(&ndf, 2, self, other);
2712
- }
2713
-
2714
- static VALUE dfloat_mod(VALUE self, VALUE other) {
2715
-
2716
- VALUE klass, v;
2717
-
2718
- klass = na_upcast(rb_obj_class(self), rb_obj_class(other));
2719
- if (klass == cT) {
2720
- return dfloat_mod_self(self, other);
2721
- } else {
2722
- v = rb_funcall(klass, id_cast, 1, self);
2723
- return rb_funcall(v, '%', 1, other);
2724
- }
2725
- }
2726
-
2727
- static void iter_dfloat_divmod(na_loop_t* const lp) {
2728
- size_t i, n;
2729
- char *p1, *p2, *p3, *p4;
2730
- ssize_t s1, s2, s3, s4;
2731
- dtype x, y, a, b;
2732
- INIT_COUNTER(lp, n);
2733
- INIT_PTR(lp, 0, p1, s1);
2734
- INIT_PTR(lp, 1, p2, s2);
2735
- INIT_PTR(lp, 2, p3, s3);
2736
- INIT_PTR(lp, 3, p4, s4);
2737
- for (i = n; i--;) {
2738
- GET_DATA_STRIDE(p1, s1, dtype, x);
2739
- GET_DATA_STRIDE(p2, s2, dtype, y);
2740
- m_divmod(x, y, a, b);
2741
- SET_DATA_STRIDE(p3, s3, dtype, a);
2742
- SET_DATA_STRIDE(p4, s4, dtype, b);
2743
- }
2744
- }
2745
-
2746
- static VALUE dfloat_divmod_self(VALUE self, VALUE other) {
2747
- ndfunc_arg_in_t ain[2] = { { cT, 0 }, { cT, 0 } };
2748
- ndfunc_arg_out_t aout[2] = { { cT, 0 }, { cT, 0 } };
2749
- ndfunc_t ndf = { iter_dfloat_divmod, STRIDE_LOOP, 2, 2, ain, aout };
2750
-
2751
- return na_ndloop(&ndf, 2, self, other);
2752
- }
2753
-
2754
- static VALUE dfloat_divmod(VALUE self, VALUE other) {
2755
-
2756
- VALUE klass, v;
2757
- klass = na_upcast(rb_obj_class(self), rb_obj_class(other));
2758
- if (klass == cT) {
2759
- return dfloat_divmod_self(self, other);
2760
- } else {
2761
- v = rb_funcall(klass, id_cast, 1, self);
2762
- return rb_funcall(v, id_divmod, 1, other);
2763
- }
2764
- }
2765
-
2766
- static void iter_dfloat_pow(na_loop_t* const lp) {
2767
- size_t i;
2768
- char *p1, *p2, *p3;
2769
- ssize_t s1, s2, s3;
2770
- dtype x, y;
2771
- INIT_COUNTER(lp, i);
2772
- INIT_PTR(lp, 0, p1, s1);
2773
- INIT_PTR(lp, 1, p2, s2);
2774
- INIT_PTR(lp, 2, p3, s3);
2775
- for (; i--;) {
2776
- GET_DATA_STRIDE(p1, s1, dtype, x);
2777
- GET_DATA_STRIDE(p2, s2, dtype, y);
2778
- x = m_pow(x, y);
2779
- SET_DATA_STRIDE(p3, s3, dtype, x);
2780
- }
2781
- }
2782
-
2783
- static void iter_dfloat_pow_int32(na_loop_t* const lp) {
2784
- size_t i;
2785
- char *p1, *p2, *p3;
2786
- ssize_t s1, s2, s3;
2787
- dtype x;
2788
- int32_t y;
2789
- INIT_COUNTER(lp, i);
2790
- INIT_PTR(lp, 0, p1, s1);
2791
- INIT_PTR(lp, 1, p2, s2);
2792
- INIT_PTR(lp, 2, p3, s3);
2793
- for (; i--;) {
2794
- GET_DATA_STRIDE(p1, s1, dtype, x);
2795
- GET_DATA_STRIDE(p2, s2, int32_t, y);
2796
- x = m_pow_int(x, y);
2797
- SET_DATA_STRIDE(p3, s3, dtype, x);
2798
- }
2799
- }
2800
-
2801
- static VALUE dfloat_pow_self(VALUE self, VALUE other) {
2802
- ndfunc_arg_in_t ain[2] = { { cT, 0 }, { cT, 0 } };
2803
- ndfunc_arg_in_t ain_i[2] = { { cT, 0 }, { numo_cInt32, 0 } };
2804
- ndfunc_arg_out_t aout[1] = { { cT, 0 } };
2805
- ndfunc_t ndf = { iter_dfloat_pow, STRIDE_LOOP, 2, 1, ain, aout };
2806
- ndfunc_t ndf_i = { iter_dfloat_pow_int32, STRIDE_LOOP, 2, 1, ain_i, aout };
2807
-
2808
- // fixme : use na.integer?
2809
- if (FIXNUM_P(other) || rb_obj_is_kind_of(other, numo_cInt32)) {
2810
- return na_ndloop(&ndf_i, 2, self, other);
2811
- } else {
2812
- return na_ndloop(&ndf, 2, self, other);
2813
- }
2814
- }
2815
-
2816
- static VALUE dfloat_pow(VALUE self, VALUE other) {
2817
-
2818
- VALUE klass, v;
2819
- klass = na_upcast(rb_obj_class(self), rb_obj_class(other));
2820
- if (klass == cT) {
2821
- return dfloat_pow_self(self, other);
2822
- } else {
2823
- v = rb_funcall(klass, id_cast, 1, self);
2824
- return rb_funcall(v, id_pow, 1, other);
2825
- }
2826
- }
2827
-
2828
- static void iter_dfloat_minus(na_loop_t* const lp) {
2829
- size_t i, n;
2830
- char *p1, *p2;
2831
- ssize_t s1, s2;
2832
- size_t *idx1, *idx2;
2833
- dtype x;
2834
-
2835
- INIT_COUNTER(lp, n);
2836
- INIT_PTR_IDX(lp, 0, p1, s1, idx1);
2837
- INIT_PTR_IDX(lp, 1, p2, s2, idx2);
2838
-
2839
- if (idx1) {
2840
- if (idx2) {
2841
- for (i = 0; i < n; i++) {
2842
- GET_DATA_INDEX(p1, idx1, dtype, x);
2843
- x = m_minus(x);
2844
- SET_DATA_INDEX(p2, idx2, dtype, x);
2845
- }
2846
- } else {
2847
- for (i = 0; i < n; i++) {
2848
- GET_DATA_INDEX(p1, idx1, dtype, x);
2849
- x = m_minus(x);
2850
- SET_DATA_STRIDE(p2, s2, dtype, x);
2851
- }
2852
- }
2853
- } else {
2854
- if (idx2) {
2855
- for (i = 0; i < n; i++) {
2856
- GET_DATA_STRIDE(p1, s1, dtype, x);
2857
- x = m_minus(x);
2858
- SET_DATA_INDEX(p2, idx2, dtype, x);
2859
- }
2860
- } else {
2861
- //
2862
- if (is_aligned(p1, sizeof(dtype)) && is_aligned(p2, sizeof(dtype))) {
2863
- if (s1 == sizeof(dtype) && s2 == sizeof(dtype)) {
2864
- for (i = 0; i < n; i++) {
2865
- ((dtype*)p2)[i] = m_minus(((dtype*)p1)[i]);
2866
- }
2867
- return;
2868
- }
2869
- if (is_aligned_step(s1, sizeof(dtype)) && is_aligned_step(s2, sizeof(dtype))) {
2870
- //
2871
- for (i = 0; i < n; i++) {
2872
- *(dtype*)p2 = m_minus(*(dtype*)p1);
2873
- p1 += s1;
2874
- p2 += s2;
2875
- }
2876
- return;
2877
- //
1945
+ return;
1946
+ //
2878
1947
  }
2879
1948
  }
2880
1949
  for (i = 0; i < n; i++) {
2881
1950
  GET_DATA_STRIDE(p1, s1, dtype, x);
2882
- x = m_minus(x);
1951
+ x = m_square(x);
2883
1952
  SET_DATA_STRIDE(p2, s2, dtype, x);
2884
1953
  }
2885
- //
2886
- }
2887
- }
2888
- }
2889
-
2890
- static VALUE dfloat_minus(VALUE self) {
2891
- ndfunc_arg_in_t ain[1] = { { cT, 0 } };
2892
- ndfunc_arg_out_t aout[1] = { { cT, 0 } };
2893
- ndfunc_t ndf = { iter_dfloat_minus, FULL_LOOP, 1, 1, ain, aout };
2894
-
2895
- return na_ndloop(&ndf, 1, self);
2896
- }
2897
-
2898
- static void iter_dfloat_reciprocal(na_loop_t* const lp) {
2899
- size_t i, n;
2900
- char *p1, *p2;
2901
- ssize_t s1, s2;
2902
- size_t *idx1, *idx2;
2903
- dtype x;
2904
-
2905
- INIT_COUNTER(lp, n);
2906
- INIT_PTR_IDX(lp, 0, p1, s1, idx1);
2907
- INIT_PTR_IDX(lp, 1, p2, s2, idx2);
2908
-
2909
- if (idx1) {
2910
- if (idx2) {
2911
- for (i = 0; i < n; i++) {
2912
- GET_DATA_INDEX(p1, idx1, dtype, x);
2913
- x = m_reciprocal(x);
2914
- SET_DATA_INDEX(p2, idx2, dtype, x);
2915
- }
2916
- } else {
2917
- for (i = 0; i < n; i++) {
2918
- GET_DATA_INDEX(p1, idx1, dtype, x);
2919
- x = m_reciprocal(x);
2920
- SET_DATA_STRIDE(p2, s2, dtype, x);
2921
- }
2922
- }
2923
- } else {
2924
- if (idx2) {
2925
- for (i = 0; i < n; i++) {
2926
- GET_DATA_STRIDE(p1, s1, dtype, x);
2927
- x = m_reciprocal(x);
2928
- SET_DATA_INDEX(p2, idx2, dtype, x);
2929
- }
2930
- } else {
2931
- //
2932
- if (is_aligned(p1, sizeof(dtype)) && is_aligned(p2, sizeof(dtype))) {
2933
- if (s1 == sizeof(dtype) && s2 == sizeof(dtype)) {
2934
- for (i = 0; i < n; i++) {
2935
- ((dtype*)p2)[i] = m_reciprocal(((dtype*)p1)[i]);
2936
- }
2937
- return;
2938
- }
2939
- if (is_aligned_step(s1, sizeof(dtype)) && is_aligned_step(s2, sizeof(dtype))) {
2940
- //
2941
- for (i = 0; i < n; i++) {
2942
- *(dtype*)p2 = m_reciprocal(*(dtype*)p1);
2943
- p1 += s1;
2944
- p2 += s2;
2945
- }
2946
- return;
2947
- //
2948
- }
2949
- }
2950
- for (i = 0; i < n; i++) {
2951
- GET_DATA_STRIDE(p1, s1, dtype, x);
2952
- x = m_reciprocal(x);
2953
- SET_DATA_STRIDE(p2, s2, dtype, x);
2954
- }
2955
- //
2956
- }
2957
- }
2958
- }
2959
-
2960
- static VALUE dfloat_reciprocal(VALUE self) {
2961
- ndfunc_arg_in_t ain[1] = { { cT, 0 } };
2962
- ndfunc_arg_out_t aout[1] = { { cT, 0 } };
2963
- ndfunc_t ndf = { iter_dfloat_reciprocal, FULL_LOOP, 1, 1, ain, aout };
2964
-
2965
- return na_ndloop(&ndf, 1, self);
2966
- }
2967
-
2968
- static void iter_dfloat_sign(na_loop_t* const lp) {
2969
- size_t i, n;
2970
- char *p1, *p2;
2971
- ssize_t s1, s2;
2972
- size_t *idx1, *idx2;
2973
- dtype x;
2974
-
2975
- INIT_COUNTER(lp, n);
2976
- INIT_PTR_IDX(lp, 0, p1, s1, idx1);
2977
- INIT_PTR_IDX(lp, 1, p2, s2, idx2);
2978
-
2979
- if (idx1) {
2980
- if (idx2) {
2981
- for (i = 0; i < n; i++) {
2982
- GET_DATA_INDEX(p1, idx1, dtype, x);
2983
- x = m_sign(x);
2984
- SET_DATA_INDEX(p2, idx2, dtype, x);
2985
- }
2986
- } else {
2987
- for (i = 0; i < n; i++) {
2988
- GET_DATA_INDEX(p1, idx1, dtype, x);
2989
- x = m_sign(x);
2990
- SET_DATA_STRIDE(p2, s2, dtype, x);
2991
- }
2992
- }
2993
- } else {
2994
- if (idx2) {
2995
- for (i = 0; i < n; i++) {
2996
- GET_DATA_STRIDE(p1, s1, dtype, x);
2997
- x = m_sign(x);
2998
- SET_DATA_INDEX(p2, idx2, dtype, x);
2999
- }
3000
- } else {
3001
- //
3002
- if (is_aligned(p1, sizeof(dtype)) && is_aligned(p2, sizeof(dtype))) {
3003
- if (s1 == sizeof(dtype) && s2 == sizeof(dtype)) {
3004
- for (i = 0; i < n; i++) {
3005
- ((dtype*)p2)[i] = m_sign(((dtype*)p1)[i]);
3006
- }
3007
- return;
3008
- }
3009
- if (is_aligned_step(s1, sizeof(dtype)) && is_aligned_step(s2, sizeof(dtype))) {
3010
- //
3011
- for (i = 0; i < n; i++) {
3012
- *(dtype*)p2 = m_sign(*(dtype*)p1);
3013
- p1 += s1;
3014
- p2 += s2;
3015
- }
3016
- return;
3017
- //
3018
- }
3019
- }
3020
- for (i = 0; i < n; i++) {
3021
- GET_DATA_STRIDE(p1, s1, dtype, x);
3022
- x = m_sign(x);
3023
- SET_DATA_STRIDE(p2, s2, dtype, x);
3024
- }
3025
- //
3026
- }
3027
- }
3028
- }
3029
-
3030
- static VALUE dfloat_sign(VALUE self) {
3031
- ndfunc_arg_in_t ain[1] = { { cT, 0 } };
3032
- ndfunc_arg_out_t aout[1] = { { cT, 0 } };
3033
- ndfunc_t ndf = { iter_dfloat_sign, FULL_LOOP, 1, 1, ain, aout };
3034
-
3035
- return na_ndloop(&ndf, 1, self);
3036
- }
3037
-
3038
- static void iter_dfloat_square(na_loop_t* const lp) {
3039
- size_t i, n;
3040
- char *p1, *p2;
3041
- ssize_t s1, s2;
3042
- size_t *idx1, *idx2;
3043
- dtype x;
3044
-
3045
- INIT_COUNTER(lp, n);
3046
- INIT_PTR_IDX(lp, 0, p1, s1, idx1);
3047
- INIT_PTR_IDX(lp, 1, p2, s2, idx2);
3048
-
3049
- if (idx1) {
3050
- if (idx2) {
3051
- for (i = 0; i < n; i++) {
3052
- GET_DATA_INDEX(p1, idx1, dtype, x);
3053
- x = m_square(x);
3054
- SET_DATA_INDEX(p2, idx2, dtype, x);
3055
- }
3056
- } else {
3057
- for (i = 0; i < n; i++) {
3058
- GET_DATA_INDEX(p1, idx1, dtype, x);
3059
- x = m_square(x);
3060
- SET_DATA_STRIDE(p2, s2, dtype, x);
3061
- }
3062
- }
3063
- } else {
3064
- if (idx2) {
3065
- for (i = 0; i < n; i++) {
3066
- GET_DATA_STRIDE(p1, s1, dtype, x);
3067
- x = m_square(x);
3068
- SET_DATA_INDEX(p2, idx2, dtype, x);
3069
- }
3070
- } else {
3071
- //
3072
- if (is_aligned(p1, sizeof(dtype)) && is_aligned(p2, sizeof(dtype))) {
3073
- if (s1 == sizeof(dtype) && s2 == sizeof(dtype)) {
3074
- for (i = 0; i < n; i++) {
3075
- ((dtype*)p2)[i] = m_square(((dtype*)p1)[i]);
3076
- }
3077
- return;
3078
- }
3079
- if (is_aligned_step(s1, sizeof(dtype)) && is_aligned_step(s2, sizeof(dtype))) {
3080
- //
3081
- for (i = 0; i < n; i++) {
3082
- *(dtype*)p2 = m_square(*(dtype*)p1);
3083
- p1 += s1;
3084
- p2 += s2;
3085
- }
3086
- return;
3087
- //
3088
- }
3089
- }
3090
- for (i = 0; i < n; i++) {
3091
- GET_DATA_STRIDE(p1, s1, dtype, x);
3092
- x = m_square(x);
3093
- SET_DATA_STRIDE(p2, s2, dtype, x);
3094
- }
3095
- //
3096
- }
3097
- }
3098
- }
3099
-
3100
- static VALUE dfloat_square(VALUE self) {
3101
- ndfunc_arg_in_t ain[1] = { { cT, 0 } };
3102
- ndfunc_arg_out_t aout[1] = { { cT, 0 } };
3103
- ndfunc_t ndf = { iter_dfloat_square, FULL_LOOP, 1, 1, ain, aout };
3104
-
3105
- return na_ndloop(&ndf, 1, self);
3106
- }
3107
-
3108
- static void iter_dfloat_eq(na_loop_t* const lp) {
3109
- size_t i;
3110
- char *p1, *p2;
3111
- BIT_DIGIT* a3;
3112
- size_t p3;
3113
- ssize_t s1, s2, s3;
3114
- dtype x, y;
3115
- BIT_DIGIT b;
3116
- INIT_COUNTER(lp, i);
3117
- INIT_PTR(lp, 0, p1, s1);
3118
- INIT_PTR(lp, 1, p2, s2);
3119
- INIT_PTR_BIT(lp, 2, a3, p3, s3);
3120
- for (; i--;) {
3121
- GET_DATA_STRIDE(p1, s1, dtype, x);
3122
- GET_DATA_STRIDE(p2, s2, dtype, y);
3123
- b = (m_eq(x, y)) ? 1 : 0;
3124
- STORE_BIT(a3, p3, b);
3125
- p3 += s3;
3126
- }
3127
- }
3128
-
3129
- static VALUE dfloat_eq_self(VALUE self, VALUE other) {
3130
- ndfunc_arg_in_t ain[2] = { { cT, 0 }, { cT, 0 } };
3131
- ndfunc_arg_out_t aout[1] = { { numo_cBit, 0 } };
3132
- ndfunc_t ndf = { iter_dfloat_eq, STRIDE_LOOP, 2, 1, ain, aout };
3133
-
3134
- return na_ndloop(&ndf, 2, self, other);
3135
- }
3136
-
3137
- static VALUE dfloat_eq(VALUE self, VALUE other) {
3138
-
3139
- VALUE klass, v;
3140
- klass = na_upcast(rb_obj_class(self), rb_obj_class(other));
3141
- if (klass == cT) {
3142
- return dfloat_eq_self(self, other);
3143
- } else {
3144
- v = rb_funcall(klass, id_cast, 1, self);
3145
- return rb_funcall(v, id_eq, 1, other);
3146
- }
3147
- }
3148
-
3149
- static void iter_dfloat_ne(na_loop_t* const lp) {
3150
- size_t i;
3151
- char *p1, *p2;
3152
- BIT_DIGIT* a3;
3153
- size_t p3;
3154
- ssize_t s1, s2, s3;
3155
- dtype x, y;
3156
- BIT_DIGIT b;
3157
- INIT_COUNTER(lp, i);
3158
- INIT_PTR(lp, 0, p1, s1);
3159
- INIT_PTR(lp, 1, p2, s2);
3160
- INIT_PTR_BIT(lp, 2, a3, p3, s3);
3161
- for (; i--;) {
3162
- GET_DATA_STRIDE(p1, s1, dtype, x);
3163
- GET_DATA_STRIDE(p2, s2, dtype, y);
3164
- b = (m_ne(x, y)) ? 1 : 0;
3165
- STORE_BIT(a3, p3, b);
3166
- p3 += s3;
3167
- }
3168
- }
3169
-
3170
- static VALUE dfloat_ne_self(VALUE self, VALUE other) {
3171
- ndfunc_arg_in_t ain[2] = { { cT, 0 }, { cT, 0 } };
3172
- ndfunc_arg_out_t aout[1] = { { numo_cBit, 0 } };
3173
- ndfunc_t ndf = { iter_dfloat_ne, STRIDE_LOOP, 2, 1, ain, aout };
3174
-
3175
- return na_ndloop(&ndf, 2, self, other);
3176
- }
3177
-
3178
- static VALUE dfloat_ne(VALUE self, VALUE other) {
3179
-
3180
- VALUE klass, v;
3181
- klass = na_upcast(rb_obj_class(self), rb_obj_class(other));
3182
- if (klass == cT) {
3183
- return dfloat_ne_self(self, other);
3184
- } else {
3185
- v = rb_funcall(klass, id_cast, 1, self);
3186
- return rb_funcall(v, id_ne, 1, other);
3187
- }
3188
- }
3189
-
3190
- static void iter_dfloat_nearly_eq(na_loop_t* const lp) {
3191
- size_t i;
3192
- char *p1, *p2;
3193
- BIT_DIGIT* a3;
3194
- size_t p3;
3195
- ssize_t s1, s2, s3;
3196
- dtype x, y;
3197
- BIT_DIGIT b;
3198
- INIT_COUNTER(lp, i);
3199
- INIT_PTR(lp, 0, p1, s1);
3200
- INIT_PTR(lp, 1, p2, s2);
3201
- INIT_PTR_BIT(lp, 2, a3, p3, s3);
3202
- for (; i--;) {
3203
- GET_DATA_STRIDE(p1, s1, dtype, x);
3204
- GET_DATA_STRIDE(p2, s2, dtype, y);
3205
- b = (m_nearly_eq(x, y)) ? 1 : 0;
3206
- STORE_BIT(a3, p3, b);
3207
- p3 += s3;
3208
- }
3209
- }
3210
-
3211
- static VALUE dfloat_nearly_eq_self(VALUE self, VALUE other) {
3212
- ndfunc_arg_in_t ain[2] = { { cT, 0 }, { cT, 0 } };
3213
- ndfunc_arg_out_t aout[1] = { { numo_cBit, 0 } };
3214
- ndfunc_t ndf = { iter_dfloat_nearly_eq, STRIDE_LOOP, 2, 1, ain, aout };
3215
-
3216
- return na_ndloop(&ndf, 2, self, other);
3217
- }
3218
-
3219
- static VALUE dfloat_nearly_eq(VALUE self, VALUE other) {
3220
-
3221
- VALUE klass, v;
3222
- klass = na_upcast(rb_obj_class(self), rb_obj_class(other));
3223
- if (klass == cT) {
3224
- return dfloat_nearly_eq_self(self, other);
3225
- } else {
3226
- v = rb_funcall(klass, id_cast, 1, self);
3227
- return rb_funcall(v, id_nearly_eq, 1, other);
3228
- }
3229
- }
3230
-
3231
- static void iter_dfloat_floor(na_loop_t* const lp) {
3232
- size_t i, n;
3233
- char *p1, *p2;
3234
- ssize_t s1, s2;
3235
- size_t *idx1, *idx2;
3236
- dtype x;
3237
-
3238
- INIT_COUNTER(lp, n);
3239
- INIT_PTR_IDX(lp, 0, p1, s1, idx1);
3240
- INIT_PTR_IDX(lp, 1, p2, s2, idx2);
3241
-
3242
- if (idx1) {
3243
- if (idx2) {
3244
- for (i = 0; i < n; i++) {
3245
- GET_DATA_INDEX(p1, idx1, dtype, x);
3246
- x = m_floor(x);
3247
- SET_DATA_INDEX(p2, idx2, dtype, x);
3248
- }
3249
- } else {
3250
- for (i = 0; i < n; i++) {
3251
- GET_DATA_INDEX(p1, idx1, dtype, x);
3252
- x = m_floor(x);
3253
- SET_DATA_STRIDE(p2, s2, dtype, x);
3254
- }
3255
- }
3256
- } else {
3257
- if (idx2) {
3258
- for (i = 0; i < n; i++) {
3259
- GET_DATA_STRIDE(p1, s1, dtype, x);
3260
- x = m_floor(x);
3261
- SET_DATA_INDEX(p2, idx2, dtype, x);
3262
- }
3263
- } else {
3264
- //
3265
- if (is_aligned(p1, sizeof(dtype)) && is_aligned(p2, sizeof(dtype))) {
3266
- if (s1 == sizeof(dtype) && s2 == sizeof(dtype)) {
3267
- for (i = 0; i < n; i++) {
3268
- ((dtype*)p2)[i] = m_floor(((dtype*)p1)[i]);
3269
- }
3270
- return;
3271
- }
3272
- if (is_aligned_step(s1, sizeof(dtype)) && is_aligned_step(s2, sizeof(dtype))) {
3273
- //
3274
- for (i = 0; i < n; i++) {
3275
- *(dtype*)p2 = m_floor(*(dtype*)p1);
3276
- p1 += s1;
3277
- p2 += s2;
3278
- }
3279
- return;
3280
- //
3281
- }
3282
- }
3283
- for (i = 0; i < n; i++) {
3284
- GET_DATA_STRIDE(p1, s1, dtype, x);
3285
- x = m_floor(x);
3286
- SET_DATA_STRIDE(p2, s2, dtype, x);
3287
- }
3288
- //
3289
- }
3290
- }
3291
- }
3292
-
3293
- static VALUE dfloat_floor(VALUE self) {
3294
- ndfunc_arg_in_t ain[1] = { { cT, 0 } };
3295
- ndfunc_arg_out_t aout[1] = { { cT, 0 } };
3296
- ndfunc_t ndf = { iter_dfloat_floor, FULL_LOOP, 1, 1, ain, aout };
3297
-
3298
- return na_ndloop(&ndf, 1, self);
3299
- }
3300
-
3301
- static void iter_dfloat_round(na_loop_t* const lp) {
3302
- size_t i, n;
3303
- char *p1, *p2;
3304
- ssize_t s1, s2;
3305
- size_t *idx1, *idx2;
3306
- dtype x;
3307
-
3308
- INIT_COUNTER(lp, n);
3309
- INIT_PTR_IDX(lp, 0, p1, s1, idx1);
3310
- INIT_PTR_IDX(lp, 1, p2, s2, idx2);
3311
-
3312
- if (idx1) {
3313
- if (idx2) {
3314
- for (i = 0; i < n; i++) {
3315
- GET_DATA_INDEX(p1, idx1, dtype, x);
3316
- x = m_round(x);
3317
- SET_DATA_INDEX(p2, idx2, dtype, x);
3318
- }
3319
- } else {
3320
- for (i = 0; i < n; i++) {
3321
- GET_DATA_INDEX(p1, idx1, dtype, x);
3322
- x = m_round(x);
3323
- SET_DATA_STRIDE(p2, s2, dtype, x);
3324
- }
3325
- }
3326
- } else {
3327
- if (idx2) {
3328
- for (i = 0; i < n; i++) {
3329
- GET_DATA_STRIDE(p1, s1, dtype, x);
3330
- x = m_round(x);
3331
- SET_DATA_INDEX(p2, idx2, dtype, x);
3332
- }
3333
- } else {
3334
- //
3335
- if (is_aligned(p1, sizeof(dtype)) && is_aligned(p2, sizeof(dtype))) {
3336
- if (s1 == sizeof(dtype) && s2 == sizeof(dtype)) {
3337
- for (i = 0; i < n; i++) {
3338
- ((dtype*)p2)[i] = m_round(((dtype*)p1)[i]);
3339
- }
3340
- return;
3341
- }
3342
- if (is_aligned_step(s1, sizeof(dtype)) && is_aligned_step(s2, sizeof(dtype))) {
3343
- //
3344
- for (i = 0; i < n; i++) {
3345
- *(dtype*)p2 = m_round(*(dtype*)p1);
3346
- p1 += s1;
3347
- p2 += s2;
3348
- }
3349
- return;
3350
- //
3351
- }
3352
- }
3353
- for (i = 0; i < n; i++) {
3354
- GET_DATA_STRIDE(p1, s1, dtype, x);
3355
- x = m_round(x);
3356
- SET_DATA_STRIDE(p2, s2, dtype, x);
3357
- }
3358
- //
3359
- }
3360
- }
3361
- }
3362
-
3363
- static VALUE dfloat_round(VALUE self) {
3364
- ndfunc_arg_in_t ain[1] = { { cT, 0 } };
3365
- ndfunc_arg_out_t aout[1] = { { cT, 0 } };
3366
- ndfunc_t ndf = { iter_dfloat_round, FULL_LOOP, 1, 1, ain, aout };
3367
-
3368
- return na_ndloop(&ndf, 1, self);
3369
- }
3370
-
3371
- static void iter_dfloat_ceil(na_loop_t* const lp) {
3372
- size_t i, n;
3373
- char *p1, *p2;
3374
- ssize_t s1, s2;
3375
- size_t *idx1, *idx2;
3376
- dtype x;
3377
-
3378
- INIT_COUNTER(lp, n);
3379
- INIT_PTR_IDX(lp, 0, p1, s1, idx1);
3380
- INIT_PTR_IDX(lp, 1, p2, s2, idx2);
3381
-
3382
- if (idx1) {
3383
- if (idx2) {
3384
- for (i = 0; i < n; i++) {
3385
- GET_DATA_INDEX(p1, idx1, dtype, x);
3386
- x = m_ceil(x);
3387
- SET_DATA_INDEX(p2, idx2, dtype, x);
3388
- }
3389
- } else {
3390
- for (i = 0; i < n; i++) {
3391
- GET_DATA_INDEX(p1, idx1, dtype, x);
3392
- x = m_ceil(x);
3393
- SET_DATA_STRIDE(p2, s2, dtype, x);
3394
- }
3395
- }
3396
- } else {
3397
- if (idx2) {
3398
- for (i = 0; i < n; i++) {
3399
- GET_DATA_STRIDE(p1, s1, dtype, x);
3400
- x = m_ceil(x);
3401
- SET_DATA_INDEX(p2, idx2, dtype, x);
3402
- }
3403
- } else {
3404
- //
3405
- if (is_aligned(p1, sizeof(dtype)) && is_aligned(p2, sizeof(dtype))) {
3406
- if (s1 == sizeof(dtype) && s2 == sizeof(dtype)) {
3407
- for (i = 0; i < n; i++) {
3408
- ((dtype*)p2)[i] = m_ceil(((dtype*)p1)[i]);
3409
- }
3410
- return;
3411
- }
3412
- if (is_aligned_step(s1, sizeof(dtype)) && is_aligned_step(s2, sizeof(dtype))) {
3413
- //
3414
- for (i = 0; i < n; i++) {
3415
- *(dtype*)p2 = m_ceil(*(dtype*)p1);
3416
- p1 += s1;
3417
- p2 += s2;
3418
- }
3419
- return;
3420
- //
3421
- }
3422
- }
3423
- for (i = 0; i < n; i++) {
3424
- GET_DATA_STRIDE(p1, s1, dtype, x);
3425
- x = m_ceil(x);
3426
- SET_DATA_STRIDE(p2, s2, dtype, x);
3427
- }
3428
- //
3429
- }
3430
- }
3431
- }
3432
-
3433
- static VALUE dfloat_ceil(VALUE self) {
3434
- ndfunc_arg_in_t ain[1] = { { cT, 0 } };
3435
- ndfunc_arg_out_t aout[1] = { { cT, 0 } };
3436
- ndfunc_t ndf = { iter_dfloat_ceil, FULL_LOOP, 1, 1, ain, aout };
3437
-
3438
- return na_ndloop(&ndf, 1, self);
3439
- }
3440
-
3441
- static void iter_dfloat_trunc(na_loop_t* const lp) {
3442
- size_t i, n;
3443
- char *p1, *p2;
3444
- ssize_t s1, s2;
3445
- size_t *idx1, *idx2;
3446
- dtype x;
3447
-
3448
- INIT_COUNTER(lp, n);
3449
- INIT_PTR_IDX(lp, 0, p1, s1, idx1);
3450
- INIT_PTR_IDX(lp, 1, p2, s2, idx2);
3451
-
3452
- if (idx1) {
3453
- if (idx2) {
3454
- for (i = 0; i < n; i++) {
3455
- GET_DATA_INDEX(p1, idx1, dtype, x);
3456
- x = m_trunc(x);
3457
- SET_DATA_INDEX(p2, idx2, dtype, x);
3458
- }
3459
- } else {
3460
- for (i = 0; i < n; i++) {
3461
- GET_DATA_INDEX(p1, idx1, dtype, x);
3462
- x = m_trunc(x);
3463
- SET_DATA_STRIDE(p2, s2, dtype, x);
3464
- }
3465
- }
3466
- } else {
3467
- if (idx2) {
3468
- for (i = 0; i < n; i++) {
3469
- GET_DATA_STRIDE(p1, s1, dtype, x);
3470
- x = m_trunc(x);
3471
- SET_DATA_INDEX(p2, idx2, dtype, x);
3472
- }
3473
- } else {
3474
- //
3475
- if (is_aligned(p1, sizeof(dtype)) && is_aligned(p2, sizeof(dtype))) {
3476
- if (s1 == sizeof(dtype) && s2 == sizeof(dtype)) {
3477
- for (i = 0; i < n; i++) {
3478
- ((dtype*)p2)[i] = m_trunc(((dtype*)p1)[i]);
3479
- }
3480
- return;
3481
- }
3482
- if (is_aligned_step(s1, sizeof(dtype)) && is_aligned_step(s2, sizeof(dtype))) {
3483
- //
3484
- for (i = 0; i < n; i++) {
3485
- *(dtype*)p2 = m_trunc(*(dtype*)p1);
3486
- p1 += s1;
3487
- p2 += s2;
3488
- }
3489
- return;
3490
- //
3491
- }
3492
- }
3493
- for (i = 0; i < n; i++) {
3494
- GET_DATA_STRIDE(p1, s1, dtype, x);
3495
- x = m_trunc(x);
3496
- SET_DATA_STRIDE(p2, s2, dtype, x);
3497
- }
3498
- //
3499
- }
3500
- }
3501
- }
3502
-
3503
- static VALUE dfloat_trunc(VALUE self) {
3504
- ndfunc_arg_in_t ain[1] = { { cT, 0 } };
3505
- ndfunc_arg_out_t aout[1] = { { cT, 0 } };
3506
- ndfunc_t ndf = { iter_dfloat_trunc, FULL_LOOP, 1, 1, ain, aout };
3507
-
3508
- return na_ndloop(&ndf, 1, self);
3509
- }
3510
-
3511
- static void iter_dfloat_rint(na_loop_t* const lp) {
3512
- size_t i, n;
3513
- char *p1, *p2;
3514
- ssize_t s1, s2;
3515
- size_t *idx1, *idx2;
3516
- dtype x;
3517
-
3518
- INIT_COUNTER(lp, n);
3519
- INIT_PTR_IDX(lp, 0, p1, s1, idx1);
3520
- INIT_PTR_IDX(lp, 1, p2, s2, idx2);
3521
-
3522
- if (idx1) {
3523
- if (idx2) {
3524
- for (i = 0; i < n; i++) {
3525
- GET_DATA_INDEX(p1, idx1, dtype, x);
3526
- x = m_rint(x);
3527
- SET_DATA_INDEX(p2, idx2, dtype, x);
3528
- }
3529
- } else {
3530
- for (i = 0; i < n; i++) {
3531
- GET_DATA_INDEX(p1, idx1, dtype, x);
3532
- x = m_rint(x);
3533
- SET_DATA_STRIDE(p2, s2, dtype, x);
3534
- }
3535
- }
3536
- } else {
3537
- if (idx2) {
3538
- for (i = 0; i < n; i++) {
3539
- GET_DATA_STRIDE(p1, s1, dtype, x);
3540
- x = m_rint(x);
3541
- SET_DATA_INDEX(p2, idx2, dtype, x);
3542
- }
3543
- } else {
3544
- //
3545
- if (is_aligned(p1, sizeof(dtype)) && is_aligned(p2, sizeof(dtype))) {
3546
- if (s1 == sizeof(dtype) && s2 == sizeof(dtype)) {
3547
- for (i = 0; i < n; i++) {
3548
- ((dtype*)p2)[i] = m_rint(((dtype*)p1)[i]);
3549
- }
3550
- return;
3551
- }
3552
- if (is_aligned_step(s1, sizeof(dtype)) && is_aligned_step(s2, sizeof(dtype))) {
3553
- //
3554
- for (i = 0; i < n; i++) {
3555
- *(dtype*)p2 = m_rint(*(dtype*)p1);
3556
- p1 += s1;
3557
- p2 += s2;
3558
- }
3559
- return;
3560
- //
3561
- }
3562
- }
3563
- for (i = 0; i < n; i++) {
3564
- GET_DATA_STRIDE(p1, s1, dtype, x);
3565
- x = m_rint(x);
3566
- SET_DATA_STRIDE(p2, s2, dtype, x);
3567
- }
3568
- //
3569
- }
3570
- }
3571
- }
3572
-
3573
- static VALUE dfloat_rint(VALUE self) {
3574
- ndfunc_arg_in_t ain[1] = { { cT, 0 } };
3575
- ndfunc_arg_out_t aout[1] = { { cT, 0 } };
3576
- ndfunc_t ndf = { iter_dfloat_rint, FULL_LOOP, 1, 1, ain, aout };
3577
-
3578
- return na_ndloop(&ndf, 1, self);
3579
- }
3580
-
3581
- #define check_intdivzero(y) \
3582
- {}
3583
-
3584
- static void iter_dfloat_copysign(na_loop_t* const lp) {
3585
- size_t i = 0;
3586
- size_t n;
3587
- char *p1, *p2, *p3;
3588
- ssize_t s1, s2, s3;
3589
-
3590
- INIT_COUNTER(lp, n);
3591
- INIT_PTR(lp, 0, p1, s1);
3592
- INIT_PTR(lp, 1, p2, s2);
3593
- INIT_PTR(lp, 2, p3, s3);
3594
-
3595
- //
3596
- if (is_aligned(p1, sizeof(dtype)) && is_aligned(p2, sizeof(dtype)) &&
3597
- is_aligned(p3, sizeof(dtype))) {
3598
-
3599
- if (s1 == sizeof(dtype) && s2 == sizeof(dtype) && s3 == sizeof(dtype)) {
3600
- if (p1 == p3) { // inplace case
3601
- for (; i < n; i++) {
3602
- check_intdivzero(((dtype*)p2)[i]);
3603
- ((dtype*)p1)[i] = m_copysign(((dtype*)p1)[i], ((dtype*)p2)[i]);
3604
- }
3605
- } else {
3606
- for (; i < n; i++) {
3607
- check_intdivzero(((dtype*)p2)[i]);
3608
- ((dtype*)p3)[i] = m_copysign(((dtype*)p1)[i], ((dtype*)p2)[i]);
3609
- }
3610
- }
3611
- return;
3612
- }
3613
-
3614
- if (is_aligned_step(s1, sizeof(dtype)) && is_aligned_step(s2, sizeof(dtype)) &&
3615
- is_aligned_step(s3, sizeof(dtype))) {
3616
- //
3617
-
3618
- if (s2 == 0) { // Broadcasting from scalar value.
3619
- check_intdivzero(*(dtype*)p2);
3620
- if (s1 == sizeof(dtype) && s3 == sizeof(dtype)) {
3621
- if (p1 == p3) { // inplace case
3622
- for (; i < n; i++) {
3623
- ((dtype*)p1)[i] = m_copysign(((dtype*)p1)[i], *(dtype*)p2);
3624
- }
3625
- } else {
3626
- for (; i < n; i++) {
3627
- ((dtype*)p3)[i] = m_copysign(((dtype*)p1)[i], *(dtype*)p2);
3628
- }
3629
- }
3630
- } else {
3631
- for (i = 0; i < n; i++) {
3632
- *(dtype*)p3 = m_copysign(*(dtype*)p1, *(dtype*)p2);
3633
- p1 += s1;
3634
- p3 += s3;
3635
- }
3636
- }
3637
- } else {
3638
- if (p1 == p3) { // inplace case
3639
- for (i = 0; i < n; i++) {
3640
- check_intdivzero(*(dtype*)p2);
3641
- *(dtype*)p1 = m_copysign(*(dtype*)p1, *(dtype*)p2);
3642
- p1 += s1;
3643
- p2 += s2;
3644
- }
3645
- } else {
3646
- for (i = 0; i < n; i++) {
3647
- check_intdivzero(*(dtype*)p2);
3648
- *(dtype*)p3 = m_copysign(*(dtype*)p1, *(dtype*)p2);
3649
- p1 += s1;
3650
- p2 += s2;
3651
- p3 += s3;
3652
- }
3653
- }
3654
- }
3655
-
3656
- return;
3657
- //
3658
- }
3659
- }
3660
- for (i = 0; i < n; i++) {
3661
- dtype x, y, z;
3662
- GET_DATA_STRIDE(p1, s1, dtype, x);
3663
- GET_DATA_STRIDE(p2, s2, dtype, y);
3664
- check_intdivzero(y);
3665
- z = m_copysign(x, y);
3666
- SET_DATA_STRIDE(p3, s3, dtype, z);
3667
- }
3668
- //
3669
- }
3670
- #undef check_intdivzero
3671
-
3672
- static VALUE dfloat_copysign_self(VALUE self, VALUE other) {
3673
- ndfunc_arg_in_t ain[2] = { { cT, 0 }, { cT, 0 } };
3674
- ndfunc_arg_out_t aout[1] = { { cT, 0 } };
3675
- ndfunc_t ndf = { iter_dfloat_copysign, STRIDE_LOOP, 2, 1, ain, aout };
3676
-
3677
- return na_ndloop(&ndf, 2, self, other);
3678
- }
3679
-
3680
- static VALUE dfloat_copysign(VALUE self, VALUE other) {
3681
-
3682
- VALUE klass, v;
3683
-
3684
- klass = na_upcast(rb_obj_class(self), rb_obj_class(other));
3685
- if (klass == cT) {
3686
- return dfloat_copysign_self(self, other);
3687
- } else {
3688
- v = rb_funcall(klass, id_cast, 1, self);
3689
- return rb_funcall(v, id_copysign, 1, other);
3690
- }
3691
- }
3692
-
3693
- static void iter_dfloat_signbit(na_loop_t* const lp) {
3694
- size_t i;
3695
- char* p1;
3696
- BIT_DIGIT* a2;
3697
- size_t p2;
3698
- ssize_t s1, s2;
3699
- size_t* idx1;
3700
- dtype x;
3701
- BIT_DIGIT b;
3702
- INIT_COUNTER(lp, i);
3703
- INIT_PTR_IDX(lp, 0, p1, s1, idx1);
3704
- INIT_PTR_BIT(lp, 1, a2, p2, s2);
3705
- if (idx1) {
3706
- for (; i--;) {
3707
- GET_DATA_INDEX(p1, idx1, dtype, x);
3708
- b = (m_signbit(x)) ? 1 : 0;
3709
- STORE_BIT(a2, p2, b);
3710
- p2 += s2;
3711
- }
3712
- } else {
3713
- for (; i--;) {
3714
- GET_DATA_STRIDE(p1, s1, dtype, x);
3715
- b = (m_signbit(x)) ? 1 : 0;
3716
- STORE_BIT(a2, p2, b);
3717
- p2 += s2;
3718
- }
3719
- }
3720
- }
3721
-
3722
- static VALUE dfloat_signbit(VALUE self) {
3723
- ndfunc_arg_in_t ain[1] = { { cT, 0 } };
3724
- ndfunc_arg_out_t aout[1] = { { numo_cBit, 0 } };
3725
- ndfunc_t ndf = { iter_dfloat_signbit, FULL_LOOP, 1, 1, ain, aout };
3726
-
3727
- return na_ndloop(&ndf, 1, self);
3728
- }
3729
-
3730
- static void iter_dfloat_modf(na_loop_t* const lp) {
3731
- size_t i;
3732
- char *p1, *p2, *p3;
3733
- ssize_t s1, s2, s3;
3734
- dtype x, y, z;
3735
- INIT_COUNTER(lp, i);
3736
- INIT_PTR(lp, 0, p1, s1);
3737
- INIT_PTR(lp, 1, p2, s2);
3738
- INIT_PTR(lp, 2, p3, s3);
3739
- for (; i--;) {
3740
- GET_DATA_STRIDE(p1, s1, dtype, x);
3741
- m_modf(x, y, z);
3742
- SET_DATA_STRIDE(p2, s2, dtype, y);
3743
- SET_DATA_STRIDE(p3, s3, dtype, z);
3744
- }
3745
- }
3746
-
3747
- static VALUE dfloat_modf(VALUE self) {
3748
- ndfunc_arg_in_t ain[1] = { { cT, 0 } };
3749
- ndfunc_arg_out_t aout[2] = { { cT, 0 }, { cT, 0 } };
3750
- ndfunc_t ndf = { iter_dfloat_modf, STRIDE_LOOP, 1, 2, ain, aout };
3751
-
3752
- return na_ndloop(&ndf, 1, self);
3753
- }
3754
-
3755
- static void iter_dfloat_gt(na_loop_t* const lp) {
3756
- size_t i;
3757
- char *p1, *p2;
3758
- BIT_DIGIT* a3;
3759
- size_t p3;
3760
- ssize_t s1, s2, s3;
3761
- dtype x, y;
3762
- BIT_DIGIT b;
3763
- INIT_COUNTER(lp, i);
3764
- INIT_PTR(lp, 0, p1, s1);
3765
- INIT_PTR(lp, 1, p2, s2);
3766
- INIT_PTR_BIT(lp, 2, a3, p3, s3);
3767
- for (; i--;) {
3768
- GET_DATA_STRIDE(p1, s1, dtype, x);
3769
- GET_DATA_STRIDE(p2, s2, dtype, y);
3770
- b = (m_gt(x, y)) ? 1 : 0;
3771
- STORE_BIT(a3, p3, b);
3772
- p3 += s3;
3773
- }
3774
- }
3775
-
3776
- static VALUE dfloat_gt_self(VALUE self, VALUE other) {
3777
- ndfunc_arg_in_t ain[2] = { { cT, 0 }, { cT, 0 } };
3778
- ndfunc_arg_out_t aout[1] = { { numo_cBit, 0 } };
3779
- ndfunc_t ndf = { iter_dfloat_gt, STRIDE_LOOP, 2, 1, ain, aout };
3780
-
3781
- return na_ndloop(&ndf, 2, self, other);
3782
- }
3783
-
3784
- static VALUE dfloat_gt(VALUE self, VALUE other) {
3785
-
3786
- VALUE klass, v;
3787
- klass = na_upcast(rb_obj_class(self), rb_obj_class(other));
3788
- if (klass == cT) {
3789
- return dfloat_gt_self(self, other);
3790
- } else {
3791
- v = rb_funcall(klass, id_cast, 1, self);
3792
- return rb_funcall(v, id_gt, 1, other);
3793
- }
3794
- }
3795
-
3796
- static void iter_dfloat_ge(na_loop_t* const lp) {
3797
- size_t i;
3798
- char *p1, *p2;
3799
- BIT_DIGIT* a3;
3800
- size_t p3;
3801
- ssize_t s1, s2, s3;
3802
- dtype x, y;
3803
- BIT_DIGIT b;
3804
- INIT_COUNTER(lp, i);
3805
- INIT_PTR(lp, 0, p1, s1);
3806
- INIT_PTR(lp, 1, p2, s2);
3807
- INIT_PTR_BIT(lp, 2, a3, p3, s3);
3808
- for (; i--;) {
3809
- GET_DATA_STRIDE(p1, s1, dtype, x);
3810
- GET_DATA_STRIDE(p2, s2, dtype, y);
3811
- b = (m_ge(x, y)) ? 1 : 0;
3812
- STORE_BIT(a3, p3, b);
3813
- p3 += s3;
3814
- }
3815
- }
3816
-
3817
- static VALUE dfloat_ge_self(VALUE self, VALUE other) {
3818
- ndfunc_arg_in_t ain[2] = { { cT, 0 }, { cT, 0 } };
3819
- ndfunc_arg_out_t aout[1] = { { numo_cBit, 0 } };
3820
- ndfunc_t ndf = { iter_dfloat_ge, STRIDE_LOOP, 2, 1, ain, aout };
3821
-
3822
- return na_ndloop(&ndf, 2, self, other);
3823
- }
3824
-
3825
- static VALUE dfloat_ge(VALUE self, VALUE other) {
3826
-
3827
- VALUE klass, v;
3828
- klass = na_upcast(rb_obj_class(self), rb_obj_class(other));
3829
- if (klass == cT) {
3830
- return dfloat_ge_self(self, other);
3831
- } else {
3832
- v = rb_funcall(klass, id_cast, 1, self);
3833
- return rb_funcall(v, id_ge, 1, other);
3834
- }
3835
- }
3836
-
3837
- static void iter_dfloat_lt(na_loop_t* const lp) {
3838
- size_t i;
3839
- char *p1, *p2;
3840
- BIT_DIGIT* a3;
3841
- size_t p3;
3842
- ssize_t s1, s2, s3;
3843
- dtype x, y;
3844
- BIT_DIGIT b;
3845
- INIT_COUNTER(lp, i);
3846
- INIT_PTR(lp, 0, p1, s1);
3847
- INIT_PTR(lp, 1, p2, s2);
3848
- INIT_PTR_BIT(lp, 2, a3, p3, s3);
3849
- for (; i--;) {
3850
- GET_DATA_STRIDE(p1, s1, dtype, x);
3851
- GET_DATA_STRIDE(p2, s2, dtype, y);
3852
- b = (m_lt(x, y)) ? 1 : 0;
3853
- STORE_BIT(a3, p3, b);
3854
- p3 += s3;
3855
- }
3856
- }
3857
-
3858
- static VALUE dfloat_lt_self(VALUE self, VALUE other) {
3859
- ndfunc_arg_in_t ain[2] = { { cT, 0 }, { cT, 0 } };
3860
- ndfunc_arg_out_t aout[1] = { { numo_cBit, 0 } };
3861
- ndfunc_t ndf = { iter_dfloat_lt, STRIDE_LOOP, 2, 1, ain, aout };
3862
-
3863
- return na_ndloop(&ndf, 2, self, other);
3864
- }
3865
-
3866
- static VALUE dfloat_lt(VALUE self, VALUE other) {
3867
-
3868
- VALUE klass, v;
3869
- klass = na_upcast(rb_obj_class(self), rb_obj_class(other));
3870
- if (klass == cT) {
3871
- return dfloat_lt_self(self, other);
3872
- } else {
3873
- v = rb_funcall(klass, id_cast, 1, self);
3874
- return rb_funcall(v, id_lt, 1, other);
3875
- }
3876
- }
3877
-
3878
- static void iter_dfloat_le(na_loop_t* const lp) {
3879
- size_t i;
3880
- char *p1, *p2;
3881
- BIT_DIGIT* a3;
3882
- size_t p3;
3883
- ssize_t s1, s2, s3;
3884
- dtype x, y;
3885
- BIT_DIGIT b;
3886
- INIT_COUNTER(lp, i);
3887
- INIT_PTR(lp, 0, p1, s1);
3888
- INIT_PTR(lp, 1, p2, s2);
3889
- INIT_PTR_BIT(lp, 2, a3, p3, s3);
3890
- for (; i--;) {
3891
- GET_DATA_STRIDE(p1, s1, dtype, x);
3892
- GET_DATA_STRIDE(p2, s2, dtype, y);
3893
- b = (m_le(x, y)) ? 1 : 0;
3894
- STORE_BIT(a3, p3, b);
3895
- p3 += s3;
3896
- }
3897
- }
3898
-
3899
- static VALUE dfloat_le_self(VALUE self, VALUE other) {
3900
- ndfunc_arg_in_t ain[2] = { { cT, 0 }, { cT, 0 } };
3901
- ndfunc_arg_out_t aout[1] = { { numo_cBit, 0 } };
3902
- ndfunc_t ndf = { iter_dfloat_le, STRIDE_LOOP, 2, 1, ain, aout };
3903
-
3904
- return na_ndloop(&ndf, 2, self, other);
3905
- }
3906
-
3907
- static VALUE dfloat_le(VALUE self, VALUE other) {
3908
-
3909
- VALUE klass, v;
3910
- klass = na_upcast(rb_obj_class(self), rb_obj_class(other));
3911
- if (klass == cT) {
3912
- return dfloat_le_self(self, other);
3913
- } else {
3914
- v = rb_funcall(klass, id_cast, 1, self);
3915
- return rb_funcall(v, id_le, 1, other);
3916
- }
3917
- }
3918
-
3919
- static void iter_dfloat_clip(na_loop_t* const lp) {
3920
- size_t i;
3921
- char *p1, *p2, *p3, *p4;
3922
- ssize_t s1, s2, s3, s4;
3923
- dtype x, min, max;
3924
- INIT_COUNTER(lp, i);
3925
- INIT_PTR(lp, 0, p1, s1);
3926
- INIT_PTR(lp, 1, p2, s2);
3927
- INIT_PTR(lp, 2, p3, s3);
3928
- INIT_PTR(lp, 3, p4, s4);
3929
- for (; i--;) {
3930
- GET_DATA_STRIDE(p1, s1, dtype, x);
3931
- GET_DATA_STRIDE(p2, s2, dtype, min);
3932
- GET_DATA_STRIDE(p3, s3, dtype, max);
3933
- if (m_gt(min, max)) {
3934
- rb_raise(nary_eOperationError, "min is greater than max");
3935
- }
3936
- if (m_lt(x, min)) {
3937
- x = min;
3938
- }
3939
- if (m_gt(x, max)) {
3940
- x = max;
3941
- }
3942
- SET_DATA_STRIDE(p4, s4, dtype, x);
3943
- }
3944
- }
3945
-
3946
- static void iter_dfloat_clip_min(na_loop_t* const lp) {
3947
- size_t i;
3948
- char *p1, *p2, *p3;
3949
- ssize_t s1, s2, s3;
3950
- dtype x, min;
3951
- INIT_COUNTER(lp, i);
3952
- INIT_PTR(lp, 0, p1, s1);
3953
- INIT_PTR(lp, 1, p2, s2);
3954
- INIT_PTR(lp, 2, p3, s3);
3955
- for (; i--;) {
3956
- GET_DATA_STRIDE(p1, s1, dtype, x);
3957
- GET_DATA_STRIDE(p2, s2, dtype, min);
3958
- if (m_lt(x, min)) {
3959
- x = min;
3960
- }
3961
- SET_DATA_STRIDE(p3, s3, dtype, x);
3962
- }
3963
- }
3964
-
3965
- static void iter_dfloat_clip_max(na_loop_t* const lp) {
3966
- size_t i;
3967
- char *p1, *p2, *p3;
3968
- ssize_t s1, s2, s3;
3969
- dtype x, max;
3970
- INIT_COUNTER(lp, i);
3971
- INIT_PTR(lp, 0, p1, s1);
3972
- INIT_PTR(lp, 1, p2, s2);
3973
- INIT_PTR(lp, 2, p3, s3);
3974
- for (; i--;) {
3975
- GET_DATA_STRIDE(p1, s1, dtype, x);
3976
- GET_DATA_STRIDE(p2, s2, dtype, max);
3977
- if (m_gt(x, max)) {
3978
- x = max;
3979
- }
3980
- SET_DATA_STRIDE(p3, s3, dtype, x);
3981
- }
3982
- }
3983
-
3984
- static VALUE dfloat_clip(VALUE self, VALUE min, VALUE max) {
3985
- ndfunc_arg_in_t ain[3] = { { Qnil, 0 }, { cT, 0 }, { cT, 0 } };
3986
- ndfunc_arg_out_t aout[1] = { { cT, 0 } };
3987
- ndfunc_t ndf_min = { iter_dfloat_clip_min, STRIDE_LOOP, 2, 1, ain, aout };
3988
- ndfunc_t ndf_max = { iter_dfloat_clip_max, STRIDE_LOOP, 2, 1, ain, aout };
3989
- ndfunc_t ndf_both = { iter_dfloat_clip, STRIDE_LOOP, 3, 1, ain, aout };
3990
-
3991
- if (RTEST(min)) {
3992
- if (RTEST(max)) {
3993
- return na_ndloop(&ndf_both, 3, self, min, max);
3994
- } else {
3995
- return na_ndloop(&ndf_min, 2, self, min);
3996
- }
3997
- } else {
3998
- if (RTEST(max)) {
3999
- return na_ndloop(&ndf_max, 2, self, max);
4000
- }
4001
- }
4002
- rb_raise(rb_eArgError, "min and max are not given");
4003
- return Qnil;
4004
- }
4005
-
4006
- static void iter_dfloat_isnan(na_loop_t* const lp) {
4007
- size_t i;
4008
- char* p1;
4009
- BIT_DIGIT* a2;
4010
- size_t p2;
4011
- ssize_t s1, s2;
4012
- size_t* idx1;
4013
- dtype x;
4014
- BIT_DIGIT b;
4015
- INIT_COUNTER(lp, i);
4016
- INIT_PTR_IDX(lp, 0, p1, s1, idx1);
4017
- INIT_PTR_BIT(lp, 1, a2, p2, s2);
4018
- if (idx1) {
4019
- for (; i--;) {
4020
- GET_DATA_INDEX(p1, idx1, dtype, x);
4021
- b = (m_isnan(x)) ? 1 : 0;
4022
- STORE_BIT(a2, p2, b);
4023
- p2 += s2;
4024
- }
4025
- } else {
4026
- for (; i--;) {
4027
- GET_DATA_STRIDE(p1, s1, dtype, x);
4028
- b = (m_isnan(x)) ? 1 : 0;
4029
- STORE_BIT(a2, p2, b);
4030
- p2 += s2;
4031
- }
4032
- }
4033
- }
4034
-
4035
- static VALUE dfloat_isnan(VALUE self) {
4036
- ndfunc_arg_in_t ain[1] = { { cT, 0 } };
4037
- ndfunc_arg_out_t aout[1] = { { numo_cBit, 0 } };
4038
- ndfunc_t ndf = { iter_dfloat_isnan, FULL_LOOP, 1, 1, ain, aout };
4039
-
4040
- return na_ndloop(&ndf, 1, self);
4041
- }
4042
-
4043
- static void iter_dfloat_isinf(na_loop_t* const lp) {
4044
- size_t i;
4045
- char* p1;
4046
- BIT_DIGIT* a2;
4047
- size_t p2;
4048
- ssize_t s1, s2;
4049
- size_t* idx1;
4050
- dtype x;
4051
- BIT_DIGIT b;
4052
- INIT_COUNTER(lp, i);
4053
- INIT_PTR_IDX(lp, 0, p1, s1, idx1);
4054
- INIT_PTR_BIT(lp, 1, a2, p2, s2);
4055
- if (idx1) {
4056
- for (; i--;) {
4057
- GET_DATA_INDEX(p1, idx1, dtype, x);
4058
- b = (m_isinf(x)) ? 1 : 0;
4059
- STORE_BIT(a2, p2, b);
4060
- p2 += s2;
4061
- }
4062
- } else {
4063
- for (; i--;) {
4064
- GET_DATA_STRIDE(p1, s1, dtype, x);
4065
- b = (m_isinf(x)) ? 1 : 0;
4066
- STORE_BIT(a2, p2, b);
4067
- p2 += s2;
4068
- }
4069
- }
4070
- }
4071
-
4072
- static VALUE dfloat_isinf(VALUE self) {
4073
- ndfunc_arg_in_t ain[1] = { { cT, 0 } };
4074
- ndfunc_arg_out_t aout[1] = { { numo_cBit, 0 } };
4075
- ndfunc_t ndf = { iter_dfloat_isinf, FULL_LOOP, 1, 1, ain, aout };
4076
-
4077
- return na_ndloop(&ndf, 1, self);
4078
- }
4079
-
4080
- static void iter_dfloat_isposinf(na_loop_t* const lp) {
4081
- size_t i;
4082
- char* p1;
4083
- BIT_DIGIT* a2;
4084
- size_t p2;
4085
- ssize_t s1, s2;
4086
- size_t* idx1;
4087
- dtype x;
4088
- BIT_DIGIT b;
4089
- INIT_COUNTER(lp, i);
4090
- INIT_PTR_IDX(lp, 0, p1, s1, idx1);
4091
- INIT_PTR_BIT(lp, 1, a2, p2, s2);
4092
- if (idx1) {
4093
- for (; i--;) {
4094
- GET_DATA_INDEX(p1, idx1, dtype, x);
4095
- b = (m_isposinf(x)) ? 1 : 0;
4096
- STORE_BIT(a2, p2, b);
4097
- p2 += s2;
4098
- }
4099
- } else {
4100
- for (; i--;) {
4101
- GET_DATA_STRIDE(p1, s1, dtype, x);
4102
- b = (m_isposinf(x)) ? 1 : 0;
4103
- STORE_BIT(a2, p2, b);
4104
- p2 += s2;
4105
- }
4106
- }
4107
- }
4108
-
4109
- static VALUE dfloat_isposinf(VALUE self) {
4110
- ndfunc_arg_in_t ain[1] = { { cT, 0 } };
4111
- ndfunc_arg_out_t aout[1] = { { numo_cBit, 0 } };
4112
- ndfunc_t ndf = { iter_dfloat_isposinf, FULL_LOOP, 1, 1, ain, aout };
4113
-
4114
- return na_ndloop(&ndf, 1, self);
4115
- }
4116
-
4117
- static void iter_dfloat_isneginf(na_loop_t* const lp) {
4118
- size_t i;
4119
- char* p1;
4120
- BIT_DIGIT* a2;
4121
- size_t p2;
4122
- ssize_t s1, s2;
4123
- size_t* idx1;
4124
- dtype x;
4125
- BIT_DIGIT b;
4126
- INIT_COUNTER(lp, i);
4127
- INIT_PTR_IDX(lp, 0, p1, s1, idx1);
4128
- INIT_PTR_BIT(lp, 1, a2, p2, s2);
4129
- if (idx1) {
4130
- for (; i--;) {
4131
- GET_DATA_INDEX(p1, idx1, dtype, x);
4132
- b = (m_isneginf(x)) ? 1 : 0;
4133
- STORE_BIT(a2, p2, b);
4134
- p2 += s2;
4135
- }
4136
- } else {
4137
- for (; i--;) {
4138
- GET_DATA_STRIDE(p1, s1, dtype, x);
4139
- b = (m_isneginf(x)) ? 1 : 0;
4140
- STORE_BIT(a2, p2, b);
4141
- p2 += s2;
4142
- }
4143
- }
4144
- }
4145
-
4146
- static VALUE dfloat_isneginf(VALUE self) {
4147
- ndfunc_arg_in_t ain[1] = { { cT, 0 } };
4148
- ndfunc_arg_out_t aout[1] = { { numo_cBit, 0 } };
4149
- ndfunc_t ndf = { iter_dfloat_isneginf, FULL_LOOP, 1, 1, ain, aout };
4150
-
4151
- return na_ndloop(&ndf, 1, self);
4152
- }
4153
-
4154
- static void iter_dfloat_isfinite(na_loop_t* const lp) {
4155
- size_t i;
4156
- char* p1;
4157
- BIT_DIGIT* a2;
4158
- size_t p2;
4159
- ssize_t s1, s2;
4160
- size_t* idx1;
4161
- dtype x;
4162
- BIT_DIGIT b;
4163
- INIT_COUNTER(lp, i);
4164
- INIT_PTR_IDX(lp, 0, p1, s1, idx1);
4165
- INIT_PTR_BIT(lp, 1, a2, p2, s2);
4166
- if (idx1) {
4167
- for (; i--;) {
4168
- GET_DATA_INDEX(p1, idx1, dtype, x);
4169
- b = (m_isfinite(x)) ? 1 : 0;
4170
- STORE_BIT(a2, p2, b);
4171
- p2 += s2;
4172
- }
4173
- } else {
4174
- for (; i--;) {
4175
- GET_DATA_STRIDE(p1, s1, dtype, x);
4176
- b = (m_isfinite(x)) ? 1 : 0;
4177
- STORE_BIT(a2, p2, b);
4178
- p2 += s2;
4179
- }
4180
- }
4181
- }
4182
-
4183
- static VALUE dfloat_isfinite(VALUE self) {
4184
- ndfunc_arg_in_t ain[1] = { { cT, 0 } };
4185
- ndfunc_arg_out_t aout[1] = { { numo_cBit, 0 } };
4186
- ndfunc_t ndf = { iter_dfloat_isfinite, FULL_LOOP, 1, 1, ain, aout };
4187
-
4188
- return na_ndloop(&ndf, 1, self);
4189
- }
4190
-
4191
- static void iter_dfloat_sum(na_loop_t* const lp) {
4192
- size_t n;
4193
- char *p1, *p2;
4194
- ssize_t s1;
4195
-
4196
- INIT_COUNTER(lp, n);
4197
- INIT_PTR(lp, 0, p1, s1);
4198
- p2 = lp->args[1].ptr + lp->args[1].iter[0].pos;
4199
-
4200
- *(dtype*)p2 = f_sum(n, p1, s1);
4201
- }
4202
- static void iter_dfloat_sum_nan(na_loop_t* const lp) {
4203
- size_t n;
4204
- char *p1, *p2;
4205
- ssize_t s1;
4206
-
4207
- INIT_COUNTER(lp, n);
4208
- INIT_PTR(lp, 0, p1, s1);
4209
- p2 = lp->args[1].ptr + lp->args[1].iter[0].pos;
4210
-
4211
- *(dtype*)p2 = f_sum_nan(n, p1, s1);
4212
- }
4213
-
4214
- static VALUE dfloat_sum(int argc, VALUE* argv, VALUE self) {
4215
- VALUE v, reduce;
4216
- ndfunc_arg_in_t ain[2] = { { cT, 0 }, { sym_reduce, 0 } };
4217
- ndfunc_arg_out_t aout[1] = { { cT, 0 } };
4218
- ndfunc_t ndf = { iter_dfloat_sum, STRIDE_LOOP_NIP | NDF_FLAT_REDUCE, 2, 1, ain, aout };
4219
-
4220
- reduce = na_reduce_dimension(argc, argv, 1, &self, &ndf, iter_dfloat_sum_nan);
4221
-
4222
- v = na_ndloop(&ndf, 2, self, reduce);
4223
-
4224
- return dfloat_extract(v);
4225
- }
4226
-
4227
- static void iter_dfloat_prod(na_loop_t* const lp) {
4228
- size_t n;
4229
- char *p1, *p2;
4230
- ssize_t s1;
4231
-
4232
- INIT_COUNTER(lp, n);
4233
- INIT_PTR(lp, 0, p1, s1);
4234
- p2 = lp->args[1].ptr + lp->args[1].iter[0].pos;
4235
-
4236
- *(dtype*)p2 = f_prod(n, p1, s1);
4237
- }
4238
- static void iter_dfloat_prod_nan(na_loop_t* const lp) {
4239
- size_t n;
4240
- char *p1, *p2;
4241
- ssize_t s1;
4242
-
4243
- INIT_COUNTER(lp, n);
4244
- INIT_PTR(lp, 0, p1, s1);
4245
- p2 = lp->args[1].ptr + lp->args[1].iter[0].pos;
4246
-
4247
- *(dtype*)p2 = f_prod_nan(n, p1, s1);
4248
- }
4249
-
4250
- static VALUE dfloat_prod(int argc, VALUE* argv, VALUE self) {
4251
- VALUE v, reduce;
4252
- ndfunc_arg_in_t ain[2] = { { cT, 0 }, { sym_reduce, 0 } };
4253
- ndfunc_arg_out_t aout[1] = { { cT, 0 } };
4254
- ndfunc_t ndf = { iter_dfloat_prod, STRIDE_LOOP_NIP | NDF_FLAT_REDUCE, 2, 1, ain, aout };
4255
-
4256
- reduce = na_reduce_dimension(argc, argv, 1, &self, &ndf, iter_dfloat_prod_nan);
4257
-
4258
- v = na_ndloop(&ndf, 2, self, reduce);
4259
-
4260
- return dfloat_extract(v);
4261
- }
4262
-
4263
- static void iter_dfloat_kahan_sum(na_loop_t* const lp) {
4264
- size_t n;
4265
- char *p1, *p2;
4266
- ssize_t s1;
4267
-
4268
- INIT_COUNTER(lp, n);
4269
- INIT_PTR(lp, 0, p1, s1);
4270
- p2 = lp->args[1].ptr + lp->args[1].iter[0].pos;
4271
-
4272
- *(dtype*)p2 = f_kahan_sum(n, p1, s1);
4273
- }
4274
- static void iter_dfloat_kahan_sum_nan(na_loop_t* const lp) {
4275
- size_t n;
4276
- char *p1, *p2;
4277
- ssize_t s1;
4278
-
4279
- INIT_COUNTER(lp, n);
4280
- INIT_PTR(lp, 0, p1, s1);
4281
- p2 = lp->args[1].ptr + lp->args[1].iter[0].pos;
4282
-
4283
- *(dtype*)p2 = f_kahan_sum_nan(n, p1, s1);
4284
- }
4285
-
4286
- static VALUE dfloat_kahan_sum(int argc, VALUE* argv, VALUE self) {
4287
- VALUE v, reduce;
4288
- ndfunc_arg_in_t ain[2] = { { cT, 0 }, { sym_reduce, 0 } };
4289
- ndfunc_arg_out_t aout[1] = { { cT, 0 } };
4290
- ndfunc_t ndf = { iter_dfloat_kahan_sum, STRIDE_LOOP_NIP | NDF_FLAT_REDUCE, 2, 1, ain, aout };
4291
-
4292
- reduce = na_reduce_dimension(argc, argv, 1, &self, &ndf, iter_dfloat_kahan_sum_nan);
4293
-
4294
- v = na_ndloop(&ndf, 2, self, reduce);
4295
-
4296
- return dfloat_extract(v);
4297
- }
4298
-
4299
- static void iter_dfloat_min(na_loop_t* const lp) {
4300
- size_t n;
4301
- char *p1, *p2;
4302
- ssize_t s1;
4303
-
4304
- INIT_COUNTER(lp, n);
4305
- INIT_PTR(lp, 0, p1, s1);
4306
- p2 = lp->args[1].ptr + lp->args[1].iter[0].pos;
4307
-
4308
- *(dtype*)p2 = f_min(n, p1, s1);
4309
- }
4310
- static void iter_dfloat_min_nan(na_loop_t* const lp) {
4311
- size_t n;
4312
- char *p1, *p2;
4313
- ssize_t s1;
4314
-
4315
- INIT_COUNTER(lp, n);
4316
- INIT_PTR(lp, 0, p1, s1);
4317
- p2 = lp->args[1].ptr + lp->args[1].iter[0].pos;
4318
-
4319
- *(dtype*)p2 = f_min_nan(n, p1, s1);
4320
- }
4321
-
4322
- static VALUE dfloat_min(int argc, VALUE* argv, VALUE self) {
4323
- VALUE v, reduce;
4324
- ndfunc_arg_in_t ain[2] = { { cT, 0 }, { sym_reduce, 0 } };
4325
- ndfunc_arg_out_t aout[1] = { { cT, 0 } };
4326
- ndfunc_t ndf = { iter_dfloat_min, STRIDE_LOOP_NIP | NDF_FLAT_REDUCE, 2, 1, ain, aout };
4327
-
4328
- reduce = na_reduce_dimension(argc, argv, 1, &self, &ndf, iter_dfloat_min_nan);
4329
-
4330
- v = na_ndloop(&ndf, 2, self, reduce);
4331
-
4332
- return dfloat_extract(v);
4333
- }
4334
-
4335
- static void iter_dfloat_max(na_loop_t* const lp) {
4336
- size_t n;
4337
- char *p1, *p2;
4338
- ssize_t s1;
4339
-
4340
- INIT_COUNTER(lp, n);
4341
- INIT_PTR(lp, 0, p1, s1);
4342
- p2 = lp->args[1].ptr + lp->args[1].iter[0].pos;
4343
-
4344
- *(dtype*)p2 = f_max(n, p1, s1);
4345
- }
4346
- static void iter_dfloat_max_nan(na_loop_t* const lp) {
4347
- size_t n;
4348
- char *p1, *p2;
4349
- ssize_t s1;
4350
-
4351
- INIT_COUNTER(lp, n);
4352
- INIT_PTR(lp, 0, p1, s1);
4353
- p2 = lp->args[1].ptr + lp->args[1].iter[0].pos;
4354
-
4355
- *(dtype*)p2 = f_max_nan(n, p1, s1);
4356
- }
4357
-
4358
- static VALUE dfloat_max(int argc, VALUE* argv, VALUE self) {
4359
- VALUE v, reduce;
4360
- ndfunc_arg_in_t ain[2] = { { cT, 0 }, { sym_reduce, 0 } };
4361
- ndfunc_arg_out_t aout[1] = { { cT, 0 } };
4362
- ndfunc_t ndf = { iter_dfloat_max, STRIDE_LOOP_NIP | NDF_FLAT_REDUCE, 2, 1, ain, aout };
4363
-
4364
- reduce = na_reduce_dimension(argc, argv, 1, &self, &ndf, iter_dfloat_max_nan);
4365
-
4366
- v = na_ndloop(&ndf, 2, self, reduce);
4367
-
4368
- return dfloat_extract(v);
4369
- }
4370
-
4371
- static void iter_dfloat_ptp(na_loop_t* const lp) {
4372
- size_t n;
4373
- char *p1, *p2;
4374
- ssize_t s1;
4375
-
4376
- INIT_COUNTER(lp, n);
4377
- INIT_PTR(lp, 0, p1, s1);
4378
- p2 = lp->args[1].ptr + lp->args[1].iter[0].pos;
4379
-
4380
- *(dtype*)p2 = f_ptp(n, p1, s1);
4381
- }
4382
- static void iter_dfloat_ptp_nan(na_loop_t* const lp) {
4383
- size_t n;
4384
- char *p1, *p2;
4385
- ssize_t s1;
4386
-
4387
- INIT_COUNTER(lp, n);
4388
- INIT_PTR(lp, 0, p1, s1);
4389
- p2 = lp->args[1].ptr + lp->args[1].iter[0].pos;
4390
-
4391
- *(dtype*)p2 = f_ptp_nan(n, p1, s1);
4392
- }
4393
-
4394
- static VALUE dfloat_ptp(int argc, VALUE* argv, VALUE self) {
4395
- VALUE v, reduce;
4396
- ndfunc_arg_in_t ain[2] = { { cT, 0 }, { sym_reduce, 0 } };
4397
- ndfunc_arg_out_t aout[1] = { { cT, 0 } };
4398
- ndfunc_t ndf = { iter_dfloat_ptp, STRIDE_LOOP_NIP | NDF_FLAT_REDUCE, 2, 1, ain, aout };
4399
-
4400
- reduce = na_reduce_dimension(argc, argv, 1, &self, &ndf, iter_dfloat_ptp_nan);
4401
-
4402
- v = na_ndloop(&ndf, 2, self, reduce);
4403
-
4404
- return dfloat_extract(v);
4405
- }
4406
-
4407
- #define idx_t int64_t
4408
- static void iter_dfloat_max_index_index64(na_loop_t* const lp) {
4409
- size_t n, idx;
4410
- char *d_ptr, *i_ptr, *o_ptr;
4411
- ssize_t d_step, i_step;
4412
-
4413
- INIT_COUNTER(lp, n);
4414
- INIT_PTR(lp, 0, d_ptr, d_step);
4415
-
4416
- idx = f_max_index(n, d_ptr, d_step);
4417
-
4418
- INIT_PTR(lp, 1, i_ptr, i_step);
4419
- o_ptr = NDL_PTR(lp, 2);
4420
- *(idx_t*)o_ptr = *(idx_t*)(i_ptr + i_step * idx);
4421
- }
4422
- #undef idx_t
4423
-
4424
- #define idx_t int32_t
4425
- static void iter_dfloat_max_index_index32(na_loop_t* const lp) {
4426
- size_t n, idx;
4427
- char *d_ptr, *i_ptr, *o_ptr;
4428
- ssize_t d_step, i_step;
4429
-
4430
- INIT_COUNTER(lp, n);
4431
- INIT_PTR(lp, 0, d_ptr, d_step);
4432
-
4433
- idx = f_max_index(n, d_ptr, d_step);
4434
-
4435
- INIT_PTR(lp, 1, i_ptr, i_step);
4436
- o_ptr = NDL_PTR(lp, 2);
4437
- *(idx_t*)o_ptr = *(idx_t*)(i_ptr + i_step * idx);
4438
- }
4439
- #undef idx_t
4440
-
4441
- #define idx_t int64_t
4442
- static void iter_dfloat_max_index_index64_nan(na_loop_t* const lp) {
4443
- size_t n, idx;
4444
- char *d_ptr, *i_ptr, *o_ptr;
4445
- ssize_t d_step, i_step;
4446
-
4447
- INIT_COUNTER(lp, n);
4448
- INIT_PTR(lp, 0, d_ptr, d_step);
4449
-
4450
- idx = f_max_index_nan(n, d_ptr, d_step);
4451
-
4452
- INIT_PTR(lp, 1, i_ptr, i_step);
4453
- o_ptr = NDL_PTR(lp, 2);
4454
- *(idx_t*)o_ptr = *(idx_t*)(i_ptr + i_step * idx);
4455
- }
4456
- #undef idx_t
4457
-
4458
- #define idx_t int32_t
4459
- static void iter_dfloat_max_index_index32_nan(na_loop_t* const lp) {
4460
- size_t n, idx;
4461
- char *d_ptr, *i_ptr, *o_ptr;
4462
- ssize_t d_step, i_step;
4463
-
4464
- INIT_COUNTER(lp, n);
4465
- INIT_PTR(lp, 0, d_ptr, d_step);
4466
-
4467
- idx = f_max_index_nan(n, d_ptr, d_step);
4468
-
4469
- INIT_PTR(lp, 1, i_ptr, i_step);
4470
- o_ptr = NDL_PTR(lp, 2);
4471
- *(idx_t*)o_ptr = *(idx_t*)(i_ptr + i_step * idx);
4472
- }
4473
- #undef idx_t
4474
-
4475
- static VALUE dfloat_max_index(int argc, VALUE* argv, VALUE self) {
4476
- narray_t* na;
4477
- VALUE idx, reduce;
4478
- ndfunc_arg_in_t ain[3] = { { Qnil, 0 }, { Qnil, 0 }, { sym_reduce, 0 } };
4479
- ndfunc_arg_out_t aout[1] = { { 0, 0, 0 } };
4480
- ndfunc_t ndf = { 0, STRIDE_LOOP_NIP | NDF_FLAT_REDUCE | NDF_EXTRACT, 3, 1, ain, aout };
4481
-
4482
- GetNArray(self, na);
4483
- if (na->ndim == 0) {
4484
- return INT2FIX(0);
4485
- }
4486
- if (na->size > (~(u_int32_t)0)) {
4487
- aout[0].type = numo_cInt64;
4488
- idx = nary_new(numo_cInt64, na->ndim, na->shape);
4489
- ndf.func = iter_dfloat_max_index_index64;
4490
-
4491
- reduce = na_reduce_dimension(argc, argv, 1, &self, &ndf, iter_dfloat_max_index_index64_nan);
4492
-
4493
- } else {
4494
- aout[0].type = numo_cInt32;
4495
- idx = nary_new(numo_cInt32, na->ndim, na->shape);
4496
- ndf.func = iter_dfloat_max_index_index32;
4497
-
4498
- reduce = na_reduce_dimension(argc, argv, 1, &self, &ndf, iter_dfloat_max_index_index32_nan);
4499
- }
4500
- rb_funcall(idx, rb_intern("seq"), 0);
4501
-
4502
- return na_ndloop(&ndf, 3, self, idx, reduce);
4503
- }
4504
-
4505
- #define idx_t int64_t
4506
- static void iter_dfloat_min_index_index64(na_loop_t* const lp) {
4507
- size_t n, idx;
4508
- char *d_ptr, *i_ptr, *o_ptr;
4509
- ssize_t d_step, i_step;
4510
-
4511
- INIT_COUNTER(lp, n);
4512
- INIT_PTR(lp, 0, d_ptr, d_step);
4513
-
4514
- idx = f_min_index(n, d_ptr, d_step);
4515
-
4516
- INIT_PTR(lp, 1, i_ptr, i_step);
4517
- o_ptr = NDL_PTR(lp, 2);
4518
- *(idx_t*)o_ptr = *(idx_t*)(i_ptr + i_step * idx);
4519
- }
4520
- #undef idx_t
4521
-
4522
- #define idx_t int32_t
4523
- static void iter_dfloat_min_index_index32(na_loop_t* const lp) {
4524
- size_t n, idx;
4525
- char *d_ptr, *i_ptr, *o_ptr;
4526
- ssize_t d_step, i_step;
4527
-
4528
- INIT_COUNTER(lp, n);
4529
- INIT_PTR(lp, 0, d_ptr, d_step);
4530
-
4531
- idx = f_min_index(n, d_ptr, d_step);
4532
-
4533
- INIT_PTR(lp, 1, i_ptr, i_step);
4534
- o_ptr = NDL_PTR(lp, 2);
4535
- *(idx_t*)o_ptr = *(idx_t*)(i_ptr + i_step * idx);
4536
- }
4537
- #undef idx_t
4538
-
4539
- #define idx_t int64_t
4540
- static void iter_dfloat_min_index_index64_nan(na_loop_t* const lp) {
4541
- size_t n, idx;
4542
- char *d_ptr, *i_ptr, *o_ptr;
4543
- ssize_t d_step, i_step;
4544
-
4545
- INIT_COUNTER(lp, n);
4546
- INIT_PTR(lp, 0, d_ptr, d_step);
4547
-
4548
- idx = f_min_index_nan(n, d_ptr, d_step);
4549
-
4550
- INIT_PTR(lp, 1, i_ptr, i_step);
4551
- o_ptr = NDL_PTR(lp, 2);
4552
- *(idx_t*)o_ptr = *(idx_t*)(i_ptr + i_step * idx);
4553
- }
4554
- #undef idx_t
4555
-
4556
- #define idx_t int32_t
4557
- static void iter_dfloat_min_index_index32_nan(na_loop_t* const lp) {
4558
- size_t n, idx;
4559
- char *d_ptr, *i_ptr, *o_ptr;
4560
- ssize_t d_step, i_step;
4561
-
4562
- INIT_COUNTER(lp, n);
4563
- INIT_PTR(lp, 0, d_ptr, d_step);
4564
-
4565
- idx = f_min_index_nan(n, d_ptr, d_step);
4566
-
4567
- INIT_PTR(lp, 1, i_ptr, i_step);
4568
- o_ptr = NDL_PTR(lp, 2);
4569
- *(idx_t*)o_ptr = *(idx_t*)(i_ptr + i_step * idx);
4570
- }
4571
- #undef idx_t
4572
-
4573
- static VALUE dfloat_min_index(int argc, VALUE* argv, VALUE self) {
4574
- narray_t* na;
4575
- VALUE idx, reduce;
4576
- ndfunc_arg_in_t ain[3] = { { Qnil, 0 }, { Qnil, 0 }, { sym_reduce, 0 } };
4577
- ndfunc_arg_out_t aout[1] = { { 0, 0, 0 } };
4578
- ndfunc_t ndf = { 0, STRIDE_LOOP_NIP | NDF_FLAT_REDUCE | NDF_EXTRACT, 3, 1, ain, aout };
4579
-
4580
- GetNArray(self, na);
4581
- if (na->ndim == 0) {
4582
- return INT2FIX(0);
4583
- }
4584
- if (na->size > (~(u_int32_t)0)) {
4585
- aout[0].type = numo_cInt64;
4586
- idx = nary_new(numo_cInt64, na->ndim, na->shape);
4587
- ndf.func = iter_dfloat_min_index_index64;
4588
-
4589
- reduce = na_reduce_dimension(argc, argv, 1, &self, &ndf, iter_dfloat_min_index_index64_nan);
4590
-
4591
- } else {
4592
- aout[0].type = numo_cInt32;
4593
- idx = nary_new(numo_cInt32, na->ndim, na->shape);
4594
- ndf.func = iter_dfloat_min_index_index32;
4595
-
4596
- reduce = na_reduce_dimension(argc, argv, 1, &self, &ndf, iter_dfloat_min_index_index32_nan);
4597
- }
4598
- rb_funcall(idx, rb_intern("seq"), 0);
4599
-
4600
- return na_ndloop(&ndf, 3, self, idx, reduce);
4601
- }
4602
-
4603
- #define idx_t int64_t
4604
- static void iter_dfloat_argmax_arg64(na_loop_t* const lp) {
4605
- size_t n, idx;
4606
- char *d_ptr, *o_ptr;
4607
- ssize_t d_step;
4608
-
4609
- INIT_COUNTER(lp, n);
4610
- INIT_PTR(lp, 0, d_ptr, d_step);
4611
-
4612
- idx = f_max_index(n, d_ptr, d_step);
4613
-
4614
- o_ptr = NDL_PTR(lp, 1);
4615
- *(idx_t*)o_ptr = (idx_t)idx;
4616
- }
4617
- #undef idx_t
4618
-
4619
- #define idx_t int32_t
4620
- static void iter_dfloat_argmax_arg32(na_loop_t* const lp) {
4621
- size_t n, idx;
4622
- char *d_ptr, *o_ptr;
4623
- ssize_t d_step;
4624
-
4625
- INIT_COUNTER(lp, n);
4626
- INIT_PTR(lp, 0, d_ptr, d_step);
4627
-
4628
- idx = f_max_index(n, d_ptr, d_step);
4629
-
4630
- o_ptr = NDL_PTR(lp, 1);
4631
- *(idx_t*)o_ptr = (idx_t)idx;
4632
- }
4633
- #undef idx_t
4634
-
4635
- #define idx_t int64_t
4636
- static void iter_dfloat_argmax_arg64_nan(na_loop_t* const lp) {
4637
- size_t n, idx;
4638
- char *d_ptr, *o_ptr;
4639
- ssize_t d_step;
4640
-
4641
- INIT_COUNTER(lp, n);
4642
- INIT_PTR(lp, 0, d_ptr, d_step);
4643
-
4644
- idx = f_max_index_nan(n, d_ptr, d_step);
4645
-
4646
- o_ptr = NDL_PTR(lp, 1);
4647
- *(idx_t*)o_ptr = (idx_t)idx;
4648
- }
4649
- #undef idx_t
4650
-
4651
- #define idx_t int32_t
4652
- static void iter_dfloat_argmax_arg32_nan(na_loop_t* const lp) {
4653
- size_t n, idx;
4654
- char *d_ptr, *o_ptr;
4655
- ssize_t d_step;
4656
-
4657
- INIT_COUNTER(lp, n);
4658
- INIT_PTR(lp, 0, d_ptr, d_step);
4659
-
4660
- idx = f_max_index_nan(n, d_ptr, d_step);
4661
-
4662
- o_ptr = NDL_PTR(lp, 1);
4663
- *(idx_t*)o_ptr = (idx_t)idx;
4664
- }
4665
- #undef idx_t
4666
-
4667
- static VALUE dfloat_argmax(int argc, VALUE* argv, VALUE self) {
4668
- narray_t* na;
4669
- VALUE reduce;
4670
- ndfunc_arg_in_t ain[2] = { { Qnil, 0 }, { sym_reduce, 0 } };
4671
- ndfunc_arg_out_t aout[1] = { { 0, 0, 0 } };
4672
- ndfunc_t ndf = { 0, STRIDE_LOOP_NIP | NDF_FLAT_REDUCE | NDF_EXTRACT, 2, 1, ain, aout };
4673
-
4674
- GetNArray(self, na);
4675
- if (na->ndim == 0) {
4676
- return INT2FIX(0);
4677
- }
4678
- if (na->size > (~(u_int32_t)0)) {
4679
- aout[0].type = numo_cInt64;
4680
- ndf.func = iter_dfloat_argmax_arg64;
4681
-
4682
- reduce = na_reduce_dimension(argc, argv, 1, &self, &ndf, iter_dfloat_argmax_arg64_nan);
4683
-
4684
- } else {
4685
- aout[0].type = numo_cInt32;
4686
- ndf.func = iter_dfloat_argmax_arg32;
4687
-
4688
- reduce = na_reduce_dimension(argc, argv, 1, &self, &ndf, iter_dfloat_argmax_arg32_nan);
4689
- }
4690
-
4691
- return na_ndloop(&ndf, 2, self, reduce);
4692
- }
4693
-
4694
- #define idx_t int64_t
4695
- static void iter_dfloat_argmin_arg64(na_loop_t* const lp) {
4696
- size_t n, idx;
4697
- char *d_ptr, *o_ptr;
4698
- ssize_t d_step;
4699
-
4700
- INIT_COUNTER(lp, n);
4701
- INIT_PTR(lp, 0, d_ptr, d_step);
4702
-
4703
- idx = f_min_index(n, d_ptr, d_step);
4704
-
4705
- o_ptr = NDL_PTR(lp, 1);
4706
- *(idx_t*)o_ptr = (idx_t)idx;
4707
- }
4708
- #undef idx_t
4709
-
4710
- #define idx_t int32_t
4711
- static void iter_dfloat_argmin_arg32(na_loop_t* const lp) {
4712
- size_t n, idx;
4713
- char *d_ptr, *o_ptr;
4714
- ssize_t d_step;
4715
-
4716
- INIT_COUNTER(lp, n);
4717
- INIT_PTR(lp, 0, d_ptr, d_step);
4718
-
4719
- idx = f_min_index(n, d_ptr, d_step);
4720
-
4721
- o_ptr = NDL_PTR(lp, 1);
4722
- *(idx_t*)o_ptr = (idx_t)idx;
4723
- }
4724
- #undef idx_t
4725
-
4726
- #define idx_t int64_t
4727
- static void iter_dfloat_argmin_arg64_nan(na_loop_t* const lp) {
4728
- size_t n, idx;
4729
- char *d_ptr, *o_ptr;
4730
- ssize_t d_step;
4731
-
4732
- INIT_COUNTER(lp, n);
4733
- INIT_PTR(lp, 0, d_ptr, d_step);
4734
-
4735
- idx = f_min_index_nan(n, d_ptr, d_step);
4736
-
4737
- o_ptr = NDL_PTR(lp, 1);
4738
- *(idx_t*)o_ptr = (idx_t)idx;
4739
- }
4740
- #undef idx_t
4741
-
4742
- #define idx_t int32_t
4743
- static void iter_dfloat_argmin_arg32_nan(na_loop_t* const lp) {
4744
- size_t n, idx;
4745
- char *d_ptr, *o_ptr;
4746
- ssize_t d_step;
4747
-
4748
- INIT_COUNTER(lp, n);
4749
- INIT_PTR(lp, 0, d_ptr, d_step);
4750
-
4751
- idx = f_min_index_nan(n, d_ptr, d_step);
4752
-
4753
- o_ptr = NDL_PTR(lp, 1);
4754
- *(idx_t*)o_ptr = (idx_t)idx;
4755
- }
4756
- #undef idx_t
4757
-
4758
- static VALUE dfloat_argmin(int argc, VALUE* argv, VALUE self) {
4759
- narray_t* na;
4760
- VALUE reduce;
4761
- ndfunc_arg_in_t ain[2] = { { Qnil, 0 }, { sym_reduce, 0 } };
4762
- ndfunc_arg_out_t aout[1] = { { 0, 0, 0 } };
4763
- ndfunc_t ndf = { 0, STRIDE_LOOP_NIP | NDF_FLAT_REDUCE | NDF_EXTRACT, 2, 1, ain, aout };
4764
-
4765
- GetNArray(self, na);
4766
- if (na->ndim == 0) {
4767
- return INT2FIX(0);
4768
- }
4769
- if (na->size > (~(u_int32_t)0)) {
4770
- aout[0].type = numo_cInt64;
4771
- ndf.func = iter_dfloat_argmin_arg64;
4772
-
4773
- reduce = na_reduce_dimension(argc, argv, 1, &self, &ndf, iter_dfloat_argmin_arg64_nan);
4774
-
4775
- } else {
4776
- aout[0].type = numo_cInt32;
4777
- ndf.func = iter_dfloat_argmin_arg32;
4778
-
4779
- reduce = na_reduce_dimension(argc, argv, 1, &self, &ndf, iter_dfloat_argmin_arg32_nan);
4780
- }
4781
-
4782
- return na_ndloop(&ndf, 2, self, reduce);
4783
- }
4784
-
4785
- static void iter_dfloat_minmax(na_loop_t* const lp) {
4786
- size_t n;
4787
- char* p1;
4788
- ssize_t s1;
4789
- dtype xmin, xmax;
4790
-
4791
- INIT_COUNTER(lp, n);
4792
- INIT_PTR(lp, 0, p1, s1);
4793
-
4794
- f_minmax(n, p1, s1, &xmin, &xmax);
4795
-
4796
- *(dtype*)(lp->args[1].ptr + lp->args[1].iter[0].pos) = xmin;
4797
- *(dtype*)(lp->args[2].ptr + lp->args[2].iter[0].pos) = xmax;
4798
- }
4799
- static void iter_dfloat_minmax_nan(na_loop_t* const lp) {
4800
- size_t n;
4801
- char* p1;
4802
- ssize_t s1;
4803
- dtype xmin, xmax;
4804
-
4805
- INIT_COUNTER(lp, n);
4806
- INIT_PTR(lp, 0, p1, s1);
4807
-
4808
- f_minmax_nan(n, p1, s1, &xmin, &xmax);
4809
-
4810
- *(dtype*)(lp->args[1].ptr + lp->args[1].iter[0].pos) = xmin;
4811
- *(dtype*)(lp->args[2].ptr + lp->args[2].iter[0].pos) = xmax;
4812
- }
4813
-
4814
- static VALUE dfloat_minmax(int argc, VALUE* argv, VALUE self) {
4815
- VALUE reduce;
4816
- ndfunc_arg_in_t ain[2] = { { cT, 0 }, { sym_reduce, 0 } };
4817
- ndfunc_arg_out_t aout[2] = { { cT, 0 }, { cT, 0 } };
4818
- ndfunc_t ndf = {
4819
- iter_dfloat_minmax, STRIDE_LOOP_NIP | NDF_FLAT_REDUCE | NDF_EXTRACT, 2, 2, ain, aout
4820
- };
4821
-
4822
- reduce = na_reduce_dimension(argc, argv, 1, &self, &ndf, iter_dfloat_minmax_nan);
4823
-
4824
- return na_ndloop(&ndf, 2, self, reduce);
4825
- }
4826
-
4827
- static void iter_dfloat_s_maximum(na_loop_t* const lp) {
4828
- size_t i, n;
4829
- char *p1, *p2, *p3;
4830
- ssize_t s1, s2, s3;
4831
-
4832
- INIT_COUNTER(lp, n);
4833
- INIT_PTR(lp, 0, p1, s1);
4834
- INIT_PTR(lp, 1, p2, s2);
4835
- INIT_PTR(lp, 2, p3, s3);
4836
-
4837
- for (i = 0; i < n; i++) {
4838
- dtype x, y, z;
4839
- GET_DATA_STRIDE(p1, s1, dtype, x);
4840
- GET_DATA_STRIDE(p2, s2, dtype, y);
4841
- GET_DATA(p3, dtype, z);
4842
- z = f_maximum(x, y);
4843
- SET_DATA_STRIDE(p3, s3, dtype, z);
4844
- }
4845
- }
4846
- static void iter_dfloat_s_maximum_nan(na_loop_t* const lp) {
4847
- size_t i, n;
4848
- char *p1, *p2, *p3;
4849
- ssize_t s1, s2, s3;
4850
-
4851
- INIT_COUNTER(lp, n);
4852
- INIT_PTR(lp, 0, p1, s1);
4853
- INIT_PTR(lp, 1, p2, s2);
4854
- INIT_PTR(lp, 2, p3, s3);
4855
-
4856
- for (i = 0; i < n; i++) {
4857
- dtype x, y, z;
4858
- GET_DATA_STRIDE(p1, s1, dtype, x);
4859
- GET_DATA_STRIDE(p2, s2, dtype, y);
4860
- GET_DATA(p3, dtype, z);
4861
- z = f_maximum_nan(x, y);
4862
- SET_DATA_STRIDE(p3, s3, dtype, z);
4863
- }
4864
- }
4865
-
4866
- static VALUE dfloat_s_maximum(int argc, VALUE* argv, VALUE mod) {
4867
- VALUE a1 = Qnil;
4868
- VALUE a2 = Qnil;
4869
- ndfunc_arg_in_t ain[2] = { { cT, 0 }, { cT, 0 } };
4870
- ndfunc_arg_out_t aout[1] = { { cT, 0 } };
4871
- ndfunc_t ndf = { iter_dfloat_s_maximum, STRIDE_LOOP_NIP, 2, 1, ain, aout };
4872
-
4873
- VALUE kw_hash = Qnil;
4874
- ID kw_table[1] = { id_nan };
4875
- VALUE opts[1] = { Qundef };
4876
-
4877
- rb_scan_args(argc, argv, "20:", &a1, &a2, &kw_hash);
4878
- rb_get_kwargs(kw_hash, kw_table, 0, 1, opts);
4879
- if (opts[0] != Qundef) {
4880
- ndf.func = iter_dfloat_s_maximum_nan;
4881
- }
4882
-
4883
- return na_ndloop(&ndf, 2, a1, a2);
4884
- }
4885
-
4886
- static void iter_dfloat_s_minimum(na_loop_t* const lp) {
4887
- size_t i, n;
4888
- char *p1, *p2, *p3;
4889
- ssize_t s1, s2, s3;
4890
-
4891
- INIT_COUNTER(lp, n);
4892
- INIT_PTR(lp, 0, p1, s1);
4893
- INIT_PTR(lp, 1, p2, s2);
4894
- INIT_PTR(lp, 2, p3, s3);
4895
-
4896
- for (i = 0; i < n; i++) {
4897
- dtype x, y, z;
4898
- GET_DATA_STRIDE(p1, s1, dtype, x);
4899
- GET_DATA_STRIDE(p2, s2, dtype, y);
4900
- GET_DATA(p3, dtype, z);
4901
- z = f_minimum(x, y);
4902
- SET_DATA_STRIDE(p3, s3, dtype, z);
4903
- }
4904
- }
4905
- static void iter_dfloat_s_minimum_nan(na_loop_t* const lp) {
4906
- size_t i, n;
4907
- char *p1, *p2, *p3;
4908
- ssize_t s1, s2, s3;
4909
-
4910
- INIT_COUNTER(lp, n);
4911
- INIT_PTR(lp, 0, p1, s1);
4912
- INIT_PTR(lp, 1, p2, s2);
4913
- INIT_PTR(lp, 2, p3, s3);
4914
-
4915
- for (i = 0; i < n; i++) {
4916
- dtype x, y, z;
4917
- GET_DATA_STRIDE(p1, s1, dtype, x);
4918
- GET_DATA_STRIDE(p2, s2, dtype, y);
4919
- GET_DATA(p3, dtype, z);
4920
- z = f_minimum_nan(x, y);
4921
- SET_DATA_STRIDE(p3, s3, dtype, z);
4922
- }
4923
- }
4924
-
4925
- static VALUE dfloat_s_minimum(int argc, VALUE* argv, VALUE mod) {
4926
- VALUE a1 = Qnil;
4927
- VALUE a2 = Qnil;
4928
- ndfunc_arg_in_t ain[2] = { { cT, 0 }, { cT, 0 } };
4929
- ndfunc_arg_out_t aout[1] = { { cT, 0 } };
4930
- ndfunc_t ndf = { iter_dfloat_s_minimum, STRIDE_LOOP_NIP, 2, 1, ain, aout };
4931
-
4932
- VALUE kw_hash = Qnil;
4933
- ID kw_table[1] = { id_nan };
4934
- VALUE opts[1] = { Qundef };
4935
-
4936
- rb_scan_args(argc, argv, "20:", &a1, &a2, &kw_hash);
4937
- rb_get_kwargs(kw_hash, kw_table, 0, 1, opts);
4938
- if (opts[0] != Qundef) {
4939
- ndf.func = iter_dfloat_s_minimum_nan;
4940
- }
4941
-
4942
- return na_ndloop(&ndf, 2, a1, a2);
4943
- }
4944
-
4945
- static void iter_dfloat_cumsum(na_loop_t* const lp) {
4946
- size_t i;
4947
- char *p1, *p2;
4948
- ssize_t s1, s2;
4949
- dtype x, y;
4950
-
4951
- INIT_COUNTER(lp, i);
4952
- INIT_PTR(lp, 0, p1, s1);
4953
- INIT_PTR(lp, 1, p2, s2);
4954
-
4955
- GET_DATA_STRIDE(p1, s1, dtype, x);
4956
- SET_DATA_STRIDE(p2, s2, dtype, x);
4957
- for (i--; i--;) {
4958
- GET_DATA_STRIDE(p1, s1, dtype, y);
4959
- m_cumsum(x, y);
4960
- SET_DATA_STRIDE(p2, s2, dtype, x);
4961
- }
4962
- }
4963
- static void iter_dfloat_cumsum_nan(na_loop_t* const lp) {
4964
- size_t i;
4965
- char *p1, *p2;
4966
- ssize_t s1, s2;
4967
- dtype x, y;
4968
-
4969
- INIT_COUNTER(lp, i);
4970
- INIT_PTR(lp, 0, p1, s1);
4971
- INIT_PTR(lp, 1, p2, s2);
4972
-
4973
- GET_DATA_STRIDE(p1, s1, dtype, x);
4974
- SET_DATA_STRIDE(p2, s2, dtype, x);
4975
- for (i--; i--;) {
4976
- GET_DATA_STRIDE(p1, s1, dtype, y);
4977
- m_cumsum_nan(x, y);
4978
- SET_DATA_STRIDE(p2, s2, dtype, x);
4979
- }
4980
- }
4981
-
4982
- static VALUE dfloat_cumsum(int argc, VALUE* argv, VALUE self) {
4983
- VALUE reduce;
4984
- ndfunc_arg_in_t ain[2] = { { cT, 0 }, { sym_reduce, 0 } };
4985
- ndfunc_arg_out_t aout[1] = { { cT, 0 } };
4986
- ndfunc_t ndf = {
4987
- iter_dfloat_cumsum, STRIDE_LOOP | NDF_FLAT_REDUCE | NDF_CUM, 2, 1, ain, aout
4988
- };
4989
-
4990
- reduce = na_reduce_dimension(argc, argv, 1, &self, &ndf, iter_dfloat_cumsum_nan);
4991
-
4992
- return na_ndloop(&ndf, 2, self, reduce);
4993
- }
4994
-
4995
- static void iter_dfloat_cumprod(na_loop_t* const lp) {
4996
- size_t i;
4997
- char *p1, *p2;
4998
- ssize_t s1, s2;
4999
- dtype x, y;
5000
-
5001
- INIT_COUNTER(lp, i);
5002
- INIT_PTR(lp, 0, p1, s1);
5003
- INIT_PTR(lp, 1, p2, s2);
5004
-
5005
- GET_DATA_STRIDE(p1, s1, dtype, x);
5006
- SET_DATA_STRIDE(p2, s2, dtype, x);
5007
- for (i--; i--;) {
5008
- GET_DATA_STRIDE(p1, s1, dtype, y);
5009
- m_cumprod(x, y);
5010
- SET_DATA_STRIDE(p2, s2, dtype, x);
5011
- }
5012
- }
5013
- static void iter_dfloat_cumprod_nan(na_loop_t* const lp) {
5014
- size_t i;
5015
- char *p1, *p2;
5016
- ssize_t s1, s2;
5017
- dtype x, y;
5018
-
5019
- INIT_COUNTER(lp, i);
5020
- INIT_PTR(lp, 0, p1, s1);
5021
- INIT_PTR(lp, 1, p2, s2);
5022
-
5023
- GET_DATA_STRIDE(p1, s1, dtype, x);
5024
- SET_DATA_STRIDE(p2, s2, dtype, x);
5025
- for (i--; i--;) {
5026
- GET_DATA_STRIDE(p1, s1, dtype, y);
5027
- m_cumprod_nan(x, y);
5028
- SET_DATA_STRIDE(p2, s2, dtype, x);
1954
+ //
1955
+ }
5029
1956
  }
5030
1957
  }
5031
1958
 
5032
- static VALUE dfloat_cumprod(int argc, VALUE* argv, VALUE self) {
5033
- VALUE reduce;
5034
- ndfunc_arg_in_t ain[2] = { { cT, 0 }, { sym_reduce, 0 } };
1959
+ static VALUE dfloat_square(VALUE self) {
1960
+ ndfunc_arg_in_t ain[1] = { { cT, 0 } };
5035
1961
  ndfunc_arg_out_t aout[1] = { { cT, 0 } };
5036
- ndfunc_t ndf = {
5037
- iter_dfloat_cumprod, STRIDE_LOOP | NDF_FLAT_REDUCE | NDF_CUM, 2, 1, ain, aout
5038
- };
5039
-
5040
- reduce = na_reduce_dimension(argc, argv, 1, &self, &ndf, iter_dfloat_cumprod_nan);
1962
+ ndfunc_t ndf = { iter_dfloat_square, FULL_LOOP, 1, 1, ain, aout };
5041
1963
 
5042
- return na_ndloop(&ndf, 2, self, reduce);
1964
+ return na_ndloop(&ndf, 1, self);
5043
1965
  }
5044
1966
 
5045
- //
5046
- static void iter_dfloat_mulsum(na_loop_t* const lp) {
5047
- size_t i, n;
1967
+ #define check_intdivzero(y) \
1968
+ {}
1969
+
1970
+ static void iter_dfloat_copysign(na_loop_t* const lp) {
1971
+ size_t i = 0;
1972
+ size_t n;
5048
1973
  char *p1, *p2, *p3;
5049
1974
  ssize_t s1, s2, s3;
5050
1975
 
@@ -5053,376 +1978,200 @@ static void iter_dfloat_mulsum(na_loop_t* const lp) {
5053
1978
  INIT_PTR(lp, 1, p2, s2);
5054
1979
  INIT_PTR(lp, 2, p3, s3);
5055
1980
 
5056
- if (s3 == 0) {
5057
- dtype z;
5058
- // Reduce loop
5059
- GET_DATA(p3, dtype, z);
5060
- for (i = 0; i < n; i++) {
5061
- dtype x, y;
5062
- GET_DATA_STRIDE(p1, s1, dtype, x);
5063
- GET_DATA_STRIDE(p2, s2, dtype, y);
5064
- m_mulsum(x, y, z);
5065
- }
5066
- SET_DATA(p3, dtype, z);
5067
- return;
5068
- } else {
5069
- for (i = 0; i < n; i++) {
5070
- dtype x, y, z;
5071
- GET_DATA_STRIDE(p1, s1, dtype, x);
5072
- GET_DATA_STRIDE(p2, s2, dtype, y);
5073
- GET_DATA(p3, dtype, z);
5074
- m_mulsum(x, y, z);
5075
- SET_DATA_STRIDE(p3, s3, dtype, z);
1981
+ //
1982
+ if (is_aligned(p1, sizeof(dtype)) && is_aligned(p2, sizeof(dtype)) &&
1983
+ is_aligned(p3, sizeof(dtype))) {
1984
+
1985
+ if (s1 == sizeof(dtype) && s2 == sizeof(dtype) && s3 == sizeof(dtype)) {
1986
+ if (p1 == p3) { // inplace case
1987
+ for (; i < n; i++) {
1988
+ check_intdivzero(((dtype*)p2)[i]);
1989
+ ((dtype*)p1)[i] = m_copysign(((dtype*)p1)[i], ((dtype*)p2)[i]);
1990
+ }
1991
+ } else {
1992
+ for (; i < n; i++) {
1993
+ check_intdivzero(((dtype*)p2)[i]);
1994
+ ((dtype*)p3)[i] = m_copysign(((dtype*)p1)[i], ((dtype*)p2)[i]);
1995
+ }
1996
+ }
1997
+ return;
5076
1998
  }
5077
- }
5078
- }
5079
- //
5080
- static void iter_dfloat_mulsum_nan(na_loop_t* const lp) {
5081
- size_t i, n;
5082
- char *p1, *p2, *p3;
5083
- ssize_t s1, s2, s3;
5084
1999
 
5085
- INIT_COUNTER(lp, n);
5086
- INIT_PTR(lp, 0, p1, s1);
5087
- INIT_PTR(lp, 1, p2, s2);
5088
- INIT_PTR(lp, 2, p3, s3);
2000
+ if (is_aligned_step(s1, sizeof(dtype)) && is_aligned_step(s2, sizeof(dtype)) &&
2001
+ is_aligned_step(s3, sizeof(dtype))) {
2002
+ //
5089
2003
 
5090
- if (s3 == 0) {
5091
- dtype z;
5092
- // Reduce loop
5093
- GET_DATA(p3, dtype, z);
5094
- for (i = 0; i < n; i++) {
5095
- dtype x, y;
5096
- GET_DATA_STRIDE(p1, s1, dtype, x);
5097
- GET_DATA_STRIDE(p2, s2, dtype, y);
5098
- m_mulsum_nan(x, y, z);
5099
- }
5100
- SET_DATA(p3, dtype, z);
5101
- return;
5102
- } else {
5103
- for (i = 0; i < n; i++) {
5104
- dtype x, y, z;
5105
- GET_DATA_STRIDE(p1, s1, dtype, x);
5106
- GET_DATA_STRIDE(p2, s2, dtype, y);
5107
- GET_DATA(p3, dtype, z);
5108
- m_mulsum_nan(x, y, z);
5109
- SET_DATA_STRIDE(p3, s3, dtype, z);
2004
+ if (s2 == 0) { // Broadcasting from scalar value.
2005
+ check_intdivzero(*(dtype*)p2);
2006
+ if (s1 == sizeof(dtype) && s3 == sizeof(dtype)) {
2007
+ if (p1 == p3) { // inplace case
2008
+ for (; i < n; i++) {
2009
+ ((dtype*)p1)[i] = m_copysign(((dtype*)p1)[i], *(dtype*)p2);
2010
+ }
2011
+ } else {
2012
+ for (; i < n; i++) {
2013
+ ((dtype*)p3)[i] = m_copysign(((dtype*)p1)[i], *(dtype*)p2);
2014
+ }
2015
+ }
2016
+ } else {
2017
+ for (i = 0; i < n; i++) {
2018
+ *(dtype*)p3 = m_copysign(*(dtype*)p1, *(dtype*)p2);
2019
+ p1 += s1;
2020
+ p3 += s3;
2021
+ }
2022
+ }
2023
+ } else {
2024
+ if (p1 == p3) { // inplace case
2025
+ for (i = 0; i < n; i++) {
2026
+ check_intdivzero(*(dtype*)p2);
2027
+ *(dtype*)p1 = m_copysign(*(dtype*)p1, *(dtype*)p2);
2028
+ p1 += s1;
2029
+ p2 += s2;
2030
+ }
2031
+ } else {
2032
+ for (i = 0; i < n; i++) {
2033
+ check_intdivzero(*(dtype*)p2);
2034
+ *(dtype*)p3 = m_copysign(*(dtype*)p1, *(dtype*)p2);
2035
+ p1 += s1;
2036
+ p2 += s2;
2037
+ p3 += s3;
2038
+ }
2039
+ }
2040
+ }
2041
+
2042
+ return;
2043
+ //
5110
2044
  }
5111
2045
  }
2046
+ for (i = 0; i < n; i++) {
2047
+ dtype x, y, z;
2048
+ GET_DATA_STRIDE(p1, s1, dtype, x);
2049
+ GET_DATA_STRIDE(p2, s2, dtype, y);
2050
+ check_intdivzero(y);
2051
+ z = m_copysign(x, y);
2052
+ SET_DATA_STRIDE(p3, s3, dtype, z);
2053
+ }
2054
+ //
5112
2055
  }
5113
- //
2056
+ #undef check_intdivzero
5114
2057
 
5115
- static VALUE dfloat_mulsum_self(int argc, VALUE* argv, VALUE self) {
5116
- VALUE v, reduce;
5117
- VALUE naryv[2];
5118
- ndfunc_arg_in_t ain[4] = { { cT, 0 }, { cT, 0 }, { sym_reduce, 0 }, { sym_init, 0 } };
2058
+ static VALUE dfloat_copysign_self(VALUE self, VALUE other) {
2059
+ ndfunc_arg_in_t ain[2] = { { cT, 0 }, { cT, 0 } };
5119
2060
  ndfunc_arg_out_t aout[1] = { { cT, 0 } };
5120
- ndfunc_t ndf = { iter_dfloat_mulsum, STRIDE_LOOP_NIP, 4, 1, ain, aout };
5121
-
5122
- if (argc < 1) {
5123
- rb_raise(rb_eArgError, "wrong number of arguments (%d for >=1)", argc);
5124
- }
5125
- // should fix below: [self.ndim,other.ndim].max or?
5126
- naryv[0] = self;
5127
- naryv[1] = argv[0];
5128
- //
5129
- reduce = na_reduce_dimension(argc - 1, argv + 1, 2, naryv, &ndf, iter_dfloat_mulsum_nan);
5130
- //
2061
+ ndfunc_t ndf = { iter_dfloat_copysign, STRIDE_LOOP, 2, 1, ain, aout };
5131
2062
 
5132
- v = na_ndloop(&ndf, 4, self, argv[0], reduce, m_mulsum_init);
5133
- return dfloat_extract(v);
2063
+ return na_ndloop(&ndf, 2, self, other);
5134
2064
  }
5135
2065
 
5136
- static VALUE dfloat_mulsum(int argc, VALUE* argv, VALUE self) {
5137
- //
2066
+ static VALUE dfloat_copysign(VALUE self, VALUE other) {
2067
+
5138
2068
  VALUE klass, v;
5139
- //
5140
- if (argc < 1) {
5141
- rb_raise(rb_eArgError, "wrong number of arguments (%d for >=1)", argc);
5142
- }
5143
- //
5144
- klass = na_upcast(rb_obj_class(self), rb_obj_class(argv[0]));
2069
+
2070
+ klass = na_upcast(rb_obj_class(self), rb_obj_class(other));
5145
2071
  if (klass == cT) {
5146
- return dfloat_mulsum_self(argc, argv, self);
2072
+ return dfloat_copysign_self(self, other);
5147
2073
  } else {
5148
2074
  v = rb_funcall(klass, id_cast, 1, self);
5149
- //
5150
- return rb_funcallv_kw(v, rb_intern("mulsum"), argc, argv, RB_PASS_CALLED_KEYWORDS);
5151
- //
2075
+ return rb_funcall(v, id_copysign, 1, other);
5152
2076
  }
5153
- //
5154
2077
  }
5155
2078
 
5156
- typedef dtype seq_data_t;
5157
-
5158
- typedef double seq_count_t;
5159
-
5160
- typedef struct {
5161
- seq_data_t beg;
5162
- seq_data_t step;
5163
- seq_count_t count;
5164
- } seq_opt_t;
5165
-
5166
- static void iter_dfloat_seq(na_loop_t* const lp) {
2079
+ static void iter_dfloat_signbit(na_loop_t* const lp) {
5167
2080
  size_t i;
5168
2081
  char* p1;
5169
- ssize_t s1;
2082
+ BIT_DIGIT* a2;
2083
+ size_t p2;
2084
+ ssize_t s1, s2;
5170
2085
  size_t* idx1;
5171
2086
  dtype x;
5172
- seq_data_t beg, step;
5173
- seq_count_t c;
5174
- seq_opt_t* g;
5175
-
2087
+ BIT_DIGIT b;
5176
2088
  INIT_COUNTER(lp, i);
5177
2089
  INIT_PTR_IDX(lp, 0, p1, s1, idx1);
5178
- g = (seq_opt_t*)(lp->opt_ptr);
5179
- beg = g->beg;
5180
- step = g->step;
5181
- c = g->count;
2090
+ INIT_PTR_BIT(lp, 1, a2, p2, s2);
5182
2091
  if (idx1) {
5183
2092
  for (; i--;) {
5184
- x = f_seq(beg, step, c++);
5185
- *(dtype*)(p1 + *idx1) = x;
5186
- idx1++;
2093
+ GET_DATA_INDEX(p1, idx1, dtype, x);
2094
+ b = (m_signbit(x)) ? 1 : 0;
2095
+ STORE_BIT(a2, p2, b);
2096
+ p2 += s2;
5187
2097
  }
5188
2098
  } else {
5189
2099
  for (; i--;) {
5190
- x = f_seq(beg, step, c++);
5191
- *(dtype*)(p1) = x;
5192
- p1 += s1;
2100
+ GET_DATA_STRIDE(p1, s1, dtype, x);
2101
+ b = (m_signbit(x)) ? 1 : 0;
2102
+ STORE_BIT(a2, p2, b);
2103
+ p2 += s2;
5193
2104
  }
5194
2105
  }
5195
- g->count = c;
5196
2106
  }
5197
2107
 
5198
- static VALUE dfloat_seq(int argc, VALUE* args, VALUE self) {
5199
- seq_opt_t* g;
5200
- VALUE vbeg = Qnil, vstep = Qnil;
5201
- ndfunc_arg_in_t ain[1] = { { OVERWRITE, 0 } };
5202
- ndfunc_t ndf = { iter_dfloat_seq, FULL_LOOP, 1, 0, ain, 0 };
5203
-
5204
- g = ALLOCA_N(seq_opt_t, 1);
5205
- g->beg = m_zero;
5206
- g->step = m_one;
5207
- g->count = 0;
5208
- rb_scan_args(argc, args, "02", &vbeg, &vstep);
5209
- if (vbeg != Qnil) {
5210
- g->beg = m_num_to_data(vbeg);
5211
- }
5212
- if (vstep != Qnil) {
5213
- g->step = m_num_to_data(vstep);
5214
- }
2108
+ static VALUE dfloat_signbit(VALUE self) {
2109
+ ndfunc_arg_in_t ain[1] = { { cT, 0 } };
2110
+ ndfunc_arg_out_t aout[1] = { { numo_cBit, 0 } };
2111
+ ndfunc_t ndf = { iter_dfloat_signbit, FULL_LOOP, 1, 1, ain, aout };
5215
2112
 
5216
- na_ndloop3(&ndf, g, 1, self);
5217
- return self;
2113
+ return na_ndloop(&ndf, 1, self);
5218
2114
  }
5219
2115
 
5220
- typedef struct {
5221
- seq_data_t beg;
5222
- seq_data_t step;
5223
- seq_data_t base;
5224
- seq_count_t count;
5225
- } logseq_opt_t;
5226
-
5227
- static void iter_dfloat_logseq(na_loop_t* const lp) {
2116
+ static void iter_dfloat_modf(na_loop_t* const lp) {
5228
2117
  size_t i;
5229
- char* p1;
5230
- ssize_t s1;
5231
- size_t* idx1;
5232
- dtype x;
5233
- seq_data_t beg, step, base;
5234
- seq_count_t c;
5235
- logseq_opt_t* g;
5236
-
2118
+ char *p1, *p2, *p3;
2119
+ ssize_t s1, s2, s3;
2120
+ dtype x, y, z;
5237
2121
  INIT_COUNTER(lp, i);
5238
- INIT_PTR_IDX(lp, 0, p1, s1, idx1);
5239
- g = (logseq_opt_t*)(lp->opt_ptr);
5240
- beg = g->beg;
5241
- step = g->step;
5242
- base = g->base;
5243
- c = g->count;
5244
- if (idx1) {
5245
- for (; i--;) {
5246
- x = f_seq(beg, step, c++);
5247
- *(dtype*)(p1 + *idx1) = m_pow(base, x);
5248
- idx1++;
5249
- }
5250
- } else {
5251
- for (; i--;) {
5252
- x = f_seq(beg, step, c++);
5253
- *(dtype*)(p1) = m_pow(base, x);
5254
- p1 += s1;
5255
- }
2122
+ INIT_PTR(lp, 0, p1, s1);
2123
+ INIT_PTR(lp, 1, p2, s2);
2124
+ INIT_PTR(lp, 2, p3, s3);
2125
+ for (; i--;) {
2126
+ GET_DATA_STRIDE(p1, s1, dtype, x);
2127
+ m_modf(x, y, z);
2128
+ SET_DATA_STRIDE(p2, s2, dtype, y);
2129
+ SET_DATA_STRIDE(p3, s3, dtype, z);
5256
2130
  }
5257
- g->count = c;
5258
2131
  }
5259
2132
 
5260
- static VALUE dfloat_logseq(int argc, VALUE* args, VALUE self) {
5261
- logseq_opt_t* g;
5262
- VALUE vbeg, vstep, vbase;
5263
- ndfunc_arg_in_t ain[1] = { { OVERWRITE, 0 } };
5264
- ndfunc_t ndf = { iter_dfloat_logseq, FULL_LOOP, 1, 0, ain, 0 };
5265
-
5266
- g = ALLOCA_N(logseq_opt_t, 1);
5267
- rb_scan_args(argc, args, "21", &vbeg, &vstep, &vbase);
5268
- g->beg = m_num_to_data(vbeg);
5269
- g->step = m_num_to_data(vstep);
5270
- if (vbase == Qnil) {
5271
- g->base = m_from_real(10);
5272
- } else {
5273
- g->base = m_num_to_data(vbase);
5274
- }
5275
- na_ndloop3(&ndf, g, 1, self);
5276
- return self;
5277
- }
2133
+ static VALUE dfloat_modf(VALUE self) {
2134
+ ndfunc_arg_in_t ain[1] = { { cT, 0 } };
2135
+ ndfunc_arg_out_t aout[2] = { { cT, 0 }, { cT, 0 } };
2136
+ ndfunc_t ndf = { iter_dfloat_modf, STRIDE_LOOP, 1, 2, ain, aout };
5278
2137
 
5279
- static void iter_dfloat_eye(na_loop_t* const lp) {
5280
- size_t n0, n1;
5281
- size_t i0, i1;
5282
- ssize_t s0, s1;
5283
- char *p0, *p1;
5284
- char* g;
5285
- ssize_t kofs;
5286
- dtype data;
5287
-
5288
- g = (char*)(lp->opt_ptr);
5289
- kofs = *(ssize_t*)g;
5290
- data = *(dtype*)(g + sizeof(ssize_t));
5291
-
5292
- n0 = lp->args[0].shape[0];
5293
- n1 = lp->args[0].shape[1];
5294
- s0 = lp->args[0].iter[0].step;
5295
- s1 = lp->args[0].iter[1].step;
5296
- p0 = NDL_PTR(lp, 0);
5297
-
5298
- for (i0 = 0; i0 < n0; i0++) {
5299
- p1 = p0;
5300
- for (i1 = 0; i1 < n1; i1++) {
5301
- *(dtype*)p1 = (i0 + kofs == i1) ? data : m_zero;
5302
- p1 += s1;
5303
- }
5304
- p0 += s0;
5305
- }
2138
+ return na_ndloop(&ndf, 1, self);
5306
2139
  }
5307
2140
 
5308
- static VALUE dfloat_eye(int argc, VALUE* argv, VALUE self) {
5309
- ndfunc_arg_in_t ain[1] = { { OVERWRITE, 2 } };
5310
- ndfunc_t ndf = { iter_dfloat_eye, NO_LOOP, 1, 0, ain, 0 };
5311
- ssize_t kofs;
5312
- dtype data;
5313
- char* g;
5314
- int nd;
5315
- narray_t* na;
5316
-
5317
- // check arguments
5318
- if (argc > 2) {
5319
- rb_raise(rb_eArgError, "too many arguments (%d for 0..2)", argc);
5320
- } else if (argc == 2) {
5321
- data = m_num_to_data(argv[0]);
5322
- kofs = NUM2SSIZET(argv[1]);
5323
- } else if (argc == 1) {
5324
- data = m_num_to_data(argv[0]);
5325
- kofs = 0;
5326
- } else {
5327
- data = m_one;
5328
- kofs = 0;
5329
- }
5330
-
5331
- GetNArray(self, na);
5332
- nd = na->ndim;
5333
- if (nd < 2) {
5334
- rb_raise(nary_eDimensionError, "less than 2-d array");
5335
- }
5336
-
5337
- // Diagonal offset from the main diagonal.
5338
- if (kofs >= 0) {
5339
- if ((size_t)(kofs) >= na->shape[nd - 1]) {
5340
- rb_raise(
5341
- rb_eArgError,
5342
- "invalid diagonal offset(%" SZF "d) for "
5343
- "last dimension size(%" SZF "d)",
5344
- kofs, na->shape[nd - 1]
5345
- );
5346
- }
5347
- } else {
5348
- if ((size_t)(-kofs) >= na->shape[nd - 2]) {
5349
- rb_raise(
5350
- rb_eArgError,
5351
- "invalid diagonal offset(%" SZF "d) for "
5352
- "last-1 dimension size(%" SZF "d)",
5353
- kofs, na->shape[nd - 2]
5354
- );
5355
- }
5356
- }
2141
+ static void iter_dfloat_kahan_sum(na_loop_t* const lp) {
2142
+ size_t n;
2143
+ char *p1, *p2;
2144
+ ssize_t s1;
5357
2145
 
5358
- g = ALLOCA_N(char, sizeof(ssize_t) + sizeof(dtype));
5359
- *(ssize_t*)g = kofs;
5360
- *(dtype*)(g + sizeof(ssize_t)) = data;
2146
+ INIT_COUNTER(lp, n);
2147
+ INIT_PTR(lp, 0, p1, s1);
2148
+ p2 = lp->args[1].ptr + lp->args[1].iter[0].pos;
5361
2149
 
5362
- na_ndloop3(&ndf, g, 1, self);
5363
- return self;
2150
+ *(dtype*)p2 = f_kahan_sum(n, p1, s1);
5364
2151
  }
5365
-
5366
- typedef struct {
5367
- dtype low;
5368
- dtype max;
5369
- } rand_opt_t;
5370
-
5371
- static void iter_dfloat_rand(na_loop_t* const lp) {
5372
- size_t i;
5373
- char* p1;
2152
+ static void iter_dfloat_kahan_sum_nan(na_loop_t* const lp) {
2153
+ size_t n;
2154
+ char *p1, *p2;
5374
2155
  ssize_t s1;
5375
- size_t* idx1;
5376
- dtype x;
5377
- rand_opt_t* g;
5378
- dtype low;
5379
- dtype max;
5380
2156
 
5381
- INIT_COUNTER(lp, i);
5382
- INIT_PTR_IDX(lp, 0, p1, s1, idx1);
5383
- g = (rand_opt_t*)(lp->opt_ptr);
5384
- low = g->low;
5385
- max = g->max;
2157
+ INIT_COUNTER(lp, n);
2158
+ INIT_PTR(lp, 0, p1, s1);
2159
+ p2 = lp->args[1].ptr + lp->args[1].iter[0].pos;
5386
2160
 
5387
- if (idx1) {
5388
- for (; i--;) {
5389
- x = m_add(m_rand(max), low);
5390
- SET_DATA_INDEX(p1, idx1, dtype, x);
5391
- }
5392
- } else {
5393
- for (; i--;) {
5394
- x = m_add(m_rand(max), low);
5395
- SET_DATA_STRIDE(p1, s1, dtype, x);
5396
- }
5397
- }
2161
+ *(dtype*)p2 = f_kahan_sum_nan(n, p1, s1);
5398
2162
  }
5399
2163
 
5400
- static VALUE dfloat_rand(int argc, VALUE* args, VALUE self) {
5401
- rand_opt_t g;
5402
- VALUE v1 = Qnil, v2 = Qnil;
5403
- dtype high;
5404
- ndfunc_arg_in_t ain[1] = { { OVERWRITE, 0 } };
5405
- ndfunc_t ndf = { iter_dfloat_rand, FULL_LOOP, 1, 0, ain, 0 };
5406
-
5407
- rb_scan_args(argc, args, "02", &v1, &v2);
5408
- if (v2 == Qnil) {
5409
- g.low = m_zero;
5410
- if (v1 == Qnil) {
5411
-
5412
- g.max = high = m_one;
2164
+ static VALUE dfloat_kahan_sum(int argc, VALUE* argv, VALUE self) {
2165
+ VALUE v, reduce;
2166
+ ndfunc_arg_in_t ain[2] = { { cT, 0 }, { sym_reduce, 0 } };
2167
+ ndfunc_arg_out_t aout[1] = { { cT, 0 } };
2168
+ ndfunc_t ndf = { iter_dfloat_kahan_sum, STRIDE_LOOP_NIP | NDF_FLAT_REDUCE, 2, 1, ain, aout };
5413
2169
 
5414
- } else {
5415
- g.max = high = m_num_to_data(v1);
5416
- }
2170
+ reduce = na_reduce_dimension(argc, argv, 1, &self, &ndf, iter_dfloat_kahan_sum_nan);
5417
2171
 
5418
- } else {
5419
- g.low = m_num_to_data(v1);
5420
- high = m_num_to_data(v2);
5421
- g.max = m_sub(high, g.low);
5422
- }
2172
+ v = na_ndloop(&ndf, 2, self, reduce);
5423
2173
 
5424
- na_ndloop3(&ndf, &g, 1, self);
5425
- return self;
2174
+ return dfloat_extract(v);
5426
2175
  }
5427
2176
 
5428
2177
  typedef struct {