numo-narray-alt 0.10.5 → 0.11.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +9 -0
- data/ext/numo/narray/extconf.rb +2 -0
- data/ext/numo/narray/numo/narray.h +3 -3
- data/ext/numo/narray/src/mh/math/sqrt.h +372 -0
- data/ext/numo/narray/src/mh/mulsum.h +1360 -74
- data/ext/numo/narray/src/mh/op/add.h +20 -0
- data/ext/numo/narray/src/mh/op/binary_func.h +542 -0
- data/ext/numo/narray/src/mh/op/div.h +20 -0
- data/ext/numo/narray/src/mh/op/mul.h +20 -0
- data/ext/numo/narray/src/mh/op/sub.h +20 -0
- data/ext/numo/narray/src/mh/sort.h +4 -4
- data/ext/numo/narray/src/t_bit.c +0 -5
- data/ext/numo/narray/src/t_dcomplex.c +0 -5
- data/ext/numo/narray/src/t_dfloat.c +35 -3
- data/ext/numo/narray/src/t_int16.c +0 -5
- data/ext/numo/narray/src/t_int32.c +0 -5
- data/ext/numo/narray/src/t_int64.c +0 -5
- data/ext/numo/narray/src/t_int8.c +0 -5
- data/ext/numo/narray/src/t_robject.c +0 -5
- data/ext/numo/narray/src/t_scomplex.c +0 -5
- data/ext/numo/narray/src/t_sfloat.c +35 -3
- data/ext/numo/narray/src/t_uint16.c +0 -5
- data/ext/numo/narray/src/t_uint32.c +0 -5
- data/ext/numo/narray/src/t_uint64.c +0 -5
- data/ext/numo/narray/src/t_uint8.c +0 -5
- metadata +2 -2
|
@@ -75,4 +75,24 @@
|
|
|
75
75
|
DEF_BINARY_SELF_FUNC(mul, dfloat, numo_cDFloat) \
|
|
76
76
|
DEF_BINARY_FUNC(mul, '*', dfloat, numo_cDFloat)
|
|
77
77
|
|
|
78
|
+
#define DEF_NARRAY_SFLT_MUL_AVX_METHOD_FUNC() \
|
|
79
|
+
DEF_BINARY_SFLT_AVX_ITER_FUNC(mul, _mm256_mul_ps) \
|
|
80
|
+
DEF_BINARY_SELF_FUNC(mul, sfloat, numo_cSFloat) \
|
|
81
|
+
DEF_BINARY_FUNC(mul, '*', sfloat, numo_cSFloat)
|
|
82
|
+
|
|
83
|
+
#define DEF_NARRAY_DFLT_MUL_AVX_METHOD_FUNC() \
|
|
84
|
+
DEF_BINARY_DFLT_AVX_ITER_FUNC(mul, _mm256_mul_pd) \
|
|
85
|
+
DEF_BINARY_SELF_FUNC(mul, dfloat, numo_cDFloat) \
|
|
86
|
+
DEF_BINARY_FUNC(mul, '*', dfloat, numo_cDFloat)
|
|
87
|
+
|
|
88
|
+
#define DEF_NARRAY_SFLT_MUL_NEON_METHOD_FUNC() \
|
|
89
|
+
DEF_BINARY_SFLT_NEON_ITER_FUNC(mul, vmulq_f32) \
|
|
90
|
+
DEF_BINARY_SELF_FUNC(mul, sfloat, numo_cSFloat) \
|
|
91
|
+
DEF_BINARY_FUNC(mul, '*', sfloat, numo_cSFloat)
|
|
92
|
+
|
|
93
|
+
#define DEF_NARRAY_DFLT_MUL_NEON_METHOD_FUNC() \
|
|
94
|
+
DEF_BINARY_DFLT_NEON_ITER_FUNC(mul, vmulq_f64) \
|
|
95
|
+
DEF_BINARY_SELF_FUNC(mul, dfloat, numo_cDFloat) \
|
|
96
|
+
DEF_BINARY_FUNC(mul, '*', dfloat, numo_cDFloat)
|
|
97
|
+
|
|
78
98
|
#endif /* NUMO_NARRAY_MH_OP_MUL_H */
|
|
@@ -75,4 +75,24 @@
|
|
|
75
75
|
DEF_BINARY_SELF_FUNC(sub, dfloat, numo_cDFloat) \
|
|
76
76
|
DEF_BINARY_FUNC(sub, '-', dfloat, numo_cDFloat)
|
|
77
77
|
|
|
78
|
+
#define DEF_NARRAY_SFLT_SUB_AVX_METHOD_FUNC() \
|
|
79
|
+
DEF_BINARY_SFLT_AVX_ITER_FUNC(sub, _mm256_sub_ps) \
|
|
80
|
+
DEF_BINARY_SELF_FUNC(sub, sfloat, numo_cSFloat) \
|
|
81
|
+
DEF_BINARY_FUNC(sub, '-', sfloat, numo_cSFloat)
|
|
82
|
+
|
|
83
|
+
#define DEF_NARRAY_DFLT_SUB_AVX_METHOD_FUNC() \
|
|
84
|
+
DEF_BINARY_DFLT_AVX_ITER_FUNC(sub, _mm256_sub_pd) \
|
|
85
|
+
DEF_BINARY_SELF_FUNC(sub, dfloat, numo_cDFloat) \
|
|
86
|
+
DEF_BINARY_FUNC(sub, '-', dfloat, numo_cDFloat)
|
|
87
|
+
|
|
88
|
+
#define DEF_NARRAY_SFLT_SUB_NEON_METHOD_FUNC() \
|
|
89
|
+
DEF_BINARY_SFLT_NEON_ITER_FUNC(sub, vsubq_f32) \
|
|
90
|
+
DEF_BINARY_SELF_FUNC(sub, sfloat, numo_cSFloat) \
|
|
91
|
+
DEF_BINARY_FUNC(sub, '-', sfloat, numo_cSFloat)
|
|
92
|
+
|
|
93
|
+
#define DEF_NARRAY_DFLT_SUB_NEON_METHOD_FUNC() \
|
|
94
|
+
DEF_BINARY_DFLT_NEON_ITER_FUNC(sub, vsubq_f64) \
|
|
95
|
+
DEF_BINARY_SELF_FUNC(sub, dfloat, numo_cDFloat) \
|
|
96
|
+
DEF_BINARY_FUNC(sub, '-', dfloat, numo_cDFloat)
|
|
97
|
+
|
|
78
98
|
#endif /* NUMO_NARRAY_MH_OP_SUB_H */
|
|
@@ -321,9 +321,9 @@ static inline void swapfunc(char* a, char* b, size_t n, int swaptype) {
|
|
|
321
321
|
rb_funcall(idx, rb_intern("seq"), 0); \
|
|
322
322
|
size_t size = na->size * sizeof(void*); \
|
|
323
323
|
VALUE tmp; \
|
|
324
|
-
char* buf =
|
|
324
|
+
char* buf = RB_ALLOCV(tmp, size); \
|
|
325
325
|
VALUE res = na_ndloop3(&ndf, buf, 3, self, idx, reduce); \
|
|
326
|
-
|
|
326
|
+
RB_ALLOCV_END(tmp); \
|
|
327
327
|
return res; \
|
|
328
328
|
}
|
|
329
329
|
|
|
@@ -475,9 +475,9 @@ static inline void swapfunc(char* a, char* b, size_t n, int swaptype) {
|
|
|
475
475
|
rb_funcall(idx, rb_intern("seq"), 0); \
|
|
476
476
|
size_t size = na->size * sizeof(void*); \
|
|
477
477
|
VALUE tmp; \
|
|
478
|
-
char* buf =
|
|
478
|
+
char* buf = RB_ALLOCV(tmp, size); \
|
|
479
479
|
VALUE res = na_ndloop3(&ndf, buf, 3, self, idx, reduce); \
|
|
480
|
-
|
|
480
|
+
RB_ALLOCV_END(tmp); \
|
|
481
481
|
return res; \
|
|
482
482
|
}
|
|
483
483
|
|
data/ext/numo/narray/src/t_dfloat.c
CHANGED
|
@@ -14,11 +14,21 @@
|
|
|
14
14
|
|
|
15
15
|
#define m_map(x) m_num_to_data(rb_yield(m_data_to_num(x)))
|
|
16
16
|
|
|
17
|
-
#
|
|
17
|
+
#if !defined(NUMO_NO_SIMD) && defined(__SSE2__)
|
|
18
18
|
#include <emmintrin.h>
|
|
19
19
|
#define SIMD_ALIGNMENT_SIZE 16
|
|
20
20
|
#endif
|
|
21
21
|
|
|
22
|
+
#if !defined(NUMO_NO_SIMD) && defined(__AVX__)
|
|
23
|
+
#include <immintrin.h>
|
|
24
|
+
#define AVX_ALIGNMENT_SIZE 32
|
|
25
|
+
#endif
|
|
26
|
+
|
|
27
|
+
#if !defined(NUMO_NO_SIMD) && defined(__ARM_NEON) && defined(__aarch64__)
|
|
28
|
+
#include <arm_neon.h>
|
|
29
|
+
#define NEON_ALIGNMENT_SIZE 16
|
|
30
|
+
#endif
|
|
31
|
+
|
|
22
32
|
static ID id_pow;
|
|
23
33
|
static ID id_cast;
|
|
24
34
|
static ID id_copysign;
|
|
@@ -167,11 +177,21 @@ DEF_NARRAY_MAP_METHOD_FUNC(dfloat, numo_cDFloat)
|
|
|
167
177
|
DEF_NARRAY_EACH_WITH_INDEX_METHOD_FUNC(dfloat)
|
|
168
178
|
DEF_NARRAY_MAP_WITH_INDEX_METHOD_FUNC(dfloat, numo_cDFloat)
|
|
169
179
|
DEF_NARRAY_ABS_METHOD_FUNC(dfloat, numo_cDFloat, dfloat, numo_cDFloat)
|
|
170
|
-
#
|
|
180
|
+
#if !defined(NUMO_NO_SIMD) && defined(__AVX__)
|
|
181
|
+
DEF_NARRAY_DFLT_ADD_AVX_METHOD_FUNC()
|
|
182
|
+
DEF_NARRAY_DFLT_SUB_AVX_METHOD_FUNC()
|
|
183
|
+
DEF_NARRAY_DFLT_MUL_AVX_METHOD_FUNC()
|
|
184
|
+
DEF_NARRAY_DFLT_DIV_AVX_METHOD_FUNC()
|
|
185
|
+
#elif !defined(NUMO_NO_SIMD) && defined(__SSE2__)
|
|
171
186
|
DEF_NARRAY_DFLT_ADD_SSE2_METHOD_FUNC()
|
|
172
187
|
DEF_NARRAY_DFLT_SUB_SSE2_METHOD_FUNC()
|
|
173
188
|
DEF_NARRAY_DFLT_MUL_SSE2_METHOD_FUNC()
|
|
174
189
|
DEF_NARRAY_DFLT_DIV_SSE2_METHOD_FUNC()
|
|
190
|
+
#elif !defined(NUMO_NO_SIMD) && defined(__ARM_NEON) && defined(__aarch64__)
|
|
191
|
+
DEF_NARRAY_DFLT_ADD_NEON_METHOD_FUNC()
|
|
192
|
+
DEF_NARRAY_DFLT_SUB_NEON_METHOD_FUNC()
|
|
193
|
+
DEF_NARRAY_DFLT_MUL_NEON_METHOD_FUNC()
|
|
194
|
+
DEF_NARRAY_DFLT_DIV_NEON_METHOD_FUNC()
|
|
175
195
|
#else
|
|
176
196
|
DEF_NARRAY_ADD_METHOD_FUNC(dfloat, numo_cDFloat)
|
|
177
197
|
DEF_NARRAY_SUB_METHOD_FUNC(dfloat, numo_cDFloat)
|
|
@@ -225,7 +245,15 @@ DEF_NARRAY_FLT_MINIMUM_METHOD_FUNC(dfloat, numo_cDFloat)
|
|
|
225
245
|
DEF_NARRAY_FLT_MINMAX_METHOD_FUNC(dfloat, numo_cDFloat)
|
|
226
246
|
DEF_NARRAY_FLT_CUMSUM_METHOD_FUNC(dfloat, numo_cDFloat)
|
|
227
247
|
DEF_NARRAY_FLT_CUMPROD_METHOD_FUNC(dfloat, numo_cDFloat)
|
|
248
|
+
#if !defined(NUMO_NO_SIMD) && defined(__AVX__)
|
|
249
|
+
DEF_NARRAY_DFLT_MULSUM_AVX_METHOD_FUNC()
|
|
250
|
+
#elif !defined(NUMO_NO_SIMD) && defined(__SSE2__)
|
|
251
|
+
DEF_NARRAY_DFLT_MULSUM_SSE2_METHOD_FUNC()
|
|
252
|
+
#elif !defined(NUMO_NO_SIMD) && defined(__ARM_NEON) && defined(__aarch64__)
|
|
253
|
+
DEF_NARRAY_DFLT_MULSUM_NEON_METHOD_FUNC()
|
|
254
|
+
#else
|
|
228
255
|
DEF_NARRAY_FLT_MULSUM_METHOD_FUNC(dfloat, numo_cDFloat)
|
|
256
|
+
#endif
|
|
229
257
|
DEF_NARRAY_FLT_SEQ_METHOD_FUNC(dfloat)
|
|
230
258
|
DEF_NARRAY_FLT_LOGSEQ_METHOD_FUNC(dfloat)
|
|
231
259
|
DEF_NARRAY_EYE_METHOD_FUNC(dfloat)
|
|
@@ -243,8 +271,12 @@ DEF_NARRAY_FLT_SORT_METHOD_FUNC(dfloat)
|
|
|
243
271
|
#define qsort_cast **(dfloat**)
|
|
244
272
|
DEF_NARRAY_FLT_SORT_INDEX_METHOD_FUNC(dfloat, numo_cDFloat)
|
|
245
273
|
DEF_NARRAY_FLT_MEDIAN_METHOD_FUNC(dfloat)
|
|
246
|
-
#
|
|
274
|
+
#if !defined(NUMO_NO_SIMD) && defined(__AVX__)
|
|
275
|
+
DEF_NARRAY_FLT_SQRT_AVX_DBL_METHOD_FUNC(dfloat, numo_cDFloat)
|
|
276
|
+
#elif !defined(NUMO_NO_SIMD) && defined(__SSE2__)
|
|
247
277
|
DEF_NARRAY_FLT_SQRT_SSE2_DBL_METHOD_FUNC(dfloat, numo_cDFloat)
|
|
278
|
+
#elif !defined(NUMO_NO_SIMD) && defined(__ARM_NEON) && defined(__aarch64__)
|
|
279
|
+
DEF_NARRAY_FLT_SQRT_NEON_DBL_METHOD_FUNC(dfloat, numo_cDFloat)
|
|
248
280
|
#else
|
|
249
281
|
DEF_NARRAY_FLT_SQRT_METHOD_FUNC(dfloat, numo_cDFloat)
|
|
250
282
|
#endif
|
|
@@ -14,11 +14,21 @@
|
|
|
14
14
|
|
|
15
15
|
#define m_map(x) m_num_to_data(rb_yield(m_data_to_num(x)))
|
|
16
16
|
|
|
17
|
-
#
|
|
17
|
+
#if !defined(NUMO_NO_SIMD) && defined(__SSE2__)
|
|
18
18
|
#include <emmintrin.h>
|
|
19
19
|
#define SIMD_ALIGNMENT_SIZE 16
|
|
20
20
|
#endif
|
|
21
21
|
|
|
22
|
+
#if !defined(NUMO_NO_SIMD) && defined(__AVX__)
|
|
23
|
+
#include <immintrin.h>
|
|
24
|
+
#define AVX_ALIGNMENT_SIZE 32
|
|
25
|
+
#endif
|
|
26
|
+
|
|
27
|
+
#if !defined(NUMO_NO_SIMD) && defined(__ARM_NEON) && defined(__aarch64__)
|
|
28
|
+
#include <arm_neon.h>
|
|
29
|
+
#define NEON_ALIGNMENT_SIZE 16
|
|
30
|
+
#endif
|
|
31
|
+
|
|
22
32
|
static ID id_pow;
|
|
23
33
|
static ID id_cast;
|
|
24
34
|
static ID id_copysign;
|
|
@@ -166,11 +176,21 @@ DEF_NARRAY_MAP_METHOD_FUNC(sfloat, numo_cSFloat)
|
|
|
166
176
|
DEF_NARRAY_EACH_WITH_INDEX_METHOD_FUNC(sfloat)
|
|
167
177
|
DEF_NARRAY_MAP_WITH_INDEX_METHOD_FUNC(sfloat, numo_cSFloat)
|
|
168
178
|
DEF_NARRAY_ABS_METHOD_FUNC(sfloat, numo_cSFloat, sfloat, numo_cSFloat)
|
|
169
|
-
#
|
|
179
|
+
#if !defined(NUMO_NO_SIMD) && defined(__AVX__)
|
|
180
|
+
DEF_NARRAY_SFLT_ADD_AVX_METHOD_FUNC()
|
|
181
|
+
DEF_NARRAY_SFLT_SUB_AVX_METHOD_FUNC()
|
|
182
|
+
DEF_NARRAY_SFLT_MUL_AVX_METHOD_FUNC()
|
|
183
|
+
DEF_NARRAY_SFLT_DIV_AVX_METHOD_FUNC()
|
|
184
|
+
#elif !defined(NUMO_NO_SIMD) && defined(__SSE2__)
|
|
170
185
|
DEF_NARRAY_SFLT_ADD_SSE2_METHOD_FUNC()
|
|
171
186
|
DEF_NARRAY_SFLT_SUB_SSE2_METHOD_FUNC()
|
|
172
187
|
DEF_NARRAY_SFLT_MUL_SSE2_METHOD_FUNC()
|
|
173
188
|
DEF_NARRAY_SFLT_DIV_SSE2_METHOD_FUNC()
|
|
189
|
+
#elif !defined(NUMO_NO_SIMD) && defined(__ARM_NEON) && defined(__aarch64__)
|
|
190
|
+
DEF_NARRAY_SFLT_ADD_NEON_METHOD_FUNC()
|
|
191
|
+
DEF_NARRAY_SFLT_SUB_NEON_METHOD_FUNC()
|
|
192
|
+
DEF_NARRAY_SFLT_MUL_NEON_METHOD_FUNC()
|
|
193
|
+
DEF_NARRAY_SFLT_DIV_NEON_METHOD_FUNC()
|
|
174
194
|
#else
|
|
175
195
|
DEF_NARRAY_ADD_METHOD_FUNC(sfloat, numo_cSFloat)
|
|
176
196
|
DEF_NARRAY_SUB_METHOD_FUNC(sfloat, numo_cSFloat)
|
|
@@ -223,7 +243,15 @@ DEF_NARRAY_FLT_MINIMUM_METHOD_FUNC(sfloat, numo_cSFloat)
|
|
|
223
243
|
DEF_NARRAY_FLT_MINMAX_METHOD_FUNC(sfloat, numo_cSFloat)
|
|
224
244
|
DEF_NARRAY_FLT_CUMSUM_METHOD_FUNC(sfloat, numo_cSFloat)
|
|
225
245
|
DEF_NARRAY_FLT_CUMPROD_METHOD_FUNC(sfloat, numo_cSFloat)
|
|
246
|
+
#if !defined(NUMO_NO_SIMD) && defined(__AVX__)
|
|
247
|
+
DEF_NARRAY_SFLT_MULSUM_AVX_METHOD_FUNC()
|
|
248
|
+
#elif !defined(NUMO_NO_SIMD) && defined(__SSE2__)
|
|
249
|
+
DEF_NARRAY_SFLT_MULSUM_SSE2_METHOD_FUNC()
|
|
250
|
+
#elif !defined(NUMO_NO_SIMD) && defined(__ARM_NEON) && defined(__aarch64__)
|
|
251
|
+
DEF_NARRAY_SFLT_MULSUM_NEON_METHOD_FUNC()
|
|
252
|
+
#else
|
|
226
253
|
DEF_NARRAY_FLT_MULSUM_METHOD_FUNC(sfloat, numo_cSFloat)
|
|
254
|
+
#endif
|
|
227
255
|
DEF_NARRAY_FLT_SEQ_METHOD_FUNC(sfloat)
|
|
228
256
|
DEF_NARRAY_FLT_LOGSEQ_METHOD_FUNC(sfloat)
|
|
229
257
|
DEF_NARRAY_EYE_METHOD_FUNC(sfloat)
|
|
@@ -241,8 +269,12 @@ DEF_NARRAY_FLT_SORT_METHOD_FUNC(sfloat)
|
|
|
241
269
|
#define qsort_cast **(sfloat**)
|
|
242
270
|
DEF_NARRAY_FLT_SORT_INDEX_METHOD_FUNC(sfloat, numo_cSFloat)
|
|
243
271
|
DEF_NARRAY_FLT_MEDIAN_METHOD_FUNC(sfloat)
|
|
244
|
-
#
|
|
272
|
+
#if !defined(NUMO_NO_SIMD) && defined(__AVX__)
|
|
273
|
+
DEF_NARRAY_FLT_SQRT_AVX_SGL_METHOD_FUNC(sfloat, numo_cSFloat)
|
|
274
|
+
#elif !defined(NUMO_NO_SIMD) && defined(__SSE2__)
|
|
245
275
|
DEF_NARRAY_FLT_SQRT_SSE2_SGL_METHOD_FUNC(sfloat, numo_cSFloat)
|
|
276
|
+
#elif !defined(NUMO_NO_SIMD) && defined(__ARM_NEON) && defined(__aarch64__)
|
|
277
|
+
DEF_NARRAY_FLT_SQRT_NEON_SGL_METHOD_FUNC(sfloat, numo_cSFloat)
|
|
246
278
|
#else
|
|
247
279
|
DEF_NARRAY_FLT_SQRT_METHOD_FUNC(sfloat, numo_cSFloat)
|
|
248
280
|
#endif
|
metadata
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: numo-narray-alt
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.10.5
|
|
4
|
+
version: 0.11.0
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- yoshoku
|
|
@@ -214,7 +214,7 @@ licenses:
|
|
|
214
214
|
metadata:
|
|
215
215
|
homepage_uri: https://github.com/yoshoku/numo-narray-alt
|
|
216
216
|
changelog_uri: https://github.com/yoshoku/numo-narray-alt/blob/main/CHANGELOG.md
|
|
217
|
-
documentation_uri: https://gemdocs.org/gems/numo-narray-alt/0.10.5/
|
|
217
|
+
documentation_uri: https://gemdocs.org/gems/numo-narray-alt/0.11.0/
|
|
218
218
|
rubygems_mfa_required: 'true'
|
|
219
219
|
post_install_message: |
|
|
220
220
|
===
|