numo-narray-alt 0.10.5 → 0.11.0

This diff shows the changes between two publicly available versions of this package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents exactly as they appear in their respective public registries.
@@ -75,4 +75,24 @@
75
75
  DEF_BINARY_SELF_FUNC(mul, dfloat, numo_cDFloat) \
76
76
  DEF_BINARY_FUNC(mul, '*', dfloat, numo_cDFloat)
77
77
 
78
+ #define DEF_NARRAY_SFLT_MUL_AVX_METHOD_FUNC() \
79
+ DEF_BINARY_SFLT_AVX_ITER_FUNC(mul, _mm256_mul_ps) \
80
+ DEF_BINARY_SELF_FUNC(mul, sfloat, numo_cSFloat) \
81
+ DEF_BINARY_FUNC(mul, '*', sfloat, numo_cSFloat)
82
+
83
+ #define DEF_NARRAY_DFLT_MUL_AVX_METHOD_FUNC() \
84
+ DEF_BINARY_DFLT_AVX_ITER_FUNC(mul, _mm256_mul_pd) \
85
+ DEF_BINARY_SELF_FUNC(mul, dfloat, numo_cDFloat) \
86
+ DEF_BINARY_FUNC(mul, '*', dfloat, numo_cDFloat)
87
+
88
+ #define DEF_NARRAY_SFLT_MUL_NEON_METHOD_FUNC() \
89
+ DEF_BINARY_SFLT_NEON_ITER_FUNC(mul, vmulq_f32) \
90
+ DEF_BINARY_SELF_FUNC(mul, sfloat, numo_cSFloat) \
91
+ DEF_BINARY_FUNC(mul, '*', sfloat, numo_cSFloat)
92
+
93
+ #define DEF_NARRAY_DFLT_MUL_NEON_METHOD_FUNC() \
94
+ DEF_BINARY_DFLT_NEON_ITER_FUNC(mul, vmulq_f64) \
95
+ DEF_BINARY_SELF_FUNC(mul, dfloat, numo_cDFloat) \
96
+ DEF_BINARY_FUNC(mul, '*', dfloat, numo_cDFloat)
97
+
78
98
  #endif /* NUMO_NARRAY_MH_OP_MUL_H */
@@ -75,4 +75,24 @@
75
75
  DEF_BINARY_SELF_FUNC(sub, dfloat, numo_cDFloat) \
76
76
  DEF_BINARY_FUNC(sub, '-', dfloat, numo_cDFloat)
77
77
 
78
+ #define DEF_NARRAY_SFLT_SUB_AVX_METHOD_FUNC() \
79
+ DEF_BINARY_SFLT_AVX_ITER_FUNC(sub, _mm256_sub_ps) \
80
+ DEF_BINARY_SELF_FUNC(sub, sfloat, numo_cSFloat) \
81
+ DEF_BINARY_FUNC(sub, '-', sfloat, numo_cSFloat)
82
+
83
+ #define DEF_NARRAY_DFLT_SUB_AVX_METHOD_FUNC() \
84
+ DEF_BINARY_DFLT_AVX_ITER_FUNC(sub, _mm256_sub_pd) \
85
+ DEF_BINARY_SELF_FUNC(sub, dfloat, numo_cDFloat) \
86
+ DEF_BINARY_FUNC(sub, '-', dfloat, numo_cDFloat)
87
+
88
+ #define DEF_NARRAY_SFLT_SUB_NEON_METHOD_FUNC() \
89
+ DEF_BINARY_SFLT_NEON_ITER_FUNC(sub, vsubq_f32) \
90
+ DEF_BINARY_SELF_FUNC(sub, sfloat, numo_cSFloat) \
91
+ DEF_BINARY_FUNC(sub, '-', sfloat, numo_cSFloat)
92
+
93
+ #define DEF_NARRAY_DFLT_SUB_NEON_METHOD_FUNC() \
94
+ DEF_BINARY_DFLT_NEON_ITER_FUNC(sub, vsubq_f64) \
95
+ DEF_BINARY_SELF_FUNC(sub, dfloat, numo_cDFloat) \
96
+ DEF_BINARY_FUNC(sub, '-', dfloat, numo_cDFloat)
97
+
78
98
  #endif /* NUMO_NARRAY_MH_OP_SUB_H */
@@ -321,9 +321,9 @@ static inline void swapfunc(char* a, char* b, size_t n, int swaptype) {
321
321
  rb_funcall(idx, rb_intern("seq"), 0); \
322
322
  size_t size = na->size * sizeof(void*); \
323
323
  VALUE tmp; \
324
- char* buf = rb_alloc_tmp_buffer(&tmp, size); \
324
+ char* buf = RB_ALLOCV(tmp, size); \
325
325
  VALUE res = na_ndloop3(&ndf, buf, 3, self, idx, reduce); \
326
- rb_free_tmp_buffer(&tmp); \
326
+ RB_ALLOCV_END(tmp); \
327
327
  return res; \
328
328
  }
329
329
 
@@ -475,9 +475,9 @@ static inline void swapfunc(char* a, char* b, size_t n, int swaptype) {
475
475
  rb_funcall(idx, rb_intern("seq"), 0); \
476
476
  size_t size = na->size * sizeof(void*); \
477
477
  VALUE tmp; \
478
- char* buf = rb_alloc_tmp_buffer(&tmp, size); \
478
+ char* buf = RB_ALLOCV(tmp, size); \
479
479
  VALUE res = na_ndloop3(&ndf, buf, 3, self, idx, reduce); \
480
- rb_free_tmp_buffer(&tmp); \
480
+ RB_ALLOCV_END(tmp); \
481
481
  return res; \
482
482
  }
483
483
 
@@ -14,11 +14,6 @@
14
14
 
15
15
  #define m_map(x) m_num_to_data(rb_yield(m_data_to_num(x)))
16
16
 
17
- #ifdef __SSE2__
18
- #include <emmintrin.h>
19
- #define SIMD_ALIGNMENT_SIZE 16
20
- #endif
21
-
22
17
  static ID id_cast;
23
18
  static ID id_divmod;
24
19
  static ID id_eq;
@@ -14,11 +14,6 @@
14
14
 
15
15
  #define m_map(x) m_num_to_data(rb_yield(m_data_to_num(x)))
16
16
 
17
- #ifdef __SSE2__
18
- #include <emmintrin.h>
19
- #define SIMD_ALIGNMENT_SIZE 16
20
- #endif
21
-
22
17
  static ID id_pow;
23
18
  static ID id_cast;
24
19
  static ID id_copysign;
@@ -14,11 +14,21 @@
14
14
 
15
15
  #define m_map(x) m_num_to_data(rb_yield(m_data_to_num(x)))
16
16
 
17
- #ifdef __SSE2__
17
+ #if !defined(NUMO_NO_SIMD) && defined(__SSE2__)
18
18
  #include <emmintrin.h>
19
19
  #define SIMD_ALIGNMENT_SIZE 16
20
20
  #endif
21
21
 
22
+ #if !defined(NUMO_NO_SIMD) && defined(__AVX__)
23
+ #include <immintrin.h>
24
+ #define AVX_ALIGNMENT_SIZE 32
25
+ #endif
26
+
27
+ #if !defined(NUMO_NO_SIMD) && defined(__ARM_NEON) && defined(__aarch64__)
28
+ #include <arm_neon.h>
29
+ #define NEON_ALIGNMENT_SIZE 16
30
+ #endif
31
+
22
32
  static ID id_pow;
23
33
  static ID id_cast;
24
34
  static ID id_copysign;
@@ -167,11 +177,21 @@ DEF_NARRAY_MAP_METHOD_FUNC(dfloat, numo_cDFloat)
167
177
  DEF_NARRAY_EACH_WITH_INDEX_METHOD_FUNC(dfloat)
168
178
  DEF_NARRAY_MAP_WITH_INDEX_METHOD_FUNC(dfloat, numo_cDFloat)
169
179
  DEF_NARRAY_ABS_METHOD_FUNC(dfloat, numo_cDFloat, dfloat, numo_cDFloat)
170
- #ifdef __SSE2__
180
+ #if !defined(NUMO_NO_SIMD) && defined(__AVX__)
181
+ DEF_NARRAY_DFLT_ADD_AVX_METHOD_FUNC()
182
+ DEF_NARRAY_DFLT_SUB_AVX_METHOD_FUNC()
183
+ DEF_NARRAY_DFLT_MUL_AVX_METHOD_FUNC()
184
+ DEF_NARRAY_DFLT_DIV_AVX_METHOD_FUNC()
185
+ #elif !defined(NUMO_NO_SIMD) && defined(__SSE2__)
171
186
  DEF_NARRAY_DFLT_ADD_SSE2_METHOD_FUNC()
172
187
  DEF_NARRAY_DFLT_SUB_SSE2_METHOD_FUNC()
173
188
  DEF_NARRAY_DFLT_MUL_SSE2_METHOD_FUNC()
174
189
  DEF_NARRAY_DFLT_DIV_SSE2_METHOD_FUNC()
190
+ #elif !defined(NUMO_NO_SIMD) && defined(__ARM_NEON) && defined(__aarch64__)
191
+ DEF_NARRAY_DFLT_ADD_NEON_METHOD_FUNC()
192
+ DEF_NARRAY_DFLT_SUB_NEON_METHOD_FUNC()
193
+ DEF_NARRAY_DFLT_MUL_NEON_METHOD_FUNC()
194
+ DEF_NARRAY_DFLT_DIV_NEON_METHOD_FUNC()
175
195
  #else
176
196
  DEF_NARRAY_ADD_METHOD_FUNC(dfloat, numo_cDFloat)
177
197
  DEF_NARRAY_SUB_METHOD_FUNC(dfloat, numo_cDFloat)
@@ -225,7 +245,15 @@ DEF_NARRAY_FLT_MINIMUM_METHOD_FUNC(dfloat, numo_cDFloat)
225
245
  DEF_NARRAY_FLT_MINMAX_METHOD_FUNC(dfloat, numo_cDFloat)
226
246
  DEF_NARRAY_FLT_CUMSUM_METHOD_FUNC(dfloat, numo_cDFloat)
227
247
  DEF_NARRAY_FLT_CUMPROD_METHOD_FUNC(dfloat, numo_cDFloat)
248
+ #if !defined(NUMO_NO_SIMD) && defined(__AVX__)
249
+ DEF_NARRAY_DFLT_MULSUM_AVX_METHOD_FUNC()
250
+ #elif !defined(NUMO_NO_SIMD) && defined(__SSE2__)
251
+ DEF_NARRAY_DFLT_MULSUM_SSE2_METHOD_FUNC()
252
+ #elif !defined(NUMO_NO_SIMD) && defined(__ARM_NEON) && defined(__aarch64__)
253
+ DEF_NARRAY_DFLT_MULSUM_NEON_METHOD_FUNC()
254
+ #else
228
255
  DEF_NARRAY_FLT_MULSUM_METHOD_FUNC(dfloat, numo_cDFloat)
256
+ #endif
229
257
  DEF_NARRAY_FLT_SEQ_METHOD_FUNC(dfloat)
230
258
  DEF_NARRAY_FLT_LOGSEQ_METHOD_FUNC(dfloat)
231
259
  DEF_NARRAY_EYE_METHOD_FUNC(dfloat)
@@ -243,8 +271,12 @@ DEF_NARRAY_FLT_SORT_METHOD_FUNC(dfloat)
243
271
  #define qsort_cast **(dfloat**)
244
272
  DEF_NARRAY_FLT_SORT_INDEX_METHOD_FUNC(dfloat, numo_cDFloat)
245
273
  DEF_NARRAY_FLT_MEDIAN_METHOD_FUNC(dfloat)
246
- #ifdef __SSE2__
274
+ #if !defined(NUMO_NO_SIMD) && defined(__AVX__)
275
+ DEF_NARRAY_FLT_SQRT_AVX_DBL_METHOD_FUNC(dfloat, numo_cDFloat)
276
+ #elif !defined(NUMO_NO_SIMD) && defined(__SSE2__)
247
277
  DEF_NARRAY_FLT_SQRT_SSE2_DBL_METHOD_FUNC(dfloat, numo_cDFloat)
278
+ #elif !defined(NUMO_NO_SIMD) && defined(__ARM_NEON) && defined(__aarch64__)
279
+ DEF_NARRAY_FLT_SQRT_NEON_DBL_METHOD_FUNC(dfloat, numo_cDFloat)
248
280
  #else
249
281
  DEF_NARRAY_FLT_SQRT_METHOD_FUNC(dfloat, numo_cDFloat)
250
282
  #endif
@@ -14,11 +14,6 @@
14
14
 
15
15
  #define m_map(x) m_num_to_data(rb_yield(m_data_to_num(x)))
16
16
 
17
- #ifdef __SSE2__
18
- #include <emmintrin.h>
19
- #define SIMD_ALIGNMENT_SIZE 16
20
- #endif
21
-
22
17
  static ID id_pow;
23
18
  static ID id_left_shift;
24
19
  static ID id_right_shift;
@@ -14,11 +14,6 @@
14
14
 
15
15
  #define m_map(x) m_num_to_data(rb_yield(m_data_to_num(x)))
16
16
 
17
- #ifdef __SSE2__
18
- #include <emmintrin.h>
19
- #define SIMD_ALIGNMENT_SIZE 16
20
- #endif
21
-
22
17
  static ID id_pow;
23
18
  static ID id_left_shift;
24
19
  static ID id_right_shift;
@@ -14,11 +14,6 @@
14
14
 
15
15
  #define m_map(x) m_num_to_data(rb_yield(m_data_to_num(x)))
16
16
 
17
- #ifdef __SSE2__
18
- #include <emmintrin.h>
19
- #define SIMD_ALIGNMENT_SIZE 16
20
- #endif
21
-
22
17
  static ID id_pow;
23
18
  static ID id_left_shift;
24
19
  static ID id_right_shift;
@@ -14,11 +14,6 @@
14
14
 
15
15
  #define m_map(x) m_num_to_data(rb_yield(m_data_to_num(x)))
16
16
 
17
- #ifdef __SSE2__
18
- #include <emmintrin.h>
19
- #define SIMD_ALIGNMENT_SIZE 16
20
- #endif
21
-
22
17
  static ID id_pow;
23
18
  static ID id_left_shift;
24
19
  static ID id_right_shift;
@@ -14,11 +14,6 @@
14
14
 
15
15
  #define m_map(x) m_num_to_data(rb_yield(m_data_to_num(x)))
16
16
 
17
- #ifdef __SSE2__
18
- #include <emmintrin.h>
19
- #define SIMD_ALIGNMENT_SIZE 16
20
- #endif
21
-
22
17
  static ID id_ne;
23
18
  static ID id_pow;
24
19
  static ID id_minus;
@@ -14,11 +14,6 @@
14
14
 
15
15
  #define m_map(x) m_num_to_data(rb_yield(m_data_to_num(x)))
16
16
 
17
- #ifdef __SSE2__
18
- #include <emmintrin.h>
19
- #define SIMD_ALIGNMENT_SIZE 16
20
- #endif
21
-
22
17
  static ID id_pow;
23
18
  static ID id_cast;
24
19
  static ID id_copysign;
@@ -14,11 +14,21 @@
14
14
 
15
15
  #define m_map(x) m_num_to_data(rb_yield(m_data_to_num(x)))
16
16
 
17
- #ifdef __SSE2__
17
+ #if !defined(NUMO_NO_SIMD) && defined(__SSE2__)
18
18
  #include <emmintrin.h>
19
19
  #define SIMD_ALIGNMENT_SIZE 16
20
20
  #endif
21
21
 
22
+ #if !defined(NUMO_NO_SIMD) && defined(__AVX__)
23
+ #include <immintrin.h>
24
+ #define AVX_ALIGNMENT_SIZE 32
25
+ #endif
26
+
27
+ #if !defined(NUMO_NO_SIMD) && defined(__ARM_NEON) && defined(__aarch64__)
28
+ #include <arm_neon.h>
29
+ #define NEON_ALIGNMENT_SIZE 16
30
+ #endif
31
+
22
32
  static ID id_pow;
23
33
  static ID id_cast;
24
34
  static ID id_copysign;
@@ -166,11 +176,21 @@ DEF_NARRAY_MAP_METHOD_FUNC(sfloat, numo_cSFloat)
166
176
  DEF_NARRAY_EACH_WITH_INDEX_METHOD_FUNC(sfloat)
167
177
  DEF_NARRAY_MAP_WITH_INDEX_METHOD_FUNC(sfloat, numo_cSFloat)
168
178
  DEF_NARRAY_ABS_METHOD_FUNC(sfloat, numo_cSFloat, sfloat, numo_cSFloat)
169
- #ifdef __SSE2__
179
+ #if !defined(NUMO_NO_SIMD) && defined(__AVX__)
180
+ DEF_NARRAY_SFLT_ADD_AVX_METHOD_FUNC()
181
+ DEF_NARRAY_SFLT_SUB_AVX_METHOD_FUNC()
182
+ DEF_NARRAY_SFLT_MUL_AVX_METHOD_FUNC()
183
+ DEF_NARRAY_SFLT_DIV_AVX_METHOD_FUNC()
184
+ #elif !defined(NUMO_NO_SIMD) && defined(__SSE2__)
170
185
  DEF_NARRAY_SFLT_ADD_SSE2_METHOD_FUNC()
171
186
  DEF_NARRAY_SFLT_SUB_SSE2_METHOD_FUNC()
172
187
  DEF_NARRAY_SFLT_MUL_SSE2_METHOD_FUNC()
173
188
  DEF_NARRAY_SFLT_DIV_SSE2_METHOD_FUNC()
189
+ #elif !defined(NUMO_NO_SIMD) && defined(__ARM_NEON) && defined(__aarch64__)
190
+ DEF_NARRAY_SFLT_ADD_NEON_METHOD_FUNC()
191
+ DEF_NARRAY_SFLT_SUB_NEON_METHOD_FUNC()
192
+ DEF_NARRAY_SFLT_MUL_NEON_METHOD_FUNC()
193
+ DEF_NARRAY_SFLT_DIV_NEON_METHOD_FUNC()
174
194
  #else
175
195
  DEF_NARRAY_ADD_METHOD_FUNC(sfloat, numo_cSFloat)
176
196
  DEF_NARRAY_SUB_METHOD_FUNC(sfloat, numo_cSFloat)
@@ -223,7 +243,15 @@ DEF_NARRAY_FLT_MINIMUM_METHOD_FUNC(sfloat, numo_cSFloat)
223
243
  DEF_NARRAY_FLT_MINMAX_METHOD_FUNC(sfloat, numo_cSFloat)
224
244
  DEF_NARRAY_FLT_CUMSUM_METHOD_FUNC(sfloat, numo_cSFloat)
225
245
  DEF_NARRAY_FLT_CUMPROD_METHOD_FUNC(sfloat, numo_cSFloat)
246
+ #if !defined(NUMO_NO_SIMD) && defined(__AVX__)
247
+ DEF_NARRAY_SFLT_MULSUM_AVX_METHOD_FUNC()
248
+ #elif !defined(NUMO_NO_SIMD) && defined(__SSE2__)
249
+ DEF_NARRAY_SFLT_MULSUM_SSE2_METHOD_FUNC()
250
+ #elif !defined(NUMO_NO_SIMD) && defined(__ARM_NEON) && defined(__aarch64__)
251
+ DEF_NARRAY_SFLT_MULSUM_NEON_METHOD_FUNC()
252
+ #else
226
253
  DEF_NARRAY_FLT_MULSUM_METHOD_FUNC(sfloat, numo_cSFloat)
254
+ #endif
227
255
  DEF_NARRAY_FLT_SEQ_METHOD_FUNC(sfloat)
228
256
  DEF_NARRAY_FLT_LOGSEQ_METHOD_FUNC(sfloat)
229
257
  DEF_NARRAY_EYE_METHOD_FUNC(sfloat)
@@ -241,8 +269,12 @@ DEF_NARRAY_FLT_SORT_METHOD_FUNC(sfloat)
241
269
  #define qsort_cast **(sfloat**)
242
270
  DEF_NARRAY_FLT_SORT_INDEX_METHOD_FUNC(sfloat, numo_cSFloat)
243
271
  DEF_NARRAY_FLT_MEDIAN_METHOD_FUNC(sfloat)
244
- #ifdef __SSE2__
272
+ #if !defined(NUMO_NO_SIMD) && defined(__AVX__)
273
+ DEF_NARRAY_FLT_SQRT_AVX_SGL_METHOD_FUNC(sfloat, numo_cSFloat)
274
+ #elif !defined(NUMO_NO_SIMD) && defined(__SSE2__)
245
275
  DEF_NARRAY_FLT_SQRT_SSE2_SGL_METHOD_FUNC(sfloat, numo_cSFloat)
276
+ #elif !defined(NUMO_NO_SIMD) && defined(__ARM_NEON) && defined(__aarch64__)
277
+ DEF_NARRAY_FLT_SQRT_NEON_SGL_METHOD_FUNC(sfloat, numo_cSFloat)
246
278
  #else
247
279
  DEF_NARRAY_FLT_SQRT_METHOD_FUNC(sfloat, numo_cSFloat)
248
280
  #endif
@@ -14,11 +14,6 @@
14
14
 
15
15
  #define m_map(x) m_num_to_data(rb_yield(m_data_to_num(x)))
16
16
 
17
- #ifdef __SSE2__
18
- #include <emmintrin.h>
19
- #define SIMD_ALIGNMENT_SIZE 16
20
- #endif
21
-
22
17
  static ID id_pow;
23
18
  static ID id_left_shift;
24
19
  static ID id_right_shift;
@@ -14,11 +14,6 @@
14
14
 
15
15
  #define m_map(x) m_num_to_data(rb_yield(m_data_to_num(x)))
16
16
 
17
- #ifdef __SSE2__
18
- #include <emmintrin.h>
19
- #define SIMD_ALIGNMENT_SIZE 16
20
- #endif
21
-
22
17
  static ID id_pow;
23
18
  static ID id_left_shift;
24
19
  static ID id_right_shift;
@@ -14,11 +14,6 @@
14
14
 
15
15
  #define m_map(x) m_num_to_data(rb_yield(m_data_to_num(x)))
16
16
 
17
- #ifdef __SSE2__
18
- #include <emmintrin.h>
19
- #define SIMD_ALIGNMENT_SIZE 16
20
- #endif
21
-
22
17
  static ID id_pow;
23
18
  static ID id_left_shift;
24
19
  static ID id_right_shift;
@@ -14,11 +14,6 @@
14
14
 
15
15
  #define m_map(x) m_num_to_data(rb_yield(m_data_to_num(x)))
16
16
 
17
- #ifdef __SSE2__
18
- #include <emmintrin.h>
19
- #define SIMD_ALIGNMENT_SIZE 16
20
- #endif
21
-
22
17
  static ID id_pow;
23
18
  static ID id_left_shift;
24
19
  static ID id_right_shift;
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: numo-narray-alt
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.10.5
4
+ version: 0.11.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - yoshoku
@@ -214,7 +214,7 @@ licenses:
214
214
  metadata:
215
215
  homepage_uri: https://github.com/yoshoku/numo-narray-alt
216
216
  changelog_uri: https://github.com/yoshoku/numo-narray-alt/blob/main/CHANGELOG.md
217
- documentation_uri: https://gemdocs.org/gems/numo-narray-alt/0.10.5/
217
+ documentation_uri: https://gemdocs.org/gems/numo-narray-alt/0.11.0/
218
218
  rubygems_mfa_required: 'true'
219
219
  post_install_message: |
220
220
  ===