numo-narray-alt 0.9.6 → 0.9.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +31 -10
  3. data/ext/numo/narray/SFMT-params19937.h +8 -16
  4. data/ext/numo/narray/numo/narray.h +2 -2
  5. data/ext/numo/narray/numo/types/complex.h +2 -2
  6. data/ext/numo/narray/src/mh/math/acos.h +9 -0
  7. data/ext/numo/narray/src/mh/math/acosh.h +9 -0
  8. data/ext/numo/narray/src/mh/math/asin.h +9 -0
  9. data/ext/numo/narray/src/mh/math/asinh.h +9 -0
  10. data/ext/numo/narray/src/mh/math/atan.h +9 -0
  11. data/ext/numo/narray/src/mh/math/atan2.h +29 -0
  12. data/ext/numo/narray/src/mh/math/atanh.h +9 -0
  13. data/ext/numo/narray/src/mh/math/cbrt.h +9 -0
  14. data/ext/numo/narray/src/mh/math/cos.h +9 -0
  15. data/ext/numo/narray/src/mh/math/cosh.h +9 -0
  16. data/ext/numo/narray/src/mh/math/erf.h +9 -0
  17. data/ext/numo/narray/src/mh/math/erfc.h +9 -0
  18. data/ext/numo/narray/src/mh/math/exp.h +9 -0
  19. data/ext/numo/narray/src/mh/math/exp10.h +9 -0
  20. data/ext/numo/narray/src/mh/math/exp2.h +9 -0
  21. data/ext/numo/narray/src/mh/math/expm1.h +9 -0
  22. data/ext/numo/narray/src/mh/math/frexp.h +30 -0
  23. data/ext/numo/narray/src/mh/math/hypot.h +29 -0
  24. data/ext/numo/narray/src/mh/math/ldexp.h +29 -0
  25. data/ext/numo/narray/src/mh/math/log.h +9 -0
  26. data/ext/numo/narray/src/mh/math/log10.h +9 -0
  27. data/ext/numo/narray/src/mh/math/log1p.h +9 -0
  28. data/ext/numo/narray/src/mh/math/log2.h +9 -0
  29. data/ext/numo/narray/src/mh/math/sin.h +9 -0
  30. data/ext/numo/narray/src/mh/math/sinc.h +9 -0
  31. data/ext/numo/narray/src/mh/math/sinh.h +9 -0
  32. data/ext/numo/narray/src/mh/math/sqrt.h +203 -0
  33. data/ext/numo/narray/src/mh/math/tan.h +9 -0
  34. data/ext/numo/narray/src/mh/math/tanh.h +9 -0
  35. data/ext/numo/narray/src/mh/math/unary_func.h +70 -0
  36. data/ext/numo/narray/src/mh/mean.h +1 -8
  37. data/ext/numo/narray/src/mh/rms.h +1 -8
  38. data/ext/numo/narray/src/mh/stddev.h +1 -8
  39. data/ext/numo/narray/src/mh/var.h +1 -8
  40. data/ext/numo/narray/src/t_dcomplex.c +236 -1707
  41. data/ext/numo/narray/src/t_dfloat.c +66 -1952
  42. data/ext/numo/narray/src/t_robject.c +4 -4
  43. data/ext/numo/narray/src/t_scomplex.c +236 -1707
  44. data/ext/numo/narray/src/t_sfloat.c +66 -1952
  45. data/lib/numo/narray/extra.rb +77 -0
  46. metadata +33 -3
@@ -0,0 +1,203 @@
1
+ #ifndef NUMO_NARRAY_MH_MATH_SQRT_H
2
+ #define NUMO_NARRAY_MH_MATH_SQRT_H 1
3
+
4
+ #include "unary_func.h"
5
+
6
+ #define DEF_NARRAY_FLT_SQRT_METHOD_FUNC(tDType, tNAryType) \
7
+ DEF_NARRAY_FLT_UNARY_MATH_METHOD_FUNC(sqrt, tDType, tNAryType)
8
+
9
+ #define DEF_NARRAY_FLT_SQRT_SSE2_SGL_METHOD_FUNC(tDType, tNAryType) \
10
+ static void iter_##tDType##_math_s_sqrt(na_loop_t* const lp) { \
11
+ size_t i = 0; \
12
+ size_t n; \
13
+ char *p1, *p2; \
14
+ ssize_t s1, s2; \
15
+ size_t *idx1, *idx2; \
16
+ tDType x; \
17
+ size_t cnt; \
18
+ size_t cnt_simd_loop = -1; \
19
+ __m128 a; \
20
+ size_t num_pack; \
21
+ num_pack = SIMD_ALIGNMENT_SIZE / sizeof(tDType); \
22
+ \
23
+ INIT_COUNTER(lp, n); \
24
+ INIT_PTR_IDX(lp, 0, p1, s1, idx1); \
25
+ INIT_PTR_IDX(lp, 1, p2, s2, idx2); \
26
+ \
27
+ if (idx1) { \
28
+ if (idx2) { \
29
+ for (i = 0; i < n; i++) { \
30
+ GET_DATA_INDEX(p1, idx1, tDType, x); \
31
+ x = m_sqrt(x); \
32
+ SET_DATA_INDEX(p2, idx2, tDType, x); \
33
+ } \
34
+ } else { \
35
+ for (i = 0; i < n; i++) { \
36
+ GET_DATA_INDEX(p1, idx1, tDType, x); \
37
+ x = m_sqrt(x); \
38
+ SET_DATA_STRIDE(p2, s2, tDType, x); \
39
+ } \
40
+ } \
41
+ } else { \
42
+ if (idx2) { \
43
+ for (i = 0; i < n; i++) { \
44
+ GET_DATA_STRIDE(p1, s1, tDType, x); \
45
+ x = m_sqrt(x); \
46
+ SET_DATA_INDEX(p2, idx2, tDType, x); \
47
+ } \
48
+ } else { \
49
+ if (is_aligned(p1, sizeof(tDType)) && is_aligned(p2, sizeof(tDType))) { \
50
+ if (s1 == sizeof(tDType) && s2 == sizeof(tDType)) { \
51
+ if ((n >= num_pack) && \
52
+ is_same_aligned2(&((tDType*)p1)[i], &((tDType*)p2)[i], SIMD_ALIGNMENT_SIZE)) { \
53
+ cnt = get_count_of_elements_not_aligned_to_simd_size( \
54
+ &((tDType*)p1)[i], SIMD_ALIGNMENT_SIZE, sizeof(tDType) \
55
+ ); \
56
+ for (i = 0; i < cnt; i++) { \
57
+ ((tDType*)p2)[i] = m_sqrt(((tDType*)p1)[i]); \
58
+ } \
59
+ cnt_simd_loop = (n - i) % num_pack; \
60
+ if (p1 == p2) { \
61
+ for (; i < n - cnt_simd_loop; i += num_pack) { \
62
+ a = _mm_load_ps(&((tDType*)p1)[i]); \
63
+ a = _mm_sqrt_ps(a); \
64
+ _mm_store_ps(&((tDType*)p1)[i], a); \
65
+ } \
66
+ } else { \
67
+ for (; i < n - cnt_simd_loop; i += num_pack) { \
68
+ a = _mm_load_ps(&((tDType*)p1)[i]); \
69
+ a = _mm_sqrt_ps(a); \
70
+ _mm_stream_ps(&((tDType*)p2)[i], a); \
71
+ } \
72
+ } \
73
+ } \
74
+ if (cnt_simd_loop != 0) { \
75
+ for (; i < n; i++) { \
76
+ ((tDType*)p2)[i] = m_sqrt(((tDType*)p1)[i]); \
77
+ } \
78
+ } \
79
+ return; \
80
+ } \
81
+ if (is_aligned_step(s1, sizeof(tDType)) && is_aligned_step(s2, sizeof(tDType))) { \
82
+ for (i = 0; i < n; i++) { \
83
+ *(tDType*)p2 = m_sqrt(*(tDType*)p1); \
84
+ p1 += s1; \
85
+ p2 += s2; \
86
+ } \
87
+ return; \
88
+ } \
89
+ } \
90
+ for (i = 0; i < n; i++) { \
91
+ GET_DATA_STRIDE(p1, s1, tDType, x); \
92
+ x = m_sqrt(x); \
93
+ SET_DATA_STRIDE(p2, s2, tDType, x); \
94
+ } \
95
+ } \
96
+ } \
97
+ } \
98
+ \
99
+ static VALUE tDType##_math_s_sqrt(VALUE mod, VALUE a1) { \
100
+ ndfunc_arg_in_t ain[1] = { { tNAryType, 0 } }; \
101
+ ndfunc_arg_out_t aout[1] = { { tNAryType, 0 } }; \
102
+ ndfunc_t ndf = { iter_##tDType##_math_s_sqrt, FULL_LOOP, 1, 1, ain, aout }; \
103
+ return na_ndloop(&ndf, 1, a1); \
104
+ }
105
+
106
+ #define DEF_NARRAY_FLT_SQRT_SSE2_DBL_METHOD_FUNC(tDType, tNAryType) \
107
+ static void iter_##tDType##_math_s_sqrt(na_loop_t* const lp) { \
108
+ size_t i = 0; \
109
+ size_t n; \
110
+ char *p1, *p2; \
111
+ ssize_t s1, s2; \
112
+ size_t *idx1, *idx2; \
113
+ tDType x; \
114
+ size_t cnt; \
115
+ size_t cnt_simd_loop = -1; \
116
+ __m128d a; \
117
+ size_t num_pack; \
118
+ num_pack = SIMD_ALIGNMENT_SIZE / sizeof(tDType); \
119
+ \
120
+ INIT_COUNTER(lp, n); \
121
+ INIT_PTR_IDX(lp, 0, p1, s1, idx1); \
122
+ INIT_PTR_IDX(lp, 1, p2, s2, idx2); \
123
+ \
124
+ if (idx1) { \
125
+ if (idx2) { \
126
+ for (i = 0; i < n; i++) { \
127
+ GET_DATA_INDEX(p1, idx1, tDType, x); \
128
+ x = m_sqrt(x); \
129
+ SET_DATA_INDEX(p2, idx2, tDType, x); \
130
+ } \
131
+ } else { \
132
+ for (i = 0; i < n; i++) { \
133
+ GET_DATA_INDEX(p1, idx1, tDType, x); \
134
+ x = m_sqrt(x); \
135
+ SET_DATA_STRIDE(p2, s2, tDType, x); \
136
+ } \
137
+ } \
138
+ } else { \
139
+ if (idx2) { \
140
+ for (i = 0; i < n; i++) { \
141
+ GET_DATA_STRIDE(p1, s1, tDType, x); \
142
+ x = m_sqrt(x); \
143
+ SET_DATA_INDEX(p2, idx2, tDType, x); \
144
+ } \
145
+ } else { \
146
+ if (is_aligned(p1, sizeof(tDType)) && is_aligned(p2, sizeof(tDType))) { \
147
+ if (s1 == sizeof(tDType) && s2 == sizeof(tDType)) { \
148
+ if ((n >= num_pack) && \
149
+ is_same_aligned2(&((tDType*)p1)[i], &((tDType*)p2)[i], SIMD_ALIGNMENT_SIZE)) { \
150
+ cnt = get_count_of_elements_not_aligned_to_simd_size( \
151
+ &((tDType*)p1)[i], SIMD_ALIGNMENT_SIZE, sizeof(tDType) \
152
+ ); \
153
+ for (i = 0; i < cnt; i++) { \
154
+ ((tDType*)p2)[i] = m_sqrt(((tDType*)p1)[i]); \
155
+ } \
156
+ cnt_simd_loop = (n - i) % num_pack; \
157
+ if (p1 == p2) { \
158
+ for (; i < n - cnt_simd_loop; i += num_pack) { \
159
+ a = _mm_load_pd(&((tDType*)p1)[i]); \
160
+ a = _mm_sqrt_pd(a); \
161
+ _mm_store_pd(&((tDType*)p1)[i], a); \
162
+ } \
163
+ } else { \
164
+ for (; i < n - cnt_simd_loop; i += num_pack) { \
165
+ a = _mm_load_pd(&((tDType*)p1)[i]); \
166
+ a = _mm_sqrt_pd(a); \
167
+ _mm_stream_pd(&((tDType*)p2)[i], a); \
168
+ } \
169
+ } \
170
+ } \
171
+ if (cnt_simd_loop != 0) { \
172
+ for (; i < n; i++) { \
173
+ ((tDType*)p2)[i] = m_sqrt(((tDType*)p1)[i]); \
174
+ } \
175
+ } \
176
+ return; \
177
+ } \
178
+ if (is_aligned_step(s1, sizeof(tDType)) && is_aligned_step(s2, sizeof(tDType))) { \
179
+ for (i = 0; i < n; i++) { \
180
+ *(tDType*)p2 = m_sqrt(*(tDType*)p1); \
181
+ p1 += s1; \
182
+ p2 += s2; \
183
+ } \
184
+ return; \
185
+ } \
186
+ } \
187
+ for (i = 0; i < n; i++) { \
188
+ GET_DATA_STRIDE(p1, s1, tDType, x); \
189
+ x = m_sqrt(x); \
190
+ SET_DATA_STRIDE(p2, s2, tDType, x); \
191
+ } \
192
+ } \
193
+ } \
194
+ } \
195
+ \
196
+ static VALUE tDType##_math_s_sqrt(VALUE mod, VALUE a1) { \
197
+ ndfunc_arg_in_t ain[1] = { { tNAryType, 0 } }; \
198
+ ndfunc_arg_out_t aout[1] = { { tNAryType, 0 } }; \
199
+ ndfunc_t ndf = { iter_##tDType##_math_s_sqrt, FULL_LOOP, 1, 1, ain, aout }; \
200
+ return na_ndloop(&ndf, 1, a1); \
201
+ }
202
+
203
+ #endif /* NUMO_NARRAY_MH_MATH_SQRT_H */
@@ -0,0 +1,9 @@
1
+ #ifndef NUMO_NARRAY_MH_MATH_TAN_H
2
+ #define NUMO_NARRAY_MH_MATH_TAN_H 1
3
+
4
+ #include "unary_func.h"
5
+
6
+ #define DEF_NARRAY_FLT_TAN_METHOD_FUNC(tDType, tNAryType) \
7
+ DEF_NARRAY_FLT_UNARY_MATH_METHOD_FUNC(tan, tDType, tNAryType)
8
+
9
+ #endif /* NUMO_NARRAY_MH_MATH_TAN_H */
@@ -0,0 +1,9 @@
1
+ #ifndef NUMO_NARRAY_MH_MATH_TANH_H
2
+ #define NUMO_NARRAY_MH_MATH_TANH_H 1
3
+
4
+ #include "unary_func.h"
5
+
6
+ #define DEF_NARRAY_FLT_TANH_METHOD_FUNC(tDType, tNAryType) \
7
+ DEF_NARRAY_FLT_UNARY_MATH_METHOD_FUNC(tanh, tDType, tNAryType)
8
+
9
+ #endif /* NUMO_NARRAY_MH_MATH_TANH_H */
@@ -0,0 +1,70 @@
1
+ #ifndef NUMO_NARRAY_MH_MATH_UNARY_FUNC_H
2
+ #define NUMO_NARRAY_MH_MATH_UNARY_FUNC_H 1
3
+
4
+ #define DEF_NARRAY_FLT_UNARY_MATH_METHOD_FUNC(fMathFunc, tDType, tNAryType) \
5
+ static void iter_##tDType##_math_s_##fMathFunc(na_loop_t* const lp) { \
6
+ size_t n; \
7
+ char *p1, *p2; \
8
+ ssize_t s1, s2; \
9
+ size_t *idx1, *idx2; \
10
+ tDType x; \
11
+ \
12
+ INIT_COUNTER(lp, n); \
13
+ INIT_PTR_IDX(lp, 0, p1, s1, idx1); \
14
+ INIT_PTR_IDX(lp, 1, p2, s2, idx2); \
15
+ \
16
+ if (idx1) { \
17
+ if (idx2) { \
18
+ for (size_t i = 0; i < n; i++) { \
19
+ GET_DATA_INDEX(p1, idx1, tDType, x); \
20
+ x = m_##fMathFunc(x); \
21
+ SET_DATA_INDEX(p2, idx2, tDType, x); \
22
+ } \
23
+ } else { \
24
+ for (size_t i = 0; i < n; i++) { \
25
+ GET_DATA_INDEX(p1, idx1, tDType, x); \
26
+ x = m_##fMathFunc(x); \
27
+ SET_DATA_STRIDE(p2, s2, tDType, x); \
28
+ } \
29
+ } \
30
+ } else { \
31
+ if (idx2) { \
32
+ for (size_t i = 0; i < n; i++) { \
33
+ GET_DATA_STRIDE(p1, s1, tDType, x); \
34
+ x = m_##fMathFunc(x); \
35
+ SET_DATA_INDEX(p2, idx2, tDType, x); \
36
+ } \
37
+ } else { \
38
+ if (is_aligned(p1, sizeof(tDType)) && is_aligned(p2, sizeof(tDType))) { \
39
+ if (s1 == sizeof(tDType) && s2 == sizeof(tDType)) { \
40
+ for (size_t i = 0; i < n; i++) { \
41
+ ((tDType*)p2)[i] = m_##fMathFunc(((tDType*)p1)[i]); \
42
+ } \
43
+ return; \
44
+ } \
45
+ if (is_aligned_step(s1, sizeof(tDType)) && is_aligned_step(s2, sizeof(tDType))) { \
46
+ for (size_t i = 0; i < n; i++) { \
47
+ *(tDType*)p2 = m_##fMathFunc(*(tDType*)p1); \
48
+ p1 += s1; \
49
+ p2 += s2; \
50
+ } \
51
+ return; \
52
+ } \
53
+ } \
54
+ for (size_t i = 0; i < n; i++) { \
55
+ GET_DATA_STRIDE(p1, s1, tDType, x); \
56
+ x = m_##fMathFunc(x); \
57
+ SET_DATA_STRIDE(p2, s2, tDType, x); \
58
+ } \
59
+ } \
60
+ } \
61
+ } \
62
+ \
63
+ static VALUE tDType##_math_s_##fMathFunc(VALUE mod, VALUE a1) { \
64
+ ndfunc_arg_in_t ain[1] = { { tNAryType, 0 } }; \
65
+ ndfunc_arg_out_t aout[1] = { { tNAryType, 0 } }; \
66
+ ndfunc_t ndf = { iter_##tDType##_math_s_##fMathFunc, FULL_LOOP, 1, 1, ain, aout }; \
67
+ return na_ndloop(&ndf, 1, a1); \
68
+ }
69
+
70
+ #endif /* NUMO_NARRAY_MH_MATH_UNARY_FUNC_H */
@@ -1,14 +1,7 @@
1
- /*
2
- mean.h
3
- Numo::NArray Alternative
4
-
5
- created on: 2025-10-08
6
- Copyright (C) 2025 Atsushi Tatsuma
7
- */
8
1
  #ifndef NUMO_NARRAY_MH_MEAN_H
9
2
  #define NUMO_NARRAY_MH_MEAN_H 1
10
3
 
11
- #define DEF_NARRAY_FLT_MEAN_METHOD_FUNC(tDType, tRtDType, tNAryClass, tRtNAryClass) \
4
+ #define DEF_NARRAY_FLT_MEAN_METHOD_FUNC(tDType, tNAryClass, tRtDType, tRtNAryClass) \
12
5
  static void iter_##tDType##_mean(na_loop_t* const lp) { \
13
6
  size_t n; \
14
7
  char* p1; \
@@ -1,14 +1,7 @@
1
- /*
2
- rms.h
3
- Numo::NArray Alternative
4
-
5
- created on: 2025-10-15
6
- Copyright (C) 2025 Atsushi Tatsuma
7
- */
8
1
  #ifndef NUMO_NARRAY_MH_RMS_H
9
2
  #define NUMO_NARRAY_MH_RMS_H 1
10
3
 
11
- #define DEF_NARRAY_FLT_RMS_METHOD_FUNC(tDType, tRtDType, tNAryClass, tRtNAryClass) \
4
+ #define DEF_NARRAY_FLT_RMS_METHOD_FUNC(tDType, tNAryClass, tRtDType, tRtNAryClass) \
12
5
  static void iter_##tDType##_rms(na_loop_t* const lp) { \
13
6
  size_t n; \
14
7
  char* p1; \
@@ -1,14 +1,7 @@
1
- /*
2
- stddev.h
3
- Numo::NArray Alternative
4
-
5
- created on: 2025-10-15
6
- Copyright (C) 2025 Atsushi Tatsuma
7
- */
8
1
  #ifndef NUMO_NARRAY_MH_STDDEV_H
9
2
  #define NUMO_NARRAY_MH_STDDEV_H 1
10
3
 
11
- #define DEF_NARRAY_FLT_STDDEV_METHOD_FUNC(tDType, tRtDType, tNAryClass, tRtNAryClass) \
4
+ #define DEF_NARRAY_FLT_STDDEV_METHOD_FUNC(tDType, tNAryClass, tRtDType, tRtNAryClass) \
12
5
  static void iter_##tDType##_stddev(na_loop_t* const lp) { \
13
6
  size_t n; \
14
7
  char* p1; \
@@ -1,14 +1,7 @@
1
- /*
2
- var.h
3
- Numo::NArray Alternative
4
-
5
- created on: 2025-10-15
6
- Copyright (C) 2025 Atsushi Tatsuma
7
- */
8
1
  #ifndef NUMO_NARRAY_MH_VAR_H
9
2
  #define NUMO_NARRAY_MH_VAR_H 1
10
3
 
11
- #define DEF_NARRAY_FLT_VAR_METHOD_FUNC(tDType, tRtDType, tNAryClass, tRtNAryClass) \
4
+ #define DEF_NARRAY_FLT_VAR_METHOD_FUNC(tDType, tNAryClass, tRtDType, tRtNAryClass) \
12
5
  static void iter_##tDType##_var(na_loop_t* const lp) { \
13
6
  size_t n; \
14
7
  char* p1; \