RubyGems - numo-narray-alt - Versions diffs - 0.9.6 → 0.9.8 - Mend

numo-narray-alt 0.9.6 → 0.9.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (46) hide show

checksums.yaml +4 -4
data/README.md +31 -10
data/ext/numo/narray/SFMT-params19937.h +8 -16
data/ext/numo/narray/numo/narray.h +2 -2
data/ext/numo/narray/numo/types/complex.h +2 -2
data/ext/numo/narray/src/mh/math/acos.h +9 -0
data/ext/numo/narray/src/mh/math/acosh.h +9 -0
data/ext/numo/narray/src/mh/math/asin.h +9 -0
data/ext/numo/narray/src/mh/math/asinh.h +9 -0
data/ext/numo/narray/src/mh/math/atan.h +9 -0
data/ext/numo/narray/src/mh/math/atan2.h +29 -0
data/ext/numo/narray/src/mh/math/atanh.h +9 -0
data/ext/numo/narray/src/mh/math/cbrt.h +9 -0
data/ext/numo/narray/src/mh/math/cos.h +9 -0
data/ext/numo/narray/src/mh/math/cosh.h +9 -0
data/ext/numo/narray/src/mh/math/erf.h +9 -0
data/ext/numo/narray/src/mh/math/erfc.h +9 -0
data/ext/numo/narray/src/mh/math/exp.h +9 -0
data/ext/numo/narray/src/mh/math/exp10.h +9 -0
data/ext/numo/narray/src/mh/math/exp2.h +9 -0
data/ext/numo/narray/src/mh/math/expm1.h +9 -0
data/ext/numo/narray/src/mh/math/frexp.h +30 -0
data/ext/numo/narray/src/mh/math/hypot.h +29 -0
data/ext/numo/narray/src/mh/math/ldexp.h +29 -0
data/ext/numo/narray/src/mh/math/log.h +9 -0
data/ext/numo/narray/src/mh/math/log10.h +9 -0
data/ext/numo/narray/src/mh/math/log1p.h +9 -0
data/ext/numo/narray/src/mh/math/log2.h +9 -0
data/ext/numo/narray/src/mh/math/sin.h +9 -0
data/ext/numo/narray/src/mh/math/sinc.h +9 -0
data/ext/numo/narray/src/mh/math/sinh.h +9 -0
data/ext/numo/narray/src/mh/math/sqrt.h +203 -0
data/ext/numo/narray/src/mh/math/tan.h +9 -0
data/ext/numo/narray/src/mh/math/tanh.h +9 -0
data/ext/numo/narray/src/mh/math/unary_func.h +70 -0
data/ext/numo/narray/src/mh/mean.h +1 -8
data/ext/numo/narray/src/mh/rms.h +1 -8
data/ext/numo/narray/src/mh/stddev.h +1 -8
data/ext/numo/narray/src/mh/var.h +1 -8
data/ext/numo/narray/src/t_dcomplex.c +236 -1707
data/ext/numo/narray/src/t_dfloat.c +66 -1952
data/ext/numo/narray/src/t_robject.c +4 -4
data/ext/numo/narray/src/t_scomplex.c +236 -1707
data/ext/numo/narray/src/t_sfloat.c +66 -1952
data/lib/numo/narray/extra.rb +77 -0
metadata +33 -3

data/ext/numo/narray/src/mh/math/sqrt.h ADDED Viewed

@@ -0,0 +1,203 @@
+#ifndef NUMO_NARRAY_MH_MATH_SQRT_H
+#define NUMO_NARRAY_MH_MATH_SQRT_H 1
+#include "unary_func.h"
+/*
+ * Scalar (non-SIMD) sqrt method pair for a floating-point NArray type.
+ * Delegates to DEF_NARRAY_FLT_UNARY_MATH_METHOD_FUNC with fMathFunc = sqrt, which expands to
+ * iter_<tDType>_math_s_sqrt and <tDType>_math_s_sqrt and applies m_sqrt to each element.
+ */
+#define DEF_NARRAY_FLT_SQRT_METHOD_FUNC(tDType, tNAryType)                                     \
+  DEF_NARRAY_FLT_UNARY_MATH_METHOD_FUNC(sqrt, tDType, tNAryType)
+/*
+ * SSE2 single-precision variant of the sqrt method pair.
+ * Expands to iter_<tDType>_math_s_sqrt (the loop body) and <tDType>_math_s_sqrt, which wraps
+ * the loop body in an ndfunc_t and runs it through na_ndloop on a1 (mod is not used).
+ * Indexed data and strides that fail the alignment checks go through scalar m_sqrt loops.
+ * Contiguous data with n >= num_pack that passes is_same_aligned2 is processed num_pack
+ * elements at a time with _mm_load_ps / _mm_sqrt_ps, with scalar loops before and after.
+ * NOTE(review): __m128 and the _ps intrinsics imply tDType is a 4-byte float - confirm at
+ * the instantiation site. is_same_aligned2 presumably checks that both pointers share the
+ * same offset from a SIMD_ALIGNMENT_SIZE boundary - verify against its definition.
+ */
+#define DEF_NARRAY_FLT_SQRT_SSE2_SGL_METHOD_FUNC(tDType, tNAryType)                            \
+  static void iter_##tDType##_math_s_sqrt(na_loop_t* const lp) {                               \
+    size_t i = 0;                                                                              \
+    size_t n;                                                                                  \
+    char *p1, *p2;                                                                             \
+    ssize_t s1, s2;                                                                            \
+    size_t *idx1, *idx2;                                                                       \
+    tDType x;                                                                                  \
+    size_t cnt;                                                                                \
+    size_t cnt_simd_loop = -1;                                                                 \
+    __m128 a;                                                                                  \
+    size_t num_pack;                                                                           \
+    num_pack = SIMD_ALIGNMENT_SIZE / sizeof(tDType);                                           \
+                                                                                               \
+    INIT_COUNTER(lp, n);                                                                       \
+    INIT_PTR_IDX(lp, 0, p1, s1, idx1);                                                         \
+    INIT_PTR_IDX(lp, 1, p2, s2, idx2);                                                         \
+                                                                                               \
+    if (idx1) {                                                                                \
+      if (idx2) {                                                                              \
+        for (i = 0; i < n; i++) {                                                              \
+          GET_DATA_INDEX(p1, idx1, tDType, x);                                                 \
+          x = m_sqrt(x);                                                                       \
+          SET_DATA_INDEX(p2, idx2, tDType, x);                                                 \
+        }                                                                                      \
+      } else {                                                                                 \
+        for (i = 0; i < n; i++) {                                                              \
+          GET_DATA_INDEX(p1, idx1, tDType, x);                                                 \
+          x = m_sqrt(x);                                                                       \
+          SET_DATA_STRIDE(p2, s2, tDType, x);                                                  \
+        }                                                                                      \
+      }                                                                                        \
+    } else {                                                                                   \
+      if (idx2) {                                                                              \
+        for (i = 0; i < n; i++) {                                                              \
+          GET_DATA_STRIDE(p1, s1, tDType, x);                                                  \
+          x = m_sqrt(x);                                                                       \
+          SET_DATA_INDEX(p2, idx2, tDType, x);                                                 \
+        }                                                                                      \
+      } else {                                                                                 \
+        /* Both sides use byte strides: try typed and SIMD fast paths first. */                \
+        if (is_aligned(p1, sizeof(tDType)) && is_aligned(p2, sizeof(tDType))) {                \
+          if (s1 == sizeof(tDType) && s2 == sizeof(tDType)) {                                  \
+            /* SIMD needs at least one full pack; i is still 0 here. */                        \
+            if ((n >= num_pack) &&                                                             \
+                is_same_aligned2(&((tDType*)p1)[i], &((tDType*)p2)[i], SIMD_ALIGNMENT_SIZE)) { \
+              cnt = get_count_of_elements_not_aligned_to_simd_size(                            \
+                &((tDType*)p1)[i], SIMD_ALIGNMENT_SIZE, sizeof(tDType)                         \
+              );                                                                               \
+              /* Scalar loop over the first cnt elements before the SIMD loop. */              \
+              for (i = 0; i < cnt; i++) {                                                      \
+                ((tDType*)p2)[i] = m_sqrt(((tDType*)p1)[i]);                                   \
+              }                                                                                \
+              /* Elements left over after the last full pack. */                               \
+              cnt_simd_loop = (n - i) % num_pack;                                              \
+              /* Same buffer for input and output: write back with an aligned store. */        \
+              if (p1 == p2) {                                                                  \
+                for (; i < n - cnt_simd_loop; i += num_pack) {                                 \
+                  a = _mm_load_ps(&((tDType*)p1)[i]);                                          \
+                  a = _mm_sqrt_ps(a);                                                          \
+                  _mm_store_ps(&((tDType*)p1)[i], a);                                          \
+                }                                                                              \
+              } else {                                                                         \
+                /* Distinct buffers: _mm_stream_ps writes with a non-temporal hint. */         \
+                for (; i < n - cnt_simd_loop; i += num_pack) {                                 \
+                  a = _mm_load_ps(&((tDType*)p1)[i]);                                          \
+                  a = _mm_sqrt_ps(a);                                                          \
+                  _mm_stream_ps(&((tDType*)p2)[i], a);                                         \
+                }                                                                              \
+              }                                                                                \
+            }                                                                                  \
+            /* cnt_simd_loop is still (size_t)-1 if the SIMD branch was skipped, so */         \
+            /* this loop then handles all n elements; otherwise only the tail. */              \
+            if (cnt_simd_loop != 0) {                                                          \
+              for (; i < n; i++) {                                                             \
+                ((tDType*)p2)[i] = m_sqrt(((tDType*)p1)[i]);                                   \
+              }                                                                                \
+            }                                                                                  \
+            return;                                                                            \
+          }                                                                                    \
+          /* Other strides that pass is_aligned_step: typed pointer walk. */                   \
+          if (is_aligned_step(s1, sizeof(tDType)) && is_aligned_step(s2, sizeof(tDType))) {    \
+            for (i = 0; i < n; i++) {                                                          \
+              *(tDType*)p2 = m_sqrt(*(tDType*)p1);                                             \
+              p1 += s1;                                                                        \
+              p2 += s2;                                                                        \
+            }                                                                                  \
+            return;                                                                            \
+          }                                                                                    \
+        }                                                                                      \
+        /* Generic strided fallback using the GET/SET accessor macros. */                      \
+        for (i = 0; i < n; i++) {                                                              \
+          GET_DATA_STRIDE(p1, s1, tDType, x);                                                  \
+          x = m_sqrt(x);                                                                       \
+          SET_DATA_STRIDE(p2, s2, tDType, x);                                                  \
+        }                                                                                      \
+      }                                                                                        \
+    }                                                                                          \
+  }                                                                                            \
+                                                                                               \
+  static VALUE tDType##_math_s_sqrt(VALUE mod, VALUE a1) {                                     \
+    ndfunc_arg_in_t ain[1] = { { tNAryType, 0 } };                                             \
+    ndfunc_arg_out_t aout[1] = { { tNAryType, 0 } };                                           \
+    ndfunc_t ndf = { iter_##tDType##_math_s_sqrt, FULL_LOOP, 1, 1, ain, aout };                \
+    return na_ndloop(&ndf, 1, a1);                                                             \
+  }
+/*
+ * SSE2 double-precision variant of the sqrt method pair.
+ * Expands to iter_<tDType>_math_s_sqrt (the loop body) and <tDType>_math_s_sqrt, which wraps
+ * the loop body in an ndfunc_t and runs it through na_ndloop on a1 (mod is not used).
+ * Indexed data and strides that fail the alignment checks go through scalar m_sqrt loops.
+ * Contiguous data with n >= num_pack that passes is_same_aligned2 is processed num_pack
+ * elements at a time with _mm_load_pd / _mm_sqrt_pd, with scalar loops before and after.
+ * NOTE(review): __m128d and the _pd intrinsics imply tDType is an 8-byte double - confirm at
+ * the instantiation site. is_same_aligned2 presumably checks that both pointers share the
+ * same offset from a SIMD_ALIGNMENT_SIZE boundary - verify against its definition.
+ */
+#define DEF_NARRAY_FLT_SQRT_SSE2_DBL_METHOD_FUNC(tDType, tNAryType)                            \
+  static void iter_##tDType##_math_s_sqrt(na_loop_t* const lp) {                               \
+    size_t i = 0;                                                                              \
+    size_t n;                                                                                  \
+    char *p1, *p2;                                                                             \
+    ssize_t s1, s2;                                                                            \
+    size_t *idx1, *idx2;                                                                       \
+    tDType x;                                                                                  \
+    size_t cnt;                                                                                \
+    size_t cnt_simd_loop = -1;                                                                 \
+    __m128d a;                                                                                 \
+    size_t num_pack;                                                                           \
+    num_pack = SIMD_ALIGNMENT_SIZE / sizeof(tDType);                                           \
+                                                                                               \
+    INIT_COUNTER(lp, n);                                                                       \
+    INIT_PTR_IDX(lp, 0, p1, s1, idx1);                                                         \
+    INIT_PTR_IDX(lp, 1, p2, s2, idx2);                                                         \
+                                                                                               \
+    if (idx1) {                                                                                \
+      if (idx2) {                                                                              \
+        for (i = 0; i < n; i++) {                                                              \
+          GET_DATA_INDEX(p1, idx1, tDType, x);                                                 \
+          x = m_sqrt(x);                                                                       \
+          SET_DATA_INDEX(p2, idx2, tDType, x);                                                 \
+        }                                                                                      \
+      } else {                                                                                 \
+        for (i = 0; i < n; i++) {                                                              \
+          GET_DATA_INDEX(p1, idx1, tDType, x);                                                 \
+          x = m_sqrt(x);                                                                       \
+          SET_DATA_STRIDE(p2, s2, tDType, x);                                                  \
+        }                                                                                      \
+      }                                                                                        \
+    } else {                                                                                   \
+      if (idx2) {                                                                              \
+        for (i = 0; i < n; i++) {                                                              \
+          GET_DATA_STRIDE(p1, s1, tDType, x);                                                  \
+          x = m_sqrt(x);                                                                       \
+          SET_DATA_INDEX(p2, idx2, tDType, x);                                                 \
+        }                                                                                      \
+      } else {                                                                                 \
+        /* Both sides use byte strides: try typed and SIMD fast paths first. */                \
+        if (is_aligned(p1, sizeof(tDType)) && is_aligned(p2, sizeof(tDType))) {                \
+          if (s1 == sizeof(tDType) && s2 == sizeof(tDType)) {                                  \
+            /* SIMD needs at least one full pack; i is still 0 here. */                        \
+            if ((n >= num_pack) &&                                                             \
+                is_same_aligned2(&((tDType*)p1)[i], &((tDType*)p2)[i], SIMD_ALIGNMENT_SIZE)) { \
+              cnt = get_count_of_elements_not_aligned_to_simd_size(                            \
+                &((tDType*)p1)[i], SIMD_ALIGNMENT_SIZE, sizeof(tDType)                         \
+              );                                                                               \
+              /* Scalar loop over the first cnt elements before the SIMD loop. */              \
+              for (i = 0; i < cnt; i++) {                                                      \
+                ((tDType*)p2)[i] = m_sqrt(((tDType*)p1)[i]);                                   \
+              }                                                                                \
+              /* Elements left over after the last full pack. */                               \
+              cnt_simd_loop = (n - i) % num_pack;                                              \
+              /* Same buffer for input and output: write back with an aligned store. */        \
+              if (p1 == p2) {                                                                  \
+                for (; i < n - cnt_simd_loop; i += num_pack) {                                 \
+                  a = _mm_load_pd(&((tDType*)p1)[i]);                                          \
+                  a = _mm_sqrt_pd(a);                                                          \
+                  _mm_store_pd(&((tDType*)p1)[i], a);                                          \
+                }                                                                              \
+              } else {                                                                         \
+                /* Distinct buffers: _mm_stream_pd writes with a non-temporal hint. */         \
+                for (; i < n - cnt_simd_loop; i += num_pack) {                                 \
+                  a = _mm_load_pd(&((tDType*)p1)[i]);                                          \
+                  a = _mm_sqrt_pd(a);                                                          \
+                  _mm_stream_pd(&((tDType*)p2)[i], a);                                         \
+                }                                                                              \
+              }                                                                                \
+            }                                                                                  \
+            /* cnt_simd_loop is still (size_t)-1 if the SIMD branch was skipped, so */         \
+            /* this loop then handles all n elements; otherwise only the tail. */              \
+            if (cnt_simd_loop != 0) {                                                          \
+              for (; i < n; i++) {                                                             \
+                ((tDType*)p2)[i] = m_sqrt(((tDType*)p1)[i]);                                   \
+              }                                                                                \
+            }                                                                                  \
+            return;                                                                            \
+          }                                                                                    \
+          /* Other strides that pass is_aligned_step: typed pointer walk. */                   \
+          if (is_aligned_step(s1, sizeof(tDType)) && is_aligned_step(s2, sizeof(tDType))) {    \
+            for (i = 0; i < n; i++) {                                                          \
+              *(tDType*)p2 = m_sqrt(*(tDType*)p1);                                             \
+              p1 += s1;                                                                        \
+              p2 += s2;                                                                        \
+            }                                                                                  \
+            return;                                                                            \
+          }                                                                                    \
+        }                                                                                      \
+        /* Generic strided fallback using the GET/SET accessor macros. */                      \
+        for (i = 0; i < n; i++) {                                                              \
+          GET_DATA_STRIDE(p1, s1, tDType, x);                                                  \
+          x = m_sqrt(x);                                                                       \
+          SET_DATA_STRIDE(p2, s2, tDType, x);                                                  \
+        }                                                                                      \
+      }                                                                                        \
+    }                                                                                          \
+  }                                                                                            \
+                                                                                               \
+  static VALUE tDType##_math_s_sqrt(VALUE mod, VALUE a1) {                                     \
+    ndfunc_arg_in_t ain[1] = { { tNAryType, 0 } };                                             \
+    ndfunc_arg_out_t aout[1] = { { tNAryType, 0 } };                                           \
+    ndfunc_t ndf = { iter_##tDType##_math_s_sqrt, FULL_LOOP, 1, 1, ain, aout };                \
+    return na_ndloop(&ndf, 1, a1);                                                             \
+  }
+#endif /* NUMO_NARRAY_MH_MATH_SQRT_H */

data/ext/numo/narray/src/mh/math/tan.h ADDED Viewed

@@ -0,0 +1,9 @@
+#ifndef NUMO_NARRAY_MH_MATH_TAN_H
+#define NUMO_NARRAY_MH_MATH_TAN_H 1
+#include "unary_func.h"
+/*
+ * tan method pair for a floating-point NArray type.
+ * Delegates to DEF_NARRAY_FLT_UNARY_MATH_METHOD_FUNC with fMathFunc = tan, which expands to
+ * iter_<tDType>_math_s_tan and <tDType>_math_s_tan and applies m_tan to each element.
+ */
+#define DEF_NARRAY_FLT_TAN_METHOD_FUNC(tDType, tNAryType)                                      \
+  DEF_NARRAY_FLT_UNARY_MATH_METHOD_FUNC(tan, tDType, tNAryType)
+#endif /* NUMO_NARRAY_MH_MATH_TAN_H */

data/ext/numo/narray/src/mh/math/tanh.h ADDED Viewed

@@ -0,0 +1,9 @@
+#ifndef NUMO_NARRAY_MH_MATH_TANH_H
+#define NUMO_NARRAY_MH_MATH_TANH_H 1
+#include "unary_func.h"
+/*
+ * tanh method pair for a floating-point NArray type.
+ * Delegates to DEF_NARRAY_FLT_UNARY_MATH_METHOD_FUNC with fMathFunc = tanh, which expands to
+ * iter_<tDType>_math_s_tanh and <tDType>_math_s_tanh and applies m_tanh to each element.
+ */
+#define DEF_NARRAY_FLT_TANH_METHOD_FUNC(tDType, tNAryType)                                     \
+  DEF_NARRAY_FLT_UNARY_MATH_METHOD_FUNC(tanh, tDType, tNAryType)
+#endif /* NUMO_NARRAY_MH_MATH_TANH_H */

data/ext/numo/narray/src/mh/math/unary_func.h ADDED Viewed

@@ -0,0 +1,70 @@
+#ifndef NUMO_NARRAY_MH_MATH_UNARY_FUNC_H
+#define NUMO_NARRAY_MH_MATH_UNARY_FUNC_H 1
+/*
+ * Generates the static function pair for an element-wise unary math operation:
+ *   iter_<tDType>_math_s_<fMathFunc>: loop body that applies m_<fMathFunc> to each element.
+ *   <tDType>_math_s_<fMathFunc>: builds an ndfunc_t around that loop body and runs it
+ *   through na_ndloop on the argument a1 (the mod argument is not used).
+ * The loop body selects its loop by whether input and output carry an index array or a
+ * byte stride.
+ */
+#define DEF_NARRAY_FLT_UNARY_MATH_METHOD_FUNC(fMathFunc, tDType, tNAryType)                    \
+  static void iter_##tDType##_math_s_##fMathFunc(na_loop_t* const lp) {                        \
+    size_t n;                                                                                  \
+    char *p1, *p2;                                                                             \
+    ssize_t s1, s2;                                                                            \
+    size_t *idx1, *idx2;                                                                       \
+    tDType x;                                                                                  \
+                                                                                               \
+    INIT_COUNTER(lp, n);                                                                       \
+    INIT_PTR_IDX(lp, 0, p1, s1, idx1);                                                         \
+    INIT_PTR_IDX(lp, 1, p2, s2, idx2);                                                         \
+                                                                                               \
+    /* Index array on both input and output. */                                                \
+    if (idx1 && idx2) {                                                                        \
+      for (size_t i = 0; i < n; i++) {                                                         \
+        GET_DATA_INDEX(p1, idx1, tDType, x);                                                   \
+        x = m_##fMathFunc(x);                                                                  \
+        SET_DATA_INDEX(p2, idx2, tDType, x);                                                   \
+      }                                                                                        \
+      return;                                                                                  \
+    }                                                                                          \
+    /* Index array on input, byte stride on output. */                                         \
+    if (idx1) {                                                                                \
+      for (size_t i = 0; i < n; i++) {                                                         \
+        GET_DATA_INDEX(p1, idx1, tDType, x);                                                   \
+        x = m_##fMathFunc(x);                                                                  \
+        SET_DATA_STRIDE(p2, s2, tDType, x);                                                    \
+      }                                                                                        \
+      return;                                                                                  \
+    }                                                                                          \
+    /* Byte stride on input, index array on output. */                                         \
+    if (idx2) {                                                                                \
+      for (size_t i = 0; i < n; i++) {                                                         \
+        GET_DATA_STRIDE(p1, s1, tDType, x);                                                    \
+        x = m_##fMathFunc(x);                                                                  \
+        SET_DATA_INDEX(p2, idx2, tDType, x);                                                   \
+      }                                                                                        \
+      return;                                                                                  \
+    }                                                                                          \
+    /* Byte stride on both sides: try direct typed access when alignment allows it. */         \
+    if (is_aligned(p1, sizeof(tDType)) && is_aligned(p2, sizeof(tDType))) {                    \
+      /* Contiguous elements: plain array indexing. */                                         \
+      if (s1 == sizeof(tDType) && s2 == sizeof(tDType)) {                                      \
+        tDType* src = (tDType*)p1;                                                             \
+        tDType* dst = (tDType*)p2;                                                             \
+        for (size_t i = 0; i < n; i++) {                                                       \
+          dst[i] = m_##fMathFunc(src[i]);                                                      \
+        }                                                                                      \
+        return;                                                                                \
+      }                                                                                        \
+      /* Strides accepted by is_aligned_step: advance both byte pointers per element. */       \
+      if (is_aligned_step(s1, sizeof(tDType)) && is_aligned_step(s2, sizeof(tDType))) {        \
+        for (size_t i = 0; i < n; i++, p1 += s1, p2 += s2) {                                   \
+          *(tDType*)p2 = m_##fMathFunc(*(tDType*)p1);                                          \
+        }                                                                                      \
+        return;                                                                                \
+      }                                                                                        \
+    }                                                                                          \
+    /* Generic strided fallback using the GET/SET accessor macros. */                          \
+    for (size_t i = 0; i < n; i++) {                                                           \
+      GET_DATA_STRIDE(p1, s1, tDType, x);                                                      \
+      x = m_##fMathFunc(x);                                                                    \
+      SET_DATA_STRIDE(p2, s2, tDType, x);                                                      \
+    }                                                                                          \
+  }                                                                                            \
+                                                                                               \
+  static VALUE tDType##_math_s_##fMathFunc(VALUE mod, VALUE a1) {                              \
+    ndfunc_arg_in_t ain[1] = { { tNAryType, 0 } };                                             \
+    ndfunc_arg_out_t aout[1] = { { tNAryType, 0 } };                                           \
+    ndfunc_t ndf = { iter_##tDType##_math_s_##fMathFunc, FULL_LOOP, 1, 1, ain, aout };         \
+    return na_ndloop(&ndf, 1, a1);                                                             \
+  }
+#endif /* NUMO_NARRAY_MH_MATH_UNARY_FUNC_H */

data/ext/numo/narray/src/mh/mean.h CHANGED Viewed

@@ -1,14 +1,7 @@
-/*
-  mean.h
-  Numo::NArray Alternative
-  created on: 2025-10-08
-  Copyright (C) 2025 Atsushi Tatsuma
-*/
 #ifndef NUMO_NARRAY_MH_MEAN_H
 #define NUMO_NARRAY_MH_MEAN_H 1
-#define DEF_NARRAY_FLT_MEAN_METHOD_FUNC(tDType, tRtDType, tNAryClass, tRtNAryClass)            \
+#define DEF_NARRAY_FLT_MEAN_METHOD_FUNC(tDType, tNAryClass, tRtDType, tRtNAryClass)            \
   static void iter_##tDType##_mean(na_loop_t* const lp) {                                      \
     size_t n;                                                                                  \
     char* p1;                                                                                  \

data/ext/numo/narray/src/mh/rms.h CHANGED Viewed

@@ -1,14 +1,7 @@
-/*
-  rms.h
-  Numo::NArray Alternative
-  created on: 2025-10-15
-  Copyright (C) 2025 Atsushi Tatsuma
-*/
 #ifndef NUMO_NARRAY_MH_RMS_H
 #define NUMO_NARRAY_MH_RMS_H 1
-#define DEF_NARRAY_FLT_RMS_METHOD_FUNC(tDType, tRtDType, tNAryClass, tRtNAryClass)             \
+#define DEF_NARRAY_FLT_RMS_METHOD_FUNC(tDType, tNAryClass, tRtDType, tRtNAryClass)             \
   static void iter_##tDType##_rms(na_loop_t* const lp) {                                       \
     size_t n;                                                                                  \
     char* p1;                                                                                  \

data/ext/numo/narray/src/mh/stddev.h CHANGED Viewed

@@ -1,14 +1,7 @@
-/*
-  stddev.h
-  Numo::NArray Alternative
-  created on: 2025-10-15
-  Copyright (C) 2025 Atsushi Tatsuma
-*/
 #ifndef NUMO_NARRAY_MH_STDDEV_H
 #define NUMO_NARRAY_MH_STDDEV_H 1
-#define DEF_NARRAY_FLT_STDDEV_METHOD_FUNC(tDType, tRtDType, tNAryClass, tRtNAryClass)          \
+#define DEF_NARRAY_FLT_STDDEV_METHOD_FUNC(tDType, tNAryClass, tRtDType, tRtNAryClass)          \
   static void iter_##tDType##_stddev(na_loop_t* const lp) {                                    \
     size_t n;                                                                                  \
     char* p1;                                                                                  \

data/ext/numo/narray/src/mh/var.h CHANGED Viewed

@@ -1,14 +1,7 @@
-/*
-  var.h
-  Numo::NArray Alternative
-  created on: 2025-10-15
-  Copyright (C) 2025 Atsushi Tatsuma
-*/
 #ifndef NUMO_NARRAY_MH_VAR_H
 #define NUMO_NARRAY_MH_VAR_H 1
-#define DEF_NARRAY_FLT_VAR_METHOD_FUNC(tDType, tRtDType, tNAryClass, tRtNAryClass)             \
+#define DEF_NARRAY_FLT_VAR_METHOD_FUNC(tDType, tNAryClass, tRtDType, tRtNAryClass)             \
   static void iter_##tDType##_var(na_loop_t* const lp) {                                       \
     size_t n;                                                                                  \
     char* p1;                                                                                  \