numo-narray-alt 0.9.11 → 0.9.12

This diff shows the changes between publicly released versions of the package as they appear in its public registry. It is provided for informational purposes only.
Files changed (91)
  1. checksums.yaml +4 -4
  2. data/Gemfile +0 -1
  3. data/README.md +7 -0
  4. data/ext/numo/narray/numo/narray.h +2 -2
  5. data/ext/numo/narray/numo/types/robj_macro.h +1 -1
  6. data/ext/numo/narray/src/mh/bincount.h +233 -0
  7. data/ext/numo/narray/src/mh/bit/and.h +225 -0
  8. data/ext/numo/narray/src/mh/bit/left_shift.h +225 -0
  9. data/ext/numo/narray/src/mh/bit/not.h +173 -0
  10. data/ext/numo/narray/src/mh/bit/or.h +225 -0
  11. data/ext/numo/narray/src/mh/bit/right_shift.h +225 -0
  12. data/ext/numo/narray/src/mh/bit/xor.h +225 -0
  13. data/ext/numo/narray/src/mh/coerce_cast.h +9 -0
  14. data/ext/numo/narray/src/mh/comp/binary_func.h +37 -0
  15. data/ext/numo/narray/src/mh/comp/eq.h +26 -0
  16. data/ext/numo/narray/src/mh/comp/ge.h +26 -0
  17. data/ext/numo/narray/src/mh/comp/gt.h +26 -0
  18. data/ext/numo/narray/src/mh/comp/le.h +26 -0
  19. data/ext/numo/narray/src/mh/comp/lt.h +26 -0
  20. data/ext/numo/narray/src/mh/comp/ne.h +26 -0
  21. data/ext/numo/narray/src/mh/comp/nearly_eq.h +26 -0
  22. data/ext/numo/narray/src/mh/divmod.h +142 -0
  23. data/ext/numo/narray/src/mh/eye.h +1 -1
  24. data/ext/numo/narray/src/mh/fill.h +94 -0
  25. data/ext/numo/narray/src/mh/format.h +108 -0
  26. data/ext/numo/narray/src/mh/format_to_a.h +89 -0
  27. data/ext/numo/narray/src/mh/inspect.h +33 -0
  28. data/ext/numo/narray/src/mh/isfinite.h +42 -0
  29. data/ext/numo/narray/src/mh/isinf.h +42 -0
  30. data/ext/numo/narray/src/mh/isnan.h +42 -0
  31. data/ext/numo/narray/src/mh/isneginf.h +42 -0
  32. data/ext/numo/narray/src/mh/isposinf.h +42 -0
  33. data/ext/numo/narray/src/mh/math/acos.h +2 -2
  34. data/ext/numo/narray/src/mh/math/acosh.h +2 -2
  35. data/ext/numo/narray/src/mh/math/asin.h +2 -2
  36. data/ext/numo/narray/src/mh/math/asinh.h +2 -2
  37. data/ext/numo/narray/src/mh/math/atan.h +2 -2
  38. data/ext/numo/narray/src/mh/math/atan2.h +3 -3
  39. data/ext/numo/narray/src/mh/math/atanh.h +2 -2
  40. data/ext/numo/narray/src/mh/math/cbrt.h +2 -2
  41. data/ext/numo/narray/src/mh/math/cos.h +2 -2
  42. data/ext/numo/narray/src/mh/math/cosh.h +2 -2
  43. data/ext/numo/narray/src/mh/math/erf.h +2 -2
  44. data/ext/numo/narray/src/mh/math/erfc.h +2 -2
  45. data/ext/numo/narray/src/mh/math/exp.h +2 -2
  46. data/ext/numo/narray/src/mh/math/exp10.h +2 -2
  47. data/ext/numo/narray/src/mh/math/exp2.h +2 -2
  48. data/ext/numo/narray/src/mh/math/expm1.h +2 -2
  49. data/ext/numo/narray/src/mh/math/frexp.h +3 -3
  50. data/ext/numo/narray/src/mh/math/hypot.h +3 -3
  51. data/ext/numo/narray/src/mh/math/ldexp.h +3 -3
  52. data/ext/numo/narray/src/mh/math/log.h +2 -2
  53. data/ext/numo/narray/src/mh/math/log10.h +2 -2
  54. data/ext/numo/narray/src/mh/math/log1p.h +2 -2
  55. data/ext/numo/narray/src/mh/math/log2.h +2 -2
  56. data/ext/numo/narray/src/mh/math/sin.h +2 -2
  57. data/ext/numo/narray/src/mh/math/sinc.h +2 -2
  58. data/ext/numo/narray/src/mh/math/sinh.h +2 -2
  59. data/ext/numo/narray/src/mh/math/sqrt.h +8 -8
  60. data/ext/numo/narray/src/mh/math/tan.h +2 -2
  61. data/ext/numo/narray/src/mh/math/tanh.h +2 -2
  62. data/ext/numo/narray/src/mh/math/unary_func.h +3 -3
  63. data/ext/numo/narray/src/mh/op/add.h +78 -0
  64. data/ext/numo/narray/src/mh/op/binary_func.h +423 -0
  65. data/ext/numo/narray/src/mh/op/div.h +118 -0
  66. data/ext/numo/narray/src/mh/op/mod.h +108 -0
  67. data/ext/numo/narray/src/mh/op/mul.h +78 -0
  68. data/ext/numo/narray/src/mh/op/sub.h +78 -0
  69. data/ext/numo/narray/src/mh/rand.h +2 -2
  70. data/ext/numo/narray/src/mh/round/ceil.h +11 -0
  71. data/ext/numo/narray/src/mh/round/floor.h +11 -0
  72. data/ext/numo/narray/src/mh/round/rint.h +9 -0
  73. data/ext/numo/narray/src/mh/round/round.h +11 -0
  74. data/ext/numo/narray/src/mh/round/trunc.h +11 -0
  75. data/ext/numo/narray/src/mh/round/unary_func.h +127 -0
  76. data/ext/numo/narray/src/mh/to_a.h +78 -0
  77. data/ext/numo/narray/src/t_bit.c +45 -234
  78. data/ext/numo/narray/src/t_dcomplex.c +584 -1809
  79. data/ext/numo/narray/src/t_dfloat.c +429 -2432
  80. data/ext/numo/narray/src/t_int16.c +481 -2283
  81. data/ext/numo/narray/src/t_int32.c +481 -2283
  82. data/ext/numo/narray/src/t_int64.c +481 -2283
  83. data/ext/numo/narray/src/t_int8.c +408 -1873
  84. data/ext/numo/narray/src/t_robject.c +448 -1977
  85. data/ext/numo/narray/src/t_scomplex.c +584 -1809
  86. data/ext/numo/narray/src/t_sfloat.c +429 -2434
  87. data/ext/numo/narray/src/t_uint16.c +480 -2278
  88. data/ext/numo/narray/src/t_uint32.c +480 -2278
  89. data/ext/numo/narray/src/t_uint64.c +480 -2278
  90. data/ext/numo/narray/src/t_uint8.c +407 -1868
  91. metadata +41 -2
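
The new headers under data/ext/numo/narray/src/mh/ factor out the per-type method bodies (arithmetic, comparison, rounding, bit operations, bincount, and the is* predicates) that each generated t_*.c previously carried inline, which is why the t_*.c files above each lose hundreds to thousands of lines. A minimal usage sketch of a few of the corresponding Ruby-level methods, assuming this fork keeps the upstream Numo::NArray API (method names are taken from the added header names; the values and printed results are illustrative only):

    require "numo/narray"

    a = Numo::SFloat[1.0, Float::NAN, Float::INFINITY, -2.5]

    # Float predicates backed by mh/isnan.h, mh/isinf.h, mh/isfinite.h
    p a.isnan     # => Numo::Bit[0, 1, 0, 0]
    p a.isfinite  # => Numo::Bit[1, 0, 0, 1]

    # Element-wise comparison backed by the mh/comp/ headers (NaN compares false)
    p a.gt(0)     # => Numo::Bit[1, 0, 1, 0]

    # Integer-only methods backed by mh/bincount.h and the mh/bit/ headers
    i = Numo::Int32[0, 1, 1, 3]
    p i.bincount  # counts occurrences of each non-negative value: [1, 2, 0, 1]
    p i & 1       # element-wise bitwise AND: [0, 1, 1, 1]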
@@ -42,7 +42,36 @@ static ID id_to_a;
  VALUE cT;
  extern VALUE cRT;
 
+ #include "mh/coerce_cast.h"
+ #include "mh/to_a.h"
+ #include "mh/fill.h"
+ #include "mh/format.h"
+ #include "mh/format_to_a.h"
+ #include "mh/inspect.h"
+ #include "mh/op/add.h"
+ #include "mh/op/sub.h"
+ #include "mh/op/mul.h"
+ #include "mh/op/div.h"
+ #include "mh/op/mod.h"
+ #include "mh/divmod.h"
+ #include "mh/round/floor.h"
+ #include "mh/round/round.h"
+ #include "mh/round/ceil.h"
+ #include "mh/round/trunc.h"
+ #include "mh/round/rint.h"
+ #include "mh/comp/eq.h"
+ #include "mh/comp/ne.h"
+ #include "mh/comp/nearly_eq.h"
+ #include "mh/comp/gt.h"
+ #include "mh/comp/ge.h"
+ #include "mh/comp/lt.h"
+ #include "mh/comp/le.h"
  #include "mh/clip.h"
+ #include "mh/isnan.h"
+ #include "mh/isinf.h"
+ #include "mh/isposinf.h"
+ #include "mh/isneginf.h"
+ #include "mh/isfinite.h"
  #include "mh/sum.h"
  #include "mh/prod.h"
  #include "mh/mean.h"
@@ -98,7 +127,43 @@ extern VALUE cRT;
 
  typedef float sfloat; // Type aliases for shorter notation
  // following the codebase naming convention.
+ DEF_NARRAY_COERCE_CAST_METHOD_FUNC(sfloat)
+ DEF_NARRAY_TO_A_METHOD_FUNC(sfloat)
+ DEF_NARRAY_FILL_METHOD_FUNC(sfloat)
+ DEF_NARRAY_FORMAT_METHOD_FUNC(sfloat)
+ DEF_NARRAY_FORMAT_TO_A_METHOD_FUNC(sfloat)
+ DEF_NARRAY_INSPECT_METHOD_FUNC(sfloat)
+ #ifdef __SSE2__
+ DEF_NARRAY_SFLT_ADD_SSE2_METHOD_FUNC()
+ DEF_NARRAY_SFLT_SUB_SSE2_METHOD_FUNC()
+ DEF_NARRAY_SFLT_MUL_SSE2_METHOD_FUNC()
+ DEF_NARRAY_SFLT_DIV_SSE2_METHOD_FUNC()
+ #else
+ DEF_NARRAY_ADD_METHOD_FUNC(sfloat, numo_cSFloat)
+ DEF_NARRAY_SUB_METHOD_FUNC(sfloat, numo_cSFloat)
+ DEF_NARRAY_MUL_METHOD_FUNC(sfloat, numo_cSFloat)
+ DEF_NARRAY_FLT_DIV_METHOD_FUNC(sfloat, numo_cSFloat)
+ #endif
+ DEF_NARRAY_FLT_MOD_METHOD_FUNC(sfloat, numo_cSFloat)
+ DEF_NARRAY_FLT_DIVMOD_METHOD_FUNC(sfloat, numo_cSFloat)
+ DEF_NARRAY_FLT_FLOOR_METHOD_FUNC(sfloat, numo_cSFloat)
+ DEF_NARRAY_FLT_ROUND_METHOD_FUNC(sfloat, numo_cSFloat)
+ DEF_NARRAY_FLT_CEIL_METHOD_FUNC(sfloat, numo_cSFloat)
+ DEF_NARRAY_FLT_TRUNC_METHOD_FUNC(sfloat, numo_cSFloat)
+ DEF_NARRAY_FLT_RINT_METHOD_FUNC(sfloat, numo_cSFloat)
+ DEF_NARRAY_EQ_METHOD_FUNC(sfloat, numo_cSFloat)
+ DEF_NARRAY_NE_METHOD_FUNC(sfloat, numo_cSFloat)
+ DEF_NARRAY_NEARLY_EQ_METHOD_FUNC(sfloat, numo_cSFloat)
+ DEF_NARRAY_GT_METHOD_FUNC(sfloat, numo_cSFloat)
+ DEF_NARRAY_GE_METHOD_FUNC(sfloat, numo_cSFloat)
+ DEF_NARRAY_LT_METHOD_FUNC(sfloat, numo_cSFloat)
+ DEF_NARRAY_LE_METHOD_FUNC(sfloat, numo_cSFloat)
  DEF_NARRAY_CLIP_METHOD_FUNC(sfloat, numo_cSFloat)
+ DEF_NARRAY_FLT_ISNAN_METHOD_FUNC(sfloat, numo_cSFloat)
+ DEF_NARRAY_FLT_ISINF_METHOD_FUNC(sfloat, numo_cSFloat)
+ DEF_NARRAY_FLT_ISPOSINF_METHOD_FUNC(sfloat, numo_cSFloat)
+ DEF_NARRAY_FLT_ISNEGINF_METHOD_FUNC(sfloat, numo_cSFloat)
+ DEF_NARRAY_FLT_ISFINITE_METHOD_FUNC(sfloat, numo_cSFloat)
  DEF_NARRAY_FLT_SUM_METHOD_FUNC(sfloat, numo_cSFloat)
  DEF_NARRAY_FLT_PROD_METHOD_FUNC(sfloat, numo_cSFloat)
  DEF_NARRAY_FLT_MEAN_METHOD_FUNC(sfloat, numo_cSFloat, float, numo_cSFloat)
@@ -1275,171 +1340,6 @@ static VALUE sfloat_aset(int argc, VALUE* argv, VALUE self) {
  return argv[argc];
  }
 
- static VALUE sfloat_coerce_cast(VALUE self, VALUE type) {
- return Qnil;
- }
-
- static void iter_sfloat_to_a(na_loop_t* const lp) {
- size_t i, s1;
- char* p1;
- size_t* idx1;
- dtype x;
- volatile VALUE a, y;
-
- INIT_COUNTER(lp, i);
- INIT_PTR_IDX(lp, 0, p1, s1, idx1);
- a = rb_ary_new2(i);
- rb_ary_push(lp->args[1].value, a);
- if (idx1) {
- for (; i--;) {
- GET_DATA_INDEX(p1, idx1, dtype, x);
- y = m_data_to_num(x);
- rb_ary_push(a, y);
- }
- } else {
- for (; i--;) {
- GET_DATA_STRIDE(p1, s1, dtype, x);
- y = m_data_to_num(x);
- rb_ary_push(a, y);
- }
- }
- }
-
- static VALUE sfloat_to_a(VALUE self) {
- ndfunc_arg_in_t ain[3] = { { Qnil, 0 }, { sym_loop_opt }, { sym_option } };
- ndfunc_arg_out_t aout[1] = { { rb_cArray, 0 } }; // dummy?
- ndfunc_t ndf = { iter_sfloat_to_a, FULL_LOOP_NIP, 3, 1, ain, aout };
- return na_ndloop_cast_narray_to_rarray(&ndf, self, Qnil);
- }
-
- static void iter_sfloat_fill(na_loop_t* const lp) {
- size_t i;
- char* p1;
- ssize_t s1;
- size_t* idx1;
- VALUE x = lp->option;
- dtype y;
- INIT_COUNTER(lp, i);
- INIT_PTR_IDX(lp, 0, p1, s1, idx1);
- y = m_num_to_data(x);
- if (idx1) {
- for (; i--;) {
- SET_DATA_INDEX(p1, idx1, dtype, y);
- }
- } else {
- for (; i--;) {
- SET_DATA_STRIDE(p1, s1, dtype, y);
- }
- }
- }
-
- static VALUE sfloat_fill(VALUE self, VALUE val) {
- ndfunc_arg_in_t ain[2] = { { OVERWRITE, 0 }, { sym_option } };
- ndfunc_t ndf = { iter_sfloat_fill, FULL_LOOP, 2, 0, ain, 0 };
-
- na_ndloop(&ndf, 2, self, val);
- return self;
- }
-
- static VALUE format_sfloat(VALUE fmt, dtype* x) {
- // fix-me
- char s[48];
- int n;
-
- if (NIL_P(fmt)) {
- n = m_sprintf(s, *x);
- return rb_str_new(s, n);
- }
- return rb_funcall(fmt, '%', 1, m_data_to_num(*x));
- }
-
- static void iter_sfloat_format(na_loop_t* const lp) {
- size_t i;
- char *p1, *p2;
- ssize_t s1, s2;
- size_t* idx1;
- dtype* x;
- VALUE y;
- VALUE fmt = lp->option;
- INIT_COUNTER(lp, i);
- INIT_PTR_IDX(lp, 0, p1, s1, idx1);
- INIT_PTR(lp, 1, p2, s2);
- if (idx1) {
- for (; i--;) {
- x = (dtype*)(p1 + *idx1);
- idx1++;
- y = format_sfloat(fmt, x);
- SET_DATA_STRIDE(p2, s2, VALUE, y);
- }
- } else {
- for (; i--;) {
- x = (dtype*)p1;
- p1 += s1;
- y = format_sfloat(fmt, x);
- SET_DATA_STRIDE(p2, s2, VALUE, y);
- }
- }
- }
-
- static VALUE sfloat_format(int argc, VALUE* argv, VALUE self) {
- VALUE fmt = Qnil;
-
- ndfunc_arg_in_t ain[2] = { { Qnil, 0 }, { sym_option } };
- ndfunc_arg_out_t aout[1] = { { numo_cRObject, 0 } };
- ndfunc_t ndf = { iter_sfloat_format, FULL_LOOP_NIP, 2, 1, ain, aout };
-
- rb_scan_args(argc, argv, "01", &fmt);
- return na_ndloop(&ndf, 2, self, fmt);
- }
-
- static void iter_sfloat_format_to_a(na_loop_t* const lp) {
- size_t i;
- char* p1;
- ssize_t s1;
- size_t* idx1;
- dtype* x;
- VALUE y;
- volatile VALUE a;
- VALUE fmt = lp->option;
- INIT_COUNTER(lp, i);
- INIT_PTR_IDX(lp, 0, p1, s1, idx1);
- a = rb_ary_new2(i);
- rb_ary_push(lp->args[1].value, a);
- if (idx1) {
- for (; i--;) {
- x = (dtype*)(p1 + *idx1);
- idx1++;
- y = format_sfloat(fmt, x);
- rb_ary_push(a, y);
- }
- } else {
- for (; i--;) {
- x = (dtype*)p1;
- p1 += s1;
- y = format_sfloat(fmt, x);
- rb_ary_push(a, y);
- }
- }
- }
-
- static VALUE sfloat_format_to_a(int argc, VALUE* argv, VALUE self) {
- VALUE fmt = Qnil;
- ndfunc_arg_in_t ain[3] = { { Qnil, 0 }, { sym_loop_opt }, { sym_option } };
- ndfunc_arg_out_t aout[1] = { { rb_cArray, 0 } }; // dummy?
- ndfunc_t ndf = { iter_sfloat_format_to_a, FULL_LOOP_NIP, 3, 1, ain, aout };
-
- rb_scan_args(argc, argv, "01", &fmt);
- return na_ndloop_cast_narray_to_rarray(&ndf, self, fmt);
- }
-
- static VALUE iter_sfloat_inspect(char* ptr, size_t pos, VALUE fmt) {
- return format_sfloat(fmt, (dtype*)(ptr + pos));
- }
-
- static VALUE sfloat_inspect(VALUE ary) {
- return na_ndloop_inspect(ary, iter_sfloat_inspect, Qnil);
- }
-
  static void iter_sfloat_each(na_loop_t* const lp) {
  size_t i, s1;
  char* p1;
@@ -1722,2354 +1622,461 @@ static VALUE sfloat_abs(VALUE self) {
  return na_ndloop(&ndf, 1, self);
  }
 
- #define check_intdivzero(y) \
- {}
-
- static void iter_sfloat_add(na_loop_t* const lp) {
- size_t i = 0;
- size_t n;
+ static void iter_sfloat_pow(na_loop_t* const lp) {
+ size_t i;
  char *p1, *p2, *p3;
  ssize_t s1, s2, s3;
-
- #ifdef __SSE2__
- size_t cnt;
- size_t cnt_simd_loop = -1;
-
- __m128 a;
- __m128 b;
-
- size_t num_pack; // Number of elements packed for SIMD.
- num_pack = SIMD_ALIGNMENT_SIZE / sizeof(dtype);
- #endif
- INIT_COUNTER(lp, n);
+ dtype x, y;
+ INIT_COUNTER(lp, i);
  INIT_PTR(lp, 0, p1, s1);
  INIT_PTR(lp, 1, p2, s2);
  INIT_PTR(lp, 2, p3, s3);
-
- //
- if (is_aligned(p1, sizeof(dtype)) && is_aligned(p2, sizeof(dtype)) &&
- is_aligned(p3, sizeof(dtype))) {
-
- if (s1 == sizeof(dtype) && s2 == sizeof(dtype) && s3 == sizeof(dtype)) {
- #ifdef __SSE2__
- // Check number of elements. & Check same alignment.
- if ((n >= num_pack) &&
- is_same_aligned3(
- &((dtype*)p1)[i], &((dtype*)p2)[i], &((dtype*)p3)[i], SIMD_ALIGNMENT_SIZE
- )) {
- // Calculate up to the position just before the start of SIMD computation.
- cnt = get_count_of_elements_not_aligned_to_simd_size(
- &((dtype*)p1)[i], SIMD_ALIGNMENT_SIZE, sizeof(dtype)
- );
- #endif
- if (p1 == p3) { // inplace case
- #ifdef __SSE2__
- for (; i < cnt; i++) {
- #else
- for (; i < n; i++) {
- check_intdivzero(((dtype*)p2)[i]);
- #endif
- ((dtype*)p1)[i] = m_add(((dtype*)p1)[i], ((dtype*)p2)[i]);
- }
- } else {
- #ifdef __SSE2__
- for (; i < cnt; i++) {
- #else
- for (; i < n; i++) {
- check_intdivzero(((dtype*)p2)[i]);
- #endif
- ((dtype*)p3)[i] = m_add(((dtype*)p1)[i], ((dtype*)p2)[i]);
- }
- }
-
- #ifdef __SSE2__
- // Get the count of SIMD computation loops.
- cnt_simd_loop = (n - i) % num_pack;
-
- // SIMD computation.
- if (p1 == p3) { // inplace case
- for (; i < n - cnt_simd_loop; i += num_pack) {
- a = _mm_load_ps(&((dtype*)p1)[i]);
- b = _mm_load_ps(&((dtype*)p2)[i]);
- a = _mm_add_ps(a, b);
- _mm_store_ps(&((dtype*)p1)[i], a);
- }
- } else {
- for (; i < n - cnt_simd_loop; i += num_pack) {
- a = _mm_load_ps(&((dtype*)p1)[i]);
- b = _mm_load_ps(&((dtype*)p2)[i]);
- a = _mm_add_ps(a, b);
- _mm_stream_ps(&((dtype*)p3)[i], a);
- }
- }
- }
-
- // Compute the remainder of the SIMD operation.
- if (cnt_simd_loop != 0) {
- if (p1 == p3) { // inplace case
- for (; i < n; i++) {
- check_intdivzero(((dtype*)p2)[i]);
- ((dtype*)p1)[i] = m_add(((dtype*)p1)[i], ((dtype*)p2)[i]);
- }
- } else {
- for (; i < n; i++) {
- check_intdivzero(((dtype*)p2)[i]);
- ((dtype*)p3)[i] = m_add(((dtype*)p1)[i], ((dtype*)p2)[i]);
- }
- }
- }
- #endif
- return;
- }
-
- if (is_aligned_step(s1, sizeof(dtype)) && is_aligned_step(s2, sizeof(dtype)) &&
- is_aligned_step(s3, sizeof(dtype))) {
- //
-
- if (s2 == 0) { // Broadcasting from scalar value.
- check_intdivzero(*(dtype*)p2);
- if (s1 == sizeof(dtype) && s3 == sizeof(dtype)) {
- #ifdef __SSE2__
- // Broadcast a scalar value and use it for SIMD computation.
- b = _mm_load1_ps(&((dtype*)p2)[0]);
-
- // Check number of elements. & Check same alignment.
- if ((n >= num_pack) &&
- is_same_aligned2(&((dtype*)p1)[i], &((dtype*)p3)[i], SIMD_ALIGNMENT_SIZE)) {
- // Calculate up to the position just before the start of SIMD computation.
- cnt = get_count_of_elements_not_aligned_to_simd_size(
- &((dtype*)p1)[i], SIMD_ALIGNMENT_SIZE, sizeof(dtype)
- );
- #endif
- if (p1 == p3) { // inplace case
- #ifdef __SSE2__
- for (; i < cnt; i++) {
- #else
- for (; i < n; i++) {
- #endif
- ((dtype*)p1)[i] = m_add(((dtype*)p1)[i], *(dtype*)p2);
- }
- } else {
- #ifdef __SSE2__
- for (; i < cnt; i++) {
- #else
- for (; i < n; i++) {
- #endif
- ((dtype*)p3)[i] = m_add(((dtype*)p1)[i], *(dtype*)p2);
- }
- }
-
- #ifdef __SSE2__
- // Get the count of SIMD computation loops.
- cnt_simd_loop = (n - i) % num_pack;
-
- // SIMD computation.
- if (p1 == p3) { // inplace case
- for (; i < n - cnt_simd_loop; i += num_pack) {
- a = _mm_load_ps(&((dtype*)p1)[i]);
- a = _mm_add_ps(a, b);
- _mm_store_ps(&((dtype*)p1)[i], a);
- }
- } else {
- for (; i < n - cnt_simd_loop; i += num_pack) {
- a = _mm_load_ps(&((dtype*)p1)[i]);
- a = _mm_add_ps(a, b);
- _mm_stream_ps(&((dtype*)p3)[i], a);
- }
- }
- }
-
- // Compute the remainder of the SIMD operation.
- if (cnt_simd_loop != 0) {
- if (p1 == p3) { // inplace case
- for (; i < n; i++) {
- ((dtype*)p1)[i] = m_add(((dtype*)p1)[i], *(dtype*)p2);
- }
- } else {
- for (; i < n; i++) {
- ((dtype*)p3)[i] = m_add(((dtype*)p1)[i], *(dtype*)p2);
- }
- }
- }
- #endif
- } else {
- for (i = 0; i < n; i++) {
- *(dtype*)p3 = m_add(*(dtype*)p1, *(dtype*)p2);
- p1 += s1;
- p3 += s3;
- }
- }
- } else {
- if (p1 == p3) { // inplace case
- for (i = 0; i < n; i++) {
- check_intdivzero(*(dtype*)p2);
- *(dtype*)p1 = m_add(*(dtype*)p1, *(dtype*)p2);
- p1 += s1;
- p2 += s2;
- }
- } else {
- for (i = 0; i < n; i++) {
- check_intdivzero(*(dtype*)p2);
- *(dtype*)p3 = m_add(*(dtype*)p1, *(dtype*)p2);
- p1 += s1;
- p2 += s2;
- p3 += s3;
- }
- }
- }
-
- return;
- //
- }
- }
- for (i = 0; i < n; i++) {
- dtype x, y, z;
+ for (; i--;) {
  GET_DATA_STRIDE(p1, s1, dtype, x);
  GET_DATA_STRIDE(p2, s2, dtype, y);
- check_intdivzero(y);
- z = m_add(x, y);
- SET_DATA_STRIDE(p3, s3, dtype, z);
+ x = m_pow(x, y);
+ SET_DATA_STRIDE(p3, s3, dtype, x);
+ }
+ }
+
+ static void iter_sfloat_pow_int32(na_loop_t* const lp) {
+ size_t i;
+ char *p1, *p2, *p3;
+ ssize_t s1, s2, s3;
+ dtype x;
+ int32_t y;
+ INIT_COUNTER(lp, i);
+ INIT_PTR(lp, 0, p1, s1);
+ INIT_PTR(lp, 1, p2, s2);
+ INIT_PTR(lp, 2, p3, s3);
+ for (; i--;) {
+ GET_DATA_STRIDE(p1, s1, dtype, x);
+ GET_DATA_STRIDE(p2, s2, int32_t, y);
+ x = m_pow_int(x, y);
+ SET_DATA_STRIDE(p3, s3, dtype, x);
  }
- //
  }
- #undef check_intdivzero
 
- static VALUE sfloat_add_self(VALUE self, VALUE other) {
+ static VALUE sfloat_pow_self(VALUE self, VALUE other) {
  ndfunc_arg_in_t ain[2] = { { cT, 0 }, { cT, 0 } };
+ ndfunc_arg_in_t ain_i[2] = { { cT, 0 }, { numo_cInt32, 0 } };
  ndfunc_arg_out_t aout[1] = { { cT, 0 } };
- ndfunc_t ndf = { iter_sfloat_add, STRIDE_LOOP, 2, 1, ain, aout };
+ ndfunc_t ndf = { iter_sfloat_pow, STRIDE_LOOP, 2, 1, ain, aout };
+ ndfunc_t ndf_i = { iter_sfloat_pow_int32, STRIDE_LOOP, 2, 1, ain_i, aout };
 
- return na_ndloop(&ndf, 2, self, other);
+ // fixme : use na.integer?
+ if (FIXNUM_P(other) || rb_obj_is_kind_of(other, numo_cInt32)) {
+ return na_ndloop(&ndf_i, 2, self, other);
+ } else {
+ return na_ndloop(&ndf, 2, self, other);
+ }
  }
 
- static VALUE sfloat_add(VALUE self, VALUE other) {
+ static VALUE sfloat_pow(VALUE self, VALUE other) {
 
  VALUE klass, v;
-
  klass = na_upcast(rb_obj_class(self), rb_obj_class(other));
  if (klass == cT) {
- return sfloat_add_self(self, other);
+ return sfloat_pow_self(self, other);
  } else {
  v = rb_funcall(klass, id_cast, 1, self);
- return rb_funcall(v, '+', 1, other);
+ return rb_funcall(v, id_pow, 1, other);
  }
  }
 
1958
- #define check_intdivzero(y) \
1959
- {}
1960
-
1961
- static void iter_sfloat_sub(na_loop_t* const lp) {
1962
- size_t i = 0;
1963
- size_t n;
1964
- char *p1, *p2, *p3;
1965
- ssize_t s1, s2, s3;
1966
-
1967
- #ifdef __SSE2__
1968
- size_t cnt;
1969
- size_t cnt_simd_loop = -1;
1970
-
1971
- __m128 a;
1972
- __m128 b;
1687
+ static void iter_sfloat_minus(na_loop_t* const lp) {
1688
+ size_t i, n;
1689
+ char *p1, *p2;
1690
+ ssize_t s1, s2;
1691
+ size_t *idx1, *idx2;
1692
+ dtype x;
1973
1693
 
1974
- size_t num_pack; // Number of elements packed for SIMD.
1975
- num_pack = SIMD_ALIGNMENT_SIZE / sizeof(dtype);
1976
- #endif
1977
1694
  INIT_COUNTER(lp, n);
1978
- INIT_PTR(lp, 0, p1, s1);
1979
- INIT_PTR(lp, 1, p2, s2);
1980
- INIT_PTR(lp, 2, p3, s3);
1981
-
1982
- //
1983
- if (is_aligned(p1, sizeof(dtype)) && is_aligned(p2, sizeof(dtype)) &&
1984
- is_aligned(p3, sizeof(dtype))) {
1985
-
1986
- if (s1 == sizeof(dtype) && s2 == sizeof(dtype) && s3 == sizeof(dtype)) {
1987
- #ifdef __SSE2__
1988
- // Check number of elements. & Check same alignment.
1989
- if ((n >= num_pack) &&
1990
- is_same_aligned3(
1991
- &((dtype*)p1)[i], &((dtype*)p2)[i], &((dtype*)p3)[i], SIMD_ALIGNMENT_SIZE
1992
- )) {
1993
- // Calculate up to the position just before the start of SIMD computation.
1994
- cnt = get_count_of_elements_not_aligned_to_simd_size(
1995
- &((dtype*)p1)[i], SIMD_ALIGNMENT_SIZE, sizeof(dtype)
1996
- );
1997
- #endif
1998
- if (p1 == p3) { // inplace case
1999
- #ifdef __SSE2__
2000
- for (; i < cnt; i++) {
2001
- #else
2002
- for (; i < n; i++) {
2003
- check_intdivzero(((dtype*)p2)[i]);
2004
- #endif
2005
- ((dtype*)p1)[i] = m_sub(((dtype*)p1)[i], ((dtype*)p2)[i]);
2006
- }
2007
- } else {
2008
- #ifdef __SSE2__
2009
- for (; i < cnt; i++) {
2010
- #else
2011
- for (; i < n; i++) {
2012
- check_intdivzero(((dtype*)p2)[i]);
2013
- #endif
2014
- ((dtype*)p3)[i] = m_sub(((dtype*)p1)[i], ((dtype*)p2)[i]);
2015
- }
2016
- }
2017
-
2018
- #ifdef __SSE2__
2019
- // Get the count of SIMD computation loops.
2020
- cnt_simd_loop = (n - i) % num_pack;
1695
+ INIT_PTR_IDX(lp, 0, p1, s1, idx1);
1696
+ INIT_PTR_IDX(lp, 1, p2, s2, idx2);
2021
1697
 
2022
- // SIMD computation.
2023
- if (p1 == p3) { // inplace case
2024
- for (; i < n - cnt_simd_loop; i += num_pack) {
2025
- a = _mm_load_ps(&((dtype*)p1)[i]);
2026
- b = _mm_load_ps(&((dtype*)p2)[i]);
2027
- a = _mm_sub_ps(a, b);
2028
- _mm_store_ps(&((dtype*)p1)[i], a);
2029
- }
2030
- } else {
2031
- for (; i < n - cnt_simd_loop; i += num_pack) {
2032
- a = _mm_load_ps(&((dtype*)p1)[i]);
2033
- b = _mm_load_ps(&((dtype*)p2)[i]);
2034
- a = _mm_sub_ps(a, b);
2035
- _mm_stream_ps(&((dtype*)p3)[i], a);
2036
- }
2037
- }
1698
+ if (idx1) {
1699
+ if (idx2) {
1700
+ for (i = 0; i < n; i++) {
1701
+ GET_DATA_INDEX(p1, idx1, dtype, x);
1702
+ x = m_minus(x);
1703
+ SET_DATA_INDEX(p2, idx2, dtype, x);
2038
1704
  }
2039
-
2040
- // Compute the remainder of the SIMD operation.
2041
- if (cnt_simd_loop != 0) {
2042
- if (p1 == p3) { // inplace case
2043
- for (; i < n; i++) {
2044
- check_intdivzero(((dtype*)p2)[i]);
2045
- ((dtype*)p1)[i] = m_sub(((dtype*)p1)[i], ((dtype*)p2)[i]);
2046
- }
2047
- } else {
2048
- for (; i < n; i++) {
2049
- check_intdivzero(((dtype*)p2)[i]);
2050
- ((dtype*)p3)[i] = m_sub(((dtype*)p1)[i], ((dtype*)p2)[i]);
2051
- }
2052
- }
1705
+ } else {
1706
+ for (i = 0; i < n; i++) {
1707
+ GET_DATA_INDEX(p1, idx1, dtype, x);
1708
+ x = m_minus(x);
1709
+ SET_DATA_STRIDE(p2, s2, dtype, x);
2053
1710
  }
2054
- #endif
2055
- return;
2056
1711
  }
2057
-
2058
- if (is_aligned_step(s1, sizeof(dtype)) && is_aligned_step(s2, sizeof(dtype)) &&
2059
- is_aligned_step(s3, sizeof(dtype))) {
1712
+ } else {
1713
+ if (idx2) {
1714
+ for (i = 0; i < n; i++) {
1715
+ GET_DATA_STRIDE(p1, s1, dtype, x);
1716
+ x = m_minus(x);
1717
+ SET_DATA_INDEX(p2, idx2, dtype, x);
1718
+ }
1719
+ } else {
2060
1720
  //
2061
-
2062
- if (s2 == 0) { // Broadcasting from scalar value.
2063
- check_intdivzero(*(dtype*)p2);
2064
- if (s1 == sizeof(dtype) && s3 == sizeof(dtype)) {
2065
- #ifdef __SSE2__
2066
- // Broadcast a scalar value and use it for SIMD computation.
2067
- b = _mm_load1_ps(&((dtype*)p2)[0]);
2068
-
2069
- // Check number of elements. & Check same alignment.
2070
- if ((n >= num_pack) &&
2071
- is_same_aligned2(&((dtype*)p1)[i], &((dtype*)p3)[i], SIMD_ALIGNMENT_SIZE)) {
2072
- // Calculate up to the position just before the start of SIMD computation.
2073
- cnt = get_count_of_elements_not_aligned_to_simd_size(
2074
- &((dtype*)p1)[i], SIMD_ALIGNMENT_SIZE, sizeof(dtype)
2075
- );
2076
- #endif
2077
- if (p1 == p3) { // inplace case
2078
- #ifdef __SSE2__
2079
- for (; i < cnt; i++) {
2080
- #else
2081
- for (; i < n; i++) {
2082
- #endif
2083
- ((dtype*)p1)[i] = m_sub(((dtype*)p1)[i], *(dtype*)p2);
2084
- }
2085
- } else {
2086
- #ifdef __SSE2__
2087
- for (; i < cnt; i++) {
2088
- #else
2089
- for (; i < n; i++) {
2090
- #endif
2091
- ((dtype*)p3)[i] = m_sub(((dtype*)p1)[i], *(dtype*)p2);
2092
- }
2093
- }
2094
-
2095
- #ifdef __SSE2__
2096
- // Get the count of SIMD computation loops.
2097
- cnt_simd_loop = (n - i) % num_pack;
2098
-
2099
- // SIMD computation.
2100
- if (p1 == p3) { // inplace case
2101
- for (; i < n - cnt_simd_loop; i += num_pack) {
2102
- a = _mm_load_ps(&((dtype*)p1)[i]);
2103
- a = _mm_sub_ps(a, b);
2104
- _mm_store_ps(&((dtype*)p1)[i], a);
2105
- }
2106
- } else {
2107
- for (; i < n - cnt_simd_loop; i += num_pack) {
2108
- a = _mm_load_ps(&((dtype*)p1)[i]);
2109
- a = _mm_sub_ps(a, b);
2110
- _mm_stream_ps(&((dtype*)p3)[i], a);
2111
- }
2112
- }
2113
- }
2114
-
2115
- // Compute the remainder of the SIMD operation.
2116
- if (cnt_simd_loop != 0) {
2117
- if (p1 == p3) { // inplace case
2118
- for (; i < n; i++) {
2119
- ((dtype*)p1)[i] = m_sub(((dtype*)p1)[i], *(dtype*)p2);
2120
- }
2121
- } else {
2122
- for (; i < n; i++) {
2123
- ((dtype*)p3)[i] = m_sub(((dtype*)p1)[i], *(dtype*)p2);
2124
- }
2125
- }
2126
- }
2127
- #endif
2128
- } else {
1721
+ if (is_aligned(p1, sizeof(dtype)) && is_aligned(p2, sizeof(dtype))) {
1722
+ if (s1 == sizeof(dtype) && s2 == sizeof(dtype)) {
2129
1723
  for (i = 0; i < n; i++) {
2130
- *(dtype*)p3 = m_sub(*(dtype*)p1, *(dtype*)p2);
2131
- p1 += s1;
2132
- p3 += s3;
1724
+ ((dtype*)p2)[i] = m_minus(((dtype*)p1)[i]);
2133
1725
  }
1726
+ return;
2134
1727
  }
2135
- } else {
2136
- if (p1 == p3) { // inplace case
2137
- for (i = 0; i < n; i++) {
2138
- check_intdivzero(*(dtype*)p2);
2139
- *(dtype*)p1 = m_sub(*(dtype*)p1, *(dtype*)p2);
2140
- p1 += s1;
2141
- p2 += s2;
2142
- }
2143
- } else {
1728
+ if (is_aligned_step(s1, sizeof(dtype)) && is_aligned_step(s2, sizeof(dtype))) {
1729
+ //
2144
1730
  for (i = 0; i < n; i++) {
2145
- check_intdivzero(*(dtype*)p2);
2146
- *(dtype*)p3 = m_sub(*(dtype*)p1, *(dtype*)p2);
1731
+ *(dtype*)p2 = m_minus(*(dtype*)p1);
2147
1732
  p1 += s1;
2148
1733
  p2 += s2;
2149
- p3 += s3;
2150
1734
  }
1735
+ return;
1736
+ //
2151
1737
  }
2152
1738
  }
2153
-
2154
- return;
1739
+ for (i = 0; i < n; i++) {
1740
+ GET_DATA_STRIDE(p1, s1, dtype, x);
1741
+ x = m_minus(x);
1742
+ SET_DATA_STRIDE(p2, s2, dtype, x);
1743
+ }
2155
1744
  //
2156
1745
  }
2157
1746
  }
2158
- for (i = 0; i < n; i++) {
2159
- dtype x, y, z;
2160
- GET_DATA_STRIDE(p1, s1, dtype, x);
2161
- GET_DATA_STRIDE(p2, s2, dtype, y);
2162
- check_intdivzero(y);
2163
- z = m_sub(x, y);
2164
- SET_DATA_STRIDE(p3, s3, dtype, z);
2165
- }
2166
- //
2167
1747
  }
2168
- #undef check_intdivzero
2169
1748
 
2170
- static VALUE sfloat_sub_self(VALUE self, VALUE other) {
2171
- ndfunc_arg_in_t ain[2] = { { cT, 0 }, { cT, 0 } };
1749
+ static VALUE sfloat_minus(VALUE self) {
1750
+ ndfunc_arg_in_t ain[1] = { { cT, 0 } };
2172
1751
  ndfunc_arg_out_t aout[1] = { { cT, 0 } };
2173
- ndfunc_t ndf = { iter_sfloat_sub, STRIDE_LOOP, 2, 1, ain, aout };
2174
-
2175
- return na_ndloop(&ndf, 2, self, other);
2176
- }
2177
-
2178
- static VALUE sfloat_sub(VALUE self, VALUE other) {
2179
-
2180
- VALUE klass, v;
1752
+ ndfunc_t ndf = { iter_sfloat_minus, FULL_LOOP, 1, 1, ain, aout };
2181
1753
 
2182
- klass = na_upcast(rb_obj_class(self), rb_obj_class(other));
2183
- if (klass == cT) {
2184
- return sfloat_sub_self(self, other);
2185
- } else {
2186
- v = rb_funcall(klass, id_cast, 1, self);
2187
- return rb_funcall(v, '-', 1, other);
2188
- }
1754
+ return na_ndloop(&ndf, 1, self);
2189
1755
  }
2190
1756
 
2191
- #define check_intdivzero(y) \
2192
- {}
2193
-
2194
- static void iter_sfloat_mul(na_loop_t* const lp) {
2195
- size_t i = 0;
2196
- size_t n;
2197
- char *p1, *p2, *p3;
2198
- ssize_t s1, s2, s3;
2199
-
2200
- #ifdef __SSE2__
2201
- size_t cnt;
2202
- size_t cnt_simd_loop = -1;
2203
-
2204
- __m128 a;
2205
- __m128 b;
1757
+ static void iter_sfloat_reciprocal(na_loop_t* const lp) {
1758
+ size_t i, n;
1759
+ char *p1, *p2;
1760
+ ssize_t s1, s2;
1761
+ size_t *idx1, *idx2;
1762
+ dtype x;
2206
1763
 
2207
- size_t num_pack; // Number of elements packed for SIMD.
2208
- num_pack = SIMD_ALIGNMENT_SIZE / sizeof(dtype);
2209
- #endif
2210
1764
  INIT_COUNTER(lp, n);
2211
- INIT_PTR(lp, 0, p1, s1);
2212
- INIT_PTR(lp, 1, p2, s2);
2213
- INIT_PTR(lp, 2, p3, s3);
2214
-
2215
- //
2216
- if (is_aligned(p1, sizeof(dtype)) && is_aligned(p2, sizeof(dtype)) &&
2217
- is_aligned(p3, sizeof(dtype))) {
1765
+ INIT_PTR_IDX(lp, 0, p1, s1, idx1);
1766
+ INIT_PTR_IDX(lp, 1, p2, s2, idx2);
2218
1767
 
2219
- if (s1 == sizeof(dtype) && s2 == sizeof(dtype) && s3 == sizeof(dtype)) {
2220
- #ifdef __SSE2__
2221
- // Check number of elements. & Check same alignment.
2222
- if ((n >= num_pack) &&
2223
- is_same_aligned3(
2224
- &((dtype*)p1)[i], &((dtype*)p2)[i], &((dtype*)p3)[i], SIMD_ALIGNMENT_SIZE
2225
- )) {
2226
- // Calculate up to the position just before the start of SIMD computation.
2227
- cnt = get_count_of_elements_not_aligned_to_simd_size(
2228
- &((dtype*)p1)[i], SIMD_ALIGNMENT_SIZE, sizeof(dtype)
2229
- );
2230
- #endif
2231
- if (p1 == p3) { // inplace case
2232
- #ifdef __SSE2__
2233
- for (; i < cnt; i++) {
2234
- #else
2235
- for (; i < n; i++) {
2236
- check_intdivzero(((dtype*)p2)[i]);
2237
- #endif
2238
- ((dtype*)p1)[i] = m_mul(((dtype*)p1)[i], ((dtype*)p2)[i]);
2239
- }
2240
- } else {
2241
- #ifdef __SSE2__
2242
- for (; i < cnt; i++) {
2243
- #else
2244
- for (; i < n; i++) {
2245
- check_intdivzero(((dtype*)p2)[i]);
2246
- #endif
2247
- ((dtype*)p3)[i] = m_mul(((dtype*)p1)[i], ((dtype*)p2)[i]);
1768
+ if (idx1) {
1769
+ if (idx2) {
1770
+ for (i = 0; i < n; i++) {
1771
+ GET_DATA_INDEX(p1, idx1, dtype, x);
1772
+ x = m_reciprocal(x);
1773
+ SET_DATA_INDEX(p2, idx2, dtype, x);
1774
+ }
1775
+ } else {
1776
+ for (i = 0; i < n; i++) {
1777
+ GET_DATA_INDEX(p1, idx1, dtype, x);
1778
+ x = m_reciprocal(x);
1779
+ SET_DATA_STRIDE(p2, s2, dtype, x);
1780
+ }
1781
+ }
1782
+ } else {
1783
+ if (idx2) {
1784
+ for (i = 0; i < n; i++) {
1785
+ GET_DATA_STRIDE(p1, s1, dtype, x);
1786
+ x = m_reciprocal(x);
1787
+ SET_DATA_INDEX(p2, idx2, dtype, x);
1788
+ }
1789
+ } else {
1790
+ //
1791
+ if (is_aligned(p1, sizeof(dtype)) && is_aligned(p2, sizeof(dtype))) {
1792
+ if (s1 == sizeof(dtype) && s2 == sizeof(dtype)) {
1793
+ for (i = 0; i < n; i++) {
1794
+ ((dtype*)p2)[i] = m_reciprocal(((dtype*)p1)[i]);
2248
1795
  }
1796
+ return;
2249
1797
  }
2250
-
2251
- #ifdef __SSE2__
2252
- // Get the count of SIMD computation loops.
2253
- cnt_simd_loop = (n - i) % num_pack;
2254
-
2255
- // SIMD computation.
2256
- if (p1 == p3) { // inplace case
2257
- for (; i < n - cnt_simd_loop; i += num_pack) {
2258
- a = _mm_load_ps(&((dtype*)p1)[i]);
2259
- b = _mm_load_ps(&((dtype*)p2)[i]);
2260
- a = _mm_mul_ps(a, b);
2261
- _mm_store_ps(&((dtype*)p1)[i], a);
2262
- }
2263
- } else {
2264
- for (; i < n - cnt_simd_loop; i += num_pack) {
2265
- a = _mm_load_ps(&((dtype*)p1)[i]);
2266
- b = _mm_load_ps(&((dtype*)p2)[i]);
2267
- a = _mm_mul_ps(a, b);
2268
- _mm_stream_ps(&((dtype*)p3)[i], a);
1798
+ if (is_aligned_step(s1, sizeof(dtype)) && is_aligned_step(s2, sizeof(dtype))) {
1799
+ //
1800
+ for (i = 0; i < n; i++) {
1801
+ *(dtype*)p2 = m_reciprocal(*(dtype*)p1);
1802
+ p1 += s1;
1803
+ p2 += s2;
2269
1804
  }
1805
+ return;
1806
+ //
2270
1807
  }
2271
1808
  }
2272
-
2273
- // Compute the remainder of the SIMD operation.
2274
- if (cnt_simd_loop != 0) {
2275
- if (p1 == p3) { // inplace case
2276
- for (; i < n; i++) {
2277
- check_intdivzero(((dtype*)p2)[i]);
2278
- ((dtype*)p1)[i] = m_mul(((dtype*)p1)[i], ((dtype*)p2)[i]);
2279
- }
2280
- } else {
2281
- for (; i < n; i++) {
2282
- check_intdivzero(((dtype*)p2)[i]);
2283
- ((dtype*)p3)[i] = m_mul(((dtype*)p1)[i], ((dtype*)p2)[i]);
2284
- }
2285
- }
1809
+ for (i = 0; i < n; i++) {
1810
+ GET_DATA_STRIDE(p1, s1, dtype, x);
1811
+ x = m_reciprocal(x);
1812
+ SET_DATA_STRIDE(p2, s2, dtype, x);
2286
1813
  }
2287
- #endif
2288
- return;
2289
- }
2290
-
2291
- if (is_aligned_step(s1, sizeof(dtype)) && is_aligned_step(s2, sizeof(dtype)) &&
2292
- is_aligned_step(s3, sizeof(dtype))) {
2293
1814
  //
1815
+ }
1816
+ }
1817
+ }
2294
1818
 
2295
- if (s2 == 0) { // Broadcasting from scalar value.
2296
- check_intdivzero(*(dtype*)p2);
2297
- if (s1 == sizeof(dtype) && s3 == sizeof(dtype)) {
2298
- #ifdef __SSE2__
2299
- // Broadcast a scalar value and use it for SIMD computation.
2300
- b = _mm_load1_ps(&((dtype*)p2)[0]);
2301
-
2302
- // Check number of elements. & Check same alignment.
2303
- if ((n >= num_pack) &&
2304
- is_same_aligned2(&((dtype*)p1)[i], &((dtype*)p3)[i], SIMD_ALIGNMENT_SIZE)) {
2305
- // Calculate up to the position just before the start of SIMD computation.
2306
- cnt = get_count_of_elements_not_aligned_to_simd_size(
2307
- &((dtype*)p1)[i], SIMD_ALIGNMENT_SIZE, sizeof(dtype)
2308
- );
2309
- #endif
2310
- if (p1 == p3) { // inplace case
2311
- #ifdef __SSE2__
2312
- for (; i < cnt; i++) {
2313
- #else
2314
- for (; i < n; i++) {
2315
- #endif
2316
- ((dtype*)p1)[i] = m_mul(((dtype*)p1)[i], *(dtype*)p2);
2317
- }
2318
- } else {
2319
- #ifdef __SSE2__
2320
- for (; i < cnt; i++) {
2321
- #else
2322
- for (; i < n; i++) {
2323
- #endif
2324
- ((dtype*)p3)[i] = m_mul(((dtype*)p1)[i], *(dtype*)p2);
2325
- }
2326
- }
2327
-
2328
- #ifdef __SSE2__
2329
- // Get the count of SIMD computation loops.
2330
- cnt_simd_loop = (n - i) % num_pack;
2331
-
2332
- // SIMD computation.
2333
- if (p1 == p3) { // inplace case
2334
- for (; i < n - cnt_simd_loop; i += num_pack) {
2335
- a = _mm_load_ps(&((dtype*)p1)[i]);
2336
- a = _mm_mul_ps(a, b);
2337
- _mm_store_ps(&((dtype*)p1)[i], a);
2338
- }
2339
- } else {
2340
- for (; i < n - cnt_simd_loop; i += num_pack) {
2341
- a = _mm_load_ps(&((dtype*)p1)[i]);
2342
- a = _mm_mul_ps(a, b);
2343
- _mm_stream_ps(&((dtype*)p3)[i], a);
2344
- }
2345
- }
2346
- }
2347
-
2348
- // Compute the remainder of the SIMD operation.
2349
- if (cnt_simd_loop != 0) {
2350
- if (p1 == p3) { // inplace case
2351
- for (; i < n; i++) {
2352
- ((dtype*)p1)[i] = m_mul(((dtype*)p1)[i], *(dtype*)p2);
2353
- }
2354
- } else {
2355
- for (; i < n; i++) {
2356
- ((dtype*)p3)[i] = m_mul(((dtype*)p1)[i], *(dtype*)p2);
2357
- }
2358
- }
2359
- }
2360
- #endif
2361
- } else {
2362
- for (i = 0; i < n; i++) {
2363
- *(dtype*)p3 = m_mul(*(dtype*)p1, *(dtype*)p2);
2364
- p1 += s1;
2365
- p3 += s3;
2366
- }
2367
- }
2368
- } else {
2369
- if (p1 == p3) { // inplace case
2370
- for (i = 0; i < n; i++) {
2371
- check_intdivzero(*(dtype*)p2);
2372
- *(dtype*)p1 = m_mul(*(dtype*)p1, *(dtype*)p2);
2373
- p1 += s1;
2374
- p2 += s2;
2375
- }
2376
- } else {
2377
- for (i = 0; i < n; i++) {
2378
- check_intdivzero(*(dtype*)p2);
2379
- *(dtype*)p3 = m_mul(*(dtype*)p1, *(dtype*)p2);
2380
- p1 += s1;
2381
- p2 += s2;
2382
- p3 += s3;
2383
- }
2384
- }
2385
- }
2386
-
2387
- return;
2388
- //
2389
- }
2390
- }
2391
- for (i = 0; i < n; i++) {
2392
- dtype x, y, z;
2393
- GET_DATA_STRIDE(p1, s1, dtype, x);
2394
- GET_DATA_STRIDE(p2, s2, dtype, y);
2395
- check_intdivzero(y);
2396
- z = m_mul(x, y);
2397
- SET_DATA_STRIDE(p3, s3, dtype, z);
2398
- }
2399
- //
2400
- }
2401
- #undef check_intdivzero
2402
-
2403
- static VALUE sfloat_mul_self(VALUE self, VALUE other) {
2404
- ndfunc_arg_in_t ain[2] = { { cT, 0 }, { cT, 0 } };
1819
+ static VALUE sfloat_reciprocal(VALUE self) {
1820
+ ndfunc_arg_in_t ain[1] = { { cT, 0 } };
2405
1821
  ndfunc_arg_out_t aout[1] = { { cT, 0 } };
2406
- ndfunc_t ndf = { iter_sfloat_mul, STRIDE_LOOP, 2, 1, ain, aout };
2407
-
2408
- return na_ndloop(&ndf, 2, self, other);
2409
- }
2410
-
2411
- static VALUE sfloat_mul(VALUE self, VALUE other) {
2412
-
2413
- VALUE klass, v;
1822
+ ndfunc_t ndf = { iter_sfloat_reciprocal, FULL_LOOP, 1, 1, ain, aout };
2414
1823
 
2415
- klass = na_upcast(rb_obj_class(self), rb_obj_class(other));
2416
- if (klass == cT) {
2417
- return sfloat_mul_self(self, other);
2418
- } else {
2419
- v = rb_funcall(klass, id_cast, 1, self);
2420
- return rb_funcall(v, '*', 1, other);
2421
- }
1824
+ return na_ndloop(&ndf, 1, self);
2422
1825
  }
2423
1826
 
2424
- #define check_intdivzero(y) \
2425
- {}
2426
-
2427
- static void iter_sfloat_div(na_loop_t* const lp) {
2428
- size_t i = 0;
2429
- size_t n;
2430
- char *p1, *p2, *p3;
2431
- ssize_t s1, s2, s3;
2432
-
2433
- #ifdef __SSE2__
2434
- size_t cnt;
2435
- size_t cnt_simd_loop = -1;
2436
-
2437
- __m128 a;
2438
- __m128 b;
1827
+ static void iter_sfloat_sign(na_loop_t* const lp) {
1828
+ size_t i, n;
1829
+ char *p1, *p2;
1830
+ ssize_t s1, s2;
1831
+ size_t *idx1, *idx2;
1832
+ dtype x;
2439
1833
 
2440
- size_t num_pack; // Number of elements packed for SIMD.
2441
- num_pack = SIMD_ALIGNMENT_SIZE / sizeof(dtype);
2442
- #endif
2443
1834
  INIT_COUNTER(lp, n);
2444
- INIT_PTR(lp, 0, p1, s1);
2445
- INIT_PTR(lp, 1, p2, s2);
2446
- INIT_PTR(lp, 2, p3, s3);
2447
-
2448
- //
2449
- if (is_aligned(p1, sizeof(dtype)) && is_aligned(p2, sizeof(dtype)) &&
2450
- is_aligned(p3, sizeof(dtype))) {
2451
-
2452
- if (s1 == sizeof(dtype) && s2 == sizeof(dtype) && s3 == sizeof(dtype)) {
2453
- #ifdef __SSE2__
2454
- // Check number of elements. & Check same alignment.
2455
- if ((n >= num_pack) &&
2456
- is_same_aligned3(
2457
- &((dtype*)p1)[i], &((dtype*)p2)[i], &((dtype*)p3)[i], SIMD_ALIGNMENT_SIZE
2458
- )) {
2459
- // Calculate up to the position just before the start of SIMD computation.
2460
- cnt = get_count_of_elements_not_aligned_to_simd_size(
2461
- &((dtype*)p1)[i], SIMD_ALIGNMENT_SIZE, sizeof(dtype)
2462
- );
2463
- #endif
2464
- if (p1 == p3) { // inplace case
2465
- #ifdef __SSE2__
2466
- for (; i < cnt; i++) {
2467
- #else
2468
- for (; i < n; i++) {
2469
- check_intdivzero(((dtype*)p2)[i]);
2470
- #endif
2471
- ((dtype*)p1)[i] = m_div(((dtype*)p1)[i], ((dtype*)p2)[i]);
2472
- }
2473
- } else {
2474
- #ifdef __SSE2__
2475
- for (; i < cnt; i++) {
2476
- #else
2477
- for (; i < n; i++) {
2478
- check_intdivzero(((dtype*)p2)[i]);
2479
- #endif
2480
- ((dtype*)p3)[i] = m_div(((dtype*)p1)[i], ((dtype*)p2)[i]);
2481
- }
2482
- }
2483
-
2484
- #ifdef __SSE2__
2485
- // Get the count of SIMD computation loops.
2486
- cnt_simd_loop = (n - i) % num_pack;
1835
+ INIT_PTR_IDX(lp, 0, p1, s1, idx1);
1836
+ INIT_PTR_IDX(lp, 1, p2, s2, idx2);
2487
1837
 
2488
- // SIMD computation.
2489
- if (p1 == p3) { // inplace case
2490
- for (; i < n - cnt_simd_loop; i += num_pack) {
2491
- a = _mm_load_ps(&((dtype*)p1)[i]);
2492
- b = _mm_load_ps(&((dtype*)p2)[i]);
2493
- a = _mm_div_ps(a, b);
2494
- _mm_store_ps(&((dtype*)p1)[i], a);
2495
- }
2496
- } else {
2497
- for (; i < n - cnt_simd_loop; i += num_pack) {
2498
- a = _mm_load_ps(&((dtype*)p1)[i]);
2499
- b = _mm_load_ps(&((dtype*)p2)[i]);
2500
- a = _mm_div_ps(a, b);
2501
- _mm_stream_ps(&((dtype*)p3)[i], a);
2502
- }
2503
- }
1838
+ if (idx1) {
1839
+ if (idx2) {
1840
+ for (i = 0; i < n; i++) {
1841
+ GET_DATA_INDEX(p1, idx1, dtype, x);
1842
+ x = m_sign(x);
1843
+ SET_DATA_INDEX(p2, idx2, dtype, x);
2504
1844
  }
2505
-
2506
- // Compute the remainder of the SIMD operation.
2507
- if (cnt_simd_loop != 0) {
2508
- if (p1 == p3) { // inplace case
2509
- for (; i < n; i++) {
2510
- check_intdivzero(((dtype*)p2)[i]);
2511
- ((dtype*)p1)[i] = m_div(((dtype*)p1)[i], ((dtype*)p2)[i]);
2512
- }
2513
- } else {
2514
- for (; i < n; i++) {
2515
- check_intdivzero(((dtype*)p2)[i]);
2516
- ((dtype*)p3)[i] = m_div(((dtype*)p1)[i], ((dtype*)p2)[i]);
2517
- }
2518
- }
1845
+ } else {
1846
+ for (i = 0; i < n; i++) {
1847
+ GET_DATA_INDEX(p1, idx1, dtype, x);
1848
+ x = m_sign(x);
1849
+ SET_DATA_STRIDE(p2, s2, dtype, x);
2519
1850
  }
2520
- #endif
2521
- return;
2522
1851
  }
2523
-
2524
- if (is_aligned_step(s1, sizeof(dtype)) && is_aligned_step(s2, sizeof(dtype)) &&
2525
- is_aligned_step(s3, sizeof(dtype))) {
1852
+ } else {
1853
+ if (idx2) {
1854
+ for (i = 0; i < n; i++) {
1855
+ GET_DATA_STRIDE(p1, s1, dtype, x);
1856
+ x = m_sign(x);
1857
+ SET_DATA_INDEX(p2, idx2, dtype, x);
1858
+ }
1859
+ } else {
2526
1860
  //
2527
-
2528
- if (s2 == 0) { // Broadcasting from scalar value.
2529
- check_intdivzero(*(dtype*)p2);
2530
- if (s1 == sizeof(dtype) && s3 == sizeof(dtype)) {
2531
- #ifdef __SSE2__
2532
- // Broadcast a scalar value and use it for SIMD computation.
2533
- b = _mm_load1_ps(&((dtype*)p2)[0]);
2534
-
2535
- // Check number of elements. & Check same alignment.
2536
- if ((n >= num_pack) &&
2537
- is_same_aligned2(&((dtype*)p1)[i], &((dtype*)p3)[i], SIMD_ALIGNMENT_SIZE)) {
2538
- // Calculate up to the position just before the start of SIMD computation.
2539
- cnt = get_count_of_elements_not_aligned_to_simd_size(
2540
- &((dtype*)p1)[i], SIMD_ALIGNMENT_SIZE, sizeof(dtype)
2541
- );
2542
- #endif
2543
- if (p1 == p3) { // inplace case
2544
- #ifdef __SSE2__
2545
- for (; i < cnt; i++) {
2546
- #else
2547
- for (; i < n; i++) {
2548
- #endif
2549
- ((dtype*)p1)[i] = m_div(((dtype*)p1)[i], *(dtype*)p2);
2550
- }
2551
- } else {
2552
- #ifdef __SSE2__
2553
- for (; i < cnt; i++) {
2554
- #else
2555
- for (; i < n; i++) {
2556
- #endif
2557
- ((dtype*)p3)[i] = m_div(((dtype*)p1)[i], *(dtype*)p2);
2558
- }
2559
- }
2560
-
2561
- #ifdef __SSE2__
2562
- // Get the count of SIMD computation loops.
2563
- cnt_simd_loop = (n - i) % num_pack;
2564
-
2565
- // SIMD computation.
2566
- if (p1 == p3) { // inplace case
2567
- for (; i < n - cnt_simd_loop; i += num_pack) {
2568
- a = _mm_load_ps(&((dtype*)p1)[i]);
2569
- a = _mm_div_ps(a, b);
2570
- _mm_store_ps(&((dtype*)p1)[i], a);
2571
- }
2572
- } else {
2573
- for (; i < n - cnt_simd_loop; i += num_pack) {
2574
- a = _mm_load_ps(&((dtype*)p1)[i]);
2575
- a = _mm_div_ps(a, b);
2576
- _mm_stream_ps(&((dtype*)p3)[i], a);
2577
- }
2578
- }
2579
- }
2580
-
2581
- // Compute the remainder of the SIMD operation.
2582
- if (cnt_simd_loop != 0) {
2583
- if (p1 == p3) { // inplace case
2584
- for (; i < n; i++) {
2585
- ((dtype*)p1)[i] = m_div(((dtype*)p1)[i], *(dtype*)p2);
2586
- }
2587
- } else {
2588
- for (; i < n; i++) {
2589
- ((dtype*)p3)[i] = m_div(((dtype*)p1)[i], *(dtype*)p2);
2590
- }
2591
- }
2592
- }
2593
- #endif
2594
- } else {
1861
+ if (is_aligned(p1, sizeof(dtype)) && is_aligned(p2, sizeof(dtype))) {
1862
+ if (s1 == sizeof(dtype) && s2 == sizeof(dtype)) {
2595
1863
  for (i = 0; i < n; i++) {
2596
- *(dtype*)p3 = m_div(*(dtype*)p1, *(dtype*)p2);
2597
- p1 += s1;
2598
- p3 += s3;
1864
+ ((dtype*)p2)[i] = m_sign(((dtype*)p1)[i]);
2599
1865
  }
1866
+ return;
2600
1867
  }
2601
- } else {
2602
- if (p1 == p3) { // inplace case
2603
- for (i = 0; i < n; i++) {
2604
- check_intdivzero(*(dtype*)p2);
2605
- *(dtype*)p1 = m_div(*(dtype*)p1, *(dtype*)p2);
2606
- p1 += s1;
2607
- p2 += s2;
2608
- }
2609
- } else {
1868
+ if (is_aligned_step(s1, sizeof(dtype)) && is_aligned_step(s2, sizeof(dtype))) {
1869
+ //
2610
1870
  for (i = 0; i < n; i++) {
2611
- check_intdivzero(*(dtype*)p2);
2612
- *(dtype*)p3 = m_div(*(dtype*)p1, *(dtype*)p2);
1871
+ *(dtype*)p2 = m_sign(*(dtype*)p1);
2613
1872
  p1 += s1;
2614
1873
  p2 += s2;
2615
- p3 += s3;
2616
1874
  }
2617
- }
2618
- }
2619
-
2620
- return;
2621
- //
2622
- }
2623
- }
2624
- for (i = 0; i < n; i++) {
2625
- dtype x, y, z;
2626
- GET_DATA_STRIDE(p1, s1, dtype, x);
2627
- GET_DATA_STRIDE(p2, s2, dtype, y);
2628
- check_intdivzero(y);
2629
- z = m_div(x, y);
2630
- SET_DATA_STRIDE(p3, s3, dtype, z);
2631
- }
2632
- //
2633
- }
2634
- #undef check_intdivzero
2635
-
2636
- static VALUE sfloat_div_self(VALUE self, VALUE other) {
2637
- ndfunc_arg_in_t ain[2] = { { cT, 0 }, { cT, 0 } };
2638
- ndfunc_arg_out_t aout[1] = { { cT, 0 } };
2639
- ndfunc_t ndf = { iter_sfloat_div, STRIDE_LOOP, 2, 1, ain, aout };
2640
-
2641
- return na_ndloop(&ndf, 2, self, other);
2642
- }
2643
-
2644
- static VALUE sfloat_div(VALUE self, VALUE other) {
2645
-
2646
- VALUE klass, v;
2647
-
2648
- klass = na_upcast(rb_obj_class(self), rb_obj_class(other));
2649
- if (klass == cT) {
2650
- return sfloat_div_self(self, other);
2651
- } else {
2652
- v = rb_funcall(klass, id_cast, 1, self);
2653
- return rb_funcall(v, '/', 1, other);
2654
- }
2655
- }
2656
-
2657
- #define check_intdivzero(y) \
2658
- {}
2659
-
2660
- static void iter_sfloat_mod(na_loop_t* const lp) {
2661
- size_t i = 0;
2662
- size_t n;
2663
- char *p1, *p2, *p3;
2664
- ssize_t s1, s2, s3;
2665
-
2666
- INIT_COUNTER(lp, n);
2667
- INIT_PTR(lp, 0, p1, s1);
2668
- INIT_PTR(lp, 1, p2, s2);
2669
- INIT_PTR(lp, 2, p3, s3);
2670
-
2671
- //
2672
- if (is_aligned(p1, sizeof(dtype)) && is_aligned(p2, sizeof(dtype)) &&
2673
- is_aligned(p3, sizeof(dtype))) {
2674
-
2675
- if (s1 == sizeof(dtype) && s2 == sizeof(dtype) && s3 == sizeof(dtype)) {
2676
- if (p1 == p3) { // inplace case
2677
- for (; i < n; i++) {
2678
- check_intdivzero(((dtype*)p2)[i]);
2679
- ((dtype*)p1)[i] = m_mod(((dtype*)p1)[i], ((dtype*)p2)[i]);
2680
- }
2681
- } else {
2682
- for (; i < n; i++) {
2683
- check_intdivzero(((dtype*)p2)[i]);
2684
- ((dtype*)p3)[i] = m_mod(((dtype*)p1)[i], ((dtype*)p2)[i]);
2685
- }
2686
- }
2687
- return;
2688
- }
2689
-
2690
- if (is_aligned_step(s1, sizeof(dtype)) && is_aligned_step(s2, sizeof(dtype)) &&
2691
- is_aligned_step(s3, sizeof(dtype))) {
2692
- //
2693
-
2694
- if (s2 == 0) { // Broadcasting from scalar value.
2695
- check_intdivzero(*(dtype*)p2);
2696
- if (s1 == sizeof(dtype) && s3 == sizeof(dtype)) {
2697
- if (p1 == p3) { // inplace case
2698
- for (; i < n; i++) {
2699
- ((dtype*)p1)[i] = m_mod(((dtype*)p1)[i], *(dtype*)p2);
2700
- }
2701
- } else {
2702
- for (; i < n; i++) {
2703
- ((dtype*)p3)[i] = m_mod(((dtype*)p1)[i], *(dtype*)p2);
2704
- }
2705
- }
2706
- } else {
2707
- for (i = 0; i < n; i++) {
2708
- *(dtype*)p3 = m_mod(*(dtype*)p1, *(dtype*)p2);
2709
- p1 += s1;
2710
- p3 += s3;
2711
- }
2712
- }
2713
- } else {
2714
- if (p1 == p3) { // inplace case
2715
- for (i = 0; i < n; i++) {
2716
- check_intdivzero(*(dtype*)p2);
2717
- *(dtype*)p1 = m_mod(*(dtype*)p1, *(dtype*)p2);
2718
- p1 += s1;
2719
- p2 += s2;
2720
- }
2721
- } else {
2722
- for (i = 0; i < n; i++) {
2723
- check_intdivzero(*(dtype*)p2);
2724
- *(dtype*)p3 = m_mod(*(dtype*)p1, *(dtype*)p2);
2725
- p1 += s1;
2726
- p2 += s2;
2727
- p3 += s3;
2728
- }
2729
- }
2730
- }
2731
-
2732
- return;
2733
- //
2734
- }
2735
- }
2736
- for (i = 0; i < n; i++) {
2737
- dtype x, y, z;
2738
- GET_DATA_STRIDE(p1, s1, dtype, x);
2739
- GET_DATA_STRIDE(p2, s2, dtype, y);
2740
- check_intdivzero(y);
2741
- z = m_mod(x, y);
2742
- SET_DATA_STRIDE(p3, s3, dtype, z);
2743
- }
2744
- //
2745
- }
2746
- #undef check_intdivzero
2747
-
2748
- static VALUE sfloat_mod_self(VALUE self, VALUE other) {
2749
- ndfunc_arg_in_t ain[2] = { { cT, 0 }, { cT, 0 } };
2750
- ndfunc_arg_out_t aout[1] = { { cT, 0 } };
2751
- ndfunc_t ndf = { iter_sfloat_mod, STRIDE_LOOP, 2, 1, ain, aout };
2752
-
2753
- return na_ndloop(&ndf, 2, self, other);
2754
- }
2755
-
2756
- static VALUE sfloat_mod(VALUE self, VALUE other) {
2757
-
2758
- VALUE klass, v;
2759
-
2760
- klass = na_upcast(rb_obj_class(self), rb_obj_class(other));
2761
- if (klass == cT) {
2762
- return sfloat_mod_self(self, other);
2763
- } else {
2764
- v = rb_funcall(klass, id_cast, 1, self);
2765
- return rb_funcall(v, '%', 1, other);
2766
- }
2767
- }
2768
-
2769
- static void iter_sfloat_divmod(na_loop_t* const lp) {
2770
- size_t i, n;
2771
- char *p1, *p2, *p3, *p4;
2772
- ssize_t s1, s2, s3, s4;
2773
- dtype x, y, a, b;
2774
- INIT_COUNTER(lp, n);
2775
- INIT_PTR(lp, 0, p1, s1);
2776
- INIT_PTR(lp, 1, p2, s2);
2777
- INIT_PTR(lp, 2, p3, s3);
2778
- INIT_PTR(lp, 3, p4, s4);
2779
- for (i = n; i--;) {
2780
- GET_DATA_STRIDE(p1, s1, dtype, x);
2781
- GET_DATA_STRIDE(p2, s2, dtype, y);
2782
- m_divmod(x, y, a, b);
2783
- SET_DATA_STRIDE(p3, s3, dtype, a);
2784
- SET_DATA_STRIDE(p4, s4, dtype, b);
2785
- }
2786
- }
2787
-
2788
- static VALUE sfloat_divmod_self(VALUE self, VALUE other) {
2789
- ndfunc_arg_in_t ain[2] = { { cT, 0 }, { cT, 0 } };
2790
- ndfunc_arg_out_t aout[2] = { { cT, 0 }, { cT, 0 } };
2791
- ndfunc_t ndf = { iter_sfloat_divmod, STRIDE_LOOP, 2, 2, ain, aout };
2792
-
2793
- return na_ndloop(&ndf, 2, self, other);
2794
- }
2795
-
2796
- static VALUE sfloat_divmod(VALUE self, VALUE other) {
2797
-
2798
- VALUE klass, v;
2799
- klass = na_upcast(rb_obj_class(self), rb_obj_class(other));
2800
- if (klass == cT) {
2801
- return sfloat_divmod_self(self, other);
2802
- } else {
2803
- v = rb_funcall(klass, id_cast, 1, self);
2804
- return rb_funcall(v, id_divmod, 1, other);
2805
- }
2806
- }
2807
-
2808
- static void iter_sfloat_pow(na_loop_t* const lp) {
2809
- size_t i;
2810
- char *p1, *p2, *p3;
2811
- ssize_t s1, s2, s3;
2812
- dtype x, y;
2813
- INIT_COUNTER(lp, i);
2814
- INIT_PTR(lp, 0, p1, s1);
2815
- INIT_PTR(lp, 1, p2, s2);
2816
- INIT_PTR(lp, 2, p3, s3);
2817
- for (; i--;) {
2818
- GET_DATA_STRIDE(p1, s1, dtype, x);
2819
- GET_DATA_STRIDE(p2, s2, dtype, y);
2820
- x = m_pow(x, y);
2821
- SET_DATA_STRIDE(p3, s3, dtype, x);
2822
- }
2823
- }
2824
-
2825
- static void iter_sfloat_pow_int32(na_loop_t* const lp) {
2826
- size_t i;
2827
- char *p1, *p2, *p3;
2828
- ssize_t s1, s2, s3;
2829
- dtype x;
2830
- int32_t y;
2831
- INIT_COUNTER(lp, i);
2832
- INIT_PTR(lp, 0, p1, s1);
2833
- INIT_PTR(lp, 1, p2, s2);
2834
- INIT_PTR(lp, 2, p3, s3);
2835
- for (; i--;) {
2836
- GET_DATA_STRIDE(p1, s1, dtype, x);
2837
- GET_DATA_STRIDE(p2, s2, int32_t, y);
2838
- x = m_pow_int(x, y);
2839
- SET_DATA_STRIDE(p3, s3, dtype, x);
2840
- }
2841
- }
2842
-
2843
- static VALUE sfloat_pow_self(VALUE self, VALUE other) {
2844
- ndfunc_arg_in_t ain[2] = { { cT, 0 }, { cT, 0 } };
2845
- ndfunc_arg_in_t ain_i[2] = { { cT, 0 }, { numo_cInt32, 0 } };
2846
- ndfunc_arg_out_t aout[1] = { { cT, 0 } };
2847
- ndfunc_t ndf = { iter_sfloat_pow, STRIDE_LOOP, 2, 1, ain, aout };
2848
- ndfunc_t ndf_i = { iter_sfloat_pow_int32, STRIDE_LOOP, 2, 1, ain_i, aout };
2849
-
2850
- // fixme : use na.integer?
2851
- if (FIXNUM_P(other) || rb_obj_is_kind_of(other, numo_cInt32)) {
2852
- return na_ndloop(&ndf_i, 2, self, other);
2853
- } else {
2854
- return na_ndloop(&ndf, 2, self, other);
2855
- }
2856
- }
2857
-
2858
- static VALUE sfloat_pow(VALUE self, VALUE other) {
2859
-
2860
- VALUE klass, v;
2861
- klass = na_upcast(rb_obj_class(self), rb_obj_class(other));
2862
- if (klass == cT) {
2863
- return sfloat_pow_self(self, other);
2864
- } else {
2865
- v = rb_funcall(klass, id_cast, 1, self);
2866
- return rb_funcall(v, id_pow, 1, other);
2867
- }
2868
- }
2869
-
2870
- static void iter_sfloat_minus(na_loop_t* const lp) {
2871
- size_t i, n;
2872
- char *p1, *p2;
2873
- ssize_t s1, s2;
2874
- size_t *idx1, *idx2;
2875
- dtype x;
2876
-
2877
- INIT_COUNTER(lp, n);
2878
- INIT_PTR_IDX(lp, 0, p1, s1, idx1);
2879
- INIT_PTR_IDX(lp, 1, p2, s2, idx2);
2880
-
2881
- if (idx1) {
2882
- if (idx2) {
2883
- for (i = 0; i < n; i++) {
2884
- GET_DATA_INDEX(p1, idx1, dtype, x);
2885
- x = m_minus(x);
2886
- SET_DATA_INDEX(p2, idx2, dtype, x);
2887
- }
2888
- } else {
2889
- for (i = 0; i < n; i++) {
2890
- GET_DATA_INDEX(p1, idx1, dtype, x);
2891
- x = m_minus(x);
2892
- SET_DATA_STRIDE(p2, s2, dtype, x);
2893
- }
2894
- }
2895
- } else {
2896
- if (idx2) {
2897
- for (i = 0; i < n; i++) {
2898
- GET_DATA_STRIDE(p1, s1, dtype, x);
2899
- x = m_minus(x);
2900
- SET_DATA_INDEX(p2, idx2, dtype, x);
2901
- }
2902
- } else {
2903
- //
2904
- if (is_aligned(p1, sizeof(dtype)) && is_aligned(p2, sizeof(dtype))) {
2905
- if (s1 == sizeof(dtype) && s2 == sizeof(dtype)) {
2906
- for (i = 0; i < n; i++) {
2907
- ((dtype*)p2)[i] = m_minus(((dtype*)p1)[i]);
2908
- }
2909
- return;
2910
- }
2911
- if (is_aligned_step(s1, sizeof(dtype)) && is_aligned_step(s2, sizeof(dtype))) {
2912
- //
2913
- for (i = 0; i < n; i++) {
2914
- *(dtype*)p2 = m_minus(*(dtype*)p1);
2915
- p1 += s1;
2916
- p2 += s2;
2917
- }
2918
- return;
2919
- //
2920
- }
2921
- }
2922
- for (i = 0; i < n; i++) {
2923
- GET_DATA_STRIDE(p1, s1, dtype, x);
2924
- x = m_minus(x);
2925
- SET_DATA_STRIDE(p2, s2, dtype, x);
2926
- }
2927
- //
2928
- }
2929
- }
2930
- }
2931
-
2932
- static VALUE sfloat_minus(VALUE self) {
2933
- ndfunc_arg_in_t ain[1] = { { cT, 0 } };
2934
- ndfunc_arg_out_t aout[1] = { { cT, 0 } };
2935
- ndfunc_t ndf = { iter_sfloat_minus, FULL_LOOP, 1, 1, ain, aout };
2936
-
2937
- return na_ndloop(&ndf, 1, self);
2938
- }
2939
-
2940
- static void iter_sfloat_reciprocal(na_loop_t* const lp) {
2941
- size_t i, n;
2942
- char *p1, *p2;
2943
- ssize_t s1, s2;
2944
- size_t *idx1, *idx2;
2945
- dtype x;
2946
-
2947
- INIT_COUNTER(lp, n);
2948
- INIT_PTR_IDX(lp, 0, p1, s1, idx1);
2949
- INIT_PTR_IDX(lp, 1, p2, s2, idx2);
2950
-
2951
- if (idx1) {
2952
- if (idx2) {
2953
- for (i = 0; i < n; i++) {
2954
- GET_DATA_INDEX(p1, idx1, dtype, x);
2955
- x = m_reciprocal(x);
2956
- SET_DATA_INDEX(p2, idx2, dtype, x);
2957
- }
2958
- } else {
2959
- for (i = 0; i < n; i++) {
2960
- GET_DATA_INDEX(p1, idx1, dtype, x);
2961
- x = m_reciprocal(x);
2962
- SET_DATA_STRIDE(p2, s2, dtype, x);
2963
- }
2964
- }
2965
- } else {
2966
- if (idx2) {
2967
- for (i = 0; i < n; i++) {
2968
- GET_DATA_STRIDE(p1, s1, dtype, x);
2969
- x = m_reciprocal(x);
2970
- SET_DATA_INDEX(p2, idx2, dtype, x);
2971
- }
2972
- } else {
2973
- //
2974
- if (is_aligned(p1, sizeof(dtype)) && is_aligned(p2, sizeof(dtype))) {
2975
- if (s1 == sizeof(dtype) && s2 == sizeof(dtype)) {
2976
- for (i = 0; i < n; i++) {
2977
- ((dtype*)p2)[i] = m_reciprocal(((dtype*)p1)[i]);
2978
- }
2979
- return;
2980
- }
2981
- if (is_aligned_step(s1, sizeof(dtype)) && is_aligned_step(s2, sizeof(dtype))) {
2982
- //
2983
- for (i = 0; i < n; i++) {
2984
- *(dtype*)p2 = m_reciprocal(*(dtype*)p1);
2985
- p1 += s1;
2986
- p2 += s2;
2987
- }
2988
- return;
2989
- //
2990
- }
2991
- }
2992
- for (i = 0; i < n; i++) {
2993
- GET_DATA_STRIDE(p1, s1, dtype, x);
2994
- x = m_reciprocal(x);
2995
- SET_DATA_STRIDE(p2, s2, dtype, x);
2996
- }
2997
- //
2998
- }
2999
- }
3000
- }
3001
-
3002
- static VALUE sfloat_reciprocal(VALUE self) {
3003
- ndfunc_arg_in_t ain[1] = { { cT, 0 } };
3004
- ndfunc_arg_out_t aout[1] = { { cT, 0 } };
3005
- ndfunc_t ndf = { iter_sfloat_reciprocal, FULL_LOOP, 1, 1, ain, aout };
3006
-
3007
- return na_ndloop(&ndf, 1, self);
3008
- }
3009
-
3010
- static void iter_sfloat_sign(na_loop_t* const lp) {
3011
- size_t i, n;
3012
- char *p1, *p2;
3013
- ssize_t s1, s2;
3014
- size_t *idx1, *idx2;
3015
- dtype x;
3016
-
3017
- INIT_COUNTER(lp, n);
3018
- INIT_PTR_IDX(lp, 0, p1, s1, idx1);
3019
- INIT_PTR_IDX(lp, 1, p2, s2, idx2);
3020
-
3021
- if (idx1) {
3022
- if (idx2) {
3023
- for (i = 0; i < n; i++) {
3024
- GET_DATA_INDEX(p1, idx1, dtype, x);
3025
- x = m_sign(x);
3026
- SET_DATA_INDEX(p2, idx2, dtype, x);
3027
- }
3028
- } else {
3029
- for (i = 0; i < n; i++) {
3030
- GET_DATA_INDEX(p1, idx1, dtype, x);
3031
- x = m_sign(x);
3032
- SET_DATA_STRIDE(p2, s2, dtype, x);
3033
- }
3034
- }
3035
- } else {
3036
- if (idx2) {
3037
- for (i = 0; i < n; i++) {
3038
- GET_DATA_STRIDE(p1, s1, dtype, x);
3039
- x = m_sign(x);
3040
- SET_DATA_INDEX(p2, idx2, dtype, x);
3041
- }
3042
- } else {
3043
- //
3044
- if (is_aligned(p1, sizeof(dtype)) && is_aligned(p2, sizeof(dtype))) {
3045
- if (s1 == sizeof(dtype) && s2 == sizeof(dtype)) {
3046
- for (i = 0; i < n; i++) {
3047
- ((dtype*)p2)[i] = m_sign(((dtype*)p1)[i]);
3048
- }
3049
- return;
3050
- }
3051
- if (is_aligned_step(s1, sizeof(dtype)) && is_aligned_step(s2, sizeof(dtype))) {
3052
- //
3053
- for (i = 0; i < n; i++) {
3054
- *(dtype*)p2 = m_sign(*(dtype*)p1);
3055
- p1 += s1;
3056
- p2 += s2;
3057
- }
3058
- return;
3059
- //
3060
- }
3061
- }
3062
- for (i = 0; i < n; i++) {
3063
- GET_DATA_STRIDE(p1, s1, dtype, x);
3064
- x = m_sign(x);
3065
- SET_DATA_STRIDE(p2, s2, dtype, x);
3066
- }
3067
- //
3068
- }
3069
- }
3070
- }
3071
-
3072
- static VALUE sfloat_sign(VALUE self) {
3073
- ndfunc_arg_in_t ain[1] = { { cT, 0 } };
3074
- ndfunc_arg_out_t aout[1] = { { cT, 0 } };
3075
- ndfunc_t ndf = { iter_sfloat_sign, FULL_LOOP, 1, 1, ain, aout };
3076
-
3077
- return na_ndloop(&ndf, 1, self);
3078
- }
3079
-
3080
- static void iter_sfloat_square(na_loop_t* const lp) {
3081
- size_t i, n;
3082
- char *p1, *p2;
3083
- ssize_t s1, s2;
3084
- size_t *idx1, *idx2;
3085
- dtype x;
3086
-
3087
- INIT_COUNTER(lp, n);
3088
- INIT_PTR_IDX(lp, 0, p1, s1, idx1);
3089
- INIT_PTR_IDX(lp, 1, p2, s2, idx2);
3090
-
3091
- if (idx1) {
3092
- if (idx2) {
3093
- for (i = 0; i < n; i++) {
3094
- GET_DATA_INDEX(p1, idx1, dtype, x);
3095
- x = m_square(x);
3096
- SET_DATA_INDEX(p2, idx2, dtype, x);
3097
- }
3098
- } else {
3099
- for (i = 0; i < n; i++) {
3100
- GET_DATA_INDEX(p1, idx1, dtype, x);
3101
- x = m_square(x);
3102
- SET_DATA_STRIDE(p2, s2, dtype, x);
3103
- }
3104
- }
3105
- } else {
3106
- if (idx2) {
3107
- for (i = 0; i < n; i++) {
3108
- GET_DATA_STRIDE(p1, s1, dtype, x);
3109
- x = m_square(x);
3110
- SET_DATA_INDEX(p2, idx2, dtype, x);
3111
- }
3112
- } else {
3113
- //
3114
- if (is_aligned(p1, sizeof(dtype)) && is_aligned(p2, sizeof(dtype))) {
3115
- if (s1 == sizeof(dtype) && s2 == sizeof(dtype)) {
3116
- for (i = 0; i < n; i++) {
3117
- ((dtype*)p2)[i] = m_square(((dtype*)p1)[i]);
3118
- }
3119
- return;
3120
- }
3121
- if (is_aligned_step(s1, sizeof(dtype)) && is_aligned_step(s2, sizeof(dtype))) {
3122
- //
3123
- for (i = 0; i < n; i++) {
3124
- *(dtype*)p2 = m_square(*(dtype*)p1);
3125
- p1 += s1;
3126
- p2 += s2;
3127
- }
3128
- return;
3129
- //
3130
- }
3131
- }
3132
- for (i = 0; i < n; i++) {
3133
- GET_DATA_STRIDE(p1, s1, dtype, x);
3134
- x = m_square(x);
3135
- SET_DATA_STRIDE(p2, s2, dtype, x);
3136
- }
3137
- //
3138
- }
3139
- }
3140
- }
3141
-
3142
- static VALUE sfloat_square(VALUE self) {
3143
- ndfunc_arg_in_t ain[1] = { { cT, 0 } };
3144
- ndfunc_arg_out_t aout[1] = { { cT, 0 } };
3145
- ndfunc_t ndf = { iter_sfloat_square, FULL_LOOP, 1, 1, ain, aout };
3146
-
3147
- return na_ndloop(&ndf, 1, self);
3148
- }
3149
-
3150
- static void iter_sfloat_eq(na_loop_t* const lp) {
3151
- size_t i;
3152
- char *p1, *p2;
3153
- BIT_DIGIT* a3;
3154
- size_t p3;
3155
- ssize_t s1, s2, s3;
3156
- dtype x, y;
3157
- BIT_DIGIT b;
3158
- INIT_COUNTER(lp, i);
3159
- INIT_PTR(lp, 0, p1, s1);
3160
- INIT_PTR(lp, 1, p2, s2);
3161
- INIT_PTR_BIT(lp, 2, a3, p3, s3);
3162
- for (; i--;) {
3163
- GET_DATA_STRIDE(p1, s1, dtype, x);
3164
- GET_DATA_STRIDE(p2, s2, dtype, y);
3165
- b = (m_eq(x, y)) ? 1 : 0;
3166
- STORE_BIT(a3, p3, b);
3167
- p3 += s3;
3168
- }
3169
- }
3170
-
3171
- static VALUE sfloat_eq_self(VALUE self, VALUE other) {
- ndfunc_arg_in_t ain[2] = { { cT, 0 }, { cT, 0 } };
- ndfunc_arg_out_t aout[1] = { { numo_cBit, 0 } };
- ndfunc_t ndf = { iter_sfloat_eq, STRIDE_LOOP, 2, 1, ain, aout };
-
- return na_ndloop(&ndf, 2, self, other);
- }
-
- static VALUE sfloat_eq(VALUE self, VALUE other) {
-
- VALUE klass, v;
- klass = na_upcast(rb_obj_class(self), rb_obj_class(other));
- if (klass == cT) {
- return sfloat_eq_self(self, other);
- } else {
- v = rb_funcall(klass, id_cast, 1, self);
- return rb_funcall(v, id_eq, 1, other);
- }
- }
-
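For context (not part of the diff itself): the comparison iterators here (eq, ne, nearly_eq, and the gt/ge/lt/le group further down) emit a Numo::Bit result, writing one bit per element through STORE_BIT. A rough plain-C sketch of that packing idea, assuming a flat uint32_t word buffer rather than the gem's BIT_DIGIT layout:

    #include <stdio.h>
    #include <stdint.h>
    #include <stddef.h>

    /* Set bit `pos` of a packed bit buffer to b (0 or 1),
       roughly what STORE_BIT does for the Bit output array. */
    static void store_bit(uint32_t *bits, size_t pos, int b) {
        if (b) bits[pos / 32] |=  (UINT32_C(1) << (pos % 32));
        else   bits[pos / 32] &= ~(UINT32_C(1) << (pos % 32));
    }

    int main(void) {
        float x[5] = { 1, 2, 3, 4, 5 };
        float y[5] = { 1, 0, 3, 9, 5 };
        uint32_t bits[1] = { 0 };
        for (size_t i = 0; i < 5; i++)
            store_bit(bits, i, x[i] == y[i]);       /* elementwise eq -> one bit each */
        printf("mask = 0x%x\n", (unsigned)bits[0]); /* 0x15: elements 0, 2, 4 are equal */
        return 0;
    }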
3191
- static void iter_sfloat_ne(na_loop_t* const lp) {
3192
- size_t i;
3193
- char *p1, *p2;
3194
- BIT_DIGIT* a3;
3195
- size_t p3;
3196
- ssize_t s1, s2, s3;
3197
- dtype x, y;
3198
- BIT_DIGIT b;
3199
- INIT_COUNTER(lp, i);
3200
- INIT_PTR(lp, 0, p1, s1);
3201
- INIT_PTR(lp, 1, p2, s2);
3202
- INIT_PTR_BIT(lp, 2, a3, p3, s3);
3203
- for (; i--;) {
3204
- GET_DATA_STRIDE(p1, s1, dtype, x);
3205
- GET_DATA_STRIDE(p2, s2, dtype, y);
3206
- b = (m_ne(x, y)) ? 1 : 0;
3207
- STORE_BIT(a3, p3, b);
3208
- p3 += s3;
3209
- }
3210
- }
3211
-
3212
- static VALUE sfloat_ne_self(VALUE self, VALUE other) {
3213
- ndfunc_arg_in_t ain[2] = { { cT, 0 }, { cT, 0 } };
3214
- ndfunc_arg_out_t aout[1] = { { numo_cBit, 0 } };
3215
- ndfunc_t ndf = { iter_sfloat_ne, STRIDE_LOOP, 2, 1, ain, aout };
3216
-
3217
- return na_ndloop(&ndf, 2, self, other);
3218
- }
3219
-
3220
- static VALUE sfloat_ne(VALUE self, VALUE other) {
3221
-
3222
- VALUE klass, v;
3223
- klass = na_upcast(rb_obj_class(self), rb_obj_class(other));
3224
- if (klass == cT) {
3225
- return sfloat_ne_self(self, other);
3226
- } else {
3227
- v = rb_funcall(klass, id_cast, 1, self);
3228
- return rb_funcall(v, id_ne, 1, other);
3229
- }
3230
- }
3231
-
3232
- static void iter_sfloat_nearly_eq(na_loop_t* const lp) {
3233
- size_t i;
3234
- char *p1, *p2;
3235
- BIT_DIGIT* a3;
3236
- size_t p3;
3237
- ssize_t s1, s2, s3;
3238
- dtype x, y;
3239
- BIT_DIGIT b;
3240
- INIT_COUNTER(lp, i);
3241
- INIT_PTR(lp, 0, p1, s1);
3242
- INIT_PTR(lp, 1, p2, s2);
3243
- INIT_PTR_BIT(lp, 2, a3, p3, s3);
3244
- for (; i--;) {
3245
- GET_DATA_STRIDE(p1, s1, dtype, x);
3246
- GET_DATA_STRIDE(p2, s2, dtype, y);
3247
- b = (m_nearly_eq(x, y)) ? 1 : 0;
3248
- STORE_BIT(a3, p3, b);
3249
- p3 += s3;
3250
- }
3251
- }
3252
-
3253
- static VALUE sfloat_nearly_eq_self(VALUE self, VALUE other) {
3254
- ndfunc_arg_in_t ain[2] = { { cT, 0 }, { cT, 0 } };
3255
- ndfunc_arg_out_t aout[1] = { { numo_cBit, 0 } };
3256
- ndfunc_t ndf = { iter_sfloat_nearly_eq, STRIDE_LOOP, 2, 1, ain, aout };
3257
-
3258
- return na_ndloop(&ndf, 2, self, other);
3259
- }
3260
-
3261
- static VALUE sfloat_nearly_eq(VALUE self, VALUE other) {
3262
-
3263
- VALUE klass, v;
3264
- klass = na_upcast(rb_obj_class(self), rb_obj_class(other));
3265
- if (klass == cT) {
3266
- return sfloat_nearly_eq_self(self, other);
3267
- } else {
3268
- v = rb_funcall(klass, id_cast, 1, self);
3269
- return rb_funcall(v, id_nearly_eq, 1, other);
3270
- }
3271
- }
3272
-
3273
- static void iter_sfloat_floor(na_loop_t* const lp) {
3274
- size_t i, n;
3275
- char *p1, *p2;
3276
- ssize_t s1, s2;
3277
- size_t *idx1, *idx2;
3278
- dtype x;
3279
-
3280
- INIT_COUNTER(lp, n);
3281
- INIT_PTR_IDX(lp, 0, p1, s1, idx1);
3282
- INIT_PTR_IDX(lp, 1, p2, s2, idx2);
3283
-
3284
- if (idx1) {
3285
- if (idx2) {
3286
- for (i = 0; i < n; i++) {
3287
- GET_DATA_INDEX(p1, idx1, dtype, x);
3288
- x = m_floor(x);
3289
- SET_DATA_INDEX(p2, idx2, dtype, x);
3290
- }
3291
- } else {
3292
- for (i = 0; i < n; i++) {
3293
- GET_DATA_INDEX(p1, idx1, dtype, x);
3294
- x = m_floor(x);
3295
- SET_DATA_STRIDE(p2, s2, dtype, x);
3296
- }
3297
- }
3298
- } else {
3299
- if (idx2) {
3300
- for (i = 0; i < n; i++) {
3301
- GET_DATA_STRIDE(p1, s1, dtype, x);
3302
- x = m_floor(x);
3303
- SET_DATA_INDEX(p2, idx2, dtype, x);
3304
- }
3305
- } else {
3306
- //
3307
- if (is_aligned(p1, sizeof(dtype)) && is_aligned(p2, sizeof(dtype))) {
3308
- if (s1 == sizeof(dtype) && s2 == sizeof(dtype)) {
3309
- for (i = 0; i < n; i++) {
3310
- ((dtype*)p2)[i] = m_floor(((dtype*)p1)[i]);
3311
- }
3312
- return;
3313
- }
3314
- if (is_aligned_step(s1, sizeof(dtype)) && is_aligned_step(s2, sizeof(dtype))) {
3315
- //
3316
- for (i = 0; i < n; i++) {
3317
- *(dtype*)p2 = m_floor(*(dtype*)p1);
3318
- p1 += s1;
3319
- p2 += s2;
3320
- }
3321
- return;
3322
- //
3323
- }
3324
- }
3325
- for (i = 0; i < n; i++) {
3326
- GET_DATA_STRIDE(p1, s1, dtype, x);
3327
- x = m_floor(x);
3328
- SET_DATA_STRIDE(p2, s2, dtype, x);
3329
- }
3330
- //
3331
- }
3332
- }
3333
- }
3334
-
3335
- static VALUE sfloat_floor(VALUE self) {
3336
- ndfunc_arg_in_t ain[1] = { { cT, 0 } };
3337
- ndfunc_arg_out_t aout[1] = { { cT, 0 } };
3338
- ndfunc_t ndf = { iter_sfloat_floor, FULL_LOOP, 1, 1, ain, aout };
3339
-
3340
- return na_ndloop(&ndf, 1, self);
3341
- }
3342
-
3343
- static void iter_sfloat_round(na_loop_t* const lp) {
3344
- size_t i, n;
3345
- char *p1, *p2;
3346
- ssize_t s1, s2;
3347
- size_t *idx1, *idx2;
3348
- dtype x;
3349
-
3350
- INIT_COUNTER(lp, n);
3351
- INIT_PTR_IDX(lp, 0, p1, s1, idx1);
3352
- INIT_PTR_IDX(lp, 1, p2, s2, idx2);
3353
-
3354
- if (idx1) {
3355
- if (idx2) {
3356
- for (i = 0; i < n; i++) {
3357
- GET_DATA_INDEX(p1, idx1, dtype, x);
3358
- x = m_round(x);
3359
- SET_DATA_INDEX(p2, idx2, dtype, x);
3360
- }
3361
- } else {
3362
- for (i = 0; i < n; i++) {
3363
- GET_DATA_INDEX(p1, idx1, dtype, x);
3364
- x = m_round(x);
3365
- SET_DATA_STRIDE(p2, s2, dtype, x);
3366
- }
3367
- }
3368
- } else {
3369
- if (idx2) {
3370
- for (i = 0; i < n; i++) {
3371
- GET_DATA_STRIDE(p1, s1, dtype, x);
3372
- x = m_round(x);
3373
- SET_DATA_INDEX(p2, idx2, dtype, x);
3374
- }
3375
- } else {
3376
- //
3377
- if (is_aligned(p1, sizeof(dtype)) && is_aligned(p2, sizeof(dtype))) {
3378
- if (s1 == sizeof(dtype) && s2 == sizeof(dtype)) {
3379
- for (i = 0; i < n; i++) {
3380
- ((dtype*)p2)[i] = m_round(((dtype*)p1)[i]);
3381
- }
3382
- return;
3383
- }
3384
- if (is_aligned_step(s1, sizeof(dtype)) && is_aligned_step(s2, sizeof(dtype))) {
3385
- //
3386
- for (i = 0; i < n; i++) {
3387
- *(dtype*)p2 = m_round(*(dtype*)p1);
3388
- p1 += s1;
3389
- p2 += s2;
3390
- }
3391
- return;
3392
- //
3393
- }
3394
- }
3395
- for (i = 0; i < n; i++) {
3396
- GET_DATA_STRIDE(p1, s1, dtype, x);
3397
- x = m_round(x);
3398
- SET_DATA_STRIDE(p2, s2, dtype, x);
3399
- }
3400
- //
3401
- }
3402
- }
3403
- }
3404
-
3405
- static VALUE sfloat_round(VALUE self) {
3406
- ndfunc_arg_in_t ain[1] = { { cT, 0 } };
3407
- ndfunc_arg_out_t aout[1] = { { cT, 0 } };
3408
- ndfunc_t ndf = { iter_sfloat_round, FULL_LOOP, 1, 1, ain, aout };
3409
-
3410
- return na_ndloop(&ndf, 1, self);
3411
- }
3412
-
3413
- static void iter_sfloat_ceil(na_loop_t* const lp) {
3414
- size_t i, n;
3415
- char *p1, *p2;
3416
- ssize_t s1, s2;
3417
- size_t *idx1, *idx2;
3418
- dtype x;
3419
-
3420
- INIT_COUNTER(lp, n);
3421
- INIT_PTR_IDX(lp, 0, p1, s1, idx1);
3422
- INIT_PTR_IDX(lp, 1, p2, s2, idx2);
3423
-
3424
- if (idx1) {
3425
- if (idx2) {
3426
- for (i = 0; i < n; i++) {
3427
- GET_DATA_INDEX(p1, idx1, dtype, x);
3428
- x = m_ceil(x);
3429
- SET_DATA_INDEX(p2, idx2, dtype, x);
3430
- }
3431
- } else {
3432
- for (i = 0; i < n; i++) {
3433
- GET_DATA_INDEX(p1, idx1, dtype, x);
3434
- x = m_ceil(x);
3435
- SET_DATA_STRIDE(p2, s2, dtype, x);
3436
- }
3437
- }
3438
- } else {
3439
- if (idx2) {
3440
- for (i = 0; i < n; i++) {
3441
- GET_DATA_STRIDE(p1, s1, dtype, x);
3442
- x = m_ceil(x);
3443
- SET_DATA_INDEX(p2, idx2, dtype, x);
3444
- }
3445
- } else {
3446
- //
3447
- if (is_aligned(p1, sizeof(dtype)) && is_aligned(p2, sizeof(dtype))) {
3448
- if (s1 == sizeof(dtype) && s2 == sizeof(dtype)) {
3449
- for (i = 0; i < n; i++) {
3450
- ((dtype*)p2)[i] = m_ceil(((dtype*)p1)[i]);
3451
- }
3452
- return;
3453
- }
3454
- if (is_aligned_step(s1, sizeof(dtype)) && is_aligned_step(s2, sizeof(dtype))) {
3455
- //
3456
- for (i = 0; i < n; i++) {
3457
- *(dtype*)p2 = m_ceil(*(dtype*)p1);
3458
- p1 += s1;
3459
- p2 += s2;
3460
- }
3461
- return;
3462
- //
3463
- }
3464
- }
3465
- for (i = 0; i < n; i++) {
3466
- GET_DATA_STRIDE(p1, s1, dtype, x);
3467
- x = m_ceil(x);
3468
- SET_DATA_STRIDE(p2, s2, dtype, x);
3469
- }
3470
- //
3471
- }
3472
- }
3473
- }
3474
-
3475
- static VALUE sfloat_ceil(VALUE self) {
3476
- ndfunc_arg_in_t ain[1] = { { cT, 0 } };
3477
- ndfunc_arg_out_t aout[1] = { { cT, 0 } };
3478
- ndfunc_t ndf = { iter_sfloat_ceil, FULL_LOOP, 1, 1, ain, aout };
3479
-
3480
- return na_ndloop(&ndf, 1, self);
3481
- }
3482
-
3483
- static void iter_sfloat_trunc(na_loop_t* const lp) {
3484
- size_t i, n;
3485
- char *p1, *p2;
3486
- ssize_t s1, s2;
3487
- size_t *idx1, *idx2;
3488
- dtype x;
3489
-
3490
- INIT_COUNTER(lp, n);
3491
- INIT_PTR_IDX(lp, 0, p1, s1, idx1);
3492
- INIT_PTR_IDX(lp, 1, p2, s2, idx2);
3493
-
3494
- if (idx1) {
3495
- if (idx2) {
3496
- for (i = 0; i < n; i++) {
3497
- GET_DATA_INDEX(p1, idx1, dtype, x);
3498
- x = m_trunc(x);
3499
- SET_DATA_INDEX(p2, idx2, dtype, x);
3500
- }
3501
- } else {
3502
- for (i = 0; i < n; i++) {
3503
- GET_DATA_INDEX(p1, idx1, dtype, x);
3504
- x = m_trunc(x);
3505
- SET_DATA_STRIDE(p2, s2, dtype, x);
3506
- }
3507
- }
3508
- } else {
3509
- if (idx2) {
3510
- for (i = 0; i < n; i++) {
3511
- GET_DATA_STRIDE(p1, s1, dtype, x);
3512
- x = m_trunc(x);
3513
- SET_DATA_INDEX(p2, idx2, dtype, x);
3514
- }
3515
- } else {
3516
- //
3517
- if (is_aligned(p1, sizeof(dtype)) && is_aligned(p2, sizeof(dtype))) {
3518
- if (s1 == sizeof(dtype) && s2 == sizeof(dtype)) {
3519
- for (i = 0; i < n; i++) {
3520
- ((dtype*)p2)[i] = m_trunc(((dtype*)p1)[i]);
3521
- }
3522
- return;
3523
- }
3524
- if (is_aligned_step(s1, sizeof(dtype)) && is_aligned_step(s2, sizeof(dtype))) {
3525
- //
3526
- for (i = 0; i < n; i++) {
3527
- *(dtype*)p2 = m_trunc(*(dtype*)p1);
3528
- p1 += s1;
3529
- p2 += s2;
3530
- }
3531
- return;
3532
- //
3533
- }
3534
- }
3535
- for (i = 0; i < n; i++) {
3536
- GET_DATA_STRIDE(p1, s1, dtype, x);
3537
- x = m_trunc(x);
3538
- SET_DATA_STRIDE(p2, s2, dtype, x);
3539
- }
3540
- //
3541
- }
3542
- }
3543
- }
3544
-
3545
- static VALUE sfloat_trunc(VALUE self) {
3546
- ndfunc_arg_in_t ain[1] = { { cT, 0 } };
3547
- ndfunc_arg_out_t aout[1] = { { cT, 0 } };
3548
- ndfunc_t ndf = { iter_sfloat_trunc, FULL_LOOP, 1, 1, ain, aout };
3549
-
3550
- return na_ndloop(&ndf, 1, self);
3551
- }
3552
-
3553
- static void iter_sfloat_rint(na_loop_t* const lp) {
3554
- size_t i, n;
3555
- char *p1, *p2;
3556
- ssize_t s1, s2;
3557
- size_t *idx1, *idx2;
3558
- dtype x;
3559
-
3560
- INIT_COUNTER(lp, n);
3561
- INIT_PTR_IDX(lp, 0, p1, s1, idx1);
3562
- INIT_PTR_IDX(lp, 1, p2, s2, idx2);
3563
-
3564
- if (idx1) {
3565
- if (idx2) {
3566
- for (i = 0; i < n; i++) {
3567
- GET_DATA_INDEX(p1, idx1, dtype, x);
3568
- x = m_rint(x);
3569
- SET_DATA_INDEX(p2, idx2, dtype, x);
3570
- }
3571
- } else {
3572
- for (i = 0; i < n; i++) {
3573
- GET_DATA_INDEX(p1, idx1, dtype, x);
3574
- x = m_rint(x);
3575
- SET_DATA_STRIDE(p2, s2, dtype, x);
3576
- }
3577
- }
3578
- } else {
3579
- if (idx2) {
3580
- for (i = 0; i < n; i++) {
3581
- GET_DATA_STRIDE(p1, s1, dtype, x);
3582
- x = m_rint(x);
3583
- SET_DATA_INDEX(p2, idx2, dtype, x);
3584
- }
3585
- } else {
3586
- //
3587
- if (is_aligned(p1, sizeof(dtype)) && is_aligned(p2, sizeof(dtype))) {
3588
- if (s1 == sizeof(dtype) && s2 == sizeof(dtype)) {
3589
- for (i = 0; i < n; i++) {
3590
- ((dtype*)p2)[i] = m_rint(((dtype*)p1)[i]);
3591
- }
3592
- return;
3593
- }
3594
- if (is_aligned_step(s1, sizeof(dtype)) && is_aligned_step(s2, sizeof(dtype))) {
3595
- //
3596
- for (i = 0; i < n; i++) {
3597
- *(dtype*)p2 = m_rint(*(dtype*)p1);
3598
- p1 += s1;
3599
- p2 += s2;
3600
- }
3601
- return;
3602
- //
3603
- }
3604
- }
3605
- for (i = 0; i < n; i++) {
3606
- GET_DATA_STRIDE(p1, s1, dtype, x);
3607
- x = m_rint(x);
3608
- SET_DATA_STRIDE(p2, s2, dtype, x);
3609
- }
3610
- //
3611
- }
3612
- }
3613
- }
3614
-
3615
- static VALUE sfloat_rint(VALUE self) {
- ndfunc_arg_in_t ain[1] = { { cT, 0 } };
- ndfunc_arg_out_t aout[1] = { { cT, 0 } };
- ndfunc_t ndf = { iter_sfloat_rint, FULL_LOOP, 1, 1, ain, aout };
-
- return na_ndloop(&ndf, 1, self);
- }
-
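For context (not part of the diff itself): floor, round, ceil, trunc and rint above each reduce a value to an integer-valued float; the C math functions of the same names differ only in how fractions and ties are resolved. A small standalone comparison (rintf assumes the default round-to-nearest-even rounding mode):

    #include <stdio.h>
    #include <math.h>

    int main(void) {
        const float v[4] = { 2.5f, -2.5f, 1.5f, -0.7f };
        for (int i = 0; i < 4; i++) {
            float x = v[i];
            /* roundf rounds halves away from zero; rintf follows the current
               rounding mode, round-to-nearest-even by default. */
            printf("x=%5.2f floor=%5.1f ceil=%5.1f trunc=%5.1f round=%5.1f rint=%5.1f\n",
                   x, floorf(x), ceilf(x), truncf(x), roundf(x), rintf(x));
        }
        return 0;
    }

(Link with -lm where needed.)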
3623
- #define check_intdivzero(y) \
- {}
-
3626
- static void iter_sfloat_copysign(na_loop_t* const lp) {
3627
- size_t i = 0;
3628
- size_t n;
3629
- char *p1, *p2, *p3;
3630
- ssize_t s1, s2, s3;
3631
-
3632
- INIT_COUNTER(lp, n);
3633
- INIT_PTR(lp, 0, p1, s1);
3634
- INIT_PTR(lp, 1, p2, s2);
3635
- INIT_PTR(lp, 2, p3, s3);
3636
-
3637
- //
3638
- if (is_aligned(p1, sizeof(dtype)) && is_aligned(p2, sizeof(dtype)) &&
3639
- is_aligned(p3, sizeof(dtype))) {
3640
-
3641
- if (s1 == sizeof(dtype) && s2 == sizeof(dtype) && s3 == sizeof(dtype)) {
3642
- if (p1 == p3) { // inplace case
3643
- for (; i < n; i++) {
3644
- check_intdivzero(((dtype*)p2)[i]);
3645
- ((dtype*)p1)[i] = m_copysign(((dtype*)p1)[i], ((dtype*)p2)[i]);
3646
- }
3647
- } else {
3648
- for (; i < n; i++) {
3649
- check_intdivzero(((dtype*)p2)[i]);
3650
- ((dtype*)p3)[i] = m_copysign(((dtype*)p1)[i], ((dtype*)p2)[i]);
3651
- }
3652
- }
3653
- return;
3654
- }
3655
-
3656
- if (is_aligned_step(s1, sizeof(dtype)) && is_aligned_step(s2, sizeof(dtype)) &&
3657
- is_aligned_step(s3, sizeof(dtype))) {
3658
- //
3659
-
3660
- if (s2 == 0) { // Broadcasting from scalar value.
3661
- check_intdivzero(*(dtype*)p2);
3662
- if (s1 == sizeof(dtype) && s3 == sizeof(dtype)) {
3663
- if (p1 == p3) { // inplace case
3664
- for (; i < n; i++) {
3665
- ((dtype*)p1)[i] = m_copysign(((dtype*)p1)[i], *(dtype*)p2);
3666
- }
3667
- } else {
3668
- for (; i < n; i++) {
3669
- ((dtype*)p3)[i] = m_copysign(((dtype*)p1)[i], *(dtype*)p2);
3670
- }
3671
- }
3672
- } else {
3673
- for (i = 0; i < n; i++) {
3674
- *(dtype*)p3 = m_copysign(*(dtype*)p1, *(dtype*)p2);
3675
- p1 += s1;
3676
- p3 += s3;
3677
- }
3678
- }
3679
- } else {
3680
- if (p1 == p3) { // inplace case
3681
- for (i = 0; i < n; i++) {
3682
- check_intdivzero(*(dtype*)p2);
3683
- *(dtype*)p1 = m_copysign(*(dtype*)p1, *(dtype*)p2);
3684
- p1 += s1;
3685
- p2 += s2;
3686
- }
3687
- } else {
3688
- for (i = 0; i < n; i++) {
3689
- check_intdivzero(*(dtype*)p2);
3690
- *(dtype*)p3 = m_copysign(*(dtype*)p1, *(dtype*)p2);
3691
- p1 += s1;
3692
- p2 += s2;
3693
- p3 += s3;
3694
- }
3695
- }
3696
- }
3697
-
3698
- return;
3699
- //
3700
- }
3701
- }
3702
- for (i = 0; i < n; i++) {
3703
- dtype x, y, z;
3704
- GET_DATA_STRIDE(p1, s1, dtype, x);
3705
- GET_DATA_STRIDE(p2, s2, dtype, y);
3706
- check_intdivzero(y);
3707
- z = m_copysign(x, y);
3708
- SET_DATA_STRIDE(p3, s3, dtype, z);
3709
- }
3710
- //
3711
- }
3712
- #undef check_intdivzero
3713
-
3714
- static VALUE sfloat_copysign_self(VALUE self, VALUE other) {
- ndfunc_arg_in_t ain[2] = { { cT, 0 }, { cT, 0 } };
- ndfunc_arg_out_t aout[1] = { { cT, 0 } };
- ndfunc_t ndf = { iter_sfloat_copysign, STRIDE_LOOP, 2, 1, ain, aout };
-
- return na_ndloop(&ndf, 2, self, other);
- }
-
- static VALUE sfloat_copysign(VALUE self, VALUE other) {
-
- VALUE klass, v;
-
- klass = na_upcast(rb_obj_class(self), rb_obj_class(other));
- if (klass == cT) {
- return sfloat_copysign_self(self, other);
- } else {
- v = rb_funcall(klass, id_cast, 1, self);
- return rb_funcall(v, id_copysign, 1, other);
- }
- }
-
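For context (not part of the diff itself): in the copysign iterator above, a second input with stride 0 (the s2 == 0 branch, commented "Broadcasting from scalar value") is a scalar broadcast across the array, so the value at p2 is read in place and only p1/p3 advance. A plain-C sketch of that stride-0 broadcast, using float buffers and copysignf in place of the gem's m_copysign:

    #include <stdio.h>
    #include <stddef.h>
    #include <math.h>

    /* Binary elementwise loop over byte strides; s2 == 0 broadcasts the
       second operand, which is read in place and never advanced. */
    static void bcast_copysign(char *p1, ptrdiff_t s1,
                               char *p2, ptrdiff_t s2,
                               char *p3, ptrdiff_t s3, size_t n) {
        for (size_t i = 0; i < n; i++) {
            *(float *)p3 = copysignf(*(float *)p1, *(float *)p2);
            p1 += s1;
            p2 += s2;   /* stays put when s2 == 0 */
            p3 += s3;
        }
    }

    int main(void) {
        float a[4] = { 1.0f, -2.0f, 3.0f, -4.0f }, sign = -1.0f, out[4];
        bcast_copysign((char *)a, sizeof(float), (char *)&sign, 0,
                       (char *)out, sizeof(float), 4);
        for (int i = 0; i < 4; i++) printf("%g ", out[i]);   /* -1 -2 -3 -4 */
        printf("\n");
        return 0;
    }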
3735
- static void iter_sfloat_signbit(na_loop_t* const lp) {
3736
- size_t i;
3737
- char* p1;
3738
- BIT_DIGIT* a2;
3739
- size_t p2;
3740
- ssize_t s1, s2;
3741
- size_t* idx1;
3742
- dtype x;
3743
- BIT_DIGIT b;
3744
- INIT_COUNTER(lp, i);
3745
- INIT_PTR_IDX(lp, 0, p1, s1, idx1);
3746
- INIT_PTR_BIT(lp, 1, a2, p2, s2);
3747
- if (idx1) {
3748
- for (; i--;) {
3749
- GET_DATA_INDEX(p1, idx1, dtype, x);
3750
- b = (m_signbit(x)) ? 1 : 0;
3751
- STORE_BIT(a2, p2, b);
3752
- p2 += s2;
3753
- }
3754
- } else {
3755
- for (; i--;) {
3756
- GET_DATA_STRIDE(p1, s1, dtype, x);
3757
- b = (m_signbit(x)) ? 1 : 0;
3758
- STORE_BIT(a2, p2, b);
3759
- p2 += s2;
3760
- }
3761
- }
3762
- }
3763
-
3764
- static VALUE sfloat_signbit(VALUE self) {
- ndfunc_arg_in_t ain[1] = { { cT, 0 } };
- ndfunc_arg_out_t aout[1] = { { numo_cBit, 0 } };
- ndfunc_t ndf = { iter_sfloat_signbit, FULL_LOOP, 1, 1, ain, aout };
-
- return na_ndloop(&ndf, 1, self);
- }
-
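For context (not part of the diff itself): m_signbit in the iterator above presumably maps to C's signbit(), i.e. a test of the IEEE sign bit rather than an x < 0 comparison; the difference shows up for negative zero. A short standalone check:

    #include <stdio.h>
    #include <math.h>

    int main(void) {
        const float v[4] = { -0.0f, 0.0f, -3.0f, 3.0f };
        for (int i = 0; i < 4; i++) {
            float x = v[i];
            /* signbit() inspects the sign bit directly, so -0.0f reports 1
               even though (x < 0.0f) is false. */
            printf("x=%g  signbit=%d  (x<0)=%d\n", x, signbit(x) ? 1 : 0, x < 0.0f ? 1 : 0);
        }
        return 0;
    }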
3772
- static void iter_sfloat_modf(na_loop_t* const lp) {
3773
- size_t i;
3774
- char *p1, *p2, *p3;
3775
- ssize_t s1, s2, s3;
3776
- dtype x, y, z;
3777
- INIT_COUNTER(lp, i);
3778
- INIT_PTR(lp, 0, p1, s1);
3779
- INIT_PTR(lp, 1, p2, s2);
3780
- INIT_PTR(lp, 2, p3, s3);
3781
- for (; i--;) {
3782
- GET_DATA_STRIDE(p1, s1, dtype, x);
3783
- m_modf(x, y, z);
3784
- SET_DATA_STRIDE(p2, s2, dtype, y);
3785
- SET_DATA_STRIDE(p3, s3, dtype, z);
3786
- }
3787
- }
3788
-
3789
- static VALUE sfloat_modf(VALUE self) {
- ndfunc_arg_in_t ain[1] = { { cT, 0 } };
- ndfunc_arg_out_t aout[2] = { { cT, 0 }, { cT, 0 } };
- ndfunc_t ndf = { iter_sfloat_modf, STRIDE_LOOP, 1, 2, ain, aout };
-
- return na_ndloop(&ndf, 1, self);
- }
-
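For context (not part of the diff itself): sfloat_modf above is the one-input, two-output case: the ndfunc is declared with 1 input and 2 outputs, and the iterator stores two values per element via m_modf. C's modff performs the equivalent per-element split; which part lands in which output slot is up to the gem's m_modf definition, so the sketch below only shows the split itself:

    #include <stdio.h>
    #include <math.h>

    int main(void) {
        const float v[3] = { 3.75f, -1.25f, 10.0f };
        for (int i = 0; i < 3; i++) {
            float ipart;                       /* integral part */
            float frac = modff(v[i], &ipart);  /* fractional part, same sign as input */
            printf("x=%6.2f  frac=%6.2f  int=%6.2f\n", v[i], frac, ipart);
        }
        return 0;
    }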
3797
- static void iter_sfloat_gt(na_loop_t* const lp) {
3798
- size_t i;
3799
- char *p1, *p2;
3800
- BIT_DIGIT* a3;
3801
- size_t p3;
3802
- ssize_t s1, s2, s3;
3803
- dtype x, y;
3804
- BIT_DIGIT b;
3805
- INIT_COUNTER(lp, i);
3806
- INIT_PTR(lp, 0, p1, s1);
3807
- INIT_PTR(lp, 1, p2, s2);
3808
- INIT_PTR_BIT(lp, 2, a3, p3, s3);
3809
- for (; i--;) {
3810
- GET_DATA_STRIDE(p1, s1, dtype, x);
3811
- GET_DATA_STRIDE(p2, s2, dtype, y);
3812
- b = (m_gt(x, y)) ? 1 : 0;
3813
- STORE_BIT(a3, p3, b);
3814
- p3 += s3;
3815
- }
3816
- }
3817
-
3818
- static VALUE sfloat_gt_self(VALUE self, VALUE other) {
3819
- ndfunc_arg_in_t ain[2] = { { cT, 0 }, { cT, 0 } };
3820
- ndfunc_arg_out_t aout[1] = { { numo_cBit, 0 } };
3821
- ndfunc_t ndf = { iter_sfloat_gt, STRIDE_LOOP, 2, 1, ain, aout };
3822
-
3823
- return na_ndloop(&ndf, 2, self, other);
3824
- }
3825
-
3826
- static VALUE sfloat_gt(VALUE self, VALUE other) {
3827
-
3828
- VALUE klass, v;
3829
- klass = na_upcast(rb_obj_class(self), rb_obj_class(other));
3830
- if (klass == cT) {
3831
- return sfloat_gt_self(self, other);
3832
- } else {
3833
- v = rb_funcall(klass, id_cast, 1, self);
3834
- return rb_funcall(v, id_gt, 1, other);
3835
- }
3836
- }
3837
-
3838
- static void iter_sfloat_ge(na_loop_t* const lp) {
3839
- size_t i;
3840
- char *p1, *p2;
3841
- BIT_DIGIT* a3;
3842
- size_t p3;
3843
- ssize_t s1, s2, s3;
3844
- dtype x, y;
3845
- BIT_DIGIT b;
3846
- INIT_COUNTER(lp, i);
3847
- INIT_PTR(lp, 0, p1, s1);
3848
- INIT_PTR(lp, 1, p2, s2);
3849
- INIT_PTR_BIT(lp, 2, a3, p3, s3);
3850
- for (; i--;) {
3851
- GET_DATA_STRIDE(p1, s1, dtype, x);
3852
- GET_DATA_STRIDE(p2, s2, dtype, y);
3853
- b = (m_ge(x, y)) ? 1 : 0;
3854
- STORE_BIT(a3, p3, b);
3855
- p3 += s3;
3856
- }
3857
- }
3858
-
3859
- static VALUE sfloat_ge_self(VALUE self, VALUE other) {
3860
- ndfunc_arg_in_t ain[2] = { { cT, 0 }, { cT, 0 } };
3861
- ndfunc_arg_out_t aout[1] = { { numo_cBit, 0 } };
3862
- ndfunc_t ndf = { iter_sfloat_ge, STRIDE_LOOP, 2, 1, ain, aout };
3863
-
3864
- return na_ndloop(&ndf, 2, self, other);
3865
- }
3866
-
3867
- static VALUE sfloat_ge(VALUE self, VALUE other) {
3868
-
3869
- VALUE klass, v;
3870
- klass = na_upcast(rb_obj_class(self), rb_obj_class(other));
3871
- if (klass == cT) {
3872
- return sfloat_ge_self(self, other);
3873
- } else {
3874
- v = rb_funcall(klass, id_cast, 1, self);
3875
- return rb_funcall(v, id_ge, 1, other);
3876
- }
3877
- }
3878
-
3879
- static void iter_sfloat_lt(na_loop_t* const lp) {
3880
- size_t i;
3881
- char *p1, *p2;
3882
- BIT_DIGIT* a3;
3883
- size_t p3;
3884
- ssize_t s1, s2, s3;
3885
- dtype x, y;
3886
- BIT_DIGIT b;
3887
- INIT_COUNTER(lp, i);
3888
- INIT_PTR(lp, 0, p1, s1);
3889
- INIT_PTR(lp, 1, p2, s2);
3890
- INIT_PTR_BIT(lp, 2, a3, p3, s3);
3891
- for (; i--;) {
3892
- GET_DATA_STRIDE(p1, s1, dtype, x);
3893
- GET_DATA_STRIDE(p2, s2, dtype, y);
3894
- b = (m_lt(x, y)) ? 1 : 0;
3895
- STORE_BIT(a3, p3, b);
3896
- p3 += s3;
3897
- }
3898
- }
3899
-
3900
- static VALUE sfloat_lt_self(VALUE self, VALUE other) {
3901
- ndfunc_arg_in_t ain[2] = { { cT, 0 }, { cT, 0 } };
3902
- ndfunc_arg_out_t aout[1] = { { numo_cBit, 0 } };
3903
- ndfunc_t ndf = { iter_sfloat_lt, STRIDE_LOOP, 2, 1, ain, aout };
3904
-
3905
- return na_ndloop(&ndf, 2, self, other);
3906
- }
3907
-
3908
- static VALUE sfloat_lt(VALUE self, VALUE other) {
3909
-
3910
- VALUE klass, v;
3911
- klass = na_upcast(rb_obj_class(self), rb_obj_class(other));
3912
- if (klass == cT) {
3913
- return sfloat_lt_self(self, other);
3914
- } else {
3915
- v = rb_funcall(klass, id_cast, 1, self);
3916
- return rb_funcall(v, id_lt, 1, other);
3917
- }
3918
- }
3919
-
3920
- static void iter_sfloat_le(na_loop_t* const lp) {
3921
- size_t i;
3922
- char *p1, *p2;
3923
- BIT_DIGIT* a3;
3924
- size_t p3;
3925
- ssize_t s1, s2, s3;
3926
- dtype x, y;
3927
- BIT_DIGIT b;
3928
- INIT_COUNTER(lp, i);
3929
- INIT_PTR(lp, 0, p1, s1);
3930
- INIT_PTR(lp, 1, p2, s2);
3931
- INIT_PTR_BIT(lp, 2, a3, p3, s3);
3932
- for (; i--;) {
3933
- GET_DATA_STRIDE(p1, s1, dtype, x);
3934
- GET_DATA_STRIDE(p2, s2, dtype, y);
3935
- b = (m_le(x, y)) ? 1 : 0;
3936
- STORE_BIT(a3, p3, b);
3937
- p3 += s3;
3938
- }
3939
- }
3940
-
3941
- static VALUE sfloat_le_self(VALUE self, VALUE other) {
3942
- ndfunc_arg_in_t ain[2] = { { cT, 0 }, { cT, 0 } };
3943
- ndfunc_arg_out_t aout[1] = { { numo_cBit, 0 } };
3944
- ndfunc_t ndf = { iter_sfloat_le, STRIDE_LOOP, 2, 1, ain, aout };
3945
-
3946
- return na_ndloop(&ndf, 2, self, other);
3947
- }
3948
-
3949
- static VALUE sfloat_le(VALUE self, VALUE other) {
3950
-
3951
- VALUE klass, v;
3952
- klass = na_upcast(rb_obj_class(self), rb_obj_class(other));
3953
- if (klass == cT) {
3954
- return sfloat_le_self(self, other);
3955
- } else {
3956
- v = rb_funcall(klass, id_cast, 1, self);
3957
- return rb_funcall(v, id_le, 1, other);
3958
- }
3959
- }
3960
-
3961
- static void iter_sfloat_isnan(na_loop_t* const lp) {
3962
- size_t i;
3963
- char* p1;
3964
- BIT_DIGIT* a2;
3965
- size_t p2;
3966
- ssize_t s1, s2;
3967
- size_t* idx1;
3968
- dtype x;
3969
- BIT_DIGIT b;
3970
- INIT_COUNTER(lp, i);
3971
- INIT_PTR_IDX(lp, 0, p1, s1, idx1);
3972
- INIT_PTR_BIT(lp, 1, a2, p2, s2);
3973
- if (idx1) {
3974
- for (; i--;) {
3975
- GET_DATA_INDEX(p1, idx1, dtype, x);
3976
- b = (m_isnan(x)) ? 1 : 0;
3977
- STORE_BIT(a2, p2, b);
3978
- p2 += s2;
3979
- }
3980
- } else {
3981
- for (; i--;) {
3982
- GET_DATA_STRIDE(p1, s1, dtype, x);
3983
- b = (m_isnan(x)) ? 1 : 0;
3984
- STORE_BIT(a2, p2, b);
3985
- p2 += s2;
+ return;
+ //
+ }
+ }
+ for (i = 0; i < n; i++) {
+ GET_DATA_STRIDE(p1, s1, dtype, x);
+ x = m_sign(x);
+ SET_DATA_STRIDE(p2, s2, dtype, x);
+ }
+ //
  }
  }
  }

- static VALUE sfloat_isnan(VALUE self) {
+ static VALUE sfloat_sign(VALUE self) {
  ndfunc_arg_in_t ain[1] = { { cT, 0 } };
- ndfunc_arg_out_t aout[1] = { { numo_cBit, 0 } };
- ndfunc_t ndf = { iter_sfloat_isnan, FULL_LOOP, 1, 1, ain, aout };
+ ndfunc_arg_out_t aout[1] = { { cT, 0 } };
+ ndfunc_t ndf = { iter_sfloat_sign, FULL_LOOP, 1, 1, ain, aout };

  return na_ndloop(&ndf, 1, self);
  }

3998
- static void iter_sfloat_isinf(na_loop_t* const lp) {
3999
- size_t i;
4000
- char* p1;
4001
- BIT_DIGIT* a2;
4002
- size_t p2;
1897
+ static void iter_sfloat_square(na_loop_t* const lp) {
1898
+ size_t i, n;
1899
+ char *p1, *p2;
4003
1900
  ssize_t s1, s2;
4004
- size_t* idx1;
1901
+ size_t *idx1, *idx2;
4005
1902
  dtype x;
4006
- BIT_DIGIT b;
4007
- INIT_COUNTER(lp, i);
1903
+
1904
+ INIT_COUNTER(lp, n);
4008
1905
  INIT_PTR_IDX(lp, 0, p1, s1, idx1);
4009
- INIT_PTR_BIT(lp, 1, a2, p2, s2);
1906
+ INIT_PTR_IDX(lp, 1, p2, s2, idx2);
1907
+
4010
1908
  if (idx1) {
4011
- for (; i--;) {
4012
- GET_DATA_INDEX(p1, idx1, dtype, x);
4013
- b = (m_isinf(x)) ? 1 : 0;
4014
- STORE_BIT(a2, p2, b);
4015
- p2 += s2;
1909
+ if (idx2) {
1910
+ for (i = 0; i < n; i++) {
1911
+ GET_DATA_INDEX(p1, idx1, dtype, x);
1912
+ x = m_square(x);
1913
+ SET_DATA_INDEX(p2, idx2, dtype, x);
1914
+ }
1915
+ } else {
1916
+ for (i = 0; i < n; i++) {
1917
+ GET_DATA_INDEX(p1, idx1, dtype, x);
1918
+ x = m_square(x);
1919
+ SET_DATA_STRIDE(p2, s2, dtype, x);
1920
+ }
4016
1921
  }
4017
1922
  } else {
4018
- for (; i--;) {
4019
- GET_DATA_STRIDE(p1, s1, dtype, x);
4020
- b = (m_isinf(x)) ? 1 : 0;
4021
- STORE_BIT(a2, p2, b);
4022
- p2 += s2;
1923
+ if (idx2) {
1924
+ for (i = 0; i < n; i++) {
1925
+ GET_DATA_STRIDE(p1, s1, dtype, x);
1926
+ x = m_square(x);
1927
+ SET_DATA_INDEX(p2, idx2, dtype, x);
1928
+ }
1929
+ } else {
1930
+ //
1931
+ if (is_aligned(p1, sizeof(dtype)) && is_aligned(p2, sizeof(dtype))) {
1932
+ if (s1 == sizeof(dtype) && s2 == sizeof(dtype)) {
1933
+ for (i = 0; i < n; i++) {
1934
+ ((dtype*)p2)[i] = m_square(((dtype*)p1)[i]);
1935
+ }
1936
+ return;
1937
+ }
1938
+ if (is_aligned_step(s1, sizeof(dtype)) && is_aligned_step(s2, sizeof(dtype))) {
1939
+ //
1940
+ for (i = 0; i < n; i++) {
1941
+ *(dtype*)p2 = m_square(*(dtype*)p1);
1942
+ p1 += s1;
1943
+ p2 += s2;
1944
+ }
1945
+ return;
1946
+ //
1947
+ }
1948
+ }
1949
+ for (i = 0; i < n; i++) {
1950
+ GET_DATA_STRIDE(p1, s1, dtype, x);
1951
+ x = m_square(x);
1952
+ SET_DATA_STRIDE(p2, s2, dtype, x);
1953
+ }
1954
+ //
4023
1955
  }
4024
1956
  }
4025
1957
  }
4026
1958
 
4027
- static VALUE sfloat_isinf(VALUE self) {
1959
+ static VALUE sfloat_square(VALUE self) {
4028
1960
  ndfunc_arg_in_t ain[1] = { { cT, 0 } };
4029
- ndfunc_arg_out_t aout[1] = { { numo_cBit, 0 } };
4030
- ndfunc_t ndf = { iter_sfloat_isinf, FULL_LOOP, 1, 1, ain, aout };
1961
+ ndfunc_arg_out_t aout[1] = { { cT, 0 } };
1962
+ ndfunc_t ndf = { iter_sfloat_square, FULL_LOOP, 1, 1, ain, aout };
4031
1963
 
4032
1964
  return na_ndloop(&ndf, 1, self);
4033
1965
  }
4034
1966
 
4035
- static void iter_sfloat_isposinf(na_loop_t* const lp) {
4036
- size_t i;
4037
- char* p1;
4038
- BIT_DIGIT* a2;
4039
- size_t p2;
4040
- ssize_t s1, s2;
4041
- size_t* idx1;
4042
- dtype x;
4043
- BIT_DIGIT b;
4044
- INIT_COUNTER(lp, i);
4045
- INIT_PTR_IDX(lp, 0, p1, s1, idx1);
4046
- INIT_PTR_BIT(lp, 1, a2, p2, s2);
4047
- if (idx1) {
4048
- for (; i--;) {
4049
- GET_DATA_INDEX(p1, idx1, dtype, x);
4050
- b = (m_isposinf(x)) ? 1 : 0;
4051
- STORE_BIT(a2, p2, b);
4052
- p2 += s2;
1967
+ #define check_intdivzero(y) \
1968
+ {}
1969
+
1970
+ static void iter_sfloat_copysign(na_loop_t* const lp) {
1971
+ size_t i = 0;
1972
+ size_t n;
1973
+ char *p1, *p2, *p3;
1974
+ ssize_t s1, s2, s3;
1975
+
1976
+ INIT_COUNTER(lp, n);
1977
+ INIT_PTR(lp, 0, p1, s1);
1978
+ INIT_PTR(lp, 1, p2, s2);
1979
+ INIT_PTR(lp, 2, p3, s3);
1980
+
1981
+ //
1982
+ if (is_aligned(p1, sizeof(dtype)) && is_aligned(p2, sizeof(dtype)) &&
1983
+ is_aligned(p3, sizeof(dtype))) {
1984
+
1985
+ if (s1 == sizeof(dtype) && s2 == sizeof(dtype) && s3 == sizeof(dtype)) {
1986
+ if (p1 == p3) { // inplace case
1987
+ for (; i < n; i++) {
1988
+ check_intdivzero(((dtype*)p2)[i]);
1989
+ ((dtype*)p1)[i] = m_copysign(((dtype*)p1)[i], ((dtype*)p2)[i]);
1990
+ }
1991
+ } else {
1992
+ for (; i < n; i++) {
1993
+ check_intdivzero(((dtype*)p2)[i]);
1994
+ ((dtype*)p3)[i] = m_copysign(((dtype*)p1)[i], ((dtype*)p2)[i]);
1995
+ }
1996
+ }
1997
+ return;
4053
1998
  }
4054
- } else {
4055
- for (; i--;) {
4056
- GET_DATA_STRIDE(p1, s1, dtype, x);
4057
- b = (m_isposinf(x)) ? 1 : 0;
4058
- STORE_BIT(a2, p2, b);
4059
- p2 += s2;
1999
+
2000
+ if (is_aligned_step(s1, sizeof(dtype)) && is_aligned_step(s2, sizeof(dtype)) &&
2001
+ is_aligned_step(s3, sizeof(dtype))) {
2002
+ //
2003
+
2004
+ if (s2 == 0) { // Broadcasting from scalar value.
2005
+ check_intdivzero(*(dtype*)p2);
2006
+ if (s1 == sizeof(dtype) && s3 == sizeof(dtype)) {
2007
+ if (p1 == p3) { // inplace case
2008
+ for (; i < n; i++) {
2009
+ ((dtype*)p1)[i] = m_copysign(((dtype*)p1)[i], *(dtype*)p2);
2010
+ }
2011
+ } else {
2012
+ for (; i < n; i++) {
2013
+ ((dtype*)p3)[i] = m_copysign(((dtype*)p1)[i], *(dtype*)p2);
2014
+ }
2015
+ }
2016
+ } else {
2017
+ for (i = 0; i < n; i++) {
2018
+ *(dtype*)p3 = m_copysign(*(dtype*)p1, *(dtype*)p2);
2019
+ p1 += s1;
2020
+ p3 += s3;
2021
+ }
2022
+ }
2023
+ } else {
2024
+ if (p1 == p3) { // inplace case
2025
+ for (i = 0; i < n; i++) {
2026
+ check_intdivzero(*(dtype*)p2);
2027
+ *(dtype*)p1 = m_copysign(*(dtype*)p1, *(dtype*)p2);
2028
+ p1 += s1;
2029
+ p2 += s2;
2030
+ }
2031
+ } else {
2032
+ for (i = 0; i < n; i++) {
2033
+ check_intdivzero(*(dtype*)p2);
2034
+ *(dtype*)p3 = m_copysign(*(dtype*)p1, *(dtype*)p2);
2035
+ p1 += s1;
2036
+ p2 += s2;
2037
+ p3 += s3;
2038
+ }
2039
+ }
2040
+ }
2041
+
2042
+ return;
2043
+ //
4060
2044
  }
4061
2045
  }
2046
+ for (i = 0; i < n; i++) {
2047
+ dtype x, y, z;
2048
+ GET_DATA_STRIDE(p1, s1, dtype, x);
2049
+ GET_DATA_STRIDE(p2, s2, dtype, y);
2050
+ check_intdivzero(y);
2051
+ z = m_copysign(x, y);
2052
+ SET_DATA_STRIDE(p3, s3, dtype, z);
2053
+ }
2054
+ //
4062
2055
  }
2056
+ #undef check_intdivzero
4063
2057
 
4064
- static VALUE sfloat_isposinf(VALUE self) {
4065
- ndfunc_arg_in_t ain[1] = { { cT, 0 } };
4066
- ndfunc_arg_out_t aout[1] = { { numo_cBit, 0 } };
4067
- ndfunc_t ndf = { iter_sfloat_isposinf, FULL_LOOP, 1, 1, ain, aout };
2058
+ static VALUE sfloat_copysign_self(VALUE self, VALUE other) {
2059
+ ndfunc_arg_in_t ain[2] = { { cT, 0 }, { cT, 0 } };
2060
+ ndfunc_arg_out_t aout[1] = { { cT, 0 } };
2061
+ ndfunc_t ndf = { iter_sfloat_copysign, STRIDE_LOOP, 2, 1, ain, aout };
4068
2062
 
4069
- return na_ndloop(&ndf, 1, self);
2063
+ return na_ndloop(&ndf, 2, self, other);
2064
+ }
2065
+
2066
+ static VALUE sfloat_copysign(VALUE self, VALUE other) {
2067
+
2068
+ VALUE klass, v;
2069
+
2070
+ klass = na_upcast(rb_obj_class(self), rb_obj_class(other));
2071
+ if (klass == cT) {
2072
+ return sfloat_copysign_self(self, other);
2073
+ } else {
2074
+ v = rb_funcall(klass, id_cast, 1, self);
2075
+ return rb_funcall(v, id_copysign, 1, other);
2076
+ }
4070
2077
  }
4071
2078
 
4072
- static void iter_sfloat_isneginf(na_loop_t* const lp) {
2079
+ static void iter_sfloat_signbit(na_loop_t* const lp) {
4073
2080
  size_t i;
4074
2081
  char* p1;
4075
2082
  BIT_DIGIT* a2;
@@ -4084,61 +2091,49 @@ static void iter_sfloat_isneginf(na_loop_t* const lp) {
4084
2091
  if (idx1) {
4085
2092
  for (; i--;) {
4086
2093
  GET_DATA_INDEX(p1, idx1, dtype, x);
4087
- b = (m_isneginf(x)) ? 1 : 0;
2094
+ b = (m_signbit(x)) ? 1 : 0;
4088
2095
  STORE_BIT(a2, p2, b);
4089
2096
  p2 += s2;
4090
2097
  }
4091
2098
  } else {
4092
2099
  for (; i--;) {
4093
2100
  GET_DATA_STRIDE(p1, s1, dtype, x);
4094
- b = (m_isneginf(x)) ? 1 : 0;
2101
+ b = (m_signbit(x)) ? 1 : 0;
4095
2102
  STORE_BIT(a2, p2, b);
4096
2103
  p2 += s2;
4097
2104
  }
4098
2105
  }
4099
2106
  }
4100
2107
 
4101
- static VALUE sfloat_isneginf(VALUE self) {
2108
+ static VALUE sfloat_signbit(VALUE self) {
4102
2109
  ndfunc_arg_in_t ain[1] = { { cT, 0 } };
4103
2110
  ndfunc_arg_out_t aout[1] = { { numo_cBit, 0 } };
4104
- ndfunc_t ndf = { iter_sfloat_isneginf, FULL_LOOP, 1, 1, ain, aout };
2111
+ ndfunc_t ndf = { iter_sfloat_signbit, FULL_LOOP, 1, 1, ain, aout };
4105
2112
 
4106
2113
  return na_ndloop(&ndf, 1, self);
4107
2114
  }
4108
2115
 
4109
- static void iter_sfloat_isfinite(na_loop_t* const lp) {
2116
+ static void iter_sfloat_modf(na_loop_t* const lp) {
4110
2117
  size_t i;
4111
- char* p1;
4112
- BIT_DIGIT* a2;
4113
- size_t p2;
4114
- ssize_t s1, s2;
4115
- size_t* idx1;
4116
- dtype x;
4117
- BIT_DIGIT b;
2118
+ char *p1, *p2, *p3;
2119
+ ssize_t s1, s2, s3;
2120
+ dtype x, y, z;
4118
2121
  INIT_COUNTER(lp, i);
4119
- INIT_PTR_IDX(lp, 0, p1, s1, idx1);
4120
- INIT_PTR_BIT(lp, 1, a2, p2, s2);
4121
- if (idx1) {
4122
- for (; i--;) {
4123
- GET_DATA_INDEX(p1, idx1, dtype, x);
4124
- b = (m_isfinite(x)) ? 1 : 0;
4125
- STORE_BIT(a2, p2, b);
4126
- p2 += s2;
4127
- }
4128
- } else {
4129
- for (; i--;) {
4130
- GET_DATA_STRIDE(p1, s1, dtype, x);
4131
- b = (m_isfinite(x)) ? 1 : 0;
4132
- STORE_BIT(a2, p2, b);
4133
- p2 += s2;
4134
- }
2122
+ INIT_PTR(lp, 0, p1, s1);
2123
+ INIT_PTR(lp, 1, p2, s2);
2124
+ INIT_PTR(lp, 2, p3, s3);
2125
+ for (; i--;) {
2126
+ GET_DATA_STRIDE(p1, s1, dtype, x);
2127
+ m_modf(x, y, z);
2128
+ SET_DATA_STRIDE(p2, s2, dtype, y);
2129
+ SET_DATA_STRIDE(p3, s3, dtype, z);
4135
2130
  }
4136
2131
  }
4137
2132
 
4138
- static VALUE sfloat_isfinite(VALUE self) {
2133
+ static VALUE sfloat_modf(VALUE self) {
4139
2134
  ndfunc_arg_in_t ain[1] = { { cT, 0 } };
4140
- ndfunc_arg_out_t aout[1] = { { numo_cBit, 0 } };
4141
- ndfunc_t ndf = { iter_sfloat_isfinite, FULL_LOOP, 1, 1, ain, aout };
2135
+ ndfunc_arg_out_t aout[2] = { { cT, 0 }, { cT, 0 } };
2136
+ ndfunc_t ndf = { iter_sfloat_modf, STRIDE_LOOP, 1, 2, ain, aout };
4142
2137
 
4143
2138
  return na_ndloop(&ndf, 1, self);
4144
2139
  }