RubyGems - numo-narray-alt - Versions diffs - 0.9.10 → 0.9.12 - Mend

numo-narray-alt 0.9.10 → 0.9.12

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (112)

checksums.yaml +4 -4
data/Gemfile +0 -1
data/LICENSE +1 -1
data/README.md +7 -0
data/ext/numo/narray/numo/narray.h +2 -2
data/ext/numo/narray/numo/types/robj_macro.h +1 -1
data/ext/numo/narray/numo/types/robject.h +1 -1
data/ext/numo/narray/src/mh/argmax.h +154 -0
data/ext/numo/narray/src/mh/argmin.h +154 -0
data/ext/numo/narray/src/mh/bincount.h +233 -0
data/ext/numo/narray/src/mh/bit/and.h +225 -0
data/ext/numo/narray/src/mh/bit/left_shift.h +225 -0
data/ext/numo/narray/src/mh/bit/not.h +173 -0
data/ext/numo/narray/src/mh/bit/or.h +225 -0
data/ext/numo/narray/src/mh/bit/right_shift.h +225 -0
data/ext/numo/narray/src/mh/bit/xor.h +225 -0
data/ext/numo/narray/src/mh/clip.h +115 -0
data/ext/numo/narray/src/mh/coerce_cast.h +9 -0
data/ext/numo/narray/src/mh/comp/binary_func.h +37 -0
data/ext/numo/narray/src/mh/comp/eq.h +26 -0
data/ext/numo/narray/src/mh/comp/ge.h +26 -0
data/ext/numo/narray/src/mh/comp/gt.h +26 -0
data/ext/numo/narray/src/mh/comp/le.h +26 -0
data/ext/numo/narray/src/mh/comp/lt.h +26 -0
data/ext/numo/narray/src/mh/comp/ne.h +26 -0
data/ext/numo/narray/src/mh/comp/nearly_eq.h +26 -0
data/ext/numo/narray/src/mh/cumprod.h +98 -0
data/ext/numo/narray/src/mh/cumsum.h +98 -0
data/ext/numo/narray/src/mh/divmod.h +142 -0
data/ext/numo/narray/src/mh/eye.h +82 -0
data/ext/numo/narray/src/mh/fill.h +94 -0
data/ext/numo/narray/src/mh/format.h +108 -0
data/ext/numo/narray/src/mh/format_to_a.h +89 -0
data/ext/numo/narray/src/mh/inspect.h +33 -0
data/ext/numo/narray/src/mh/isfinite.h +42 -0
data/ext/numo/narray/src/mh/isinf.h +42 -0
data/ext/numo/narray/src/mh/isnan.h +42 -0
data/ext/numo/narray/src/mh/isneginf.h +42 -0
data/ext/numo/narray/src/mh/isposinf.h +42 -0
data/ext/numo/narray/src/mh/logseq.h +69 -0
data/ext/numo/narray/src/mh/math/acos.h +2 -2
data/ext/numo/narray/src/mh/math/acosh.h +2 -2
data/ext/numo/narray/src/mh/math/asin.h +2 -2
data/ext/numo/narray/src/mh/math/asinh.h +2 -2
data/ext/numo/narray/src/mh/math/atan.h +2 -2
data/ext/numo/narray/src/mh/math/atan2.h +3 -3
data/ext/numo/narray/src/mh/math/atanh.h +2 -2
data/ext/numo/narray/src/mh/math/cbrt.h +2 -2
data/ext/numo/narray/src/mh/math/cos.h +2 -2
data/ext/numo/narray/src/mh/math/cosh.h +2 -2
data/ext/numo/narray/src/mh/math/erf.h +2 -2
data/ext/numo/narray/src/mh/math/erfc.h +2 -2
data/ext/numo/narray/src/mh/math/exp.h +2 -2
data/ext/numo/narray/src/mh/math/exp10.h +2 -2
data/ext/numo/narray/src/mh/math/exp2.h +2 -2
data/ext/numo/narray/src/mh/math/expm1.h +2 -2
data/ext/numo/narray/src/mh/math/frexp.h +3 -3
data/ext/numo/narray/src/mh/math/hypot.h +3 -3
data/ext/numo/narray/src/mh/math/ldexp.h +3 -3
data/ext/numo/narray/src/mh/math/log.h +2 -2
data/ext/numo/narray/src/mh/math/log10.h +2 -2
data/ext/numo/narray/src/mh/math/log1p.h +2 -2
data/ext/numo/narray/src/mh/math/log2.h +2 -2
data/ext/numo/narray/src/mh/math/sin.h +2 -2
data/ext/numo/narray/src/mh/math/sinc.h +2 -2
data/ext/numo/narray/src/mh/math/sinh.h +2 -2
data/ext/numo/narray/src/mh/math/sqrt.h +8 -8
data/ext/numo/narray/src/mh/math/tan.h +2 -2
data/ext/numo/narray/src/mh/math/tanh.h +2 -2
data/ext/numo/narray/src/mh/math/unary_func.h +3 -3
data/ext/numo/narray/src/mh/max.h +69 -0
data/ext/numo/narray/src/mh/max_index.h +184 -0
data/ext/numo/narray/src/mh/maximum.h +116 -0
data/ext/numo/narray/src/mh/min.h +69 -0
data/ext/numo/narray/src/mh/min_index.h +184 -0
data/ext/numo/narray/src/mh/minimum.h +116 -0
data/ext/numo/narray/src/mh/minmax.h +77 -0
data/ext/numo/narray/src/mh/mulsum.h +185 -0
data/ext/numo/narray/src/mh/op/add.h +78 -0
data/ext/numo/narray/src/mh/op/binary_func.h +423 -0
data/ext/numo/narray/src/mh/op/div.h +118 -0
data/ext/numo/narray/src/mh/op/mod.h +108 -0
data/ext/numo/narray/src/mh/op/mul.h +78 -0
data/ext/numo/narray/src/mh/op/sub.h +78 -0
data/ext/numo/narray/src/mh/prod.h +69 -0
data/ext/numo/narray/src/mh/ptp.h +69 -0
data/ext/numo/narray/src/mh/rand.h +315 -0
data/ext/numo/narray/src/mh/round/ceil.h +11 -0
data/ext/numo/narray/src/mh/round/floor.h +11 -0
data/ext/numo/narray/src/mh/round/rint.h +9 -0
data/ext/numo/narray/src/mh/round/round.h +11 -0
data/ext/numo/narray/src/mh/round/trunc.h +11 -0
data/ext/numo/narray/src/mh/round/unary_func.h +127 -0
data/ext/numo/narray/src/mh/seq.h +130 -0
data/ext/numo/narray/src/mh/sum.h +69 -0
data/ext/numo/narray/src/mh/to_a.h +78 -0
data/ext/numo/narray/src/t_bit.c +45 -234
data/ext/numo/narray/src/t_dcomplex.c +608 -2369
data/ext/numo/narray/src/t_dfloat.c +485 -3736
data/ext/numo/narray/src/t_int16.c +743 -3444
data/ext/numo/narray/src/t_int32.c +745 -3445
data/ext/numo/narray/src/t_int64.c +743 -3446
data/ext/numo/narray/src/t_int8.c +678 -3040
data/ext/numo/narray/src/t_robject.c +771 -3548
data/ext/numo/narray/src/t_scomplex.c +607 -2368
data/ext/numo/narray/src/t_sfloat.c +440 -3693
data/ext/numo/narray/src/t_uint16.c +743 -3440
data/ext/numo/narray/src/t_uint32.c +743 -3440
data/ext/numo/narray/src/t_uint64.c +743 -3442
data/ext/numo/narray/src/t_uint8.c +678 -3038
data/lib/numo/narray.rb +2 -3
metadata +62 -3

data/ext/numo/narray/src/t_dfloat.c CHANGED

@@ -42,10 +42,59 @@ static ID id_to_a;
 VALUE cT;
 extern VALUE cRT;
+#include "mh/coerce_cast.h"
+#include "mh/to_a.h"
+#include "mh/fill.h"
+#include "mh/format.h"
+#include "mh/format_to_a.h"
+#include "mh/inspect.h"
+#include "mh/op/add.h"
+#include "mh/op/sub.h"
+#include "mh/op/mul.h"
+#include "mh/op/div.h"
+#include "mh/op/mod.h"
+#include "mh/divmod.h"
+#include "mh/round/floor.h"
+#include "mh/round/round.h"
+#include "mh/round/ceil.h"
+#include "mh/round/trunc.h"
+#include "mh/round/rint.h"
+#include "mh/comp/eq.h"
+#include "mh/comp/ne.h"
+#include "mh/comp/nearly_eq.h"
+#include "mh/comp/gt.h"
+#include "mh/comp/ge.h"
+#include "mh/comp/lt.h"
+#include "mh/comp/le.h"
+#include "mh/clip.h"
+#include "mh/isnan.h"
+#include "mh/isinf.h"
+#include "mh/isposinf.h"
+#include "mh/isneginf.h"
+#include "mh/isfinite.h"
+#include "mh/sum.h"
+#include "mh/prod.h"
 #include "mh/mean.h"
 #include "mh/var.h"
 #include "mh/stddev.h"
 #include "mh/rms.h"
+#include "mh/min.h"
+#include "mh/max.h"
+#include "mh/ptp.h"
+#include "mh/max_index.h"
+#include "mh/min_index.h"
+#include "mh/argmax.h"
+#include "mh/argmin.h"
+#include "mh/maximum.h"
+#include "mh/minimum.h"
+#include "mh/minmax.h"
+#include "mh/cumsum.h"
+#include "mh/cumprod.h"
+#include "mh/mulsum.h"
+#include "mh/seq.h"
+#include "mh/logseq.h"
+#include "mh/eye.h"
+#include "mh/rand.h"
 #include "mh/math/sqrt.h"
 #include "mh/math/cbrt.h"
 #include "mh/math/log.h"
@@ -78,10 +127,66 @@ extern VALUE cRT;
 typedef double dfloat; // Type aliases for shorter notation
                        // following the codebase naming convention.
+DEF_NARRAY_COERCE_CAST_METHOD_FUNC(dfloat)
+DEF_NARRAY_TO_A_METHOD_FUNC(dfloat)
+DEF_NARRAY_FILL_METHOD_FUNC(dfloat)
+DEF_NARRAY_FORMAT_METHOD_FUNC(dfloat)
+DEF_NARRAY_FORMAT_TO_A_METHOD_FUNC(dfloat)
+DEF_NARRAY_INSPECT_METHOD_FUNC(dfloat)
+#ifdef __SSE2__
+DEF_NARRAY_DFLT_ADD_SSE2_METHOD_FUNC()
+DEF_NARRAY_DFLT_SUB_SSE2_METHOD_FUNC()
+DEF_NARRAY_DFLT_MUL_SSE2_METHOD_FUNC()
+DEF_NARRAY_DFLT_DIV_SSE2_METHOD_FUNC()
+#else
+DEF_NARRAY_ADD_METHOD_FUNC(dfloat, numo_cDFloat)
+DEF_NARRAY_SUB_METHOD_FUNC(dfloat, numo_cDFloat)
+DEF_NARRAY_MUL_METHOD_FUNC(dfloat, numo_cDFloat)
+DEF_NARRAY_FLT_DIV_METHOD_FUNC(dfloat, numo_cDFloat)
+#endif
+DEF_NARRAY_FLT_MOD_METHOD_FUNC(dfloat, numo_cDFloat)
+DEF_NARRAY_FLT_DIVMOD_METHOD_FUNC(dfloat, numo_cDFloat)
+DEF_NARRAY_FLT_FLOOR_METHOD_FUNC(dfloat, numo_cDFloat)
+DEF_NARRAY_FLT_ROUND_METHOD_FUNC(dfloat, numo_cDFloat)
+DEF_NARRAY_FLT_CEIL_METHOD_FUNC(dfloat, numo_cDFloat)
+DEF_NARRAY_FLT_TRUNC_METHOD_FUNC(dfloat, numo_cDFloat)
+DEF_NARRAY_FLT_RINT_METHOD_FUNC(dfloat, numo_cDFloat)
+DEF_NARRAY_EQ_METHOD_FUNC(dfloat, numo_cDFloat)
+DEF_NARRAY_NE_METHOD_FUNC(dfloat, numo_cDFloat)
+DEF_NARRAY_NEARLY_EQ_METHOD_FUNC(dfloat, numo_cDFloat)
+DEF_NARRAY_GT_METHOD_FUNC(dfloat, numo_cDFloat)
+DEF_NARRAY_GE_METHOD_FUNC(dfloat, numo_cDFloat)
+DEF_NARRAY_LT_METHOD_FUNC(dfloat, numo_cDFloat)
+DEF_NARRAY_LE_METHOD_FUNC(dfloat, numo_cDFloat)
+DEF_NARRAY_CLIP_METHOD_FUNC(dfloat, numo_cDFloat)
+DEF_NARRAY_FLT_ISNAN_METHOD_FUNC(dfloat, numo_cDFloat)
+DEF_NARRAY_FLT_ISINF_METHOD_FUNC(dfloat, numo_cDFloat)
+DEF_NARRAY_FLT_ISPOSINF_METHOD_FUNC(dfloat, numo_cDFloat)
+DEF_NARRAY_FLT_ISNEGINF_METHOD_FUNC(dfloat, numo_cDFloat)
+DEF_NARRAY_FLT_ISFINITE_METHOD_FUNC(dfloat, numo_cDFloat)
+DEF_NARRAY_FLT_SUM_METHOD_FUNC(dfloat, numo_cDFloat)
+DEF_NARRAY_FLT_PROD_METHOD_FUNC(dfloat, numo_cDFloat)
 DEF_NARRAY_FLT_MEAN_METHOD_FUNC(dfloat, numo_cDFloat, double, numo_cDFloat)
 DEF_NARRAY_FLT_VAR_METHOD_FUNC(dfloat, numo_cDFloat, double, numo_cDFloat)
 DEF_NARRAY_FLT_STDDEV_METHOD_FUNC(dfloat, numo_cDFloat, double, numo_cDFloat)
 DEF_NARRAY_FLT_RMS_METHOD_FUNC(dfloat, numo_cDFloat, double, numo_cDFloat)
+DEF_NARRAY_FLT_MIN_METHOD_FUNC(dfloat, numo_cDFloat)
+DEF_NARRAY_FLT_MAX_METHOD_FUNC(dfloat, numo_cDFloat)
+DEF_NARRAY_FLT_PTP_METHOD_FUNC(dfloat, numo_cDFloat)
+DEF_NARRAY_FLT_MAX_INDEX_METHOD_FUNC(dfloat)
+DEF_NARRAY_FLT_MIN_INDEX_METHOD_FUNC(dfloat)
+DEF_NARRAY_FLT_ARGMAX_METHOD_FUNC(dfloat)
+DEF_NARRAY_FLT_ARGMIN_METHOD_FUNC(dfloat)
+DEF_NARRAY_FLT_MAXIMUM_METHOD_FUNC(dfloat, numo_cDFloat)
+DEF_NARRAY_FLT_MINIMUM_METHOD_FUNC(dfloat, numo_cDFloat)
+DEF_NARRAY_FLT_MINMAX_METHOD_FUNC(dfloat, numo_cDFloat)
+DEF_NARRAY_FLT_CUMSUM_METHOD_FUNC(dfloat, numo_cDFloat)
+DEF_NARRAY_FLT_CUMPROD_METHOD_FUNC(dfloat, numo_cDFloat)
+DEF_NARRAY_FLT_MULSUM_METHOD_FUNC(dfloat, numo_cDFloat)
+DEF_NARRAY_FLT_SEQ_METHOD_FUNC(dfloat)
+DEF_NARRAY_FLT_LOGSEQ_METHOD_FUNC(dfloat)
+DEF_NARRAY_EYE_METHOD_FUNC(dfloat)
+DEF_NARRAY_FLT_RAND_METHOD_FUNC(dfloat)
 #ifdef __SSE2__
 DEF_NARRAY_FLT_SQRT_SSE2_DBL_METHOD_FUNC(dfloat, numo_cDFloat)
 #else
@@ -1235,171 +1340,6 @@ static VALUE dfloat_aset(int argc, VALUE* argv, VALUE self) {
   return argv[argc];
 }
-static VALUE dfloat_coerce_cast(VALUE self, VALUE type) {
-  return Qnil;
-}
-static void iter_dfloat_to_a(na_loop_t* const lp) {
-  size_t i, s1;
-  char* p1;
-  size_t* idx1;
-  dtype x;
-  volatile VALUE a, y;
-  INIT_COUNTER(lp, i);
-  INIT_PTR_IDX(lp, 0, p1, s1, idx1);
-  a = rb_ary_new2(i);
-  rb_ary_push(lp->args[1].value, a);
-  if (idx1) {
-    for (; i--;) {
-      GET_DATA_INDEX(p1, idx1, dtype, x);
-      y = m_data_to_num(x);
-      rb_ary_push(a, y);
-    }
-  } else {
-    for (; i--;) {
-      GET_DATA_STRIDE(p1, s1, dtype, x);
-      y = m_data_to_num(x);
-      rb_ary_push(a, y);
-    }
-  }
-}
-static VALUE dfloat_to_a(VALUE self) {
-  ndfunc_arg_in_t ain[3] = { { Qnil, 0 }, { sym_loop_opt }, { sym_option } };
-  ndfunc_arg_out_t aout[1] = { { rb_cArray, 0 } }; // dummy?
-  ndfunc_t ndf = { iter_dfloat_to_a, FULL_LOOP_NIP, 3, 1, ain, aout };
-  return na_ndloop_cast_narray_to_rarray(&ndf, self, Qnil);
-}
-static void iter_dfloat_fill(na_loop_t* const lp) {
-  size_t i;
-  char* p1;
-  ssize_t s1;
-  size_t* idx1;
-  VALUE x = lp->option;
-  dtype y;
-  INIT_COUNTER(lp, i);
-  INIT_PTR_IDX(lp, 0, p1, s1, idx1);
-  y = m_num_to_data(x);
-  if (idx1) {
-    for (; i--;) {
-      SET_DATA_INDEX(p1, idx1, dtype, y);
-    }
-  } else {
-    for (; i--;) {
-      SET_DATA_STRIDE(p1, s1, dtype, y);
-    }
-  }
-}
-static VALUE dfloat_fill(VALUE self, VALUE val) {
-  ndfunc_arg_in_t ain[2] = { { OVERWRITE, 0 }, { sym_option } };
-  ndfunc_t ndf = { iter_dfloat_fill, FULL_LOOP, 2, 0, ain, 0 };
-  na_ndloop(&ndf, 2, self, val);
-  return self;
-}
-static VALUE format_dfloat(VALUE fmt, dtype* x) {
-  // fix-me
-  char s[48];
-  int n;
-  if (NIL_P(fmt)) {
-    n = m_sprintf(s, *x);
-    return rb_str_new(s, n);
-  }
-  return rb_funcall(fmt, '%', 1, m_data_to_num(*x));
-}
-static void iter_dfloat_format(na_loop_t* const lp) {
-  size_t i;
-  char *p1, *p2;
-  ssize_t s1, s2;
-  size_t* idx1;
-  dtype* x;
-  VALUE y;
-  VALUE fmt = lp->option;
-  INIT_COUNTER(lp, i);
-  INIT_PTR_IDX(lp, 0, p1, s1, idx1);
-  INIT_PTR(lp, 1, p2, s2);
-  if (idx1) {
-    for (; i--;) {
-      x = (dtype*)(p1 + *idx1);
-      idx1++;
-      y = format_dfloat(fmt, x);
-      SET_DATA_STRIDE(p2, s2, VALUE, y);
-    }
-  } else {
-    for (; i--;) {
-      x = (dtype*)p1;
-      p1 += s1;
-      y = format_dfloat(fmt, x);
-      SET_DATA_STRIDE(p2, s2, VALUE, y);
-    }
-  }
-}
-static VALUE dfloat_format(int argc, VALUE* argv, VALUE self) {
-  VALUE fmt = Qnil;
-  ndfunc_arg_in_t ain[2] = { { Qnil, 0 }, { sym_option } };
-  ndfunc_arg_out_t aout[1] = { { numo_cRObject, 0 } };
-  ndfunc_t ndf = { iter_dfloat_format, FULL_LOOP_NIP, 2, 1, ain, aout };
-  rb_scan_args(argc, argv, "01", &fmt);
-  return na_ndloop(&ndf, 2, self, fmt);
-}
-static void iter_dfloat_format_to_a(na_loop_t* const lp) {
-  size_t i;
-  char* p1;
-  ssize_t s1;
-  size_t* idx1;
-  dtype* x;
-  VALUE y;
-  volatile VALUE a;
-  VALUE fmt = lp->option;
-  INIT_COUNTER(lp, i);
-  INIT_PTR_IDX(lp, 0, p1, s1, idx1);
-  a = rb_ary_new2(i);
-  rb_ary_push(lp->args[1].value, a);
-  if (idx1) {
-    for (; i--;) {
-      x = (dtype*)(p1 + *idx1);
-      idx1++;
-      y = format_dfloat(fmt, x);
-      rb_ary_push(a, y);
-    }
-  } else {
-    for (; i--;) {
-      x = (dtype*)p1;
-      p1 += s1;
-      y = format_dfloat(fmt, x);
-      rb_ary_push(a, y);
-    }
-  }
-}
-static VALUE dfloat_format_to_a(int argc, VALUE* argv, VALUE self) {
-  VALUE fmt = Qnil;
-  ndfunc_arg_in_t ain[3] = { { Qnil, 0 }, { sym_loop_opt }, { sym_option } };
-  ndfunc_arg_out_t aout[1] = { { rb_cArray, 0 } }; // dummy?
-  ndfunc_t ndf = { iter_dfloat_format_to_a, FULL_LOOP_NIP, 3, 1, ain, aout };
-  rb_scan_args(argc, argv, "01", &fmt);
-  return na_ndloop_cast_narray_to_rarray(&ndf, self, fmt);
-}
-static VALUE iter_dfloat_inspect(char* ptr, size_t pos, VALUE fmt) {
-  return format_dfloat(fmt, (dtype*)(ptr + pos));
-}
-static VALUE dfloat_inspect(VALUE ary) {
-  return na_ndloop_inspect(ary, iter_dfloat_inspect, Qnil);
-}
 static void iter_dfloat_each(na_loop_t* const lp) {
   size_t i, s1;
   char* p1;
@@ -1682,3369 +1622,354 @@ static VALUE dfloat_abs(VALUE self) {
   return na_ndloop(&ndf, 1, self);
 }
-#define check_intdivzero(y)                                                                    \
-  {}
-static void iter_dfloat_add(na_loop_t* const lp) {
-  size_t i = 0;
-  size_t n;
+static void iter_dfloat_pow(na_loop_t* const lp) {
+  size_t i;
   char *p1, *p2, *p3;
   ssize_t s1, s2, s3;
-#ifdef __SSE2__
-  size_t cnt;
-  size_t cnt_simd_loop = -1;
-  __m128d a;
-  __m128d b;
-  size_t num_pack; // Number of elements packed for SIMD.
-  num_pack = SIMD_ALIGNMENT_SIZE / sizeof(dtype);
-#endif
-  INIT_COUNTER(lp, n);
+  dtype x, y;
+  INIT_COUNTER(lp, i);
   INIT_PTR(lp, 0, p1, s1);
   INIT_PTR(lp, 1, p2, s2);
   INIT_PTR(lp, 2, p3, s3);
-  //
-  if (is_aligned(p1, sizeof(dtype)) && is_aligned(p2, sizeof(dtype)) &&
-      is_aligned(p3, sizeof(dtype))) {
-    if (s1 == sizeof(dtype) && s2 == sizeof(dtype) && s3 == sizeof(dtype)) {
-#ifdef __SSE2__
-      // Check number of elements. & Check same alignment.
-      if ((n >= num_pack) &&
-          is_same_aligned3(
-            &((dtype*)p1)[i], &((dtype*)p2)[i], &((dtype*)p3)[i], SIMD_ALIGNMENT_SIZE
-          )) {
-        // Calculate up to the position just before the start of SIMD computation.
-        cnt = get_count_of_elements_not_aligned_to_simd_size(
-          &((dtype*)p1)[i], SIMD_ALIGNMENT_SIZE, sizeof(dtype)
-        );
-#endif
-        if (p1 == p3) { // inplace case
-#ifdef __SSE2__
-          for (; i < cnt; i++) {
-#else
-        for (; i < n; i++) {
-          check_intdivzero(((dtype*)p2)[i]);
-#endif
-            ((dtype*)p1)[i] = m_add(((dtype*)p1)[i], ((dtype*)p2)[i]);
-          }
-        } else {
-#ifdef __SSE2__
-          for (; i < cnt; i++) {
-#else
-        for (; i < n; i++) {
-          check_intdivzero(((dtype*)p2)[i]);
-#endif
-            ((dtype*)p3)[i] = m_add(((dtype*)p1)[i], ((dtype*)p2)[i]);
-          }
-        }
-#ifdef __SSE2__
-        // Get the count of SIMD computation loops.
-        cnt_simd_loop = (n - i) % num_pack;
-        // SIMD computation.
-        if (p1 == p3) { // inplace case
-          for (; i < n - cnt_simd_loop; i += num_pack) {
-            a = _mm_load_pd(&((dtype*)p1)[i]);
-            b = _mm_load_pd(&((dtype*)p2)[i]);
-            a = _mm_add_pd(a, b);
-            _mm_store_pd(&((dtype*)p1)[i], a);
-          }
-        } else {
-          for (; i < n - cnt_simd_loop; i += num_pack) {
-            a = _mm_load_pd(&((dtype*)p1)[i]);
-            b = _mm_load_pd(&((dtype*)p2)[i]);
-            a = _mm_add_pd(a, b);
-            _mm_stream_pd(&((dtype*)p3)[i], a);
-          }
-        }
-      }
-      // Compute the remainder of the SIMD operation.
-      if (cnt_simd_loop != 0) {
-        if (p1 == p3) { // inplace case
-          for (; i < n; i++) {
-            check_intdivzero(((dtype*)p2)[i]);
-            ((dtype*)p1)[i] = m_add(((dtype*)p1)[i], ((dtype*)p2)[i]);
-          }
-        } else {
-          for (; i < n; i++) {
-            check_intdivzero(((dtype*)p2)[i]);
-            ((dtype*)p3)[i] = m_add(((dtype*)p1)[i], ((dtype*)p2)[i]);
-          }
-        }
-      }
-#endif
-      return;
-    }
-    if (is_aligned_step(s1, sizeof(dtype)) && is_aligned_step(s2, sizeof(dtype)) &&
-        is_aligned_step(s3, sizeof(dtype))) {
-      //
-      if (s2 == 0) { // Broadcasting from scalar value.
-        check_intdivzero(*(dtype*)p2);
-        if (s1 == sizeof(dtype) && s3 == sizeof(dtype)) {
-#ifdef __SSE2__
-          // Broadcast a scalar value and use it for SIMD computation.
-          b = _mm_load1_pd(&((dtype*)p2)[0]);
-          // Check number of elements. & Check same alignment.
-          if ((n >= num_pack) &&
-              is_same_aligned2(&((dtype*)p1)[i], &((dtype*)p3)[i], SIMD_ALIGNMENT_SIZE)) {
-            // Calculate up to the position just before the start of SIMD computation.
-            cnt = get_count_of_elements_not_aligned_to_simd_size(
-              &((dtype*)p1)[i], SIMD_ALIGNMENT_SIZE, sizeof(dtype)
-            );
-#endif
-            if (p1 == p3) { // inplace case
-#ifdef __SSE2__
-              for (; i < cnt; i++) {
-#else
-            for (; i < n; i++) {
-#endif
-                ((dtype*)p1)[i] = m_add(((dtype*)p1)[i], *(dtype*)p2);
-              }
-            } else {
-#ifdef __SSE2__
-              for (; i < cnt; i++) {
-#else
-            for (; i < n; i++) {
-#endif
-                ((dtype*)p3)[i] = m_add(((dtype*)p1)[i], *(dtype*)p2);
-              }
-            }
-#ifdef __SSE2__
-            // Get the count of SIMD computation loops.
-            cnt_simd_loop = (n - i) % num_pack;
-            // SIMD computation.
-            if (p1 == p3) { // inplace case
-              for (; i < n - cnt_simd_loop; i += num_pack) {
-                a = _mm_load_pd(&((dtype*)p1)[i]);
-                a = _mm_add_pd(a, b);
-                _mm_store_pd(&((dtype*)p1)[i], a);
-              }
-            } else {
-              for (; i < n - cnt_simd_loop; i += num_pack) {
-                a = _mm_load_pd(&((dtype*)p1)[i]);
-                a = _mm_add_pd(a, b);
-                _mm_stream_pd(&((dtype*)p3)[i], a);
-              }
-            }
-          }
-          // Compute the remainder of the SIMD operation.
-          if (cnt_simd_loop != 0) {
-            if (p1 == p3) { // inplace case
-              for (; i < n; i++) {
-                ((dtype*)p1)[i] = m_add(((dtype*)p1)[i], *(dtype*)p2);
-              }
-            } else {
-              for (; i < n; i++) {
-                ((dtype*)p3)[i] = m_add(((dtype*)p1)[i], *(dtype*)p2);
-              }
-            }
-          }
-#endif
-        } else {
-          for (i = 0; i < n; i++) {
-            *(dtype*)p3 = m_add(*(dtype*)p1, *(dtype*)p2);
-            p1 += s1;
-            p3 += s3;
-          }
-        }
-      } else {
-        if (p1 == p3) { // inplace case
-          for (i = 0; i < n; i++) {
-            check_intdivzero(*(dtype*)p2);
-            *(dtype*)p1 = m_add(*(dtype*)p1, *(dtype*)p2);
-            p1 += s1;
-            p2 += s2;
-          }
-        } else {
-          for (i = 0; i < n; i++) {
-            check_intdivzero(*(dtype*)p2);
-            *(dtype*)p3 = m_add(*(dtype*)p1, *(dtype*)p2);
-            p1 += s1;
-            p2 += s2;
-            p3 += s3;
-          }
-        }
-      }
-      return;
-      //
-    }
-  }
-  for (i = 0; i < n; i++) {
-    dtype x, y, z;
+  for (; i--;) {
     GET_DATA_STRIDE(p1, s1, dtype, x);
     GET_DATA_STRIDE(p2, s2, dtype, y);
-    check_intdivzero(y);
-    z = m_add(x, y);
-    SET_DATA_STRIDE(p3, s3, dtype, z);
+    x = m_pow(x, y);
+    SET_DATA_STRIDE(p3, s3, dtype, x);
+  }
+}
+static void iter_dfloat_pow_int32(na_loop_t* const lp) {
+  size_t i;
+  char *p1, *p2, *p3;
+  ssize_t s1, s2, s3;
+  dtype x;
+  int32_t y;
+  INIT_COUNTER(lp, i);
+  INIT_PTR(lp, 0, p1, s1);
+  INIT_PTR(lp, 1, p2, s2);
+  INIT_PTR(lp, 2, p3, s3);
+  for (; i--;) {
+    GET_DATA_STRIDE(p1, s1, dtype, x);
+    GET_DATA_STRIDE(p2, s2, int32_t, y);
+    x = m_pow_int(x, y);
+    SET_DATA_STRIDE(p3, s3, dtype, x);
   }
-  //
 }
-#undef check_intdivzero
-static VALUE dfloat_add_self(VALUE self, VALUE other) {
+static VALUE dfloat_pow_self(VALUE self, VALUE other) {
   ndfunc_arg_in_t ain[2] = { { cT, 0 }, { cT, 0 } };
+  ndfunc_arg_in_t ain_i[2] = { { cT, 0 }, { numo_cInt32, 0 } };
   ndfunc_arg_out_t aout[1] = { { cT, 0 } };
-  ndfunc_t ndf = { iter_dfloat_add, STRIDE_LOOP, 2, 1, ain, aout };
+  ndfunc_t ndf = { iter_dfloat_pow, STRIDE_LOOP, 2, 1, ain, aout };
+  ndfunc_t ndf_i = { iter_dfloat_pow_int32, STRIDE_LOOP, 2, 1, ain_i, aout };
-  return na_ndloop(&ndf, 2, self, other);
+  // fixme : use na.integer?
+  if (FIXNUM_P(other) || rb_obj_is_kind_of(other, numo_cInt32)) {
+    return na_ndloop(&ndf_i, 2, self, other);
+  } else {
+    return na_ndloop(&ndf, 2, self, other);
+  }
 }
-static VALUE dfloat_add(VALUE self, VALUE other) {
+static VALUE dfloat_pow(VALUE self, VALUE other) {
   VALUE klass, v;
   klass = na_upcast(rb_obj_class(self), rb_obj_class(other));
   if (klass == cT) {
-    return dfloat_add_self(self, other);
+    return dfloat_pow_self(self, other);
   } else {
     v = rb_funcall(klass, id_cast, 1, self);
-    return rb_funcall(v, '+', 1, other);
+    return rb_funcall(v, id_pow, 1, other);
   }
 }
-#define check_intdivzero(y)                                                                    \
-  {}
+static void iter_dfloat_minus(na_loop_t* const lp) {
+  size_t i, n;
+  char *p1, *p2;
+  ssize_t s1, s2;
+  size_t *idx1, *idx2;
+  dtype x;
-static void iter_dfloat_sub(na_loop_t* const lp) {
-  size_t i = 0;
-  size_t n;
-  char *p1, *p2, *p3;
-  ssize_t s1, s2, s3;
-#ifdef __SSE2__
-  size_t cnt;
-  size_t cnt_simd_loop = -1;
-  __m128d a;
-  __m128d b;
-  size_t num_pack; // Number of elements packed for SIMD.
-  num_pack = SIMD_ALIGNMENT_SIZE / sizeof(dtype);
-#endif
   INIT_COUNTER(lp, n);
-  INIT_PTR(lp, 0, p1, s1);
-  INIT_PTR(lp, 1, p2, s2);
-  INIT_PTR(lp, 2, p3, s3);
-  //
-  if (is_aligned(p1, sizeof(dtype)) && is_aligned(p2, sizeof(dtype)) &&
-      is_aligned(p3, sizeof(dtype))) {
-    if (s1 == sizeof(dtype) && s2 == sizeof(dtype) && s3 == sizeof(dtype)) {
-#ifdef __SSE2__
-      // Check number of elements. & Check same alignment.
-      if ((n >= num_pack) &&
-          is_same_aligned3(
-            &((dtype*)p1)[i], &((dtype*)p2)[i], &((dtype*)p3)[i], SIMD_ALIGNMENT_SIZE
-          )) {
-        // Calculate up to the position just before the start of SIMD computation.
-        cnt = get_count_of_elements_not_aligned_to_simd_size(
-          &((dtype*)p1)[i], SIMD_ALIGNMENT_SIZE, sizeof(dtype)
-        );
-#endif
-        if (p1 == p3) { // inplace case
-#ifdef __SSE2__
-          for (; i < cnt; i++) {
-#else
-        for (; i < n; i++) {
-          check_intdivzero(((dtype*)p2)[i]);
-#endif
-            ((dtype*)p1)[i] = m_sub(((dtype*)p1)[i], ((dtype*)p2)[i]);
-          }
-        } else {
-#ifdef __SSE2__
-          for (; i < cnt; i++) {
-#else
-        for (; i < n; i++) {
-          check_intdivzero(((dtype*)p2)[i]);
-#endif
-            ((dtype*)p3)[i] = m_sub(((dtype*)p1)[i], ((dtype*)p2)[i]);
-          }
-        }
-#ifdef __SSE2__
-        // Get the count of SIMD computation loops.
-        cnt_simd_loop = (n - i) % num_pack;
+  INIT_PTR_IDX(lp, 0, p1, s1, idx1);
+  INIT_PTR_IDX(lp, 1, p2, s2, idx2);
-        // SIMD computation.
-        if (p1 == p3) { // inplace case
-          for (; i < n - cnt_simd_loop; i += num_pack) {
-            a = _mm_load_pd(&((dtype*)p1)[i]);
-            b = _mm_load_pd(&((dtype*)p2)[i]);
-            a = _mm_sub_pd(a, b);
-            _mm_store_pd(&((dtype*)p1)[i], a);
-          }
-        } else {
-          for (; i < n - cnt_simd_loop; i += num_pack) {
-            a = _mm_load_pd(&((dtype*)p1)[i]);
-            b = _mm_load_pd(&((dtype*)p2)[i]);
-            a = _mm_sub_pd(a, b);
-            _mm_stream_pd(&((dtype*)p3)[i], a);
-          }
-        }
+  if (idx1) {
+    if (idx2) {
+      for (i = 0; i < n; i++) {
+        GET_DATA_INDEX(p1, idx1, dtype, x);
+        x = m_minus(x);
+        SET_DATA_INDEX(p2, idx2, dtype, x);
       }
-      // Compute the remainder of the SIMD operation.
-      if (cnt_simd_loop != 0) {
-        if (p1 == p3) { // inplace case
-          for (; i < n; i++) {
-            check_intdivzero(((dtype*)p2)[i]);
-            ((dtype*)p1)[i] = m_sub(((dtype*)p1)[i], ((dtype*)p2)[i]);
-          }
-        } else {
-          for (; i < n; i++) {
-            check_intdivzero(((dtype*)p2)[i]);
-            ((dtype*)p3)[i] = m_sub(((dtype*)p1)[i], ((dtype*)p2)[i]);
-          }
-        }
+    } else {
+      for (i = 0; i < n; i++) {
+        GET_DATA_INDEX(p1, idx1, dtype, x);
+        x = m_minus(x);
+        SET_DATA_STRIDE(p2, s2, dtype, x);
       }
-#endif
-      return;
     }
-    if (is_aligned_step(s1, sizeof(dtype)) && is_aligned_step(s2, sizeof(dtype)) &&
-        is_aligned_step(s3, sizeof(dtype))) {
+  } else {
+    if (idx2) {
+      for (i = 0; i < n; i++) {
+        GET_DATA_STRIDE(p1, s1, dtype, x);
+        x = m_minus(x);
+        SET_DATA_INDEX(p2, idx2, dtype, x);
+      }
+    } else {
       //
-      if (s2 == 0) { // Broadcasting from scalar value.
-        check_intdivzero(*(dtype*)p2);
-        if (s1 == sizeof(dtype) && s3 == sizeof(dtype)) {
-#ifdef __SSE2__
-          // Broadcast a scalar value and use it for SIMD computation.
-          b = _mm_load1_pd(&((dtype*)p2)[0]);
-          // Check number of elements. & Check same alignment.
-          if ((n >= num_pack) &&
-              is_same_aligned2(&((dtype*)p1)[i], &((dtype*)p3)[i], SIMD_ALIGNMENT_SIZE)) {
-            // Calculate up to the position just before the start of SIMD computation.
-            cnt = get_count_of_elements_not_aligned_to_simd_size(
-              &((dtype*)p1)[i], SIMD_ALIGNMENT_SIZE, sizeof(dtype)
-            );
-#endif
-            if (p1 == p3) { // inplace case
-#ifdef __SSE2__
-              for (; i < cnt; i++) {
-#else
-            for (; i < n; i++) {
-#endif
-                ((dtype*)p1)[i] = m_sub(((dtype*)p1)[i], *(dtype*)p2);
-              }
-            } else {
-#ifdef __SSE2__
-              for (; i < cnt; i++) {
-#else
-            for (; i < n; i++) {
-#endif
-                ((dtype*)p3)[i] = m_sub(((dtype*)p1)[i], *(dtype*)p2);
-              }
-            }
-#ifdef __SSE2__
-            // Get the count of SIMD computation loops.
-            cnt_simd_loop = (n - i) % num_pack;
-            // SIMD computation.
-            if (p1 == p3) { // inplace case
-              for (; i < n - cnt_simd_loop; i += num_pack) {
-                a = _mm_load_pd(&((dtype*)p1)[i]);
-                a = _mm_sub_pd(a, b);
-                _mm_store_pd(&((dtype*)p1)[i], a);
-              }
-            } else {
-              for (; i < n - cnt_simd_loop; i += num_pack) {
-                a = _mm_load_pd(&((dtype*)p1)[i]);
-                a = _mm_sub_pd(a, b);
-                _mm_stream_pd(&((dtype*)p3)[i], a);
-              }
-            }
-          }
-          // Compute the remainder of the SIMD operation.
-          if (cnt_simd_loop != 0) {
-            if (p1 == p3) { // inplace case
-              for (; i < n; i++) {
-                ((dtype*)p1)[i] = m_sub(((dtype*)p1)[i], *(dtype*)p2);
-              }
-            } else {
-              for (; i < n; i++) {
-                ((dtype*)p3)[i] = m_sub(((dtype*)p1)[i], *(dtype*)p2);
-              }
-            }
-          }
-#endif
-        } else {
+      if (is_aligned(p1, sizeof(dtype)) && is_aligned(p2, sizeof(dtype))) {
+        if (s1 == sizeof(dtype) && s2 == sizeof(dtype)) {
           for (i = 0; i < n; i++) {
-            *(dtype*)p3 = m_sub(*(dtype*)p1, *(dtype*)p2);
-            p1 += s1;
-            p3 += s3;
+            ((dtype*)p2)[i] = m_minus(((dtype*)p1)[i]);
           }
+          return;
         }
-      } else {
-        if (p1 == p3) { // inplace case
-          for (i = 0; i < n; i++) {
-            check_intdivzero(*(dtype*)p2);
-            *(dtype*)p1 = m_sub(*(dtype*)p1, *(dtype*)p2);
-            p1 += s1;
-            p2 += s2;
-          }
-        } else {
+        if (is_aligned_step(s1, sizeof(dtype)) && is_aligned_step(s2, sizeof(dtype))) {
+          //
           for (i = 0; i < n; i++) {
-            check_intdivzero(*(dtype*)p2);
-            *(dtype*)p3 = m_sub(*(dtype*)p1, *(dtype*)p2);
+            *(dtype*)p2 = m_minus(*(dtype*)p1);
             p1 += s1;
             p2 += s2;
-            p3 += s3;
           }
+          return;
+          //
         }
       }
-      return;
+      for (i = 0; i < n; i++) {
+        GET_DATA_STRIDE(p1, s1, dtype, x);
+        x = m_minus(x);
+        SET_DATA_STRIDE(p2, s2, dtype, x);
+      }
       //
     }
   }
-  for (i = 0; i < n; i++) {
-    dtype x, y, z;
-    GET_DATA_STRIDE(p1, s1, dtype, x);
-    GET_DATA_STRIDE(p2, s2, dtype, y);
-    check_intdivzero(y);
-    z = m_sub(x, y);
-    SET_DATA_STRIDE(p3, s3, dtype, z);
-  }
-  //
 }
-#undef check_intdivzero
-static VALUE dfloat_sub_self(VALUE self, VALUE other) {
-  ndfunc_arg_in_t ain[2] = { { cT, 0 }, { cT, 0 } };
+static VALUE dfloat_minus(VALUE self) {
+  ndfunc_arg_in_t ain[1] = { { cT, 0 } };
   ndfunc_arg_out_t aout[1] = { { cT, 0 } };
-  ndfunc_t ndf = { iter_dfloat_sub, STRIDE_LOOP, 2, 1, ain, aout };
-  return na_ndloop(&ndf, 2, self, other);
-}
-static VALUE dfloat_sub(VALUE self, VALUE other) {
-  VALUE klass, v;
+  ndfunc_t ndf = { iter_dfloat_minus, FULL_LOOP, 1, 1, ain, aout };
-  klass = na_upcast(rb_obj_class(self), rb_obj_class(other));
-  if (klass == cT) {
-    return dfloat_sub_self(self, other);
-  } else {
-    v = rb_funcall(klass, id_cast, 1, self);
-    return rb_funcall(v, '-', 1, other);
-  }
+  return na_ndloop(&ndf, 1, self);
 }
-#define check_intdivzero(y)                                                                    \
-  {}
-static void iter_dfloat_mul(na_loop_t* const lp) {
-  size_t i = 0;
-  size_t n;
-  char *p1, *p2, *p3;
-  ssize_t s1, s2, s3;
-#ifdef __SSE2__
-  size_t cnt;
-  size_t cnt_simd_loop = -1;
-  __m128d a;
-  __m128d b;
+static void iter_dfloat_reciprocal(na_loop_t* const lp) {
+  size_t i, n;
+  char *p1, *p2;
+  ssize_t s1, s2;
+  size_t *idx1, *idx2;
+  dtype x;
-  size_t num_pack; // Number of elements packed for SIMD.
-  num_pack = SIMD_ALIGNMENT_SIZE / sizeof(dtype);
-#endif
   INIT_COUNTER(lp, n);
-  INIT_PTR(lp, 0, p1, s1);
-  INIT_PTR(lp, 1, p2, s2);
-  INIT_PTR(lp, 2, p3, s3);
-  //
-  if (is_aligned(p1, sizeof(dtype)) && is_aligned(p2, sizeof(dtype)) &&
-      is_aligned(p3, sizeof(dtype))) {
+  INIT_PTR_IDX(lp, 0, p1, s1, idx1);
+  INIT_PTR_IDX(lp, 1, p2, s2, idx2);
-    if (s1 == sizeof(dtype) && s2 == sizeof(dtype) && s3 == sizeof(dtype)) {
-#ifdef __SSE2__
-      // Check number of elements. & Check same alignment.
-      if ((n >= num_pack) &&
-          is_same_aligned3(
-            &((dtype*)p1)[i], &((dtype*)p2)[i], &((dtype*)p3)[i], SIMD_ALIGNMENT_SIZE
-          )) {
-        // Calculate up to the position just before the start of SIMD computation.
-        cnt = get_count_of_elements_not_aligned_to_simd_size(
-          &((dtype*)p1)[i], SIMD_ALIGNMENT_SIZE, sizeof(dtype)
-        );
-#endif
-        if (p1 == p3) { // inplace case
-#ifdef __SSE2__
-          for (; i < cnt; i++) {
-#else
-        for (; i < n; i++) {
-          check_intdivzero(((dtype*)p2)[i]);
-#endif
-            ((dtype*)p1)[i] = m_mul(((dtype*)p1)[i], ((dtype*)p2)[i]);
-          }
-        } else {
-#ifdef __SSE2__
-          for (; i < cnt; i++) {
-#else
-        for (; i < n; i++) {
-          check_intdivzero(((dtype*)p2)[i]);
-#endif
-            ((dtype*)p3)[i] = m_mul(((dtype*)p1)[i], ((dtype*)p2)[i]);
+  if (idx1) {
+    if (idx2) {
+      for (i = 0; i < n; i++) {
+        GET_DATA_INDEX(p1, idx1, dtype, x);
+        x = m_reciprocal(x);
+        SET_DATA_INDEX(p2, idx2, dtype, x);
+      }
+    } else {
+      for (i = 0; i < n; i++) {
+        GET_DATA_INDEX(p1, idx1, dtype, x);
+        x = m_reciprocal(x);
+        SET_DATA_STRIDE(p2, s2, dtype, x);
+      }
+    }
+  } else {
+    if (idx2) {
+      for (i = 0; i < n; i++) {
+        GET_DATA_STRIDE(p1, s1, dtype, x);
+        x = m_reciprocal(x);
+        SET_DATA_INDEX(p2, idx2, dtype, x);
+      }
+    } else {
+      //
+      if (is_aligned(p1, sizeof(dtype)) && is_aligned(p2, sizeof(dtype))) {
+        if (s1 == sizeof(dtype) && s2 == sizeof(dtype)) {
+          for (i = 0; i < n; i++) {
+            ((dtype*)p2)[i] = m_reciprocal(((dtype*)p1)[i]);
           }
+          return;
         }
-#ifdef __SSE2__
-        // Get the count of SIMD computation loops.
-        cnt_simd_loop = (n - i) % num_pack;
-        // SIMD computation.
-        if (p1 == p3) { // inplace case
-          for (; i < n - cnt_simd_loop; i += num_pack) {
-            a = _mm_load_pd(&((dtype*)p1)[i]);
-            b = _mm_load_pd(&((dtype*)p2)[i]);
-            a = _mm_mul_pd(a, b);
-            _mm_store_pd(&((dtype*)p1)[i], a);
-          }
-        } else {
-          for (; i < n - cnt_simd_loop; i += num_pack) {
-            a = _mm_load_pd(&((dtype*)p1)[i]);
-            b = _mm_load_pd(&((dtype*)p2)[i]);
-            a = _mm_mul_pd(a, b);
-            _mm_stream_pd(&((dtype*)p3)[i], a);
+        if (is_aligned_step(s1, sizeof(dtype)) && is_aligned_step(s2, sizeof(dtype))) {
+          //
+          for (i = 0; i < n; i++) {
+            *(dtype*)p2 = m_reciprocal(*(dtype*)p1);
+            p1 += s1;
+            p2 += s2;
           }
+          return;
+          //
         }
       }
-      // Compute the remainder of the SIMD operation.
-      if (cnt_simd_loop != 0) {
-        if (p1 == p3) { // inplace case
-          for (; i < n; i++) {
-            check_intdivzero(((dtype*)p2)[i]);
-            ((dtype*)p1)[i] = m_mul(((dtype*)p1)[i], ((dtype*)p2)[i]);
-          }
-        } else {
-          for (; i < n; i++) {
-            check_intdivzero(((dtype*)p2)[i]);
-            ((dtype*)p3)[i] = m_mul(((dtype*)p1)[i], ((dtype*)p2)[i]);
-          }
-        }
+      for (i = 0; i < n; i++) {
+        GET_DATA_STRIDE(p1, s1, dtype, x);
+        x = m_reciprocal(x);
+        SET_DATA_STRIDE(p2, s2, dtype, x);
       }
-#endif
-      return;
-    }
-    if (is_aligned_step(s1, sizeof(dtype)) && is_aligned_step(s2, sizeof(dtype)) &&
-        is_aligned_step(s3, sizeof(dtype))) {
       //
+    }
+  }
+}
-      if (s2 == 0) { // Broadcasting from scalar value.
-        check_intdivzero(*(dtype*)p2);
-        if (s1 == sizeof(dtype) && s3 == sizeof(dtype)) {
-#ifdef __SSE2__
-          // Broadcast a scalar value and use it for SIMD computation.
-          b = _mm_load1_pd(&((dtype*)p2)[0]);
-          // Check number of elements. & Check same alignment.
-          if ((n >= num_pack) &&
-              is_same_aligned2(&((dtype*)p1)[i], &((dtype*)p3)[i], SIMD_ALIGNMENT_SIZE)) {
-            // Calculate up to the position just before the start of SIMD computation.
-            cnt = get_count_of_elements_not_aligned_to_simd_size(
-              &((dtype*)p1)[i], SIMD_ALIGNMENT_SIZE, sizeof(dtype)
-            );
-#endif
-            if (p1 == p3) { // inplace case
-#ifdef __SSE2__
-              for (; i < cnt; i++) {
-#else
-            for (; i < n; i++) {
-#endif
-                ((dtype*)p1)[i] = m_mul(((dtype*)p1)[i], *(dtype*)p2);
-              }
-            } else {
-#ifdef __SSE2__
-              for (; i < cnt; i++) {
-#else
-            for (; i < n; i++) {
-#endif
-                ((dtype*)p3)[i] = m_mul(((dtype*)p1)[i], *(dtype*)p2);
-              }
-            }
+static VALUE dfloat_reciprocal(VALUE self) {
+  ndfunc_arg_in_t ain[1] = { { cT, 0 } };
+  ndfunc_arg_out_t aout[1] = { { cT, 0 } };
+  ndfunc_t ndf = { iter_dfloat_reciprocal, FULL_LOOP, 1, 1, ain, aout };
-#ifdef __SSE2__
-            // Get the count of SIMD computation loops.
-            cnt_simd_loop = (n - i) % num_pack;
-            // SIMD computation.
-            if (p1 == p3) { // inplace case
-              for (; i < n - cnt_simd_loop; i += num_pack) {
-                a = _mm_load_pd(&((dtype*)p1)[i]);
-                a = _mm_mul_pd(a, b);
-                _mm_store_pd(&((dtype*)p1)[i], a);
-              }
-            } else {
-              for (; i < n - cnt_simd_loop; i += num_pack) {
-                a = _mm_load_pd(&((dtype*)p1)[i]);
-                a = _mm_mul_pd(a, b);
-                _mm_stream_pd(&((dtype*)p3)[i], a);
-              }
-            }
-          }
+  return na_ndloop(&ndf, 1, self);
+}
-          // Compute the remainder of the SIMD operation.
-          if (cnt_simd_loop != 0) {
-            if (p1 == p3) { // inplace case
-              for (; i < n; i++) {
-                ((dtype*)p1)[i] = m_mul(((dtype*)p1)[i], *(dtype*)p2);
-              }
-            } else {
-              for (; i < n; i++) {
-                ((dtype*)p3)[i] = m_mul(((dtype*)p1)[i], *(dtype*)p2);
-              }
-            }
-          }
-#endif
-        } else {
+static void iter_dfloat_sign(na_loop_t* const lp) {
+  size_t i, n;
+  char *p1, *p2;
+  ssize_t s1, s2;
+  size_t *idx1, *idx2;
+  dtype x;
+  INIT_COUNTER(lp, n);
+  INIT_PTR_IDX(lp, 0, p1, s1, idx1);
+  INIT_PTR_IDX(lp, 1, p2, s2, idx2);
+  if (idx1) {
+    if (idx2) {
+      for (i = 0; i < n; i++) {
+        GET_DATA_INDEX(p1, idx1, dtype, x);
+        x = m_sign(x);
+        SET_DATA_INDEX(p2, idx2, dtype, x);
+      }
+    } else {
+      for (i = 0; i < n; i++) {
+        GET_DATA_INDEX(p1, idx1, dtype, x);
+        x = m_sign(x);
+        SET_DATA_STRIDE(p2, s2, dtype, x);
+      }
+    }
+  } else {
+    if (idx2) {
+      for (i = 0; i < n; i++) {
+        GET_DATA_STRIDE(p1, s1, dtype, x);
+        x = m_sign(x);
+        SET_DATA_INDEX(p2, idx2, dtype, x);
+      }
+    } else {
+      //
+      if (is_aligned(p1, sizeof(dtype)) && is_aligned(p2, sizeof(dtype))) {
+        if (s1 == sizeof(dtype) && s2 == sizeof(dtype)) {
           for (i = 0; i < n; i++) {
-            *(dtype*)p3 = m_mul(*(dtype*)p1, *(dtype*)p2);
-            p1 += s1;
-            p3 += s3;
+            ((dtype*)p2)[i] = m_sign(((dtype*)p1)[i]);
           }
+          return;
         }
-      } else {
-        if (p1 == p3) { // inplace case
-          for (i = 0; i < n; i++) {
-            check_intdivzero(*(dtype*)p2);
-            *(dtype*)p1 = m_mul(*(dtype*)p1, *(dtype*)p2);
-            p1 += s1;
-            p2 += s2;
-          }
-        } else {
+        if (is_aligned_step(s1, sizeof(dtype)) && is_aligned_step(s2, sizeof(dtype))) {
+          //
           for (i = 0; i < n; i++) {
-            check_intdivzero(*(dtype*)p2);
-            *(dtype*)p3 = m_mul(*(dtype*)p1, *(dtype*)p2);
+            *(dtype*)p2 = m_sign(*(dtype*)p1);
             p1 += s1;
             p2 += s2;
-            p3 += s3;
           }
+          return;
+          //
         }
       }
-      return;
+      for (i = 0; i < n; i++) {
+        GET_DATA_STRIDE(p1, s1, dtype, x);
+        x = m_sign(x);
+        SET_DATA_STRIDE(p2, s2, dtype, x);
+      }
       //
     }
   }
-  for (i = 0; i < n; i++) {
-    dtype x, y, z;
-    GET_DATA_STRIDE(p1, s1, dtype, x);
-    GET_DATA_STRIDE(p2, s2, dtype, y);
-    check_intdivzero(y);
-    z = m_mul(x, y);
-    SET_DATA_STRIDE(p3, s3, dtype, z);
-  }
-  //
 }
-#undef check_intdivzero
-static VALUE dfloat_mul_self(VALUE self, VALUE other) {
-  ndfunc_arg_in_t ain[2] = { { cT, 0 }, { cT, 0 } };
+static VALUE dfloat_sign(VALUE self) {
+  ndfunc_arg_in_t ain[1] = { { cT, 0 } };
   ndfunc_arg_out_t aout[1] = { { cT, 0 } };
-  ndfunc_t ndf = { iter_dfloat_mul, STRIDE_LOOP, 2, 1, ain, aout };
-  return na_ndloop(&ndf, 2, self, other);
-}
-static VALUE dfloat_mul(VALUE self, VALUE other) {
-  VALUE klass, v;
+  ndfunc_t ndf = { iter_dfloat_sign, FULL_LOOP, 1, 1, ain, aout };
-  klass = na_upcast(rb_obj_class(self), rb_obj_class(other));
-  if (klass == cT) {
-    return dfloat_mul_self(self, other);
-  } else {
-    v = rb_funcall(klass, id_cast, 1, self);
-    return rb_funcall(v, '*', 1, other);
-  }
+  return na_ndloop(&ndf, 1, self);
 }
-#define check_intdivzero(y)                                                                    \
-  {}
-static void iter_dfloat_div(na_loop_t* const lp) {
-  size_t i = 0;
-  size_t n;
-  char *p1, *p2, *p3;
-  ssize_t s1, s2, s3;
-#ifdef __SSE2__
-  size_t cnt;
-  size_t cnt_simd_loop = -1;
-  __m128d a;
-  __m128d b;
+static void iter_dfloat_square(na_loop_t* const lp) {
+  size_t i, n;
+  char *p1, *p2;
+  ssize_t s1, s2;
+  size_t *idx1, *idx2;
+  dtype x;
-  size_t num_pack; // Number of elements packed for SIMD.
-  num_pack = SIMD_ALIGNMENT_SIZE / sizeof(dtype);
-#endif
   INIT_COUNTER(lp, n);
-  INIT_PTR(lp, 0, p1, s1);
-  INIT_PTR(lp, 1, p2, s2);
-  INIT_PTR(lp, 2, p3, s3);
-  //
-  if (is_aligned(p1, sizeof(dtype)) && is_aligned(p2, sizeof(dtype)) &&
-      is_aligned(p3, sizeof(dtype))) {
-    if (s1 == sizeof(dtype) && s2 == sizeof(dtype) && s3 == sizeof(dtype)) {
-#ifdef __SSE2__
-      // Check number of elements. & Check same alignment.
-      if ((n >= num_pack) &&
-          is_same_aligned3(
-            &((dtype*)p1)[i], &((dtype*)p2)[i], &((dtype*)p3)[i], SIMD_ALIGNMENT_SIZE
-          )) {
-        // Calculate up to the position just before the start of SIMD computation.
-        cnt = get_count_of_elements_not_aligned_to_simd_size(
-          &((dtype*)p1)[i], SIMD_ALIGNMENT_SIZE, sizeof(dtype)
-        );
-#endif
-        if (p1 == p3) { // inplace case
-#ifdef __SSE2__
-          for (; i < cnt; i++) {
-#else
-        for (; i < n; i++) {
-#endif
-            ((dtype*)p1)[i] = m_div(((dtype*)p1)[i], ((dtype*)p2)[i]);
-          }
-        } else {
-#ifdef __SSE2__
-          for (; i < cnt; i++) {
-#else
-        for (; i < n; i++) {
-#endif
-            ((dtype*)p3)[i] = m_div(((dtype*)p1)[i], ((dtype*)p2)[i]);
-          }
-        }
-#ifdef __SSE2__
-        // Get the count of SIMD computation loops.
-        cnt_simd_loop = (n - i) % num_pack;
+  INIT_PTR_IDX(lp, 0, p1, s1, idx1);
+  INIT_PTR_IDX(lp, 1, p2, s2, idx2);
-        // SIMD computation.
-        if (p1 == p3) { // inplace case
-          for (; i < n - cnt_simd_loop; i += num_pack) {
-            a = _mm_load_pd(&((dtype*)p1)[i]);
-            b = _mm_load_pd(&((dtype*)p2)[i]);
-            a = _mm_div_pd(a, b);
-            _mm_store_pd(&((dtype*)p1)[i], a);
-          }
-        } else {
-          for (; i < n - cnt_simd_loop; i += num_pack) {
-            a = _mm_load_pd(&((dtype*)p1)[i]);
-            b = _mm_load_pd(&((dtype*)p2)[i]);
-            a = _mm_div_pd(a, b);
-            _mm_stream_pd(&((dtype*)p3)[i], a);
-          }
-        }
+  if (idx1) {
+    if (idx2) {
+      for (i = 0; i < n; i++) {
+        GET_DATA_INDEX(p1, idx1, dtype, x);
+        x = m_square(x);
+        SET_DATA_INDEX(p2, idx2, dtype, x);
       }
-      // Compute the remainder of the SIMD operation.
-      if (cnt_simd_loop != 0) {
-        if (p1 == p3) { // inplace case
-          for (; i < n; i++) {
-            check_intdivzero(((dtype*)p2)[i]);
-            ((dtype*)p1)[i] = m_div(((dtype*)p1)[i], ((dtype*)p2)[i]);
-          }
-        } else {
-          for (; i < n; i++) {
-            check_intdivzero(((dtype*)p2)[i]);
-            ((dtype*)p3)[i] = m_div(((dtype*)p1)[i], ((dtype*)p2)[i]);
-          }
-        }
+    } else {
+      for (i = 0; i < n; i++) {
+        GET_DATA_INDEX(p1, idx1, dtype, x);
+        x = m_square(x);
+        SET_DATA_STRIDE(p2, s2, dtype, x);
       }
-#endif
-      return;
     }
-    if (is_aligned_step(s1, sizeof(dtype)) && is_aligned_step(s2, sizeof(dtype)) &&
-        is_aligned_step(s3, sizeof(dtype))) {
+  } else {
+    if (idx2) {
+      for (i = 0; i < n; i++) {
+        GET_DATA_STRIDE(p1, s1, dtype, x);
+        x = m_square(x);
+        SET_DATA_INDEX(p2, idx2, dtype, x);
+      }
+    } else {
       //
-      if (s2 == 0) { // Broadcasting from scalar value.
-        check_intdivzero(*(dtype*)p2);
-        if (s1 == sizeof(dtype) && s3 == sizeof(dtype)) {
-#ifdef __SSE2__
-          // Broadcast a scalar value and use it for SIMD computation.
-          b = _mm_load1_pd(&((dtype*)p2)[0]);
-          // Check number of elements. & Check same alignment.
-          if ((n >= num_pack) &&
-              is_same_aligned2(&((dtype*)p1)[i], &((dtype*)p3)[i], SIMD_ALIGNMENT_SIZE)) {
-            // Calculate up to the position just before the start of SIMD computation.
-            cnt = get_count_of_elements_not_aligned_to_simd_size(
-              &((dtype*)p1)[i], SIMD_ALIGNMENT_SIZE, sizeof(dtype)
-            );
-#endif
-            if (p1 == p3) { // inplace case
-#ifdef __SSE2__
-              for (; i < cnt; i++) {
-#else
-            for (; i < n; i++) {
-              check_intdivzero(((dtype*)p2)[i]);
-#endif
-                ((dtype*)p1)[i] = m_div(((dtype*)p1)[i], *(dtype*)p2);
-              }
-            } else {
-#ifdef __SSE2__
-              for (; i < cnt; i++) {
-#else
-            for (; i < n; i++) {
-              check_intdivzero(((dtype*)p2)[i]);
-#endif
-                ((dtype*)p3)[i] = m_div(((dtype*)p1)[i], *(dtype*)p2);
-              }
-            }
-#ifdef __SSE2__
-            // Get the count of SIMD computation loops.
-            cnt_simd_loop = (n - i) % num_pack;
-            // SIMD computation.
-            if (p1 == p3) { // inplace case
-              for (; i < n - cnt_simd_loop; i += num_pack) {
-                a = _mm_load_pd(&((dtype*)p1)[i]);
-                a = _mm_div_pd(a, b);
-                _mm_store_pd(&((dtype*)p1)[i], a);
-              }
-            } else {
-              for (; i < n - cnt_simd_loop; i += num_pack) {
-                a = _mm_load_pd(&((dtype*)p1)[i]);
-                a = _mm_div_pd(a, b);
-                _mm_stream_pd(&((dtype*)p3)[i], a);
-              }
-            }
-          }
-          // Compute the remainder of the SIMD operation.
-          if (cnt_simd_loop != 0) {
-            if (p1 == p3) { // inplace case
-              for (; i < n; i++) {
-                ((dtype*)p1)[i] = m_div(((dtype*)p1)[i], *(dtype*)p2);
-              }
-            } else {
-              for (; i < n; i++) {
-                ((dtype*)p3)[i] = m_div(((dtype*)p1)[i], *(dtype*)p2);
-              }
-            }
-          }
-#endif
-        } else {
+      if (is_aligned(p1, sizeof(dtype)) && is_aligned(p2, sizeof(dtype))) {
+        if (s1 == sizeof(dtype) && s2 == sizeof(dtype)) {
           for (i = 0; i < n; i++) {
-            *(dtype*)p3 = m_div(*(dtype*)p1, *(dtype*)p2);
-            p1 += s1;
-            p3 += s3;
+            ((dtype*)p2)[i] = m_square(((dtype*)p1)[i]);
           }
+          return;
         }
-      } else {
-        if (p1 == p3) { // inplace case
-          for (i = 0; i < n; i++) {
-            check_intdivzero(*(dtype*)p2);
-            *(dtype*)p1 = m_div(*(dtype*)p1, *(dtype*)p2);
-            p1 += s1;
-            p2 += s2;
-          }
-        } else {
+        if (is_aligned_step(s1, sizeof(dtype)) && is_aligned_step(s2, sizeof(dtype))) {
+          //
           for (i = 0; i < n; i++) {
-            check_intdivzero(*(dtype*)p2);
-            *(dtype*)p3 = m_div(*(dtype*)p1, *(dtype*)p2);
+            *(dtype*)p2 = m_square(*(dtype*)p1);
             p1 += s1;
             p2 += s2;
-            p3 += s3;
           }
-        }
-      }
-      return;
-      //
-    }
-  }
-  for (i = 0; i < n; i++) {
-    dtype x, y, z;
-    GET_DATA_STRIDE(p1, s1, dtype, x);
-    GET_DATA_STRIDE(p2, s2, dtype, y);
-    check_intdivzero(y);
-    z = m_div(x, y);
-    SET_DATA_STRIDE(p3, s3, dtype, z);
-  }
-  //
-}
-#undef check_intdivzero
-static VALUE dfloat_div_self(VALUE self, VALUE other) {
-  ndfunc_arg_in_t ain[2] = { { cT, 0 }, { cT, 0 } };
-  ndfunc_arg_out_t aout[1] = { { cT, 0 } };
-  ndfunc_t ndf = { iter_dfloat_div, STRIDE_LOOP, 2, 1, ain, aout };
-  return na_ndloop(&ndf, 2, self, other);
-}
-static VALUE dfloat_div(VALUE self, VALUE other) {
-  VALUE klass, v;
-  klass = na_upcast(rb_obj_class(self), rb_obj_class(other));
-  if (klass == cT) {
-    return dfloat_div_self(self, other);
-  } else {
-    v = rb_funcall(klass, id_cast, 1, self);
-    return rb_funcall(v, '/', 1, other);
-  }
-}
-#define check_intdivzero(y)                                                                    \
-  {}
-static void iter_dfloat_mod(na_loop_t* const lp) {
-  size_t i = 0;
-  size_t n;
-  char *p1, *p2, *p3;
-  ssize_t s1, s2, s3;
-  INIT_COUNTER(lp, n);
-  INIT_PTR(lp, 0, p1, s1);
-  INIT_PTR(lp, 1, p2, s2);
-  INIT_PTR(lp, 2, p3, s3);
-  //
-  if (is_aligned(p1, sizeof(dtype)) && is_aligned(p2, sizeof(dtype)) &&
-      is_aligned(p3, sizeof(dtype))) {
-    if (s1 == sizeof(dtype) && s2 == sizeof(dtype) && s3 == sizeof(dtype)) {
-      if (p1 == p3) { // inplace case
-        for (; i < n; i++) {
-          check_intdivzero(((dtype*)p2)[i]);
-          ((dtype*)p1)[i] = m_mod(((dtype*)p1)[i], ((dtype*)p2)[i]);
-        }
-      } else {
-        for (; i < n; i++) {
-          check_intdivzero(((dtype*)p2)[i]);
-          ((dtype*)p3)[i] = m_mod(((dtype*)p1)[i], ((dtype*)p2)[i]);
-        }
-      }
-      return;
-    }
-    if (is_aligned_step(s1, sizeof(dtype)) && is_aligned_step(s2, sizeof(dtype)) &&
-        is_aligned_step(s3, sizeof(dtype))) {
-      //
-      if (s2 == 0) { // Broadcasting from scalar value.
-        check_intdivzero(*(dtype*)p2);
-        if (s1 == sizeof(dtype) && s3 == sizeof(dtype)) {
-          if (p1 == p3) { // inplace case
-            for (; i < n; i++) {
-              ((dtype*)p1)[i] = m_mod(((dtype*)p1)[i], *(dtype*)p2);
-            }
-          } else {
-            for (; i < n; i++) {
-              ((dtype*)p3)[i] = m_mod(((dtype*)p1)[i], *(dtype*)p2);
-            }
-          }
-        } else {
-          for (i = 0; i < n; i++) {
-            *(dtype*)p3 = m_mod(*(dtype*)p1, *(dtype*)p2);
-            p1 += s1;
-            p3 += s3;
-          }
-        }
-      } else {
-        if (p1 == p3) { // inplace case
-          for (i = 0; i < n; i++) {
-            check_intdivzero(*(dtype*)p2);
-            *(dtype*)p1 = m_mod(*(dtype*)p1, *(dtype*)p2);
-            p1 += s1;
-            p2 += s2;
-          }
-        } else {
-          for (i = 0; i < n; i++) {
-            check_intdivzero(*(dtype*)p2);
-            *(dtype*)p3 = m_mod(*(dtype*)p1, *(dtype*)p2);
-            p1 += s1;
-            p2 += s2;
-            p3 += s3;
-          }
-        }
-      }
-      return;
-      //
-    }
-  }
-  for (i = 0; i < n; i++) {
-    dtype x, y, z;
-    GET_DATA_STRIDE(p1, s1, dtype, x);
-    GET_DATA_STRIDE(p2, s2, dtype, y);
-    check_intdivzero(y);
-    z = m_mod(x, y);
-    SET_DATA_STRIDE(p3, s3, dtype, z);
-  }
-  //
-}
-#undef check_intdivzero
-static VALUE dfloat_mod_self(VALUE self, VALUE other) {
-  ndfunc_arg_in_t ain[2] = { { cT, 0 }, { cT, 0 } };
-  ndfunc_arg_out_t aout[1] = { { cT, 0 } };
-  ndfunc_t ndf = { iter_dfloat_mod, STRIDE_LOOP, 2, 1, ain, aout };
-  return na_ndloop(&ndf, 2, self, other);
-}
-static VALUE dfloat_mod(VALUE self, VALUE other) {
-  VALUE klass, v;
-  klass = na_upcast(rb_obj_class(self), rb_obj_class(other));
-  if (klass == cT) {
-    return dfloat_mod_self(self, other);
-  } else {
-    v = rb_funcall(klass, id_cast, 1, self);
-    return rb_funcall(v, '%', 1, other);
-  }
-}
-static void iter_dfloat_divmod(na_loop_t* const lp) {
-  size_t i, n;
-  char *p1, *p2, *p3, *p4;
-  ssize_t s1, s2, s3, s4;
-  dtype x, y, a, b;
-  INIT_COUNTER(lp, n);
-  INIT_PTR(lp, 0, p1, s1);
-  INIT_PTR(lp, 1, p2, s2);
-  INIT_PTR(lp, 2, p3, s3);
-  INIT_PTR(lp, 3, p4, s4);
-  for (i = n; i--;) {
-    GET_DATA_STRIDE(p1, s1, dtype, x);
-    GET_DATA_STRIDE(p2, s2, dtype, y);
-    m_divmod(x, y, a, b);
-    SET_DATA_STRIDE(p3, s3, dtype, a);
-    SET_DATA_STRIDE(p4, s4, dtype, b);
-  }
-}
-static VALUE dfloat_divmod_self(VALUE self, VALUE other) {
-  ndfunc_arg_in_t ain[2] = { { cT, 0 }, { cT, 0 } };
-  ndfunc_arg_out_t aout[2] = { { cT, 0 }, { cT, 0 } };
-  ndfunc_t ndf = { iter_dfloat_divmod, STRIDE_LOOP, 2, 2, ain, aout };
-  return na_ndloop(&ndf, 2, self, other);
-}
-static VALUE dfloat_divmod(VALUE self, VALUE other) {
-  VALUE klass, v;
-  klass = na_upcast(rb_obj_class(self), rb_obj_class(other));
-  if (klass == cT) {
-    return dfloat_divmod_self(self, other);
-  } else {
-    v = rb_funcall(klass, id_cast, 1, self);
-    return rb_funcall(v, id_divmod, 1, other);
-  }
-}
-static void iter_dfloat_pow(na_loop_t* const lp) {
-  size_t i;
-  char *p1, *p2, *p3;
-  ssize_t s1, s2, s3;
-  dtype x, y;
-  INIT_COUNTER(lp, i);
-  INIT_PTR(lp, 0, p1, s1);
-  INIT_PTR(lp, 1, p2, s2);
-  INIT_PTR(lp, 2, p3, s3);
-  for (; i--;) {
-    GET_DATA_STRIDE(p1, s1, dtype, x);
-    GET_DATA_STRIDE(p2, s2, dtype, y);
-    x = m_pow(x, y);
-    SET_DATA_STRIDE(p3, s3, dtype, x);
-  }
-}
-static void iter_dfloat_pow_int32(na_loop_t* const lp) {
-  size_t i;
-  char *p1, *p2, *p3;
-  ssize_t s1, s2, s3;
-  dtype x;
-  int32_t y;
-  INIT_COUNTER(lp, i);
-  INIT_PTR(lp, 0, p1, s1);
-  INIT_PTR(lp, 1, p2, s2);
-  INIT_PTR(lp, 2, p3, s3);
-  for (; i--;) {
-    GET_DATA_STRIDE(p1, s1, dtype, x);
-    GET_DATA_STRIDE(p2, s2, int32_t, y);
-    x = m_pow_int(x, y);
-    SET_DATA_STRIDE(p3, s3, dtype, x);
-  }
-}
-static VALUE dfloat_pow_self(VALUE self, VALUE other) {
-  ndfunc_arg_in_t ain[2] = { { cT, 0 }, { cT, 0 } };
-  ndfunc_arg_in_t ain_i[2] = { { cT, 0 }, { numo_cInt32, 0 } };
-  ndfunc_arg_out_t aout[1] = { { cT, 0 } };
-  ndfunc_t ndf = { iter_dfloat_pow, STRIDE_LOOP, 2, 1, ain, aout };
-  ndfunc_t ndf_i = { iter_dfloat_pow_int32, STRIDE_LOOP, 2, 1, ain_i, aout };
-  // fixme : use na.integer?
-  if (FIXNUM_P(other) || rb_obj_is_kind_of(other, numo_cInt32)) {
-    return na_ndloop(&ndf_i, 2, self, other);
-  } else {
-    return na_ndloop(&ndf, 2, self, other);
-  }
-}
-static VALUE dfloat_pow(VALUE self, VALUE other) {
-  VALUE klass, v;
-  klass = na_upcast(rb_obj_class(self), rb_obj_class(other));
-  if (klass == cT) {
-    return dfloat_pow_self(self, other);
-  } else {
-    v = rb_funcall(klass, id_cast, 1, self);
-    return rb_funcall(v, id_pow, 1, other);
-  }
-}
-static void iter_dfloat_minus(na_loop_t* const lp) {
-  size_t i, n;
-  char *p1, *p2;
-  ssize_t s1, s2;
-  size_t *idx1, *idx2;
-  dtype x;
-  INIT_COUNTER(lp, n);
-  INIT_PTR_IDX(lp, 0, p1, s1, idx1);
-  INIT_PTR_IDX(lp, 1, p2, s2, idx2);
-  if (idx1) {
-    if (idx2) {
-      for (i = 0; i < n; i++) {
-        GET_DATA_INDEX(p1, idx1, dtype, x);
-        x = m_minus(x);
-        SET_DATA_INDEX(p2, idx2, dtype, x);
-      }
-    } else {
-      for (i = 0; i < n; i++) {
-        GET_DATA_INDEX(p1, idx1, dtype, x);
-        x = m_minus(x);
-        SET_DATA_STRIDE(p2, s2, dtype, x);
-      }
-    }
-  } else {
-    if (idx2) {
-      for (i = 0; i < n; i++) {
-        GET_DATA_STRIDE(p1, s1, dtype, x);
-        x = m_minus(x);
-        SET_DATA_INDEX(p2, idx2, dtype, x);
-      }
-    } else {
-      //
-      if (is_aligned(p1, sizeof(dtype)) && is_aligned(p2, sizeof(dtype))) {
-        if (s1 == sizeof(dtype) && s2 == sizeof(dtype)) {
-          for (i = 0; i < n; i++) {
-            ((dtype*)p2)[i] = m_minus(((dtype*)p1)[i]);
-          }
-          return;
-        }
-        if (is_aligned_step(s1, sizeof(dtype)) && is_aligned_step(s2, sizeof(dtype))) {
-          //
-          for (i = 0; i < n; i++) {
-            *(dtype*)p2 = m_minus(*(dtype*)p1);
-            p1 += s1;
-            p2 += s2;
-          }
-          return;
-          //
+          return;
+          //
         }
       }
       for (i = 0; i < n; i++) {
         GET_DATA_STRIDE(p1, s1, dtype, x);
-        x = m_minus(x);
+        x = m_square(x);
         SET_DATA_STRIDE(p2, s2, dtype, x);
       }
-      //
-    }
-  }
-}
-static VALUE dfloat_minus(VALUE self) {
-  ndfunc_arg_in_t ain[1] = { { cT, 0 } };
-  ndfunc_arg_out_t aout[1] = { { cT, 0 } };
-  ndfunc_t ndf = { iter_dfloat_minus, FULL_LOOP, 1, 1, ain, aout };
-  return na_ndloop(&ndf, 1, self);
-}
-static void iter_dfloat_reciprocal(na_loop_t* const lp) {
-  size_t i, n;
-  char *p1, *p2;
-  ssize_t s1, s2;
-  size_t *idx1, *idx2;
-  dtype x;
-  INIT_COUNTER(lp, n);
-  INIT_PTR_IDX(lp, 0, p1, s1, idx1);
-  INIT_PTR_IDX(lp, 1, p2, s2, idx2);
-  if (idx1) {
-    if (idx2) {
-      for (i = 0; i < n; i++) {
-        GET_DATA_INDEX(p1, idx1, dtype, x);
-        x = m_reciprocal(x);
-        SET_DATA_INDEX(p2, idx2, dtype, x);
-      }
-    } else {
-      for (i = 0; i < n; i++) {
-        GET_DATA_INDEX(p1, idx1, dtype, x);
-        x = m_reciprocal(x);
-        SET_DATA_STRIDE(p2, s2, dtype, x);
-      }
-    }
-  } else {
-    if (idx2) {
-      for (i = 0; i < n; i++) {
-        GET_DATA_STRIDE(p1, s1, dtype, x);
-        x = m_reciprocal(x);
-        SET_DATA_INDEX(p2, idx2, dtype, x);
-      }
-    } else {
-      //
-      if (is_aligned(p1, sizeof(dtype)) && is_aligned(p2, sizeof(dtype))) {
-        if (s1 == sizeof(dtype) && s2 == sizeof(dtype)) {
-          for (i = 0; i < n; i++) {
-            ((dtype*)p2)[i] = m_reciprocal(((dtype*)p1)[i]);
-          }
-          return;
-        }
-        if (is_aligned_step(s1, sizeof(dtype)) && is_aligned_step(s2, sizeof(dtype))) {
-          //
-          for (i = 0; i < n; i++) {
-            *(dtype*)p2 = m_reciprocal(*(dtype*)p1);
-            p1 += s1;
-            p2 += s2;
-          }
-          return;
-          //
-        }
-      }
-      for (i = 0; i < n; i++) {
-        GET_DATA_STRIDE(p1, s1, dtype, x);
-        x = m_reciprocal(x);
-        SET_DATA_STRIDE(p2, s2, dtype, x);
-      }
-      //
-    }
-  }
-}
-static VALUE dfloat_reciprocal(VALUE self) {
-  ndfunc_arg_in_t ain[1] = { { cT, 0 } };
-  ndfunc_arg_out_t aout[1] = { { cT, 0 } };
-  ndfunc_t ndf = { iter_dfloat_reciprocal, FULL_LOOP, 1, 1, ain, aout };
-  return na_ndloop(&ndf, 1, self);
-}
-static void iter_dfloat_sign(na_loop_t* const lp) {
-  size_t i, n;
-  char *p1, *p2;
-  ssize_t s1, s2;
-  size_t *idx1, *idx2;
-  dtype x;
-  INIT_COUNTER(lp, n);
-  INIT_PTR_IDX(lp, 0, p1, s1, idx1);
-  INIT_PTR_IDX(lp, 1, p2, s2, idx2);
-  if (idx1) {
-    if (idx2) {
-      for (i = 0; i < n; i++) {
-        GET_DATA_INDEX(p1, idx1, dtype, x);
-        x = m_sign(x);
-        SET_DATA_INDEX(p2, idx2, dtype, x);
-      }
-    } else {
-      for (i = 0; i < n; i++) {
-        GET_DATA_INDEX(p1, idx1, dtype, x);
-        x = m_sign(x);
-        SET_DATA_STRIDE(p2, s2, dtype, x);
-      }
-    }
-  } else {
-    if (idx2) {
-      for (i = 0; i < n; i++) {
-        GET_DATA_STRIDE(p1, s1, dtype, x);
-        x = m_sign(x);
-        SET_DATA_INDEX(p2, idx2, dtype, x);
-      }
-    } else {
-      //
-      if (is_aligned(p1, sizeof(dtype)) && is_aligned(p2, sizeof(dtype))) {
-        if (s1 == sizeof(dtype) && s2 == sizeof(dtype)) {
-          for (i = 0; i < n; i++) {
-            ((dtype*)p2)[i] = m_sign(((dtype*)p1)[i]);
-          }
-          return;
-        }
-        if (is_aligned_step(s1, sizeof(dtype)) && is_aligned_step(s2, sizeof(dtype))) {
-          //
-          for (i = 0; i < n; i++) {
-            *(dtype*)p2 = m_sign(*(dtype*)p1);
-            p1 += s1;
-            p2 += s2;
-          }
-          return;
-          //
-        }
-      }
-      for (i = 0; i < n; i++) {
-        GET_DATA_STRIDE(p1, s1, dtype, x);
-        x = m_sign(x);
-        SET_DATA_STRIDE(p2, s2, dtype, x);
-      }
-      //
-    }
-  }
-}
-static VALUE dfloat_sign(VALUE self) {
-  ndfunc_arg_in_t ain[1] = { { cT, 0 } };
-  ndfunc_arg_out_t aout[1] = { { cT, 0 } };
-  ndfunc_t ndf = { iter_dfloat_sign, FULL_LOOP, 1, 1, ain, aout };
-  return na_ndloop(&ndf, 1, self);
-}
-static void iter_dfloat_square(na_loop_t* const lp) {
-  size_t i, n;
-  char *p1, *p2;
-  ssize_t s1, s2;
-  size_t *idx1, *idx2;
-  dtype x;
-  INIT_COUNTER(lp, n);
-  INIT_PTR_IDX(lp, 0, p1, s1, idx1);
-  INIT_PTR_IDX(lp, 1, p2, s2, idx2);
-  if (idx1) {
-    if (idx2) {
-      for (i = 0; i < n; i++) {
-        GET_DATA_INDEX(p1, idx1, dtype, x);
-        x = m_square(x);
-        SET_DATA_INDEX(p2, idx2, dtype, x);
-      }
-    } else {
-      for (i = 0; i < n; i++) {
-        GET_DATA_INDEX(p1, idx1, dtype, x);
-        x = m_square(x);
-        SET_DATA_STRIDE(p2, s2, dtype, x);
-      }
-    }
-  } else {
-    if (idx2) {
-      for (i = 0; i < n; i++) {
-        GET_DATA_STRIDE(p1, s1, dtype, x);
-        x = m_square(x);
-        SET_DATA_INDEX(p2, idx2, dtype, x);
-      }
-    } else {
-      //
-      if (is_aligned(p1, sizeof(dtype)) && is_aligned(p2, sizeof(dtype))) {
-        if (s1 == sizeof(dtype) && s2 == sizeof(dtype)) {
-          for (i = 0; i < n; i++) {
-            ((dtype*)p2)[i] = m_square(((dtype*)p1)[i]);
-          }
-          return;
-        }
-        if (is_aligned_step(s1, sizeof(dtype)) && is_aligned_step(s2, sizeof(dtype))) {
-          //
-          for (i = 0; i < n; i++) {
-            *(dtype*)p2 = m_square(*(dtype*)p1);
-            p1 += s1;
-            p2 += s2;
-          }
-          return;
-          //
-        }
-      }
-      for (i = 0; i < n; i++) {
-        GET_DATA_STRIDE(p1, s1, dtype, x);
-        x = m_square(x);
-        SET_DATA_STRIDE(p2, s2, dtype, x);
-      }
-      //
-    }
-  }
-}
-static VALUE dfloat_square(VALUE self) {
-  ndfunc_arg_in_t ain[1] = { { cT, 0 } };
-  ndfunc_arg_out_t aout[1] = { { cT, 0 } };
-  ndfunc_t ndf = { iter_dfloat_square, FULL_LOOP, 1, 1, ain, aout };
-  return na_ndloop(&ndf, 1, self);
-}
-static void iter_dfloat_eq(na_loop_t* const lp) {
-  size_t i;
-  char *p1, *p2;
-  BIT_DIGIT* a3;
-  size_t p3;
-  ssize_t s1, s2, s3;
-  dtype x, y;
-  BIT_DIGIT b;
-  INIT_COUNTER(lp, i);
-  INIT_PTR(lp, 0, p1, s1);
-  INIT_PTR(lp, 1, p2, s2);
-  INIT_PTR_BIT(lp, 2, a3, p3, s3);
-  for (; i--;) {
-    GET_DATA_STRIDE(p1, s1, dtype, x);
-    GET_DATA_STRIDE(p2, s2, dtype, y);
-    b = (m_eq(x, y)) ? 1 : 0;
-    STORE_BIT(a3, p3, b);
-    p3 += s3;
-  }
-}
-static VALUE dfloat_eq_self(VALUE self, VALUE other) {
-  ndfunc_arg_in_t ain[2] = { { cT, 0 }, { cT, 0 } };
-  ndfunc_arg_out_t aout[1] = { { numo_cBit, 0 } };
-  ndfunc_t ndf = { iter_dfloat_eq, STRIDE_LOOP, 2, 1, ain, aout };
-  return na_ndloop(&ndf, 2, self, other);
-}
-static VALUE dfloat_eq(VALUE self, VALUE other) {
-  VALUE klass, v;
-  klass = na_upcast(rb_obj_class(self), rb_obj_class(other));
-  if (klass == cT) {
-    return dfloat_eq_self(self, other);
-  } else {
-    v = rb_funcall(klass, id_cast, 1, self);
-    return rb_funcall(v, id_eq, 1, other);
-  }
-}
-static void iter_dfloat_ne(na_loop_t* const lp) {
-  size_t i;
-  char *p1, *p2;
-  BIT_DIGIT* a3;
-  size_t p3;
-  ssize_t s1, s2, s3;
-  dtype x, y;
-  BIT_DIGIT b;
-  INIT_COUNTER(lp, i);
-  INIT_PTR(lp, 0, p1, s1);
-  INIT_PTR(lp, 1, p2, s2);
-  INIT_PTR_BIT(lp, 2, a3, p3, s3);
-  for (; i--;) {
-    GET_DATA_STRIDE(p1, s1, dtype, x);
-    GET_DATA_STRIDE(p2, s2, dtype, y);
-    b = (m_ne(x, y)) ? 1 : 0;
-    STORE_BIT(a3, p3, b);
-    p3 += s3;
-  }
-}
-static VALUE dfloat_ne_self(VALUE self, VALUE other) {
-  ndfunc_arg_in_t ain[2] = { { cT, 0 }, { cT, 0 } };
-  ndfunc_arg_out_t aout[1] = { { numo_cBit, 0 } };
-  ndfunc_t ndf = { iter_dfloat_ne, STRIDE_LOOP, 2, 1, ain, aout };
-  return na_ndloop(&ndf, 2, self, other);
-}
-static VALUE dfloat_ne(VALUE self, VALUE other) {
-  VALUE klass, v;
-  klass = na_upcast(rb_obj_class(self), rb_obj_class(other));
-  if (klass == cT) {
-    return dfloat_ne_self(self, other);
-  } else {
-    v = rb_funcall(klass, id_cast, 1, self);
-    return rb_funcall(v, id_ne, 1, other);
-  }
-}
-static void iter_dfloat_nearly_eq(na_loop_t* const lp) {
-  size_t i;
-  char *p1, *p2;
-  BIT_DIGIT* a3;
-  size_t p3;
-  ssize_t s1, s2, s3;
-  dtype x, y;
-  BIT_DIGIT b;
-  INIT_COUNTER(lp, i);
-  INIT_PTR(lp, 0, p1, s1);
-  INIT_PTR(lp, 1, p2, s2);
-  INIT_PTR_BIT(lp, 2, a3, p3, s3);
-  for (; i--;) {
-    GET_DATA_STRIDE(p1, s1, dtype, x);
-    GET_DATA_STRIDE(p2, s2, dtype, y);
-    b = (m_nearly_eq(x, y)) ? 1 : 0;
-    STORE_BIT(a3, p3, b);
-    p3 += s3;
-  }
-}
-static VALUE dfloat_nearly_eq_self(VALUE self, VALUE other) {
-  ndfunc_arg_in_t ain[2] = { { cT, 0 }, { cT, 0 } };
-  ndfunc_arg_out_t aout[1] = { { numo_cBit, 0 } };
-  ndfunc_t ndf = { iter_dfloat_nearly_eq, STRIDE_LOOP, 2, 1, ain, aout };
-  return na_ndloop(&ndf, 2, self, other);
-}
-static VALUE dfloat_nearly_eq(VALUE self, VALUE other) {
-  VALUE klass, v;
-  klass = na_upcast(rb_obj_class(self), rb_obj_class(other));
-  if (klass == cT) {
-    return dfloat_nearly_eq_self(self, other);
-  } else {
-    v = rb_funcall(klass, id_cast, 1, self);
-    return rb_funcall(v, id_nearly_eq, 1, other);
-  }
-}
-static void iter_dfloat_floor(na_loop_t* const lp) {
-  size_t i, n;
-  char *p1, *p2;
-  ssize_t s1, s2;
-  size_t *idx1, *idx2;
-  dtype x;
-  INIT_COUNTER(lp, n);
-  INIT_PTR_IDX(lp, 0, p1, s1, idx1);
-  INIT_PTR_IDX(lp, 1, p2, s2, idx2);
-  if (idx1) {
-    if (idx2) {
-      for (i = 0; i < n; i++) {
-        GET_DATA_INDEX(p1, idx1, dtype, x);
-        x = m_floor(x);
-        SET_DATA_INDEX(p2, idx2, dtype, x);
-      }
-    } else {
-      for (i = 0; i < n; i++) {
-        GET_DATA_INDEX(p1, idx1, dtype, x);
-        x = m_floor(x);
-        SET_DATA_STRIDE(p2, s2, dtype, x);
-      }
-    }
-  } else {
-    if (idx2) {
-      for (i = 0; i < n; i++) {
-        GET_DATA_STRIDE(p1, s1, dtype, x);
-        x = m_floor(x);
-        SET_DATA_INDEX(p2, idx2, dtype, x);
-      }
-    } else {
-      //
-      if (is_aligned(p1, sizeof(dtype)) && is_aligned(p2, sizeof(dtype))) {
-        if (s1 == sizeof(dtype) && s2 == sizeof(dtype)) {
-          for (i = 0; i < n; i++) {
-            ((dtype*)p2)[i] = m_floor(((dtype*)p1)[i]);
-          }
-          return;
-        }
-        if (is_aligned_step(s1, sizeof(dtype)) && is_aligned_step(s2, sizeof(dtype))) {
-          //
-          for (i = 0; i < n; i++) {
-            *(dtype*)p2 = m_floor(*(dtype*)p1);
-            p1 += s1;
-            p2 += s2;
-          }
-          return;
-          //
-        }
-      }
-      for (i = 0; i < n; i++) {
-        GET_DATA_STRIDE(p1, s1, dtype, x);
-        x = m_floor(x);
-        SET_DATA_STRIDE(p2, s2, dtype, x);
-      }
-      //
-    }
-  }
-}
-static VALUE dfloat_floor(VALUE self) {
-  ndfunc_arg_in_t ain[1] = { { cT, 0 } };
-  ndfunc_arg_out_t aout[1] = { { cT, 0 } };
-  ndfunc_t ndf = { iter_dfloat_floor, FULL_LOOP, 1, 1, ain, aout };
-  return na_ndloop(&ndf, 1, self);
-}
-static void iter_dfloat_round(na_loop_t* const lp) {
-  size_t i, n;
-  char *p1, *p2;
-  ssize_t s1, s2;
-  size_t *idx1, *idx2;
-  dtype x;
-  INIT_COUNTER(lp, n);
-  INIT_PTR_IDX(lp, 0, p1, s1, idx1);
-  INIT_PTR_IDX(lp, 1, p2, s2, idx2);
-  if (idx1) {
-    if (idx2) {
-      for (i = 0; i < n; i++) {
-        GET_DATA_INDEX(p1, idx1, dtype, x);
-        x = m_round(x);
-        SET_DATA_INDEX(p2, idx2, dtype, x);
-      }
-    } else {
-      for (i = 0; i < n; i++) {
-        GET_DATA_INDEX(p1, idx1, dtype, x);
-        x = m_round(x);
-        SET_DATA_STRIDE(p2, s2, dtype, x);
-      }
-    }
-  } else {
-    if (idx2) {
-      for (i = 0; i < n; i++) {
-        GET_DATA_STRIDE(p1, s1, dtype, x);
-        x = m_round(x);
-        SET_DATA_INDEX(p2, idx2, dtype, x);
-      }
-    } else {
-      //
-      if (is_aligned(p1, sizeof(dtype)) && is_aligned(p2, sizeof(dtype))) {
-        if (s1 == sizeof(dtype) && s2 == sizeof(dtype)) {
-          for (i = 0; i < n; i++) {
-            ((dtype*)p2)[i] = m_round(((dtype*)p1)[i]);
-          }
-          return;
-        }
-        if (is_aligned_step(s1, sizeof(dtype)) && is_aligned_step(s2, sizeof(dtype))) {
-          //
-          for (i = 0; i < n; i++) {
-            *(dtype*)p2 = m_round(*(dtype*)p1);
-            p1 += s1;
-            p2 += s2;
-          }
-          return;
-          //
-        }
-      }
-      for (i = 0; i < n; i++) {
-        GET_DATA_STRIDE(p1, s1, dtype, x);
-        x = m_round(x);
-        SET_DATA_STRIDE(p2, s2, dtype, x);
-      }
-      //
-    }
-  }
-}
-static VALUE dfloat_round(VALUE self) {
-  ndfunc_arg_in_t ain[1] = { { cT, 0 } };
-  ndfunc_arg_out_t aout[1] = { { cT, 0 } };
-  ndfunc_t ndf = { iter_dfloat_round, FULL_LOOP, 1, 1, ain, aout };
-  return na_ndloop(&ndf, 1, self);
-}
-static void iter_dfloat_ceil(na_loop_t* const lp) {
-  size_t i, n;
-  char *p1, *p2;
-  ssize_t s1, s2;
-  size_t *idx1, *idx2;
-  dtype x;
-  INIT_COUNTER(lp, n);
-  INIT_PTR_IDX(lp, 0, p1, s1, idx1);
-  INIT_PTR_IDX(lp, 1, p2, s2, idx2);
-  if (idx1) {
-    if (idx2) {
-      for (i = 0; i < n; i++) {
-        GET_DATA_INDEX(p1, idx1, dtype, x);
-        x = m_ceil(x);
-        SET_DATA_INDEX(p2, idx2, dtype, x);
-      }
-    } else {
-      for (i = 0; i < n; i++) {
-        GET_DATA_INDEX(p1, idx1, dtype, x);
-        x = m_ceil(x);
-        SET_DATA_STRIDE(p2, s2, dtype, x);
-      }
-    }
-  } else {
-    if (idx2) {
-      for (i = 0; i < n; i++) {
-        GET_DATA_STRIDE(p1, s1, dtype, x);
-        x = m_ceil(x);
-        SET_DATA_INDEX(p2, idx2, dtype, x);
-      }
-    } else {
-      //
-      if (is_aligned(p1, sizeof(dtype)) && is_aligned(p2, sizeof(dtype))) {
-        if (s1 == sizeof(dtype) && s2 == sizeof(dtype)) {
-          for (i = 0; i < n; i++) {
-            ((dtype*)p2)[i] = m_ceil(((dtype*)p1)[i]);
-          }
-          return;
-        }
-        if (is_aligned_step(s1, sizeof(dtype)) && is_aligned_step(s2, sizeof(dtype))) {
-          //
-          for (i = 0; i < n; i++) {
-            *(dtype*)p2 = m_ceil(*(dtype*)p1);
-            p1 += s1;
-            p2 += s2;
-          }
-          return;
-          //
-        }
-      }
-      for (i = 0; i < n; i++) {
-        GET_DATA_STRIDE(p1, s1, dtype, x);
-        x = m_ceil(x);
-        SET_DATA_STRIDE(p2, s2, dtype, x);
-      }
-      //
-    }
-  }
-}
-static VALUE dfloat_ceil(VALUE self) {
-  ndfunc_arg_in_t ain[1] = { { cT, 0 } };
-  ndfunc_arg_out_t aout[1] = { { cT, 0 } };
-  ndfunc_t ndf = { iter_dfloat_ceil, FULL_LOOP, 1, 1, ain, aout };
-  return na_ndloop(&ndf, 1, self);
-}
-static void iter_dfloat_trunc(na_loop_t* const lp) {
-  size_t i, n;
-  char *p1, *p2;
-  ssize_t s1, s2;
-  size_t *idx1, *idx2;
-  dtype x;
-  INIT_COUNTER(lp, n);
-  INIT_PTR_IDX(lp, 0, p1, s1, idx1);
-  INIT_PTR_IDX(lp, 1, p2, s2, idx2);
-  if (idx1) {
-    if (idx2) {
-      for (i = 0; i < n; i++) {
-        GET_DATA_INDEX(p1, idx1, dtype, x);
-        x = m_trunc(x);
-        SET_DATA_INDEX(p2, idx2, dtype, x);
-      }
-    } else {
-      for (i = 0; i < n; i++) {
-        GET_DATA_INDEX(p1, idx1, dtype, x);
-        x = m_trunc(x);
-        SET_DATA_STRIDE(p2, s2, dtype, x);
-      }
-    }
-  } else {
-    if (idx2) {
-      for (i = 0; i < n; i++) {
-        GET_DATA_STRIDE(p1, s1, dtype, x);
-        x = m_trunc(x);
-        SET_DATA_INDEX(p2, idx2, dtype, x);
-      }
-    } else {
-      //
-      if (is_aligned(p1, sizeof(dtype)) && is_aligned(p2, sizeof(dtype))) {
-        if (s1 == sizeof(dtype) && s2 == sizeof(dtype)) {
-          for (i = 0; i < n; i++) {
-            ((dtype*)p2)[i] = m_trunc(((dtype*)p1)[i]);
-          }
-          return;
-        }
-        if (is_aligned_step(s1, sizeof(dtype)) && is_aligned_step(s2, sizeof(dtype))) {
-          //
-          for (i = 0; i < n; i++) {
-            *(dtype*)p2 = m_trunc(*(dtype*)p1);
-            p1 += s1;
-            p2 += s2;
-          }
-          return;
-          //
-        }
-      }
-      for (i = 0; i < n; i++) {
-        GET_DATA_STRIDE(p1, s1, dtype, x);
-        x = m_trunc(x);
-        SET_DATA_STRIDE(p2, s2, dtype, x);
-      }
-      //
-    }
-  }
-}
-static VALUE dfloat_trunc(VALUE self) {
-  ndfunc_arg_in_t ain[1] = { { cT, 0 } };
-  ndfunc_arg_out_t aout[1] = { { cT, 0 } };
-  ndfunc_t ndf = { iter_dfloat_trunc, FULL_LOOP, 1, 1, ain, aout };
-  return na_ndloop(&ndf, 1, self);
-}
-static void iter_dfloat_rint(na_loop_t* const lp) {
-  size_t i, n;
-  char *p1, *p2;
-  ssize_t s1, s2;
-  size_t *idx1, *idx2;
-  dtype x;
-  INIT_COUNTER(lp, n);
-  INIT_PTR_IDX(lp, 0, p1, s1, idx1);
-  INIT_PTR_IDX(lp, 1, p2, s2, idx2);
-  if (idx1) {
-    if (idx2) {
-      for (i = 0; i < n; i++) {
-        GET_DATA_INDEX(p1, idx1, dtype, x);
-        x = m_rint(x);
-        SET_DATA_INDEX(p2, idx2, dtype, x);
-      }
-    } else {
-      for (i = 0; i < n; i++) {
-        GET_DATA_INDEX(p1, idx1, dtype, x);
-        x = m_rint(x);
-        SET_DATA_STRIDE(p2, s2, dtype, x);
-      }
-    }
-  } else {
-    if (idx2) {
-      for (i = 0; i < n; i++) {
-        GET_DATA_STRIDE(p1, s1, dtype, x);
-        x = m_rint(x);
-        SET_DATA_INDEX(p2, idx2, dtype, x);
-      }
-    } else {
-      //
-      if (is_aligned(p1, sizeof(dtype)) && is_aligned(p2, sizeof(dtype))) {
-        if (s1 == sizeof(dtype) && s2 == sizeof(dtype)) {
-          for (i = 0; i < n; i++) {
-            ((dtype*)p2)[i] = m_rint(((dtype*)p1)[i]);
-          }
-          return;
-        }
-        if (is_aligned_step(s1, sizeof(dtype)) && is_aligned_step(s2, sizeof(dtype))) {
-          //
-          for (i = 0; i < n; i++) {
-            *(dtype*)p2 = m_rint(*(dtype*)p1);
-            p1 += s1;
-            p2 += s2;
-          }
-          return;
-          //
-        }
-      }
-      for (i = 0; i < n; i++) {
-        GET_DATA_STRIDE(p1, s1, dtype, x);
-        x = m_rint(x);
-        SET_DATA_STRIDE(p2, s2, dtype, x);
-      }
-      //
-    }
-  }
-}
-static VALUE dfloat_rint(VALUE self) {
-  ndfunc_arg_in_t ain[1] = { { cT, 0 } };
-  ndfunc_arg_out_t aout[1] = { { cT, 0 } };
-  ndfunc_t ndf = { iter_dfloat_rint, FULL_LOOP, 1, 1, ain, aout };
-  return na_ndloop(&ndf, 1, self);
-}
-#define check_intdivzero(y)                                                                    \
-  {}
-static void iter_dfloat_copysign(na_loop_t* const lp) {
-  size_t i = 0;
-  size_t n;
-  char *p1, *p2, *p3;
-  ssize_t s1, s2, s3;
-  INIT_COUNTER(lp, n);
-  INIT_PTR(lp, 0, p1, s1);
-  INIT_PTR(lp, 1, p2, s2);
-  INIT_PTR(lp, 2, p3, s3);
-  //
-  if (is_aligned(p1, sizeof(dtype)) && is_aligned(p2, sizeof(dtype)) &&
-      is_aligned(p3, sizeof(dtype))) {
-    if (s1 == sizeof(dtype) && s2 == sizeof(dtype) && s3 == sizeof(dtype)) {
-      if (p1 == p3) { // inplace case
-        for (; i < n; i++) {
-          check_intdivzero(((dtype*)p2)[i]);
-          ((dtype*)p1)[i] = m_copysign(((dtype*)p1)[i], ((dtype*)p2)[i]);
-        }
-      } else {
-        for (; i < n; i++) {
-          check_intdivzero(((dtype*)p2)[i]);
-          ((dtype*)p3)[i] = m_copysign(((dtype*)p1)[i], ((dtype*)p2)[i]);
-        }
-      }
-      return;
-    }
-    if (is_aligned_step(s1, sizeof(dtype)) && is_aligned_step(s2, sizeof(dtype)) &&
-        is_aligned_step(s3, sizeof(dtype))) {
-      //
-      if (s2 == 0) { // Broadcasting from scalar value.
-        check_intdivzero(*(dtype*)p2);
-        if (s1 == sizeof(dtype) && s3 == sizeof(dtype)) {
-          if (p1 == p3) { // inplace case
-            for (; i < n; i++) {
-              ((dtype*)p1)[i] = m_copysign(((dtype*)p1)[i], *(dtype*)p2);
-            }
-          } else {
-            for (; i < n; i++) {
-              ((dtype*)p3)[i] = m_copysign(((dtype*)p1)[i], *(dtype*)p2);
-            }
-          }
-        } else {
-          for (i = 0; i < n; i++) {
-            *(dtype*)p3 = m_copysign(*(dtype*)p1, *(dtype*)p2);
-            p1 += s1;
-            p3 += s3;
-          }
-        }
-      } else {
-        if (p1 == p3) { // inplace case
-          for (i = 0; i < n; i++) {
-            check_intdivzero(*(dtype*)p2);
-            *(dtype*)p1 = m_copysign(*(dtype*)p1, *(dtype*)p2);
-            p1 += s1;
-            p2 += s2;
-          }
-        } else {
-          for (i = 0; i < n; i++) {
-            check_intdivzero(*(dtype*)p2);
-            *(dtype*)p3 = m_copysign(*(dtype*)p1, *(dtype*)p2);
-            p1 += s1;
-            p2 += s2;
-            p3 += s3;
-          }
-        }
-      }
-      return;
-      //
-    }
-  }
-  for (i = 0; i < n; i++) {
-    dtype x, y, z;
-    GET_DATA_STRIDE(p1, s1, dtype, x);
-    GET_DATA_STRIDE(p2, s2, dtype, y);
-    check_intdivzero(y);
-    z = m_copysign(x, y);
-    SET_DATA_STRIDE(p3, s3, dtype, z);
-  }
-  //
-}
-#undef check_intdivzero
-static VALUE dfloat_copysign_self(VALUE self, VALUE other) {
-  ndfunc_arg_in_t ain[2] = { { cT, 0 }, { cT, 0 } };
-  ndfunc_arg_out_t aout[1] = { { cT, 0 } };
-  ndfunc_t ndf = { iter_dfloat_copysign, STRIDE_LOOP, 2, 1, ain, aout };
-  return na_ndloop(&ndf, 2, self, other);
-}
-static VALUE dfloat_copysign(VALUE self, VALUE other) {
-  VALUE klass, v;
-  klass = na_upcast(rb_obj_class(self), rb_obj_class(other));
-  if (klass == cT) {
-    return dfloat_copysign_self(self, other);
-  } else {
-    v = rb_funcall(klass, id_cast, 1, self);
-    return rb_funcall(v, id_copysign, 1, other);
-  }
-}
-static void iter_dfloat_signbit(na_loop_t* const lp) {
-  size_t i;
-  char* p1;
-  BIT_DIGIT* a2;
-  size_t p2;
-  ssize_t s1, s2;
-  size_t* idx1;
-  dtype x;
-  BIT_DIGIT b;
-  INIT_COUNTER(lp, i);
-  INIT_PTR_IDX(lp, 0, p1, s1, idx1);
-  INIT_PTR_BIT(lp, 1, a2, p2, s2);
-  if (idx1) {
-    for (; i--;) {
-      GET_DATA_INDEX(p1, idx1, dtype, x);
-      b = (m_signbit(x)) ? 1 : 0;
-      STORE_BIT(a2, p2, b);
-      p2 += s2;
-    }
-  } else {
-    for (; i--;) {
-      GET_DATA_STRIDE(p1, s1, dtype, x);
-      b = (m_signbit(x)) ? 1 : 0;
-      STORE_BIT(a2, p2, b);
-      p2 += s2;
-    }
-  }
-}
-static VALUE dfloat_signbit(VALUE self) {
-  ndfunc_arg_in_t ain[1] = { { cT, 0 } };
-  ndfunc_arg_out_t aout[1] = { { numo_cBit, 0 } };
-  ndfunc_t ndf = { iter_dfloat_signbit, FULL_LOOP, 1, 1, ain, aout };
-  return na_ndloop(&ndf, 1, self);
-}
-static void iter_dfloat_modf(na_loop_t* const lp) {
-  size_t i;
-  char *p1, *p2, *p3;
-  ssize_t s1, s2, s3;
-  dtype x, y, z;
-  INIT_COUNTER(lp, i);
-  INIT_PTR(lp, 0, p1, s1);
-  INIT_PTR(lp, 1, p2, s2);
-  INIT_PTR(lp, 2, p3, s3);
-  for (; i--;) {
-    GET_DATA_STRIDE(p1, s1, dtype, x);
-    m_modf(x, y, z);
-    SET_DATA_STRIDE(p2, s2, dtype, y);
-    SET_DATA_STRIDE(p3, s3, dtype, z);
-  }
-}
-static VALUE dfloat_modf(VALUE self) {
-  ndfunc_arg_in_t ain[1] = { { cT, 0 } };
-  ndfunc_arg_out_t aout[2] = { { cT, 0 }, { cT, 0 } };
-  ndfunc_t ndf = { iter_dfloat_modf, STRIDE_LOOP, 1, 2, ain, aout };
-  return na_ndloop(&ndf, 1, self);
-}
-static void iter_dfloat_gt(na_loop_t* const lp) {
-  size_t i;
-  char *p1, *p2;
-  BIT_DIGIT* a3;
-  size_t p3;
-  ssize_t s1, s2, s3;
-  dtype x, y;
-  BIT_DIGIT b;
-  INIT_COUNTER(lp, i);
-  INIT_PTR(lp, 0, p1, s1);
-  INIT_PTR(lp, 1, p2, s2);
-  INIT_PTR_BIT(lp, 2, a3, p3, s3);
-  for (; i--;) {
-    GET_DATA_STRIDE(p1, s1, dtype, x);
-    GET_DATA_STRIDE(p2, s2, dtype, y);
-    b = (m_gt(x, y)) ? 1 : 0;
-    STORE_BIT(a3, p3, b);
-    p3 += s3;
-  }
-}
-static VALUE dfloat_gt_self(VALUE self, VALUE other) {
-  ndfunc_arg_in_t ain[2] = { { cT, 0 }, { cT, 0 } };
-  ndfunc_arg_out_t aout[1] = { { numo_cBit, 0 } };
-  ndfunc_t ndf = { iter_dfloat_gt, STRIDE_LOOP, 2, 1, ain, aout };
-  return na_ndloop(&ndf, 2, self, other);
-}
-static VALUE dfloat_gt(VALUE self, VALUE other) {
-  VALUE klass, v;
-  klass = na_upcast(rb_obj_class(self), rb_obj_class(other));
-  if (klass == cT) {
-    return dfloat_gt_self(self, other);
-  } else {
-    v = rb_funcall(klass, id_cast, 1, self);
-    return rb_funcall(v, id_gt, 1, other);
-  }
-}
-static void iter_dfloat_ge(na_loop_t* const lp) {
-  size_t i;
-  char *p1, *p2;
-  BIT_DIGIT* a3;
-  size_t p3;
-  ssize_t s1, s2, s3;
-  dtype x, y;
-  BIT_DIGIT b;
-  INIT_COUNTER(lp, i);
-  INIT_PTR(lp, 0, p1, s1);
-  INIT_PTR(lp, 1, p2, s2);
-  INIT_PTR_BIT(lp, 2, a3, p3, s3);
-  for (; i--;) {
-    GET_DATA_STRIDE(p1, s1, dtype, x);
-    GET_DATA_STRIDE(p2, s2, dtype, y);
-    b = (m_ge(x, y)) ? 1 : 0;
-    STORE_BIT(a3, p3, b);
-    p3 += s3;
-  }
-}
-static VALUE dfloat_ge_self(VALUE self, VALUE other) {
-  ndfunc_arg_in_t ain[2] = { { cT, 0 }, { cT, 0 } };
-  ndfunc_arg_out_t aout[1] = { { numo_cBit, 0 } };
-  ndfunc_t ndf = { iter_dfloat_ge, STRIDE_LOOP, 2, 1, ain, aout };
-  return na_ndloop(&ndf, 2, self, other);
-}
-static VALUE dfloat_ge(VALUE self, VALUE other) {
-  VALUE klass, v;
-  klass = na_upcast(rb_obj_class(self), rb_obj_class(other));
-  if (klass == cT) {
-    return dfloat_ge_self(self, other);
-  } else {
-    v = rb_funcall(klass, id_cast, 1, self);
-    return rb_funcall(v, id_ge, 1, other);
-  }
-}
-static void iter_dfloat_lt(na_loop_t* const lp) {
-  size_t i;
-  char *p1, *p2;
-  BIT_DIGIT* a3;
-  size_t p3;
-  ssize_t s1, s2, s3;
-  dtype x, y;
-  BIT_DIGIT b;
-  INIT_COUNTER(lp, i);
-  INIT_PTR(lp, 0, p1, s1);
-  INIT_PTR(lp, 1, p2, s2);
-  INIT_PTR_BIT(lp, 2, a3, p3, s3);
-  for (; i--;) {
-    GET_DATA_STRIDE(p1, s1, dtype, x);
-    GET_DATA_STRIDE(p2, s2, dtype, y);
-    b = (m_lt(x, y)) ? 1 : 0;
-    STORE_BIT(a3, p3, b);
-    p3 += s3;
-  }
-}
-static VALUE dfloat_lt_self(VALUE self, VALUE other) {
-  ndfunc_arg_in_t ain[2] = { { cT, 0 }, { cT, 0 } };
-  ndfunc_arg_out_t aout[1] = { { numo_cBit, 0 } };
-  ndfunc_t ndf = { iter_dfloat_lt, STRIDE_LOOP, 2, 1, ain, aout };
-  return na_ndloop(&ndf, 2, self, other);
-}
-static VALUE dfloat_lt(VALUE self, VALUE other) {
-  VALUE klass, v;
-  klass = na_upcast(rb_obj_class(self), rb_obj_class(other));
-  if (klass == cT) {
-    return dfloat_lt_self(self, other);
-  } else {
-    v = rb_funcall(klass, id_cast, 1, self);
-    return rb_funcall(v, id_lt, 1, other);
-  }
-}
-static void iter_dfloat_le(na_loop_t* const lp) {
-  size_t i;
-  char *p1, *p2;
-  BIT_DIGIT* a3;
-  size_t p3;
-  ssize_t s1, s2, s3;
-  dtype x, y;
-  BIT_DIGIT b;
-  INIT_COUNTER(lp, i);
-  INIT_PTR(lp, 0, p1, s1);
-  INIT_PTR(lp, 1, p2, s2);
-  INIT_PTR_BIT(lp, 2, a3, p3, s3);
-  for (; i--;) {
-    GET_DATA_STRIDE(p1, s1, dtype, x);
-    GET_DATA_STRIDE(p2, s2, dtype, y);
-    b = (m_le(x, y)) ? 1 : 0;
-    STORE_BIT(a3, p3, b);
-    p3 += s3;
-  }
-}
-static VALUE dfloat_le_self(VALUE self, VALUE other) {
-  ndfunc_arg_in_t ain[2] = { { cT, 0 }, { cT, 0 } };
-  ndfunc_arg_out_t aout[1] = { { numo_cBit, 0 } };
-  ndfunc_t ndf = { iter_dfloat_le, STRIDE_LOOP, 2, 1, ain, aout };
-  return na_ndloop(&ndf, 2, self, other);
-}
-static VALUE dfloat_le(VALUE self, VALUE other) {
-  VALUE klass, v;
-  klass = na_upcast(rb_obj_class(self), rb_obj_class(other));
-  if (klass == cT) {
-    return dfloat_le_self(self, other);
-  } else {
-    v = rb_funcall(klass, id_cast, 1, self);
-    return rb_funcall(v, id_le, 1, other);
-  }
-}
-static void iter_dfloat_clip(na_loop_t* const lp) {
-  size_t i;
-  char *p1, *p2, *p3, *p4;
-  ssize_t s1, s2, s3, s4;
-  dtype x, min, max;
-  INIT_COUNTER(lp, i);
-  INIT_PTR(lp, 0, p1, s1);
-  INIT_PTR(lp, 1, p2, s2);
-  INIT_PTR(lp, 2, p3, s3);
-  INIT_PTR(lp, 3, p4, s4);
-  for (; i--;) {
-    GET_DATA_STRIDE(p1, s1, dtype, x);
-    GET_DATA_STRIDE(p2, s2, dtype, min);
-    GET_DATA_STRIDE(p3, s3, dtype, max);
-    if (m_gt(min, max)) {
-      rb_raise(nary_eOperationError, "min is greater than max");
-    }
-    if (m_lt(x, min)) {
-      x = min;
-    }
-    if (m_gt(x, max)) {
-      x = max;
-    }
-    SET_DATA_STRIDE(p4, s4, dtype, x);
-  }
-}
-static void iter_dfloat_clip_min(na_loop_t* const lp) {
-  size_t i;
-  char *p1, *p2, *p3;
-  ssize_t s1, s2, s3;
-  dtype x, min;
-  INIT_COUNTER(lp, i);
-  INIT_PTR(lp, 0, p1, s1);
-  INIT_PTR(lp, 1, p2, s2);
-  INIT_PTR(lp, 2, p3, s3);
-  for (; i--;) {
-    GET_DATA_STRIDE(p1, s1, dtype, x);
-    GET_DATA_STRIDE(p2, s2, dtype, min);
-    if (m_lt(x, min)) {
-      x = min;
-    }
-    SET_DATA_STRIDE(p3, s3, dtype, x);
-  }
-}
-static void iter_dfloat_clip_max(na_loop_t* const lp) {
-  size_t i;
-  char *p1, *p2, *p3;
-  ssize_t s1, s2, s3;
-  dtype x, max;
-  INIT_COUNTER(lp, i);
-  INIT_PTR(lp, 0, p1, s1);
-  INIT_PTR(lp, 1, p2, s2);
-  INIT_PTR(lp, 2, p3, s3);
-  for (; i--;) {
-    GET_DATA_STRIDE(p1, s1, dtype, x);
-    GET_DATA_STRIDE(p2, s2, dtype, max);
-    if (m_gt(x, max)) {
-      x = max;
-    }
-    SET_DATA_STRIDE(p3, s3, dtype, x);
-  }
-}
-static VALUE dfloat_clip(VALUE self, VALUE min, VALUE max) {
-  ndfunc_arg_in_t ain[3] = { { Qnil, 0 }, { cT, 0 }, { cT, 0 } };
-  ndfunc_arg_out_t aout[1] = { { cT, 0 } };
-  ndfunc_t ndf_min = { iter_dfloat_clip_min, STRIDE_LOOP, 2, 1, ain, aout };
-  ndfunc_t ndf_max = { iter_dfloat_clip_max, STRIDE_LOOP, 2, 1, ain, aout };
-  ndfunc_t ndf_both = { iter_dfloat_clip, STRIDE_LOOP, 3, 1, ain, aout };
-  if (RTEST(min)) {
-    if (RTEST(max)) {
-      return na_ndloop(&ndf_both, 3, self, min, max);
-    } else {
-      return na_ndloop(&ndf_min, 2, self, min);
-    }
-  } else {
-    if (RTEST(max)) {
-      return na_ndloop(&ndf_max, 2, self, max);
-    }
-  }
-  rb_raise(rb_eArgError, "min and max are not given");
-  return Qnil;
-}
-static void iter_dfloat_isnan(na_loop_t* const lp) {
-  size_t i;
-  char* p1;
-  BIT_DIGIT* a2;
-  size_t p2;
-  ssize_t s1, s2;
-  size_t* idx1;
-  dtype x;
-  BIT_DIGIT b;
-  INIT_COUNTER(lp, i);
-  INIT_PTR_IDX(lp, 0, p1, s1, idx1);
-  INIT_PTR_BIT(lp, 1, a2, p2, s2);
-  if (idx1) {
-    for (; i--;) {
-      GET_DATA_INDEX(p1, idx1, dtype, x);
-      b = (m_isnan(x)) ? 1 : 0;
-      STORE_BIT(a2, p2, b);
-      p2 += s2;
-    }
-  } else {
-    for (; i--;) {
-      GET_DATA_STRIDE(p1, s1, dtype, x);
-      b = (m_isnan(x)) ? 1 : 0;
-      STORE_BIT(a2, p2, b);
-      p2 += s2;
-    }
-  }
-}
-static VALUE dfloat_isnan(VALUE self) {
-  ndfunc_arg_in_t ain[1] = { { cT, 0 } };
-  ndfunc_arg_out_t aout[1] = { { numo_cBit, 0 } };
-  ndfunc_t ndf = { iter_dfloat_isnan, FULL_LOOP, 1, 1, ain, aout };
-  return na_ndloop(&ndf, 1, self);
-}
-static void iter_dfloat_isinf(na_loop_t* const lp) {
-  size_t i;
-  char* p1;
-  BIT_DIGIT* a2;
-  size_t p2;
-  ssize_t s1, s2;
-  size_t* idx1;
-  dtype x;
-  BIT_DIGIT b;
-  INIT_COUNTER(lp, i);
-  INIT_PTR_IDX(lp, 0, p1, s1, idx1);
-  INIT_PTR_BIT(lp, 1, a2, p2, s2);
-  if (idx1) {
-    for (; i--;) {
-      GET_DATA_INDEX(p1, idx1, dtype, x);
-      b = (m_isinf(x)) ? 1 : 0;
-      STORE_BIT(a2, p2, b);
-      p2 += s2;
-    }
-  } else {
-    for (; i--;) {
-      GET_DATA_STRIDE(p1, s1, dtype, x);
-      b = (m_isinf(x)) ? 1 : 0;
-      STORE_BIT(a2, p2, b);
-      p2 += s2;
-    }
-  }
-}
-static VALUE dfloat_isinf(VALUE self) {
-  ndfunc_arg_in_t ain[1] = { { cT, 0 } };
-  ndfunc_arg_out_t aout[1] = { { numo_cBit, 0 } };
-  ndfunc_t ndf = { iter_dfloat_isinf, FULL_LOOP, 1, 1, ain, aout };
-  return na_ndloop(&ndf, 1, self);
-}
-static void iter_dfloat_isposinf(na_loop_t* const lp) {
-  size_t i;
-  char* p1;
-  BIT_DIGIT* a2;
-  size_t p2;
-  ssize_t s1, s2;
-  size_t* idx1;
-  dtype x;
-  BIT_DIGIT b;
-  INIT_COUNTER(lp, i);
-  INIT_PTR_IDX(lp, 0, p1, s1, idx1);
-  INIT_PTR_BIT(lp, 1, a2, p2, s2);
-  if (idx1) {
-    for (; i--;) {
-      GET_DATA_INDEX(p1, idx1, dtype, x);
-      b = (m_isposinf(x)) ? 1 : 0;
-      STORE_BIT(a2, p2, b);
-      p2 += s2;
-    }
-  } else {
-    for (; i--;) {
-      GET_DATA_STRIDE(p1, s1, dtype, x);
-      b = (m_isposinf(x)) ? 1 : 0;
-      STORE_BIT(a2, p2, b);
-      p2 += s2;
-    }
-  }
-}
-static VALUE dfloat_isposinf(VALUE self) {
-  ndfunc_arg_in_t ain[1] = { { cT, 0 } };
-  ndfunc_arg_out_t aout[1] = { { numo_cBit, 0 } };
-  ndfunc_t ndf = { iter_dfloat_isposinf, FULL_LOOP, 1, 1, ain, aout };
-  return na_ndloop(&ndf, 1, self);
-}
-static void iter_dfloat_isneginf(na_loop_t* const lp) {
-  size_t i;
-  char* p1;
-  BIT_DIGIT* a2;
-  size_t p2;
-  ssize_t s1, s2;
-  size_t* idx1;
-  dtype x;
-  BIT_DIGIT b;
-  INIT_COUNTER(lp, i);
-  INIT_PTR_IDX(lp, 0, p1, s1, idx1);
-  INIT_PTR_BIT(lp, 1, a2, p2, s2);
-  if (idx1) {
-    for (; i--;) {
-      GET_DATA_INDEX(p1, idx1, dtype, x);
-      b = (m_isneginf(x)) ? 1 : 0;
-      STORE_BIT(a2, p2, b);
-      p2 += s2;
-    }
-  } else {
-    for (; i--;) {
-      GET_DATA_STRIDE(p1, s1, dtype, x);
-      b = (m_isneginf(x)) ? 1 : 0;
-      STORE_BIT(a2, p2, b);
-      p2 += s2;
-    }
-  }
-}
-static VALUE dfloat_isneginf(VALUE self) {
-  ndfunc_arg_in_t ain[1] = { { cT, 0 } };
-  ndfunc_arg_out_t aout[1] = { { numo_cBit, 0 } };
-  ndfunc_t ndf = { iter_dfloat_isneginf, FULL_LOOP, 1, 1, ain, aout };
-  return na_ndloop(&ndf, 1, self);
-}
-static void iter_dfloat_isfinite(na_loop_t* const lp) {
-  size_t i;
-  char* p1;
-  BIT_DIGIT* a2;
-  size_t p2;
-  ssize_t s1, s2;
-  size_t* idx1;
-  dtype x;
-  BIT_DIGIT b;
-  INIT_COUNTER(lp, i);
-  INIT_PTR_IDX(lp, 0, p1, s1, idx1);
-  INIT_PTR_BIT(lp, 1, a2, p2, s2);
-  if (idx1) {
-    for (; i--;) {
-      GET_DATA_INDEX(p1, idx1, dtype, x);
-      b = (m_isfinite(x)) ? 1 : 0;
-      STORE_BIT(a2, p2, b);
-      p2 += s2;
-    }
-  } else {
-    for (; i--;) {
-      GET_DATA_STRIDE(p1, s1, dtype, x);
-      b = (m_isfinite(x)) ? 1 : 0;
-      STORE_BIT(a2, p2, b);
-      p2 += s2;
-    }
-  }
-}
-static VALUE dfloat_isfinite(VALUE self) {
-  ndfunc_arg_in_t ain[1] = { { cT, 0 } };
-  ndfunc_arg_out_t aout[1] = { { numo_cBit, 0 } };
-  ndfunc_t ndf = { iter_dfloat_isfinite, FULL_LOOP, 1, 1, ain, aout };
-  return na_ndloop(&ndf, 1, self);
-}
-static void iter_dfloat_sum(na_loop_t* const lp) {
-  size_t n;
-  char *p1, *p2;
-  ssize_t s1;
-  INIT_COUNTER(lp, n);
-  INIT_PTR(lp, 0, p1, s1);
-  p2 = lp->args[1].ptr + lp->args[1].iter[0].pos;
-  *(dtype*)p2 = f_sum(n, p1, s1);
-}
-static void iter_dfloat_sum_nan(na_loop_t* const lp) {
-  size_t n;
-  char *p1, *p2;
-  ssize_t s1;
-  INIT_COUNTER(lp, n);
-  INIT_PTR(lp, 0, p1, s1);
-  p2 = lp->args[1].ptr + lp->args[1].iter[0].pos;
-  *(dtype*)p2 = f_sum_nan(n, p1, s1);
-}
-static VALUE dfloat_sum(int argc, VALUE* argv, VALUE self) {
-  VALUE v, reduce;
-  ndfunc_arg_in_t ain[2] = { { cT, 0 }, { sym_reduce, 0 } };
-  ndfunc_arg_out_t aout[1] = { { cT, 0 } };
-  ndfunc_t ndf = { iter_dfloat_sum, STRIDE_LOOP_NIP | NDF_FLAT_REDUCE, 2, 1, ain, aout };
-  reduce = na_reduce_dimension(argc, argv, 1, &self, &ndf, iter_dfloat_sum_nan);
-  v = na_ndloop(&ndf, 2, self, reduce);
-  return dfloat_extract(v);
-}
-static void iter_dfloat_prod(na_loop_t* const lp) {
-  size_t n;
-  char *p1, *p2;
-  ssize_t s1;
-  INIT_COUNTER(lp, n);
-  INIT_PTR(lp, 0, p1, s1);
-  p2 = lp->args[1].ptr + lp->args[1].iter[0].pos;
-  *(dtype*)p2 = f_prod(n, p1, s1);
-}
-static void iter_dfloat_prod_nan(na_loop_t* const lp) {
-  size_t n;
-  char *p1, *p2;
-  ssize_t s1;
-  INIT_COUNTER(lp, n);
-  INIT_PTR(lp, 0, p1, s1);
-  p2 = lp->args[1].ptr + lp->args[1].iter[0].pos;
-  *(dtype*)p2 = f_prod_nan(n, p1, s1);
-}
-static VALUE dfloat_prod(int argc, VALUE* argv, VALUE self) {
-  VALUE v, reduce;
-  ndfunc_arg_in_t ain[2] = { { cT, 0 }, { sym_reduce, 0 } };
-  ndfunc_arg_out_t aout[1] = { { cT, 0 } };
-  ndfunc_t ndf = { iter_dfloat_prod, STRIDE_LOOP_NIP | NDF_FLAT_REDUCE, 2, 1, ain, aout };
-  reduce = na_reduce_dimension(argc, argv, 1, &self, &ndf, iter_dfloat_prod_nan);
-  v = na_ndloop(&ndf, 2, self, reduce);
-  return dfloat_extract(v);
-}
-static void iter_dfloat_kahan_sum(na_loop_t* const lp) {
-  size_t n;
-  char *p1, *p2;
-  ssize_t s1;
-  INIT_COUNTER(lp, n);
-  INIT_PTR(lp, 0, p1, s1);
-  p2 = lp->args[1].ptr + lp->args[1].iter[0].pos;
-  *(dtype*)p2 = f_kahan_sum(n, p1, s1);
-}
-static void iter_dfloat_kahan_sum_nan(na_loop_t* const lp) {
-  size_t n;
-  char *p1, *p2;
-  ssize_t s1;
-  INIT_COUNTER(lp, n);
-  INIT_PTR(lp, 0, p1, s1);
-  p2 = lp->args[1].ptr + lp->args[1].iter[0].pos;
-  *(dtype*)p2 = f_kahan_sum_nan(n, p1, s1);
-}
-static VALUE dfloat_kahan_sum(int argc, VALUE* argv, VALUE self) {
-  VALUE v, reduce;
-  ndfunc_arg_in_t ain[2] = { { cT, 0 }, { sym_reduce, 0 } };
-  ndfunc_arg_out_t aout[1] = { { cT, 0 } };
-  ndfunc_t ndf = { iter_dfloat_kahan_sum, STRIDE_LOOP_NIP | NDF_FLAT_REDUCE, 2, 1, ain, aout };
-  reduce = na_reduce_dimension(argc, argv, 1, &self, &ndf, iter_dfloat_kahan_sum_nan);
-  v = na_ndloop(&ndf, 2, self, reduce);
-  return dfloat_extract(v);
-}
-static void iter_dfloat_min(na_loop_t* const lp) {
-  size_t n;
-  char *p1, *p2;
-  ssize_t s1;
-  INIT_COUNTER(lp, n);
-  INIT_PTR(lp, 0, p1, s1);
-  p2 = lp->args[1].ptr + lp->args[1].iter[0].pos;
-  *(dtype*)p2 = f_min(n, p1, s1);
-}
-static void iter_dfloat_min_nan(na_loop_t* const lp) {
-  size_t n;
-  char *p1, *p2;
-  ssize_t s1;
-  INIT_COUNTER(lp, n);
-  INIT_PTR(lp, 0, p1, s1);
-  p2 = lp->args[1].ptr + lp->args[1].iter[0].pos;
-  *(dtype*)p2 = f_min_nan(n, p1, s1);
-}
-static VALUE dfloat_min(int argc, VALUE* argv, VALUE self) {
-  VALUE v, reduce;
-  ndfunc_arg_in_t ain[2] = { { cT, 0 }, { sym_reduce, 0 } };
-  ndfunc_arg_out_t aout[1] = { { cT, 0 } };
-  ndfunc_t ndf = { iter_dfloat_min, STRIDE_LOOP_NIP | NDF_FLAT_REDUCE, 2, 1, ain, aout };
-  reduce = na_reduce_dimension(argc, argv, 1, &self, &ndf, iter_dfloat_min_nan);
-  v = na_ndloop(&ndf, 2, self, reduce);
-  return dfloat_extract(v);
-}
-static void iter_dfloat_max(na_loop_t* const lp) {
-  size_t n;
-  char *p1, *p2;
-  ssize_t s1;
-  INIT_COUNTER(lp, n);
-  INIT_PTR(lp, 0, p1, s1);
-  p2 = lp->args[1].ptr + lp->args[1].iter[0].pos;
-  *(dtype*)p2 = f_max(n, p1, s1);
-}
-static void iter_dfloat_max_nan(na_loop_t* const lp) {
-  size_t n;
-  char *p1, *p2;
-  ssize_t s1;
-  INIT_COUNTER(lp, n);
-  INIT_PTR(lp, 0, p1, s1);
-  p2 = lp->args[1].ptr + lp->args[1].iter[0].pos;
-  *(dtype*)p2 = f_max_nan(n, p1, s1);
-}
-static VALUE dfloat_max(int argc, VALUE* argv, VALUE self) {
-  VALUE v, reduce;
-  ndfunc_arg_in_t ain[2] = { { cT, 0 }, { sym_reduce, 0 } };
-  ndfunc_arg_out_t aout[1] = { { cT, 0 } };
-  ndfunc_t ndf = { iter_dfloat_max, STRIDE_LOOP_NIP | NDF_FLAT_REDUCE, 2, 1, ain, aout };
-  reduce = na_reduce_dimension(argc, argv, 1, &self, &ndf, iter_dfloat_max_nan);
-  v = na_ndloop(&ndf, 2, self, reduce);
-  return dfloat_extract(v);
-}
-static void iter_dfloat_ptp(na_loop_t* const lp) {
-  size_t n;
-  char *p1, *p2;
-  ssize_t s1;
-  INIT_COUNTER(lp, n);
-  INIT_PTR(lp, 0, p1, s1);
-  p2 = lp->args[1].ptr + lp->args[1].iter[0].pos;
-  *(dtype*)p2 = f_ptp(n, p1, s1);
-}
-static void iter_dfloat_ptp_nan(na_loop_t* const lp) {
-  size_t n;
-  char *p1, *p2;
-  ssize_t s1;
-  INIT_COUNTER(lp, n);
-  INIT_PTR(lp, 0, p1, s1);
-  p2 = lp->args[1].ptr + lp->args[1].iter[0].pos;
-  *(dtype*)p2 = f_ptp_nan(n, p1, s1);
-}
-static VALUE dfloat_ptp(int argc, VALUE* argv, VALUE self) {
-  VALUE v, reduce;
-  ndfunc_arg_in_t ain[2] = { { cT, 0 }, { sym_reduce, 0 } };
-  ndfunc_arg_out_t aout[1] = { { cT, 0 } };
-  ndfunc_t ndf = { iter_dfloat_ptp, STRIDE_LOOP_NIP | NDF_FLAT_REDUCE, 2, 1, ain, aout };
-  reduce = na_reduce_dimension(argc, argv, 1, &self, &ndf, iter_dfloat_ptp_nan);
-  v = na_ndloop(&ndf, 2, self, reduce);
-  return dfloat_extract(v);
-}
-#define idx_t int64_t
-static void iter_dfloat_max_index_index64(na_loop_t* const lp) {
-  size_t n, idx;
-  char *d_ptr, *i_ptr, *o_ptr;
-  ssize_t d_step, i_step;
-  INIT_COUNTER(lp, n);
-  INIT_PTR(lp, 0, d_ptr, d_step);
-  idx = f_max_index(n, d_ptr, d_step);
-  INIT_PTR(lp, 1, i_ptr, i_step);
-  o_ptr = NDL_PTR(lp, 2);
-  *(idx_t*)o_ptr = *(idx_t*)(i_ptr + i_step * idx);
-}
-#undef idx_t
-#define idx_t int32_t
-static void iter_dfloat_max_index_index32(na_loop_t* const lp) {
-  size_t n, idx;
-  char *d_ptr, *i_ptr, *o_ptr;
-  ssize_t d_step, i_step;
-  INIT_COUNTER(lp, n);
-  INIT_PTR(lp, 0, d_ptr, d_step);
-  idx = f_max_index(n, d_ptr, d_step);
-  INIT_PTR(lp, 1, i_ptr, i_step);
-  o_ptr = NDL_PTR(lp, 2);
-  *(idx_t*)o_ptr = *(idx_t*)(i_ptr + i_step * idx);
-}
-#undef idx_t
-#define idx_t int64_t
-static void iter_dfloat_max_index_index64_nan(na_loop_t* const lp) {
-  size_t n, idx;
-  char *d_ptr, *i_ptr, *o_ptr;
-  ssize_t d_step, i_step;
-  INIT_COUNTER(lp, n);
-  INIT_PTR(lp, 0, d_ptr, d_step);
-  idx = f_max_index_nan(n, d_ptr, d_step);
-  INIT_PTR(lp, 1, i_ptr, i_step);
-  o_ptr = NDL_PTR(lp, 2);
-  *(idx_t*)o_ptr = *(idx_t*)(i_ptr + i_step * idx);
-}
-#undef idx_t
-#define idx_t int32_t
-static void iter_dfloat_max_index_index32_nan(na_loop_t* const lp) {
-  size_t n, idx;
-  char *d_ptr, *i_ptr, *o_ptr;
-  ssize_t d_step, i_step;
-  INIT_COUNTER(lp, n);
-  INIT_PTR(lp, 0, d_ptr, d_step);
-  idx = f_max_index_nan(n, d_ptr, d_step);
-  INIT_PTR(lp, 1, i_ptr, i_step);
-  o_ptr = NDL_PTR(lp, 2);
-  *(idx_t*)o_ptr = *(idx_t*)(i_ptr + i_step * idx);
-}
-#undef idx_t
-static VALUE dfloat_max_index(int argc, VALUE* argv, VALUE self) {
-  narray_t* na;
-  VALUE idx, reduce;
-  ndfunc_arg_in_t ain[3] = { { Qnil, 0 }, { Qnil, 0 }, { sym_reduce, 0 } };
-  ndfunc_arg_out_t aout[1] = { { 0, 0, 0 } };
-  ndfunc_t ndf = { 0, STRIDE_LOOP_NIP | NDF_FLAT_REDUCE | NDF_EXTRACT, 3, 1, ain, aout };
-  GetNArray(self, na);
-  if (na->ndim == 0) {
-    return INT2FIX(0);
-  }
-  if (na->size > (~(u_int32_t)0)) {
-    aout[0].type = numo_cInt64;
-    idx = nary_new(numo_cInt64, na->ndim, na->shape);
-    ndf.func = iter_dfloat_max_index_index64;
-    reduce = na_reduce_dimension(argc, argv, 1, &self, &ndf, iter_dfloat_max_index_index64_nan);
-  } else {
-    aout[0].type = numo_cInt32;
-    idx = nary_new(numo_cInt32, na->ndim, na->shape);
-    ndf.func = iter_dfloat_max_index_index32;
-    reduce = na_reduce_dimension(argc, argv, 1, &self, &ndf, iter_dfloat_max_index_index32_nan);
-  }
-  rb_funcall(idx, rb_intern("seq"), 0);
-  return na_ndloop(&ndf, 3, self, idx, reduce);
-}
-#define idx_t int64_t
-static void iter_dfloat_min_index_index64(na_loop_t* const lp) {
-  size_t n, idx;
-  char *d_ptr, *i_ptr, *o_ptr;
-  ssize_t d_step, i_step;
-  INIT_COUNTER(lp, n);
-  INIT_PTR(lp, 0, d_ptr, d_step);
-  idx = f_min_index(n, d_ptr, d_step);
-  INIT_PTR(lp, 1, i_ptr, i_step);
-  o_ptr = NDL_PTR(lp, 2);
-  *(idx_t*)o_ptr = *(idx_t*)(i_ptr + i_step * idx);
-}
-#undef idx_t
-#define idx_t int32_t
-static void iter_dfloat_min_index_index32(na_loop_t* const lp) {
-  size_t n, idx;
-  char *d_ptr, *i_ptr, *o_ptr;
-  ssize_t d_step, i_step;
-  INIT_COUNTER(lp, n);
-  INIT_PTR(lp, 0, d_ptr, d_step);
-  idx = f_min_index(n, d_ptr, d_step);
-  INIT_PTR(lp, 1, i_ptr, i_step);
-  o_ptr = NDL_PTR(lp, 2);
-  *(idx_t*)o_ptr = *(idx_t*)(i_ptr + i_step * idx);
-}
-#undef idx_t
-#define idx_t int64_t
-static void iter_dfloat_min_index_index64_nan(na_loop_t* const lp) {
-  size_t n, idx;
-  char *d_ptr, *i_ptr, *o_ptr;
-  ssize_t d_step, i_step;
-  INIT_COUNTER(lp, n);
-  INIT_PTR(lp, 0, d_ptr, d_step);
-  idx = f_min_index_nan(n, d_ptr, d_step);
-  INIT_PTR(lp, 1, i_ptr, i_step);
-  o_ptr = NDL_PTR(lp, 2);
-  *(idx_t*)o_ptr = *(idx_t*)(i_ptr + i_step * idx);
-}
-#undef idx_t
-#define idx_t int32_t
-static void iter_dfloat_min_index_index32_nan(na_loop_t* const lp) {
-  size_t n, idx;
-  char *d_ptr, *i_ptr, *o_ptr;
-  ssize_t d_step, i_step;
-  INIT_COUNTER(lp, n);
-  INIT_PTR(lp, 0, d_ptr, d_step);
-  idx = f_min_index_nan(n, d_ptr, d_step);
-  INIT_PTR(lp, 1, i_ptr, i_step);
-  o_ptr = NDL_PTR(lp, 2);
-  *(idx_t*)o_ptr = *(idx_t*)(i_ptr + i_step * idx);
-}
-#undef idx_t
-static VALUE dfloat_min_index(int argc, VALUE* argv, VALUE self) {
-  narray_t* na;
-  VALUE idx, reduce;
-  ndfunc_arg_in_t ain[3] = { { Qnil, 0 }, { Qnil, 0 }, { sym_reduce, 0 } };
-  ndfunc_arg_out_t aout[1] = { { 0, 0, 0 } };
-  ndfunc_t ndf = { 0, STRIDE_LOOP_NIP | NDF_FLAT_REDUCE | NDF_EXTRACT, 3, 1, ain, aout };
-  GetNArray(self, na);
-  if (na->ndim == 0) {
-    return INT2FIX(0);
-  }
-  if (na->size > (~(u_int32_t)0)) {
-    aout[0].type = numo_cInt64;
-    idx = nary_new(numo_cInt64, na->ndim, na->shape);
-    ndf.func = iter_dfloat_min_index_index64;
-    reduce = na_reduce_dimension(argc, argv, 1, &self, &ndf, iter_dfloat_min_index_index64_nan);
-  } else {
-    aout[0].type = numo_cInt32;
-    idx = nary_new(numo_cInt32, na->ndim, na->shape);
-    ndf.func = iter_dfloat_min_index_index32;
-    reduce = na_reduce_dimension(argc, argv, 1, &self, &ndf, iter_dfloat_min_index_index32_nan);
-  }
-  rb_funcall(idx, rb_intern("seq"), 0);
-  return na_ndloop(&ndf, 3, self, idx, reduce);
-}
-#define idx_t int64_t
-static void iter_dfloat_argmax_arg64(na_loop_t* const lp) {
-  size_t n, idx;
-  char *d_ptr, *o_ptr;
-  ssize_t d_step;
-  INIT_COUNTER(lp, n);
-  INIT_PTR(lp, 0, d_ptr, d_step);
-  idx = f_max_index(n, d_ptr, d_step);
-  o_ptr = NDL_PTR(lp, 1);
-  *(idx_t*)o_ptr = (idx_t)idx;
-}
-#undef idx_t
-#define idx_t int32_t
-static void iter_dfloat_argmax_arg32(na_loop_t* const lp) {
-  size_t n, idx;
-  char *d_ptr, *o_ptr;
-  ssize_t d_step;
-  INIT_COUNTER(lp, n);
-  INIT_PTR(lp, 0, d_ptr, d_step);
-  idx = f_max_index(n, d_ptr, d_step);
-  o_ptr = NDL_PTR(lp, 1);
-  *(idx_t*)o_ptr = (idx_t)idx;
-}
-#undef idx_t
-#define idx_t int64_t
-static void iter_dfloat_argmax_arg64_nan(na_loop_t* const lp) {
-  size_t n, idx;
-  char *d_ptr, *o_ptr;
-  ssize_t d_step;
-  INIT_COUNTER(lp, n);
-  INIT_PTR(lp, 0, d_ptr, d_step);
-  idx = f_max_index_nan(n, d_ptr, d_step);
-  o_ptr = NDL_PTR(lp, 1);
-  *(idx_t*)o_ptr = (idx_t)idx;
-}
-#undef idx_t
-#define idx_t int32_t
-static void iter_dfloat_argmax_arg32_nan(na_loop_t* const lp) {
-  size_t n, idx;
-  char *d_ptr, *o_ptr;
-  ssize_t d_step;
-  INIT_COUNTER(lp, n);
-  INIT_PTR(lp, 0, d_ptr, d_step);
-  idx = f_max_index_nan(n, d_ptr, d_step);
-  o_ptr = NDL_PTR(lp, 1);
-  *(idx_t*)o_ptr = (idx_t)idx;
-}
-#undef idx_t
-static VALUE dfloat_argmax(int argc, VALUE* argv, VALUE self) {
-  narray_t* na;
-  VALUE reduce;
-  ndfunc_arg_in_t ain[2] = { { Qnil, 0 }, { sym_reduce, 0 } };
-  ndfunc_arg_out_t aout[1] = { { 0, 0, 0 } };
-  ndfunc_t ndf = { 0, STRIDE_LOOP_NIP | NDF_FLAT_REDUCE | NDF_EXTRACT, 2, 1, ain, aout };
-  GetNArray(self, na);
-  if (na->ndim == 0) {
-    return INT2FIX(0);
-  }
-  if (na->size > (~(u_int32_t)0)) {
-    aout[0].type = numo_cInt64;
-    ndf.func = iter_dfloat_argmax_arg64;
-    reduce = na_reduce_dimension(argc, argv, 1, &self, &ndf, iter_dfloat_argmax_arg64_nan);
-  } else {
-    aout[0].type = numo_cInt32;
-    ndf.func = iter_dfloat_argmax_arg32;
-    reduce = na_reduce_dimension(argc, argv, 1, &self, &ndf, iter_dfloat_argmax_arg32_nan);
-  }
-  return na_ndloop(&ndf, 2, self, reduce);
-}
-#define idx_t int64_t
-static void iter_dfloat_argmin_arg64(na_loop_t* const lp) {
-  size_t n, idx;
-  char *d_ptr, *o_ptr;
-  ssize_t d_step;
-  INIT_COUNTER(lp, n);
-  INIT_PTR(lp, 0, d_ptr, d_step);
-  idx = f_min_index(n, d_ptr, d_step);
-  o_ptr = NDL_PTR(lp, 1);
-  *(idx_t*)o_ptr = (idx_t)idx;
-}
-#undef idx_t
-#define idx_t int32_t
-static void iter_dfloat_argmin_arg32(na_loop_t* const lp) {
-  size_t n, idx;
-  char *d_ptr, *o_ptr;
-  ssize_t d_step;
-  INIT_COUNTER(lp, n);
-  INIT_PTR(lp, 0, d_ptr, d_step);
-  idx = f_min_index(n, d_ptr, d_step);
-  o_ptr = NDL_PTR(lp, 1);
-  *(idx_t*)o_ptr = (idx_t)idx;
-}
-#undef idx_t
-#define idx_t int64_t
-static void iter_dfloat_argmin_arg64_nan(na_loop_t* const lp) {
-  size_t n, idx;
-  char *d_ptr, *o_ptr;
-  ssize_t d_step;
-  INIT_COUNTER(lp, n);
-  INIT_PTR(lp, 0, d_ptr, d_step);
-  idx = f_min_index_nan(n, d_ptr, d_step);
-  o_ptr = NDL_PTR(lp, 1);
-  *(idx_t*)o_ptr = (idx_t)idx;
-}
-#undef idx_t
-#define idx_t int32_t
-static void iter_dfloat_argmin_arg32_nan(na_loop_t* const lp) {
-  size_t n, idx;
-  char *d_ptr, *o_ptr;
-  ssize_t d_step;
-  INIT_COUNTER(lp, n);
-  INIT_PTR(lp, 0, d_ptr, d_step);
-  idx = f_min_index_nan(n, d_ptr, d_step);
-  o_ptr = NDL_PTR(lp, 1);
-  *(idx_t*)o_ptr = (idx_t)idx;
-}
-#undef idx_t
-static VALUE dfloat_argmin(int argc, VALUE* argv, VALUE self) {
-  narray_t* na;
-  VALUE reduce;
-  ndfunc_arg_in_t ain[2] = { { Qnil, 0 }, { sym_reduce, 0 } };
-  ndfunc_arg_out_t aout[1] = { { 0, 0, 0 } };
-  ndfunc_t ndf = { 0, STRIDE_LOOP_NIP | NDF_FLAT_REDUCE | NDF_EXTRACT, 2, 1, ain, aout };
-  GetNArray(self, na);
-  if (na->ndim == 0) {
-    return INT2FIX(0);
-  }
-  if (na->size > (~(u_int32_t)0)) {
-    aout[0].type = numo_cInt64;
-    ndf.func = iter_dfloat_argmin_arg64;
-    reduce = na_reduce_dimension(argc, argv, 1, &self, &ndf, iter_dfloat_argmin_arg64_nan);
-  } else {
-    aout[0].type = numo_cInt32;
-    ndf.func = iter_dfloat_argmin_arg32;
-    reduce = na_reduce_dimension(argc, argv, 1, &self, &ndf, iter_dfloat_argmin_arg32_nan);
-  }
-  return na_ndloop(&ndf, 2, self, reduce);
-}
-static void iter_dfloat_minmax(na_loop_t* const lp) {
-  size_t n;
-  char* p1;
-  ssize_t s1;
-  dtype xmin, xmax;
-  INIT_COUNTER(lp, n);
-  INIT_PTR(lp, 0, p1, s1);
-  f_minmax(n, p1, s1, &xmin, &xmax);
-  *(dtype*)(lp->args[1].ptr + lp->args[1].iter[0].pos) = xmin;
-  *(dtype*)(lp->args[2].ptr + lp->args[2].iter[0].pos) = xmax;
-}
-static void iter_dfloat_minmax_nan(na_loop_t* const lp) {
-  size_t n;
-  char* p1;
-  ssize_t s1;
-  dtype xmin, xmax;
-  INIT_COUNTER(lp, n);
-  INIT_PTR(lp, 0, p1, s1);
-  f_minmax_nan(n, p1, s1, &xmin, &xmax);
-  *(dtype*)(lp->args[1].ptr + lp->args[1].iter[0].pos) = xmin;
-  *(dtype*)(lp->args[2].ptr + lp->args[2].iter[0].pos) = xmax;
-}
-static VALUE dfloat_minmax(int argc, VALUE* argv, VALUE self) {
-  VALUE reduce;
-  ndfunc_arg_in_t ain[2] = { { cT, 0 }, { sym_reduce, 0 } };
-  ndfunc_arg_out_t aout[2] = { { cT, 0 }, { cT, 0 } };
-  ndfunc_t ndf = {
-    iter_dfloat_minmax, STRIDE_LOOP_NIP | NDF_FLAT_REDUCE | NDF_EXTRACT, 2, 2, ain, aout
-  };
-  reduce = na_reduce_dimension(argc, argv, 1, &self, &ndf, iter_dfloat_minmax_nan);
-  return na_ndloop(&ndf, 2, self, reduce);
-}
-static void iter_dfloat_s_maximum(na_loop_t* const lp) {
-  size_t i, n;
-  char *p1, *p2, *p3;
-  ssize_t s1, s2, s3;
-  INIT_COUNTER(lp, n);
-  INIT_PTR(lp, 0, p1, s1);
-  INIT_PTR(lp, 1, p2, s2);
-  INIT_PTR(lp, 2, p3, s3);
-  for (i = 0; i < n; i++) {
-    dtype x, y, z;
-    GET_DATA_STRIDE(p1, s1, dtype, x);
-    GET_DATA_STRIDE(p2, s2, dtype, y);
-    GET_DATA(p3, dtype, z);
-    z = f_maximum(x, y);
-    SET_DATA_STRIDE(p3, s3, dtype, z);
-  }
-}
-static void iter_dfloat_s_maximum_nan(na_loop_t* const lp) {
-  size_t i, n;
-  char *p1, *p2, *p3;
-  ssize_t s1, s2, s3;
-  INIT_COUNTER(lp, n);
-  INIT_PTR(lp, 0, p1, s1);
-  INIT_PTR(lp, 1, p2, s2);
-  INIT_PTR(lp, 2, p3, s3);
-  for (i = 0; i < n; i++) {
-    dtype x, y, z;
-    GET_DATA_STRIDE(p1, s1, dtype, x);
-    GET_DATA_STRIDE(p2, s2, dtype, y);
-    GET_DATA(p3, dtype, z);
-    z = f_maximum_nan(x, y);
-    SET_DATA_STRIDE(p3, s3, dtype, z);
-  }
-}
-static VALUE dfloat_s_maximum(int argc, VALUE* argv, VALUE mod) {
-  VALUE a1 = Qnil;
-  VALUE a2 = Qnil;
-  ndfunc_arg_in_t ain[2] = { { cT, 0 }, { cT, 0 } };
-  ndfunc_arg_out_t aout[1] = { { cT, 0 } };
-  ndfunc_t ndf = { iter_dfloat_s_maximum, STRIDE_LOOP_NIP, 2, 1, ain, aout };
-  VALUE kw_hash = Qnil;
-  ID kw_table[1] = { id_nan };
-  VALUE opts[1] = { Qundef };
-  rb_scan_args(argc, argv, "20:", &a1, &a2, &kw_hash);
-  rb_get_kwargs(kw_hash, kw_table, 0, 1, opts);
-  if (opts[0] != Qundef) {
-    ndf.func = iter_dfloat_s_maximum_nan;
-  }
-  return na_ndloop(&ndf, 2, a1, a2);
-}
-static void iter_dfloat_s_minimum(na_loop_t* const lp) {
-  size_t i, n;
-  char *p1, *p2, *p3;
-  ssize_t s1, s2, s3;
-  INIT_COUNTER(lp, n);
-  INIT_PTR(lp, 0, p1, s1);
-  INIT_PTR(lp, 1, p2, s2);
-  INIT_PTR(lp, 2, p3, s3);
-  for (i = 0; i < n; i++) {
-    dtype x, y, z;
-    GET_DATA_STRIDE(p1, s1, dtype, x);
-    GET_DATA_STRIDE(p2, s2, dtype, y);
-    GET_DATA(p3, dtype, z);
-    z = f_minimum(x, y);
-    SET_DATA_STRIDE(p3, s3, dtype, z);
-  }
-}
-static void iter_dfloat_s_minimum_nan(na_loop_t* const lp) {
-  size_t i, n;
-  char *p1, *p2, *p3;
-  ssize_t s1, s2, s3;
-  INIT_COUNTER(lp, n);
-  INIT_PTR(lp, 0, p1, s1);
-  INIT_PTR(lp, 1, p2, s2);
-  INIT_PTR(lp, 2, p3, s3);
-  for (i = 0; i < n; i++) {
-    dtype x, y, z;
-    GET_DATA_STRIDE(p1, s1, dtype, x);
-    GET_DATA_STRIDE(p2, s2, dtype, y);
-    GET_DATA(p3, dtype, z);
-    z = f_minimum_nan(x, y);
-    SET_DATA_STRIDE(p3, s3, dtype, z);
-  }
-}
-static VALUE dfloat_s_minimum(int argc, VALUE* argv, VALUE mod) {
-  VALUE a1 = Qnil;
-  VALUE a2 = Qnil;
-  ndfunc_arg_in_t ain[2] = { { cT, 0 }, { cT, 0 } };
-  ndfunc_arg_out_t aout[1] = { { cT, 0 } };
-  ndfunc_t ndf = { iter_dfloat_s_minimum, STRIDE_LOOP_NIP, 2, 1, ain, aout };
-  VALUE kw_hash = Qnil;
-  ID kw_table[1] = { id_nan };
-  VALUE opts[1] = { Qundef };
-  rb_scan_args(argc, argv, "20:", &a1, &a2, &kw_hash);
-  rb_get_kwargs(kw_hash, kw_table, 0, 1, opts);
-  if (opts[0] != Qundef) {
-    ndf.func = iter_dfloat_s_minimum_nan;
-  }
-  return na_ndloop(&ndf, 2, a1, a2);
-}
-static void iter_dfloat_cumsum(na_loop_t* const lp) {
-  size_t i;
-  char *p1, *p2;
-  ssize_t s1, s2;
-  dtype x, y;
-  INIT_COUNTER(lp, i);
-  INIT_PTR(lp, 0, p1, s1);
-  INIT_PTR(lp, 1, p2, s2);
-  GET_DATA_STRIDE(p1, s1, dtype, x);
-  SET_DATA_STRIDE(p2, s2, dtype, x);
-  for (i--; i--;) {
-    GET_DATA_STRIDE(p1, s1, dtype, y);
-    m_cumsum(x, y);
-    SET_DATA_STRIDE(p2, s2, dtype, x);
-  }
-}
-static void iter_dfloat_cumsum_nan(na_loop_t* const lp) {
-  size_t i;
-  char *p1, *p2;
-  ssize_t s1, s2;
-  dtype x, y;
-  INIT_COUNTER(lp, i);
-  INIT_PTR(lp, 0, p1, s1);
-  INIT_PTR(lp, 1, p2, s2);
-  GET_DATA_STRIDE(p1, s1, dtype, x);
-  SET_DATA_STRIDE(p2, s2, dtype, x);
-  for (i--; i--;) {
-    GET_DATA_STRIDE(p1, s1, dtype, y);
-    m_cumsum_nan(x, y);
-    SET_DATA_STRIDE(p2, s2, dtype, x);
-  }
-}
-static VALUE dfloat_cumsum(int argc, VALUE* argv, VALUE self) {
-  VALUE reduce;
-  ndfunc_arg_in_t ain[2] = { { cT, 0 }, { sym_reduce, 0 } };
-  ndfunc_arg_out_t aout[1] = { { cT, 0 } };
-  ndfunc_t ndf = {
-    iter_dfloat_cumsum, STRIDE_LOOP | NDF_FLAT_REDUCE | NDF_CUM, 2, 1, ain, aout
-  };
-  reduce = na_reduce_dimension(argc, argv, 1, &self, &ndf, iter_dfloat_cumsum_nan);
-  return na_ndloop(&ndf, 2, self, reduce);
-}
-static void iter_dfloat_cumprod(na_loop_t* const lp) {
-  size_t i;
-  char *p1, *p2;
-  ssize_t s1, s2;
-  dtype x, y;
-  INIT_COUNTER(lp, i);
-  INIT_PTR(lp, 0, p1, s1);
-  INIT_PTR(lp, 1, p2, s2);
-  GET_DATA_STRIDE(p1, s1, dtype, x);
-  SET_DATA_STRIDE(p2, s2, dtype, x);
-  for (i--; i--;) {
-    GET_DATA_STRIDE(p1, s1, dtype, y);
-    m_cumprod(x, y);
-    SET_DATA_STRIDE(p2, s2, dtype, x);
-  }
-}
-static void iter_dfloat_cumprod_nan(na_loop_t* const lp) {
-  size_t i;
-  char *p1, *p2;
-  ssize_t s1, s2;
-  dtype x, y;
-  INIT_COUNTER(lp, i);
-  INIT_PTR(lp, 0, p1, s1);
-  INIT_PTR(lp, 1, p2, s2);
-  GET_DATA_STRIDE(p1, s1, dtype, x);
-  SET_DATA_STRIDE(p2, s2, dtype, x);
-  for (i--; i--;) {
-    GET_DATA_STRIDE(p1, s1, dtype, y);
-    m_cumprod_nan(x, y);
-    SET_DATA_STRIDE(p2, s2, dtype, x);
+      //
+    }
   }
 }
-static VALUE dfloat_cumprod(int argc, VALUE* argv, VALUE self) {
-  VALUE reduce;
-  ndfunc_arg_in_t ain[2] = { { cT, 0 }, { sym_reduce, 0 } };
+static VALUE dfloat_square(VALUE self) {
+  ndfunc_arg_in_t ain[1] = { { cT, 0 } };
   ndfunc_arg_out_t aout[1] = { { cT, 0 } };
-  ndfunc_t ndf = {
-    iter_dfloat_cumprod, STRIDE_LOOP | NDF_FLAT_REDUCE | NDF_CUM, 2, 1, ain, aout
-  };
-  reduce = na_reduce_dimension(argc, argv, 1, &self, &ndf, iter_dfloat_cumprod_nan);
+  ndfunc_t ndf = { iter_dfloat_square, FULL_LOOP, 1, 1, ain, aout };
-  return na_ndloop(&ndf, 2, self, reduce);
+  return na_ndloop(&ndf, 1, self);
 }
-//
-static void iter_dfloat_mulsum(na_loop_t* const lp) {
-  size_t i, n;
+#define check_intdivzero(y)                                                                    \
+  {}
+static void iter_dfloat_copysign(na_loop_t* const lp) {
+  size_t i = 0;
+  size_t n;
   char *p1, *p2, *p3;
   ssize_t s1, s2, s3;
@@ -5053,376 +1978,200 @@ static void iter_dfloat_mulsum(na_loop_t* const lp) {
   INIT_PTR(lp, 1, p2, s2);
   INIT_PTR(lp, 2, p3, s3);
-  if (s3 == 0) {
-    dtype z;
-    // Reduce loop
-    GET_DATA(p3, dtype, z);
-    for (i = 0; i < n; i++) {
-      dtype x, y;
-      GET_DATA_STRIDE(p1, s1, dtype, x);
-      GET_DATA_STRIDE(p2, s2, dtype, y);
-      m_mulsum(x, y, z);
-    }
-    SET_DATA(p3, dtype, z);
-    return;
-  } else {
-    for (i = 0; i < n; i++) {
-      dtype x, y, z;
-      GET_DATA_STRIDE(p1, s1, dtype, x);
-      GET_DATA_STRIDE(p2, s2, dtype, y);
-      GET_DATA(p3, dtype, z);
-      m_mulsum(x, y, z);
-      SET_DATA_STRIDE(p3, s3, dtype, z);
+  //
+  if (is_aligned(p1, sizeof(dtype)) && is_aligned(p2, sizeof(dtype)) &&
+      is_aligned(p3, sizeof(dtype))) {
+    if (s1 == sizeof(dtype) && s2 == sizeof(dtype) && s3 == sizeof(dtype)) {
+      if (p1 == p3) { // inplace case
+        for (; i < n; i++) {
+          check_intdivzero(((dtype*)p2)[i]);
+          ((dtype*)p1)[i] = m_copysign(((dtype*)p1)[i], ((dtype*)p2)[i]);
+        }
+      } else {
+        for (; i < n; i++) {
+          check_intdivzero(((dtype*)p2)[i]);
+          ((dtype*)p3)[i] = m_copysign(((dtype*)p1)[i], ((dtype*)p2)[i]);
+        }
+      }
+      return;
     }
-  }
-}
-//
-static void iter_dfloat_mulsum_nan(na_loop_t* const lp) {
-  size_t i, n;
-  char *p1, *p2, *p3;
-  ssize_t s1, s2, s3;
-  INIT_COUNTER(lp, n);
-  INIT_PTR(lp, 0, p1, s1);
-  INIT_PTR(lp, 1, p2, s2);
-  INIT_PTR(lp, 2, p3, s3);
+    if (is_aligned_step(s1, sizeof(dtype)) && is_aligned_step(s2, sizeof(dtype)) &&
+        is_aligned_step(s3, sizeof(dtype))) {
+      //
-  if (s3 == 0) {
-    dtype z;
-    // Reduce loop
-    GET_DATA(p3, dtype, z);
-    for (i = 0; i < n; i++) {
-      dtype x, y;
-      GET_DATA_STRIDE(p1, s1, dtype, x);
-      GET_DATA_STRIDE(p2, s2, dtype, y);
-      m_mulsum_nan(x, y, z);
-    }
-    SET_DATA(p3, dtype, z);
-    return;
-  } else {
-    for (i = 0; i < n; i++) {
-      dtype x, y, z;
-      GET_DATA_STRIDE(p1, s1, dtype, x);
-      GET_DATA_STRIDE(p2, s2, dtype, y);
-      GET_DATA(p3, dtype, z);
-      m_mulsum_nan(x, y, z);
-      SET_DATA_STRIDE(p3, s3, dtype, z);
+      if (s2 == 0) { // Broadcasting from scalar value.
+        check_intdivzero(*(dtype*)p2);
+        if (s1 == sizeof(dtype) && s3 == sizeof(dtype)) {
+          if (p1 == p3) { // inplace case
+            for (; i < n; i++) {
+              ((dtype*)p1)[i] = m_copysign(((dtype*)p1)[i], *(dtype*)p2);
+            }
+          } else {
+            for (; i < n; i++) {
+              ((dtype*)p3)[i] = m_copysign(((dtype*)p1)[i], *(dtype*)p2);
+            }
+          }
+        } else {
+          for (i = 0; i < n; i++) {
+            *(dtype*)p3 = m_copysign(*(dtype*)p1, *(dtype*)p2);
+            p1 += s1;
+            p3 += s3;
+          }
+        }
+      } else {
+        if (p1 == p3) { // inplace case
+          for (i = 0; i < n; i++) {
+            check_intdivzero(*(dtype*)p2);
+            *(dtype*)p1 = m_copysign(*(dtype*)p1, *(dtype*)p2);
+            p1 += s1;
+            p2 += s2;
+          }
+        } else {
+          for (i = 0; i < n; i++) {
+            check_intdivzero(*(dtype*)p2);
+            *(dtype*)p3 = m_copysign(*(dtype*)p1, *(dtype*)p2);
+            p1 += s1;
+            p2 += s2;
+            p3 += s3;
+          }
+        }
+      }
+      return;
+      //
     }
   }
+  for (i = 0; i < n; i++) {
+    dtype x, y, z;
+    GET_DATA_STRIDE(p1, s1, dtype, x);
+    GET_DATA_STRIDE(p2, s2, dtype, y);
+    check_intdivzero(y);
+    z = m_copysign(x, y);
+    SET_DATA_STRIDE(p3, s3, dtype, z);
+  }
+  //
 }
-//
+#undef check_intdivzero
-static VALUE dfloat_mulsum_self(int argc, VALUE* argv, VALUE self) {
-  VALUE v, reduce;
-  VALUE naryv[2];
-  ndfunc_arg_in_t ain[4] = { { cT, 0 }, { cT, 0 }, { sym_reduce, 0 }, { sym_init, 0 } };
+static VALUE dfloat_copysign_self(VALUE self, VALUE other) {
+  ndfunc_arg_in_t ain[2] = { { cT, 0 }, { cT, 0 } };
   ndfunc_arg_out_t aout[1] = { { cT, 0 } };
-  ndfunc_t ndf = { iter_dfloat_mulsum, STRIDE_LOOP_NIP, 4, 1, ain, aout };
-  if (argc < 1) {
-    rb_raise(rb_eArgError, "wrong number of arguments (%d for >=1)", argc);
-  }
-  // should fix below: [self.ndim,other.ndim].max or?
-  naryv[0] = self;
-  naryv[1] = argv[0];
-  //
-  reduce = na_reduce_dimension(argc - 1, argv + 1, 2, naryv, &ndf, iter_dfloat_mulsum_nan);
-  //
+  ndfunc_t ndf = { iter_dfloat_copysign, STRIDE_LOOP, 2, 1, ain, aout };
-  v = na_ndloop(&ndf, 4, self, argv[0], reduce, m_mulsum_init);
-  return dfloat_extract(v);
+  return na_ndloop(&ndf, 2, self, other);
 }
-static VALUE dfloat_mulsum(int argc, VALUE* argv, VALUE self) {
-  //
+static VALUE dfloat_copysign(VALUE self, VALUE other) {
   VALUE klass, v;
-  //
-  if (argc < 1) {
-    rb_raise(rb_eArgError, "wrong number of arguments (%d for >=1)", argc);
-  }
-  //
-  klass = na_upcast(rb_obj_class(self), rb_obj_class(argv[0]));
+  klass = na_upcast(rb_obj_class(self), rb_obj_class(other));
   if (klass == cT) {
-    return dfloat_mulsum_self(argc, argv, self);
+    return dfloat_copysign_self(self, other);
   } else {
     v = rb_funcall(klass, id_cast, 1, self);
-    //
-    return rb_funcallv_kw(v, rb_intern("mulsum"), argc, argv, RB_PASS_CALLED_KEYWORDS);
-    //
+    return rb_funcall(v, id_copysign, 1, other);
   }
-  //
 }
-typedef dtype seq_data_t;
-typedef double seq_count_t;
-typedef struct {
-  seq_data_t beg;
-  seq_data_t step;
-  seq_count_t count;
-} seq_opt_t;
-static void iter_dfloat_seq(na_loop_t* const lp) {
+static void iter_dfloat_signbit(na_loop_t* const lp) {
   size_t i;
   char* p1;
-  ssize_t s1;
+  BIT_DIGIT* a2;
+  size_t p2;
+  ssize_t s1, s2;
   size_t* idx1;
   dtype x;
-  seq_data_t beg, step;
-  seq_count_t c;
-  seq_opt_t* g;
+  BIT_DIGIT b;
   INIT_COUNTER(lp, i);
   INIT_PTR_IDX(lp, 0, p1, s1, idx1);
-  g = (seq_opt_t*)(lp->opt_ptr);
-  beg = g->beg;
-  step = g->step;
-  c = g->count;
+  INIT_PTR_BIT(lp, 1, a2, p2, s2);
   if (idx1) {
     for (; i--;) {
-      x = f_seq(beg, step, c++);
-      *(dtype*)(p1 + *idx1) = x;
-      idx1++;
+      GET_DATA_INDEX(p1, idx1, dtype, x);
+      b = (m_signbit(x)) ? 1 : 0;
+      STORE_BIT(a2, p2, b);
+      p2 += s2;
     }
   } else {
     for (; i--;) {
-      x = f_seq(beg, step, c++);
-      *(dtype*)(p1) = x;
-      p1 += s1;
+      GET_DATA_STRIDE(p1, s1, dtype, x);
+      b = (m_signbit(x)) ? 1 : 0;
+      STORE_BIT(a2, p2, b);
+      p2 += s2;
     }
   }
-  g->count = c;
 }
-static VALUE dfloat_seq(int argc, VALUE* args, VALUE self) {
-  seq_opt_t* g;
-  VALUE vbeg = Qnil, vstep = Qnil;
-  ndfunc_arg_in_t ain[1] = { { OVERWRITE, 0 } };
-  ndfunc_t ndf = { iter_dfloat_seq, FULL_LOOP, 1, 0, ain, 0 };
-  g = ALLOCA_N(seq_opt_t, 1);
-  g->beg = m_zero;
-  g->step = m_one;
-  g->count = 0;
-  rb_scan_args(argc, args, "02", &vbeg, &vstep);
-  if (vbeg != Qnil) {
-    g->beg = m_num_to_data(vbeg);
-  }
-  if (vstep != Qnil) {
-    g->step = m_num_to_data(vstep);
-  }
+static VALUE dfloat_signbit(VALUE self) {
+  ndfunc_arg_in_t ain[1] = { { cT, 0 } };
+  ndfunc_arg_out_t aout[1] = { { numo_cBit, 0 } };
+  ndfunc_t ndf = { iter_dfloat_signbit, FULL_LOOP, 1, 1, ain, aout };
-  na_ndloop3(&ndf, g, 1, self);
-  return self;
+  return na_ndloop(&ndf, 1, self);
 }
-typedef struct {
-  seq_data_t beg;
-  seq_data_t step;
-  seq_data_t base;
-  seq_count_t count;
-} logseq_opt_t;
-static void iter_dfloat_logseq(na_loop_t* const lp) {
+static void iter_dfloat_modf(na_loop_t* const lp) {
   size_t i;
-  char* p1;
-  ssize_t s1;
-  size_t* idx1;
-  dtype x;
-  seq_data_t beg, step, base;
-  seq_count_t c;
-  logseq_opt_t* g;
+  char *p1, *p2, *p3;
+  ssize_t s1, s2, s3;
+  dtype x, y, z;
   INIT_COUNTER(lp, i);
-  INIT_PTR_IDX(lp, 0, p1, s1, idx1);
-  g = (logseq_opt_t*)(lp->opt_ptr);
-  beg = g->beg;
-  step = g->step;
-  base = g->base;
-  c = g->count;
-  if (idx1) {
-    for (; i--;) {
-      x = f_seq(beg, step, c++);
-      *(dtype*)(p1 + *idx1) = m_pow(base, x);
-      idx1++;
-    }
-  } else {
-    for (; i--;) {
-      x = f_seq(beg, step, c++);
-      *(dtype*)(p1) = m_pow(base, x);
-      p1 += s1;
-    }
+  INIT_PTR(lp, 0, p1, s1);
+  INIT_PTR(lp, 1, p2, s2);
+  INIT_PTR(lp, 2, p3, s3);
+  for (; i--;) {
+    GET_DATA_STRIDE(p1, s1, dtype, x);
+    m_modf(x, y, z);
+    SET_DATA_STRIDE(p2, s2, dtype, y);
+    SET_DATA_STRIDE(p3, s3, dtype, z);
   }
-  g->count = c;
 }
-static VALUE dfloat_logseq(int argc, VALUE* args, VALUE self) {
-  logseq_opt_t* g;
-  VALUE vbeg, vstep, vbase;
-  ndfunc_arg_in_t ain[1] = { { OVERWRITE, 0 } };
-  ndfunc_t ndf = { iter_dfloat_logseq, FULL_LOOP, 1, 0, ain, 0 };
-  g = ALLOCA_N(logseq_opt_t, 1);
-  rb_scan_args(argc, args, "21", &vbeg, &vstep, &vbase);
-  g->beg = m_num_to_data(vbeg);
-  g->step = m_num_to_data(vstep);
-  if (vbase == Qnil) {
-    g->base = m_from_real(10);
-  } else {
-    g->base = m_num_to_data(vbase);
-  }
-  na_ndloop3(&ndf, g, 1, self);
-  return self;
-}
+static VALUE dfloat_modf(VALUE self) {
+  ndfunc_arg_in_t ain[1] = { { cT, 0 } };
+  ndfunc_arg_out_t aout[2] = { { cT, 0 }, { cT, 0 } };
+  ndfunc_t ndf = { iter_dfloat_modf, STRIDE_LOOP, 1, 2, ain, aout };
-static void iter_dfloat_eye(na_loop_t* const lp) {
-  size_t n0, n1;
-  size_t i0, i1;
-  ssize_t s0, s1;
-  char *p0, *p1;
-  char* g;
-  ssize_t kofs;
-  dtype data;
-  g = (char*)(lp->opt_ptr);
-  kofs = *(ssize_t*)g;
-  data = *(dtype*)(g + sizeof(ssize_t));
-  n0 = lp->args[0].shape[0];
-  n1 = lp->args[0].shape[1];
-  s0 = lp->args[0].iter[0].step;
-  s1 = lp->args[0].iter[1].step;
-  p0 = NDL_PTR(lp, 0);
-  for (i0 = 0; i0 < n0; i0++) {
-    p1 = p0;
-    for (i1 = 0; i1 < n1; i1++) {
-      *(dtype*)p1 = (i0 + kofs == i1) ? data : m_zero;
-      p1 += s1;
-    }
-    p0 += s0;
-  }
+  return na_ndloop(&ndf, 1, self);
 }
-static VALUE dfloat_eye(int argc, VALUE* argv, VALUE self) {
-  ndfunc_arg_in_t ain[1] = { { OVERWRITE, 2 } };
-  ndfunc_t ndf = { iter_dfloat_eye, NO_LOOP, 1, 0, ain, 0 };
-  ssize_t kofs;
-  dtype data;
-  char* g;
-  int nd;
-  narray_t* na;
-  // check arguments
-  if (argc > 2) {
-    rb_raise(rb_eArgError, "too many arguments (%d for 0..2)", argc);
-  } else if (argc == 2) {
-    data = m_num_to_data(argv[0]);
-    kofs = NUM2SSIZET(argv[1]);
-  } else if (argc == 1) {
-    data = m_num_to_data(argv[0]);
-    kofs = 0;
-  } else {
-    data = m_one;
-    kofs = 0;
-  }
-  GetNArray(self, na);
-  nd = na->ndim;
-  if (nd < 2) {
-    rb_raise(nary_eDimensionError, "less than 2-d array");
-  }
-  // Diagonal offset from the main diagonal.
-  if (kofs >= 0) {
-    if ((size_t)(kofs) >= na->shape[nd - 1]) {
-      rb_raise(
-        rb_eArgError,
-        "invalid diagonal offset(%" SZF "d) for "
-        "last dimension size(%" SZF "d)",
-        kofs, na->shape[nd - 1]
-      );
-    }
-  } else {
-    if ((size_t)(-kofs) >= na->shape[nd - 2]) {
-      rb_raise(
-        rb_eArgError,
-        "invalid diagonal offset(%" SZF "d) for "
-        "last-1 dimension size(%" SZF "d)",
-        kofs, na->shape[nd - 2]
-      );
-    }
-  }
+static void iter_dfloat_kahan_sum(na_loop_t* const lp) {
+  size_t n;
+  char *p1, *p2;
+  ssize_t s1;
-  g = ALLOCA_N(char, sizeof(ssize_t) + sizeof(dtype));
-  *(ssize_t*)g = kofs;
-  *(dtype*)(g + sizeof(ssize_t)) = data;
+  INIT_COUNTER(lp, n);
+  INIT_PTR(lp, 0, p1, s1);
+  p2 = lp->args[1].ptr + lp->args[1].iter[0].pos;
-  na_ndloop3(&ndf, g, 1, self);
-  return self;
+  *(dtype*)p2 = f_kahan_sum(n, p1, s1);
 }
-typedef struct {
-  dtype low;
-  dtype max;
-} rand_opt_t;
-static void iter_dfloat_rand(na_loop_t* const lp) {
-  size_t i;
-  char* p1;
+static void iter_dfloat_kahan_sum_nan(na_loop_t* const lp) {
+  size_t n;
+  char *p1, *p2;
   ssize_t s1;
-  size_t* idx1;
-  dtype x;
-  rand_opt_t* g;
-  dtype low;
-  dtype max;
-  INIT_COUNTER(lp, i);
-  INIT_PTR_IDX(lp, 0, p1, s1, idx1);
-  g = (rand_opt_t*)(lp->opt_ptr);
-  low = g->low;
-  max = g->max;
+  INIT_COUNTER(lp, n);
+  INIT_PTR(lp, 0, p1, s1);
+  p2 = lp->args[1].ptr + lp->args[1].iter[0].pos;
-  if (idx1) {
-    for (; i--;) {
-      x = m_add(m_rand(max), low);
-      SET_DATA_INDEX(p1, idx1, dtype, x);
-    }
-  } else {
-    for (; i--;) {
-      x = m_add(m_rand(max), low);
-      SET_DATA_STRIDE(p1, s1, dtype, x);
-    }
-  }
+  *(dtype*)p2 = f_kahan_sum_nan(n, p1, s1);
 }
-static VALUE dfloat_rand(int argc, VALUE* args, VALUE self) {
-  rand_opt_t g;
-  VALUE v1 = Qnil, v2 = Qnil;
-  dtype high;
-  ndfunc_arg_in_t ain[1] = { { OVERWRITE, 0 } };
-  ndfunc_t ndf = { iter_dfloat_rand, FULL_LOOP, 1, 0, ain, 0 };
-  rb_scan_args(argc, args, "02", &v1, &v2);
-  if (v2 == Qnil) {
-    g.low = m_zero;
-    if (v1 == Qnil) {
-      g.max = high = m_one;
+static VALUE dfloat_kahan_sum(int argc, VALUE* argv, VALUE self) {
+  VALUE v, reduce;
+  ndfunc_arg_in_t ain[2] = { { cT, 0 }, { sym_reduce, 0 } };
+  ndfunc_arg_out_t aout[1] = { { cT, 0 } };
+  ndfunc_t ndf = { iter_dfloat_kahan_sum, STRIDE_LOOP_NIP | NDF_FLAT_REDUCE, 2, 1, ain, aout };
-    } else {
-      g.max = high = m_num_to_data(v1);
-    }
+  reduce = na_reduce_dimension(argc, argv, 1, &self, &ndf, iter_dfloat_kahan_sum_nan);
-  } else {
-    g.low = m_num_to_data(v1);
-    high = m_num_to_data(v2);
-    g.max = m_sub(high, g.low);
-  }
+  v = na_ndloop(&ndf, 2, self, reduce);
-  na_ndloop3(&ndf, &g, 1, self);
-  return self;
+  return dfloat_extract(v);
 }
 typedef struct {