RubyGems - cumo - Versions diffs - 0.1.0 - Mend

cumo 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (266) hide show

checksums.yaml +7 -0
data/.gitignore +27 -0
data/.travis.yml +5 -0
data/3rd_party/mkmf-cu/.gitignore +36 -0
data/3rd_party/mkmf-cu/Gemfile +3 -0
data/3rd_party/mkmf-cu/LICENSE +21 -0
data/3rd_party/mkmf-cu/README.md +36 -0
data/3rd_party/mkmf-cu/Rakefile +11 -0
data/3rd_party/mkmf-cu/bin/mkmf-cu-nvcc +4 -0
data/3rd_party/mkmf-cu/lib/mkmf-cu.rb +32 -0
data/3rd_party/mkmf-cu/lib/mkmf-cu/cli.rb +80 -0
data/3rd_party/mkmf-cu/lib/mkmf-cu/nvcc.rb +157 -0
data/3rd_party/mkmf-cu/mkmf-cu.gemspec +16 -0
data/3rd_party/mkmf-cu/test/test_mkmf-cu.rb +67 -0
data/CODE_OF_CONDUCT.md +46 -0
data/Gemfile +8 -0
data/LICENSE.txt +82 -0
data/README.md +252 -0
data/Rakefile +43 -0
data/bench/broadcast_fp32.rb +138 -0
data/bench/cumo_bench.rb +193 -0
data/bench/numo_bench.rb +138 -0
data/bench/reduction_fp32.rb +117 -0
data/bin/console +14 -0
data/bin/setup +8 -0
data/cumo.gemspec +32 -0
data/ext/cumo/cuda/cublas.c +278 -0
data/ext/cumo/cuda/driver.c +421 -0
data/ext/cumo/cuda/memory_pool.cpp +185 -0
data/ext/cumo/cuda/memory_pool_impl.cpp +308 -0
data/ext/cumo/cuda/memory_pool_impl.hpp +370 -0
data/ext/cumo/cuda/memory_pool_impl_test.cpp +554 -0
data/ext/cumo/cuda/nvrtc.c +207 -0
data/ext/cumo/cuda/runtime.c +167 -0
data/ext/cumo/cumo.c +148 -0
data/ext/cumo/depend.erb +58 -0
data/ext/cumo/extconf.rb +179 -0
data/ext/cumo/include/cumo.h +25 -0
data/ext/cumo/include/cumo/compat.h +23 -0
data/ext/cumo/include/cumo/cuda/cublas.h +153 -0
data/ext/cumo/include/cumo/cuda/cumo_thrust.hpp +187 -0
data/ext/cumo/include/cumo/cuda/cumo_thrust_complex.hpp +79 -0
data/ext/cumo/include/cumo/cuda/driver.h +22 -0
data/ext/cumo/include/cumo/cuda/memory_pool.h +28 -0
data/ext/cumo/include/cumo/cuda/nvrtc.h +22 -0
data/ext/cumo/include/cumo/cuda/runtime.h +40 -0
data/ext/cumo/include/cumo/indexer.h +238 -0
data/ext/cumo/include/cumo/intern.h +142 -0
data/ext/cumo/include/cumo/intern_fwd.h +38 -0
data/ext/cumo/include/cumo/intern_kernel.h +6 -0
data/ext/cumo/include/cumo/narray.h +429 -0
data/ext/cumo/include/cumo/narray_kernel.h +149 -0
data/ext/cumo/include/cumo/ndloop.h +95 -0
data/ext/cumo/include/cumo/reduce_kernel.h +126 -0
data/ext/cumo/include/cumo/template.h +158 -0
data/ext/cumo/include/cumo/template_kernel.h +77 -0
data/ext/cumo/include/cumo/types/bit.h +40 -0
data/ext/cumo/include/cumo/types/bit_kernel.h +34 -0
data/ext/cumo/include/cumo/types/complex.h +402 -0
data/ext/cumo/include/cumo/types/complex_kernel.h +414 -0
data/ext/cumo/include/cumo/types/complex_macro.h +382 -0
data/ext/cumo/include/cumo/types/complex_macro_kernel.h +186 -0
data/ext/cumo/include/cumo/types/dcomplex.h +46 -0
data/ext/cumo/include/cumo/types/dcomplex_kernel.h +13 -0
data/ext/cumo/include/cumo/types/dfloat.h +47 -0
data/ext/cumo/include/cumo/types/dfloat_kernel.h +14 -0
data/ext/cumo/include/cumo/types/float_def.h +34 -0
data/ext/cumo/include/cumo/types/float_def_kernel.h +39 -0
data/ext/cumo/include/cumo/types/float_macro.h +191 -0
data/ext/cumo/include/cumo/types/float_macro_kernel.h +158 -0
data/ext/cumo/include/cumo/types/int16.h +24 -0
data/ext/cumo/include/cumo/types/int16_kernel.h +23 -0
data/ext/cumo/include/cumo/types/int32.h +24 -0
data/ext/cumo/include/cumo/types/int32_kernel.h +19 -0
data/ext/cumo/include/cumo/types/int64.h +24 -0
data/ext/cumo/include/cumo/types/int64_kernel.h +19 -0
data/ext/cumo/include/cumo/types/int8.h +24 -0
data/ext/cumo/include/cumo/types/int8_kernel.h +19 -0
data/ext/cumo/include/cumo/types/int_macro.h +67 -0
data/ext/cumo/include/cumo/types/int_macro_kernel.h +48 -0
data/ext/cumo/include/cumo/types/real_accum.h +486 -0
data/ext/cumo/include/cumo/types/real_accum_kernel.h +101 -0
data/ext/cumo/include/cumo/types/robj_macro.h +80 -0
data/ext/cumo/include/cumo/types/robj_macro_kernel.h +0 -0
data/ext/cumo/include/cumo/types/robject.h +27 -0
data/ext/cumo/include/cumo/types/robject_kernel.h +7 -0
data/ext/cumo/include/cumo/types/scomplex.h +46 -0
data/ext/cumo/include/cumo/types/scomplex_kernel.h +13 -0
data/ext/cumo/include/cumo/types/sfloat.h +48 -0
data/ext/cumo/include/cumo/types/sfloat_kernel.h +14 -0
data/ext/cumo/include/cumo/types/uint16.h +25 -0
data/ext/cumo/include/cumo/types/uint16_kernel.h +20 -0
data/ext/cumo/include/cumo/types/uint32.h +25 -0
data/ext/cumo/include/cumo/types/uint32_kernel.h +20 -0
data/ext/cumo/include/cumo/types/uint64.h +25 -0
data/ext/cumo/include/cumo/types/uint64_kernel.h +20 -0
data/ext/cumo/include/cumo/types/uint8.h +25 -0
data/ext/cumo/include/cumo/types/uint8_kernel.h +20 -0
data/ext/cumo/include/cumo/types/uint_macro.h +58 -0
data/ext/cumo/include/cumo/types/uint_macro_kernel.h +38 -0
data/ext/cumo/include/cumo/types/xint_macro.h +169 -0
data/ext/cumo/include/cumo/types/xint_macro_kernel.h +88 -0
data/ext/cumo/narray/SFMT-params.h +97 -0
data/ext/cumo/narray/SFMT-params19937.h +46 -0
data/ext/cumo/narray/SFMT.c +620 -0
data/ext/cumo/narray/SFMT.h +167 -0
data/ext/cumo/narray/array.c +638 -0
data/ext/cumo/narray/data.c +961 -0
data/ext/cumo/narray/gen/cogen.rb +56 -0
data/ext/cumo/narray/gen/cogen_kernel.rb +58 -0
data/ext/cumo/narray/gen/def/bit.rb +37 -0
data/ext/cumo/narray/gen/def/dcomplex.rb +39 -0
data/ext/cumo/narray/gen/def/dfloat.rb +37 -0
data/ext/cumo/narray/gen/def/int16.rb +36 -0
data/ext/cumo/narray/gen/def/int32.rb +36 -0
data/ext/cumo/narray/gen/def/int64.rb +36 -0
data/ext/cumo/narray/gen/def/int8.rb +36 -0
data/ext/cumo/narray/gen/def/robject.rb +37 -0
data/ext/cumo/narray/gen/def/scomplex.rb +39 -0
data/ext/cumo/narray/gen/def/sfloat.rb +37 -0
data/ext/cumo/narray/gen/def/uint16.rb +36 -0
data/ext/cumo/narray/gen/def/uint32.rb +36 -0
data/ext/cumo/narray/gen/def/uint64.rb +36 -0
data/ext/cumo/narray/gen/def/uint8.rb +36 -0
data/ext/cumo/narray/gen/erbpp2.rb +346 -0
data/ext/cumo/narray/gen/narray_def.rb +268 -0
data/ext/cumo/narray/gen/spec.rb +425 -0
data/ext/cumo/narray/gen/tmpl/accum.c +86 -0
data/ext/cumo/narray/gen/tmpl/accum_binary.c +121 -0
data/ext/cumo/narray/gen/tmpl/accum_binary_kernel.cu +61 -0
data/ext/cumo/narray/gen/tmpl/accum_index.c +119 -0
data/ext/cumo/narray/gen/tmpl/accum_index_kernel.cu +66 -0
data/ext/cumo/narray/gen/tmpl/accum_kernel.cu +12 -0
data/ext/cumo/narray/gen/tmpl/alloc_func.c +107 -0
data/ext/cumo/narray/gen/tmpl/allocate.c +37 -0
data/ext/cumo/narray/gen/tmpl/aref.c +66 -0
data/ext/cumo/narray/gen/tmpl/aref_cpu.c +50 -0
data/ext/cumo/narray/gen/tmpl/aset.c +56 -0
data/ext/cumo/narray/gen/tmpl/binary.c +162 -0
data/ext/cumo/narray/gen/tmpl/binary2.c +70 -0
data/ext/cumo/narray/gen/tmpl/binary2_kernel.cu +15 -0
data/ext/cumo/narray/gen/tmpl/binary_kernel.cu +31 -0
data/ext/cumo/narray/gen/tmpl/binary_s.c +45 -0
data/ext/cumo/narray/gen/tmpl/binary_s_kernel.cu +15 -0
data/ext/cumo/narray/gen/tmpl/bincount.c +181 -0
data/ext/cumo/narray/gen/tmpl/cast.c +44 -0
data/ext/cumo/narray/gen/tmpl/cast_array.c +13 -0
data/ext/cumo/narray/gen/tmpl/class.c +9 -0
data/ext/cumo/narray/gen/tmpl/class_kernel.cu +6 -0
data/ext/cumo/narray/gen/tmpl/clip.c +121 -0
data/ext/cumo/narray/gen/tmpl/coerce_cast.c +10 -0
data/ext/cumo/narray/gen/tmpl/complex_accum_kernel.cu +129 -0
data/ext/cumo/narray/gen/tmpl/cond_binary.c +68 -0
data/ext/cumo/narray/gen/tmpl/cond_binary_kernel.cu +18 -0
data/ext/cumo/narray/gen/tmpl/cond_unary.c +46 -0
data/ext/cumo/narray/gen/tmpl/cum.c +50 -0
data/ext/cumo/narray/gen/tmpl/each.c +47 -0
data/ext/cumo/narray/gen/tmpl/each_with_index.c +70 -0
data/ext/cumo/narray/gen/tmpl/ewcomp.c +79 -0
data/ext/cumo/narray/gen/tmpl/ewcomp_kernel.cu +19 -0
data/ext/cumo/narray/gen/tmpl/extract.c +22 -0
data/ext/cumo/narray/gen/tmpl/extract_cpu.c +26 -0
data/ext/cumo/narray/gen/tmpl/extract_data.c +53 -0
data/ext/cumo/narray/gen/tmpl/eye.c +105 -0
data/ext/cumo/narray/gen/tmpl/eye_kernel.cu +19 -0
data/ext/cumo/narray/gen/tmpl/fill.c +52 -0
data/ext/cumo/narray/gen/tmpl/fill_kernel.cu +29 -0
data/ext/cumo/narray/gen/tmpl/float_accum_kernel.cu +106 -0
data/ext/cumo/narray/gen/tmpl/format.c +62 -0
data/ext/cumo/narray/gen/tmpl/format_to_a.c +49 -0
data/ext/cumo/narray/gen/tmpl/frexp.c +38 -0
data/ext/cumo/narray/gen/tmpl/gemm.c +203 -0
data/ext/cumo/narray/gen/tmpl/init_class.c +20 -0
data/ext/cumo/narray/gen/tmpl/init_module.c +12 -0
data/ext/cumo/narray/gen/tmpl/inspect.c +21 -0
data/ext/cumo/narray/gen/tmpl/lib.c +50 -0
data/ext/cumo/narray/gen/tmpl/lib_kernel.cu +24 -0
data/ext/cumo/narray/gen/tmpl/logseq.c +102 -0
data/ext/cumo/narray/gen/tmpl/logseq_kernel.cu +31 -0
data/ext/cumo/narray/gen/tmpl/map_with_index.c +98 -0
data/ext/cumo/narray/gen/tmpl/median.c +66 -0
data/ext/cumo/narray/gen/tmpl/minmax.c +47 -0
data/ext/cumo/narray/gen/tmpl/module.c +9 -0
data/ext/cumo/narray/gen/tmpl/module_kernel.cu +1 -0
data/ext/cumo/narray/gen/tmpl/new_dim0.c +15 -0
data/ext/cumo/narray/gen/tmpl/new_dim0_kernel.cu +8 -0
data/ext/cumo/narray/gen/tmpl/poly.c +50 -0
data/ext/cumo/narray/gen/tmpl/pow.c +97 -0
data/ext/cumo/narray/gen/tmpl/pow_kernel.cu +29 -0
data/ext/cumo/narray/gen/tmpl/powint.c +17 -0
data/ext/cumo/narray/gen/tmpl/qsort.c +212 -0
data/ext/cumo/narray/gen/tmpl/rand.c +168 -0
data/ext/cumo/narray/gen/tmpl/rand_norm.c +121 -0
data/ext/cumo/narray/gen/tmpl/real_accum_kernel.cu +75 -0
data/ext/cumo/narray/gen/tmpl/seq.c +112 -0
data/ext/cumo/narray/gen/tmpl/seq_kernel.cu +43 -0
data/ext/cumo/narray/gen/tmpl/set2.c +57 -0
data/ext/cumo/narray/gen/tmpl/sort.c +48 -0
data/ext/cumo/narray/gen/tmpl/sort_index.c +111 -0
data/ext/cumo/narray/gen/tmpl/store.c +41 -0
data/ext/cumo/narray/gen/tmpl/store_array.c +187 -0
data/ext/cumo/narray/gen/tmpl/store_array_kernel.cu +58 -0
data/ext/cumo/narray/gen/tmpl/store_bit.c +86 -0
data/ext/cumo/narray/gen/tmpl/store_bit_kernel.cu +66 -0
data/ext/cumo/narray/gen/tmpl/store_from.c +81 -0
data/ext/cumo/narray/gen/tmpl/store_from_kernel.cu +58 -0
data/ext/cumo/narray/gen/tmpl/store_kernel.cu +3 -0
data/ext/cumo/narray/gen/tmpl/store_numeric.c +9 -0
data/ext/cumo/narray/gen/tmpl/to_a.c +43 -0
data/ext/cumo/narray/gen/tmpl/unary.c +132 -0
data/ext/cumo/narray/gen/tmpl/unary2.c +60 -0
data/ext/cumo/narray/gen/tmpl/unary_kernel.cu +72 -0
data/ext/cumo/narray/gen/tmpl/unary_ret2.c +34 -0
data/ext/cumo/narray/gen/tmpl/unary_s.c +86 -0
data/ext/cumo/narray/gen/tmpl/unary_s_kernel.cu +58 -0
data/ext/cumo/narray/gen/tmpl_bit/allocate.c +24 -0
data/ext/cumo/narray/gen/tmpl_bit/aref.c +54 -0
data/ext/cumo/narray/gen/tmpl_bit/aref_cpu.c +57 -0
data/ext/cumo/narray/gen/tmpl_bit/aset.c +56 -0
data/ext/cumo/narray/gen/tmpl_bit/binary.c +98 -0
data/ext/cumo/narray/gen/tmpl_bit/bit_count.c +64 -0
data/ext/cumo/narray/gen/tmpl_bit/bit_count_cpu.c +88 -0
data/ext/cumo/narray/gen/tmpl_bit/bit_count_kernel.cu +76 -0
data/ext/cumo/narray/gen/tmpl_bit/bit_reduce.c +133 -0
data/ext/cumo/narray/gen/tmpl_bit/each.c +48 -0
data/ext/cumo/narray/gen/tmpl_bit/each_with_index.c +70 -0
data/ext/cumo/narray/gen/tmpl_bit/extract.c +30 -0
data/ext/cumo/narray/gen/tmpl_bit/extract_cpu.c +29 -0
data/ext/cumo/narray/gen/tmpl_bit/fill.c +69 -0
data/ext/cumo/narray/gen/tmpl_bit/format.c +64 -0
data/ext/cumo/narray/gen/tmpl_bit/format_to_a.c +51 -0
data/ext/cumo/narray/gen/tmpl_bit/inspect.c +21 -0
data/ext/cumo/narray/gen/tmpl_bit/mask.c +136 -0
data/ext/cumo/narray/gen/tmpl_bit/none_p.c +14 -0
data/ext/cumo/narray/gen/tmpl_bit/store_array.c +108 -0
data/ext/cumo/narray/gen/tmpl_bit/store_bit.c +70 -0
data/ext/cumo/narray/gen/tmpl_bit/store_from.c +60 -0
data/ext/cumo/narray/gen/tmpl_bit/to_a.c +47 -0
data/ext/cumo/narray/gen/tmpl_bit/unary.c +81 -0
data/ext/cumo/narray/gen/tmpl_bit/where.c +90 -0
data/ext/cumo/narray/gen/tmpl_bit/where2.c +95 -0
data/ext/cumo/narray/index.c +880 -0
data/ext/cumo/narray/kwargs.c +153 -0
data/ext/cumo/narray/math.c +142 -0
data/ext/cumo/narray/narray.c +1948 -0
data/ext/cumo/narray/ndloop.c +2105 -0
data/ext/cumo/narray/rand.c +45 -0
data/ext/cumo/narray/step.c +474 -0
data/ext/cumo/narray/struct.c +886 -0
data/lib/cumo.rb +3 -0
data/lib/cumo/cuda.rb +11 -0
data/lib/cumo/cuda/compile_error.rb +36 -0
data/lib/cumo/cuda/compiler.rb +161 -0
data/lib/cumo/cuda/device.rb +47 -0
data/lib/cumo/cuda/link_state.rb +31 -0
data/lib/cumo/cuda/module.rb +40 -0
data/lib/cumo/cuda/nvrtc_program.rb +27 -0
data/lib/cumo/linalg.rb +12 -0
data/lib/cumo/narray.rb +2 -0
data/lib/cumo/narray/extra.rb +1278 -0
data/lib/erbpp.rb +294 -0
data/lib/erbpp/line_number.rb +137 -0
data/lib/erbpp/narray_def.rb +381 -0
data/numo-narray-version +1 -0
data/run.gdb +7 -0
metadata +353 -0

data/ext/cumo/include/cumo/types/complex_kernel.h ADDED

@@ -0,0 +1,414 @@
+#ifndef CUMO_COMPLEX_KERNEL_H
+#define CUMO_COMPLEX_KERNEL_H
+/* Build a complex value from a real part r and an imaginary part i. */
+__host__ __device__ static inline dtype c_new(rtype r, rtype i) {
+    dtype value;
+    IMAG(value) = i;
+    REAL(value) = r;
+    return value;
+}
+/* Return a copy of x whose real part is replaced by r (x is passed by value). */
+__host__ __device__ static inline dtype c_set_real(dtype x, rtype r) {
+    REAL(x) = r;
+    return x;
+}
+/* Return a copy of x whose imaginary part is replaced by i (x is passed by value). */
+__host__ __device__ static inline dtype c_set_imag(dtype x, rtype i) {
+    IMAG(x) = i;
+    return x;
+}
+//static inline VALUE COMP2NUM(dtype x) {
+//    VALUE v;
+//    v = rb_funcall(rb_intern("Kernel"), rb_intern("Complex"), 2,
+//                   rb_float_new(REAL(x)), rb_float_new(IMAG(x)));
+//    return v;
+//}
+//
+//static inline dtype NUM2COMP(VALUE v) {
+//    dtype z;
+//    REAL(z) = NUM2DBL(rb_funcall(v,id_real,0));
+//    IMAG(z) = NUM2DBL(rb_funcall(v,id_imag,0));
+//    return z;
+//}
+/*
+ * Component-wise predicates on a complex value.
+ * These are macros: the argument is expanded (and evaluated) more than once,
+ * so it must not have side effects.
+ */
+/* True when both components compare equal to 0. */
+#define c_is_zero(x) (REAL(x)==0 && IMAG(x)==0)
+/* Equality / inequality: both components must match / any component differs. */
+#define c_eq(x,y) (REAL(x)==REAL(y) && IMAG(x)==IMAG(y))
+#define c_ne(x,y) (REAL(x)!=REAL(y) || IMAG(x)!=IMAG(y))
+/* True when either component is NaN / infinite. */
+#define c_isnan(x) (isnan(REAL(x)) || isnan(IMAG(x)))
+#define c_isinf(x) (isinf(REAL(x)) || isinf(IMAG(x)))
+/* True when either component is an infinity with the sign bit clear. */
+#define c_isposinf(x) ((isinf(REAL(x)) && signbit(REAL(x))==0) || \
+                       (isinf(IMAG(x)) && signbit(IMAG(x))==0))
+/* True when either component is an infinity with the sign bit set. */
+#define c_isneginf(x) ((isinf(REAL(x)) && signbit(REAL(x))) || \
+                       (isinf(IMAG(x)) && signbit(IMAG(x))))
+/* True only when both components are finite. */
+#define c_isfinite(x) (isfinite(REAL(x)) && isfinite(IMAG(x)))
+/* The additive identity 0+0i. */
+__host__ __device__ static inline dtype c_zero() {
+    dtype zero;
+    IMAG(zero) = 0;
+    REAL(zero) = 0;
+    return zero;
+}
+/* The multiplicative identity 1+0i. */
+__host__ __device__ static inline dtype c_one() {
+    dtype one;
+    IMAG(one) = 0;
+    REAL(one) = 1;
+    return one;
+}
+/* Negation: returns -x by flipping the sign of both components. */
+__host__ __device__ static inline dtype c_minus(dtype x) {
+    dtype neg;
+    IMAG(neg) = -IMAG(x);
+    REAL(neg) = -REAL(x);
+    return neg;
+}
+/* Multiply by the imaginary unit: (a+bi)*i = -b + ai. */
+__host__ __device__ static inline dtype c_im(dtype x) {
+    dtype rotated;
+    IMAG(rotated) = REAL(x);
+    REAL(rotated) = -IMAG(x);
+    return rotated;
+}
+/* Component-wise sum x + y. */
+__host__ __device__ static inline dtype c_add(dtype x, dtype y) {
+    dtype sum;
+    IMAG(sum) = IMAG(x)+IMAG(y);
+    REAL(sum) = REAL(x)+REAL(y);
+    return sum;
+}
+/* Component-wise difference x - y. */
+__host__ __device__ static inline dtype c_sub(dtype x, dtype y) {
+    dtype diff;
+    IMAG(diff) = IMAG(x)-IMAG(y);
+    REAL(diff) = REAL(x)-REAL(y);
+    return diff;
+}
+/* Complex product: (a+bi)(c+di) = (ac-bd) + (ad+bc)i. */
+__host__ __device__ static inline dtype c_mul(dtype x, dtype y) {
+    dtype prod;
+    IMAG(prod) = REAL(x)*IMAG(y)+IMAG(x)*REAL(y);
+    REAL(prod) = REAL(x)*REAL(y)-IMAG(x)*IMAG(y);
+    return prod;
+}
+/* Scale a complex value x by the real factor y. */
+__host__ __device__ static inline dtype c_mul_r(dtype x, rtype y) {
+    dtype scaled;
+    IMAG(scaled) = IMAG(x)*y;
+    REAL(scaled) = REAL(x)*y;
+    return scaled;
+}
+/*
+ * Complex quotient x / y.
+ * The divisor is first normalised by its modulus (via r_hypot) and the
+ * result is divided by that modulus once more, instead of dividing by
+ * |y|^2 directly.
+ */
+__host__ __device__ static inline dtype c_div(dtype x, dtype y) {
+    dtype quot;
+    rtype norm, unit_re, unit_im;
+    norm    = r_hypot(REAL(y),IMAG(y));
+    unit_re = REAL(y)/norm;
+    unit_im = IMAG(y)/norm;
+    IMAG(quot) = (IMAG(x)*unit_re-REAL(x)*unit_im)/norm;
+    REAL(quot) = (REAL(x)*unit_re+IMAG(x)*unit_im)/norm;
+    return quot;
+}
+/* Divide both components of x by the real value y. */
+__host__ __device__ static inline dtype c_div_r(dtype x, rtype y) {
+    dtype quot;
+    IMAG(quot) = IMAG(x)/y;
+    REAL(quot) = REAL(x)/y;
+    return quot;
+}
+/*
+ * Reciprocal 1/x.
+ * The larger-magnitude component is used as the divisor of the ratio so
+ * that the ratio's magnitude never exceeds 1.
+ */
+__host__ __device__ static inline dtype c_reciprocal(dtype x) {
+    dtype inv;
+    rtype ratio, denom;
+    if ( r_abs(REAL(x)) > r_abs(IMAG(x)) ) {
+        ratio = IMAG(x)/REAL(x);
+        denom = (1+ratio*ratio)*REAL(x);
+        IMAG(inv) = ratio / -denom;
+        REAL(inv) = 1/denom;
+    } else {
+        ratio = REAL(x)/IMAG(x);
+        denom = (1+ratio*ratio)*IMAG(x);
+        REAL(inv) = ratio / denom;
+        IMAG(inv) = -1/denom;
+    }
+    return inv;
+}
+/* Square: (a+bi)^2 = (a^2 - b^2) + 2ab i. */
+__host__ __device__ static inline dtype c_square(dtype x) {
+    dtype sq;
+    IMAG(sq) = 2*REAL(x)*IMAG(x);
+    REAL(sq) = REAL(x)*REAL(x)-IMAG(x)*IMAG(x);
+    return sq;
+}
+/*
+ * Square root of x.
+ * Works on the halved components: with m = |x|/2, the branch for a
+ * positive real part computes the real part first from m + Re(x)/2,
+ * the other branch computes the imaginary part first from m - Re(x)/2
+ * (taking the sign of Im(x)); when that difference is exactly zero the
+ * result is 0+0i.
+ */
+__host__ __device__ static inline dtype c_sqrt(dtype x) {
+    dtype root;
+    rtype half_re, half_im, m;
+    half_re = REAL(x)/2;
+    half_im = IMAG(x)/2;
+    m = r_hypot(half_re,half_im);
+    if (half_re>0) {
+        REAL(root) = sqrt(m+half_re);
+        IMAG(root) = half_im/REAL(root);
+        return root;
+    }
+    m -= half_re;
+    if (m!=0) {
+        IMAG(root) = (half_im>=0) ? sqrt(m):-sqrt(m);
+        REAL(root) = half_im/IMAG(root);
+    } else {
+        REAL(root) = IMAG(root) = 0;
+    }
+    return root;
+}
+/* Natural logarithm: real part is ln|x|, imaginary part is the argument atan2(Im, Re). */
+__host__ __device__ static inline dtype c_log(dtype x) {
+    dtype ln;
+    IMAG(ln) = r_atan2(IMAG(x),REAL(x));
+    REAL(ln) = r_log(r_hypot(REAL(x),IMAG(x)));
+    return ln;
+}
+/*
+ * Base-2 logarithm: log2(x) = ln(x) * log2(e).
+ * The natural logarithm z (not the input x) is what gets scaled by
+ * M_LOG2E; scaling x would discard the c_log result entirely.
+ */
+__host__ __device__ static inline dtype c_log2(dtype x) {
+    dtype z;
+    z = c_log(x);
+    z = c_mul_r(z,M_LOG2E);
+    return z;
+}
+/*
+ * Base-10 logarithm: log10(x) = ln(x) * log10(e).
+ * The natural logarithm z (not the input x) is what gets scaled by
+ * M_LOG10E; scaling x would discard the c_log result entirely.
+ */
+__host__ __device__ static inline dtype c_log10(dtype x) {
+    dtype z;
+    z = c_log(x);
+    z = c_mul_r(z,M_LOG10E);
+    return z;
+}
+/* Exponential: e^(a+bi) = e^a * (cos b + i sin b). */
+__host__ __device__ static inline dtype c_exp(dtype x) {
+    dtype e;
+    rtype mag = r_exp(REAL(x));
+    IMAG(e) = mag*r_sin(IMAG(x));
+    REAL(e) = mag*r_cos(IMAG(x));
+    return e;
+}
+/*
+ * Base-2 exponential: 2^x = e^(x * ln 2).
+ * Both components of x must be scaled by ln 2: the real part sets the
+ * magnitude and the imaginary part sets the phase. Leaving the phase
+ * unscaled gives the right answer only for purely real x.
+ */
+__host__ __device__ static inline dtype c_exp2(dtype x) {
+    dtype z;
+    rtype a = r_exp(REAL(x)*M_LN2);
+    rtype b = IMAG(x)*M_LN2;
+    REAL(z) = a*r_cos(b);
+    IMAG(z) = a*r_sin(b);
+    return z;
+}
+/*
+ * Base-10 exponential: 10^x = e^(x * ln 10).
+ * Both components of x must be scaled by ln 10: the real part sets the
+ * magnitude and the imaginary part sets the phase. Leaving the phase
+ * unscaled gives the right answer only for purely real x.
+ */
+__host__ __device__ static inline dtype c_exp10(dtype x) {
+    dtype z;
+    rtype a = r_exp(REAL(x)*M_LN10);
+    rtype b = IMAG(x)*M_LN10;
+    REAL(z) = a*r_cos(b);
+    IMAG(z) = a*r_sin(b);
+    return z;
+}
+/* sin(a+bi) = sin a cosh b + i cos a sinh b. */
+__host__ __device__ static inline dtype c_sin(dtype x) {
+    dtype w;
+    IMAG(w) = r_cos(REAL(x))*r_sinh(IMAG(x));
+    REAL(w) = r_sin(REAL(x))*r_cosh(IMAG(x));
+    return w;
+}
+/* sinh(a+bi) = sinh a cos b + i cosh a sin b. */
+__host__ __device__ static inline dtype c_sinh(dtype x) {
+    dtype w;
+    IMAG(w) = r_cosh(REAL(x))*r_sin(IMAG(x));
+    REAL(w) = r_sinh(REAL(x))*r_cos(IMAG(x));
+    return w;
+}
+/* cos(a+bi) = cos a cosh b - i sin a sinh b. */
+__host__ __device__ static inline dtype c_cos(dtype x) {
+    dtype w;
+    IMAG(w) = -r_sin(REAL(x))*r_sinh(IMAG(x));
+    REAL(w) = r_cos(REAL(x))*r_cosh(IMAG(x));
+    return w;
+}
+/* cosh(a+bi) = cosh a cos b + i sinh a sin b. */
+__host__ __device__ static inline dtype c_cosh(dtype x) {
+    dtype w;
+    IMAG(w) = r_sinh(REAL(x))*r_sin(IMAG(x));
+    REAL(w) = r_cosh(REAL(x))*r_cos(IMAG(x));
+    return w;
+}
+/*
+ * tan(a+bi) = (sin a cos a + i sinh b cosh b) / (cos^2 a + sinh^2 b).
+ * For |b| < 1 the denominator is formed directly. Otherwise the whole
+ * expression is rewritten in terms of 1/sinh^2 b, obtained from
+ * exp(-b), so that sinh b itself is never evaluated.
+ */
+__host__ __device__ static inline dtype c_tan(dtype x) {
+    dtype t;
+    rtype cs, denom;
+    if (r_abs(IMAG(x))<1) {
+        rtype sh;
+        cs = r_cos(REAL(x));
+        sh = r_sinh(IMAG(x));
+        denom = cs*cs + sh*sh;
+        REAL(t) = 0.5*r_sin(2*REAL(x))/denom;
+        IMAG(t) = 0.5*r_sinh(2*IMAG(x))/denom;
+    } else {
+        rtype e, csch, csch2;
+        e = r_exp(-IMAG(x));
+        csch = 2*e/(1-e*e);          /* 1/sinh(b) */
+        csch2 = csch*csch;
+        cs = r_cos(REAL(x));
+        denom = 1.0 + cs*cs*csch2;
+        REAL(t) = 0.5*r_sin(2*REAL(x))*csch2/denom;
+        IMAG(t) = 1/r_tanh(IMAG(x))/denom;
+    }
+    return t;
+}
+/*
+ * tanh(a+bi) = (sinh a cosh a + i sin b cos b) / (cos^2 b + sinh^2 a).
+ * The imaginary part uses the same formula in both ranges; only the real
+ * part switches to a form based on tanh(a) and cos(b)/sinh(a) once
+ * |a| >= 1.
+ */
+__host__ __device__ static inline dtype c_tanh(dtype x) {
+    dtype t;
+    rtype cs, sh, denom;
+    cs = r_cos(IMAG(x));
+    sh = r_sinh(REAL(x));
+    denom = cs*cs + sh*sh;
+    IMAG(t) = 0.5*r_sin(2*IMAG(x))/denom;
+    if (r_abs(REAL(x))<1) {
+        REAL(t) = sh*r_cosh(REAL(x))/denom;
+    } else {
+        rtype ratio = cs / sh;
+        ratio = 1 + ratio*ratio;
+        REAL(t) = 1/(r_tanh(REAL(x))*ratio);
+    }
+    return t;
+}
+/* asin(x) = -i * log(i*x + sqrt(1 - x^2)). */
+__host__ __device__ static inline dtype c_asin(dtype x) {
+    dtype out, w;
+    /* w = 1 - x^2 */
+    w = c_square(x);
+    IMAG(w) = -IMAG(w);
+    REAL(w) = 1-REAL(w);
+    /* w = sqrt(1 - x^2) + i*x */
+    w = c_sqrt(w);
+    IMAG(w) += REAL(x);
+    REAL(w) -= IMAG(x);
+    w = c_log(w);
+    /* out = -i * w */
+    IMAG(out) = -REAL(w);
+    REAL(out) = IMAG(w);
+    return out;
+}
+/* asinh(x) = log(x + sqrt(x^2 + 1)). */
+__host__ __device__ static inline dtype c_asinh(dtype x) {
+    dtype w;
+    w = c_square(x);
+    REAL(w) += 1;
+    w = c_sqrt(w);
+    IMAG(w) += IMAG(x);
+    REAL(w) += REAL(x);
+    return c_log(w);
+}
+/* acos(x) = -i * log(x + i*sqrt(1 - x^2)). */
+__host__ __device__ static inline dtype c_acos(dtype x) {
+    dtype out, w;
+    /* w = sqrt(1 - x^2) */
+    w = c_square(x);
+    IMAG(w) = -IMAG(w);
+    REAL(w) = 1-REAL(w);
+    w = c_sqrt(w);
+    /* out = x + i*w, then w = log(out) */
+    IMAG(out) = IMAG(x)+REAL(w);
+    REAL(out) = REAL(x)-IMAG(w);
+    w = c_log(out);
+    /* out = -i * w */
+    IMAG(out) = -REAL(w);
+    REAL(out) = IMAG(w);
+    return out;
+}
+/* acosh(x) = log(x + sqrt(x^2 - 1)). */
+__host__ __device__ static inline dtype c_acosh(dtype x) {
+    dtype w;
+    w = c_square(x);
+    REAL(w) -= 1;
+    w = c_sqrt(w);
+    IMAG(w) += IMAG(x);
+    REAL(w) += REAL(x);
+    return c_log(w);
+}
+/* atan(x) = (i/2) * log((i + x) / (i - x)). */
+__host__ __device__ static inline dtype c_atan(dtype x) {
+    dtype out, w;
+    /* out = i + x (numerator), w = i - x (denominator) */
+    REAL(out) = REAL(x);
+    IMAG(out) = 1+IMAG(x);
+    REAL(w) = -REAL(x);
+    IMAG(w) = 1-IMAG(x);
+    w = c_log(c_div(out,w));
+    /* out = (i/2) * w */
+    IMAG(out) = REAL(w)/2;
+    REAL(out) = -IMAG(w)/2;
+    return out;
+}
+/* atanh(x) = (1/2) * log((1 + x) / (1 - x)). */
+__host__ __device__ static inline dtype c_atanh(dtype x) {
+    dtype out, w;
+    /* out = 1 + x (numerator), w = 1 - x (denominator) */
+    REAL(out) = 1+REAL(x);
+    IMAG(out) = IMAG(x);
+    REAL(w) = 1-REAL(x);
+    IMAG(w) = -IMAG(x);
+    w = c_log(c_div(out,w));
+    IMAG(out) = IMAG(w)/2;
+    REAL(out) = REAL(w)/2;
+    return out;
+}
+/*
+ * Complex power x^y computed as exp(y * log(x)), with two shortcuts:
+ *   - a zero exponent yields 1, whatever x is;
+ *   - a zero base with a positive, purely real exponent yields 0.
+ */
+__host__ __device__ static inline dtype c_pow(dtype x, dtype y)
+{
+    if (c_is_zero(y)) {
+        return c_one();
+    }
+    if (c_is_zero(x) && REAL(y)>0 && IMAG(y)==0) {
+        return c_zero();
+    }
+    return c_exp(c_mul(y,c_log(x)));
+}
+/*
+ * Internal helper for c_pow_int: x raised to the integer power p by
+ * repeated squaring. p == 2 is short-cut to a single square and p == 0
+ * yields 1. Intended for non-negative p only.
+ */
+__host__ __device__ static inline dtype c_pow_positive_int(dtype x, int p)
+{
+    dtype acc = c_one();
+    if (p==2) {
+        return c_square(x);
+    }
+    /* Lowest bit set: start the accumulator from x instead of 1. */
+    if (p&1) {
+        acc = x;
+    }
+    for (p >>= 1; p; p >>= 1) {
+        x = c_square(x);
+        if (p&1) {
+            acc = c_mul(acc,x);
+        }
+    }
+    return acc;
+}
+/*
+ * Integer power x^p. A negative exponent is handled as the reciprocal of
+ * x^(-p).
+ */
+__host__ __device__ static inline dtype c_pow_int(dtype x, int p)
+{
+    if (p>=0) {
+        return c_pow_positive_int(x,p);
+    }
+    x = c_pow_positive_int(x,-p);
+    return c_reciprocal(x);
+}
+/* Cube root computed as exp(log(x) / 3). */
+__host__ __device__ static inline dtype c_cbrt(dtype x) {
+    dtype third_log = c_div_r(c_log(x),3);
+    return c_exp(third_log);
+}
+/* Modulus |x|, computed with r_hypot on the two components. */
+__host__ __device__ static inline rtype c_abs(dtype x) {
+    rtype modulus = r_hypot(REAL(x),IMAG(x));
+    return modulus;
+}
+/* Squared modulus |x|^2 = Re^2 + Im^2 (no square root involved). */
+__host__ __device__ static inline rtype c_abs_square(dtype x) {
+    return REAL(x)*REAL(x) + IMAG(x)*IMAG(x);
+}
+/*
+static inline rtype c_hypot(dtype x, dtype y) {
+    return r_hypot(c_abs(x),c_abs(y));
+}
+*/
+#endif // CUMO_COMPLEX_KERNEL_H

data/ext/cumo/include/cumo/types/complex_macro.h ADDED

@@ -0,0 +1,382 @@
+#include "float_def.h"
+extern double round(double);
+extern double log2(double);
+extern double exp2(double);
+extern double exp10(double);
+/*
+ * Real-valued primitives used by the complex routines. In this header
+ * each r_* name expands to the corresponding <math.h> function without a
+ * type suffix (fabs, sqrt, exp, ...).
+ */
+#define r_abs(x)   fabs(x)
+#define r_sqrt(x)  sqrt(x)
+#define r_exp(x)   exp(x)
+#define r_log(x)   log(x)
+#define r_sin(x)   sin(x)
+#define r_cos(x)   cos(x)
+#define r_sinh(x)  sinh(x)
+#define r_cosh(x)  cosh(x)
+#define r_tanh(x)  tanh(x)
+#define r_atan2(y,x)  atan2(y,x)
+#define r_hypot(x,y)  hypot(x,y)
+#include "complex.h"
+/* Convert an scomplex value to dtype by copying each component. */
+static inline dtype c_from_scomplex(scomplex x) {
+    dtype converted;
+    IMAG(converted) = IMAG(x);
+    REAL(converted) = REAL(x);
+    return converted;
+}
+/* Convert a dcomplex value to dtype by copying each component. */
+static inline dtype c_from_dcomplex(dcomplex x) {
+    dtype converted;
+    IMAG(converted) = IMAG(x);
+    REAL(converted) = REAL(x);
+    return converted;
+}
+/* --------------------------- */
+/*
+ * Generic m_* operation names mapped onto the complex c_* implementations.
+ * NOTE(review): c_mod, c_nearly_eq and c_hypot are referenced below but are
+ * not defined in the sources visible here - confirm complex.h provides them.
+ */
+/* Constants and conversions; real and integer inputs get a zero imaginary part. */
+#define m_zero c_zero()
+#define m_one  c_one()
+#define m_num_to_data(x) NUM2COMP(x)
+#define m_data_to_num(x) COMP2NUM(x)
+#define m_from_double(x) c_new(x,0)
+#define m_from_real(x)   c_new(x,0)
+#define m_from_sint(x)   c_new(x,0)
+#define m_from_int32(x)  c_new(x,0)
+#define m_from_int64(x)  c_new(x,0)
+#define m_from_uint32(x) c_new(x,0)
+#define m_from_uint64(x) c_new(x,0)
+#define m_from_scomplex(x) c_from_scomplex(x)
+#define m_from_dcomplex(x) c_from_dcomplex(x)
+/* x is a pointer here: it is cast to dtype* and dereferenced. */
+#define m_extract(x) COMP2NUM(*(dtype*)x)
+/* Component access. */
+#define m_real(x)  REAL(x)
+#define m_imag(x)  IMAG(x)
+#define m_set_real(x,y)  c_set_real(x,y)
+#define m_set_imag(x,y)  c_set_imag(x,y)
+/* Arithmetic. */
+#define m_add(x,y) c_add(x,y)
+#define m_sub(x,y) c_sub(x,y)
+#define m_mul(x,y) c_mul(x,y)
+#define m_div(x,y) c_div(x,y)
+#define m_mod(x,y) c_mod(x,y)
+#define m_pow(x,y) c_pow(x,y)
+#define m_pow_int(x,y) c_pow_int(x,y)
+#define m_abs(x)   c_abs(x)
+#define m_minus(x) c_minus(x)
+#define m_reciprocal(x) c_reciprocal(x)
+#define m_square(x) c_square(x)
+/* Rounding is applied to the real and imaginary parts independently. */
+#define m_floor(x) c_new(floor(REAL(x)),floor(IMAG(x)))
+#define m_round(x) c_new(round(REAL(x)),round(IMAG(x)))
+#define m_ceil(x)  c_new(ceil(REAL(x)),ceil(IMAG(x)))
+#define m_trunc(x) c_new(trunc(REAL(x)),trunc(IMAG(x)))
+#define m_rint(x)  c_new(rint(REAL(x)),rint(IMAG(x)))
+/*
+ * Component-wise sign: 0.0, 1.0 or -1.0; a component that is neither
+ * ==0, >0 nor <0 (i.e. NaN) is passed through unchanged.
+ */
+#define m_sign(x)  c_new( \
+ ((REAL(x)==0) ? 0.0:((REAL(x)>0) ? 1.0:((REAL(x)<0) ? -1.0:REAL(x)))), \
+ ((IMAG(x)==0) ? 0.0:((IMAG(x)>0) ? 1.0:((IMAG(x)<0) ? -1.0:IMAG(x)))))
+#define m_copysign(x,y) c_new(copysign(REAL(x),REAL(y)),copysign(IMAG(x),IMAG(y)))
+#define m_im(x)    c_im(x)
+#define m_conj(x)  c_new(REAL(x),-IMAG(x))
+#define m_arg(x)   atan2(IMAG(x),REAL(x))
+/* Comparisons and classification. */
+#define m_eq(x,y) c_eq(x,y)
+#define m_ne(x,y) c_ne(x,y)
+#define m_nearly_eq(x,y) c_nearly_eq(x,y)
+#define m_isnan(x)    c_isnan(x)
+#define m_isinf(x)    c_isinf(x)
+#define m_isposinf(x) c_isposinf(x)
+#define m_isneginf(x) c_isneginf(x)
+#define m_isfinite(x) c_isfinite(x)
+/* Formats as "<real><signed imag>i" using %g; s must be large enough (unbounded sprintf). */
+#define m_sprintf(s,x) sprintf(s,"%g%+gi",REAL(x),IMAG(x))
+/* Elementary functions. */
+#define m_sqrt(x)    c_sqrt(x)
+#define m_cbrt(x)    c_cbrt(x)
+#define m_log(x)     c_log(x)
+#define m_log2(x)    c_log2(x)
+#define m_log10(x)   c_log10(x)
+#define m_exp(x)     c_exp(x)
+#define m_exp2(x)    c_exp2(x)
+#define m_exp10(x)   c_exp10(x)
+#define m_sin(x)     c_sin(x)
+#define m_cos(x)     c_cos(x)
+#define m_tan(x)     c_tan(x)
+#define m_asin(x)    c_asin(x)
+#define m_acos(x)    c_acos(x)
+#define m_atan(x)    c_atan(x)
+#define m_sinh(x)    c_sinh(x)
+#define m_cosh(x)    c_cosh(x)
+#define m_tanh(x)    c_tanh(x)
+#define m_asinh(x)   c_asinh(x)
+#define m_acosh(x)   c_acosh(x)
+#define m_atanh(x)   c_atanh(x)
+#define m_hypot(x,y) c_hypot(x,y)
+/* sin(x)/x with no special case for x == 0. */
+#define m_sinc(x)    c_div(c_sin(x),x)
+/* Initial accumulator values, expressed as the Ruby integer 0 (INT2FIX). */
+#define m_sum_init INT2FIX(0)
+#define m_mulsum_init INT2FIX(0)
+/* True when neither component is NaN (a NaN never compares equal to itself). */
+#define not_nan(x) (REAL(x)==REAL(x) && IMAG(x)==IMAG(x))
+/*
+ * Accumulation helpers. Each expands to a brace-enclosed statement block
+ * (not a do/while(0) wrapper), so take care when using one as the body
+ * of an if/else.
+ */
+/* z += x*y */
+#define m_mulsum(x,y,z) {z = m_add(m_mul(x,y),z);}
+/* z += x*y, skipped when x or y contains a NaN. */
+#define m_mulsum_nan(x,y,z) {          \
+        if(not_nan(x) && not_nan(y)) { \
+            z = m_add(m_mul(x,y),z);   \
+        }}
+/* x += y */
+#define m_cumsum(x,y) {(x)=m_add(x,y);}
+/* If x holds a NaN it is replaced by y; otherwise y is added unless y holds a NaN. */
+#define m_cumsum_nan(x,y) {      \
+        if (!not_nan(x)) {       \
+            (x) = (y);           \
+        } else if (not_nan(y)) { \
+            (x) = m_add(x,y);    \
+        }}
+/* x *= y */
+#define m_cumprod(x,y) {(x)=m_mul(x,y);}
+/* If x holds a NaN it is replaced by y; otherwise x is multiplied by y unless y holds a NaN. */
+#define m_cumprod_nan(x,y) {     \
+        if (!not_nan(x)) {       \
+            (x) = (y);           \
+        } else if (not_nan(y)) { \
+            (x) = m_mul(x,y);    \
+        }}
+/*
+ * Sum of n dtype elements starting at p, stepping `stride` bytes between
+ * elements. Returns 0+0i when n is 0.
+ */
+static inline dtype f_sum(size_t n, char *p, ssize_t stride)
+{
+    size_t k;
+    dtype total = c_zero();
+    for (k = 0; k < n; ++k) {
+        total = c_add(*(dtype*)p, total);
+        p += stride;
+    }
+    return total;
+}
+/*
+ * Like f_sum, but elements with a NaN in either component are skipped.
+ * p advances by `stride` bytes per element.
+ */
+static inline dtype f_sum_nan(size_t n, char *p, ssize_t stride)
+{
+    size_t k;
+    dtype elem;
+    dtype total = c_zero();
+    for (k = 0; k < n; ++k, p += stride) {
+        elem = *(dtype*)p;
+        if (!not_nan(elem)) {
+            continue;
+        }
+        total = c_add(elem, total);
+    }
+    return total;
+}
+/*
+ * Compensated (Kahan-style) sum of n dtype elements starting at p, with
+ * `stride` bytes between elements.
+ *
+ * y is the running sum, r the running compensation term and t a scratch
+ * value. They are declared volatile, presumably to stop the compiler from
+ * simplifying away the compensation arithmetic - confirm with the author.
+ */
+static inline dtype f_kahan_sum(size_t n, char *p, ssize_t stride)
+{
+    size_t i=n;
+    dtype x;
+    volatile dtype y,t,r;
+    y = c_zero();
+    r = c_zero();
+    for (; i--;) {
+        x = *(dtype*)p;
+        /* Per component, keep the larger magnitude in y and the smaller in x. */
+        if (fabs(REAL(x)) > fabs(REAL(y))) {
+            double z=REAL(x); REAL(x)=REAL(y); REAL(y)=z;
+        }
+        if (fabs(IMAG(x)) > fabs(IMAG(y))) {
+            double z=IMAG(x); IMAG(x)=IMAG(y); IMAG(y)=z;
+        }
+        r = c_add(x, r);   /* fold the new element into the compensation */
+        t = y;             /* remember the sum before the update */
+        y = c_add(r, y);
+        t = c_sub(y, t);   /* what was actually absorbed into y */
+        r = c_sub(r, t);   /* the remainder carries over to the next step */
+        p += stride;
+    }
+    return y;
+}
+/*
+ * Compensated (Kahan-style) sum of n dtype elements starting at p, with
+ * `stride` bytes between elements; elements with a NaN in either
+ * component are skipped.
+ *
+ * y is the running sum, r the running compensation term and t a scratch
+ * value. They are declared volatile, presumably to stop the compiler from
+ * simplifying away the compensation arithmetic - confirm with the author.
+ */
+static inline dtype f_kahan_sum_nan(size_t n, char *p, ssize_t stride)
+{
+    size_t i=n;
+    dtype x;
+    volatile dtype y,t,r;
+    y = c_zero();
+    r = c_zero();
+    for (; i--;) {
+        x = *(dtype*)p;
+        if (not_nan(x)) {
+            /* Per component, keep the larger magnitude in y and the smaller in x. */
+            if (fabs(REAL(x)) > fabs(REAL(y))) {
+                double z=REAL(x); REAL(x)=REAL(y); REAL(y)=z;
+            }
+            if (fabs(IMAG(x)) > fabs(IMAG(y))) {
+                double z=IMAG(x); IMAG(x)=IMAG(y); IMAG(y)=z;
+            }
+            r = c_add(x, r);   /* fold the new element into the compensation */
+            t = y;             /* remember the sum before the update */
+            y = c_add(r, y);
+            t = c_sub(y, t);   /* what was actually absorbed into y */
+            r = c_sub(r, t);   /* the remainder carries over to the next step */
+        }
+        p += stride;
+    }
+    return y;
+}
+/*
+ * Product of n dtype elements starting at p, stepping `stride` bytes
+ * between elements. Returns 1+0i when n is 0.
+ */
+static inline dtype f_prod(size_t n, char *p, ssize_t stride)
+{
+    size_t k;
+    dtype product = c_one();
+    for (k = 0; k < n; ++k) {
+        product = c_mul(*(dtype*)p, product);
+        p += stride;
+    }
+    return product;
+}
+/*
+ * Like f_prod, but elements with a NaN in either component are skipped.
+ * p advances by `stride` bytes per element.
+ */
+static inline dtype f_prod_nan(size_t n, char *p, ssize_t stride)
+{
+    size_t k;
+    dtype elem;
+    dtype product = c_one();
+    for (k = 0; k < n; ++k, p += stride) {
+        elem = *(dtype*)p;
+        if (!not_nan(elem)) {
+            continue;
+        }
+        product = c_mul(elem, product);
+    }
+    return product;
+}
+/*
+ * Arithmetic mean of n dtype elements starting at p (`stride` bytes
+ * apart): the sum divided by the number of elements visited. With n == 0
+ * the division is by zero.
+ */
+static inline dtype f_mean(size_t n, char *p, ssize_t stride)
+{
+    size_t visited;
+    dtype total = c_zero();
+    for (visited = 0; visited < n; ++visited) {
+        total = c_add(*(dtype*)p, total);
+        p += stride;
+    }
+    return c_div_r(total, visited);
+}
+/*
+ * Mean over the elements that contain no NaN: their sum divided by how
+ * many of them there were. If every element is skipped the division is
+ * by zero.
+ */
+static inline dtype f_mean_nan(size_t n, char *p, ssize_t stride)
+{
+    size_t k;
+    size_t kept = 0;
+    dtype elem;
+    dtype total = c_zero();
+    for (k = 0; k < n; ++k, p += stride) {
+        elem = *(dtype*)p;
+        if (!not_nan(elem)) {
+            continue;
+        }
+        total = c_add(elem, total);
+        kept++;
+    }
+    return c_div_r(total, kept);
+}
+/*
+ * Sample variance (divisor count-1) of n dtype elements: the sum of
+ * |x - mean|^2 over all elements, with the mean obtained from f_mean in a
+ * first pass over the same data.
+ * Note: the divisor is computed in size_t, so with zero elements
+ * `count-1` wraps around rather than going negative.
+ */
+static inline rtype f_var(size_t n, char *p, ssize_t stride)
+{
+    size_t visited;
+    rtype sq_dev = 0;
+    dtype mean = f_mean(n,p,stride);
+    for (visited = 0; visited < n; ++visited) {
+        sq_dev += c_abs_square(c_sub(*(dtype*)p, mean));
+        p += stride;
+    }
+    return sq_dev/(visited-1);
+}
+/*
+ * Sample variance (divisor count-1) over the elements that contain no
+ * NaN, using f_mean_nan for the mean.
+ * Note: the divisor is computed in size_t, so when no element is kept
+ * `count-1` wraps around rather than going negative.
+ */
+static inline rtype f_var_nan(size_t n, char *p, ssize_t stride)
+{
+    size_t k;
+    size_t kept = 0;
+    rtype sq_dev = 0;
+    dtype elem;
+    dtype mean = f_mean_nan(n,p,stride);
+    for (k = 0; k < n; ++k, p += stride) {
+        elem = *(dtype*)p;
+        if (!not_nan(elem)) {
+            continue;
+        }
+        sq_dev += c_abs_square(c_sub(elem, mean));
+        kept++;
+    }
+    return sq_dev/(kept-1);
+}
+/* Standard deviation: the square root of f_var over the same elements. */
+static inline rtype f_stddev(size_t n, char *p, ssize_t stride)
+{
+    rtype variance = f_var(n,p,stride);
+    return r_sqrt(variance);
+}
+/* NaN-skipping standard deviation: the square root of f_var_nan. */
+static inline rtype f_stddev_nan(size_t n, char *p, ssize_t stride)
+{
+    rtype variance = f_var_nan(n,p,stride);
+    return r_sqrt(variance);
+}
+/*
+ * Root mean square of n dtype elements (`stride` bytes apart):
+ * sqrt(sum(|x|^2) / count). With n == 0 the division is by zero.
+ */
+static inline rtype f_rms(size_t n, char *p, ssize_t stride)
+{
+    size_t visited;
+    rtype sq_sum = 0;
+    for (visited = 0; visited < n; ++visited) {
+        sq_sum += c_abs_square(*(dtype*)p);
+        p += stride;
+    }
+    return r_sqrt(sq_sum/visited);
+}
+/*
+ * Root mean square over the elements that contain no NaN:
+ * sqrt(sum(|x|^2) / kept). If every element is skipped the division is
+ * by zero.
+ */
+static inline rtype f_rms_nan(size_t n, char *p, ssize_t stride)
+{
+    size_t k;
+    size_t kept = 0;
+    rtype sq_sum = 0;
+    dtype elem;
+    for (k = 0; k < n; ++k, p += stride) {
+        elem = *(dtype*)p;
+        if (!not_nan(elem)) {
+            continue;
+        }
+        sq_sum += c_abs_square(elem);
+        kept++;
+    }
+    return r_sqrt(sq_sum/kept);
+}
+/* Sequence element: x + y*c, i.e. start x advanced by c steps of size y. */
+static inline dtype f_seq(dtype x, dtype y, double c)
+{
+    dtype offset = c_mul_r(y,c);
+    return c_add(x,offset);
+}