RubyGems - gumath - Versions diffs - 0.2.0dev5 → 0.2.0dev8 - Mend

gumath 0.2.0dev5 → 0.2.0dev8

Files changed (99) hide show

checksums.yaml +4 -4
data/CONTRIBUTING.md +7 -2
data/Gemfile +0 -3
data/ext/ruby_gumath/GPATH +0 -0
data/ext/ruby_gumath/GRTAGS +0 -0
data/ext/ruby_gumath/GTAGS +0 -0
data/ext/ruby_gumath/extconf.rb +0 -5
data/ext/ruby_gumath/functions.c +10 -2
data/ext/ruby_gumath/gufunc_object.c +15 -4
data/ext/ruby_gumath/gufunc_object.h +9 -3
data/ext/ruby_gumath/gumath/Makefile +63 -0
data/ext/ruby_gumath/gumath/Makefile.in +1 -0
data/ext/ruby_gumath/gumath/config.h +56 -0
data/ext/ruby_gumath/gumath/config.h.in +3 -0
data/ext/ruby_gumath/gumath/config.log +497 -0
data/ext/ruby_gumath/gumath/config.status +1034 -0
data/ext/ruby_gumath/gumath/configure +375 -4
data/ext/ruby_gumath/gumath/configure.ac +47 -3
data/ext/ruby_gumath/gumath/libgumath/Makefile +236 -0
data/ext/ruby_gumath/gumath/libgumath/Makefile.in +90 -24
data/ext/ruby_gumath/gumath/libgumath/Makefile.vc +54 -15
data/ext/ruby_gumath/gumath/libgumath/apply.c +92 -28
data/ext/ruby_gumath/gumath/libgumath/apply.o +0 -0
data/ext/ruby_gumath/gumath/libgumath/common.o +0 -0
data/ext/ruby_gumath/gumath/libgumath/cpu_device_binary.o +0 -0
data/ext/ruby_gumath/gumath/libgumath/cpu_device_unary.o +0 -0
data/ext/ruby_gumath/gumath/libgumath/cpu_host_binary.o +0 -0
data/ext/ruby_gumath/gumath/libgumath/cpu_host_unary.o +0 -0
data/ext/ruby_gumath/gumath/libgumath/examples.o +0 -0
data/ext/ruby_gumath/gumath/libgumath/extending/graph.c +27 -20
data/ext/ruby_gumath/gumath/libgumath/extending/pdist.c +1 -1
data/ext/ruby_gumath/gumath/libgumath/func.c +13 -9
data/ext/ruby_gumath/gumath/libgumath/func.o +0 -0
data/ext/ruby_gumath/gumath/libgumath/graph.o +0 -0
data/ext/ruby_gumath/gumath/libgumath/gumath.h +55 -14
data/ext/ruby_gumath/gumath/libgumath/kernels/common.c +513 -0
data/ext/ruby_gumath/gumath/libgumath/kernels/common.h +155 -0
data/ext/ruby_gumath/gumath/libgumath/kernels/contrib/bfloat16.h +520 -0
data/ext/ruby_gumath/gumath/libgumath/kernels/cpu_device_binary.cc +1123 -0
data/ext/ruby_gumath/gumath/libgumath/kernels/cpu_device_binary.h +1062 -0
data/ext/ruby_gumath/gumath/libgumath/kernels/cpu_device_msvc.cc +555 -0
data/ext/ruby_gumath/gumath/libgumath/kernels/cpu_device_unary.cc +368 -0
data/ext/ruby_gumath/gumath/libgumath/kernels/cpu_device_unary.h +335 -0
data/ext/ruby_gumath/gumath/libgumath/kernels/cpu_host_binary.c +2952 -0
data/ext/ruby_gumath/gumath/libgumath/kernels/cpu_host_unary.c +1100 -0
data/ext/ruby_gumath/gumath/libgumath/kernels/cuda_device_binary.cu +1143 -0
data/ext/ruby_gumath/gumath/libgumath/kernels/cuda_device_binary.h +1061 -0
data/ext/ruby_gumath/gumath/libgumath/kernels/cuda_device_unary.cu +528 -0
data/ext/ruby_gumath/gumath/libgumath/kernels/cuda_device_unary.h +463 -0
data/ext/ruby_gumath/gumath/libgumath/kernels/cuda_host_binary.c +2817 -0
data/ext/ruby_gumath/gumath/libgumath/kernels/cuda_host_unary.c +1331 -0
data/ext/ruby_gumath/gumath/libgumath/kernels/device.hh +614 -0
data/ext/ruby_gumath/gumath/libgumath/libgumath.a +0 -0
data/ext/ruby_gumath/gumath/libgumath/libgumath.so +1 -0
data/ext/ruby_gumath/gumath/libgumath/libgumath.so.0 +1 -0
data/ext/ruby_gumath/gumath/libgumath/libgumath.so.0.2.0dev3 +0 -0
data/ext/ruby_gumath/gumath/libgumath/nploops.o +0 -0
data/ext/ruby_gumath/gumath/libgumath/pdist.o +0 -0
data/ext/ruby_gumath/gumath/libgumath/quaternion.o +0 -0
data/ext/ruby_gumath/gumath/libgumath/tbl.o +0 -0
data/ext/ruby_gumath/gumath/libgumath/thread.c +17 -4
data/ext/ruby_gumath/gumath/libgumath/thread.o +0 -0
data/ext/ruby_gumath/gumath/libgumath/xndloops.c +110 -0
data/ext/ruby_gumath/gumath/libgumath/xndloops.o +0 -0
data/ext/ruby_gumath/gumath/python/gumath/__init__.py +150 -0
data/ext/ruby_gumath/gumath/python/gumath/_gumath.c +446 -80
data/ext/ruby_gumath/gumath/python/gumath/cuda.c +78 -0
data/ext/ruby_gumath/gumath/python/gumath/examples.c +0 -5
data/ext/ruby_gumath/gumath/python/gumath/functions.c +2 -2
data/ext/ruby_gumath/gumath/python/gumath/gumath.h +246 -0
data/ext/ruby_gumath/gumath/python/gumath/libgumath.a +0 -0
data/ext/ruby_gumath/gumath/python/gumath/libgumath.so +1 -0
data/ext/ruby_gumath/gumath/python/gumath/libgumath.so.0 +1 -0
data/ext/ruby_gumath/gumath/python/gumath/libgumath.so.0.2.0dev3 +0 -0
data/ext/ruby_gumath/gumath/python/gumath/pygumath.h +31 -2
data/ext/ruby_gumath/gumath/python/gumath_aux.py +767 -0
data/ext/ruby_gumath/gumath/python/randdec.py +535 -0
data/ext/ruby_gumath/gumath/python/randfloat.py +177 -0
data/ext/ruby_gumath/gumath/python/test_gumath.py +1504 -24
data/ext/ruby_gumath/gumath/python/test_xndarray.py +462 -0
data/ext/ruby_gumath/gumath/setup.py +67 -6
data/ext/ruby_gumath/gumath/tools/detect_cuda_arch.cc +35 -0
data/ext/ruby_gumath/include/gumath.h +55 -14
data/ext/ruby_gumath/include/ruby_gumath.h +4 -1
data/ext/ruby_gumath/lib/libgumath.a +0 -0
data/ext/ruby_gumath/lib/libgumath.so.0.2.0dev3 +0 -0
data/ext/ruby_gumath/ruby_gumath.c +231 -70
data/ext/ruby_gumath/ruby_gumath.h +4 -1
data/ext/ruby_gumath/ruby_gumath_internal.h +25 -0
data/ext/ruby_gumath/util.c +34 -0
data/ext/ruby_gumath/util.h +9 -0
data/gumath.gemspec +3 -2
data/lib/gumath.rb +55 -1
data/lib/gumath/version.rb +2 -2
data/lib/ruby_gumath.so +0 -0
metadata +63 -10
data/ext/ruby_gumath/gumath/libgumath/extending/bfloat16.c +0 -130
data/ext/ruby_gumath/gumath/libgumath/kernels/binary.c +0 -547
data/ext/ruby_gumath/gumath/libgumath/kernels/unary.c +0 -449

data/ext/ruby_gumath/gumath/libgumath/kernels/cpu_device_unary.cc ADDED

@@ -0,0 +1,368 @@
+/*
+* BSD 3-Clause License
+*
+* Copyright (c) 2017-2018, plures
+* All rights reserved.
+*
+* Redistribution and use in source and binary forms, with or without
+* modification, are permitted provided that the following conditions are met:
+*
+* 1. Redistributions of source code must retain the above copyright notice,
+*    this list of conditions and the following disclaimer.
+*
+* 2. Redistributions in binary form must reproduce the above copyright notice,
+*    this list of conditions and the following disclaimer in the documentation
+*    and/or other materials provided with the distribution.
+*
+* 3. Neither the name of the copyright holder nor the names of its
+*    contributors may be used to endorse or promote products derived from
+*    this software without specific prior written permission.
+*
+* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+#include <cinttypes>
+#include <cmath>
+#include <complex>
+#include "cpu_device_unary.h"
+#include "contrib/bfloat16.h"
+/*****************************************************************************/
+/*                          CPU device unary kernels                         */
+/*****************************************************************************/
+/*
+ * Generate the three extern "C" entry points of one unary kernel:
+ *
+ *   gm_cpu_device_fixed_1D_C_<name>_<t0>_<t1>(a0, a1, N)
+ *       applies func to N elements, indexing input and output by 0..N-1.
+ *   gm_cpu_device_fixed_1D_S_<name>_<t0>_<t1>(a0, a1, s0, s1, N)
+ *       same, but the input index advances by s0 and the output index by s1
+ *       per step (both counted in elements of t0_t / t1_t, not in bytes).
+ *   gm_cpu_device_0D_<name>_<t0>_<t1>(a0, a1)
+ *       applies func to a single element.
+ *
+ * a0 is read as t0_t and a1 is written as t1_t.  Every input element is cast
+ * to common_t before func is applied; the assignment converts the result to
+ * t1_t.
+ */
+#define CPU_DEVICE_UNARY(name, func, t0, t1, common) \
+extern "C" void                                                                   \
+gm_cpu_device_fixed_1D_C_##name##_##t0##_##t1(const char *a0, char *a1,           \
+                                              const int64_t N)                    \
+{                                                                                 \
+    const t0##_t *src = (const t0##_t *)a0;                                       \
+    t1##_t *dst = (t1##_t *)a1;                                                   \
+                                                                                  \
+    for (int64_t idx = 0; idx < N; idx++) {                                       \
+        dst[idx] = func((common##_t)src[idx]);                                    \
+    }                                                                             \
+}                                                                                 \
+                                                                                  \
+extern "C" void                                                                   \
+gm_cpu_device_fixed_1D_S_##name##_##t0##_##t1(const char *a0, char *a1,           \
+                                              const int64_t s0, const int64_t s1, \
+                                              const int64_t N)                    \
+{                                                                                 \
+    const t0##_t *src = (const t0##_t *)a0;                                       \
+    t1##_t *dst = (t1##_t *)a1;                                                   \
+    int64_t off0 = 0;                                                             \
+    int64_t off1 = 0;                                                             \
+                                                                                  \
+    for (int64_t idx = 0; idx < N; idx++) {                                       \
+        dst[off1] = func((common##_t)src[off0]);                                  \
+        off0 += s0;                                                               \
+        off1 += s1;                                                               \
+    }                                                                             \
+}                                                                                 \
+                                                                                  \
+extern "C" void                                                                   \
+gm_cpu_device_0D_##name##_##t0##_##t1(const char *a0, char *a1)                   \
+{                                                                                 \
+    const t0##_t value = *(const t0##_t *)a0;                                     \
+    *(t1##_t *)a1 = func((common##_t)value);                                      \
+}
+/*
+ * Complex kernels: identical to CPU_DEVICE_UNARY, except when compiling with
+ * MSVC, where the macro expands to nothing and this file emits no kernel.
+ */
+#if !defined(_MSC_VER)
+  #define CPU_DEVICE_UNARYC(name, func, t0, t1, common) \
+    CPU_DEVICE_UNARY(name, func, t0, t1, common)
+#else
+  #define CPU_DEVICE_UNARYC(name, func, t0, t1, common)
+#endif
+/* Placeholder for type combinations without a kernel: expands to nothing. */
+#define CPU_DEVICE_NOIMPL(name, func, t0, t1, common)
+/*
+ * Instantiate kernel `name` for every (input type, output type) pair listed
+ * below.  The function applied depends on the output type:
+ *
+ *   func   bool->bool, and all signed-integer, float32/float64 and complex
+ *          outputs
+ *   ufunc  unsigned-integer outputs
+ *   tfunc  bfloat16 outputs
+ *   hfunc  only ever passed to CPU_DEVICE_NOIMPL (float16 outputs), so it
+ *          does not reach any generated code
+ *
+ * All float16 and complex32 combinations go through CPU_DEVICE_NOIMPL and
+ * generate nothing; complex64/complex128 outputs use CPU_DEVICE_UNARYC.
+ */
+#define CPU_DEVICE_ALL_UNARY(name, func, ufunc, tfunc, hfunc) \
+    CPU_DEVICE_UNARY(name, func, bool, bool, bool)                    \
+    CPU_DEVICE_UNARY(name, ufunc, bool, uint8, uint8)                 \
+    CPU_DEVICE_UNARY(name, ufunc, bool, uint16, uint16)               \
+    CPU_DEVICE_UNARY(name, ufunc, bool, uint32, uint32)               \
+    CPU_DEVICE_UNARY(name, ufunc, bool, uint64, uint64)               \
+    CPU_DEVICE_UNARY(name, func, bool, int8, int8)                    \
+    CPU_DEVICE_UNARY(name, func, bool, int16, int16)                  \
+    CPU_DEVICE_UNARY(name, func, bool, int32, int32)                  \
+    CPU_DEVICE_UNARY(name, func, bool, int64, int64)                  \
+    CPU_DEVICE_UNARY(name, tfunc, bool, bfloat16, bfloat16)           \
+    CPU_DEVICE_NOIMPL(name, hfunc, bool, float16, float16)            \
+    CPU_DEVICE_UNARY(name, func, bool, float32, float32)              \
+    CPU_DEVICE_UNARY(name, func, bool, float64, float64)              \
+    CPU_DEVICE_NOIMPL(name, func, bool, complex32, complex32)         \
+    CPU_DEVICE_UNARYC(name, func, bool, complex64, complex64)         \
+    CPU_DEVICE_UNARYC(name, func, bool, complex128, complex128)       \
+                                                                      \
+    CPU_DEVICE_UNARY(name, ufunc, uint8, uint8, uint8)                \
+    CPU_DEVICE_UNARY(name, ufunc, uint8, uint16, uint16)              \
+    CPU_DEVICE_UNARY(name, ufunc, uint8, uint32, uint32)              \
+    CPU_DEVICE_UNARY(name, ufunc, uint8, uint64, uint64)              \
+    CPU_DEVICE_UNARY(name, func, uint8, int16, int16)                 \
+    CPU_DEVICE_UNARY(name, func, uint8, int32, int32)                 \
+    CPU_DEVICE_UNARY(name, func, uint8, int64, int64)                 \
+    CPU_DEVICE_UNARY(name, tfunc, uint8, bfloat16, bfloat16)          \
+    CPU_DEVICE_NOIMPL(name, hfunc, uint8, float16, float16)           \
+    CPU_DEVICE_UNARY(name, func, uint8, float32, float32)             \
+    CPU_DEVICE_UNARY(name, func, uint8, float64, float64)             \
+    CPU_DEVICE_NOIMPL(name, func, uint8, complex32, complex32)        \
+    CPU_DEVICE_UNARYC(name, func, uint8, complex64, complex64)        \
+    CPU_DEVICE_UNARYC(name, func, uint8, complex128, complex128)      \
+                                                                      \
+    CPU_DEVICE_UNARY(name, ufunc, uint16, uint16, uint16)             \
+    CPU_DEVICE_UNARY(name, ufunc, uint16, uint32, uint32)             \
+    CPU_DEVICE_UNARY(name, ufunc, uint16, uint64, uint64)             \
+    CPU_DEVICE_UNARY(name, func, uint16, int32, int32)                \
+    CPU_DEVICE_UNARY(name, func, uint16, int64, int64)                \
+    CPU_DEVICE_UNARY(name, func, uint16, float32, float32)            \
+    CPU_DEVICE_UNARY(name, func, uint16, float64, float64)            \
+    CPU_DEVICE_UNARYC(name, func, uint16, complex64, complex64)       \
+    CPU_DEVICE_UNARYC(name, func, uint16, complex128, complex128)     \
+                                                                      \
+    CPU_DEVICE_UNARY(name, ufunc, uint32, uint32, uint32)             \
+    CPU_DEVICE_UNARY(name, ufunc, uint32, uint64, uint64)             \
+    CPU_DEVICE_UNARY(name, func, uint32, int64, int64)                \
+    CPU_DEVICE_UNARY(name, func, uint32, float64, float64)            \
+    CPU_DEVICE_UNARYC(name, func, uint32, complex128, complex128)     \
+                                                                      \
+    CPU_DEVICE_UNARY(name, ufunc, uint64, uint64, uint64)             \
+                                                                      \
+    CPU_DEVICE_UNARY(name, func, int8, int8, int8)                    \
+    CPU_DEVICE_UNARY(name, func, int8, int16, int16)                  \
+    CPU_DEVICE_UNARY(name, func, int8, int32, int32)                  \
+    CPU_DEVICE_UNARY(name, func, int8, int64, int64)                  \
+    CPU_DEVICE_UNARY(name, tfunc, int8, bfloat16, bfloat16)           \
+    CPU_DEVICE_NOIMPL(name, hfunc, int8, float16, float16)            \
+    CPU_DEVICE_UNARY(name, func, int8, float32, float32)              \
+    CPU_DEVICE_UNARY(name, func, int8, float64, float64)              \
+    CPU_DEVICE_NOIMPL(name, func, int8, complex32, complex32)         \
+    CPU_DEVICE_UNARYC(name, func, int8, complex64, complex64)         \
+    CPU_DEVICE_UNARYC(name, func, int8, complex128, complex128)       \
+                                                                      \
+    CPU_DEVICE_UNARY(name, func, int16, int16, int16)                 \
+    CPU_DEVICE_UNARY(name, func, int16, int32, int32)                 \
+    CPU_DEVICE_UNARY(name, func, int16, int64, int64)                 \
+    CPU_DEVICE_UNARY(name, func, int16, float32, float32)             \
+    CPU_DEVICE_UNARY(name, func, int16, float64, float64)             \
+    CPU_DEVICE_UNARYC(name, func, int16, complex64, complex64)        \
+    CPU_DEVICE_UNARYC(name, func, int16, complex128, complex128)      \
+                                                                      \
+    CPU_DEVICE_UNARY(name, func, int32, int32, int32)                 \
+    CPU_DEVICE_UNARY(name, func, int32, int64, int64)                 \
+    CPU_DEVICE_UNARY(name, func, int32, float64, float64)             \
+    CPU_DEVICE_UNARYC(name, func, int32, complex128, complex128)      \
+                                                                      \
+    CPU_DEVICE_UNARY(name, func, int64, int64, int64)                 \
+                                                                      \
+    CPU_DEVICE_UNARY(name, tfunc, bfloat16, bfloat16, bfloat16)       \
+    CPU_DEVICE_UNARY(name, func, bfloat16, float32, float32)          \
+    CPU_DEVICE_UNARY(name, func, bfloat16, float64, float64)          \
+    CPU_DEVICE_UNARYC(name, func, bfloat16, complex64, complex64)     \
+    CPU_DEVICE_UNARYC(name, func, bfloat16, complex128, complex128)   \
+                                                                      \
+    CPU_DEVICE_NOIMPL(name, hfunc, float16, float16, float16)         \
+    CPU_DEVICE_NOIMPL(name, func, float16, float32, float32)          \
+    CPU_DEVICE_NOIMPL(name, func, float16, float64, float64)          \
+    CPU_DEVICE_NOIMPL(name, func, float16, complex32, complex32)      \
+    CPU_DEVICE_NOIMPL(name, func, float16, complex64, complex64)      \
+    CPU_DEVICE_NOIMPL(name, func, float16, complex128, complex128)    \
+                                                                      \
+    CPU_DEVICE_UNARY(name, func, float32, float32, float32)           \
+    CPU_DEVICE_UNARY(name, func, float32, float64, float64)           \
+    CPU_DEVICE_UNARYC(name, func, float32, complex64, complex64)      \
+    CPU_DEVICE_UNARYC(name, func, float32, complex128, complex128)    \
+                                                                      \
+    CPU_DEVICE_UNARY(name, func, float64, float64, float64)           \
+    CPU_DEVICE_UNARYC(name, func, float64, complex128, complex128)    \
+                                                                      \
+    CPU_DEVICE_NOIMPL(name, func, complex32, complex32, complex32)    \
+    CPU_DEVICE_NOIMPL(name, func, complex32, complex64, complex64)    \
+    CPU_DEVICE_NOIMPL(name, func, complex32, complex128, complex128)  \
+                                                                      \
+    CPU_DEVICE_UNARYC(name, func, complex64, complex64, complex64)    \
+    CPU_DEVICE_UNARYC(name, func, complex64, complex128, complex128)  \
+                                                                      \
+    CPU_DEVICE_UNARYC(name, func, complex128, complex128, complex128)
+/*****************************************************************************/
+/*                                   Copy                                    */
+/*****************************************************************************/
+/*
+ * Identity operation.  The argument is parenthesized so the expansion stays a
+ * single primary expression whatever is passed in.  The macro is left defined
+ * because the abs kernels reuse it for their unsigned variants.
+ */
+#define copy(x) (x)
+CPU_DEVICE_ALL_UNARY(copy, copy, copy, copy, copy)
+/*****************************************************************************/
+/*                                    Abs                                    */
+/*****************************************************************************/
+/*
+ * func = std::abs, ufunc = copy (unsigned values are already non-negative),
+ * tfunc = tf::fabs for bfloat16 outputs.  The last argument fills the hfunc
+ * slot, which CPU_DEVICE_ALL_UNARY only forwards to CPU_DEVICE_NOIMPL.
+ */
+CPU_DEVICE_ALL_UNARY(abs, std::abs, copy, tf::fabs, std::abs)
+/*****************************************************************************/
+/*                               Bitwise NOT                                 */
+/*****************************************************************************/
+/*
+ * bool is inverted with logical NOT; every integer type uses bitwise
+ * complement.  The macro is redefined between the two groups.  Argument and
+ * expansion are parenthesized so operator precedence cannot change the result
+ * if a compound expression is ever passed in.
+ */
+#define invert(x) (!(x))
+CPU_DEVICE_UNARY(invert, invert, bool, bool, bool)
+#undef invert
+#define invert(x) (~(x))
+CPU_DEVICE_UNARY(invert, invert, uint8, uint8, uint8)
+CPU_DEVICE_UNARY(invert, invert, uint16, uint16, uint16)
+CPU_DEVICE_UNARY(invert, invert, uint32, uint32, uint32)
+CPU_DEVICE_UNARY(invert, invert, uint64, uint64, uint64)
+CPU_DEVICE_UNARY(invert, invert, int8, int8, int8)
+CPU_DEVICE_UNARY(invert, invert, int16, int16, int16)
+CPU_DEVICE_UNARY(invert, invert, int32, int32, int32)
+CPU_DEVICE_UNARY(invert, invert, int64, int64, int64)
+/*****************************************************************************/
+/*                                 Negative                                  */
+/*****************************************************************************/
+/*
+ * Arithmetic negation.  Unsigned inputs are widened to the next larger signed
+ * type (uint8->int16, uint16->int32, uint32->int64) before negating; there is
+ * no uint64 kernel.  float16 and complex32 are CPU_DEVICE_NOIMPL placeholders.
+ * Argument and expansion are parenthesized so the unary minus always applies
+ * to the whole argument.
+ */
+#define negative(x) (-(x))
+CPU_DEVICE_UNARY(negative, negative, uint8, int16, int16)
+CPU_DEVICE_UNARY(negative, negative, uint16, int32, int32)
+CPU_DEVICE_UNARY(negative, negative, uint32, int64, int64)
+CPU_DEVICE_UNARY(negative, negative, int8, int8, int8)
+CPU_DEVICE_UNARY(negative, negative, int16, int16, int16)
+CPU_DEVICE_UNARY(negative, negative, int32, int32, int32)
+CPU_DEVICE_UNARY(negative, negative, int64, int64, int64)
+CPU_DEVICE_UNARY(negative, negative, bfloat16, bfloat16, bfloat16)
+CPU_DEVICE_NOIMPL(negative, negative, float16, float16, float16)
+CPU_DEVICE_UNARY(negative, negative, float32, float32, float32)
+CPU_DEVICE_UNARY(negative, negative, float64, float64, float64)
+CPU_DEVICE_NOIMPL(negative, negative, complex32, complex32, complex32)
+CPU_DEVICE_UNARYC(negative, negative, complex64, complex64, complex64)
+CPU_DEVICE_UNARYC(negative, negative, complex128, complex128, complex128)
+/*****************************************************************************/
+/*                                   Math                                    */
+/*****************************************************************************/
+/*
+ * Instantiate a real-valued math function for the real input types:
+ *
+ *   uint16, int16, float32 -> float32   calls name##f; kernel named name##f
+ *   bfloat16 -> bfloat16                calls tf::name; kernel named name##b16
+ *   uint32, int32, float64 -> float64   calls name; kernel named name
+ */
+#define CPU_DEVICE_UNARY_ALL_REAL_MATH(name) \
+    CPU_DEVICE_UNARY(name##f, name##f, uint16, float32, float32)        \
+    CPU_DEVICE_UNARY(name##f, name##f, int16, float32, float32)         \
+    CPU_DEVICE_UNARY(name##b16, tf::name, bfloat16, bfloat16, bfloat16) \
+    CPU_DEVICE_UNARY(name##f, name##f, float32, float32, float32)       \
+    CPU_DEVICE_UNARY(name, name, uint32, float64, float64)              \
+    CPU_DEVICE_UNARY(name, name, int32, float64, float64)               \
+    CPU_DEVICE_UNARY(name, name, float64, float64, float64)
+/*
+ * Everything CPU_DEVICE_UNARY_ALL_REAL_MATH generates, plus complex64 and
+ * complex128 kernels that call `name`.  complex32 is a CPU_DEVICE_NOIMPL
+ * placeholder.
+ *
+ * The final line must not end in a backslash: a trailing continuation
+ * splices whatever line follows (here the next #define) into this macro's
+ * replacement list.
+ */
+#define CPU_DEVICE_UNARY_ALL_COMPLEX_MATH(name) \
+    CPU_DEVICE_UNARY_ALL_REAL_MATH(name)                              \
+    CPU_DEVICE_NOIMPL(name, name, complex32, complex32, complex32)    \
+    CPU_DEVICE_UNARYC(name, name, complex64, complex64, complex64)    \
+    CPU_DEVICE_UNARYC(name, name, complex128, complex128, complex128)
+/*
+ * float16 kernels for uint8, int8 and float16 inputs, named name##f16 and
+ * calling hfunc.  None of the instantiations in this file expand this macro.
+ * NOTE(review): expanding it requires a float16_t type; no such typedef is
+ * visible here, and the other float16 combinations in this file go through
+ * CPU_DEVICE_NOIMPL -- confirm before use.
+ */
+#define CPU_DEVICE_UNARY_ALL_HALF_MATH(name, hfunc) \
+    CPU_DEVICE_UNARY(name##f16, hfunc, uint8, float16, float16)   \
+    CPU_DEVICE_UNARY(name##f16, hfunc, int8, float16, float16)    \
+    CPU_DEVICE_UNARY(name##f16, hfunc, float16, float16, float16)
+/* Real-valued kernels plus the float16 kernels (which call hfunc). */
+#define CPU_DEVICE_UNARY_ALL_REAL_MATH_WITH_HALF(name, hfunc) \
+    CPU_DEVICE_UNARY_ALL_HALF_MATH(name, hfunc)               \
+    CPU_DEVICE_UNARY_ALL_REAL_MATH(name)
+/* Real and complex kernels plus the float16 kernels (which call hfunc). */
+#define CPU_DEVICE_UNARY_ALL_COMPLEX_MATH_WITH_HALF(name, hfunc) \
+    CPU_DEVICE_UNARY_ALL_HALF_MATH(name, hfunc)                  \
+    CPU_DEVICE_UNARY_ALL_COMPLEX_MATH(name)
+/*****************************************************************************/
+/*                                Abs functions                              */
+/*****************************************************************************/
+/*
+ * Each line below expands to the 1D_C, 1D_S and 0D kernels of one function
+ * for every type combination listed in the macro it uses.  Functions
+ * instantiated with CPU_DEVICE_UNARY_ALL_REAL_MATH get no complex kernels.
+ */
+CPU_DEVICE_UNARY_ALL_REAL_MATH(fabs)
+/*****************************************************************************/
+/*                             Exponential functions                         */
+/*****************************************************************************/
+CPU_DEVICE_UNARY_ALL_COMPLEX_MATH(exp)
+CPU_DEVICE_UNARY_ALL_REAL_MATH(exp2)
+CPU_DEVICE_UNARY_ALL_REAL_MATH(expm1)
+/*****************************************************************************/
+/*                              Logarithm functions                          */
+/*****************************************************************************/
+CPU_DEVICE_UNARY_ALL_COMPLEX_MATH(log)
+CPU_DEVICE_UNARY_ALL_COMPLEX_MATH(log10)
+CPU_DEVICE_UNARY_ALL_REAL_MATH(log2)
+CPU_DEVICE_UNARY_ALL_REAL_MATH(log1p)
+CPU_DEVICE_UNARY_ALL_REAL_MATH(logb)
+/*****************************************************************************/
+/*                              Power functions                              */
+/*****************************************************************************/
+CPU_DEVICE_UNARY_ALL_COMPLEX_MATH(sqrt)
+CPU_DEVICE_UNARY_ALL_REAL_MATH(cbrt)
+/*****************************************************************************/
+/*                           Trigonometric functions                         */
+/*****************************************************************************/
+CPU_DEVICE_UNARY_ALL_COMPLEX_MATH(sin)
+CPU_DEVICE_UNARY_ALL_COMPLEX_MATH(cos)
+CPU_DEVICE_UNARY_ALL_COMPLEX_MATH(tan)
+CPU_DEVICE_UNARY_ALL_COMPLEX_MATH(asin)
+CPU_DEVICE_UNARY_ALL_COMPLEX_MATH(acos)
+CPU_DEVICE_UNARY_ALL_COMPLEX_MATH(atan)
+/*****************************************************************************/
+/*                             Hyperbolic functions                          */
+/*****************************************************************************/
+CPU_DEVICE_UNARY_ALL_COMPLEX_MATH(sinh)
+CPU_DEVICE_UNARY_ALL_COMPLEX_MATH(cosh)
+CPU_DEVICE_UNARY_ALL_COMPLEX_MATH(tanh)
+CPU_DEVICE_UNARY_ALL_COMPLEX_MATH(asinh)
+CPU_DEVICE_UNARY_ALL_COMPLEX_MATH(acosh)
+CPU_DEVICE_UNARY_ALL_COMPLEX_MATH(atanh)
+/*****************************************************************************/
+/*                            Error and gamma functions                      */
+/*****************************************************************************/
+CPU_DEVICE_UNARY_ALL_REAL_MATH(erf)
+CPU_DEVICE_UNARY_ALL_REAL_MATH(erfc)
+CPU_DEVICE_UNARY_ALL_REAL_MATH(lgamma)
+CPU_DEVICE_UNARY_ALL_REAL_MATH(tgamma)
+/*****************************************************************************/
+/*                  Ceiling, floor, trunc, round, nearbyint                  */
+/*****************************************************************************/
+CPU_DEVICE_UNARY_ALL_REAL_MATH(ceil)
+CPU_DEVICE_UNARY_ALL_REAL_MATH(floor)
+CPU_DEVICE_UNARY_ALL_REAL_MATH(trunc)
+CPU_DEVICE_UNARY_ALL_REAL_MATH(round)
+CPU_DEVICE_UNARY_ALL_REAL_MATH(nearbyint)

data/ext/ruby_gumath/gumath/libgumath/kernels/cpu_device_unary.h ADDED

@@ -0,0 +1,335 @@
+/*
+* BSD 3-Clause License
+*
+* Copyright (c) 2017-2018, plures
+* All rights reserved.
+*
+* Redistribution and use in source and binary forms, with or without
+* modification, are permitted provided that the following conditions are met:
+*
+* 1. Redistributions of source code must retain the above copyright notice,
+*    this list of conditions and the following disclaimer.
+*
+* 2. Redistributions in binary form must reproduce the above copyright notice,
+*    this list of conditions and the following disclaimer in the documentation
+*    and/or other materials provided with the distribution.
+*
+* 3. Neither the name of the copyright holder nor the names of its
+*    contributors may be used to endorse or promote products derived from
+*    this software without specific prior written permission.
+*
+* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+#ifndef CPU_DEVICE_UNARY_H
+#define CPU_DEVICE_UNARY_H
+#ifdef __cplusplus
+/* C++ includers: fixed-width integers, std::complex and the bundled bfloat16
+ * type supply the element types that have no plain C spelling. */
+#include <cinttypes>
+#include <complex>
+#include "contrib/bfloat16.h"
+typedef tf::bfloat16 bfloat16_t;
+typedef std::complex<float> complex64_t;
+typedef std::complex<double> complex128_t;
+#else
+/* C includers: before C23 `bool` is only available through <stdbool.h>, so
+ * include it here rather than depending on whoever includes this header. */
+#include <stdbool.h>
+#include <stdint.h>
+#endif
+/* Scalar aliases shared by the C and C++ views of this header. */
+typedef bool bool_t;
+typedef float float32_t;
+typedef double float64_t;
+/*****************************************************************************/
+/*                        CPU device kernel signature                        */
+/*****************************************************************************/
+/* Linkage prefix for the kernel prototypes: C++ includers must see the
+ * symbols with C linkage, C includers need no prefix. */
+#ifdef __cplusplus
+  #define CPU_DEVICE_UNARY_LINKAGE extern "C"
+#else
+  #define CPU_DEVICE_UNARY_LINKAGE
+#endif
+/*
+ * Declare the three kernel entry points for `name` with type tags t0 and t1
+ * pasted into the symbol name:
+ *   gm_cpu_device_0D_<name>_<t0>_<t1>(a0, a1)
+ *   gm_cpu_device_fixed_1D_C_<name>_<t0>_<t1>(a0, a1, N)
+ *   gm_cpu_device_fixed_1D_S_<name>_<t0>_<t1>(a0, a1, s0, s1, N)
+ * a0 is read-only and a1 is writable (presumably the input and output
+ * buffers; s0/s1 look like per-argument strides and N an element count --
+ * confirm against the kernel definitions).
+ * t0 and t1 are used only as `##` operands, so they are pasted verbatim and
+ * never macro-expanded.
+ */
+#define CPU_DEVICE_UNARY_DECL(name, t0, t1) \
+  CPU_DEVICE_UNARY_LINKAGE void                                          \
+  gm_cpu_device_0D_##name##_##t0##_##t1(const char *a0, char *a1);       \
+  CPU_DEVICE_UNARY_LINKAGE void                                          \
+  gm_cpu_device_fixed_1D_C_##name##_##t0##_##t1(const char *a0,          \
+                                                char *a1, int64_t N);    \
+  CPU_DEVICE_UNARY_LINKAGE void                                          \
+  gm_cpu_device_fixed_1D_S_##name##_##t0##_##t1(const char *a0,          \
+                                                char *a1,                \
+                                                int64_t s0, int64_t s1,  \
+                                                int64_t N);
+/* Placeholder for type combinations without a kernel: expands to nothing. */
+#define CPU_DEVICE_UNARY_NOIMPL_DECL(name, t0, t1)
+/*****************************************************************************/
+/*                               Copy and abs                                */
+/*****************************************************************************/
+/*
+ * Emit the kernel prototypes of family `name` for every (input, output) type
+ * pair listed below.  Every pair involving float16 or complex32 goes through
+ * CPU_DEVICE_UNARY_NOIMPL_DECL, which expands to nothing, so no prototype is
+ * produced for those.  uint64 and int64 inputs are listed only with an output
+ * of the same type.
+ * NOTE(review): the listed pairs look like the value-preserving conversions
+ * between the supported types -- confirm against the kernel table.
+ */
+#define CPU_DEVICE_ALL_UNARY_ALL_DECL(name) \
+    CPU_DEVICE_UNARY_DECL(name, bool, bool)                   \
+    CPU_DEVICE_UNARY_DECL(name, bool, uint8)                  \
+    CPU_DEVICE_UNARY_DECL(name, bool, uint16)                 \
+    CPU_DEVICE_UNARY_DECL(name, bool, uint32)                 \
+    CPU_DEVICE_UNARY_DECL(name, bool, uint64)                 \
+    CPU_DEVICE_UNARY_DECL(name, bool, int8)                   \
+    CPU_DEVICE_UNARY_DECL(name, bool, int16)                  \
+    CPU_DEVICE_UNARY_DECL(name, bool, int32)                  \
+    CPU_DEVICE_UNARY_DECL(name, bool, int64)                  \
+    CPU_DEVICE_UNARY_DECL(name, bool, bfloat16)               \
+    CPU_DEVICE_UNARY_NOIMPL_DECL(name, bool, float16)         \
+    CPU_DEVICE_UNARY_DECL(name, bool, float32)                \
+    CPU_DEVICE_UNARY_DECL(name, bool, float64)                \
+    CPU_DEVICE_UNARY_NOIMPL_DECL(name, bool, complex32)       \
+    CPU_DEVICE_UNARY_DECL(name, bool, complex64)              \
+    CPU_DEVICE_UNARY_DECL(name, bool, complex128)             \
+    CPU_DEVICE_UNARY_DECL(name, uint8, uint8)                 \
+    CPU_DEVICE_UNARY_DECL(name, uint8, uint16)                \
+    CPU_DEVICE_UNARY_DECL(name, uint8, uint32)                \
+    CPU_DEVICE_UNARY_DECL(name, uint8, uint64)                \
+    CPU_DEVICE_UNARY_DECL(name, uint8, int16)                 \
+    CPU_DEVICE_UNARY_DECL(name, uint8, int32)                 \
+    CPU_DEVICE_UNARY_DECL(name, uint8, int64)                 \
+    CPU_DEVICE_UNARY_DECL(name, uint8, bfloat16)              \
+    CPU_DEVICE_UNARY_NOIMPL_DECL(name, uint8, float16)        \
+    CPU_DEVICE_UNARY_DECL(name, uint8, float32)               \
+    CPU_DEVICE_UNARY_DECL(name, uint8, float64)               \
+    CPU_DEVICE_UNARY_NOIMPL_DECL(name, uint8, complex32)      \
+    CPU_DEVICE_UNARY_DECL(name, uint8, complex64)             \
+    CPU_DEVICE_UNARY_DECL(name, uint8, complex128)            \
+    CPU_DEVICE_UNARY_DECL(name, uint16, uint16)               \
+    CPU_DEVICE_UNARY_DECL(name, uint16, uint32)               \
+    CPU_DEVICE_UNARY_DECL(name, uint16, uint64)               \
+    CPU_DEVICE_UNARY_DECL(name, uint16, int32)                \
+    CPU_DEVICE_UNARY_DECL(name, uint16, int64)                \
+    CPU_DEVICE_UNARY_DECL(name, uint16, float32)              \
+    CPU_DEVICE_UNARY_DECL(name, uint16, float64)              \
+    CPU_DEVICE_UNARY_DECL(name, uint16, complex64)            \
+    CPU_DEVICE_UNARY_DECL(name, uint16, complex128)           \
+    CPU_DEVICE_UNARY_DECL(name, uint32, uint32)               \
+    CPU_DEVICE_UNARY_DECL(name, uint32, uint64)               \
+    CPU_DEVICE_UNARY_DECL(name, uint32, int64)                \
+    CPU_DEVICE_UNARY_DECL(name, uint32, float64)              \
+    CPU_DEVICE_UNARY_DECL(name, uint32, complex128)           \
+    CPU_DEVICE_UNARY_DECL(name, uint64, uint64)               \
+    CPU_DEVICE_UNARY_DECL(name, int8, int8)                   \
+    CPU_DEVICE_UNARY_DECL(name, int8, int16)                  \
+    CPU_DEVICE_UNARY_DECL(name, int8, int32)                  \
+    CPU_DEVICE_UNARY_DECL(name, int8, int64)                  \
+    CPU_DEVICE_UNARY_DECL(name, int8, bfloat16)               \
+    CPU_DEVICE_UNARY_NOIMPL_DECL(name, int8, float16)         \
+    CPU_DEVICE_UNARY_DECL(name, int8, float32)                \
+    CPU_DEVICE_UNARY_DECL(name, int8, float64)                \
+    CPU_DEVICE_UNARY_NOIMPL_DECL(name, int8, complex32)       \
+    CPU_DEVICE_UNARY_DECL(name, int8, complex64)              \
+    CPU_DEVICE_UNARY_DECL(name, int8, complex128)             \
+    CPU_DEVICE_UNARY_DECL(name, int16, int16)                 \
+    CPU_DEVICE_UNARY_DECL(name, int16, int32)                 \
+    CPU_DEVICE_UNARY_DECL(name, int16, int64)                 \
+    CPU_DEVICE_UNARY_DECL(name, int16, float32)               \
+    CPU_DEVICE_UNARY_DECL(name, int16, float64)               \
+    CPU_DEVICE_UNARY_DECL(name, int16, complex64)             \
+    CPU_DEVICE_UNARY_DECL(name, int16, complex128)            \
+    CPU_DEVICE_UNARY_DECL(name, int32, int32)                 \
+    CPU_DEVICE_UNARY_DECL(name, int32, int64)                 \
+    CPU_DEVICE_UNARY_DECL(name, int32, float64)               \
+    CPU_DEVICE_UNARY_DECL(name, int32, complex128)            \
+    CPU_DEVICE_UNARY_DECL(name, int64, int64)                 \
+    CPU_DEVICE_UNARY_DECL(name, bfloat16, bfloat16)           \
+    CPU_DEVICE_UNARY_DECL(name, bfloat16, float32)            \
+    CPU_DEVICE_UNARY_DECL(name, bfloat16, float64)            \
+    CPU_DEVICE_UNARY_DECL(name, bfloat16, complex64)          \
+    CPU_DEVICE_UNARY_DECL(name, bfloat16, complex128)         \
+    CPU_DEVICE_UNARY_NOIMPL_DECL(name, float16, float16)      \
+    CPU_DEVICE_UNARY_NOIMPL_DECL(name, float16, float32)      \
+    CPU_DEVICE_UNARY_NOIMPL_DECL(name, float16, float64)      \
+    CPU_DEVICE_UNARY_NOIMPL_DECL(name, float16, complex32)    \
+    CPU_DEVICE_UNARY_NOIMPL_DECL(name, float16, complex64)    \
+    CPU_DEVICE_UNARY_NOIMPL_DECL(name, float16, complex128)   \
+    CPU_DEVICE_UNARY_DECL(name, float32, float32)             \
+    CPU_DEVICE_UNARY_DECL(name, float32, float64)             \
+    CPU_DEVICE_UNARY_DECL(name, float32, complex64)           \
+    CPU_DEVICE_UNARY_DECL(name, float32, complex128)          \
+    CPU_DEVICE_UNARY_DECL(name, float64, float64)             \
+    CPU_DEVICE_UNARY_DECL(name, float64, complex128)          \
+    CPU_DEVICE_UNARY_NOIMPL_DECL(name, complex32, complex32)  \
+    CPU_DEVICE_UNARY_NOIMPL_DECL(name, complex32, complex64)  \
+    CPU_DEVICE_UNARY_NOIMPL_DECL(name, complex32, complex128) \
+    CPU_DEVICE_UNARY_DECL(name, complex64, complex64)         \
+    CPU_DEVICE_UNARY_DECL(name, complex64, complex128)        \
+    CPU_DEVICE_UNARY_DECL(name, complex128, complex128)
+/* Prototypes for the copy and abs kernel families over the full pair list. */
+CPU_DEVICE_ALL_UNARY_ALL_DECL(copy)
+CPU_DEVICE_ALL_UNARY_ALL_DECL(abs)
+/*****************************************************************************/
+/*                               Bitwise NOT                                 */
+/*****************************************************************************/
+/* invert is declared for bool and the eight fixed-width integer types only,
+ * always with the output type equal to the input type. */
+CPU_DEVICE_UNARY_DECL(invert, bool, bool)
+CPU_DEVICE_UNARY_DECL(invert, uint8, uint8)
+CPU_DEVICE_UNARY_DECL(invert, uint16, uint16)
+CPU_DEVICE_UNARY_DECL(invert, uint32, uint32)
+CPU_DEVICE_UNARY_DECL(invert, uint64, uint64)
+CPU_DEVICE_UNARY_DECL(invert, int8, int8)
+CPU_DEVICE_UNARY_DECL(invert, int16, int16)
+CPU_DEVICE_UNARY_DECL(invert, int32, int32)
+CPU_DEVICE_UNARY_DECL(invert, int64, int64)
+/*****************************************************************************/
+/*                                 Negative                                  */
+/*****************************************************************************/
+/* Unsigned inputs are declared with the next wider signed output type
+ * (uint8 -> int16, uint16 -> int32, uint32 -> int64); there is no uint64 or
+ * bool entry.  Signed, floating-point and complex inputs keep their type. */
+CPU_DEVICE_UNARY_DECL(negative, uint8, int16)
+CPU_DEVICE_UNARY_DECL(negative, uint16, int32)
+CPU_DEVICE_UNARY_DECL(negative, uint32, int64)
+CPU_DEVICE_UNARY_DECL(negative, int8, int8)
+CPU_DEVICE_UNARY_DECL(negative, int16, int16)
+CPU_DEVICE_UNARY_DECL(negative, int32, int32)
+CPU_DEVICE_UNARY_DECL(negative, int64, int64)
+CPU_DEVICE_UNARY_DECL(negative, bfloat16, bfloat16)
+/* float16: NOIMPL expands to nothing, so no prototype is emitted. */
+CPU_DEVICE_UNARY_NOIMPL_DECL(negative, float16, float16)
+CPU_DEVICE_UNARY_DECL(negative, float32, float32)
+CPU_DEVICE_UNARY_DECL(negative, float64, float64)
+/* complex32: likewise no prototype. */
+CPU_DEVICE_UNARY_NOIMPL_DECL(negative, complex32, complex32)
+CPU_DEVICE_UNARY_DECL(negative, complex64, complex64)
+CPU_DEVICE_UNARY_DECL(negative, complex128, complex128)
+/*****************************************************************************/
+/*                                    Math                                   */
+/*****************************************************************************/
+/*
+ * Prototypes for the real-valued kernels of math function `name`.  The kernel
+ * name carries a suffix chosen by result type: name##f for float32 results,
+ * name##b16 for bfloat16 results, and the bare name for float64 results.
+ */
+#define CPU_DEVICE_UNARY_ALL_REAL_MATH_DECL(name) \
+    CPU_DEVICE_UNARY_DECL(name##f, uint16, float32)      \
+    CPU_DEVICE_UNARY_DECL(name##f, int16, float32)       \
+    CPU_DEVICE_UNARY_DECL(name##b16, bfloat16, bfloat16) \
+    CPU_DEVICE_UNARY_DECL(name##f, float32, float32)     \
+    CPU_DEVICE_UNARY_DECL(name, uint32, float64)         \
+    CPU_DEVICE_UNARY_DECL(name, int32, float64)          \
+    CPU_DEVICE_UNARY_DECL(name, float64, float64)
+/*
+ * Everything from CPU_DEVICE_UNARY_ALL_REAL_MATH_DECL plus the complex
+ * kernels, which use the bare name.
+ * NOTE(review): complex32 is declared with CPU_DEVICE_UNARY_DECL here, while
+ * the copy/abs and negative declarations in this header route complex32
+ * through CPU_DEVICE_UNARY_NOIMPL_DECL -- confirm a complex32 implementation
+ * exists, otherwise these prototypes have no definition.
+ */
+#define CPU_DEVICE_UNARY_ALL_COMPLEX_MATH_DECL(name) \
+    CPU_DEVICE_UNARY_ALL_REAL_MATH_DECL(name)           \
+    CPU_DEVICE_UNARY_DECL(name, complex32, complex32)   \
+    CPU_DEVICE_UNARY_DECL(name, complex64, complex64)   \
+    CPU_DEVICE_UNARY_DECL(name, complex128, complex128)
+/*
+ * Prototypes for the float16-result kernels, named name##f16.
+ * NOTE(review): float16 pairs are NOIMPL in the copy/abs and negative
+ * declarations of this header -- confirm these kernels are implemented
+ * before using this macro.
+ */
+#define CPU_DEVICE_UNARY_ALL_HALF_MATH_DECL(name) \
+    CPU_DEVICE_UNARY_DECL(name##f16, uint8, float16)   \
+    CPU_DEVICE_UNARY_DECL(name##f16, int8, float16)    \
+    CPU_DEVICE_UNARY_DECL(name##f16, float16, float16)
+/*
+ * Real-valued math prototypes plus the float16 variants.
+ * The final line of a macro definition must not end in a backslash: the
+ * continuation would splice whatever line follows into the replacement list.
+ */
+#define CPU_DEVICE_UNARY_ALL_REAL_MATH_WITH_HALF_DECL(name) \
+    CPU_DEVICE_UNARY_ALL_HALF_MATH_DECL(name)               \
+    CPU_DEVICE_UNARY_ALL_REAL_MATH_DECL(name)
+/* Complex-capable math prototypes plus the float16 variants. */
+#define CPU_DEVICE_UNARY_ALL_COMPLEX_MATH_WITH_HALF_DECL(name) \
+    CPU_DEVICE_UNARY_ALL_HALF_MATH_DECL(name)                  \
+    CPU_DEVICE_UNARY_ALL_COMPLEX_MATH_DECL(name)
+/*****************************************************************************/
+/*                                Abs functions                              */
+/*****************************************************************************/
+/* One invocation per math function.  The argument is pasted into the kernel
+ * names, so e.g. fabs yields the fabsf (float32), fabsb16 (bfloat16) and fabs
+ * (float64) kernel families; the COMPLEX variant adds the complex kernels. */
+CPU_DEVICE_UNARY_ALL_REAL_MATH_DECL(fabs)
+/*****************************************************************************/
+/*                             Exponential functions                         */
+/*****************************************************************************/
+CPU_DEVICE_UNARY_ALL_COMPLEX_MATH_DECL(exp)
+CPU_DEVICE_UNARY_ALL_REAL_MATH_DECL(exp2)
+CPU_DEVICE_UNARY_ALL_REAL_MATH_DECL(expm1)
+/*****************************************************************************/
+/*                              Logarithm functions                          */
+/*****************************************************************************/
+CPU_DEVICE_UNARY_ALL_COMPLEX_MATH_DECL(log)
+CPU_DEVICE_UNARY_ALL_COMPLEX_MATH_DECL(log10)
+CPU_DEVICE_UNARY_ALL_REAL_MATH_DECL(log2)
+CPU_DEVICE_UNARY_ALL_REAL_MATH_DECL(log1p)
+CPU_DEVICE_UNARY_ALL_REAL_MATH_DECL(logb)
+/*****************************************************************************/
+/*                              Power functions                              */
+/*****************************************************************************/
+CPU_DEVICE_UNARY_ALL_COMPLEX_MATH_DECL(sqrt)
+CPU_DEVICE_UNARY_ALL_REAL_MATH_DECL(cbrt)
+/*****************************************************************************/
+/*                           Trigonometric functions                         */
+/*****************************************************************************/
+CPU_DEVICE_UNARY_ALL_COMPLEX_MATH_DECL(sin)
+CPU_DEVICE_UNARY_ALL_COMPLEX_MATH_DECL(cos)
+CPU_DEVICE_UNARY_ALL_COMPLEX_MATH_DECL(tan)
+CPU_DEVICE_UNARY_ALL_COMPLEX_MATH_DECL(asin)
+CPU_DEVICE_UNARY_ALL_COMPLEX_MATH_DECL(acos)
+CPU_DEVICE_UNARY_ALL_COMPLEX_MATH_DECL(atan)
+/*****************************************************************************/
+/*                             Hyperbolic functions                          */
+/*****************************************************************************/
+CPU_DEVICE_UNARY_ALL_COMPLEX_MATH_DECL(sinh)
+CPU_DEVICE_UNARY_ALL_COMPLEX_MATH_DECL(cosh)
+CPU_DEVICE_UNARY_ALL_COMPLEX_MATH_DECL(tanh)
+CPU_DEVICE_UNARY_ALL_COMPLEX_MATH_DECL(asinh)
+CPU_DEVICE_UNARY_ALL_COMPLEX_MATH_DECL(acosh)
+CPU_DEVICE_UNARY_ALL_COMPLEX_MATH_DECL(atanh)
+/*****************************************************************************/
+/*                            Error and gamma functions                      */
+/*****************************************************************************/
+CPU_DEVICE_UNARY_ALL_REAL_MATH_DECL(erf)
+CPU_DEVICE_UNARY_ALL_REAL_MATH_DECL(erfc)
+CPU_DEVICE_UNARY_ALL_REAL_MATH_DECL(lgamma)
+CPU_DEVICE_UNARY_ALL_REAL_MATH_DECL(tgamma)
+/*****************************************************************************/
+/*                   Ceil, floor, trunc, round, nearbyint                    */
+/*****************************************************************************/
+CPU_DEVICE_UNARY_ALL_REAL_MATH_DECL(ceil)
+CPU_DEVICE_UNARY_ALL_REAL_MATH_DECL(floor)
+CPU_DEVICE_UNARY_ALL_REAL_MATH_DECL(trunc)
+CPU_DEVICE_UNARY_ALL_REAL_MATH_DECL(round)
+CPU_DEVICE_UNARY_ALL_REAL_MATH_DECL(nearbyint)
+#endif /* CPU_DEVICE_UNARY_H */