gumath 0.2.0dev5 → 0.2.0dev8

Sign up to get free protection for your applications and to get access to all the features.
Files changed (99) hide show
  1. checksums.yaml +4 -4
  2. data/CONTRIBUTING.md +7 -2
  3. data/Gemfile +0 -3
  4. data/ext/ruby_gumath/GPATH +0 -0
  5. data/ext/ruby_gumath/GRTAGS +0 -0
  6. data/ext/ruby_gumath/GTAGS +0 -0
  7. data/ext/ruby_gumath/extconf.rb +0 -5
  8. data/ext/ruby_gumath/functions.c +10 -2
  9. data/ext/ruby_gumath/gufunc_object.c +15 -4
  10. data/ext/ruby_gumath/gufunc_object.h +9 -3
  11. data/ext/ruby_gumath/gumath/Makefile +63 -0
  12. data/ext/ruby_gumath/gumath/Makefile.in +1 -0
  13. data/ext/ruby_gumath/gumath/config.h +56 -0
  14. data/ext/ruby_gumath/gumath/config.h.in +3 -0
  15. data/ext/ruby_gumath/gumath/config.log +497 -0
  16. data/ext/ruby_gumath/gumath/config.status +1034 -0
  17. data/ext/ruby_gumath/gumath/configure +375 -4
  18. data/ext/ruby_gumath/gumath/configure.ac +47 -3
  19. data/ext/ruby_gumath/gumath/libgumath/Makefile +236 -0
  20. data/ext/ruby_gumath/gumath/libgumath/Makefile.in +90 -24
  21. data/ext/ruby_gumath/gumath/libgumath/Makefile.vc +54 -15
  22. data/ext/ruby_gumath/gumath/libgumath/apply.c +92 -28
  23. data/ext/ruby_gumath/gumath/libgumath/apply.o +0 -0
  24. data/ext/ruby_gumath/gumath/libgumath/common.o +0 -0
  25. data/ext/ruby_gumath/gumath/libgumath/cpu_device_binary.o +0 -0
  26. data/ext/ruby_gumath/gumath/libgumath/cpu_device_unary.o +0 -0
  27. data/ext/ruby_gumath/gumath/libgumath/cpu_host_binary.o +0 -0
  28. data/ext/ruby_gumath/gumath/libgumath/cpu_host_unary.o +0 -0
  29. data/ext/ruby_gumath/gumath/libgumath/examples.o +0 -0
  30. data/ext/ruby_gumath/gumath/libgumath/extending/graph.c +27 -20
  31. data/ext/ruby_gumath/gumath/libgumath/extending/pdist.c +1 -1
  32. data/ext/ruby_gumath/gumath/libgumath/func.c +13 -9
  33. data/ext/ruby_gumath/gumath/libgumath/func.o +0 -0
  34. data/ext/ruby_gumath/gumath/libgumath/graph.o +0 -0
  35. data/ext/ruby_gumath/gumath/libgumath/gumath.h +55 -14
  36. data/ext/ruby_gumath/gumath/libgumath/kernels/common.c +513 -0
  37. data/ext/ruby_gumath/gumath/libgumath/kernels/common.h +155 -0
  38. data/ext/ruby_gumath/gumath/libgumath/kernels/contrib/bfloat16.h +520 -0
  39. data/ext/ruby_gumath/gumath/libgumath/kernels/cpu_device_binary.cc +1123 -0
  40. data/ext/ruby_gumath/gumath/libgumath/kernels/cpu_device_binary.h +1062 -0
  41. data/ext/ruby_gumath/gumath/libgumath/kernels/cpu_device_msvc.cc +555 -0
  42. data/ext/ruby_gumath/gumath/libgumath/kernels/cpu_device_unary.cc +368 -0
  43. data/ext/ruby_gumath/gumath/libgumath/kernels/cpu_device_unary.h +335 -0
  44. data/ext/ruby_gumath/gumath/libgumath/kernels/cpu_host_binary.c +2952 -0
  45. data/ext/ruby_gumath/gumath/libgumath/kernels/cpu_host_unary.c +1100 -0
  46. data/ext/ruby_gumath/gumath/libgumath/kernels/cuda_device_binary.cu +1143 -0
  47. data/ext/ruby_gumath/gumath/libgumath/kernels/cuda_device_binary.h +1061 -0
  48. data/ext/ruby_gumath/gumath/libgumath/kernels/cuda_device_unary.cu +528 -0
  49. data/ext/ruby_gumath/gumath/libgumath/kernels/cuda_device_unary.h +463 -0
  50. data/ext/ruby_gumath/gumath/libgumath/kernels/cuda_host_binary.c +2817 -0
  51. data/ext/ruby_gumath/gumath/libgumath/kernels/cuda_host_unary.c +1331 -0
  52. data/ext/ruby_gumath/gumath/libgumath/kernels/device.hh +614 -0
  53. data/ext/ruby_gumath/gumath/libgumath/libgumath.a +0 -0
  54. data/ext/ruby_gumath/gumath/libgumath/libgumath.so +1 -0
  55. data/ext/ruby_gumath/gumath/libgumath/libgumath.so.0 +1 -0
  56. data/ext/ruby_gumath/gumath/libgumath/libgumath.so.0.2.0dev3 +0 -0
  57. data/ext/ruby_gumath/gumath/libgumath/nploops.o +0 -0
  58. data/ext/ruby_gumath/gumath/libgumath/pdist.o +0 -0
  59. data/ext/ruby_gumath/gumath/libgumath/quaternion.o +0 -0
  60. data/ext/ruby_gumath/gumath/libgumath/tbl.o +0 -0
  61. data/ext/ruby_gumath/gumath/libgumath/thread.c +17 -4
  62. data/ext/ruby_gumath/gumath/libgumath/thread.o +0 -0
  63. data/ext/ruby_gumath/gumath/libgumath/xndloops.c +110 -0
  64. data/ext/ruby_gumath/gumath/libgumath/xndloops.o +0 -0
  65. data/ext/ruby_gumath/gumath/python/gumath/__init__.py +150 -0
  66. data/ext/ruby_gumath/gumath/python/gumath/_gumath.c +446 -80
  67. data/ext/ruby_gumath/gumath/python/gumath/cuda.c +78 -0
  68. data/ext/ruby_gumath/gumath/python/gumath/examples.c +0 -5
  69. data/ext/ruby_gumath/gumath/python/gumath/functions.c +2 -2
  70. data/ext/ruby_gumath/gumath/python/gumath/gumath.h +246 -0
  71. data/ext/ruby_gumath/gumath/python/gumath/libgumath.a +0 -0
  72. data/ext/ruby_gumath/gumath/python/gumath/libgumath.so +1 -0
  73. data/ext/ruby_gumath/gumath/python/gumath/libgumath.so.0 +1 -0
  74. data/ext/ruby_gumath/gumath/python/gumath/libgumath.so.0.2.0dev3 +0 -0
  75. data/ext/ruby_gumath/gumath/python/gumath/pygumath.h +31 -2
  76. data/ext/ruby_gumath/gumath/python/gumath_aux.py +767 -0
  77. data/ext/ruby_gumath/gumath/python/randdec.py +535 -0
  78. data/ext/ruby_gumath/gumath/python/randfloat.py +177 -0
  79. data/ext/ruby_gumath/gumath/python/test_gumath.py +1504 -24
  80. data/ext/ruby_gumath/gumath/python/test_xndarray.py +462 -0
  81. data/ext/ruby_gumath/gumath/setup.py +67 -6
  82. data/ext/ruby_gumath/gumath/tools/detect_cuda_arch.cc +35 -0
  83. data/ext/ruby_gumath/include/gumath.h +55 -14
  84. data/ext/ruby_gumath/include/ruby_gumath.h +4 -1
  85. data/ext/ruby_gumath/lib/libgumath.a +0 -0
  86. data/ext/ruby_gumath/lib/libgumath.so.0.2.0dev3 +0 -0
  87. data/ext/ruby_gumath/ruby_gumath.c +231 -70
  88. data/ext/ruby_gumath/ruby_gumath.h +4 -1
  89. data/ext/ruby_gumath/ruby_gumath_internal.h +25 -0
  90. data/ext/ruby_gumath/util.c +34 -0
  91. data/ext/ruby_gumath/util.h +9 -0
  92. data/gumath.gemspec +3 -2
  93. data/lib/gumath.rb +55 -1
  94. data/lib/gumath/version.rb +2 -2
  95. data/lib/ruby_gumath.so +0 -0
  96. metadata +63 -10
  97. data/ext/ruby_gumath/gumath/libgumath/extending/bfloat16.c +0 -130
  98. data/ext/ruby_gumath/gumath/libgumath/kernels/binary.c +0 -547
  99. data/ext/ruby_gumath/gumath/libgumath/kernels/unary.c +0 -449
@@ -0,0 +1,555 @@
1
+ /*
2
+ * BSD 3-Clause License
3
+ *
4
+ * Copyright (c) 2017-2018, plures
5
+ * All rights reserved.
6
+ *
7
+ * Redistribution and use in source and binary forms, with or without
8
+ * modification, are permitted provided that the following conditions are met:
9
+ *
10
+ * 1. Redistributions of source code must retain the above copyright notice,
11
+ * this list of conditions and the following disclaimer.
12
+ *
13
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
14
+ * this list of conditions and the following disclaimer in the documentation
15
+ * and/or other materials provided with the distribution.
16
+ *
17
+ * 3. Neither the name of the copyright holder nor the names of its
18
+ * contributors may be used to endorse or promote products derived from
19
+ * this software without specific prior written permission.
20
+ *
21
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
22
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
24
+ * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
25
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
27
+ * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
28
+ * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
29
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
30
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31
+ */
32
+
33
+
34
+ #include <cinttypes>
35
+ #include <cmath>
36
+ #include <complex>
37
+ #include "cpu_device_unary.h"
38
+ #include "device.hh"
39
+
40
+
41
+ /*
42
+ * This file contains the functions on complex numbers that resist
43
+ * compilation with /fp:strict on Visual Studio compilers >= 2015 update 3.
44
+ */
45
+
46
+
47
+ /*****************************************************************************/
48
+ /* CPU device unary kernels */
49
+ /*****************************************************************************/
50
+
51
/*
 * CPU_DEVICE_UNARY(name, func, t0, t1, common)
 *
 * Generates three extern "C" kernels that apply `func` element-wise, reading
 * elements of type t0_t and writing elements of type t1_t. Each input element
 * is cast to common_t before `func` is applied.
 *
 *   gm_cpu_device_fixed_1D_C_<name>_<t0>_<t1>(a0, a1, N)
 *       Element i of the output is computed from element i of the input,
 *       for i in [0, N).
 *
 *   gm_cpu_device_fixed_1D_S_<name>_<t0>_<t1>(a0, a1, s0, s1, N)
 *       Same, but the input index advances by s0 and the output index by s1
 *       per iteration. The strides are added to indices of typed pointers,
 *       so they count elements, not bytes.
 *
 *   gm_cpu_device_0D_<name>_<t0>_<t1>(a0, a1)
 *       Processes exactly one element.
 *
 * The buffers are passed as char pointers and reinterpreted as t0_t / t1_t.
 * The <type>_t names are not defined in this file; presumably they come from
 * the included headers (TODO confirm).
 */
+ #define CPU_DEVICE_UNARY(name, func, t0, t1, common) \
52
+ extern "C" void \
53
+ gm_cpu_device_fixed_1D_C_##name##_##t0##_##t1(const char *a0, char *a1, \
54
+ int64_t N) \
55
+ { \
56
+ const t0##_t *x0 = (const t0##_t *)a0; \
57
+ t1##_t *x1 = (t1##_t *)a1; \
58
+ \
59
+ for (int64_t i = 0; i < N; i++) { \
60
+ x1[i] = func((common##_t)x0[i]); \
61
+ } \
62
+ } \
63
+ \
64
+ extern "C" void \
65
+ gm_cpu_device_fixed_1D_S_##name##_##t0##_##t1(const char *a0, char *a1, \
66
+ const int64_t s0, const int64_t s1, \
67
+ const int64_t N) \
68
+ { \
69
+ const t0##_t *x0 = (const t0##_t *)a0; \
70
+ t1##_t *x1 = (t1##_t *)a1; \
71
+ int64_t i, k0, k1; \
72
+ \
73
+ for (i=0, k0=0, k1=0; i < N; i++, k0+=s0, k1+=s1) { /* k0/k1: strided element indices */ \
74
+ x1[k1] = func((common##_t)x0[k0]); \
75
+ } \
76
+ } \
77
+ \
78
+ extern "C" void \
79
+ gm_cpu_device_0D_##name##_##t0##_##t1(const char *a0, char *a1) \
80
+ { \
81
+ const t0##_t x0 = *((const t0##_t *)a0); \
82
+ t1##_t *x1 = (t1##_t *)a1; \
83
+ *x1 = func((common##_t)x0); \
84
+ }
85
+
86
/*
 * CPU_DEVICE_UNARYC forwards all five arguments unchanged to
 * CPU_DEVICE_UNARY, so in this file both names generate the same kernels.
 */
+ #define CPU_DEVICE_UNARYC(name, func, t0, t1, common) \
87
+ CPU_DEVICE_UNARY(name, func, t0, t1, common)
88
+
89
/*
 * Five-parameter (unary) form of CPU_DEVICE_NOIMPL. It expands to nothing, so
 * a table row that uses it generates no kernel for that type combination.
 */
+ #define CPU_DEVICE_NOIMPL(name, func, t0, t1, common)
90
+
91
+
92
+
93
+ /*****************************************************************************/
94
+ /* Copy, abs */
95
+ /*****************************************************************************/
96
+
97
/*
 * CPU_DEVICE_ALL_UNARY(name, func, hfunc)
 *
 * Table of (input type, output type, common type) rows for unary kernels
 * whose output is complex. Rows written with CPU_DEVICE_UNARYC generate
 * kernels; rows written with CPU_DEVICE_NOIMPL generate nothing. In this
 * table the NOIMPL rows are every row with a complex32 output and every row
 * with a float16 or complex32 input.
 *
 * In every row the common type equals the output type. `hfunc` is accepted
 * but not used by any row.
 */
+ #define CPU_DEVICE_ALL_UNARY(name, func, hfunc) \
98
+ CPU_DEVICE_NOIMPL(name, func, bool, complex32, complex32) \
99
+ CPU_DEVICE_UNARYC(name, func, bool, complex64, complex64) \
100
+ CPU_DEVICE_UNARYC(name, func, bool, complex128, complex128) \
101
+ \
102
+ CPU_DEVICE_NOIMPL(name, func, uint8, complex32, complex32) \
103
+ CPU_DEVICE_UNARYC(name, func, uint8, complex64, complex64) \
104
+ CPU_DEVICE_UNARYC(name, func, uint8, complex128, complex128) \
105
+ \
106
+ CPU_DEVICE_UNARYC(name, func, uint16, complex64, complex64) \
107
+ CPU_DEVICE_UNARYC(name, func, uint16, complex128, complex128) \
108
+ \
109
+ CPU_DEVICE_UNARYC(name, func, uint32, complex128, complex128) \
110
+ \
111
+ CPU_DEVICE_NOIMPL(name, func, int8, complex32, complex32) \
112
+ CPU_DEVICE_UNARYC(name, func, int8, complex64, complex64) \
113
+ CPU_DEVICE_UNARYC(name, func, int8, complex128, complex128) \
114
+ \
115
+ CPU_DEVICE_UNARYC(name, func, int16, complex64, complex64) \
116
+ CPU_DEVICE_UNARYC(name, func, int16, complex128, complex128) \
117
+ \
118
+ CPU_DEVICE_UNARYC(name, func, int32, complex128, complex128) \
119
+ \
120
+ CPU_DEVICE_UNARYC(name, func, bfloat16, complex64, complex64) \
121
+ CPU_DEVICE_UNARYC(name, func, bfloat16, complex128, complex128) \
122
+ \
123
+ CPU_DEVICE_NOIMPL(name, func, float16, complex32, complex32) \
124
+ CPU_DEVICE_NOIMPL(name, func, float16, complex64, complex64) \
125
+ CPU_DEVICE_NOIMPL(name, func, float16, complex128, complex128) \
126
+ \
127
+ CPU_DEVICE_UNARYC(name, func, float32, complex64, complex64) \
128
+ CPU_DEVICE_UNARYC(name, func, float32, complex128, complex128) \
129
+ \
130
+ CPU_DEVICE_UNARYC(name, func, float64, complex128, complex128) \
131
+ \
132
+ CPU_DEVICE_NOIMPL(name, func, complex32, complex32, complex32) \
133
+ CPU_DEVICE_NOIMPL(name, func, complex32, complex64, complex64) \
134
+ CPU_DEVICE_NOIMPL(name, func, complex32, complex128, complex128) \
135
+ \
136
+ CPU_DEVICE_UNARYC(name, func, complex64, complex64, complex64) \
137
+ CPU_DEVICE_UNARYC(name, func, complex64, complex128, complex128) \
138
+ \
139
+ CPU_DEVICE_UNARYC(name, func, complex128, complex128, complex128)
140
+
141
+
142
/*
 * Emits the `copy` and `abs` kernel families from the CPU_DEVICE_ALL_UNARY
 * table.
 *
 * copy(x) is the identity macro, so a copy kernel stores the input element
 * after the cast to the common (complex) type.
 *
 * The abs kernels call std::abs on the operand after that same cast and
 * store the result into a complex-typed output element. What std::abs
 * returns for these types depends on type definitions not visible here.
 *
 * NOTE(review): copy(x) expands to an unparenthesized x. Every use generated
 * by the table passes a cast expression, so precedence is not an issue in
 * this file.
 */
+ #define copy(x) x
143
+ CPU_DEVICE_ALL_UNARY(copy, copy, copy)
144
+ CPU_DEVICE_ALL_UNARY(abs, std::abs, std::abs)
145
+
146
+
147
+ /*****************************************************************************/
148
+ /* Negative */
149
+ /*****************************************************************************/
150
+
151
/*
 * Emits the `negative` kernels for complex64 and complex128; the complex32
 * row is a CPU_DEVICE_NOIMPL no-op.
 *
 * NOTE(review): negative(x) expands to -x without parentheses. The generator
 * only passes a cast expression such as (complex64_t)x0[i], so the expansion
 * negates the cast value as intended. Parenthesize before reusing the macro
 * with other argument shapes.
 */
+ #define negative(x) -x
152
+
153
+ CPU_DEVICE_NOIMPL(negative, negative, complex32, complex32, complex32)
154
+ CPU_DEVICE_UNARYC(negative, negative, complex64, complex64, complex64)
155
+ CPU_DEVICE_UNARYC(negative, negative, complex128, complex128, complex128)
156
+
157
+
158
+ /*****************************************************************************/
159
+ /* Math */
160
+ /*****************************************************************************/
161
+
162
/*
 * CPU_DEVICE_UNARY_ALL_COMPLEX_MATH(name)
 *
 * Emits same-type kernels for complex64 and complex128; the complex32 row
 * generates nothing. `name` is used both as the kernel name and as the
 * function that is called, unqualified, on the complex operand. Which
 * declaration that call resolves to is not visible in this file
 * (presumably the <complex> overloads or helpers from device.hh; TODO
 * confirm).
 *
 * The last row ends with a line continuation, so the definition runs onto
 * the following line, which is blank. Keep that line blank.
 */
+ #define CPU_DEVICE_UNARY_ALL_COMPLEX_MATH(name) \
163
+ CPU_DEVICE_NOIMPL(name, name, complex32, complex32, complex32) \
164
+ CPU_DEVICE_UNARYC(name, name, complex64, complex64, complex64) \
165
+ CPU_DEVICE_UNARYC(name, name, complex128, complex128, complex128) \
166
+
167
/*
 * Forwards to CPU_DEVICE_UNARY_ALL_COMPLEX_MATH(name) and ignores `hfunc`.
 * No invocation of this macro appears in the visible part of the file.
 */
+ #define CPU_DEVICE_UNARY_ALL_COMPLEX_MATH_WITH_HALF(name, hfunc) \
168
+ CPU_DEVICE_UNARY_ALL_COMPLEX_MATH(name)
169
+
170
+
171
+ /*****************************************************************************/
172
+ /* Exponential functions */
173
+ /*****************************************************************************/
174
+
175
/* Emits the exp kernels for complex64 and complex128 (complex32: none). */
+ CPU_DEVICE_UNARY_ALL_COMPLEX_MATH(exp)
176
+
177
+
178
+ /*****************************************************************************/
179
+ /* Logarithm functions */
180
+ /*****************************************************************************/
181
+
182
/* Emits the log and log10 kernels for complex64 and complex128 (complex32: none). */
+ CPU_DEVICE_UNARY_ALL_COMPLEX_MATH(log)
183
+ CPU_DEVICE_UNARY_ALL_COMPLEX_MATH(log10)
184
+
185
+
186
+ /*****************************************************************************/
187
+ /* Power functions */
188
+ /*****************************************************************************/
189
+
190
/* Emits the sqrt kernels for complex64 and complex128 (complex32: none). */
+ CPU_DEVICE_UNARY_ALL_COMPLEX_MATH(sqrt)
191
+
192
+
193
+ /*****************************************************************************/
194
+ /* Trigonometric functions */
195
+ /*****************************************************************************/
196
+
197
/*
 * Emits the sin, cos, tan, asin, acos and atan kernels for complex64 and
 * complex128 (complex32: none).
 */
+ CPU_DEVICE_UNARY_ALL_COMPLEX_MATH(sin)
198
+ CPU_DEVICE_UNARY_ALL_COMPLEX_MATH(cos)
199
+ CPU_DEVICE_UNARY_ALL_COMPLEX_MATH(tan)
200
+ CPU_DEVICE_UNARY_ALL_COMPLEX_MATH(asin)
201
+ CPU_DEVICE_UNARY_ALL_COMPLEX_MATH(acos)
202
+ CPU_DEVICE_UNARY_ALL_COMPLEX_MATH(atan)
203
+
204
+
205
+ /*****************************************************************************/
206
+ /* Hyperbolic functions */
207
+ /*****************************************************************************/
208
+
209
/*
 * Emits the sinh, cosh, tanh, asinh, acosh and atanh kernels for complex64
 * and complex128 (complex32: none).
 */
+ CPU_DEVICE_UNARY_ALL_COMPLEX_MATH(sinh)
210
+ CPU_DEVICE_UNARY_ALL_COMPLEX_MATH(cosh)
211
+ CPU_DEVICE_UNARY_ALL_COMPLEX_MATH(tanh)
212
+ CPU_DEVICE_UNARY_ALL_COMPLEX_MATH(asinh)
213
+ CPU_DEVICE_UNARY_ALL_COMPLEX_MATH(acosh)
214
+ CPU_DEVICE_UNARY_ALL_COMPLEX_MATH(atanh)
215
+
216
+
217
+ /*****************************************************************************/
218
+ /* CPU device binary kernels */
219
+ /*****************************************************************************/
220
+
221
/*
 * The unary section defined CPU_DEVICE_NOIMPL with five parameters. The
 * binary section below defines it again with six, so the old definition has
 * to be removed first.
 */
+ #undef CPU_DEVICE_NOIMPL
222
+ #include "cpu_device_binary.h"
223
+
224
+
225
/*
 * CPU_DEVICE_BINARY(name, func, t0, t1, t2, common)
 *
 * Generates three extern "C" kernels that combine two inputs (element types
 * t0_t and t1_t) into one output (element type t2_t). Both operands are cast
 * to common_t before `func` is applied.
 *
 *   gm_cpu_device_fixed_1D_C_<name>_<t0>_<t1>_<t2>(a0, a1, a2, N)
 *       Output element i is computed from input elements i, for i in [0, N).
 *       The main loop handles eight elements per iteration while i < N-7;
 *       a second loop handles the remaining elements.
 *
 *   gm_cpu_device_fixed_1D_S_<name>_<t0>_<t1>_<t2>(a0, a1, a2, s0, s1, s2, N)
 *       Same, but the three indices advance by s0, s1 and s2 per iteration.
 *       The strides are added to indices of typed pointers, so they count
 *       elements, not bytes.
 *
 *   gm_cpu_device_0D_<name>_<t0>_<t1>_<t2>(a0, a1, a2)
 *       Processes exactly one pair of elements.
 *
 * `func` may be a function or a function-like macro. Several callers in this
 * file pass unparenthesized operator macros (for example x + y), which work
 * because both arguments are cast expressions.
 */
+ #define CPU_DEVICE_BINARY(name, func, t0, t1, t2, common) \
226
+ extern "C" void \
227
+ gm_cpu_device_fixed_1D_C_##name##_##t0##_##t1##_##t2( \
228
+ const char *a0, const char *a1, char *a2, \
229
+ const int64_t N) \
230
+ { \
231
+ const t0##_t *x0 = (const t0##_t *)a0; \
232
+ const t1##_t *x1 = (const t1##_t *)a1; \
233
+ t2##_t *x2 = (t2##_t *)a2; \
234
+ int64_t i; \
235
+ \
236
+ for (i = 0; i < N-7; i += 8) { /* main loop: eight elements per pass */ \
237
+ x2[i] = func((common##_t)x0[i], (common##_t)x1[i]); \
238
+ x2[i+1] = func((common##_t)x0[i+1], (common##_t)x1[i+1]); \
239
+ x2[i+2] = func((common##_t)x0[i+2], (common##_t)x1[i+2]); \
240
+ x2[i+3] = func((common##_t)x0[i+3], (common##_t)x1[i+3]); \
241
+ x2[i+4] = func((common##_t)x0[i+4], (common##_t)x1[i+4]); \
242
+ x2[i+5] = func((common##_t)x0[i+5], (common##_t)x1[i+5]); \
243
+ x2[i+6] = func((common##_t)x0[i+6], (common##_t)x1[i+6]); \
244
+ x2[i+7] = func((common##_t)x0[i+7], (common##_t)x1[i+7]); \
245
+ } \
246
+ for (; i < N; i++) { /* remaining elements, continuing from i */ \
247
+ x2[i] = func((common##_t)x0[i], (common##_t)x1[i]); \
248
+ } \
249
+ } \
250
+ \
251
+ extern "C" void \
252
+ gm_cpu_device_fixed_1D_S_##name##_##t0##_##t1##_##t2( \
253
+ const char *a0, const char *a1, char *a2, \
254
+ const int64_t s0, const int64_t s1, const int64_t s2, \
255
+ const int64_t N) \
256
+ { \
257
+ const t0##_t *x0 = (const t0##_t *)a0; \
258
+ const t1##_t *x1 = (const t1##_t *)a1; \
259
+ t2##_t *x2 = (t2##_t *)a2; \
260
+ int64_t i, k0, k1, k2; \
261
+ \
262
+ for (i=0, k0=0, k1=0, k2=0; i < N; i++, k0+=s0, k1+=s1, k2+=s2) { \
263
+ x2[k2] = func((common##_t)x0[k0], (common##_t)x1[k1]); \
264
+ } \
265
+ } \
266
+ \
267
+ extern "C" void \
268
+ gm_cpu_device_0D_##name##_##t0##_##t1##_##t2( \
269
+ const char *a0, const char *a1, char *a2) \
270
+ { \
271
+ const t0##_t x0 = *(const t0##_t *)a0; \
272
+ const t1##_t x1 = *(const t1##_t *)a1; \
273
+ t2##_t *x2 = (t2##_t *)a2; \
274
+ *x2 = func((common##_t)x0, (common##_t)x1); \
275
+ }
276
+
277
/*
 * CPU_DEVICE_BINARYC forwards all six arguments unchanged to
 * CPU_DEVICE_BINARY, so in this file both names generate the same kernels.
 */
+ #define CPU_DEVICE_BINARYC(name, func, t0, t1, t2, common) \
278
+ CPU_DEVICE_BINARY(name, func, t0, t1, t2, common)
279
+
280
/*
 * Six-parameter (binary) forms. Both expand to nothing, so a table row that
 * uses either one generates no kernel. CPU_DEVICE_NOKERN is not referenced
 * in the visible part of this file.
 */
+ #define CPU_DEVICE_NOIMPL(name, func, t0, t1, t2, common)
281
+ #define CPU_DEVICE_NOKERN(name, func, t0, t1, t2, common)
282
+
283
+
284
+ /*****************************************************************************/
285
+ /* Arithmetic */
286
+ /*****************************************************************************/
287
+
288
/*
 * CPU_DEVICE_ALL_BINARY(name, func, hfunc)
 *
 * Table of (t0, t1, t2, common) rows for binary kernels in which at least
 * one operand is complex. Rows written with CPU_DEVICE_BINARYC generate
 * kernels; rows written with CPU_DEVICE_NOIMPL generate nothing. The NOIMPL
 * rows are exactly those with a float16 or complex32 operand.
 *
 * In every row the result type t2 equals the common type. `hfunc` is
 * accepted but not used by any row.
 *
 * The last row ends with a line continuation, so the definition runs onto
 * the following line, which is blank. Keep that line blank.
 */
+ #define CPU_DEVICE_ALL_BINARY(name, func, hfunc) \
289
+ CPU_DEVICE_NOIMPL(name, func, uint8, complex32, complex32, complex32) \
290
+ CPU_DEVICE_BINARYC(name, func, uint8, complex64, complex64, complex64) \
291
+ CPU_DEVICE_BINARYC(name, func, uint8, complex128, complex128, complex128) \
292
+ \
293
+ CPU_DEVICE_NOIMPL(name, func, uint16, complex32, complex64, complex64) \
294
+ CPU_DEVICE_BINARYC(name, func, uint16, complex64, complex64, complex64) \
295
+ CPU_DEVICE_BINARYC(name, func, uint16, complex128, complex128, complex128) \
296
+ \
297
+ CPU_DEVICE_NOIMPL(name, func, uint32, complex32, complex128, complex128) \
298
+ CPU_DEVICE_BINARYC(name, func, uint32, complex64, complex128, complex128) \
299
+ CPU_DEVICE_BINARYC(name, func, uint32, complex128, complex128, complex128) \
300
+ \
301
+ CPU_DEVICE_NOIMPL(name, func, int8, complex32, complex32, complex32) \
302
+ CPU_DEVICE_BINARYC(name, func, int8, complex64, complex64, complex64) \
303
+ CPU_DEVICE_BINARYC(name, func, int8, complex128, complex128, complex128) \
304
+ \
305
+ CPU_DEVICE_NOIMPL(name, func, int16, complex32, complex64, complex64) \
306
+ CPU_DEVICE_BINARYC(name, func, int16, complex64, complex64, complex64) \
307
+ CPU_DEVICE_BINARYC(name, func, int16, complex128, complex128, complex128) \
308
+ \
309
+ CPU_DEVICE_NOIMPL(name, func, int32, complex32, complex128, complex128) \
310
+ CPU_DEVICE_BINARYC(name, func, int32, complex64, complex128, complex128) \
311
+ CPU_DEVICE_BINARYC(name, func, int32, complex128, complex128, complex128) \
312
+ \
313
+ CPU_DEVICE_NOIMPL(name, func, float16, complex32, complex32, complex32) \
314
+ CPU_DEVICE_NOIMPL(name, func, float16, complex64, complex64, complex64) \
315
+ CPU_DEVICE_NOIMPL(name, func, float16, complex128, complex128, complex128) \
316
+ \
317
+ CPU_DEVICE_NOIMPL(name, func, float32, complex32, complex64, complex64) \
318
+ CPU_DEVICE_BINARYC(name, func, float32, complex64, complex64, complex64) \
319
+ CPU_DEVICE_BINARYC(name, func, float32, complex128, complex128, complex128) \
320
+ \
321
+ CPU_DEVICE_NOIMPL(name, func, float64, complex32, complex128, complex128) \
322
+ CPU_DEVICE_BINARYC(name, func, float64, complex64, complex128, complex128) \
323
+ CPU_DEVICE_BINARYC(name, func, float64, complex128, complex128, complex128) \
324
+ \
325
+ CPU_DEVICE_NOIMPL(name, func, complex32, uint8, complex32, complex32) \
326
+ CPU_DEVICE_NOIMPL(name, func, complex32, uint16, complex64, complex64) \
327
+ CPU_DEVICE_NOIMPL(name, func, complex32, uint32, complex128, complex128) \
328
+ CPU_DEVICE_NOIMPL(name, func, complex32, int8, complex32, complex32) \
329
+ CPU_DEVICE_NOIMPL(name, func, complex32, int16, complex64, complex64) \
330
+ CPU_DEVICE_NOIMPL(name, func, complex32, int32, complex128, complex128) \
331
+ CPU_DEVICE_NOIMPL(name, func, complex32, float16, complex32, complex32) \
332
+ CPU_DEVICE_NOIMPL(name, func, complex32, float32, complex64, complex64) \
333
+ CPU_DEVICE_NOIMPL(name, func, complex32, float64, complex128, complex128) \
334
+ CPU_DEVICE_NOIMPL(name, func, complex32, complex32, complex32, complex32) \
335
+ CPU_DEVICE_NOIMPL(name, func, complex32, complex64, complex64, complex64) \
336
+ CPU_DEVICE_NOIMPL(name, func, complex32, complex128, complex128, complex128) \
337
+ \
338
+ CPU_DEVICE_BINARYC(name, func, complex64, uint8, complex64, complex64) \
339
+ CPU_DEVICE_BINARYC(name, func, complex64, uint16, complex64, complex64) \
340
+ CPU_DEVICE_BINARYC(name, func, complex64, uint32, complex128, complex128) \
341
+ CPU_DEVICE_BINARYC(name, func, complex64, int8, complex64, complex64) \
342
+ CPU_DEVICE_BINARYC(name, func, complex64, int16, complex64, complex64) \
343
+ CPU_DEVICE_BINARYC(name, func, complex64, int32, complex128, complex128) \
344
+ CPU_DEVICE_NOIMPL(name, func, complex64, float16, complex64, complex64) \
345
+ CPU_DEVICE_BINARYC(name, func, complex64, float32, complex64, complex64) \
346
+ CPU_DEVICE_BINARYC(name, func, complex64, float64, complex128, complex128) \
347
+ CPU_DEVICE_NOIMPL(name, func, complex64, complex32, complex64, complex64) \
348
+ CPU_DEVICE_BINARYC(name, func, complex64, complex64, complex64, complex64) \
349
+ CPU_DEVICE_BINARYC(name, func, complex64, complex128, complex128, complex128) \
350
+ \
351
+ CPU_DEVICE_BINARYC(name, func, complex128, uint8, complex128, complex128) \
352
+ CPU_DEVICE_BINARYC(name, func, complex128, uint16, complex128, complex128) \
353
+ CPU_DEVICE_BINARYC(name, func, complex128, uint32, complex128, complex128) \
354
+ CPU_DEVICE_BINARYC(name, func, complex128, int8, complex128, complex128) \
355
+ CPU_DEVICE_BINARYC(name, func, complex128, int16, complex128, complex128) \
356
+ CPU_DEVICE_BINARYC(name, func, complex128, int32, complex128, complex128) \
357
+ CPU_DEVICE_NOIMPL(name, func, complex128, float16, complex128, complex128) \
358
+ CPU_DEVICE_BINARYC(name, func, complex128, float32, complex128, complex128) \
359
+ CPU_DEVICE_BINARYC(name, func, complex128, float64, complex128, complex128) \
360
+ CPU_DEVICE_NOIMPL(name, func, complex128, complex32, complex128, complex128) \
361
+ CPU_DEVICE_BINARYC(name, func, complex128, complex64, complex128, complex128) \
362
+ CPU_DEVICE_BINARYC(name, func, complex128, complex128, complex128, complex128) \
363
+
364
/*
 * CPU_DEVICE_ALL_BINARY_FLOAT_RETURN(name, func, hfunc)
 *
 * Table of (t0, t1, t2, common) rows with the same layout as
 * CPU_DEVICE_ALL_BINARY: CPU_DEVICE_BINARYC rows generate kernels,
 * CPU_DEVICE_NOIMPL rows (any float16 or complex32 operand) generate
 * nothing, t2 equals the common type in every row, and `hfunc` is unused.
 *
 * NOTE(review): in this file the rows appear identical, row for row, to
 * those of CPU_DEVICE_ALL_BINARY. If that is intentional, this macro could
 * forward to it instead of repeating the table.
 *
 * The last row ends with a line continuation, so the definition runs onto
 * the following line, which is blank. Keep that line blank.
 */
+ #define CPU_DEVICE_ALL_BINARY_FLOAT_RETURN(name, func, hfunc) \
365
+ CPU_DEVICE_NOIMPL(name, func, uint8, complex32, complex32, complex32) \
366
+ CPU_DEVICE_BINARYC(name, func, uint8, complex64, complex64, complex64) \
367
+ CPU_DEVICE_BINARYC(name, func, uint8, complex128, complex128, complex128) \
368
+ \
369
+ CPU_DEVICE_NOIMPL(name, func, uint16, complex32, complex64, complex64) \
370
+ CPU_DEVICE_BINARYC(name, func, uint16, complex64, complex64, complex64) \
371
+ CPU_DEVICE_BINARYC(name, func, uint16, complex128, complex128, complex128) \
372
+ \
373
+ CPU_DEVICE_NOIMPL(name, func, uint32, complex32, complex128, complex128) \
374
+ CPU_DEVICE_BINARYC(name, func, uint32, complex64, complex128, complex128) \
375
+ CPU_DEVICE_BINARYC(name, func, uint32, complex128, complex128, complex128) \
376
+ \
377
+ CPU_DEVICE_NOIMPL(name, func, int8, complex32, complex32, complex32) \
378
+ CPU_DEVICE_BINARYC(name, func, int8, complex64, complex64, complex64) \
379
+ CPU_DEVICE_BINARYC(name, func, int8, complex128, complex128, complex128) \
380
+ \
381
+ CPU_DEVICE_NOIMPL(name, func, int16, complex32, complex64, complex64) \
382
+ CPU_DEVICE_BINARYC(name, func, int16, complex64, complex64, complex64) \
383
+ CPU_DEVICE_BINARYC(name, func, int16, complex128, complex128, complex128) \
384
+ \
385
+ CPU_DEVICE_NOIMPL(name, func, int32, complex32, complex128, complex128) \
386
+ CPU_DEVICE_BINARYC(name, func, int32, complex64, complex128, complex128) \
387
+ CPU_DEVICE_BINARYC(name, func, int32, complex128, complex128, complex128) \
388
+ \
389
+ CPU_DEVICE_NOIMPL(name, func, float16, complex32, complex32, complex32) \
390
+ CPU_DEVICE_NOIMPL(name, func, float16, complex64, complex64, complex64) \
391
+ CPU_DEVICE_NOIMPL(name, func, float16, complex128, complex128, complex128) \
392
+ \
393
+ CPU_DEVICE_NOIMPL(name, func, float32, complex32, complex64, complex64) \
394
+ CPU_DEVICE_BINARYC(name, func, float32, complex64, complex64, complex64) \
395
+ CPU_DEVICE_BINARYC(name, func, float32, complex128, complex128, complex128) \
396
+ \
397
+ CPU_DEVICE_NOIMPL(name, func, float64, complex32, complex128, complex128) \
398
+ CPU_DEVICE_BINARYC(name, func, float64, complex64, complex128, complex128) \
399
+ CPU_DEVICE_BINARYC(name, func, float64, complex128, complex128, complex128) \
400
+ \
401
+ CPU_DEVICE_NOIMPL(name, func, complex32, uint8, complex32, complex32) \
402
+ CPU_DEVICE_NOIMPL(name, func, complex32, uint16, complex64, complex64) \
403
+ CPU_DEVICE_NOIMPL(name, func, complex32, uint32, complex128, complex128) \
404
+ CPU_DEVICE_NOIMPL(name, func, complex32, int8, complex32, complex32) \
405
+ CPU_DEVICE_NOIMPL(name, func, complex32, int16, complex64, complex64) \
406
+ CPU_DEVICE_NOIMPL(name, func, complex32, int32, complex128, complex128) \
407
+ CPU_DEVICE_NOIMPL(name, func, complex32, float16, complex32, complex32) \
408
+ CPU_DEVICE_NOIMPL(name, func, complex32, float32, complex64, complex64) \
409
+ CPU_DEVICE_NOIMPL(name, func, complex32, float64, complex128, complex128) \
410
+ CPU_DEVICE_NOIMPL(name, func, complex32, complex32, complex32, complex32) \
411
+ CPU_DEVICE_NOIMPL(name, func, complex32, complex64, complex64, complex64) \
412
+ CPU_DEVICE_NOIMPL(name, func, complex32, complex128, complex128, complex128) \
413
+ \
414
+ CPU_DEVICE_BINARYC(name, func, complex64, uint8, complex64, complex64) \
415
+ CPU_DEVICE_BINARYC(name, func, complex64, uint16, complex64, complex64) \
416
+ CPU_DEVICE_BINARYC(name, func, complex64, uint32, complex128, complex128) \
417
+ CPU_DEVICE_BINARYC(name, func, complex64, int8, complex64, complex64) \
418
+ CPU_DEVICE_BINARYC(name, func, complex64, int16, complex64, complex64) \
419
+ CPU_DEVICE_BINARYC(name, func, complex64, int32, complex128, complex128) \
420
+ CPU_DEVICE_NOIMPL(name, func, complex64, float16, complex64, complex64) \
421
+ CPU_DEVICE_BINARYC(name, func, complex64, float32, complex64, complex64) \
422
+ CPU_DEVICE_BINARYC(name, func, complex64, float64, complex128, complex128) \
423
+ CPU_DEVICE_NOIMPL(name, func, complex64, complex32, complex64, complex64) \
424
+ CPU_DEVICE_BINARYC(name, func, complex64, complex64, complex64, complex64) \
425
+ CPU_DEVICE_BINARYC(name, func, complex64, complex128, complex128, complex128) \
426
+ \
427
+ CPU_DEVICE_BINARYC(name, func, complex128, uint8, complex128, complex128) \
428
+ CPU_DEVICE_BINARYC(name, func, complex128, uint16, complex128, complex128) \
429
+ CPU_DEVICE_BINARYC(name, func, complex128, uint32, complex128, complex128) \
430
+ CPU_DEVICE_BINARYC(name, func, complex128, int8, complex128, complex128) \
431
+ CPU_DEVICE_BINARYC(name, func, complex128, int16, complex128, complex128) \
432
+ CPU_DEVICE_BINARYC(name, func, complex128, int32, complex128, complex128) \
433
+ CPU_DEVICE_NOIMPL(name, func, complex128, float16, complex128, complex128) \
434
+ CPU_DEVICE_BINARYC(name, func, complex128, float32, complex128, complex128) \
435
+ CPU_DEVICE_BINARYC(name, func, complex128, float64, complex128, complex128) \
436
+ CPU_DEVICE_NOIMPL(name, func, complex128, complex32, complex128, complex128) \
437
+ CPU_DEVICE_BINARYC(name, func, complex128, complex64, complex128, complex128) \
438
+ CPU_DEVICE_BINARYC(name, func, complex128, complex128, complex128, complex128) \
439
+
440
/*
 * Emits the add, subtract, multiply, divide and power kernel families.
 *
 * add, subtract, multiply and divide are operator macros applied to the two
 * operands after both have been cast to the common complex type. divide is
 * instantiated through CPU_DEVICE_ALL_BINARY_FLOAT_RETURN, the others
 * through CPU_DEVICE_ALL_BINARY.
 *
 * power calls _pow, which is not defined in the visible part of this file
 * (presumably provided by device.hh; TODO confirm).
 *
 * The third argument of each invocation (for example `sub`) is the table's
 * hfunc parameter, which the tables never expand.
 *
 * NOTE(review): the operator macros are not parenthesized. That is safe here
 * because the generator only passes cast expressions and uses the result as
 * the whole right-hand side of an assignment.
 */
+ #define add(x, y) x + y
441
+ CPU_DEVICE_ALL_BINARY(add, add, add)
442
+
443
+ #define subtract(x, y) x - y
444
+ CPU_DEVICE_ALL_BINARY(subtract, subtract, sub)
445
+
446
+ #define multiply(x, y) x * y
447
+ CPU_DEVICE_ALL_BINARY(multiply, multiply, multiply)
448
+
449
+ #define divide(x, y) x / y
450
+ CPU_DEVICE_ALL_BINARY_FLOAT_RETURN(divide, divide, divide)
451
+
452
+ CPU_DEVICE_ALL_BINARY(power, _pow, _pow)
453
+
454
+
455
+ /*****************************************************************************/
456
+ /* Comparison */
457
+ /*****************************************************************************/
458
+
459
/*
 * CPU_DEVICE_ALL_COMPARISON(name, func, hfunc, cfunc)
 *
 * Table of (t0, t1, bool, common) rows for comparison kernels in which at
 * least one operand is complex. Every row passes `cfunc` as the function to
 * apply; `func` and `hfunc` are accepted but never expanded. The output
 * element type is bool in every row, and the operands are compared after
 * being cast to the row's complex common type.
 *
 * Rows written with CPU_DEVICE_BINARYC generate kernels; rows written with
 * CPU_DEVICE_NOIMPL (any float16 or complex32 operand) generate nothing.
 *
 * The last row ends with a line continuation, so the definition runs onto
 * the following line, which is blank. Keep that line blank.
 */
+ #define CPU_DEVICE_ALL_COMPARISON(name, func, hfunc, cfunc) \
460
+ CPU_DEVICE_NOIMPL(name, cfunc, uint8, complex32, bool, complex32) \
461
+ CPU_DEVICE_BINARYC(name, cfunc, uint8, complex64, bool, complex64) \
462
+ CPU_DEVICE_BINARYC(name, cfunc, uint8, complex128, bool, complex128) \
463
+ \
464
+ CPU_DEVICE_NOIMPL(name, cfunc, uint16, complex32, bool, complex64) \
465
+ CPU_DEVICE_BINARYC(name, cfunc, uint16, complex64, bool, complex64) \
466
+ CPU_DEVICE_BINARYC(name, cfunc, uint16, complex128, bool, complex128) \
467
+ \
468
+ CPU_DEVICE_NOIMPL(name, cfunc, uint32, complex32, bool, complex128) \
469
+ CPU_DEVICE_BINARYC(name, cfunc, uint32, complex64, bool, complex128) \
470
+ CPU_DEVICE_BINARYC(name, cfunc, uint32, complex128, bool, complex128) \
471
+ \
472
+ CPU_DEVICE_NOIMPL(name, cfunc, int8, complex32, bool, complex32) \
473
+ CPU_DEVICE_BINARYC(name, cfunc, int8, complex64, bool, complex64) \
474
+ CPU_DEVICE_BINARYC(name, cfunc, int8, complex128, bool, complex128) \
475
+ \
476
+ CPU_DEVICE_NOIMPL(name, cfunc, int16, complex32, bool, complex64) \
477
+ CPU_DEVICE_BINARYC(name, cfunc, int16, complex64, bool, complex64) \
478
+ CPU_DEVICE_BINARYC(name, cfunc, int16, complex128, bool, complex128) \
479
+ \
480
+ CPU_DEVICE_NOIMPL(name, cfunc, int32, complex32, bool, complex128) \
481
+ CPU_DEVICE_BINARYC(name, cfunc, int32, complex64, bool, complex128) \
482
+ CPU_DEVICE_BINARYC(name, cfunc, int32, complex128, bool, complex128) \
483
+ \
484
+ CPU_DEVICE_NOIMPL(name, cfunc, float16, complex32, bool, complex32) \
485
+ CPU_DEVICE_NOIMPL(name, cfunc, float16, complex64, bool, complex64) \
486
+ CPU_DEVICE_NOIMPL(name, cfunc, float16, complex128, bool, complex128) \
487
+ \
488
+ CPU_DEVICE_NOIMPL(name, cfunc, float32, complex32, bool, complex64) \
489
+ CPU_DEVICE_BINARYC(name, cfunc, float32, complex64, bool, complex64) \
490
+ CPU_DEVICE_BINARYC(name, cfunc, float32, complex128, bool, complex128) \
491
+ \
492
+ CPU_DEVICE_NOIMPL(name, cfunc, float64, complex32, bool, complex128) \
493
+ CPU_DEVICE_BINARYC(name, cfunc, float64, complex64, bool, complex128) \
494
+ CPU_DEVICE_BINARYC(name, cfunc, float64, complex128, bool, complex128) \
495
+ \
496
+ CPU_DEVICE_NOIMPL(name, cfunc, complex32, uint8, bool, complex32) \
497
+ CPU_DEVICE_NOIMPL(name, cfunc, complex32, uint16, bool, complex64) \
498
+ CPU_DEVICE_NOIMPL(name, cfunc, complex32, uint32, bool, complex128) \
499
+ CPU_DEVICE_NOIMPL(name, cfunc, complex32, int8, bool, complex32) \
500
+ CPU_DEVICE_NOIMPL(name, cfunc, complex32, int16, bool, complex64) \
501
+ CPU_DEVICE_NOIMPL(name, cfunc, complex32, int32, bool, complex128) \
502
+ CPU_DEVICE_NOIMPL(name, cfunc, complex32, float16, bool, complex32) \
503
+ CPU_DEVICE_NOIMPL(name, cfunc, complex32, float32, bool, complex64) \
504
+ CPU_DEVICE_NOIMPL(name, cfunc, complex32, float64, bool, complex128) \
505
+ CPU_DEVICE_NOIMPL(name, cfunc, complex32, complex32, bool, complex32) \
506
+ CPU_DEVICE_NOIMPL(name, cfunc, complex32, complex64, bool, complex64) \
507
+ CPU_DEVICE_NOIMPL(name, cfunc, complex32, complex128, bool, complex128) \
508
+ \
509
+ CPU_DEVICE_BINARYC(name, cfunc, complex64, uint8, bool, complex64) \
510
+ CPU_DEVICE_BINARYC(name, cfunc, complex64, uint16, bool, complex64) \
511
+ CPU_DEVICE_BINARYC(name, cfunc, complex64, uint32, bool, complex128) \
512
+ CPU_DEVICE_BINARYC(name, cfunc, complex64, int8, bool, complex64) \
513
+ CPU_DEVICE_BINARYC(name, cfunc, complex64, int16, bool, complex64) \
514
+ CPU_DEVICE_BINARYC(name, cfunc, complex64, int32, bool, complex128) \
515
+ CPU_DEVICE_NOIMPL(name, cfunc, complex64, float16, bool, complex64) \
516
+ CPU_DEVICE_BINARYC(name, cfunc, complex64, float32, bool, complex64) \
517
+ CPU_DEVICE_BINARYC(name, cfunc, complex64, float64, bool, complex128) \
518
+ CPU_DEVICE_NOIMPL(name, cfunc, complex64, complex32, bool, complex64) \
519
+ CPU_DEVICE_BINARYC(name, cfunc, complex64, complex64, bool, complex64) \
520
+ CPU_DEVICE_BINARYC(name, cfunc, complex64, complex128, bool, complex128) \
521
+ \
522
+ CPU_DEVICE_BINARYC(name, cfunc, complex128, uint8, bool, complex128) \
523
+ CPU_DEVICE_BINARYC(name, cfunc, complex128, uint16, bool, complex128) \
524
+ CPU_DEVICE_BINARYC(name, cfunc, complex128, uint32, bool, complex128) \
525
+ CPU_DEVICE_BINARYC(name, cfunc, complex128, int8, bool, complex128) \
526
+ CPU_DEVICE_BINARYC(name, cfunc, complex128, int16, bool, complex128) \
527
+ CPU_DEVICE_BINARYC(name, cfunc, complex128, int32, bool, complex128) \
528
+ CPU_DEVICE_NOIMPL(name, cfunc, complex128, float16, bool, complex128) \
529
+ CPU_DEVICE_BINARYC(name, cfunc, complex128, float32, bool, complex128) \
530
+ CPU_DEVICE_BINARYC(name, cfunc, complex128, float64, bool, complex128) \
531
+ CPU_DEVICE_NOIMPL(name, cfunc, complex128, complex32, bool, complex128) \
532
+ CPU_DEVICE_BINARYC(name, cfunc, complex128, complex64, bool, complex128) \
533
+ CPU_DEVICE_BINARYC(name, cfunc, complex128, complex128, bool, complex128) \
534
+
535
+
536
/*
 * Emits the comparison kernel families: less, less_equal, greater_equal,
 * greater, equal, not_equal and equaln.
 *
 * CPU_DEVICE_ALL_COMPARISON expands only its fourth argument (cfunc), so:
 *   - less, less_equal, greater_equal, greater and equaln kernels call
 *     lexorder_lt, lexorder_le, lexorder_ge, lexorder_gt and lexorder_eqn.
 *     Those names are not defined in the visible part of this file
 *     (presumably provided by device.hh; TODO confirm).
 *   - equal and not_equal kernels expand the `equal` / `not_equal` macros
 *     defined here, i.e. the == and != operators on the cast operands.
 *
 * The less, less_equal, greater_equal, greater and equaln operator macros
 * defined below are therefore not used by the kernels generated here.
 * equaln(x, y) is true when x == y, or when both x and y compare unequal to
 * themselves.
 *
 * NOTE(review): the macro arguments are not parenthesized. The uses
 * generated here pass cast expressions only.
 */
+ #define less(x, y) x < y
537
+ CPU_DEVICE_ALL_COMPARISON(less, less, less, lexorder_lt)
538
+
539
+ #define less_equal(x, y) x <= y
540
+ CPU_DEVICE_ALL_COMPARISON(less_equal, less_equal, less_equal, lexorder_le)
541
+
542
+ #define greater_equal(x, y) x >= y
543
+ CPU_DEVICE_ALL_COMPARISON(greater_equal, greater_equal, greater_equal, lexorder_ge)
544
+
545
+ #define greater(x, y) x > y
546
+ CPU_DEVICE_ALL_COMPARISON(greater, greater, greater, lexorder_gt)
547
+
548
+ #define equal(x, y) x == y
549
+ CPU_DEVICE_ALL_COMPARISON(equal, equal, equal, equal)
550
+
551
+ #define not_equal(x, y) x != y
552
+ CPU_DEVICE_ALL_COMPARISON(not_equal, not_equal, not_equal, not_equal)
553
+
554
+ #define equaln(x, y) (x == y || (x != x && y != y))
555
+ CPU_DEVICE_ALL_COMPARISON(equaln, equaln, equaln, lexorder_eqn)