gumath 0.2.0dev5 → 0.2.0dev8

Sign up to get free protection for your applications and to get access to all the features.
Files changed (99) hide show
  1. checksums.yaml +4 -4
  2. data/CONTRIBUTING.md +7 -2
  3. data/Gemfile +0 -3
  4. data/ext/ruby_gumath/GPATH +0 -0
  5. data/ext/ruby_gumath/GRTAGS +0 -0
  6. data/ext/ruby_gumath/GTAGS +0 -0
  7. data/ext/ruby_gumath/extconf.rb +0 -5
  8. data/ext/ruby_gumath/functions.c +10 -2
  9. data/ext/ruby_gumath/gufunc_object.c +15 -4
  10. data/ext/ruby_gumath/gufunc_object.h +9 -3
  11. data/ext/ruby_gumath/gumath/Makefile +63 -0
  12. data/ext/ruby_gumath/gumath/Makefile.in +1 -0
  13. data/ext/ruby_gumath/gumath/config.h +56 -0
  14. data/ext/ruby_gumath/gumath/config.h.in +3 -0
  15. data/ext/ruby_gumath/gumath/config.log +497 -0
  16. data/ext/ruby_gumath/gumath/config.status +1034 -0
  17. data/ext/ruby_gumath/gumath/configure +375 -4
  18. data/ext/ruby_gumath/gumath/configure.ac +47 -3
  19. data/ext/ruby_gumath/gumath/libgumath/Makefile +236 -0
  20. data/ext/ruby_gumath/gumath/libgumath/Makefile.in +90 -24
  21. data/ext/ruby_gumath/gumath/libgumath/Makefile.vc +54 -15
  22. data/ext/ruby_gumath/gumath/libgumath/apply.c +92 -28
  23. data/ext/ruby_gumath/gumath/libgumath/apply.o +0 -0
  24. data/ext/ruby_gumath/gumath/libgumath/common.o +0 -0
  25. data/ext/ruby_gumath/gumath/libgumath/cpu_device_binary.o +0 -0
  26. data/ext/ruby_gumath/gumath/libgumath/cpu_device_unary.o +0 -0
  27. data/ext/ruby_gumath/gumath/libgumath/cpu_host_binary.o +0 -0
  28. data/ext/ruby_gumath/gumath/libgumath/cpu_host_unary.o +0 -0
  29. data/ext/ruby_gumath/gumath/libgumath/examples.o +0 -0
  30. data/ext/ruby_gumath/gumath/libgumath/extending/graph.c +27 -20
  31. data/ext/ruby_gumath/gumath/libgumath/extending/pdist.c +1 -1
  32. data/ext/ruby_gumath/gumath/libgumath/func.c +13 -9
  33. data/ext/ruby_gumath/gumath/libgumath/func.o +0 -0
  34. data/ext/ruby_gumath/gumath/libgumath/graph.o +0 -0
  35. data/ext/ruby_gumath/gumath/libgumath/gumath.h +55 -14
  36. data/ext/ruby_gumath/gumath/libgumath/kernels/common.c +513 -0
  37. data/ext/ruby_gumath/gumath/libgumath/kernels/common.h +155 -0
  38. data/ext/ruby_gumath/gumath/libgumath/kernels/contrib/bfloat16.h +520 -0
  39. data/ext/ruby_gumath/gumath/libgumath/kernels/cpu_device_binary.cc +1123 -0
  40. data/ext/ruby_gumath/gumath/libgumath/kernels/cpu_device_binary.h +1062 -0
  41. data/ext/ruby_gumath/gumath/libgumath/kernels/cpu_device_msvc.cc +555 -0
  42. data/ext/ruby_gumath/gumath/libgumath/kernels/cpu_device_unary.cc +368 -0
  43. data/ext/ruby_gumath/gumath/libgumath/kernels/cpu_device_unary.h +335 -0
  44. data/ext/ruby_gumath/gumath/libgumath/kernels/cpu_host_binary.c +2952 -0
  45. data/ext/ruby_gumath/gumath/libgumath/kernels/cpu_host_unary.c +1100 -0
  46. data/ext/ruby_gumath/gumath/libgumath/kernels/cuda_device_binary.cu +1143 -0
  47. data/ext/ruby_gumath/gumath/libgumath/kernels/cuda_device_binary.h +1061 -0
  48. data/ext/ruby_gumath/gumath/libgumath/kernels/cuda_device_unary.cu +528 -0
  49. data/ext/ruby_gumath/gumath/libgumath/kernels/cuda_device_unary.h +463 -0
  50. data/ext/ruby_gumath/gumath/libgumath/kernels/cuda_host_binary.c +2817 -0
  51. data/ext/ruby_gumath/gumath/libgumath/kernels/cuda_host_unary.c +1331 -0
  52. data/ext/ruby_gumath/gumath/libgumath/kernels/device.hh +614 -0
  53. data/ext/ruby_gumath/gumath/libgumath/libgumath.a +0 -0
  54. data/ext/ruby_gumath/gumath/libgumath/libgumath.so +1 -0
  55. data/ext/ruby_gumath/gumath/libgumath/libgumath.so.0 +1 -0
  56. data/ext/ruby_gumath/gumath/libgumath/libgumath.so.0.2.0dev3 +0 -0
  57. data/ext/ruby_gumath/gumath/libgumath/nploops.o +0 -0
  58. data/ext/ruby_gumath/gumath/libgumath/pdist.o +0 -0
  59. data/ext/ruby_gumath/gumath/libgumath/quaternion.o +0 -0
  60. data/ext/ruby_gumath/gumath/libgumath/tbl.o +0 -0
  61. data/ext/ruby_gumath/gumath/libgumath/thread.c +17 -4
  62. data/ext/ruby_gumath/gumath/libgumath/thread.o +0 -0
  63. data/ext/ruby_gumath/gumath/libgumath/xndloops.c +110 -0
  64. data/ext/ruby_gumath/gumath/libgumath/xndloops.o +0 -0
  65. data/ext/ruby_gumath/gumath/python/gumath/__init__.py +150 -0
  66. data/ext/ruby_gumath/gumath/python/gumath/_gumath.c +446 -80
  67. data/ext/ruby_gumath/gumath/python/gumath/cuda.c +78 -0
  68. data/ext/ruby_gumath/gumath/python/gumath/examples.c +0 -5
  69. data/ext/ruby_gumath/gumath/python/gumath/functions.c +2 -2
  70. data/ext/ruby_gumath/gumath/python/gumath/gumath.h +246 -0
  71. data/ext/ruby_gumath/gumath/python/gumath/libgumath.a +0 -0
  72. data/ext/ruby_gumath/gumath/python/gumath/libgumath.so +1 -0
  73. data/ext/ruby_gumath/gumath/python/gumath/libgumath.so.0 +1 -0
  74. data/ext/ruby_gumath/gumath/python/gumath/libgumath.so.0.2.0dev3 +0 -0
  75. data/ext/ruby_gumath/gumath/python/gumath/pygumath.h +31 -2
  76. data/ext/ruby_gumath/gumath/python/gumath_aux.py +767 -0
  77. data/ext/ruby_gumath/gumath/python/randdec.py +535 -0
  78. data/ext/ruby_gumath/gumath/python/randfloat.py +177 -0
  79. data/ext/ruby_gumath/gumath/python/test_gumath.py +1504 -24
  80. data/ext/ruby_gumath/gumath/python/test_xndarray.py +462 -0
  81. data/ext/ruby_gumath/gumath/setup.py +67 -6
  82. data/ext/ruby_gumath/gumath/tools/detect_cuda_arch.cc +35 -0
  83. data/ext/ruby_gumath/include/gumath.h +55 -14
  84. data/ext/ruby_gumath/include/ruby_gumath.h +4 -1
  85. data/ext/ruby_gumath/lib/libgumath.a +0 -0
  86. data/ext/ruby_gumath/lib/libgumath.so.0.2.0dev3 +0 -0
  87. data/ext/ruby_gumath/ruby_gumath.c +231 -70
  88. data/ext/ruby_gumath/ruby_gumath.h +4 -1
  89. data/ext/ruby_gumath/ruby_gumath_internal.h +25 -0
  90. data/ext/ruby_gumath/util.c +34 -0
  91. data/ext/ruby_gumath/util.h +9 -0
  92. data/gumath.gemspec +3 -2
  93. data/lib/gumath.rb +55 -1
  94. data/lib/gumath/version.rb +2 -2
  95. data/lib/ruby_gumath.so +0 -0
  96. metadata +63 -10
  97. data/ext/ruby_gumath/gumath/libgumath/extending/bfloat16.c +0 -130
  98. data/ext/ruby_gumath/gumath/libgumath/kernels/binary.c +0 -547
  99. data/ext/ruby_gumath/gumath/libgumath/kernels/unary.c +0 -449
@@ -0,0 +1,1062 @@
1
+ /*
2
+ * BSD 3-Clause License
3
+ *
4
+ * Copyright (c) 2017-2018, plures
5
+ * All rights reserved.
6
+ *
7
+ * Redistribution and use in source and binary forms, with or without
8
+ * modification, are permitted provided that the following conditions are met:
9
+ *
10
+ * 1. Redistributions of source code must retain the above copyright notice,
11
+ * this list of conditions and the following disclaimer.
12
+ *
13
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
14
+ * this list of conditions and the following disclaimer in the documentation
15
+ * and/or other materials provided with the distribution.
16
+ *
17
+ * 3. Neither the name of the copyright holder nor the names of its
18
+ * contributors may be used to endorse or promote products derived from
19
+ * this software without specific prior written permission.
20
+ *
21
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
22
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
24
+ * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
25
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
27
+ * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
28
+ * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
29
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
30
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31
+ */
32
+
33
+
34
+ #ifndef CPU_DEVICE_BINARY_H
35
+ #define CPU_DEVICE_BINARY_H
36
+
37
+
38
/*
 * Scalar type aliases used to spell the element types in kernel names.
 *
 * C++ translation units additionally get bfloat16_t (tf::bfloat16 from
 * contrib/bfloat16.h) and the two std::complex aliases.  C translation
 * units only get the fixed-width integer types plus the aliases below.
 *
 * <stdbool.h> is included on the C side because `bool` is not a keyword
 * before C23; without it this header only compiles in C when some earlier
 * include happens to provide `bool`.
 */
#ifdef __cplusplus
  #include <cinttypes>
  #include <complex>
  #include "contrib/bfloat16.h"

  typedef tf::bfloat16 bfloat16_t;
  typedef std::complex<float> complex64_t;
  typedef std::complex<double> complex128_t;
#else
  #include <stdbool.h>
  #include <stdint.h>
#endif


typedef bool bool_t;
typedef float float32_t;
typedef double float64_t;
54
+
55
+
56
+ /*****************************************************************************/
57
+ /* CPU device kernel signature */
58
+ /*****************************************************************************/
59
+
60
/*
 * Prototype generators for the CPU device kernels.
 *
 * CPU_DEVICE_BINARY_DECL(name, t0, t1, t2) declares three void functions:
 *
 *   gm_cpu_device_fixed_1D_C_<name>_<t0>_<t1>_<t2>(a0, a1, a2, N)
 *   gm_cpu_device_fixed_1D_S_<name>_<t0>_<t1>_<t2>(a0, a1, a2, s0, s1, s2, N)
 *   gm_cpu_device_0D_<name>_<t0>_<t1>_<t2>(a0, a1, a2)
 *
 * CPU_DEVICE_BINARY_MV_DECL(name, t0, t1, t2, t3) declares the 1D_C and 0D
 * forms only, each with an extra writable pointer a3.
 *
 * a0 and a1 are pointers to const data; a2 (and a3) are writable.
 * NOTE(review): the _C_/_S_ suffixes and s0..s2 presumably stand for
 * contiguous/strided access and per-argument strides -- confirm against
 * the kernel definitions.
 *
 * The functions get C linkage when the header is compiled as C++, so the
 * same symbol names are seen from both C and C++ translation units.
 */
#ifdef __cplusplus
  #define CPU_DEVICE_BINARY_LINKAGE extern "C"
#else
  #define CPU_DEVICE_BINARY_LINKAGE
#endif

#define CPU_DEVICE_BINARY_DECL(name, t0, t1, t2) \
    CPU_DEVICE_BINARY_LINKAGE void \
    gm_cpu_device_fixed_1D_C_##name##_##t0##_##t1##_##t2( \
        const char *a0, const char *a1, char *a2, \
        const int64_t N); \
    CPU_DEVICE_BINARY_LINKAGE void \
    gm_cpu_device_fixed_1D_S_##name##_##t0##_##t1##_##t2( \
        const char *a0, const char *a1, char *a2, \
        const int64_t s0, const int64_t s1, const int64_t s2, \
        const int64_t N); \
    CPU_DEVICE_BINARY_LINKAGE void \
    gm_cpu_device_0D_##name##_##t0##_##t1##_##t2( \
        const char *a0, const char *a1, char *a2);

#define CPU_DEVICE_BINARY_MV_DECL(name, t0, t1, t2, t3) \
    CPU_DEVICE_BINARY_LINKAGE void \
    gm_cpu_device_fixed_1D_C_##name##_##t0##_##t1##_##t2##_##t3( \
        const char *a0, const char *a1, char *a2, char *a3, \
        const int64_t N); \
    CPU_DEVICE_BINARY_LINKAGE void \
    gm_cpu_device_0D_##name##_##t0##_##t1##_##t2##_##t3( \
        const char *a0, const char *a1, char *a2, char *a3);
97
+
98
/*
 * Placeholder entries for the type tables: both macros accept the same
 * arguments as CPU_DEVICE_BINARY_DECL but expand to nothing, so a table row
 * written with either of them produces no declaration.
 * NOTE(review): the names suggest "no kernel for this combination" and
 * "kernel not implemented yet" respectively -- confirm with the kernel
 * sources.
 */
#define CPU_DEVICE_NOKERN_DECL(name, t0, t1, t2)        /* expands to nothing */
#define CPU_DEVICE_BINARY_NOIMPL_DECL(name, t0, t1, t2) /* expands to nothing */
100
+
101
+
102
+ /*****************************************************************************/
103
+ /* Arithmetic */
104
+ /*****************************************************************************/
105
+
106
/*
 * Type table for binary arithmetic kernels.
 *
 * CPU_DEVICE_BINARY_ARITHMETIC_DECL(name) expands to one table entry per
 * (t0, t1, t2) combination listed below, in this order.  Entries written
 * with CPU_DEVICE_BINARY_DECL produce declarations; entries written with
 * CPU_DEVICE_BINARY_NOIMPL_DECL are kept in the table but produce nothing.
 *
 * Visible properties of the table:
 *   - every combination involving float16 or complex32 is a NOIMPL entry;
 *   - uint64 is paired only with unsigned integer types;
 *   - int64 is paired only with integer types up to 32 bits and int64.
 * NOTE(review): t0/t1 are presumably the operand types and t2 the result
 * type -- confirm against the kernel definitions.
 */
#define CPU_DEVICE_BINARY_ARITHMETIC_DECL(name) \
    /* t0 = uint8 */ \
    CPU_DEVICE_BINARY_DECL(name, uint8, uint8, uint8) \
    CPU_DEVICE_BINARY_DECL(name, uint8, uint16, uint16) \
    CPU_DEVICE_BINARY_DECL(name, uint8, uint32, uint32) \
    CPU_DEVICE_BINARY_DECL(name, uint8, uint64, uint64) \
    CPU_DEVICE_BINARY_DECL(name, uint8, int8, int16) \
    CPU_DEVICE_BINARY_DECL(name, uint8, int16, int16) \
    CPU_DEVICE_BINARY_DECL(name, uint8, int32, int32) \
    CPU_DEVICE_BINARY_DECL(name, uint8, int64, int64) \
    CPU_DEVICE_BINARY_DECL(name, uint8, bfloat16, bfloat16) \
    CPU_DEVICE_BINARY_NOIMPL_DECL(name, uint8, float16, float16) \
    CPU_DEVICE_BINARY_DECL(name, uint8, float32, float32) \
    CPU_DEVICE_BINARY_DECL(name, uint8, float64, float64) \
    CPU_DEVICE_BINARY_NOIMPL_DECL(name, uint8, complex32, complex32) \
    CPU_DEVICE_BINARY_DECL(name, uint8, complex64, complex64) \
    CPU_DEVICE_BINARY_DECL(name, uint8, complex128, complex128) \
    /* t0 = uint16 */ \
    CPU_DEVICE_BINARY_DECL(name, uint16, uint8, uint16) \
    CPU_DEVICE_BINARY_DECL(name, uint16, uint16, uint16) \
    CPU_DEVICE_BINARY_DECL(name, uint16, uint32, uint32) \
    CPU_DEVICE_BINARY_DECL(name, uint16, uint64, uint64) \
    CPU_DEVICE_BINARY_DECL(name, uint16, int8, int32) \
    CPU_DEVICE_BINARY_DECL(name, uint16, int16, int32) \
    CPU_DEVICE_BINARY_DECL(name, uint16, int32, int32) \
    CPU_DEVICE_BINARY_DECL(name, uint16, int64, int64) \
    CPU_DEVICE_BINARY_DECL(name, uint16, bfloat16, float32) \
    CPU_DEVICE_BINARY_NOIMPL_DECL(name, uint16, float16, float32) \
    CPU_DEVICE_BINARY_DECL(name, uint16, float32, float32) \
    CPU_DEVICE_BINARY_DECL(name, uint16, float64, float64) \
    CPU_DEVICE_BINARY_NOIMPL_DECL(name, uint16, complex32, complex64) \
    CPU_DEVICE_BINARY_DECL(name, uint16, complex64, complex64) \
    CPU_DEVICE_BINARY_DECL(name, uint16, complex128, complex128) \
    /* t0 = uint32 */ \
    CPU_DEVICE_BINARY_DECL(name, uint32, uint8, uint32) \
    CPU_DEVICE_BINARY_DECL(name, uint32, uint16, uint32) \
    CPU_DEVICE_BINARY_DECL(name, uint32, uint32, uint32) \
    CPU_DEVICE_BINARY_DECL(name, uint32, uint64, uint64) \
    CPU_DEVICE_BINARY_DECL(name, uint32, int8, int64) \
    CPU_DEVICE_BINARY_DECL(name, uint32, int16, int64) \
    CPU_DEVICE_BINARY_DECL(name, uint32, int32, int64) \
    CPU_DEVICE_BINARY_DECL(name, uint32, int64, int64) \
    CPU_DEVICE_BINARY_DECL(name, uint32, bfloat16, float64) \
    CPU_DEVICE_BINARY_NOIMPL_DECL(name, uint32, float16, float64) \
    CPU_DEVICE_BINARY_DECL(name, uint32, float32, float64) \
    CPU_DEVICE_BINARY_DECL(name, uint32, float64, float64) \
    CPU_DEVICE_BINARY_NOIMPL_DECL(name, uint32, complex32, complex128) \
    CPU_DEVICE_BINARY_DECL(name, uint32, complex64, complex128) \
    CPU_DEVICE_BINARY_DECL(name, uint32, complex128, complex128) \
    /* t0 = uint64 */ \
    CPU_DEVICE_BINARY_DECL(name, uint64, uint8, uint64) \
    CPU_DEVICE_BINARY_DECL(name, uint64, uint16, uint64) \
    CPU_DEVICE_BINARY_DECL(name, uint64, uint32, uint64) \
    CPU_DEVICE_BINARY_DECL(name, uint64, uint64, uint64) \
    /* t0 = int8 */ \
    CPU_DEVICE_BINARY_DECL(name, int8, uint8, int16) \
    CPU_DEVICE_BINARY_DECL(name, int8, uint16, int32) \
    CPU_DEVICE_BINARY_DECL(name, int8, uint32, int64) \
    CPU_DEVICE_BINARY_DECL(name, int8, int8, int8) \
    CPU_DEVICE_BINARY_DECL(name, int8, int16, int16) \
    CPU_DEVICE_BINARY_DECL(name, int8, int32, int32) \
    CPU_DEVICE_BINARY_DECL(name, int8, int64, int64) \
    CPU_DEVICE_BINARY_DECL(name, int8, bfloat16, bfloat16) \
    CPU_DEVICE_BINARY_NOIMPL_DECL(name, int8, float16, float16) \
    CPU_DEVICE_BINARY_DECL(name, int8, float32, float32) \
    CPU_DEVICE_BINARY_DECL(name, int8, float64, float64) \
    CPU_DEVICE_BINARY_NOIMPL_DECL(name, int8, complex32, complex32) \
    CPU_DEVICE_BINARY_DECL(name, int8, complex64, complex64) \
    CPU_DEVICE_BINARY_DECL(name, int8, complex128, complex128) \
    /* t0 = int16 */ \
    CPU_DEVICE_BINARY_DECL(name, int16, uint8, int16) \
    CPU_DEVICE_BINARY_DECL(name, int16, uint16, int32) \
    CPU_DEVICE_BINARY_DECL(name, int16, uint32, int64) \
    CPU_DEVICE_BINARY_DECL(name, int16, int8, int16) \
    CPU_DEVICE_BINARY_DECL(name, int16, int16, int16) \
    CPU_DEVICE_BINARY_DECL(name, int16, int32, int32) \
    CPU_DEVICE_BINARY_DECL(name, int16, int64, int64) \
    CPU_DEVICE_BINARY_DECL(name, int16, bfloat16, float32) \
    CPU_DEVICE_BINARY_NOIMPL_DECL(name, int16, float16, float32) \
    CPU_DEVICE_BINARY_DECL(name, int16, float32, float32) \
    CPU_DEVICE_BINARY_DECL(name, int16, float64, float64) \
    CPU_DEVICE_BINARY_NOIMPL_DECL(name, int16, complex32, complex64) \
    CPU_DEVICE_BINARY_DECL(name, int16, complex64, complex64) \
    CPU_DEVICE_BINARY_DECL(name, int16, complex128, complex128) \
    /* t0 = int32 */ \
    CPU_DEVICE_BINARY_DECL(name, int32, uint8, int32) \
    CPU_DEVICE_BINARY_DECL(name, int32, uint16, int32) \
    CPU_DEVICE_BINARY_DECL(name, int32, uint32, int64) \
    CPU_DEVICE_BINARY_DECL(name, int32, int8, int32) \
    CPU_DEVICE_BINARY_DECL(name, int32, int16, int32) \
    CPU_DEVICE_BINARY_DECL(name, int32, int32, int32) \
    CPU_DEVICE_BINARY_DECL(name, int32, int64, int64) \
    CPU_DEVICE_BINARY_DECL(name, int32, bfloat16, float64) \
    CPU_DEVICE_BINARY_NOIMPL_DECL(name, int32, float16, float64) \
    CPU_DEVICE_BINARY_DECL(name, int32, float32, float64) \
    CPU_DEVICE_BINARY_DECL(name, int32, float64, float64) \
    CPU_DEVICE_BINARY_NOIMPL_DECL(name, int32, complex32, complex128) \
    CPU_DEVICE_BINARY_DECL(name, int32, complex64, complex128) \
    CPU_DEVICE_BINARY_DECL(name, int32, complex128, complex128) \
    /* t0 = int64 */ \
    CPU_DEVICE_BINARY_DECL(name, int64, uint8, int64) \
    CPU_DEVICE_BINARY_DECL(name, int64, uint16, int64) \
    CPU_DEVICE_BINARY_DECL(name, int64, uint32, int64) \
    CPU_DEVICE_BINARY_DECL(name, int64, int8, int64) \
    CPU_DEVICE_BINARY_DECL(name, int64, int16, int64) \
    CPU_DEVICE_BINARY_DECL(name, int64, int32, int64) \
    CPU_DEVICE_BINARY_DECL(name, int64, int64, int64) \
    /* t0 = bfloat16 */ \
    CPU_DEVICE_BINARY_DECL(name, bfloat16, uint8, bfloat16) \
    CPU_DEVICE_BINARY_DECL(name, bfloat16, uint16, float32) \
    CPU_DEVICE_BINARY_DECL(name, bfloat16, uint32, float64) \
    CPU_DEVICE_BINARY_DECL(name, bfloat16, int8, bfloat16) \
    CPU_DEVICE_BINARY_DECL(name, bfloat16, int16, float32) \
    CPU_DEVICE_BINARY_DECL(name, bfloat16, int32, float64) \
    CPU_DEVICE_BINARY_DECL(name, bfloat16, bfloat16, bfloat16) \
    CPU_DEVICE_BINARY_NOIMPL_DECL(name, bfloat16, float16, float32) \
    CPU_DEVICE_BINARY_DECL(name, bfloat16, float32, float32) \
    CPU_DEVICE_BINARY_DECL(name, bfloat16, float64, float64) \
    CPU_DEVICE_BINARY_NOIMPL_DECL(name, bfloat16, complex32, complex64) \
    CPU_DEVICE_BINARY_DECL(name, bfloat16, complex64, complex64) \
    CPU_DEVICE_BINARY_DECL(name, bfloat16, complex128, complex128) \
    /* t0 = float16 */ \
    CPU_DEVICE_BINARY_NOIMPL_DECL(name, float16, uint8, float16) \
    CPU_DEVICE_BINARY_NOIMPL_DECL(name, float16, uint16, float32) \
    CPU_DEVICE_BINARY_NOIMPL_DECL(name, float16, uint32, float64) \
    CPU_DEVICE_BINARY_NOIMPL_DECL(name, float16, int8, float16) \
    CPU_DEVICE_BINARY_NOIMPL_DECL(name, float16, int16, float32) \
    CPU_DEVICE_BINARY_NOIMPL_DECL(name, float16, int32, float64) \
    CPU_DEVICE_BINARY_NOIMPL_DECL(name, float16, bfloat16, float32) \
    CPU_DEVICE_BINARY_NOIMPL_DECL(name, float16, float16, float16) \
    CPU_DEVICE_BINARY_NOIMPL_DECL(name, float16, float32, float32) \
    CPU_DEVICE_BINARY_NOIMPL_DECL(name, float16, float64, float64) \
    CPU_DEVICE_BINARY_NOIMPL_DECL(name, float16, complex32, complex32) \
    CPU_DEVICE_BINARY_NOIMPL_DECL(name, float16, complex64, complex64) \
    CPU_DEVICE_BINARY_NOIMPL_DECL(name, float16, complex128, complex128) \
    /* t0 = float32 */ \
    CPU_DEVICE_BINARY_DECL(name, float32, uint8, float32) \
    CPU_DEVICE_BINARY_DECL(name, float32, uint16, float32) \
    CPU_DEVICE_BINARY_DECL(name, float32, uint32, float64) \
    CPU_DEVICE_BINARY_DECL(name, float32, int8, float32) \
    CPU_DEVICE_BINARY_DECL(name, float32, int16, float32) \
    CPU_DEVICE_BINARY_DECL(name, float32, int32, float64) \
    CPU_DEVICE_BINARY_DECL(name, float32, bfloat16, float32) \
    CPU_DEVICE_BINARY_NOIMPL_DECL(name, float32, float16, float32) \
    CPU_DEVICE_BINARY_DECL(name, float32, float32, float32) \
    CPU_DEVICE_BINARY_DECL(name, float32, float64, float64) \
    CPU_DEVICE_BINARY_NOIMPL_DECL(name, float32, complex32, complex64) \
    CPU_DEVICE_BINARY_DECL(name, float32, complex64, complex64) \
    CPU_DEVICE_BINARY_DECL(name, float32, complex128, complex128) \
    /* t0 = float64 */ \
    CPU_DEVICE_BINARY_DECL(name, float64, uint8, float64) \
    CPU_DEVICE_BINARY_DECL(name, float64, uint16, float64) \
    CPU_DEVICE_BINARY_DECL(name, float64, uint32, float64) \
    CPU_DEVICE_BINARY_DECL(name, float64, int8, float64) \
    CPU_DEVICE_BINARY_DECL(name, float64, int16, float64) \
    CPU_DEVICE_BINARY_DECL(name, float64, int32, float64) \
    CPU_DEVICE_BINARY_DECL(name, float64, bfloat16, float64) \
    CPU_DEVICE_BINARY_NOIMPL_DECL(name, float64, float16, float64) \
    CPU_DEVICE_BINARY_DECL(name, float64, float32, float64) \
    CPU_DEVICE_BINARY_DECL(name, float64, float64, float64) \
    CPU_DEVICE_BINARY_NOIMPL_DECL(name, float64, complex32, complex128) \
    CPU_DEVICE_BINARY_DECL(name, float64, complex64, complex128) \
    CPU_DEVICE_BINARY_DECL(name, float64, complex128, complex128) \
    /* t0 = complex32 */ \
    CPU_DEVICE_BINARY_NOIMPL_DECL(name, complex32, uint8, complex32) \
    CPU_DEVICE_BINARY_NOIMPL_DECL(name, complex32, uint16, complex64) \
    CPU_DEVICE_BINARY_NOIMPL_DECL(name, complex32, uint32, complex128) \
    CPU_DEVICE_BINARY_NOIMPL_DECL(name, complex32, int8, complex32) \
    CPU_DEVICE_BINARY_NOIMPL_DECL(name, complex32, int16, complex64) \
    CPU_DEVICE_BINARY_NOIMPL_DECL(name, complex32, int32, complex128) \
    CPU_DEVICE_BINARY_NOIMPL_DECL(name, complex32, bfloat16, complex64) \
    CPU_DEVICE_BINARY_NOIMPL_DECL(name, complex32, float16, complex32) \
    CPU_DEVICE_BINARY_NOIMPL_DECL(name, complex32, float32, complex64) \
    CPU_DEVICE_BINARY_NOIMPL_DECL(name, complex32, float64, complex128) \
    CPU_DEVICE_BINARY_NOIMPL_DECL(name, complex32, complex32, complex32) \
    CPU_DEVICE_BINARY_NOIMPL_DECL(name, complex32, complex64, complex64) \
    CPU_DEVICE_BINARY_NOIMPL_DECL(name, complex32, complex128, complex128) \
    /* t0 = complex64 */ \
    CPU_DEVICE_BINARY_DECL(name, complex64, uint8, complex64) \
    CPU_DEVICE_BINARY_DECL(name, complex64, uint16, complex64) \
    CPU_DEVICE_BINARY_DECL(name, complex64, uint32, complex128) \
    CPU_DEVICE_BINARY_DECL(name, complex64, int8, complex64) \
    CPU_DEVICE_BINARY_DECL(name, complex64, int16, complex64) \
    CPU_DEVICE_BINARY_DECL(name, complex64, int32, complex128) \
    CPU_DEVICE_BINARY_DECL(name, complex64, bfloat16, complex64) \
    CPU_DEVICE_BINARY_NOIMPL_DECL(name, complex64, float16, complex64) \
    CPU_DEVICE_BINARY_DECL(name, complex64, float32, complex64) \
    CPU_DEVICE_BINARY_DECL(name, complex64, float64, complex128) \
    CPU_DEVICE_BINARY_NOIMPL_DECL(name, complex64, complex32, complex64) \
    CPU_DEVICE_BINARY_DECL(name, complex64, complex64, complex64) \
    CPU_DEVICE_BINARY_DECL(name, complex64, complex128, complex128) \
    /* t0 = complex128 */ \
    CPU_DEVICE_BINARY_DECL(name, complex128, uint8, complex128) \
    CPU_DEVICE_BINARY_DECL(name, complex128, uint16, complex128) \
    CPU_DEVICE_BINARY_DECL(name, complex128, uint32, complex128) \
    CPU_DEVICE_BINARY_DECL(name, complex128, int8, complex128) \
    CPU_DEVICE_BINARY_DECL(name, complex128, int16, complex128) \
    CPU_DEVICE_BINARY_DECL(name, complex128, int32, complex128) \
    CPU_DEVICE_BINARY_DECL(name, complex128, bfloat16, complex128) \
    CPU_DEVICE_BINARY_NOIMPL_DECL(name, complex128, float16, complex128) \
    CPU_DEVICE_BINARY_DECL(name, complex128, float32, complex128) \
    CPU_DEVICE_BINARY_DECL(name, complex128, float64, complex128) \
    CPU_DEVICE_BINARY_NOIMPL_DECL(name, complex128, complex32, complex128) \
    CPU_DEVICE_BINARY_DECL(name, complex128, complex64, complex128) \
    CPU_DEVICE_BINARY_DECL(name, complex128, complex128, complex128)
310
+
311
+ #define CPU_DEVICE_BINARY_ARITHMETIC_NO_COMPLEX_DECL(name) \
312
+ CPU_DEVICE_BINARY_DECL(name, uint8, uint8, uint8) \
313
+ CPU_DEVICE_BINARY_DECL(name, uint8, uint16, uint16) \
314
+ CPU_DEVICE_BINARY_DECL(name, uint8, uint32, uint32) \
315
+ CPU_DEVICE_BINARY_DECL(name, uint8, uint64, uint64) \
316
+ CPU_DEVICE_BINARY_DECL(name, uint8, int8, int16) \
317
+ CPU_DEVICE_BINARY_DECL(name, uint8, int16, int16) \
318
+ CPU_DEVICE_BINARY_DECL(name, uint8, int32, int32) \
319
+ CPU_DEVICE_BINARY_DECL(name, uint8, int64, int64) \
320
+ CPU_DEVICE_BINARY_DECL(name, uint8, bfloat16, bfloat16) \
321
+ CPU_DEVICE_BINARY_NOIMPL_DECL(name, uint8, float16, float16) \
322
+ CPU_DEVICE_BINARY_DECL(name, uint8, float32, float32) \
323
+ CPU_DEVICE_BINARY_DECL(name, uint8, float64, float64) \
324
+ CPU_DEVICE_NOKERN_DECL(name, uint8, complex32, complex32) \
325
+ CPU_DEVICE_NOKERN_DECL(name, uint8, complex64, complex64) \
326
+ CPU_DEVICE_NOKERN_DECL(name, uint8, complex128, complex128) \
327
+ \
328
+ CPU_DEVICE_BINARY_DECL(name, uint16, uint8, uint16) \
329
+ CPU_DEVICE_BINARY_DECL(name, uint16, uint16, uint16) \
330
+ CPU_DEVICE_BINARY_DECL(name, uint16, uint32, uint32) \
331
+ CPU_DEVICE_BINARY_DECL(name, uint16, uint64, uint64) \
332
+ CPU_DEVICE_BINARY_DECL(name, uint16, int8, int32) \
333
+ CPU_DEVICE_BINARY_DECL(name, uint16, int16, int32) \
334
+ CPU_DEVICE_BINARY_DECL(name, uint16, int32, int32) \
335
+ CPU_DEVICE_BINARY_DECL(name, uint16, int64, int64) \
336
+ CPU_DEVICE_BINARY_DECL(name, uint16, bfloat16, float32) \
337
+ CPU_DEVICE_BINARY_NOIMPL_DECL(name, uint16, float16, float32) \
338
+ CPU_DEVICE_BINARY_DECL(name, uint16, float32, float32) \
339
+ CPU_DEVICE_BINARY_DECL(name, uint16, float64, float64) \
340
+ CPU_DEVICE_NOKERN_DECL(name, uint16, complex32, complex64) \
341
+ CPU_DEVICE_NOKERN_DECL(name, uint16, complex64, complex64) \
342
+ CPU_DEVICE_NOKERN_DECL(name, uint16, complex128, complex128) \
343
+ \
344
+ CPU_DEVICE_BINARY_DECL(name, uint32, uint8, uint32) \
345
+ CPU_DEVICE_BINARY_DECL(name, uint32, uint16, uint32) \
346
+ CPU_DEVICE_BINARY_DECL(name, uint32, uint32, uint32) \
347
+ CPU_DEVICE_BINARY_DECL(name, uint32, uint64, uint64) \
348
+ CPU_DEVICE_BINARY_DECL(name, uint32, int8, int64) \
349
+ CPU_DEVICE_BINARY_DECL(name, uint32, int16, int64) \
350
+ CPU_DEVICE_BINARY_DECL(name, uint32, int32, int64) \
351
+ CPU_DEVICE_BINARY_DECL(name, uint32, int64, int64) \
352
+ CPU_DEVICE_BINARY_DECL(name, uint32, bfloat16, float64) \
353
+ CPU_DEVICE_BINARY_NOIMPL_DECL(name, uint32, float16, float64) \
354
+ CPU_DEVICE_BINARY_DECL(name, uint32, float32, float64) \
355
+ CPU_DEVICE_BINARY_DECL(name, uint32, float64, float64) \
356
+ CPU_DEVICE_NOKERN_DECL(name, uint32, complex32, complex128) \
357
+ CPU_DEVICE_NOKERN_DECL(name, uint32, complex64, complex128) \
358
+ CPU_DEVICE_NOKERN_DECL(name, uint32, complex128, complex128) \
359
+ \
360
+ CPU_DEVICE_BINARY_DECL(name, uint64, uint8, uint64) \
361
+ CPU_DEVICE_BINARY_DECL(name, uint64, uint16, uint64) \
362
+ CPU_DEVICE_BINARY_DECL(name, uint64, uint32, uint64) \
363
+ CPU_DEVICE_BINARY_DECL(name, uint64, uint64, uint64) \
364
+ \
365
+ CPU_DEVICE_BINARY_DECL(name, int8, uint8, int16) \
366
+ CPU_DEVICE_BINARY_DECL(name, int8, uint16, int32) \
367
+ CPU_DEVICE_BINARY_DECL(name, int8, uint32, int64) \
368
+ CPU_DEVICE_BINARY_DECL(name, int8, int8, int8) \
369
+ CPU_DEVICE_BINARY_DECL(name, int8, int16, int16) \
370
+ CPU_DEVICE_BINARY_DECL(name, int8, int32, int32) \
371
+ CPU_DEVICE_BINARY_DECL(name, int8, int64, int64) \
372
+ CPU_DEVICE_BINARY_DECL(name, int8, bfloat16, bfloat16) \
373
+ CPU_DEVICE_BINARY_NOIMPL_DECL(name, int8, float16, float16) \
374
+ CPU_DEVICE_BINARY_DECL(name, int8, float32, float32) \
375
+ CPU_DEVICE_BINARY_DECL(name, int8, float64, float64) \
376
+ CPU_DEVICE_NOKERN_DECL(name, int8, complex32, complex32) \
377
+ CPU_DEVICE_NOKERN_DECL(name, int8, complex64, complex64) \
378
+ CPU_DEVICE_NOKERN_DECL(name, int8, complex128, complex128) \
379
+ \
380
+ CPU_DEVICE_BINARY_DECL(name, int16, uint8, int16) \
381
+ CPU_DEVICE_BINARY_DECL(name, int16, uint16, int32) \
382
+ CPU_DEVICE_BINARY_DECL(name, int16, uint32, int64) \
383
+ CPU_DEVICE_BINARY_DECL(name, int16, int8, int16) \
384
+ CPU_DEVICE_BINARY_DECL(name, int16, int16, int16) \
385
+ CPU_DEVICE_BINARY_DECL(name, int16, int32, int32) \
386
+ CPU_DEVICE_BINARY_DECL(name, int16, int64, int64) \
387
+ CPU_DEVICE_BINARY_DECL(name, int16, bfloat16, float32) \
388
+ CPU_DEVICE_BINARY_NOIMPL_DECL(name, int16, float16, float32) \
389
+ CPU_DEVICE_BINARY_DECL(name, int16, float32, float32) \
390
+ CPU_DEVICE_BINARY_DECL(name, int16, float64, float64) \
391
+ CPU_DEVICE_NOKERN_DECL(name, int16, complex32, complex64) \
392
+ CPU_DEVICE_NOKERN_DECL(name, int16, complex64, complex64) \
393
+ CPU_DEVICE_NOKERN_DECL(name, int16, complex128, complex128) \
394
+ \
395
+ CPU_DEVICE_BINARY_DECL(name, int32, uint8, int32) \
396
+ CPU_DEVICE_BINARY_DECL(name, int32, uint16, int32) \
397
+ CPU_DEVICE_BINARY_DECL(name, int32, uint32, int64) \
398
+ CPU_DEVICE_BINARY_DECL(name, int32, int8, int32) \
399
+ CPU_DEVICE_BINARY_DECL(name, int32, int16, int32) \
400
+ CPU_DEVICE_BINARY_DECL(name, int32, int32, int32) \
401
+ CPU_DEVICE_BINARY_DECL(name, int32, int64, int64) \
402
+ CPU_DEVICE_BINARY_DECL(name, int32, bfloat16, float64) \
403
+ CPU_DEVICE_BINARY_NOIMPL_DECL(name, int32, float16, float64) \
404
+ CPU_DEVICE_BINARY_DECL(name, int32, float32, float64) \
405
+ CPU_DEVICE_BINARY_DECL(name, int32, float64, float64) \
406
+ CPU_DEVICE_NOKERN_DECL(name, int32, complex32, complex128) \
407
+ CPU_DEVICE_NOKERN_DECL(name, int32, complex64, complex128) \
408
+ CPU_DEVICE_NOKERN_DECL(name, int32, complex128, complex128) \
409
+ \
410
+ CPU_DEVICE_BINARY_DECL(name, int64, uint8, int64) \
411
+ CPU_DEVICE_BINARY_DECL(name, int64, uint16, int64) \
412
+ CPU_DEVICE_BINARY_DECL(name, int64, uint32, int64) \
413
+ CPU_DEVICE_BINARY_DECL(name, int64, int8, int64) \
414
+ CPU_DEVICE_BINARY_DECL(name, int64, int16, int64) \
415
+ CPU_DEVICE_BINARY_DECL(name, int64, int32, int64) \
416
+ CPU_DEVICE_BINARY_DECL(name, int64, int64, int64) \
417
+ \
418
+ CPU_DEVICE_BINARY_DECL(name, bfloat16, uint8, bfloat16) \
419
+ CPU_DEVICE_BINARY_DECL(name, bfloat16, uint16, float32) \
420
+ CPU_DEVICE_BINARY_DECL(name, bfloat16, uint32, float64) \
421
+ CPU_DEVICE_BINARY_DECL(name, bfloat16, int8, bfloat16) \
422
+ CPU_DEVICE_BINARY_DECL(name, bfloat16, int16, float32) \
423
+ CPU_DEVICE_BINARY_DECL(name, bfloat16, int32, float64) \
424
+ CPU_DEVICE_BINARY_DECL(name, bfloat16, bfloat16, bfloat16) \
425
+ CPU_DEVICE_BINARY_NOIMPL_DECL(name, bfloat16, float16, float32) \
426
+ CPU_DEVICE_BINARY_DECL(name, bfloat16, float32, float32) \
427
+ CPU_DEVICE_BINARY_DECL(name, bfloat16, float64, float64) \
428
+ CPU_DEVICE_NOKERN_DECL(name, bfloat16, complex32, complex64) \
429
+ CPU_DEVICE_NOKERN_DECL(name, bfloat16, complex64, complex64) \
430
+ CPU_DEVICE_NOKERN_DECL(name, bfloat16, complex128, complex128) \
431
+ \
432
+ CPU_DEVICE_BINARY_NOIMPL_DECL(name, float16, uint8, float16) \
433
+ CPU_DEVICE_BINARY_NOIMPL_DECL(name, float16, uint16, float32) \
434
+ CPU_DEVICE_BINARY_NOIMPL_DECL(name, float16, uint32, float64) \
435
+ CPU_DEVICE_BINARY_NOIMPL_DECL(name, float16, int8, float16) \
436
+ CPU_DEVICE_BINARY_NOIMPL_DECL(name, float16, int16, float32) \
437
+ CPU_DEVICE_BINARY_NOIMPL_DECL(name, float16, int32, float64) \
438
+ CPU_DEVICE_BINARY_NOIMPL_DECL(name, float16, bfloat16, float32) \
439
+ CPU_DEVICE_BINARY_NOIMPL_DECL(name, float16, float16, float16) \
440
+ CPU_DEVICE_BINARY_NOIMPL_DECL(name, float16, float32, float32) \
441
+ CPU_DEVICE_BINARY_NOIMPL_DECL(name, float16, float64, float64) \
442
+ CPU_DEVICE_NOKERN_DECL(name, float16, complex32, complex32) \
443
+ CPU_DEVICE_NOKERN_DECL(name, float16, complex64, complex64) \
444
+ CPU_DEVICE_NOKERN_DECL(name, float16, complex128, complex128) \
445
+ \
446
+ CPU_DEVICE_BINARY_DECL(name, float32, uint8, float32) \
447
+ CPU_DEVICE_BINARY_DECL(name, float32, uint16, float32) \
448
+ CPU_DEVICE_BINARY_DECL(name, float32, uint32, float64) \
449
+ CPU_DEVICE_BINARY_DECL(name, float32, int8, float32) \
450
+ CPU_DEVICE_BINARY_DECL(name, float32, int16, float32) \
451
+ CPU_DEVICE_BINARY_DECL(name, float32, int32, float64) \
452
+ CPU_DEVICE_BINARY_DECL(name, float32, bfloat16, float32) \
453
+ CPU_DEVICE_BINARY_NOIMPL_DECL(name, float32, float16, float32) \
454
+ CPU_DEVICE_BINARY_DECL(name, float32, float32, float32) \
455
+ CPU_DEVICE_BINARY_DECL(name, float32, float64, float64) \
456
+ CPU_DEVICE_NOKERN_DECL(name, float32, complex32, complex64) \
457
+ CPU_DEVICE_NOKERN_DECL(name, float32, complex64, complex64) \
458
+ CPU_DEVICE_NOKERN_DECL(name, float32, complex128, complex128) \
459
+ \
460
+ CPU_DEVICE_BINARY_DECL(name, float64, uint8, float64) \
461
+ CPU_DEVICE_BINARY_DECL(name, float64, uint16, float64) \
462
+ CPU_DEVICE_BINARY_DECL(name, float64, uint32, float64) \
463
+ CPU_DEVICE_BINARY_DECL(name, float64, int8, float64) \
464
+ CPU_DEVICE_BINARY_DECL(name, float64, int16, float64) \
465
+ CPU_DEVICE_BINARY_DECL(name, float64, int32, float64) \
466
+ CPU_DEVICE_BINARY_DECL(name, float64, bfloat16, float64) \
467
+ CPU_DEVICE_BINARY_NOIMPL_DECL(name, float64, float16, float64) \
468
+ CPU_DEVICE_BINARY_DECL(name, float64, float32, float64) \
469
+ CPU_DEVICE_BINARY_DECL(name, float64, float64, float64) \
470
+ CPU_DEVICE_NOKERN_DECL(name, float64, complex32, complex128) \
471
+ CPU_DEVICE_NOKERN_DECL(name, float64, complex64, complex128) \
472
+ CPU_DEVICE_NOKERN_DECL(name, float64, complex128, complex128) \
473
+ \
474
+ CPU_DEVICE_NOKERN_DECL(name, complex32, uint8, complex32) \
475
+ CPU_DEVICE_NOKERN_DECL(name, complex32, uint16, complex64) \
476
+ CPU_DEVICE_NOKERN_DECL(name, complex32, uint32, complex128) \
477
+ CPU_DEVICE_NOKERN_DECL(name, complex32, int8, complex32) \
478
+ CPU_DEVICE_NOKERN_DECL(name, complex32, int16, complex64) \
479
+ CPU_DEVICE_NOKERN_DECL(name, complex32, int32, complex128) \
480
+ CPU_DEVICE_BINARY_NOIMPL_DECL(name, complex32, bfloat16, complex64) \
481
+ CPU_DEVICE_NOKERN_DECL(name, complex32, float16, complex32) \
482
+ CPU_DEVICE_NOKERN_DECL(name, complex32, float32, complex64) \
483
+ CPU_DEVICE_NOKERN_DECL(name, complex32, float64, complex128) \
484
+ CPU_DEVICE_NOKERN_DECL(name, complex32, complex32, complex32) \
485
+ CPU_DEVICE_NOKERN_DECL(name, complex32, complex64, complex64) \
486
+ CPU_DEVICE_NOKERN_DECL(name, complex32, complex128, complex128) \
487
+ \
488
+ CPU_DEVICE_NOKERN_DECL(name, complex64, uint8, complex64) \
489
+ CPU_DEVICE_NOKERN_DECL(name, complex64, uint16, complex64) \
490
+ CPU_DEVICE_NOKERN_DECL(name, complex64, uint32, complex128) \
491
+ CPU_DEVICE_NOKERN_DECL(name, complex64, int8, complex64) \
492
+ CPU_DEVICE_NOKERN_DECL(name, complex64, int16, complex64) \
493
+ CPU_DEVICE_NOKERN_DECL(name, complex64, int32, complex128) \
494
+ CPU_DEVICE_BINARY_DECL(name, complex64, bfloat16, complex64) \
495
+ CPU_DEVICE_NOKERN_DECL(name, complex64, float16, complex64) \
496
+ CPU_DEVICE_NOKERN_DECL(name, complex64, float32, complex64) \
497
+ CPU_DEVICE_NOKERN_DECL(name, complex64, float64, complex128) \
498
+ CPU_DEVICE_NOKERN_DECL(name, complex64, complex32, complex64) \
499
+ CPU_DEVICE_NOKERN_DECL(name, complex64, complex64, complex64) \
500
+ CPU_DEVICE_NOKERN_DECL(name, complex64, complex128, complex128) \
501
+ \
502
+ CPU_DEVICE_NOKERN_DECL(name, complex128, uint8, complex128) \
503
+ CPU_DEVICE_NOKERN_DECL(name, complex128, uint16, complex128) \
504
+ CPU_DEVICE_NOKERN_DECL(name, complex128, uint32, complex128) \
505
+ CPU_DEVICE_NOKERN_DECL(name, complex128, int8, complex128) \
506
+ CPU_DEVICE_NOKERN_DECL(name, complex128, int16, complex128) \
507
+ CPU_DEVICE_NOKERN_DECL(name, complex128, int32, complex128) \
508
+ CPU_DEVICE_BINARY_DECL(name, complex128, bfloat16, complex128) \
509
+ CPU_DEVICE_NOKERN_DECL(name, complex128, float16, complex128) \
510
+ CPU_DEVICE_NOKERN_DECL(name, complex128, float32, complex128) \
511
+ CPU_DEVICE_NOKERN_DECL(name, complex128, float64, complex128) \
512
+ CPU_DEVICE_NOKERN_DECL(name, complex128, complex32, complex128) \
513
+ CPU_DEVICE_NOKERN_DECL(name, complex128, complex64, complex128) \
514
+ CPU_DEVICE_NOKERN_DECL(name, complex128, complex128, complex128)
515
+
516
/*
 * CPU_DEVICE_BINARY_ARITHMETIC_FLOAT_RETURN_DECL(name)
 *
 * Declaration table for a binary arithmetic kernel family `name`.  Each
 * entry has the form (name, first input type, second input type, output
 * type).  For every pairing that does not involve uint64 or int64 the
 * listed output type is a floating-point or complex type, even when both
 * inputs are integers.
 *
 * Which helper macro an entry uses follows a visible pattern:
 *   - CPU_DEVICE_NOKERN_DECL: every pairing involving uint64 or int64.
 *   - CPU_DEVICE_BINARY_NOIMPL_DECL: every pairing in which float16 or
 *     complex32 appears as an input or as the output type.
 *   - CPU_DEVICE_BINARY_DECL: all remaining pairings.
 *
 * The three helper macros are defined outside this section; presumably
 * only CPU_DEVICE_BINARY_DECL produces a usable kernel declaration --
 * TODO confirm against their definitions.
 */
+ #define CPU_DEVICE_BINARY_ARITHMETIC_FLOAT_RETURN_DECL(name) \
517
+ CPU_DEVICE_BINARY_NOIMPL_DECL(name, uint8, uint8, float16) \
518
+ CPU_DEVICE_BINARY_DECL(name, uint8, uint16, float32) \
519
+ CPU_DEVICE_BINARY_DECL(name, uint8, uint32, float64) \
520
+ CPU_DEVICE_NOKERN_DECL(name, uint8, uint64, uint64) \
521
+ CPU_DEVICE_BINARY_NOIMPL_DECL(name, uint8, int8, float16) \
522
+ CPU_DEVICE_BINARY_DECL(name, uint8, int16, float32) \
523
+ CPU_DEVICE_BINARY_DECL(name, uint8, int32, float64) \
524
+ CPU_DEVICE_NOKERN_DECL(name, uint8, int64, int64) \
525
+ CPU_DEVICE_BINARY_DECL(name, uint8, bfloat16, bfloat16) \
526
+ CPU_DEVICE_BINARY_NOIMPL_DECL(name, uint8, float16, float16) \
527
+ CPU_DEVICE_BINARY_DECL(name, uint8, float32, float32) \
528
+ CPU_DEVICE_BINARY_DECL(name, uint8, float64, float64) \
529
+ CPU_DEVICE_BINARY_NOIMPL_DECL(name, uint8, complex32, complex32) \
530
+ CPU_DEVICE_BINARY_DECL(name, uint8, complex64, complex64) \
531
+ CPU_DEVICE_BINARY_DECL(name, uint8, complex128, complex128) \
532
+ \
533
+ CPU_DEVICE_BINARY_DECL(name, uint16, uint8, float32) \
534
+ CPU_DEVICE_BINARY_DECL(name, uint16, uint16, float32) \
535
+ CPU_DEVICE_BINARY_DECL(name, uint16, uint32, float64) \
536
+ CPU_DEVICE_NOKERN_DECL(name, uint16, uint64, uint64) \
537
+ CPU_DEVICE_BINARY_DECL(name, uint16, int8, float32) \
538
+ CPU_DEVICE_BINARY_DECL(name, uint16, int16, float32) \
539
+ CPU_DEVICE_BINARY_DECL(name, uint16, int32, float64) \
540
+ CPU_DEVICE_NOKERN_DECL(name, uint16, int64, int64) \
541
+ CPU_DEVICE_BINARY_DECL(name, uint16, bfloat16, float32) \
542
+ CPU_DEVICE_BINARY_NOIMPL_DECL(name, uint16, float16, float32) \
543
+ CPU_DEVICE_BINARY_DECL(name, uint16, float32, float32) \
544
+ CPU_DEVICE_BINARY_DECL(name, uint16, float64, float64) \
545
+ CPU_DEVICE_BINARY_NOIMPL_DECL(name, uint16, complex32, complex64) \
546
+ CPU_DEVICE_BINARY_DECL(name, uint16, complex64, complex64) \
547
+ CPU_DEVICE_BINARY_DECL(name, uint16, complex128, complex128) \
548
+ \
549
+ CPU_DEVICE_BINARY_DECL(name, uint32, uint8, float64) \
550
+ CPU_DEVICE_BINARY_DECL(name, uint32, uint16, float64) \
551
+ CPU_DEVICE_BINARY_DECL(name, uint32, uint32, float64) \
552
+ CPU_DEVICE_NOKERN_DECL(name, uint32, uint64, uint64) \
553
+ CPU_DEVICE_BINARY_DECL(name, uint32, int8, float64) \
554
+ CPU_DEVICE_BINARY_DECL(name, uint32, int16, float64) \
555
+ CPU_DEVICE_BINARY_DECL(name, uint32, int32, float64) \
556
+ CPU_DEVICE_NOKERN_DECL(name, uint32, int64, int64) \
557
+ CPU_DEVICE_BINARY_DECL(name, uint32, bfloat16, float64) \
558
+ CPU_DEVICE_BINARY_NOIMPL_DECL(name, uint32, float16, float64) \
559
+ CPU_DEVICE_BINARY_DECL(name, uint32, float32, float64) \
560
+ CPU_DEVICE_BINARY_DECL(name, uint32, float64, float64) \
561
+ CPU_DEVICE_BINARY_NOIMPL_DECL(name, uint32, complex32, complex128) \
562
+ CPU_DEVICE_BINARY_DECL(name, uint32, complex64, complex128) \
563
+ CPU_DEVICE_BINARY_DECL(name, uint32, complex128, complex128) \
564
+ \
565
+ CPU_DEVICE_NOKERN_DECL(name, uint64, uint8, uint64) \
566
+ CPU_DEVICE_NOKERN_DECL(name, uint64, uint16, uint64) \
567
+ CPU_DEVICE_NOKERN_DECL(name, uint64, uint32, uint64) \
568
+ CPU_DEVICE_NOKERN_DECL(name, uint64, uint64, uint64) \
569
+ \
570
+ CPU_DEVICE_BINARY_NOIMPL_DECL(name, int8, uint8, float16) \
571
+ CPU_DEVICE_BINARY_DECL(name, int8, uint16, float32) \
572
+ CPU_DEVICE_BINARY_DECL(name, int8, uint32, float64) \
573
+ CPU_DEVICE_BINARY_NOIMPL_DECL(name, int8, int8, float16) \
574
+ CPU_DEVICE_BINARY_DECL(name, int8, int16, float32) \
575
+ CPU_DEVICE_BINARY_DECL(name, int8, int32, float64) \
576
+ CPU_DEVICE_NOKERN_DECL(name, int8, int64, int64) \
577
+ CPU_DEVICE_BINARY_DECL(name, int8, bfloat16, bfloat16) \
578
+ CPU_DEVICE_BINARY_NOIMPL_DECL(name, int8, float16, float16) \
579
+ CPU_DEVICE_BINARY_DECL(name, int8, float32, float32) \
580
+ CPU_DEVICE_BINARY_DECL(name, int8, float64, float64) \
581
+ CPU_DEVICE_BINARY_NOIMPL_DECL(name, int8, complex32, complex32) \
582
+ CPU_DEVICE_BINARY_DECL(name, int8, complex64, complex64) \
583
+ CPU_DEVICE_BINARY_DECL(name, int8, complex128, complex128) \
584
+ \
585
+ CPU_DEVICE_BINARY_DECL(name, int16, uint8, float32) \
586
+ CPU_DEVICE_BINARY_DECL(name, int16, uint16, float32) \
587
+ CPU_DEVICE_BINARY_DECL(name, int16, uint32, float64) \
588
+ CPU_DEVICE_BINARY_DECL(name, int16, int8, float32) \
589
+ CPU_DEVICE_BINARY_DECL(name, int16, int16, float32) \
590
+ CPU_DEVICE_BINARY_DECL(name, int16, int32, float64) \
591
+ CPU_DEVICE_NOKERN_DECL(name, int16, int64, int64) \
592
+ CPU_DEVICE_BINARY_DECL(name, int16, bfloat16, float32) \
593
+ CPU_DEVICE_BINARY_NOIMPL_DECL(name, int16, float16, float32) \
594
+ CPU_DEVICE_BINARY_DECL(name, int16, float32, float32) \
595
+ CPU_DEVICE_BINARY_DECL(name, int16, float64, float64) \
596
+ CPU_DEVICE_BINARY_NOIMPL_DECL(name, int16, complex32, complex64) \
597
+ CPU_DEVICE_BINARY_DECL(name, int16, complex64, complex64) \
598
+ CPU_DEVICE_BINARY_DECL(name, int16, complex128, complex128) \
599
+ \
600
+ CPU_DEVICE_BINARY_DECL(name, int32, uint8, float64) \
601
+ CPU_DEVICE_BINARY_DECL(name, int32, uint16, float64) \
602
+ CPU_DEVICE_BINARY_DECL(name, int32, uint32, float64) \
603
+ CPU_DEVICE_BINARY_DECL(name, int32, int8, float64) \
604
+ CPU_DEVICE_BINARY_DECL(name, int32, int16, float64) \
605
+ CPU_DEVICE_BINARY_DECL(name, int32, int32, float64) \
606
+ CPU_DEVICE_NOKERN_DECL(name, int32, int64, int64) \
607
+ CPU_DEVICE_BINARY_DECL(name, int32, bfloat16, float64) \
608
+ CPU_DEVICE_BINARY_NOIMPL_DECL(name, int32, float16, float64) \
609
+ CPU_DEVICE_BINARY_DECL(name, int32, float32, float64) \
610
+ CPU_DEVICE_BINARY_DECL(name, int32, float64, float64) \
611
+ CPU_DEVICE_BINARY_NOIMPL_DECL(name, int32, complex32, complex128) \
612
+ CPU_DEVICE_BINARY_DECL(name, int32, complex64, complex128) \
613
+ CPU_DEVICE_BINARY_DECL(name, int32, complex128, complex128) \
614
+ \
615
+ CPU_DEVICE_NOKERN_DECL(name, int64, uint8, int64) \
616
+ CPU_DEVICE_NOKERN_DECL(name, int64, uint16, int64) \
617
+ CPU_DEVICE_NOKERN_DECL(name, int64, uint32, int64) \
618
+ CPU_DEVICE_NOKERN_DECL(name, int64, int8, int64) \
619
+ CPU_DEVICE_NOKERN_DECL(name, int64, int16, int64) \
620
+ CPU_DEVICE_NOKERN_DECL(name, int64, int32, int64) \
621
+ CPU_DEVICE_NOKERN_DECL(name, int64, int64, int64) \
622
+ \
623
+ CPU_DEVICE_BINARY_DECL(name, bfloat16, uint8, bfloat16) \
624
+ CPU_DEVICE_BINARY_DECL(name, bfloat16, uint16, float32) \
625
+ CPU_DEVICE_BINARY_DECL(name, bfloat16, uint32, float64) \
626
+ CPU_DEVICE_BINARY_DECL(name, bfloat16, int8, bfloat16) \
627
+ CPU_DEVICE_BINARY_DECL(name, bfloat16, int16, float32) \
628
+ CPU_DEVICE_BINARY_DECL(name, bfloat16, int32, float64) \
629
+ CPU_DEVICE_BINARY_DECL(name, bfloat16, bfloat16, bfloat16) \
630
+ CPU_DEVICE_BINARY_NOIMPL_DECL(name, bfloat16, float16, float32) \
631
+ CPU_DEVICE_BINARY_DECL(name, bfloat16, float32, float32) \
632
+ CPU_DEVICE_BINARY_DECL(name, bfloat16, float64, float64) \
633
+ CPU_DEVICE_BINARY_NOIMPL_DECL(name, bfloat16, complex32, complex64) \
634
+ CPU_DEVICE_BINARY_DECL(name, bfloat16, complex64, complex64) \
635
+ CPU_DEVICE_BINARY_DECL(name, bfloat16, complex128, complex128) \
636
+ \
637
+ CPU_DEVICE_BINARY_NOIMPL_DECL(name, float16, uint8, float16) \
638
+ CPU_DEVICE_BINARY_NOIMPL_DECL(name, float16, uint16, float32) \
639
+ CPU_DEVICE_BINARY_NOIMPL_DECL(name, float16, uint32, float64) \
640
+ CPU_DEVICE_BINARY_NOIMPL_DECL(name, float16, int8, float16) \
641
+ CPU_DEVICE_BINARY_NOIMPL_DECL(name, float16, int16, float32) \
642
+ CPU_DEVICE_BINARY_NOIMPL_DECL(name, float16, int32, float64) \
643
+ CPU_DEVICE_BINARY_NOIMPL_DECL(name, float16, bfloat16, float32) \
644
+ CPU_DEVICE_BINARY_NOIMPL_DECL(name, float16, float16, float16) \
645
+ CPU_DEVICE_BINARY_NOIMPL_DECL(name, float16, float32, float32) \
646
+ CPU_DEVICE_BINARY_NOIMPL_DECL(name, float16, float64, float64) \
647
+ CPU_DEVICE_BINARY_NOIMPL_DECL(name, float16, complex32, complex32) \
648
+ CPU_DEVICE_BINARY_NOIMPL_DECL(name, float16, complex64, complex64) \
649
+ CPU_DEVICE_BINARY_NOIMPL_DECL(name, float16, complex128, complex128) \
650
+ \
651
+ CPU_DEVICE_BINARY_DECL(name, float32, uint8, float32) \
652
+ CPU_DEVICE_BINARY_DECL(name, float32, uint16, float32) \
653
+ CPU_DEVICE_BINARY_DECL(name, float32, uint32, float64) \
654
+ CPU_DEVICE_BINARY_DECL(name, float32, int8, float32) \
655
+ CPU_DEVICE_BINARY_DECL(name, float32, int16, float32) \
656
+ CPU_DEVICE_BINARY_DECL(name, float32, int32, float64) \
657
+ CPU_DEVICE_BINARY_DECL(name, float32, bfloat16, float32) \
658
+ CPU_DEVICE_BINARY_NOIMPL_DECL(name, float32, float16, float32) \
659
+ CPU_DEVICE_BINARY_DECL(name, float32, float32, float32) \
660
+ CPU_DEVICE_BINARY_DECL(name, float32, float64, float64) \
661
+ CPU_DEVICE_BINARY_NOIMPL_DECL(name, float32, complex32, complex64) \
662
+ CPU_DEVICE_BINARY_DECL(name, float32, complex64, complex64) \
663
+ CPU_DEVICE_BINARY_DECL(name, float32, complex128, complex128) \
664
+ \
665
+ CPU_DEVICE_BINARY_DECL(name, float64, uint8, float64) \
666
+ CPU_DEVICE_BINARY_DECL(name, float64, uint16, float64) \
667
+ CPU_DEVICE_BINARY_DECL(name, float64, uint32, float64) \
668
+ CPU_DEVICE_BINARY_DECL(name, float64, int8, float64) \
669
+ CPU_DEVICE_BINARY_DECL(name, float64, int16, float64) \
670
+ CPU_DEVICE_BINARY_DECL(name, float64, int32, float64) \
671
+ CPU_DEVICE_BINARY_DECL(name, float64, bfloat16, float64) \
672
+ CPU_DEVICE_BINARY_NOIMPL_DECL(name, float64, float16, float64) \
673
+ CPU_DEVICE_BINARY_DECL(name, float64, float32, float64) \
674
+ CPU_DEVICE_BINARY_DECL(name, float64, float64, float64) \
675
+ CPU_DEVICE_BINARY_NOIMPL_DECL(name, float64, complex32, complex128) \
676
+ CPU_DEVICE_BINARY_DECL(name, float64, complex64, complex128) \
677
+ CPU_DEVICE_BINARY_DECL(name, float64, complex128, complex128) \
678
+ \
679
+ CPU_DEVICE_BINARY_NOIMPL_DECL(name, complex32, uint8, complex32) \
680
+ CPU_DEVICE_BINARY_NOIMPL_DECL(name, complex32, uint16, complex64) \
681
+ CPU_DEVICE_BINARY_NOIMPL_DECL(name, complex32, uint32, complex128) \
682
+ CPU_DEVICE_BINARY_NOIMPL_DECL(name, complex32, int8, complex32) \
683
+ CPU_DEVICE_BINARY_NOIMPL_DECL(name, complex32, int16, complex64) \
684
+ CPU_DEVICE_BINARY_NOIMPL_DECL(name, complex32, int32, complex128) \
685
+ CPU_DEVICE_BINARY_NOIMPL_DECL(name, complex32, bfloat16, complex64) \
686
+ CPU_DEVICE_BINARY_NOIMPL_DECL(name, complex32, float16, complex32) \
687
+ CPU_DEVICE_BINARY_NOIMPL_DECL(name, complex32, float32, complex64) \
688
+ CPU_DEVICE_BINARY_NOIMPL_DECL(name, complex32, float64, complex128) \
689
+ CPU_DEVICE_BINARY_NOIMPL_DECL(name, complex32, complex32, complex32) \
690
+ CPU_DEVICE_BINARY_NOIMPL_DECL(name, complex32, complex64, complex64) \
691
+ CPU_DEVICE_BINARY_NOIMPL_DECL(name, complex32, complex128, complex128) \
692
+ \
693
+ CPU_DEVICE_BINARY_DECL(name, complex64, uint8, complex64) \
694
+ CPU_DEVICE_BINARY_DECL(name, complex64, uint16, complex64) \
695
+ CPU_DEVICE_BINARY_DECL(name, complex64, uint32, complex128) \
696
+ CPU_DEVICE_BINARY_DECL(name, complex64, int8, complex64) \
697
+ CPU_DEVICE_BINARY_DECL(name, complex64, int16, complex64) \
698
+ CPU_DEVICE_BINARY_DECL(name, complex64, int32, complex128) \
699
+ CPU_DEVICE_BINARY_DECL(name, complex64, bfloat16, complex64) \
700
+ CPU_DEVICE_BINARY_NOIMPL_DECL(name, complex64, float16, complex64) \
701
+ CPU_DEVICE_BINARY_DECL(name, complex64, float32, complex64) \
702
+ CPU_DEVICE_BINARY_DECL(name, complex64, float64, complex128) \
703
+ CPU_DEVICE_BINARY_NOIMPL_DECL(name, complex64, complex32, complex64) \
704
+ CPU_DEVICE_BINARY_DECL(name, complex64, complex64, complex64) \
705
+ CPU_DEVICE_BINARY_DECL(name, complex64, complex128, complex128) \
706
+ \
707
+ CPU_DEVICE_BINARY_DECL(name, complex128, uint8, complex128) \
708
+ CPU_DEVICE_BINARY_DECL(name, complex128, uint16, complex128) \
709
+ CPU_DEVICE_BINARY_DECL(name, complex128, uint32, complex128) \
710
+ CPU_DEVICE_BINARY_DECL(name, complex128, int8, complex128) \
711
+ CPU_DEVICE_BINARY_DECL(name, complex128, int16, complex128) \
712
+ CPU_DEVICE_BINARY_DECL(name, complex128, int32, complex128) \
713
+ CPU_DEVICE_BINARY_DECL(name, complex128, bfloat16, complex128) \
714
+ CPU_DEVICE_BINARY_NOIMPL_DECL(name, complex128, float16, complex128) \
715
+ CPU_DEVICE_BINARY_DECL(name, complex128, float32, complex128) \
716
+ CPU_DEVICE_BINARY_DECL(name, complex128, float64, complex128) \
717
+ CPU_DEVICE_BINARY_NOIMPL_DECL(name, complex128, complex32, complex128) \
718
+ CPU_DEVICE_BINARY_DECL(name, complex128, complex64, complex128) \
719
+ CPU_DEVICE_BINARY_DECL(name, complex128, complex128, complex128)
720
+
721
+
722
/*
 * Expand the arithmetic declaration tables once per kernel family.
 * add, subtract, multiply and power use the general arithmetic table;
 * floor_divide and remainder use the NO_COMPLEX table; divide uses the
 * FLOAT_RETURN table.
 */
+ CPU_DEVICE_BINARY_ARITHMETIC_DECL(add)
723
+ CPU_DEVICE_BINARY_ARITHMETIC_DECL(subtract)
724
+ CPU_DEVICE_BINARY_ARITHMETIC_DECL(multiply)
725
+ CPU_DEVICE_BINARY_ARITHMETIC_NO_COMPLEX_DECL(floor_divide)
726
+ CPU_DEVICE_BINARY_ARITHMETIC_NO_COMPLEX_DECL(remainder)
727
+ CPU_DEVICE_BINARY_ARITHMETIC_FLOAT_RETURN_DECL(divide)
728
+ CPU_DEVICE_BINARY_ARITHMETIC_DECL(power)
729
+
730
+
731
+ /*****************************************************************************/
732
+ /* Comparison */
733
+ /*****************************************************************************/
734
+
735
/*
 * CPU_DEVICE_ALL_COMPARISON_DECL(name)
 *
 * Declaration table for a comparison kernel family `name`.  Each entry
 * has the form (name, first input type, second input type, output type),
 * and the output type is bool for every entry.
 *
 * Unlike the arithmetic tables, pairings involving uint64 or int64 are
 * listed with CPU_DEVICE_BINARY_DECL here.  Pairings in which float16 or
 * complex32 is an input use CPU_DEVICE_BINARY_NOIMPL_DECL.
 *
 * NOTE(review): the (bfloat16, float16) entry is the one exception -- it
 * uses CPU_DEVICE_BINARY_DECL, while every other pairing with a float16
 * input, including the mirrored (float16, bfloat16) entry, uses
 * CPU_DEVICE_BINARY_NOIMPL_DECL.  Confirm whether that kernel is really
 * implemented or whether the entry should be NOIMPL as well.
 *
 * The helper macros are defined outside this section.
 */
+ #define CPU_DEVICE_ALL_COMPARISON_DECL(name) \
736
+ CPU_DEVICE_BINARY_DECL(name, uint8, uint8, bool) \
737
+ CPU_DEVICE_BINARY_DECL(name, uint8, uint16, bool) \
738
+ CPU_DEVICE_BINARY_DECL(name, uint8, uint32, bool) \
739
+ CPU_DEVICE_BINARY_DECL(name, uint8, uint64, bool) \
740
+ CPU_DEVICE_BINARY_DECL(name, uint8, int8, bool) \
741
+ CPU_DEVICE_BINARY_DECL(name, uint8, int16, bool) \
742
+ CPU_DEVICE_BINARY_DECL(name, uint8, int32, bool) \
743
+ CPU_DEVICE_BINARY_DECL(name, uint8, int64, bool) \
744
+ CPU_DEVICE_BINARY_DECL(name, uint8, bfloat16, bool) \
745
+ CPU_DEVICE_BINARY_NOIMPL_DECL(name, uint8, float16, bool) \
746
+ CPU_DEVICE_BINARY_DECL(name, uint8, float32, bool) \
747
+ CPU_DEVICE_BINARY_DECL(name, uint8, float64, bool) \
748
+ CPU_DEVICE_BINARY_NOIMPL_DECL(name, uint8, complex32, bool) \
749
+ CPU_DEVICE_BINARY_DECL(name, uint8, complex64, bool) \
750
+ CPU_DEVICE_BINARY_DECL(name, uint8, complex128, bool) \
751
+ \
752
+ CPU_DEVICE_BINARY_DECL(name, uint16, uint8, bool) \
753
+ CPU_DEVICE_BINARY_DECL(name, uint16, uint16, bool) \
754
+ CPU_DEVICE_BINARY_DECL(name, uint16, uint32, bool) \
755
+ CPU_DEVICE_BINARY_DECL(name, uint16, uint64, bool) \
756
+ CPU_DEVICE_BINARY_DECL(name, uint16, int8, bool) \
757
+ CPU_DEVICE_BINARY_DECL(name, uint16, int16, bool) \
758
+ CPU_DEVICE_BINARY_DECL(name, uint16, int32, bool) \
759
+ CPU_DEVICE_BINARY_DECL(name, uint16, int64, bool) \
760
+ CPU_DEVICE_BINARY_DECL(name, uint16, bfloat16, bool) \
761
+ CPU_DEVICE_BINARY_NOIMPL_DECL(name, uint16, float16, bool) \
762
+ CPU_DEVICE_BINARY_DECL(name, uint16, float32, bool) \
763
+ CPU_DEVICE_BINARY_DECL(name, uint16, float64, bool) \
764
+ CPU_DEVICE_BINARY_NOIMPL_DECL(name, uint16, complex32, bool) \
765
+ CPU_DEVICE_BINARY_DECL(name, uint16, complex64, bool) \
766
+ CPU_DEVICE_BINARY_DECL(name, uint16, complex128, bool) \
767
+ \
768
+ CPU_DEVICE_BINARY_DECL(name, uint32, uint8, bool) \
769
+ CPU_DEVICE_BINARY_DECL(name, uint32, uint16, bool) \
770
+ CPU_DEVICE_BINARY_DECL(name, uint32, uint32, bool) \
771
+ CPU_DEVICE_BINARY_DECL(name, uint32, uint64, bool) \
772
+ CPU_DEVICE_BINARY_DECL(name, uint32, int8, bool) \
773
+ CPU_DEVICE_BINARY_DECL(name, uint32, int16, bool) \
774
+ CPU_DEVICE_BINARY_DECL(name, uint32, int32, bool) \
775
+ CPU_DEVICE_BINARY_DECL(name, uint32, int64, bool) \
776
+ CPU_DEVICE_BINARY_DECL(name, uint32, bfloat16, bool) \
777
+ CPU_DEVICE_BINARY_NOIMPL_DECL(name, uint32, float16, bool) \
778
+ CPU_DEVICE_BINARY_DECL(name, uint32, float32, bool) \
779
+ CPU_DEVICE_BINARY_DECL(name, uint32, float64, bool) \
780
+ CPU_DEVICE_BINARY_NOIMPL_DECL(name, uint32, complex32, bool) \
781
+ CPU_DEVICE_BINARY_DECL(name, uint32, complex64, bool) \
782
+ CPU_DEVICE_BINARY_DECL(name, uint32, complex128, bool) \
783
+ \
784
+ CPU_DEVICE_BINARY_DECL(name, uint64, uint8, bool) \
785
+ CPU_DEVICE_BINARY_DECL(name, uint64, uint16, bool) \
786
+ CPU_DEVICE_BINARY_DECL(name, uint64, uint32, bool) \
787
+ CPU_DEVICE_BINARY_DECL(name, uint64, uint64, bool) \
788
+ \
789
+ CPU_DEVICE_BINARY_DECL(name, int8, uint8, bool) \
790
+ CPU_DEVICE_BINARY_DECL(name, int8, uint16, bool) \
791
+ CPU_DEVICE_BINARY_DECL(name, int8, uint32, bool) \
792
+ CPU_DEVICE_BINARY_DECL(name, int8, int8, bool) \
793
+ CPU_DEVICE_BINARY_DECL(name, int8, int16, bool) \
794
+ CPU_DEVICE_BINARY_DECL(name, int8, int32, bool) \
795
+ CPU_DEVICE_BINARY_DECL(name, int8, int64, bool) \
796
+ CPU_DEVICE_BINARY_DECL(name, int8, bfloat16, bool) \
797
+ CPU_DEVICE_BINARY_NOIMPL_DECL(name, int8, float16, bool) \
798
+ CPU_DEVICE_BINARY_DECL(name, int8, float32, bool) \
799
+ CPU_DEVICE_BINARY_DECL(name, int8, float64, bool) \
800
+ CPU_DEVICE_BINARY_NOIMPL_DECL(name, int8, complex32, bool) \
801
+ CPU_DEVICE_BINARY_DECL(name, int8, complex64, bool) \
802
+ CPU_DEVICE_BINARY_DECL(name, int8, complex128, bool) \
803
+ \
804
+ CPU_DEVICE_BINARY_DECL(name, int16, uint8, bool) \
805
+ CPU_DEVICE_BINARY_DECL(name, int16, uint16, bool) \
806
+ CPU_DEVICE_BINARY_DECL(name, int16, uint32, bool) \
807
+ CPU_DEVICE_BINARY_DECL(name, int16, int8, bool) \
808
+ CPU_DEVICE_BINARY_DECL(name, int16, int16, bool) \
809
+ CPU_DEVICE_BINARY_DECL(name, int16, int32, bool) \
810
+ CPU_DEVICE_BINARY_DECL(name, int16, int64, bool) \
811
+ CPU_DEVICE_BINARY_DECL(name, int16, bfloat16, bool) \
812
+ CPU_DEVICE_BINARY_NOIMPL_DECL(name, int16, float16, bool) \
813
+ CPU_DEVICE_BINARY_DECL(name, int16, float32, bool) \
814
+ CPU_DEVICE_BINARY_DECL(name, int16, float64, bool) \
815
+ CPU_DEVICE_BINARY_NOIMPL_DECL(name, int16, complex32, bool) \
816
+ CPU_DEVICE_BINARY_DECL(name, int16, complex64, bool) \
817
+ CPU_DEVICE_BINARY_DECL(name, int16, complex128, bool) \
818
+ \
819
+ CPU_DEVICE_BINARY_DECL(name, int32, uint8, bool) \
820
+ CPU_DEVICE_BINARY_DECL(name, int32, uint16, bool) \
821
+ CPU_DEVICE_BINARY_DECL(name, int32, uint32, bool) \
822
+ CPU_DEVICE_BINARY_DECL(name, int32, int8, bool) \
823
+ CPU_DEVICE_BINARY_DECL(name, int32, int16, bool) \
824
+ CPU_DEVICE_BINARY_DECL(name, int32, int32, bool) \
825
+ CPU_DEVICE_BINARY_DECL(name, int32, int64, bool) \
826
+ CPU_DEVICE_BINARY_DECL(name, int32, bfloat16, bool) \
827
+ CPU_DEVICE_BINARY_NOIMPL_DECL(name, int32, float16, bool) \
828
+ CPU_DEVICE_BINARY_DECL(name, int32, float32, bool) \
829
+ CPU_DEVICE_BINARY_DECL(name, int32, float64, bool) \
830
+ CPU_DEVICE_BINARY_NOIMPL_DECL(name, int32, complex32, bool) \
831
+ CPU_DEVICE_BINARY_DECL(name, int32, complex64, bool) \
832
+ CPU_DEVICE_BINARY_DECL(name, int32, complex128, bool) \
833
+ \
834
+ CPU_DEVICE_BINARY_DECL(name, int64, uint8, bool) \
835
+ CPU_DEVICE_BINARY_DECL(name, int64, uint16, bool) \
836
+ CPU_DEVICE_BINARY_DECL(name, int64, uint32, bool) \
837
+ CPU_DEVICE_BINARY_DECL(name, int64, int8, bool) \
838
+ CPU_DEVICE_BINARY_DECL(name, int64, int16, bool) \
839
+ CPU_DEVICE_BINARY_DECL(name, int64, int32, bool) \
840
+ CPU_DEVICE_BINARY_DECL(name, int64, int64, bool) \
841
+ \
842
+ CPU_DEVICE_BINARY_DECL(name, bfloat16, uint8, bool) \
843
+ CPU_DEVICE_BINARY_DECL(name, bfloat16, uint16, bool) \
844
+ CPU_DEVICE_BINARY_DECL(name, bfloat16, uint32, bool) \
845
+ CPU_DEVICE_BINARY_DECL(name, bfloat16, int8, bool) \
846
+ CPU_DEVICE_BINARY_DECL(name, bfloat16, int16, bool) \
847
+ CPU_DEVICE_BINARY_DECL(name, bfloat16, int32, bool) \
848
+ CPU_DEVICE_BINARY_DECL(name, bfloat16, bfloat16, bool) \
849
+ CPU_DEVICE_BINARY_DECL(name, bfloat16, float16, bool) \
850
+ CPU_DEVICE_BINARY_DECL(name, bfloat16, float32, bool) \
851
+ CPU_DEVICE_BINARY_DECL(name, bfloat16, float64, bool) \
852
+ CPU_DEVICE_BINARY_NOIMPL_DECL(name, bfloat16, complex32, bool) \
853
+ CPU_DEVICE_BINARY_DECL(name, bfloat16, complex64, bool) \
854
+ CPU_DEVICE_BINARY_DECL(name, bfloat16, complex128, bool) \
855
+ \
856
+ CPU_DEVICE_BINARY_NOIMPL_DECL(name, float16, uint8, bool) \
857
+ CPU_DEVICE_BINARY_NOIMPL_DECL(name, float16, uint16, bool) \
858
+ CPU_DEVICE_BINARY_NOIMPL_DECL(name, float16, uint32, bool) \
859
+ CPU_DEVICE_BINARY_NOIMPL_DECL(name, float16, int8, bool) \
860
+ CPU_DEVICE_BINARY_NOIMPL_DECL(name, float16, int16, bool) \
861
+ CPU_DEVICE_BINARY_NOIMPL_DECL(name, float16, int32, bool) \
862
+ CPU_DEVICE_BINARY_NOIMPL_DECL(name, float16, bfloat16, bool) \
863
+ CPU_DEVICE_BINARY_NOIMPL_DECL(name, float16, float16, bool) \
864
+ CPU_DEVICE_BINARY_NOIMPL_DECL(name, float16, float32, bool) \
865
+ CPU_DEVICE_BINARY_NOIMPL_DECL(name, float16, float64, bool) \
866
+ CPU_DEVICE_BINARY_NOIMPL_DECL(name, float16, complex32, bool) \
867
+ CPU_DEVICE_BINARY_NOIMPL_DECL(name, float16, complex64, bool) \
868
+ CPU_DEVICE_BINARY_NOIMPL_DECL(name, float16, complex128, bool) \
869
+ \
870
+ CPU_DEVICE_BINARY_DECL(name, float32, uint8, bool) \
871
+ CPU_DEVICE_BINARY_DECL(name, float32, uint16, bool) \
872
+ CPU_DEVICE_BINARY_DECL(name, float32, uint32, bool) \
873
+ CPU_DEVICE_BINARY_DECL(name, float32, int8, bool) \
874
+ CPU_DEVICE_BINARY_DECL(name, float32, int16, bool) \
875
+ CPU_DEVICE_BINARY_DECL(name, float32, int32, bool) \
876
+ CPU_DEVICE_BINARY_DECL(name, float32, bfloat16, bool) \
877
+ CPU_DEVICE_BINARY_NOIMPL_DECL(name, float32, float16, bool) \
878
+ CPU_DEVICE_BINARY_DECL(name, float32, float32, bool) \
879
+ CPU_DEVICE_BINARY_DECL(name, float32, float64, bool) \
880
+ CPU_DEVICE_BINARY_NOIMPL_DECL(name, float32, complex32, bool) \
881
+ CPU_DEVICE_BINARY_DECL(name, float32, complex64, bool) \
882
+ CPU_DEVICE_BINARY_DECL(name, float32, complex128, bool) \
883
+ \
884
+ CPU_DEVICE_BINARY_DECL(name, float64, uint8, bool) \
885
+ CPU_DEVICE_BINARY_DECL(name, float64, uint16, bool) \
886
+ CPU_DEVICE_BINARY_DECL(name, float64, uint32, bool) \
887
+ CPU_DEVICE_BINARY_DECL(name, float64, int8, bool) \
888
+ CPU_DEVICE_BINARY_DECL(name, float64, int16, bool) \
889
+ CPU_DEVICE_BINARY_DECL(name, float64, int32, bool) \
890
+ CPU_DEVICE_BINARY_DECL(name, float64, bfloat16, bool) \
891
+ CPU_DEVICE_BINARY_NOIMPL_DECL(name, float64, float16, bool) \
892
+ CPU_DEVICE_BINARY_DECL(name, float64, float32, bool) \
893
+ CPU_DEVICE_BINARY_DECL(name, float64, float64, bool) \
894
+ CPU_DEVICE_BINARY_NOIMPL_DECL(name, float64, complex32, bool) \
895
+ CPU_DEVICE_BINARY_DECL(name, float64, complex64, bool) \
896
+ CPU_DEVICE_BINARY_DECL(name, float64, complex128, bool) \
897
+ \
898
+ CPU_DEVICE_BINARY_NOIMPL_DECL(name, complex32, uint8, bool) \
899
+ CPU_DEVICE_BINARY_NOIMPL_DECL(name, complex32, uint16, bool) \
900
+ CPU_DEVICE_BINARY_NOIMPL_DECL(name, complex32, uint32, bool) \
901
+ CPU_DEVICE_BINARY_NOIMPL_DECL(name, complex32, int8, bool) \
902
+ CPU_DEVICE_BINARY_NOIMPL_DECL(name, complex32, int16, bool) \
903
+ CPU_DEVICE_BINARY_NOIMPL_DECL(name, complex32, int32, bool) \
904
+ CPU_DEVICE_BINARY_NOIMPL_DECL(name, complex32, bfloat16, bool) \
905
+ CPU_DEVICE_BINARY_NOIMPL_DECL(name, complex32, float16, bool) \
906
+ CPU_DEVICE_BINARY_NOIMPL_DECL(name, complex32, float32, bool) \
907
+ CPU_DEVICE_BINARY_NOIMPL_DECL(name, complex32, float64, bool) \
908
+ CPU_DEVICE_BINARY_NOIMPL_DECL(name, complex32, complex32, bool) \
909
+ CPU_DEVICE_BINARY_NOIMPL_DECL(name, complex32, complex64, bool) \
910
+ CPU_DEVICE_BINARY_NOIMPL_DECL(name, complex32, complex128, bool) \
911
+ \
912
+ CPU_DEVICE_BINARY_DECL(name, complex64, uint8, bool) \
913
+ CPU_DEVICE_BINARY_DECL(name, complex64, uint16, bool) \
914
+ CPU_DEVICE_BINARY_DECL(name, complex64, uint32, bool) \
915
+ CPU_DEVICE_BINARY_DECL(name, complex64, int8, bool) \
916
+ CPU_DEVICE_BINARY_DECL(name, complex64, int16, bool) \
917
+ CPU_DEVICE_BINARY_DECL(name, complex64, int32, bool) \
918
+ CPU_DEVICE_BINARY_DECL(name, complex64, bfloat16, bool) \
919
+ CPU_DEVICE_BINARY_NOIMPL_DECL(name, complex64, float16, bool) \
920
+ CPU_DEVICE_BINARY_DECL(name, complex64, float32, bool) \
921
+ CPU_DEVICE_BINARY_DECL(name, complex64, float64, bool) \
922
+ CPU_DEVICE_BINARY_NOIMPL_DECL(name, complex64, complex32, bool) \
923
+ CPU_DEVICE_BINARY_DECL(name, complex64, complex64, bool) \
924
+ CPU_DEVICE_BINARY_DECL(name, complex64, complex128, bool) \
925
+ \
926
+ CPU_DEVICE_BINARY_DECL(name, complex128, uint8, bool) \
927
+ CPU_DEVICE_BINARY_DECL(name, complex128, uint16, bool) \
928
+ CPU_DEVICE_BINARY_DECL(name, complex128, uint32, bool) \
929
+ CPU_DEVICE_BINARY_DECL(name, complex128, int8, bool) \
930
+ CPU_DEVICE_BINARY_DECL(name, complex128, int16, bool) \
931
+ CPU_DEVICE_BINARY_DECL(name, complex128, int32, bool) \
932
+ CPU_DEVICE_BINARY_DECL(name, complex128, bfloat16, bool) \
933
+ CPU_DEVICE_BINARY_NOIMPL_DECL(name, complex128, float16, bool) \
934
+ CPU_DEVICE_BINARY_DECL(name, complex128, float32, bool) \
935
+ CPU_DEVICE_BINARY_DECL(name, complex128, float64, bool) \
936
+ CPU_DEVICE_BINARY_NOIMPL_DECL(name, complex128, complex32, bool) \
937
+ CPU_DEVICE_BINARY_DECL(name, complex128, complex64, bool) \
938
+ CPU_DEVICE_BINARY_DECL(name, complex128, complex128, bool)
939
+
940
+
941
/*
 * Expand the comparison declaration table once per comparison kernel
 * family.  `equaln` shares the same type table as `equal`; its exact
 * semantics are not visible in this section -- TODO confirm against the
 * kernel implementation.
 */
+ CPU_DEVICE_ALL_COMPARISON_DECL(less)
942
+ CPU_DEVICE_ALL_COMPARISON_DECL(less_equal)
943
+ CPU_DEVICE_ALL_COMPARISON_DECL(greater_equal)
944
+ CPU_DEVICE_ALL_COMPARISON_DECL(greater)
945
+ CPU_DEVICE_ALL_COMPARISON_DECL(equal)
946
+ CPU_DEVICE_ALL_COMPARISON_DECL(not_equal)
947
+ CPU_DEVICE_ALL_COMPARISON_DECL(equaln)
948
+
949
+
950
+ /*****************************************************************************/
951
+ /* Bitwise */
952
+ /*****************************************************************************/
953
+
954
/*
 * CPU_DEVICE_ALL_BITWISE_DECL(name)
 *
 * Declaration table for a bitwise kernel family `name`.  Each entry has
 * the form (name, first input type, second input type, output type).
 * Only bool and the integer types appear, and every entry uses
 * CPU_DEVICE_BINARY_DECL.
 *
 * Output types as listed in the table:
 *   - same-signedness pairs (and pairs with bool) use the wider of the
 *     two input types;
 *   - mixed signed/unsigned pairs use a signed type wide enough for both
 *     (e.g. uint8 with int8 gives int16, uint32 with int32 gives int64);
 *   - uint64 is paired only with bool and the unsigned types; no
 *     uint64-with-signed entries are listed.
 *
 * CPU_DEVICE_BINARY_DECL is defined outside this section.
 */
+ #define CPU_DEVICE_ALL_BITWISE_DECL(name) \
955
+ CPU_DEVICE_BINARY_DECL(name, bool, bool, bool) \
956
+ CPU_DEVICE_BINARY_DECL(name, bool, uint8, uint8) \
957
+ CPU_DEVICE_BINARY_DECL(name, bool, uint16, uint16) \
958
+ CPU_DEVICE_BINARY_DECL(name, bool, uint32, uint32) \
959
+ CPU_DEVICE_BINARY_DECL(name, bool, uint64, uint64) \
960
+ CPU_DEVICE_BINARY_DECL(name, bool, int8, int8) \
961
+ CPU_DEVICE_BINARY_DECL(name, bool, int16, int16) \
962
+ CPU_DEVICE_BINARY_DECL(name, bool, int32, int32) \
963
+ CPU_DEVICE_BINARY_DECL(name, bool, int64, int64) \
964
+ \
965
+ CPU_DEVICE_BINARY_DECL(name, uint8, bool, uint8) \
966
+ CPU_DEVICE_BINARY_DECL(name, uint8, uint8, uint8) \
967
+ CPU_DEVICE_BINARY_DECL(name, uint8, uint16, uint16) \
968
+ CPU_DEVICE_BINARY_DECL(name, uint8, uint32, uint32) \
969
+ CPU_DEVICE_BINARY_DECL(name, uint8, uint64, uint64) \
970
+ CPU_DEVICE_BINARY_DECL(name, uint8, int8, int16) \
971
+ CPU_DEVICE_BINARY_DECL(name, uint8, int16, int16) \
972
+ CPU_DEVICE_BINARY_DECL(name, uint8, int32, int32) \
973
+ CPU_DEVICE_BINARY_DECL(name, uint8, int64, int64) \
974
+ \
975
+ CPU_DEVICE_BINARY_DECL(name, uint16, bool, uint16) \
976
+ CPU_DEVICE_BINARY_DECL(name, uint16, uint8, uint16) \
977
+ CPU_DEVICE_BINARY_DECL(name, uint16, uint16, uint16) \
978
+ CPU_DEVICE_BINARY_DECL(name, uint16, uint32, uint32) \
979
+ CPU_DEVICE_BINARY_DECL(name, uint16, uint64, uint64) \
980
+ CPU_DEVICE_BINARY_DECL(name, uint16, int8, int32) \
981
+ CPU_DEVICE_BINARY_DECL(name, uint16, int16, int32) \
982
+ CPU_DEVICE_BINARY_DECL(name, uint16, int32, int32) \
983
+ CPU_DEVICE_BINARY_DECL(name, uint16, int64, int64) \
984
+ \
985
+ CPU_DEVICE_BINARY_DECL(name, uint32, bool, uint32) \
986
+ CPU_DEVICE_BINARY_DECL(name, uint32, uint8, uint32) \
987
+ CPU_DEVICE_BINARY_DECL(name, uint32, uint16, uint32) \
988
+ CPU_DEVICE_BINARY_DECL(name, uint32, uint32, uint32) \
989
+ CPU_DEVICE_BINARY_DECL(name, uint32, uint64, uint64) \
990
+ CPU_DEVICE_BINARY_DECL(name, uint32, int8, int64) \
991
+ CPU_DEVICE_BINARY_DECL(name, uint32, int16, int64) \
992
+ CPU_DEVICE_BINARY_DECL(name, uint32, int32, int64) \
993
+ CPU_DEVICE_BINARY_DECL(name, uint32, int64, int64) \
994
+ \
995
+ CPU_DEVICE_BINARY_DECL(name, uint64, bool, uint64) \
996
+ CPU_DEVICE_BINARY_DECL(name, uint64, uint8, uint64) \
997
+ CPU_DEVICE_BINARY_DECL(name, uint64, uint16, uint64) \
998
+ CPU_DEVICE_BINARY_DECL(name, uint64, uint32, uint64) \
999
+ CPU_DEVICE_BINARY_DECL(name, uint64, uint64, uint64) \
1000
+ \
1001
+ CPU_DEVICE_BINARY_DECL(name, int8, bool, int8) \
1002
+ CPU_DEVICE_BINARY_DECL(name, int8, uint8, int16) \
1003
+ CPU_DEVICE_BINARY_DECL(name, int8, uint16, int32) \
1004
+ CPU_DEVICE_BINARY_DECL(name, int8, uint32, int64) \
1005
+ CPU_DEVICE_BINARY_DECL(name, int8, int8, int8) \
1006
+ CPU_DEVICE_BINARY_DECL(name, int8, int16, int16) \
1007
+ CPU_DEVICE_BINARY_DECL(name, int8, int32, int32) \
1008
+ CPU_DEVICE_BINARY_DECL(name, int8, int64, int64) \
1009
+ \
1010
+ CPU_DEVICE_BINARY_DECL(name, int16, bool, int16) \
1011
+ CPU_DEVICE_BINARY_DECL(name, int16, uint8, int16) \
1012
+ CPU_DEVICE_BINARY_DECL(name, int16, uint16, int32) \
1013
+ CPU_DEVICE_BINARY_DECL(name, int16, uint32, int64) \
1014
+ CPU_DEVICE_BINARY_DECL(name, int16, int8, int16) \
1015
+ CPU_DEVICE_BINARY_DECL(name, int16, int16, int16) \
1016
+ CPU_DEVICE_BINARY_DECL(name, int16, int32, int32) \
1017
+ CPU_DEVICE_BINARY_DECL(name, int16, int64, int64) \
1018
+ \
1019
+ CPU_DEVICE_BINARY_DECL(name, int32, bool, int32) \
1020
+ CPU_DEVICE_BINARY_DECL(name, int32, uint8, int32) \
1021
+ CPU_DEVICE_BINARY_DECL(name, int32, uint16, int32) \
1022
+ CPU_DEVICE_BINARY_DECL(name, int32, uint32, int64) \
1023
+ CPU_DEVICE_BINARY_DECL(name, int32, int8, int32) \
1024
+ CPU_DEVICE_BINARY_DECL(name, int32, int16, int32) \
1025
+ CPU_DEVICE_BINARY_DECL(name, int32, int32, int32) \
1026
+ CPU_DEVICE_BINARY_DECL(name, int32, int64, int64) \
1027
+ \
1028
+ CPU_DEVICE_BINARY_DECL(name, int64, bool, int64) \
1029
+ CPU_DEVICE_BINARY_DECL(name, int64, uint8, int64) \
1030
+ CPU_DEVICE_BINARY_DECL(name, int64, uint16, int64) \
1031
+ CPU_DEVICE_BINARY_DECL(name, int64, uint32, int64) \
1032
+ CPU_DEVICE_BINARY_DECL(name, int64, int8, int64) \
1033
+ CPU_DEVICE_BINARY_DECL(name, int64, int16, int64) \
1034
+ CPU_DEVICE_BINARY_DECL(name, int64, int32, int64) \
1035
+ CPU_DEVICE_BINARY_DECL(name, int64, int64, int64)
1036
+ /* Expand CPU_DEVICE_ALL_BITWISE_DECL once each for bitwise_and, bitwise_or and bitwise_xor. */
1037
+ CPU_DEVICE_ALL_BITWISE_DECL(bitwise_and)
1038
+ CPU_DEVICE_ALL_BITWISE_DECL(bitwise_or)
1039
+ CPU_DEVICE_ALL_BITWISE_DECL(bitwise_xor)
1040
+
1041
+
1042
+ /*****************************************************************************/
1043
+ /* Two return values */
1044
+ /*****************************************************************************/
1045
+ /* Expands CPU_DEVICE_BINARY_MV_DECL for `name` once per listed type (8 integer types, bfloat16, float32, float64), passing the same type in all four slots; presumably two inputs and two outputs (confirm against CPU_DEVICE_BINARY_MV_DECL). */
1046
+ #define CPU_DEVICE_ALL_BINARY_MV_DECL(name) \
1047
+ CPU_DEVICE_BINARY_MV_DECL(name, uint8, uint8, uint8, uint8) \
1048
+ CPU_DEVICE_BINARY_MV_DECL(name, uint16, uint16, uint16, uint16) \
1049
+ CPU_DEVICE_BINARY_MV_DECL(name, uint32, uint32, uint32, uint32) \
1050
+ CPU_DEVICE_BINARY_MV_DECL(name, uint64, uint64, uint64, uint64) \
1051
+ CPU_DEVICE_BINARY_MV_DECL(name, int8, int8, int8, int8) \
1052
+ CPU_DEVICE_BINARY_MV_DECL(name, int16, int16, int16, int16) \
1053
+ CPU_DEVICE_BINARY_MV_DECL(name, int32, int32, int32, int32) \
1054
+ CPU_DEVICE_BINARY_MV_DECL(name, int64, int64, int64, int64) \
1055
+ CPU_DEVICE_BINARY_MV_DECL(name, bfloat16, bfloat16, bfloat16, bfloat16) \
1056
+ CPU_DEVICE_BINARY_MV_DECL(name, float32, float32, float32, float32) \
1057
+ CPU_DEVICE_BINARY_MV_DECL(name, float64, float64, float64, float64)
1058
+ /* Expand the CPU_DEVICE_ALL_BINARY_MV_DECL type list for divmod. */
1059
+ CPU_DEVICE_ALL_BINARY_MV_DECL(divmod)
1060
+
1061
+
1062
+ #endif /* CPU_DEVICE_BINARY_H */