gumath 0.2.0dev5 → 0.2.0dev8

Sign up to get free protection for your applications and to get access to all the features.
Files changed (99) hide show
  1. checksums.yaml +4 -4
  2. data/CONTRIBUTING.md +7 -2
  3. data/Gemfile +0 -3
  4. data/ext/ruby_gumath/GPATH +0 -0
  5. data/ext/ruby_gumath/GRTAGS +0 -0
  6. data/ext/ruby_gumath/GTAGS +0 -0
  7. data/ext/ruby_gumath/extconf.rb +0 -5
  8. data/ext/ruby_gumath/functions.c +10 -2
  9. data/ext/ruby_gumath/gufunc_object.c +15 -4
  10. data/ext/ruby_gumath/gufunc_object.h +9 -3
  11. data/ext/ruby_gumath/gumath/Makefile +63 -0
  12. data/ext/ruby_gumath/gumath/Makefile.in +1 -0
  13. data/ext/ruby_gumath/gumath/config.h +56 -0
  14. data/ext/ruby_gumath/gumath/config.h.in +3 -0
  15. data/ext/ruby_gumath/gumath/config.log +497 -0
  16. data/ext/ruby_gumath/gumath/config.status +1034 -0
  17. data/ext/ruby_gumath/gumath/configure +375 -4
  18. data/ext/ruby_gumath/gumath/configure.ac +47 -3
  19. data/ext/ruby_gumath/gumath/libgumath/Makefile +236 -0
  20. data/ext/ruby_gumath/gumath/libgumath/Makefile.in +90 -24
  21. data/ext/ruby_gumath/gumath/libgumath/Makefile.vc +54 -15
  22. data/ext/ruby_gumath/gumath/libgumath/apply.c +92 -28
  23. data/ext/ruby_gumath/gumath/libgumath/apply.o +0 -0
  24. data/ext/ruby_gumath/gumath/libgumath/common.o +0 -0
  25. data/ext/ruby_gumath/gumath/libgumath/cpu_device_binary.o +0 -0
  26. data/ext/ruby_gumath/gumath/libgumath/cpu_device_unary.o +0 -0
  27. data/ext/ruby_gumath/gumath/libgumath/cpu_host_binary.o +0 -0
  28. data/ext/ruby_gumath/gumath/libgumath/cpu_host_unary.o +0 -0
  29. data/ext/ruby_gumath/gumath/libgumath/examples.o +0 -0
  30. data/ext/ruby_gumath/gumath/libgumath/extending/graph.c +27 -20
  31. data/ext/ruby_gumath/gumath/libgumath/extending/pdist.c +1 -1
  32. data/ext/ruby_gumath/gumath/libgumath/func.c +13 -9
  33. data/ext/ruby_gumath/gumath/libgumath/func.o +0 -0
  34. data/ext/ruby_gumath/gumath/libgumath/graph.o +0 -0
  35. data/ext/ruby_gumath/gumath/libgumath/gumath.h +55 -14
  36. data/ext/ruby_gumath/gumath/libgumath/kernels/common.c +513 -0
  37. data/ext/ruby_gumath/gumath/libgumath/kernels/common.h +155 -0
  38. data/ext/ruby_gumath/gumath/libgumath/kernels/contrib/bfloat16.h +520 -0
  39. data/ext/ruby_gumath/gumath/libgumath/kernels/cpu_device_binary.cc +1123 -0
  40. data/ext/ruby_gumath/gumath/libgumath/kernels/cpu_device_binary.h +1062 -0
  41. data/ext/ruby_gumath/gumath/libgumath/kernels/cpu_device_msvc.cc +555 -0
  42. data/ext/ruby_gumath/gumath/libgumath/kernels/cpu_device_unary.cc +368 -0
  43. data/ext/ruby_gumath/gumath/libgumath/kernels/cpu_device_unary.h +335 -0
  44. data/ext/ruby_gumath/gumath/libgumath/kernels/cpu_host_binary.c +2952 -0
  45. data/ext/ruby_gumath/gumath/libgumath/kernels/cpu_host_unary.c +1100 -0
  46. data/ext/ruby_gumath/gumath/libgumath/kernels/cuda_device_binary.cu +1143 -0
  47. data/ext/ruby_gumath/gumath/libgumath/kernels/cuda_device_binary.h +1061 -0
  48. data/ext/ruby_gumath/gumath/libgumath/kernels/cuda_device_unary.cu +528 -0
  49. data/ext/ruby_gumath/gumath/libgumath/kernels/cuda_device_unary.h +463 -0
  50. data/ext/ruby_gumath/gumath/libgumath/kernels/cuda_host_binary.c +2817 -0
  51. data/ext/ruby_gumath/gumath/libgumath/kernels/cuda_host_unary.c +1331 -0
  52. data/ext/ruby_gumath/gumath/libgumath/kernels/device.hh +614 -0
  53. data/ext/ruby_gumath/gumath/libgumath/libgumath.a +0 -0
  54. data/ext/ruby_gumath/gumath/libgumath/libgumath.so +1 -0
  55. data/ext/ruby_gumath/gumath/libgumath/libgumath.so.0 +1 -0
  56. data/ext/ruby_gumath/gumath/libgumath/libgumath.so.0.2.0dev3 +0 -0
  57. data/ext/ruby_gumath/gumath/libgumath/nploops.o +0 -0
  58. data/ext/ruby_gumath/gumath/libgumath/pdist.o +0 -0
  59. data/ext/ruby_gumath/gumath/libgumath/quaternion.o +0 -0
  60. data/ext/ruby_gumath/gumath/libgumath/tbl.o +0 -0
  61. data/ext/ruby_gumath/gumath/libgumath/thread.c +17 -4
  62. data/ext/ruby_gumath/gumath/libgumath/thread.o +0 -0
  63. data/ext/ruby_gumath/gumath/libgumath/xndloops.c +110 -0
  64. data/ext/ruby_gumath/gumath/libgumath/xndloops.o +0 -0
  65. data/ext/ruby_gumath/gumath/python/gumath/__init__.py +150 -0
  66. data/ext/ruby_gumath/gumath/python/gumath/_gumath.c +446 -80
  67. data/ext/ruby_gumath/gumath/python/gumath/cuda.c +78 -0
  68. data/ext/ruby_gumath/gumath/python/gumath/examples.c +0 -5
  69. data/ext/ruby_gumath/gumath/python/gumath/functions.c +2 -2
  70. data/ext/ruby_gumath/gumath/python/gumath/gumath.h +246 -0
  71. data/ext/ruby_gumath/gumath/python/gumath/libgumath.a +0 -0
  72. data/ext/ruby_gumath/gumath/python/gumath/libgumath.so +1 -0
  73. data/ext/ruby_gumath/gumath/python/gumath/libgumath.so.0 +1 -0
  74. data/ext/ruby_gumath/gumath/python/gumath/libgumath.so.0.2.0dev3 +0 -0
  75. data/ext/ruby_gumath/gumath/python/gumath/pygumath.h +31 -2
  76. data/ext/ruby_gumath/gumath/python/gumath_aux.py +767 -0
  77. data/ext/ruby_gumath/gumath/python/randdec.py +535 -0
  78. data/ext/ruby_gumath/gumath/python/randfloat.py +177 -0
  79. data/ext/ruby_gumath/gumath/python/test_gumath.py +1504 -24
  80. data/ext/ruby_gumath/gumath/python/test_xndarray.py +462 -0
  81. data/ext/ruby_gumath/gumath/setup.py +67 -6
  82. data/ext/ruby_gumath/gumath/tools/detect_cuda_arch.cc +35 -0
  83. data/ext/ruby_gumath/include/gumath.h +55 -14
  84. data/ext/ruby_gumath/include/ruby_gumath.h +4 -1
  85. data/ext/ruby_gumath/lib/libgumath.a +0 -0
  86. data/ext/ruby_gumath/lib/libgumath.so.0.2.0dev3 +0 -0
  87. data/ext/ruby_gumath/ruby_gumath.c +231 -70
  88. data/ext/ruby_gumath/ruby_gumath.h +4 -1
  89. data/ext/ruby_gumath/ruby_gumath_internal.h +25 -0
  90. data/ext/ruby_gumath/util.c +34 -0
  91. data/ext/ruby_gumath/util.h +9 -0
  92. data/gumath.gemspec +3 -2
  93. data/lib/gumath.rb +55 -1
  94. data/lib/gumath/version.rb +2 -2
  95. data/lib/ruby_gumath.so +0 -0
  96. metadata +63 -10
  97. data/ext/ruby_gumath/gumath/libgumath/extending/bfloat16.c +0 -130
  98. data/ext/ruby_gumath/gumath/libgumath/kernels/binary.c +0 -547
  99. data/ext/ruby_gumath/gumath/libgumath/kernels/unary.c +0 -449
@@ -0,0 +1,2952 @@
1
+ /*
2
+ * BSD 3-Clause License
3
+ *
4
+ * Copyright (c) 2017-2018, plures
5
+ * All rights reserved.
6
+ *
7
+ * Redistribution and use in source and binary forms, with or without
8
+ * modification, are permitted provided that the following conditions are met:
9
+ *
10
+ * 1. Redistributions of source code must retain the above copyright notice,
11
+ * this list of conditions and the following disclaimer.
12
+ *
13
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
14
+ * this list of conditions and the following disclaimer in the documentation
15
+ * and/or other materials provided with the distribution.
16
+ *
17
+ * 3. Neither the name of the copyright holder nor the names of its
18
+ * contributors may be used to endorse or promote products derived from
19
+ * this software without specific prior written permission.
20
+ *
21
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
22
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
24
+ * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
25
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
27
+ * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
28
+ * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
29
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
30
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31
+ */
32
+
33
+
34
+ #include <stdlib.h>
35
+ #include <stdint.h>
36
+ #include <string.h>
37
+ #include "ndtypes.h"
38
+ #include "xnd.h"
39
+ #include "gumath.h"
40
+ #include "common.h"
41
+ #include "cpu_device_binary.h"
42
+
43
+
44
+ /****************************************************************************/
45
+ /* Optimized dispatch (exact casting) */
46
+ /****************************************************************************/
47
+
48
+ /* Structured kernel locations for fast lookup. */
49
+ static int
50
+ binary_kernel_location(const ndt_t *in0, const ndt_t *in1, ndt_context_t *ctx)
51
+ {
52
+ const ndt_t *t0 = ndt_dtype(in0);
53
+ const ndt_t *t1 = ndt_dtype(in1);
54
+
55
+ switch (t0->tag) {
56
+ case Uint8: {
57
+ switch (t1->tag) {
58
+ case Uint8: return 0;
59
+ case Uint16: return 12;
60
+ case Uint32: return 24;
61
+ case Uint64: return 36;
62
+
63
+ case Int8: return 48;
64
+ case Int16: return 60;
65
+ case Int32: return 72;
66
+ case Int64: return 84;
67
+
68
+ case BFloat16: return 96;
69
+ case Float16: return 108;
70
+ case Float32: return 120;
71
+ case Float64: return 132;
72
+
73
+ case Complex32: return 144;
74
+ case Complex64: return 156;
75
+ case Complex128: return 168;
76
+
77
+ default: goto invalid_combination;
78
+ }
79
+ }
80
+
81
+ case Uint16: {
82
+ switch (t1->tag) {
83
+ case Uint8: return 180;
84
+ case Uint16: return 192;
85
+ case Uint32: return 204;
86
+ case Uint64: return 216;
87
+
88
+ case Int8: return 228;
89
+ case Int16: return 240;
90
+ case Int32: return 252;
91
+ case Int64: return 264;
92
+
93
+ case BFloat16: return 276;
94
+ case Float16: return 288;
95
+ case Float32: return 300;
96
+ case Float64: return 312;
97
+
98
+ case Complex32: return 324;
99
+ case Complex64: return 336;
100
+ case Complex128: return 348;
101
+
102
+ default: goto invalid_combination;
103
+ }
104
+ }
105
+
106
+ case Uint32: {
107
+ switch (t1->tag) {
108
+ case Uint8: return 360;
109
+ case Uint16: return 372;
110
+ case Uint32: return 384;
111
+ case Uint64: return 396;
112
+
113
+ case Int8: return 408;
114
+ case Int16: return 420;
115
+ case Int32: return 432;
116
+ case Int64: return 444;
117
+
118
+ case BFloat16: return 456;
119
+ case Float16: return 468;
120
+ case Float32: return 480;
121
+ case Float64: return 492;
122
+
123
+ case Complex32: return 504;
124
+ case Complex64: return 516;
125
+ case Complex128: return 528;
126
+
127
+ default: goto invalid_combination;
128
+ }
129
+ }
130
+
131
+ case Uint64: {
132
+ switch (t1->tag) {
133
+ case Uint8: return 540;
134
+ case Uint16: return 552;
135
+ case Uint32: return 564;
136
+ case Uint64: return 576;
137
+
138
+ default: goto invalid_combination;
139
+ }
140
+ }
141
+
142
+ case Int8: {
143
+ switch (t1->tag) {
144
+ case Uint8: return 588;
145
+ case Uint16: return 600;
146
+ case Uint32: return 612;
147
+
148
+ case Int8: return 624;
149
+ case Int16: return 636;
150
+ case Int32: return 648;
151
+ case Int64: return 660;
152
+
153
+ case BFloat16: return 672;
154
+ case Float16: return 684;
155
+ case Float32: return 696;
156
+ case Float64: return 708;
157
+
158
+ case Complex32: return 720;
159
+ case Complex64: return 732;
160
+ case Complex128: return 744;
161
+
162
+ default: goto invalid_combination;
163
+ }
164
+ }
165
+
166
+ case Int16: {
167
+ switch (t1->tag) {
168
+ case Uint8: return 756;
169
+ case Uint16: return 768;
170
+ case Uint32: return 780;
171
+
172
+ case Int8: return 792;
173
+ case Int16: return 804;
174
+ case Int32: return 816;
175
+ case Int64: return 828;
176
+
177
+ case BFloat16: return 840;
178
+ case Float16: return 852;
179
+ case Float32: return 864;
180
+ case Float64: return 876;
181
+
182
+ case Complex32: return 888;
183
+ case Complex64: return 900;
184
+ case Complex128: return 912;
185
+
186
+ default: goto invalid_combination;
187
+ }
188
+ }
189
+
190
+ case Int32: {
191
+ switch (t1->tag) {
192
+ case Uint8: return 924;
193
+ case Uint16: return 936;
194
+ case Uint32: return 948;
195
+
196
+ case Int8: return 960;
197
+ case Int16: return 972;
198
+ case Int32: return 984;
199
+ case Int64: return 996;
200
+
201
+ case BFloat16: return 1008;
202
+ case Float16: return 1020;
203
+ case Float32: return 1032;
204
+ case Float64: return 1044;
205
+
206
+ case Complex32: return 1056;
207
+ case Complex64: return 1068;
208
+ case Complex128: return 1080;
209
+
210
+ default: goto invalid_combination;
211
+ }
212
+ }
213
+
214
+ case Int64: {
215
+ switch (t1->tag) {
216
+ case Uint8: return 1092;
217
+ case Uint16: return 1104;
218
+ case Uint32: return 1116;
219
+
220
+ case Int8: return 1128;
221
+ case Int16: return 1140;
222
+ case Int32: return 1152;
223
+ case Int64: return 1164;
224
+
225
+ default: goto invalid_combination;
226
+ }
227
+ }
228
+
229
+ case BFloat16: {
230
+ switch (t1->tag) {
231
+ case Uint8: return 1176;
232
+ case Uint16: return 1188;
233
+ case Uint32: return 1200;
234
+
235
+ case Int8: return 1212;
236
+ case Int16: return 1224;
237
+ case Int32: return 1236;
238
+
239
+ case BFloat16: return 1248;
240
+ case Float16: return 1260;
241
+ case Float32: return 1272;
242
+ case Float64: return 1284;
243
+
244
+ case Complex32: return 1296;
245
+ case Complex64: return 1308;
246
+ case Complex128: return 1320;
247
+
248
+ default: goto invalid_combination;
249
+ }
250
+ }
251
+
252
+ case Float16: {
253
+ switch (t1->tag) {
254
+ case Uint8: return 1332;
255
+ case Uint16: return 1344;
256
+ case Uint32: return 1356;
257
+
258
+ case Int8: return 1368;
259
+ case Int16: return 1380;
260
+ case Int32: return 1392;
261
+
262
+ case BFloat16: return 1404;
263
+ case Float16: return 1416;
264
+ case Float32: return 1428;
265
+ case Float64: return 1440;
266
+
267
+ case Complex32: return 1452;
268
+ case Complex64: return 1464;
269
+ case Complex128: return 1476;
270
+
271
+ default: goto invalid_combination;
272
+ }
273
+ }
274
+
275
+ case Float32: {
276
+ switch (t1->tag) {
277
+ case Uint8: return 1488;
278
+ case Uint16: return 1500;
279
+ case Uint32: return 1512;
280
+
281
+ case Int8: return 1524;
282
+ case Int16: return 1536;
283
+ case Int32: return 1548;
284
+
285
+ case BFloat16: return 1560;
286
+ case Float16: return 1572;
287
+ case Float32: return 1584;
288
+ case Float64: return 1596;
289
+
290
+ case Complex32: return 1608;
291
+ case Complex64: return 1620;
292
+ case Complex128: return 1632;
293
+
294
+ default: goto invalid_combination;
295
+ }
296
+ }
297
+
298
+ case Float64: {
299
+ switch (t1->tag) {
300
+ case Uint8: return 1644;
301
+ case Uint16: return 1656;
302
+ case Uint32: return 1668;
303
+
304
+ case Int8: return 1680;
305
+ case Int16: return 1692;
306
+ case Int32: return 1704;
307
+
308
+ case BFloat16: return 1716;
309
+ case Float16: return 1728;
310
+ case Float32: return 1740;
311
+ case Float64: return 1752;
312
+
313
+ case Complex32: return 1764;
314
+ case Complex64: return 1776;
315
+ case Complex128: return 1788;
316
+
317
+ default: goto invalid_combination;
318
+ }
319
+ }
320
+
321
+ case Complex32: {
322
+ switch (t1->tag) {
323
+ case Uint8: return 1800;
324
+ case Uint16: return 1812;
325
+ case Uint32: return 1824;
326
+
327
+ case Int8: return 1836;
328
+ case Int16: return 1848;
329
+ case Int32: return 1860;
330
+
331
+ case BFloat16: return 1872;
332
+ case Float16: return 1884;
333
+ case Float32: return 1896;
334
+ case Float64: return 1908;
335
+
336
+ case Complex32: return 1920;
337
+ case Complex64: return 1932;
338
+ case Complex128: return 1944;
339
+
340
+ default: goto invalid_combination;
341
+ }
342
+ }
343
+
344
+ case Complex64: {
345
+ switch (t1->tag) {
346
+ case Uint8: return 1956;
347
+ case Uint16: return 1968;
348
+ case Uint32: return 1980;
349
+
350
+ case Int8: return 1992;
351
+ case Int16: return 2004;
352
+ case Int32: return 2016;
353
+
354
+ case BFloat16: return 2028;
355
+ case Float16: return 2040;
356
+ case Float32: return 2052;
357
+ case Float64: return 2064;
358
+
359
+ case Complex32: return 2076;
360
+ case Complex64: return 2088;
361
+ case Complex128: return 2100;
362
+
363
+ default: goto invalid_combination;
364
+ }
365
+ }
366
+
367
+ case Complex128: {
368
+ switch (t1->tag) {
369
+ case Uint8: return 2112;
370
+ case Uint16: return 2124;
371
+ case Uint32: return 2136;
372
+
373
+ case Int8: return 2148;
374
+ case Int16: return 2160;
375
+ case Int32: return 2172;
376
+
377
+ case BFloat16: return 2184;
378
+ case Float16: return 2196;
379
+ case Float32: return 2208;
380
+ case Float64: return 2220;
381
+
382
+ case Complex32: return 2232;
383
+ case Complex64: return 2244;
384
+ case Complex128: return 2256;
385
+
386
+ default: goto invalid_combination;
387
+ }
388
+ }
389
+
390
+ default:
391
+ goto invalid_combination;
392
+ }
393
+
394
+ invalid_combination:
395
+ ndt_err_format(ctx, NDT_ValueError, "invalid dtype");
396
+ return -1;
397
+ }
398
+
399
+ static int
400
+ bitwise_kernel_location(const ndt_t *in0, const ndt_t *in1, ndt_context_t *ctx)
401
+ {
402
+ const ndt_t *t0 = ndt_dtype(in0);
403
+ const ndt_t *t1 = ndt_dtype(in1);
404
+
405
+ switch (t0->tag) {
406
+ case Bool: {
407
+ switch (t1->tag) {
408
+ case Bool: return 0;
409
+
410
+ case Uint8: return 12;
411
+ case Uint16: return 24;
412
+ case Uint32: return 36;
413
+ case Uint64: return 48;
414
+
415
+ case Int8: return 60;
416
+ case Int16: return 72;
417
+ case Int32: return 84;
418
+ case Int64: return 96;
419
+
420
+ default: goto invalid_combination;
421
+ }
422
+ }
423
+
424
+ case Uint8: {
425
+ switch (t1->tag) {
426
+ case Bool: return 108;
427
+
428
+ case Uint8: return 120;
429
+ case Uint16: return 132;
430
+ case Uint32: return 144;
431
+ case Uint64: return 156;
432
+
433
+ case Int8: return 168;
434
+ case Int16: return 180;
435
+ case Int32: return 192;
436
+ case Int64: return 204;
437
+
438
+ default: goto invalid_combination;
439
+ }
440
+ }
441
+ case Uint16: {
442
+ switch (t1->tag) {
443
+ case Bool: return 216;
444
+
445
+ case Int8: return 228;
446
+ case Int16: return 240;
447
+ case Int32: return 252;
448
+ case Int64: return 264;
449
+
450
+ case Uint8: return 276;
451
+ case Uint16: return 288;
452
+ case Uint32: return 300;
453
+ case Uint64: return 312;
454
+
455
+ default: goto invalid_combination;
456
+ }
457
+ }
458
+ case Uint32: {
459
+ switch (t1->tag) {
460
+ case Bool: return 324;
461
+
462
+ case Uint8: return 336;
463
+ case Uint16: return 348;
464
+ case Uint32: return 360;
465
+ case Uint64: return 372;
466
+
467
+ case Int8: return 384;
468
+ case Int16: return 396;
469
+ case Int32: return 408;
470
+ case Int64: return 420;
471
+
472
+ default: goto invalid_combination;
473
+ }
474
+ }
475
+ case Uint64: {
476
+ switch (t1->tag) {
477
+ case Bool: return 432;
478
+
479
+ case Uint8: return 444;
480
+ case Uint16: return 456;
481
+ case Uint32: return 468;
482
+ case Uint64: return 480;
483
+
484
+ default: goto invalid_combination;
485
+ }
486
+ }
487
+
488
+ case Int8: {
489
+ switch (t1->tag) {
490
+ case Bool: return 492;
491
+
492
+ case Uint8: return 504;
493
+ case Uint16: return 516;
494
+ case Uint32: return 528;
495
+
496
+ case Int8: return 540;
497
+ case Int16: return 552;
498
+ case Int32: return 564;
499
+ case Int64: return 576;
500
+
501
+ default: goto invalid_combination;
502
+ }
503
+ }
504
+ case Int16: {
505
+ switch (t1->tag) {
506
+ case Bool: return 588;
507
+
508
+ case Uint8: return 600;
509
+ case Uint16: return 612;
510
+ case Uint32: return 624;
511
+
512
+ case Int8: return 636;
513
+ case Int16: return 648;
514
+ case Int32: return 660;
515
+ case Int64: return 672;
516
+
517
+ default: goto invalid_combination;
518
+ }
519
+ }
520
+ case Int32: {
521
+ switch (t1->tag) {
522
+ case Bool: return 684;
523
+
524
+ case Uint8: return 696;
525
+ case Uint16: return 708;
526
+ case Uint32: return 720;
527
+
528
+ case Int8: return 732;
529
+ case Int16: return 744;
530
+ case Int32: return 756;
531
+ case Int64: return 768;
532
+
533
+ default: goto invalid_combination;
534
+ }
535
+ }
536
+
537
+ case Int64: {
538
+ switch (t1->tag) {
539
+ case Bool: return 780;
540
+
541
+ case Uint8: return 792;
542
+ case Uint16: return 804;
543
+ case Uint32: return 816;
544
+
545
+ case Int8: return 828;
546
+ case Int16: return 840;
547
+ case Int32: return 852;
548
+ case Int64: return 864;
549
+
550
+ default: goto invalid_combination;
551
+ }
552
+ }
553
+
554
+ default:
555
+ goto invalid_combination;
556
+ }
557
+
558
+ invalid_combination:
559
+ ndt_err_format(ctx, NDT_ValueError, "invalid dtype");
560
+ return -1;
561
+ }
562
+
563
+
564
/*
 * Define check_power_exp_<t1>() for an exponent type that can be negative.
 *
 * The generated function reads a single value of type <t1>_t through a1.
 * It returns 0 if that value is non-negative.  Otherwise it sets an
 * NDT_ValueError in ctx and returns -1.
 */
#define CPU_CHECK_POWER_EXP(t1) \
static inline int \
check_power_exp_##t1(const char *a1, ndt_context_t *ctx) \
{ \
    if (*(const t1##_t *)a1 >= 0) { \
        return 0; \
    } \
    \
    ndt_err_format(ctx, NDT_ValueError, \
        "negative exponents are not allowed for integer powers"); \
    return -1; \
}
577
+
578
/*
 * Define check_power_exp_<t1>() for an exponent type that needs no check.
 *
 * The generated function ignores both arguments and always returns 0.
 */
#define CPU_CHECK_POWER_EXP_SUCCESS(t1) \
static inline int \
check_power_exp_##t1(const char *a1, ndt_context_t *ctx) \
{ \
    (void)ctx; \
    (void)a1; \
    return 0; \
}
587
+
588
+
589
/*
 * Instantiate check_power_exp_<type>() once per exponent type.  The signed
 * integer types get the real sign check; every other type listed here gets
 * the stub that always succeeds.  No instance for float16 or complex32 is
 * defined in this list.
 */
+ CPU_CHECK_POWER_EXP(int8)
590
+ CPU_CHECK_POWER_EXP(int16)
591
+ CPU_CHECK_POWER_EXP(int32)
592
+ CPU_CHECK_POWER_EXP(int64)
593
+
594
+ CPU_CHECK_POWER_EXP_SUCCESS(bool)
595
+
596
+ CPU_CHECK_POWER_EXP_SUCCESS(uint8)
597
+ CPU_CHECK_POWER_EXP_SUCCESS(uint16)
598
+ CPU_CHECK_POWER_EXP_SUCCESS(uint32)
599
+ CPU_CHECK_POWER_EXP_SUCCESS(uint64)
600
+
601
+ CPU_CHECK_POWER_EXP_SUCCESS(bfloat16)
602
+ CPU_CHECK_POWER_EXP_SUCCESS(float32)
603
+ CPU_CHECK_POWER_EXP_SUCCESS(float64)
604
+
605
+ CPU_CHECK_POWER_EXP_SUCCESS(complex64)
606
+ CPU_CHECK_POWER_EXP_SUCCESS(complex128)
607
+
608
+
609
/*
 * Generate the four host-side wrappers for the binary kernel
 * "name : t0, t1 -> t2":
 *
 *   gm_cpu_host_fixed_1D_C_...  operands located with apply_index(), length
 *                               from xnd_fixed_shape(), no steps
 *   gm_cpu_host_fixed_1D_S_...  same, with one xnd_fixed_step() per operand
 *   gm_cpu_host_array_1D_C_...  operands read with XND_ARRAY_DATA(); the
 *                               second and third operand must pass
 *                               array_shape_check() against the length of
 *                               the first
 *   gm_cpu_host_0D_...          operands taken directly from stack[i].ptr
 *
 * Each wrapper takes the inputs from stack[0] and stack[1] and the output
 * from stack[2], calls the matching gm_cpu_device_* kernel and then updates
 * the bitmap: always if the output dtype is optional, otherwise only for
 * the kernel named "equaln".  For the kernel named "power" the exponent is
 * first validated with check_power_exp_<t1>().
 *
 * NOTE(review): that check is handed only the pointer obtained for
 * stack[1]; confirm whether the remaining exponents of a 1D operand are
 * validated elsewhere.
 *
 * Each wrapper returns 0 on success and -1 if a check failed.
 */
#define CPU_HOST_BINARY(name, t0, t1, t2) \
static int \
gm_cpu_host_fixed_1D_C_##name##_##t0##_##t1##_##t2(xnd_t stack[], ndt_context_t *ctx) \
{ \
    const char *in0 = apply_index(&stack[0]); \
    const char *in1 = apply_index(&stack[1]); \
    char *out = apply_index(&stack[2]); \
    const int64_t nelem = xnd_fixed_shape(&stack[0]); \
    \
    if (strcmp(STRINGIZE(name), "power") == 0 && \
        check_power_exp_##t1(in1, ctx) < 0) { \
        return -1; \
    } \
    \
    gm_cpu_device_fixed_1D_C_##name##_##t0##_##t1##_##t2(in0, in1, out, nelem); \
    \
    if (ndt_is_optional(ndt_dtype(stack[2].type))) { \
        binary_update_bitmap_1D_S(stack); \
    } \
    else if (strcmp(STRINGIZE(name), "equaln") == 0) { \
        binary_update_bitmap_1D_S_bool(stack); \
    } \
    \
    return 0; \
} \
\
static int \
gm_cpu_host_fixed_1D_S_##name##_##t0##_##t1##_##t2(xnd_t stack[], ndt_context_t *ctx) \
{ \
    const char *in0 = apply_index(&stack[0]); \
    const char *in1 = apply_index(&stack[1]); \
    char *out = apply_index(&stack[2]); \
    const int64_t nelem = xnd_fixed_shape(&stack[0]); \
    const int64_t step0 = xnd_fixed_step(&stack[0]); \
    const int64_t step1 = xnd_fixed_step(&stack[1]); \
    const int64_t step2 = xnd_fixed_step(&stack[2]); \
    \
    if (strcmp(STRINGIZE(name), "power") == 0 && \
        check_power_exp_##t1(in1, ctx) < 0) { \
        return -1; \
    } \
    \
    gm_cpu_device_fixed_1D_S_##name##_##t0##_##t1##_##t2(in0, in1, out, \
                                                         step0, step1, step2, \
                                                         nelem); \
    \
    if (ndt_is_optional(ndt_dtype(stack[2].type))) { \
        binary_update_bitmap_1D_S(stack); \
    } \
    else if (strcmp(STRINGIZE(name), "equaln") == 0) { \
        binary_update_bitmap_1D_S_bool(stack); \
    } \
    \
    return 0; \
} \
\
static int \
gm_cpu_host_array_1D_C_##name##_##t0##_##t1##_##t2(xnd_t stack[], ndt_context_t *ctx) \
{ \
    const char *in0 = XND_ARRAY_DATA(stack[0].ptr); \
    const int64_t nelem = XND_ARRAY_SHAPE(stack[0].ptr); \
    const char *in1; \
    char *out; \
    \
    /* Each data pointer is read only after its shape check has passed. */ \
    if (array_shape_check(&stack[1], nelem, ctx) < 0) { \
        return -1; \
    } \
    in1 = XND_ARRAY_DATA(stack[1].ptr); \
    \
    if (array_shape_check(&stack[2], nelem, ctx) < 0) { \
        return -1; \
    } \
    out = XND_ARRAY_DATA(stack[2].ptr); \
    \
    if (strcmp(STRINGIZE(name), "power") == 0 && \
        check_power_exp_##t1(in1, ctx) < 0) { \
        return -1; \
    } \
    \
    gm_cpu_device_fixed_1D_C_##name##_##t0##_##t1##_##t2(in0, in1, out, nelem); \
    \
    if (ndt_is_optional(ndt_dtype(stack[2].type))) { \
        binary_update_bitmap_1D_S(stack); \
    } \
    else if (strcmp(STRINGIZE(name), "equaln") == 0) { \
        binary_update_bitmap_1D_S_bool(stack); \
    } \
    \
    return 0; \
} \
\
static int \
gm_cpu_host_0D_##name##_##t0##_##t1##_##t2(xnd_t stack[], ndt_context_t *ctx) \
{ \
    const char *in0 = stack[0].ptr; \
    const char *in1 = stack[1].ptr; \
    char *out = stack[2].ptr; \
    \
    if (strcmp(STRINGIZE(name), "power") == 0 && \
        check_power_exp_##t1(in1, ctx) < 0) { \
        return -1; \
    } \
    \
    gm_cpu_device_0D_##name##_##t0##_##t1##_##t2(in0, in1, out); \
    \
    if (ndt_is_optional(ndt_dtype(stack[2].type))) { \
        binary_update_bitmap_0D(stack); \
    } \
    else if (strcmp(STRINGIZE(name), "equaln") == 0) { \
        binary_update_bitmap_0D_bool(stack); \
    } \
    \
    return 0; \
}
727
+
728
+
729
/*
 * Generate placeholder wrappers for a kernel "name : t0, t1 -> t2" that is
 * not implemented.
 *
 * The four functions carry the same names as the ones produced by
 * CPU_HOST_BINARY.  Each of them ignores stack, sets an
 * NDT_NotImplementedError in ctx that names the kernel and its types, and
 * returns -1.
 */
#define CPU_HOST_NOIMPL(name, t0, t1, t2) \
static int \
gm_cpu_host_fixed_1D_C_##name##_##t0##_##t1##_##t2(xnd_t stack[], ndt_context_t *ctx) \
{ \
    ndt_err_format(ctx, NDT_NotImplementedError, \
        "implementation for " STRINGIZE(name) " : " \
        STRINGIZE(t0) ", " STRINGIZE(t1) " -> " STRINGIZE(t2) \
        " currently requires double rounding"); \
    (void)stack; \
    return -1; \
} \
\
static int \
gm_cpu_host_fixed_1D_S_##name##_##t0##_##t1##_##t2(xnd_t stack[], ndt_context_t *ctx) \
{ \
    ndt_err_format(ctx, NDT_NotImplementedError, \
        "implementation for " STRINGIZE(name) " : " \
        STRINGIZE(t0) ", " STRINGIZE(t1) " -> " STRINGIZE(t2) \
        " currently requires double rounding"); \
    (void)stack; \
    return -1; \
} \
\
static int \
gm_cpu_host_array_1D_C_##name##_##t0##_##t1##_##t2(xnd_t stack[], ndt_context_t *ctx) \
{ \
    ndt_err_format(ctx, NDT_NotImplementedError, \
        "implementation for " STRINGIZE(name) " : " \
        STRINGIZE(t0) ", " STRINGIZE(t1) " -> " STRINGIZE(t2) \
        " currently requires double rounding"); \
    (void)stack; \
    return -1; \
} \
\
static int \
gm_cpu_host_0D_##name##_##t0##_##t1##_##t2(xnd_t stack[], ndt_context_t *ctx) \
{ \
    ndt_err_format(ctx, NDT_NotImplementedError, \
        "implementation for " STRINGIZE(name) " : " \
        STRINGIZE(t0) ", " STRINGIZE(t1) " -> " STRINGIZE(t2) \
        " currently requires double rounding"); \
    (void)stack; \
    return -1; \
}
781
+
782
/*
 * Generate placeholder wrappers for a type combination
 * "name : t0, t1 -> t2" for which no kernel exists.
 *
 * The four functions carry the same names as the ones produced by
 * CPU_HOST_BINARY.  Each of them ignores stack, sets an NDT_TypeError in
 * ctx that names the kernel and its types, and returns -1.
 */
#define CPU_HOST_NOKERN(name, t0, t1, t2) \
static int \
gm_cpu_host_fixed_1D_C_##name##_##t0##_##t1##_##t2(xnd_t stack[], ndt_context_t *ctx) \
{ \
    ndt_err_format(ctx, NDT_TypeError, \
        "no kernel for " STRINGIZE(name) " : " \
        STRINGIZE(t0) ", " STRINGIZE(t1) " -> " STRINGIZE(t2)); \
    (void)stack; \
    return -1; \
} \
\
static int \
gm_cpu_host_fixed_1D_S_##name##_##t0##_##t1##_##t2(xnd_t stack[], ndt_context_t *ctx) \
{ \
    ndt_err_format(ctx, NDT_TypeError, \
        "no kernel for " STRINGIZE(name) " : " \
        STRINGIZE(t0) ", " STRINGIZE(t1) " -> " STRINGIZE(t2)); \
    (void)stack; \
    return -1; \
} \
\
static int \
gm_cpu_host_array_1D_C_##name##_##t0##_##t1##_##t2(xnd_t stack[], ndt_context_t *ctx) \
{ \
    ndt_err_format(ctx, NDT_TypeError, \
        "no kernel for " STRINGIZE(name) " : " \
        STRINGIZE(t0) ", " STRINGIZE(t1) " -> " STRINGIZE(t2)); \
    (void)stack; \
    return -1; \
} \
\
static int \
gm_cpu_host_0D_##name##_##t0##_##t1##_##t2(xnd_t stack[], ndt_context_t *ctx) \
{ \
    ndt_err_format(ctx, NDT_TypeError, \
        "no kernel for " STRINGIZE(name) " : " \
        STRINGIZE(t0) ", " STRINGIZE(t1) " -> " STRINGIZE(t2)); \
    (void)stack; \
    return -1; \
}
830
+
831
+
832
/*
 * Expand to the 12 initializers for "func : t0, t1 -> t2", with no trailing
 * comma after the last one.
 *
 * There are three groups of four, one per container prefix in the
 * signature: "...", "var..." and "array...".  Inside each group the four
 * entries cover the optional ("?") variants: neither input optional, only
 * the first, only the second, both.  The output is optional whenever at
 * least one input is.
 *
 *   "..."       sets .OptC, .OptS (fixed 1D wrappers) and .Xnd (0D wrapper)
 *   "var..."    sets .Xnd only
 *   "array..."  sets .OptC only (array 1D wrapper)
 *
 * The order of the entries is kept as is.
 */
#define CPU_HOST_BINARY_INIT(func, t0, t1, t2) \
  /* fixed dimensions */ \
  { .name = STRINGIZE(func), \
    .sig = "... * " STRINGIZE(t0) ", ... * " STRINGIZE(t1) " -> ... * " STRINGIZE(t2), \
    .OptC = gm_cpu_host_fixed_1D_C_##func##_##t0##_##t1##_##t2, \
    .OptS = gm_cpu_host_fixed_1D_S_##func##_##t0##_##t1##_##t2, \
    .Xnd = gm_cpu_host_0D_##func##_##t0##_##t1##_##t2 }, \
  { .name = STRINGIZE(func), \
    .sig = "... * ?" STRINGIZE(t0) ", ... * " STRINGIZE(t1) " -> ... * ?" STRINGIZE(t2), \
    .OptC = gm_cpu_host_fixed_1D_C_##func##_##t0##_##t1##_##t2, \
    .OptS = gm_cpu_host_fixed_1D_S_##func##_##t0##_##t1##_##t2, \
    .Xnd = gm_cpu_host_0D_##func##_##t0##_##t1##_##t2 }, \
  { .name = STRINGIZE(func), \
    .sig = "... * " STRINGIZE(t0) ", ... * ?" STRINGIZE(t1) " -> ... * ?" STRINGIZE(t2), \
    .OptC = gm_cpu_host_fixed_1D_C_##func##_##t0##_##t1##_##t2, \
    .OptS = gm_cpu_host_fixed_1D_S_##func##_##t0##_##t1##_##t2, \
    .Xnd = gm_cpu_host_0D_##func##_##t0##_##t1##_##t2 }, \
  { .name = STRINGIZE(func), \
    .sig = "... * ?" STRINGIZE(t0) ", ... * ?" STRINGIZE(t1) " -> ... * ?" STRINGIZE(t2), \
    .OptC = gm_cpu_host_fixed_1D_C_##func##_##t0##_##t1##_##t2, \
    .OptS = gm_cpu_host_fixed_1D_S_##func##_##t0##_##t1##_##t2, \
    .Xnd = gm_cpu_host_0D_##func##_##t0##_##t1##_##t2 }, \
  \
  /* var dimensions */ \
  { .name = STRINGIZE(func), \
    .sig = "var... * " STRINGIZE(t0) ", var... * " STRINGIZE(t1) " -> var... * " STRINGIZE(t2), \
    .Xnd = gm_cpu_host_0D_##func##_##t0##_##t1##_##t2 }, \
  { .name = STRINGIZE(func), \
    .sig = "var... * ?" STRINGIZE(t0) ", var... * " STRINGIZE(t1) " -> var... * ?" STRINGIZE(t2), \
    .Xnd = gm_cpu_host_0D_##func##_##t0##_##t1##_##t2 }, \
  { .name = STRINGIZE(func), \
    .sig = "var... * " STRINGIZE(t0) ", var... * ?" STRINGIZE(t1) " -> var... * ?" STRINGIZE(t2), \
    .Xnd = gm_cpu_host_0D_##func##_##t0##_##t1##_##t2 }, \
  { .name = STRINGIZE(func), \
    .sig = "var... * ?" STRINGIZE(t0) ", var... * ?" STRINGIZE(t1) " -> var... * ?" STRINGIZE(t2), \
    .Xnd = gm_cpu_host_0D_##func##_##t0##_##t1##_##t2 }, \
  \
  /* array dimensions */ \
  { .name = STRINGIZE(func), \
    .sig = "array... * " STRINGIZE(t0) ", array... * " STRINGIZE(t1) " -> array... * " STRINGIZE(t2), \
    .OptC = gm_cpu_host_array_1D_C_##func##_##t0##_##t1##_##t2 }, \
  { .name = STRINGIZE(func), \
    .sig = "array... * ?" STRINGIZE(t0) ", array... * " STRINGIZE(t1) " -> array... * ?" STRINGIZE(t2), \
    .OptC = gm_cpu_host_array_1D_C_##func##_##t0##_##t1##_##t2 }, \
  { .name = STRINGIZE(func), \
    .sig = "array... * " STRINGIZE(t0) ", array... * ?" STRINGIZE(t1) " -> array... * ?" STRINGIZE(t2), \
    .OptC = gm_cpu_host_array_1D_C_##func##_##t0##_##t1##_##t2 }, \
  { .name = STRINGIZE(func), \
    .sig = "array... * ?" STRINGIZE(t0) ", array... * ?" STRINGIZE(t1) " -> array... * ?" STRINGIZE(t2), \
    .OptC = gm_cpu_host_array_1D_C_##func##_##t0##_##t1##_##t2 }
888
+
889
+ /* CPU_HOST_EQUALN_INIT(func, t0, t1, t2): expands to twelve comma-separated kernel-table initializers (no trailing comma) for func with input types t0, t1 and result type t2. Kernel names are built by pasting func, t0, t1 and t2 together. The result type is never marked optional here, whichever inputs are. STRINGIZE is defined outside this section. */
890
+ #define CPU_HOST_EQUALN_INIT(func, t0, t1, t2) \
891
+ { .name = STRINGIZE(func), \
892
+ .sig = "... * " STRINGIZE(t0) ", ... * " STRINGIZE(t1) " -> ... * " STRINGIZE(t2), \
893
+ .OptC = gm_cpu_host_fixed_1D_C_##func##_##t0##_##t1##_##t2, \
894
+ .OptS = gm_cpu_host_fixed_1D_S_##func##_##t0##_##t1##_##t2, \
895
+ .Xnd = gm_cpu_host_0D_##func##_##t0##_##t1##_##t2 }, \
896
+ /* same three kernels, first input optional */ \
897
+ { .name = STRINGIZE(func), \
898
+ .sig = "... * ?" STRINGIZE(t0) ", ... * " STRINGIZE(t1) " -> ... * " STRINGIZE(t2), \
899
+ .OptC = gm_cpu_host_fixed_1D_C_##func##_##t0##_##t1##_##t2, \
900
+ .OptS = gm_cpu_host_fixed_1D_S_##func##_##t0##_##t1##_##t2, \
901
+ .Xnd = gm_cpu_host_0D_##func##_##t0##_##t1##_##t2 }, \
902
+ /* same three kernels, second input optional */ \
903
+ { .name = STRINGIZE(func), \
904
+ .sig = "... * " STRINGIZE(t0) ", ... * ?" STRINGIZE(t1) " -> ... * " STRINGIZE(t2), \
905
+ .OptC = gm_cpu_host_fixed_1D_C_##func##_##t0##_##t1##_##t2, \
906
+ .OptS = gm_cpu_host_fixed_1D_S_##func##_##t0##_##t1##_##t2, \
907
+ .Xnd = gm_cpu_host_0D_##func##_##t0##_##t1##_##t2 }, \
908
+ /* same three kernels, both inputs optional */ \
909
+ { .name = STRINGIZE(func), \
910
+ .sig = "... * ?" STRINGIZE(t0) ", ... * ?" STRINGIZE(t1) " -> ... * " STRINGIZE(t2), \
911
+ .OptC = gm_cpu_host_fixed_1D_C_##func##_##t0##_##t1##_##t2, \
912
+ .OptS = gm_cpu_host_fixed_1D_S_##func##_##t0##_##t1##_##t2, \
913
+ .Xnd = gm_cpu_host_0D_##func##_##t0##_##t1##_##t2 }, \
914
+ /* var... signatures: only the Xnd kernel is set */ \
915
+ { .name = STRINGIZE(func), \
916
+ .sig = "var... * " STRINGIZE(t0) ", var... * " STRINGIZE(t1) " -> var... * " STRINGIZE(t2), \
917
+ .Xnd = gm_cpu_host_0D_##func##_##t0##_##t1##_##t2 }, \
918
+ /* var..., first input optional */ \
919
+ { .name = STRINGIZE(func), \
920
+ .sig = "var... * ?" STRINGIZE(t0) ", var... * " STRINGIZE(t1) " -> var... * " STRINGIZE(t2), \
921
+ .Xnd = gm_cpu_host_0D_##func##_##t0##_##t1##_##t2 }, \
922
+ /* var..., second input optional */ \
923
+ { .name = STRINGIZE(func), \
924
+ .sig = "var... * " STRINGIZE(t0) ", var... * ?" STRINGIZE(t1) " -> var... * " STRINGIZE(t2), \
925
+ .Xnd = gm_cpu_host_0D_##func##_##t0##_##t1##_##t2 }, \
926
+ /* var..., both inputs optional */ \
927
+ { .name = STRINGIZE(func), \
928
+ .sig = "var... * ?" STRINGIZE(t0) ", var... * ?" STRINGIZE(t1) " -> var... * " STRINGIZE(t2), \
929
+ .Xnd = gm_cpu_host_0D_##func##_##t0##_##t1##_##t2 }, \
930
+ /* array... signatures: only the OptC kernel is set */ \
931
+ { .name = STRINGIZE(func), \
932
+ .sig = "array... * " STRINGIZE(t0) ", array... * " STRINGIZE(t1) " -> array... * " STRINGIZE(t2), \
933
+ .OptC = gm_cpu_host_array_1D_C_##func##_##t0##_##t1##_##t2 }, \
934
+ /* array..., first input optional */ \
935
+ { .name = STRINGIZE(func), \
936
+ .sig = "array... * ?" STRINGIZE(t0) ", array... * " STRINGIZE(t1) " -> array... * " STRINGIZE(t2), \
937
+ .OptC = gm_cpu_host_array_1D_C_##func##_##t0##_##t1##_##t2 }, \
938
+ /* array..., second input optional */ \
939
+ { .name = STRINGIZE(func), \
940
+ .sig = "array... * " STRINGIZE(t0) ", array... * ?" STRINGIZE(t1) " -> array... * " STRINGIZE(t2), \
941
+ .OptC = gm_cpu_host_array_1D_C_##func##_##t0##_##t1##_##t2 }, \
942
+ /* array..., both inputs optional */ \
943
+ { .name = STRINGIZE(func), \
944
+ .sig = "array... * ?" STRINGIZE(t0) ", array... * ?" STRINGIZE(t1) " -> array... * " STRINGIZE(t2), \
945
+ .OptC = gm_cpu_host_array_1D_C_##func##_##t0##_##t1##_##t2 }
946
+
947
+ /* Drop any macro definition of bool and define bool_t as _Bool. NOTE(review): presumably done so that bool, when passed as a type-name argument to the kernel macros, is not macro-expanded (for example inside STRINGIZE) - confirm against the macro definitions. */
948
+ #undef bool
949
+ #define bool_t _Bool
950
+
951
+
952
+ /*****************************************************************************/
953
+ /* Arithmetic */
954
+ /*****************************************************************************/
955
+ /* CPU_HOST_ALL_ARITHMETIC(name): expands to one CPU_HOST_BINARY or CPU_HOST_NOIMPL invocation per type triple, passing name through unchanged. Rows are grouped by the first type, starting with uint8. Every triple that involves float16 or complex32 uses CPU_HOST_NOIMPL. Groups led by uint64 or int64 list integer partners only. NOTE(review): the triple is presumably (input, input, result) and the invoked macros are defined outside this section - confirm. */
956
+ #define CPU_HOST_ALL_ARITHMETIC(name) \
957
+ CPU_HOST_BINARY(name, uint8, uint8, uint8) \
958
+ CPU_HOST_BINARY(name, uint8, uint16, uint16) \
959
+ CPU_HOST_BINARY(name, uint8, uint32, uint32) \
960
+ CPU_HOST_BINARY(name, uint8, uint64, uint64) \
961
+ CPU_HOST_BINARY(name, uint8, int8, int16) \
962
+ CPU_HOST_BINARY(name, uint8, int16, int16) \
963
+ CPU_HOST_BINARY(name, uint8, int32, int32) \
964
+ CPU_HOST_BINARY(name, uint8, int64, int64) \
965
+ CPU_HOST_BINARY(name, uint8, bfloat16, bfloat16) \
966
+ CPU_HOST_NOIMPL(name, uint8, float16, float16) \
967
+ CPU_HOST_BINARY(name, uint8, float32, float32) \
968
+ CPU_HOST_BINARY(name, uint8, float64, float64) \
969
+ CPU_HOST_NOIMPL(name, uint8, complex32, complex32) \
970
+ CPU_HOST_BINARY(name, uint8, complex64, complex64) \
971
+ CPU_HOST_BINARY(name, uint8, complex128, complex128) \
972
+ /* first type: uint16 */ \
973
+ CPU_HOST_BINARY(name, uint16, uint8, uint16) \
974
+ CPU_HOST_BINARY(name, uint16, uint16, uint16) \
975
+ CPU_HOST_BINARY(name, uint16, uint32, uint32) \
976
+ CPU_HOST_BINARY(name, uint16, uint64, uint64) \
977
+ CPU_HOST_BINARY(name, uint16, int8, int32) \
978
+ CPU_HOST_BINARY(name, uint16, int16, int32) \
979
+ CPU_HOST_BINARY(name, uint16, int32, int32) \
980
+ CPU_HOST_BINARY(name, uint16, int64, int64) \
981
+ CPU_HOST_BINARY(name, uint16, bfloat16, float32) \
982
+ CPU_HOST_NOIMPL(name, uint16, float16, float32) \
983
+ CPU_HOST_BINARY(name, uint16, float32, float32) \
984
+ CPU_HOST_BINARY(name, uint16, float64, float64) \
985
+ CPU_HOST_NOIMPL(name, uint16, complex32, complex64) \
986
+ CPU_HOST_BINARY(name, uint16, complex64, complex64) \
987
+ CPU_HOST_BINARY(name, uint16, complex128, complex128) \
988
+ /* first type: uint32 */ \
989
+ CPU_HOST_BINARY(name, uint32, uint8, uint32) \
990
+ CPU_HOST_BINARY(name, uint32, uint16, uint32) \
991
+ CPU_HOST_BINARY(name, uint32, uint32, uint32) \
992
+ CPU_HOST_BINARY(name, uint32, uint64, uint64) \
993
+ CPU_HOST_BINARY(name, uint32, int8, int64) \
994
+ CPU_HOST_BINARY(name, uint32, int16, int64) \
995
+ CPU_HOST_BINARY(name, uint32, int32, int64) \
996
+ CPU_HOST_BINARY(name, uint32, int64, int64) \
997
+ CPU_HOST_BINARY(name, uint32, bfloat16, float64) \
998
+ CPU_HOST_NOIMPL(name, uint32, float16, float64) \
999
+ CPU_HOST_BINARY(name, uint32, float32, float64) \
1000
+ CPU_HOST_BINARY(name, uint32, float64, float64) \
1001
+ CPU_HOST_NOIMPL(name, uint32, complex32, complex128) \
1002
+ CPU_HOST_BINARY(name, uint32, complex64, complex128) \
1003
+ CPU_HOST_BINARY(name, uint32, complex128, complex128) \
1004
+ /* first type: uint64 (unsigned partners only) */ \
1005
+ CPU_HOST_BINARY(name, uint64, uint8, uint64) \
1006
+ CPU_HOST_BINARY(name, uint64, uint16, uint64) \
1007
+ CPU_HOST_BINARY(name, uint64, uint32, uint64) \
1008
+ CPU_HOST_BINARY(name, uint64, uint64, uint64) \
1009
+ /* first type: int8 */ \
1010
+ CPU_HOST_BINARY(name, int8, uint8, int16) \
1011
+ CPU_HOST_BINARY(name, int8, uint16, int32) \
1012
+ CPU_HOST_BINARY(name, int8, uint32, int64) \
1013
+ CPU_HOST_BINARY(name, int8, int8, int8) \
1014
+ CPU_HOST_BINARY(name, int8, int16, int16) \
1015
+ CPU_HOST_BINARY(name, int8, int32, int32) \
1016
+ CPU_HOST_BINARY(name, int8, int64, int64) \
1017
+ CPU_HOST_BINARY(name, int8, bfloat16, bfloat16) \
1018
+ CPU_HOST_NOIMPL(name, int8, float16, float16) \
1019
+ CPU_HOST_BINARY(name, int8, float32, float32) \
1020
+ CPU_HOST_BINARY(name, int8, float64, float64) \
1021
+ CPU_HOST_NOIMPL(name, int8, complex32, complex32) \
1022
+ CPU_HOST_BINARY(name, int8, complex64, complex64) \
1023
+ CPU_HOST_BINARY(name, int8, complex128, complex128) \
1024
+ /* first type: int16 */ \
1025
+ CPU_HOST_BINARY(name, int16, uint8, int16) \
1026
+ CPU_HOST_BINARY(name, int16, uint16, int32) \
1027
+ CPU_HOST_BINARY(name, int16, uint32, int64) \
1028
+ CPU_HOST_BINARY(name, int16, int8, int16) \
1029
+ CPU_HOST_BINARY(name, int16, int16, int16) \
1030
+ CPU_HOST_BINARY(name, int16, int32, int32) \
1031
+ CPU_HOST_BINARY(name, int16, int64, int64) \
1032
+ CPU_HOST_BINARY(name, int16, bfloat16, float32) \
1033
+ CPU_HOST_NOIMPL(name, int16, float16, float32) \
1034
+ CPU_HOST_BINARY(name, int16, float32, float32) \
1035
+ CPU_HOST_BINARY(name, int16, float64, float64) \
1036
+ CPU_HOST_NOIMPL(name, int16, complex32, complex64) \
1037
+ CPU_HOST_BINARY(name, int16, complex64, complex64) \
1038
+ CPU_HOST_BINARY(name, int16, complex128, complex128) \
1039
+ /* first type: int32 */ \
1040
+ CPU_HOST_BINARY(name, int32, uint8, int32) \
1041
+ CPU_HOST_BINARY(name, int32, uint16, int32) \
1042
+ CPU_HOST_BINARY(name, int32, uint32, int64) \
1043
+ CPU_HOST_BINARY(name, int32, int8, int32) \
1044
+ CPU_HOST_BINARY(name, int32, int16, int32) \
1045
+ CPU_HOST_BINARY(name, int32, int32, int32) \
1046
+ CPU_HOST_BINARY(name, int32, int64, int64) \
1047
+ CPU_HOST_BINARY(name, int32, bfloat16, float64) \
1048
+ CPU_HOST_NOIMPL(name, int32, float16, float64) \
1049
+ CPU_HOST_BINARY(name, int32, float32, float64) \
1050
+ CPU_HOST_BINARY(name, int32, float64, float64) \
1051
+ CPU_HOST_NOIMPL(name, int32, complex32, complex128) \
1052
+ CPU_HOST_BINARY(name, int32, complex64, complex128) \
1053
+ CPU_HOST_BINARY(name, int32, complex128, complex128) \
1054
+ /* first type: int64 (integer partners only, no uint64) */ \
1055
+ CPU_HOST_BINARY(name, int64, uint8, int64) \
1056
+ CPU_HOST_BINARY(name, int64, uint16, int64) \
1057
+ CPU_HOST_BINARY(name, int64, uint32, int64) \
1058
+ CPU_HOST_BINARY(name, int64, int8, int64) \
1059
+ CPU_HOST_BINARY(name, int64, int16, int64) \
1060
+ CPU_HOST_BINARY(name, int64, int32, int64) \
1061
+ CPU_HOST_BINARY(name, int64, int64, int64) \
1062
+ /* first type: bfloat16 */ \
1063
+ CPU_HOST_BINARY(name, bfloat16, uint8, bfloat16) \
1064
+ CPU_HOST_BINARY(name, bfloat16, uint16, float32) \
1065
+ CPU_HOST_BINARY(name, bfloat16, uint32, float64) \
1066
+ CPU_HOST_BINARY(name, bfloat16, int8, bfloat16) \
1067
+ CPU_HOST_BINARY(name, bfloat16, int16, float32) \
1068
+ CPU_HOST_BINARY(name, bfloat16, int32, float64) \
1069
+ CPU_HOST_BINARY(name, bfloat16, bfloat16, bfloat16) \
1070
+ CPU_HOST_NOIMPL(name, bfloat16, float16, float32) \
1071
+ CPU_HOST_BINARY(name, bfloat16, float32, float32) \
1072
+ CPU_HOST_BINARY(name, bfloat16, float64, float64) \
1073
+ CPU_HOST_NOIMPL(name, bfloat16, complex32, complex64) \
1074
+ CPU_HOST_BINARY(name, bfloat16, complex64, complex64) \
1075
+ CPU_HOST_BINARY(name, bfloat16, complex128, complex128) \
1076
+ /* first type: float16 (every row is CPU_HOST_NOIMPL) */ \
1077
+ CPU_HOST_NOIMPL(name, float16, uint8, float16) \
1078
+ CPU_HOST_NOIMPL(name, float16, uint16, float32) \
1079
+ CPU_HOST_NOIMPL(name, float16, uint32, float64) \
1080
+ CPU_HOST_NOIMPL(name, float16, int8, float16) \
1081
+ CPU_HOST_NOIMPL(name, float16, int16, float32) \
1082
+ CPU_HOST_NOIMPL(name, float16, int32, float64) \
1083
+ CPU_HOST_NOIMPL(name, float16, bfloat16, float32) \
1084
+ CPU_HOST_NOIMPL(name, float16, float16, float16) \
1085
+ CPU_HOST_NOIMPL(name, float16, float32, float32) \
1086
+ CPU_HOST_NOIMPL(name, float16, float64, float64) \
1087
+ CPU_HOST_NOIMPL(name, float16, complex32, complex32) \
1088
+ CPU_HOST_NOIMPL(name, float16, complex64, complex64) \
1089
+ CPU_HOST_NOIMPL(name, float16, complex128, complex128) \
1090
+ /* first type: float32 */ \
1091
+ CPU_HOST_BINARY(name, float32, uint8, float32) \
1092
+ CPU_HOST_BINARY(name, float32, uint16, float32) \
1093
+ CPU_HOST_BINARY(name, float32, uint32, float64) \
1094
+ CPU_HOST_BINARY(name, float32, int8, float32) \
1095
+ CPU_HOST_BINARY(name, float32, int16, float32) \
1096
+ CPU_HOST_BINARY(name, float32, int32, float64) \
1097
+ CPU_HOST_BINARY(name, float32, bfloat16, float32) \
1098
+ CPU_HOST_NOIMPL(name, float32, float16, float32) \
1099
+ CPU_HOST_BINARY(name, float32, float32, float32) \
1100
+ CPU_HOST_BINARY(name, float32, float64, float64) \
1101
+ CPU_HOST_NOIMPL(name, float32, complex32, complex64) \
1102
+ CPU_HOST_BINARY(name, float32, complex64, complex64) \
1103
+ CPU_HOST_BINARY(name, float32, complex128, complex128) \
1104
+ /* first type: float64 */ \
1105
+ CPU_HOST_BINARY(name, float64, uint8, float64) \
1106
+ CPU_HOST_BINARY(name, float64, uint16, float64) \
1107
+ CPU_HOST_BINARY(name, float64, uint32, float64) \
1108
+ CPU_HOST_BINARY(name, float64, int8, float64) \
1109
+ CPU_HOST_BINARY(name, float64, int16, float64) \
1110
+ CPU_HOST_BINARY(name, float64, int32, float64) \
1111
+ CPU_HOST_BINARY(name, float64, bfloat16, float64) \
1112
+ CPU_HOST_NOIMPL(name, float64, float16, float64) \
1113
+ CPU_HOST_BINARY(name, float64, float32, float64) \
1114
+ CPU_HOST_BINARY(name, float64, float64, float64) \
1115
+ CPU_HOST_NOIMPL(name, float64, complex32, complex128) \
1116
+ CPU_HOST_BINARY(name, float64, complex64, complex128) \
1117
+ CPU_HOST_BINARY(name, float64, complex128, complex128) \
1118
+ /* first type: complex32 (every row is CPU_HOST_NOIMPL) */ \
1119
+ CPU_HOST_NOIMPL(name, complex32, uint8, complex32) \
1120
+ CPU_HOST_NOIMPL(name, complex32, uint16, complex64) \
1121
+ CPU_HOST_NOIMPL(name, complex32, uint32, complex128) \
1122
+ CPU_HOST_NOIMPL(name, complex32, int8, complex32) \
1123
+ CPU_HOST_NOIMPL(name, complex32, int16, complex64) \
1124
+ CPU_HOST_NOIMPL(name, complex32, int32, complex128) \
1125
+ CPU_HOST_NOIMPL(name, complex32, bfloat16, complex64) \
1126
+ CPU_HOST_NOIMPL(name, complex32, float16, complex32) \
1127
+ CPU_HOST_NOIMPL(name, complex32, float32, complex64) \
1128
+ CPU_HOST_NOIMPL(name, complex32, float64, complex128) \
1129
+ CPU_HOST_NOIMPL(name, complex32, complex32, complex32) \
1130
+ CPU_HOST_NOIMPL(name, complex32, complex64, complex64) \
1131
+ CPU_HOST_NOIMPL(name, complex32, complex128, complex128) \
1132
+ /* first type: complex64 */ \
1133
+ CPU_HOST_BINARY(name, complex64, uint8, complex64) \
1134
+ CPU_HOST_BINARY(name, complex64, uint16, complex64) \
1135
+ CPU_HOST_BINARY(name, complex64, uint32, complex128) \
1136
+ CPU_HOST_BINARY(name, complex64, int8, complex64) \
1137
+ CPU_HOST_BINARY(name, complex64, int16, complex64) \
1138
+ CPU_HOST_BINARY(name, complex64, int32, complex128) \
1139
+ CPU_HOST_BINARY(name, complex64, bfloat16, complex64) \
1140
+ CPU_HOST_NOIMPL(name, complex64, float16, complex64) \
1141
+ CPU_HOST_BINARY(name, complex64, float32, complex64) \
1142
+ CPU_HOST_BINARY(name, complex64, float64, complex128) \
1143
+ CPU_HOST_NOIMPL(name, complex64, complex32, complex64) \
1144
+ CPU_HOST_BINARY(name, complex64, complex64, complex64) \
1145
+ CPU_HOST_BINARY(name, complex64, complex128, complex128) \
1146
+ /* first type: complex128 */ \
1147
+ CPU_HOST_BINARY(name, complex128, uint8, complex128) \
1148
+ CPU_HOST_BINARY(name, complex128, uint16, complex128) \
1149
+ CPU_HOST_BINARY(name, complex128, uint32, complex128) \
1150
+ CPU_HOST_BINARY(name, complex128, int8, complex128) \
1151
+ CPU_HOST_BINARY(name, complex128, int16, complex128) \
1152
+ CPU_HOST_BINARY(name, complex128, int32, complex128) \
1153
+ CPU_HOST_BINARY(name, complex128, bfloat16, complex128) \
1154
+ CPU_HOST_NOIMPL(name, complex128, float16, complex128) \
1155
+ CPU_HOST_BINARY(name, complex128, float32, complex128) \
1156
+ CPU_HOST_BINARY(name, complex128, float64, complex128) \
1157
+ CPU_HOST_NOIMPL(name, complex128, complex32, complex128) \
1158
+ CPU_HOST_BINARY(name, complex128, complex64, complex128) \
1159
+ CPU_HOST_BINARY(name, complex128, complex128, complex128)
1160
+ /* CPU_HOST_ALL_ARITHMETIC_NO_COMPLEX(name): expands to one invocation per type triple, grouped by the first type as in CPU_HOST_ALL_ARITHMETIC. Every triple with a complex operand uses CPU_HOST_NOKERN, the remaining triples that involve float16 use CPU_HOST_NOIMPL, and all others use CPU_HOST_BINARY. NOTE(review): CPU_HOST_NOKERN is defined outside this section; presumably it marks combinations for which no kernel exists - confirm. */
1161
+ #define CPU_HOST_ALL_ARITHMETIC_NO_COMPLEX(name) \
1162
+ CPU_HOST_BINARY(name, uint8, uint8, uint8) \
1163
+ CPU_HOST_BINARY(name, uint8, uint16, uint16) \
1164
+ CPU_HOST_BINARY(name, uint8, uint32, uint32) \
1165
+ CPU_HOST_BINARY(name, uint8, uint64, uint64) \
1166
+ CPU_HOST_BINARY(name, uint8, int8, int16) \
1167
+ CPU_HOST_BINARY(name, uint8, int16, int16) \
1168
+ CPU_HOST_BINARY(name, uint8, int32, int32) \
1169
+ CPU_HOST_BINARY(name, uint8, int64, int64) \
1170
+ CPU_HOST_BINARY(name, uint8, bfloat16, bfloat16) \
1171
+ CPU_HOST_NOIMPL(name, uint8, float16, float16) \
1172
+ CPU_HOST_BINARY(name, uint8, float32, float32) \
1173
+ CPU_HOST_BINARY(name, uint8, float64, float64) \
1174
+ CPU_HOST_NOKERN(name, uint8, complex32, complex32) \
1175
+ CPU_HOST_NOKERN(name, uint8, complex64, complex64) \
1176
+ CPU_HOST_NOKERN(name, uint8, complex128, complex128) \
1177
+ /* first type: uint16 */ \
1178
+ CPU_HOST_BINARY(name, uint16, uint8, uint16) \
1179
+ CPU_HOST_BINARY(name, uint16, uint16, uint16) \
1180
+ CPU_HOST_BINARY(name, uint16, uint32, uint32) \
1181
+ CPU_HOST_BINARY(name, uint16, uint64, uint64) \
1182
+ CPU_HOST_BINARY(name, uint16, int8, int32) \
1183
+ CPU_HOST_BINARY(name, uint16, int16, int32) \
1184
+ CPU_HOST_BINARY(name, uint16, int32, int32) \
1185
+ CPU_HOST_BINARY(name, uint16, int64, int64) \
1186
+ CPU_HOST_BINARY(name, uint16, bfloat16, float32) \
1187
+ CPU_HOST_NOIMPL(name, uint16, float16, float32) \
1188
+ CPU_HOST_BINARY(name, uint16, float32, float32) \
1189
+ CPU_HOST_BINARY(name, uint16, float64, float64) \
1190
+ CPU_HOST_NOKERN(name, uint16, complex32, complex64) \
1191
+ CPU_HOST_NOKERN(name, uint16, complex64, complex64) \
1192
+ CPU_HOST_NOKERN(name, uint16, complex128, complex128) \
1193
+ /* first type: uint32 */ \
1194
+ CPU_HOST_BINARY(name, uint32, uint8, uint32) \
1195
+ CPU_HOST_BINARY(name, uint32, uint16, uint32) \
1196
+ CPU_HOST_BINARY(name, uint32, uint32, uint32) \
1197
+ CPU_HOST_BINARY(name, uint32, uint64, uint64) \
1198
+ CPU_HOST_BINARY(name, uint32, int8, int64) \
1199
+ CPU_HOST_BINARY(name, uint32, int16, int64) \
1200
+ CPU_HOST_BINARY(name, uint32, int32, int64) \
1201
+ CPU_HOST_BINARY(name, uint32, int64, int64) \
1202
+ CPU_HOST_BINARY(name, uint32, bfloat16, float64) \
1203
+ CPU_HOST_NOIMPL(name, uint32, float16, float64) \
1204
+ CPU_HOST_BINARY(name, uint32, float32, float64) \
1205
+ CPU_HOST_BINARY(name, uint32, float64, float64) \
1206
+ CPU_HOST_NOKERN(name, uint32, complex32, complex128) \
1207
+ CPU_HOST_NOKERN(name, uint32, complex64, complex128) \
1208
+ CPU_HOST_NOKERN(name, uint32, complex128, complex128) \
1209
+ /* first type: uint64 (unsigned partners only) */ \
1210
+ CPU_HOST_BINARY(name, uint64, uint8, uint64) \
1211
+ CPU_HOST_BINARY(name, uint64, uint16, uint64) \
1212
+ CPU_HOST_BINARY(name, uint64, uint32, uint64) \
1213
+ CPU_HOST_BINARY(name, uint64, uint64, uint64) \
1214
+ /* first type: int8 */ \
1215
+ CPU_HOST_BINARY(name, int8, uint8, int16) \
1216
+ CPU_HOST_BINARY(name, int8, uint16, int32) \
1217
+ CPU_HOST_BINARY(name, int8, uint32, int64) \
1218
+ CPU_HOST_BINARY(name, int8, int8, int8) \
1219
+ CPU_HOST_BINARY(name, int8, int16, int16) \
1220
+ CPU_HOST_BINARY(name, int8, int32, int32) \
1221
+ CPU_HOST_BINARY(name, int8, int64, int64) \
1222
+ CPU_HOST_BINARY(name, int8, bfloat16, bfloat16) \
1223
+ CPU_HOST_NOIMPL(name, int8, float16, float16) \
1224
+ CPU_HOST_BINARY(name, int8, float32, float32) \
1225
+ CPU_HOST_BINARY(name, int8, float64, float64) \
1226
+ CPU_HOST_NOKERN(name, int8, complex32, complex32) \
1227
+ CPU_HOST_NOKERN(name, int8, complex64, complex64) \
1228
+ CPU_HOST_NOKERN(name, int8, complex128, complex128) \
1229
+ /* first type: int16 */ \
1230
+ CPU_HOST_BINARY(name, int16, uint8, int16) \
1231
+ CPU_HOST_BINARY(name, int16, uint16, int32) \
1232
+ CPU_HOST_BINARY(name, int16, uint32, int64) \
1233
+ CPU_HOST_BINARY(name, int16, int8, int16) \
1234
+ CPU_HOST_BINARY(name, int16, int16, int16) \
1235
+ CPU_HOST_BINARY(name, int16, int32, int32) \
1236
+ CPU_HOST_BINARY(name, int16, int64, int64) \
1237
+ CPU_HOST_BINARY(name, int16, bfloat16, float32) \
1238
+ CPU_HOST_NOIMPL(name, int16, float16, float32) \
1239
+ CPU_HOST_BINARY(name, int16, float32, float32) \
1240
+ CPU_HOST_BINARY(name, int16, float64, float64) \
1241
+ CPU_HOST_NOKERN(name, int16, complex32, complex64) \
1242
+ CPU_HOST_NOKERN(name, int16, complex64, complex64) \
1243
+ CPU_HOST_NOKERN(name, int16, complex128, complex128) \
1244
+ /* first type: int32 */ \
1245
+ CPU_HOST_BINARY(name, int32, uint8, int32) \
1246
+ CPU_HOST_BINARY(name, int32, uint16, int32) \
1247
+ CPU_HOST_BINARY(name, int32, uint32, int64) \
1248
+ CPU_HOST_BINARY(name, int32, int8, int32) \
1249
+ CPU_HOST_BINARY(name, int32, int16, int32) \
1250
+ CPU_HOST_BINARY(name, int32, int32, int32) \
1251
+ CPU_HOST_BINARY(name, int32, int64, int64) \
1252
+ CPU_HOST_BINARY(name, int32, bfloat16, float64) \
1253
+ CPU_HOST_NOIMPL(name, int32, float16, float64) \
1254
+ CPU_HOST_BINARY(name, int32, float32, float64) \
1255
+ CPU_HOST_BINARY(name, int32, float64, float64) \
1256
+ CPU_HOST_NOKERN(name, int32, complex32, complex128) \
1257
+ CPU_HOST_NOKERN(name, int32, complex64, complex128) \
1258
+ CPU_HOST_NOKERN(name, int32, complex128, complex128) \
1259
+ /* first type: int64 (integer partners only, no uint64) */ \
1260
+ CPU_HOST_BINARY(name, int64, uint8, int64) \
1261
+ CPU_HOST_BINARY(name, int64, uint16, int64) \
1262
+ CPU_HOST_BINARY(name, int64, uint32, int64) \
1263
+ CPU_HOST_BINARY(name, int64, int8, int64) \
1264
+ CPU_HOST_BINARY(name, int64, int16, int64) \
1265
+ CPU_HOST_BINARY(name, int64, int32, int64) \
1266
+ CPU_HOST_BINARY(name, int64, int64, int64) \
1267
+ /* first type: bfloat16 */ \
1268
+ CPU_HOST_BINARY(name, bfloat16, uint8, bfloat16) \
1269
+ CPU_HOST_BINARY(name, bfloat16, uint16, float32) \
1270
+ CPU_HOST_BINARY(name, bfloat16, uint32, float64) \
1271
+ CPU_HOST_BINARY(name, bfloat16, int8, bfloat16) \
1272
+ CPU_HOST_BINARY(name, bfloat16, int16, float32) \
1273
+ CPU_HOST_BINARY(name, bfloat16, int32, float64) \
1274
+ CPU_HOST_BINARY(name, bfloat16, bfloat16, bfloat16) \
1275
+ CPU_HOST_NOIMPL(name, bfloat16, float16, float32) \
1276
+ CPU_HOST_BINARY(name, bfloat16, float32, float32) \
1277
+ CPU_HOST_BINARY(name, bfloat16, float64, float64) \
1278
+ CPU_HOST_NOKERN(name, bfloat16, complex32, complex64) \
1279
+ CPU_HOST_NOKERN(name, bfloat16, complex64, complex64) \
1280
+ CPU_HOST_NOKERN(name, bfloat16, complex128, complex128) \
1281
+ /* first type: float16 (CPU_HOST_NOIMPL for real partners, CPU_HOST_NOKERN for complex) */ \
1282
+ CPU_HOST_NOIMPL(name, float16, uint8, float16) \
1283
+ CPU_HOST_NOIMPL(name, float16, uint16, float32) \
1284
+ CPU_HOST_NOIMPL(name, float16, uint32, float64) \
1285
+ CPU_HOST_NOIMPL(name, float16, int8, float16) \
1286
+ CPU_HOST_NOIMPL(name, float16, int16, float32) \
1287
+ CPU_HOST_NOIMPL(name, float16, int32, float64) \
1288
+ CPU_HOST_NOIMPL(name, float16, bfloat16, float32) \
1289
+ CPU_HOST_NOIMPL(name, float16, float16, float16) \
1290
+ CPU_HOST_NOIMPL(name, float16, float32, float32) \
1291
+ CPU_HOST_NOIMPL(name, float16, float64, float64) \
1292
+ CPU_HOST_NOKERN(name, float16, complex32, complex32) \
1293
+ CPU_HOST_NOKERN(name, float16, complex64, complex64) \
1294
+ CPU_HOST_NOKERN(name, float16, complex128, complex128) \
1295
+ /* first type: float32 */ \
1296
+ CPU_HOST_BINARY(name, float32, uint8, float32) \
1297
+ CPU_HOST_BINARY(name, float32, uint16, float32) \
1298
+ CPU_HOST_BINARY(name, float32, uint32, float64) \
1299
+ CPU_HOST_BINARY(name, float32, int8, float32) \
1300
+ CPU_HOST_BINARY(name, float32, int16, float32) \
1301
+ CPU_HOST_BINARY(name, float32, int32, float64) \
1302
+ CPU_HOST_BINARY(name, float32, bfloat16, float32) \
1303
+ CPU_HOST_NOIMPL(name, float32, float16, float32) \
1304
+ CPU_HOST_BINARY(name, float32, float32, float32) \
1305
+ CPU_HOST_BINARY(name, float32, float64, float64) \
1306
+ CPU_HOST_NOKERN(name, float32, complex32, complex64) \
1307
+ CPU_HOST_NOKERN(name, float32, complex64, complex64) \
1308
+ CPU_HOST_NOKERN(name, float32, complex128, complex128) \
1309
+ /* first type: float64 */ \
1310
+ CPU_HOST_BINARY(name, float64, uint8, float64) \
1311
+ CPU_HOST_BINARY(name, float64, uint16, float64) \
1312
+ CPU_HOST_BINARY(name, float64, uint32, float64) \
1313
+ CPU_HOST_BINARY(name, float64, int8, float64) \
1314
+ CPU_HOST_BINARY(name, float64, int16, float64) \
1315
+ CPU_HOST_BINARY(name, float64, int32, float64) \
1316
+ CPU_HOST_BINARY(name, float64, bfloat16, float64) \
1317
+ CPU_HOST_NOIMPL(name, float64, float16, float64) \
1318
+ CPU_HOST_BINARY(name, float64, float32, float64) \
1319
+ CPU_HOST_BINARY(name, float64, float64, float64) \
1320
+ CPU_HOST_NOKERN(name, float64, complex32, complex128) \
1321
+ CPU_HOST_NOKERN(name, float64, complex64, complex128) \
1322
+ CPU_HOST_NOKERN(name, float64, complex128, complex128) \
1323
+ /* first type: complex32 (every row is CPU_HOST_NOKERN) */ \
1324
+ CPU_HOST_NOKERN(name, complex32, uint8, complex32) \
1325
+ CPU_HOST_NOKERN(name, complex32, uint16, complex64) \
1326
+ CPU_HOST_NOKERN(name, complex32, uint32, complex128) \
1327
+ CPU_HOST_NOKERN(name, complex32, int8, complex32) \
1328
+ CPU_HOST_NOKERN(name, complex32, int16, complex64) \
1329
+ CPU_HOST_NOKERN(name, complex32, int32, complex128) \
1330
+ CPU_HOST_NOKERN(name, complex32, bfloat16, complex64) \
1331
+ CPU_HOST_NOKERN(name, complex32, float16, complex32) \
1332
+ CPU_HOST_NOKERN(name, complex32, float32, complex64) \
1333
+ CPU_HOST_NOKERN(name, complex32, float64, complex128) \
1334
+ CPU_HOST_NOKERN(name, complex32, complex32, complex32) \
1335
+ CPU_HOST_NOKERN(name, complex32, complex64, complex64) \
1336
+ CPU_HOST_NOKERN(name, complex32, complex128, complex128) \
1337
+ /* first type: complex64 (every row is CPU_HOST_NOKERN) */ \
1338
+ CPU_HOST_NOKERN(name, complex64, uint8, complex64) \
1339
+ CPU_HOST_NOKERN(name, complex64, uint16, complex64) \
1340
+ CPU_HOST_NOKERN(name, complex64, uint32, complex128) \
1341
+ CPU_HOST_NOKERN(name, complex64, int8, complex64) \
1342
+ CPU_HOST_NOKERN(name, complex64, int16, complex64) \
1343
+ CPU_HOST_NOKERN(name, complex64, int32, complex128) \
1344
+ CPU_HOST_NOKERN(name, complex64, bfloat16, complex64) \
1345
+ CPU_HOST_NOKERN(name, complex64, float16, complex64) \
1346
+ CPU_HOST_NOKERN(name, complex64, float32, complex64) \
1347
+ CPU_HOST_NOKERN(name, complex64, float64, complex128) \
1348
+ CPU_HOST_NOKERN(name, complex64, complex32, complex64) \
1349
+ CPU_HOST_NOKERN(name, complex64, complex64, complex64) \
1350
+ CPU_HOST_NOKERN(name, complex64, complex128, complex128) \
1351
+ /* first type: complex128 (every row is CPU_HOST_NOKERN) */ \
1352
+ CPU_HOST_NOKERN(name, complex128, uint8, complex128) \
1353
+ CPU_HOST_NOKERN(name, complex128, uint16, complex128) \
1354
+ CPU_HOST_NOKERN(name, complex128, uint32, complex128) \
1355
+ CPU_HOST_NOKERN(name, complex128, int8, complex128) \
1356
+ CPU_HOST_NOKERN(name, complex128, int16, complex128) \
1357
+ CPU_HOST_NOKERN(name, complex128, int32, complex128) \
1358
+ CPU_HOST_NOKERN(name, complex128, bfloat16, complex128) \
1359
+ CPU_HOST_NOKERN(name, complex128, float16, complex128) \
1360
+ CPU_HOST_NOKERN(name, complex128, float32, complex128) \
1361
+ CPU_HOST_NOKERN(name, complex128, float64, complex128) \
1362
+ CPU_HOST_NOKERN(name, complex128, complex32, complex128) \
1363
+ CPU_HOST_NOKERN(name, complex128, complex64, complex128) \
1364
+ CPU_HOST_NOKERN(name, complex128, complex128, complex128)
1365
+
1366
+ #define CPU_HOST_ALL_ARITHMETIC_FLOAT_RETURN(name) \
1367
+ CPU_HOST_NOIMPL(name, uint8, uint8, float16) \
1368
+ CPU_HOST_BINARY(name, uint8, uint16, float32) \
1369
+ CPU_HOST_BINARY(name, uint8, uint32, float64) \
1370
+ CPU_HOST_NOKERN(name, uint8, uint64, uint64) \
1371
+ CPU_HOST_NOIMPL(name, uint8, int8, float16) \
1372
+ CPU_HOST_BINARY(name, uint8, int16, float32) \
1373
+ CPU_HOST_BINARY(name, uint8, int32, float64) \
1374
+ CPU_HOST_NOKERN(name, uint8, int64, int64) \
1375
+ CPU_HOST_BINARY(name, uint8, bfloat16, bfloat16) \
1376
+ CPU_HOST_NOIMPL(name, uint8, float16, float16) \
1377
+ CPU_HOST_BINARY(name, uint8, float32, float32) \
1378
+ CPU_HOST_BINARY(name, uint8, float64, float64) \
1379
+ CPU_HOST_NOIMPL(name, uint8, complex32, complex32) \
1380
+ CPU_HOST_BINARY(name, uint8, complex64, complex64) \
1381
+ CPU_HOST_BINARY(name, uint8, complex128, complex128) \
1382
+ \
1383
+ CPU_HOST_BINARY(name, uint16, uint8, float32) \
1384
+ CPU_HOST_BINARY(name, uint16, uint16, float32) \
1385
+ CPU_HOST_BINARY(name, uint16, uint32, float64) \
1386
+ CPU_HOST_NOKERN(name, uint16, uint64, uint64) \
1387
+ CPU_HOST_BINARY(name, uint16, int8, float32) \
1388
+ CPU_HOST_BINARY(name, uint16, int16, float32) \
1389
+ CPU_HOST_BINARY(name, uint16, int32, float64) \
1390
+ CPU_HOST_NOKERN(name, uint16, int64, int64) \
1391
+ CPU_HOST_BINARY(name, uint16, bfloat16, float32) \
1392
+ CPU_HOST_NOIMPL(name, uint16, float16, float32) \
1393
+ CPU_HOST_BINARY(name, uint16, float32, float32) \
1394
+ CPU_HOST_BINARY(name, uint16, float64, float64) \
1395
+ CPU_HOST_NOIMPL(name, uint16, complex32, complex64) \
1396
+ CPU_HOST_BINARY(name, uint16, complex64, complex64) \
1397
+ CPU_HOST_BINARY(name, uint16, complex128, complex128) \
1398
+ \
1399
+ CPU_HOST_BINARY(name, uint32, uint8, float64) \
1400
+ CPU_HOST_BINARY(name, uint32, uint16, float64) \
1401
+ CPU_HOST_BINARY(name, uint32, uint32, float64) \
1402
+ CPU_HOST_NOKERN(name, uint32, uint64, uint64) \
1403
+ CPU_HOST_BINARY(name, uint32, int8, float64) \
1404
+ CPU_HOST_BINARY(name, uint32, int16, float64) \
1405
+ CPU_HOST_BINARY(name, uint32, int32, float64) \
1406
+ CPU_HOST_NOKERN(name, uint32, int64, int64) \
1407
+ CPU_HOST_BINARY(name, uint32, bfloat16, float64) \
1408
+ CPU_HOST_NOIMPL(name, uint32, float16, float64) \
1409
+ CPU_HOST_BINARY(name, uint32, float32, float64) \
1410
+ CPU_HOST_BINARY(name, uint32, float64, float64) \
1411
+ CPU_HOST_NOIMPL(name, uint32, complex32, complex128) \
1412
+ CPU_HOST_BINARY(name, uint32, complex64, complex128) \
1413
+ CPU_HOST_BINARY(name, uint32, complex128, complex128) \
1414
+ \
1415
+ CPU_HOST_NOKERN(name, uint64, uint8, uint64) \
1416
+ CPU_HOST_NOKERN(name, uint64, uint16, uint64) \
1417
+ CPU_HOST_NOKERN(name, uint64, uint32, uint64) \
1418
+ CPU_HOST_NOKERN(name, uint64, uint64, uint64) \
1419
+ \
1420
+ CPU_HOST_NOIMPL(name, int8, uint8, float16) \
1421
+ CPU_HOST_BINARY(name, int8, uint16, float32) \
1422
+ CPU_HOST_BINARY(name, int8, uint32, float64) \
1423
+ CPU_HOST_NOIMPL(name, int8, int8, float16) \
1424
+ CPU_HOST_BINARY(name, int8, int16, float32) \
1425
+ CPU_HOST_BINARY(name, int8, int32, float64) \
1426
+ CPU_HOST_NOKERN(name, int8, int64, int64) \
1427
+ CPU_HOST_BINARY(name, int8, bfloat16, bfloat16) \
1428
+ CPU_HOST_NOIMPL(name, int8, float16, float16) \
1429
+ CPU_HOST_BINARY(name, int8, float32, float32) \
1430
+ CPU_HOST_BINARY(name, int8, float64, float64) \
1431
+ CPU_HOST_NOIMPL(name, int8, complex32, complex32) \
1432
+ CPU_HOST_BINARY(name, int8, complex64, complex64) \
1433
+ CPU_HOST_BINARY(name, int8, complex128, complex128) \
1434
+ \
1435
+ CPU_HOST_BINARY(name, int16, uint8, float32) \
1436
+ CPU_HOST_BINARY(name, int16, uint16, float32) \
1437
+ CPU_HOST_BINARY(name, int16, uint32, float64) \
1438
+ CPU_HOST_BINARY(name, int16, int8, float32) \
1439
+ CPU_HOST_BINARY(name, int16, int16, float32) \
1440
+ CPU_HOST_BINARY(name, int16, int32, float64) \
1441
+ CPU_HOST_NOKERN(name, int16, int64, int64) \
1442
+ CPU_HOST_BINARY(name, int16, bfloat16, float32) \
1443
+ CPU_HOST_NOIMPL(name, int16, float16, float32) \
1444
+ CPU_HOST_BINARY(name, int16, float32, float32) \
1445
+ CPU_HOST_BINARY(name, int16, float64, float64) \
1446
+ CPU_HOST_NOIMPL(name, int16, complex32, complex64) \
1447
+ CPU_HOST_BINARY(name, int16, complex64, complex64) \
1448
+ CPU_HOST_BINARY(name, int16, complex128, complex128) \
1449
+ \
1450
+ CPU_HOST_BINARY(name, int32, uint8, float64) \
1451
+ CPU_HOST_BINARY(name, int32, uint16, float64) \
1452
+ CPU_HOST_BINARY(name, int32, uint32, float64) \
1453
+ CPU_HOST_BINARY(name, int32, int8, float64) \
1454
+ CPU_HOST_BINARY(name, int32, int16, float64) \
1455
+ CPU_HOST_BINARY(name, int32, int32, float64) \
1456
+ CPU_HOST_NOKERN(name, int32, int64, int64) \
1457
+ CPU_HOST_BINARY(name, int32, bfloat16, float64) \
1458
+ CPU_HOST_NOIMPL(name, int32, float16, float64) \
1459
+ CPU_HOST_BINARY(name, int32, float32, float64) \
1460
+ CPU_HOST_BINARY(name, int32, float64, float64) \
1461
+ CPU_HOST_NOIMPL(name, int32, complex32, complex128) \
1462
+ CPU_HOST_BINARY(name, int32, complex64, complex128) \
1463
+ CPU_HOST_BINARY(name, int32, complex128, complex128) \
1464
+ \
1465
+ CPU_HOST_NOKERN(name, int64, uint8, int64) \
1466
+ CPU_HOST_NOKERN(name, int64, uint16, int64) \
1467
+ CPU_HOST_NOKERN(name, int64, uint32, int64) \
1468
+ CPU_HOST_NOKERN(name, int64, int8, int64) \
1469
+ CPU_HOST_NOKERN(name, int64, int16, int64) \
1470
+ CPU_HOST_NOKERN(name, int64, int32, int64) \
1471
+ CPU_HOST_NOKERN(name, int64, int64, int64) \
1472
+ \
1473
+ CPU_HOST_BINARY(name, bfloat16, uint8, bfloat16) \
1474
+ CPU_HOST_BINARY(name, bfloat16, uint16, float32) \
1475
+ CPU_HOST_BINARY(name, bfloat16, uint32, float64) \
1476
+ CPU_HOST_BINARY(name, bfloat16, int8, bfloat16) \
1477
+ CPU_HOST_BINARY(name, bfloat16, int16, float32) \
1478
+ CPU_HOST_BINARY(name, bfloat16, int32, float64) \
1479
+ CPU_HOST_BINARY(name, bfloat16, bfloat16, bfloat16) \
1480
+ CPU_HOST_NOIMPL(name, bfloat16, float16, float32) \
1481
+ CPU_HOST_BINARY(name, bfloat16, float32, float32) \
1482
+ CPU_HOST_BINARY(name, bfloat16, float64, float64) \
1483
+ CPU_HOST_NOIMPL(name, bfloat16, complex32, complex64) \
1484
+ CPU_HOST_BINARY(name, bfloat16, complex64, complex64) \
1485
+ CPU_HOST_BINARY(name, bfloat16, complex128, complex128) \
1486
+ \
1487
+ CPU_HOST_NOIMPL(name, float16, uint8, float16) \
1488
+ CPU_HOST_NOIMPL(name, float16, uint16, float32) \
1489
+ CPU_HOST_NOIMPL(name, float16, uint32, float64) \
1490
+ CPU_HOST_NOIMPL(name, float16, int8, float16) \
1491
+ CPU_HOST_NOIMPL(name, float16, int16, float32) \
1492
+ CPU_HOST_NOIMPL(name, float16, int32, float64) \
1493
+ CPU_HOST_NOIMPL(name, float16, bfloat16, float32) \
1494
+ CPU_HOST_NOIMPL(name, float16, float16, float16) \
1495
+ CPU_HOST_NOIMPL(name, float16, float32, float32) \
1496
+ CPU_HOST_NOIMPL(name, float16, float64, float64) \
1497
+ CPU_HOST_NOIMPL(name, float16, complex32, complex32) \
1498
+ CPU_HOST_NOIMPL(name, float16, complex64, complex64) \
1499
+ CPU_HOST_NOIMPL(name, float16, complex128, complex128) \
1500
+ \
1501
+ CPU_HOST_BINARY(name, float32, uint8, float32) \
1502
+ CPU_HOST_BINARY(name, float32, uint16, float32) \
1503
+ CPU_HOST_BINARY(name, float32, uint32, float64) \
1504
+ CPU_HOST_BINARY(name, float32, int8, float32) \
1505
+ CPU_HOST_BINARY(name, float32, int16, float32) \
1506
+ CPU_HOST_BINARY(name, float32, int32, float64) \
1507
+ CPU_HOST_BINARY(name, float32, bfloat16, float32) \
1508
+ CPU_HOST_NOIMPL(name, float32, float16, float32) \
1509
+ CPU_HOST_BINARY(name, float32, float32, float32) \
1510
+ CPU_HOST_BINARY(name, float32, float64, float64) \
1511
+ CPU_HOST_NOIMPL(name, float32, complex32, complex64) \
1512
+ CPU_HOST_BINARY(name, float32, complex64, complex64) \
1513
+ CPU_HOST_BINARY(name, float32, complex128, complex128) \
1514
+ \
1515
+ CPU_HOST_BINARY(name, float64, uint8, float64) \
1516
+ CPU_HOST_BINARY(name, float64, uint16, float64) \
1517
+ CPU_HOST_BINARY(name, float64, uint32, float64) \
1518
+ CPU_HOST_BINARY(name, float64, int8, float64) \
1519
+ CPU_HOST_BINARY(name, float64, int16, float64) \
1520
+ CPU_HOST_BINARY(name, float64, int32, float64) \
1521
+ CPU_HOST_BINARY(name, float64, bfloat16, float64) \
1522
+ CPU_HOST_NOIMPL(name, float64, float16, float64) \
1523
+ CPU_HOST_BINARY(name, float64, float32, float64) \
1524
+ CPU_HOST_BINARY(name, float64, float64, float64) \
1525
+ CPU_HOST_NOIMPL(name, float64, complex32, complex128) \
1526
+ CPU_HOST_BINARY(name, float64, complex64, complex128) \
1527
+ CPU_HOST_BINARY(name, float64, complex128, complex128) \
1528
+ \
1529
+ CPU_HOST_NOIMPL(name, complex32, uint8, complex32) \
1530
+ CPU_HOST_NOIMPL(name, complex32, uint16, complex64) \
1531
+ CPU_HOST_NOIMPL(name, complex32, uint32, complex128) \
1532
+ CPU_HOST_NOIMPL(name, complex32, int8, complex32) \
1533
+ CPU_HOST_NOIMPL(name, complex32, int16, complex64) \
1534
+ CPU_HOST_NOIMPL(name, complex32, int32, complex128) \
1535
+ CPU_HOST_NOIMPL(name, complex32, bfloat16, complex64) \
1536
+ CPU_HOST_NOIMPL(name, complex32, float16, complex32) \
1537
+ CPU_HOST_NOIMPL(name, complex32, float32, complex64) \
1538
+ CPU_HOST_NOIMPL(name, complex32, float64, complex128) \
1539
+ CPU_HOST_NOIMPL(name, complex32, complex32, complex32) \
1540
+ CPU_HOST_NOIMPL(name, complex32, complex64, complex64) \
1541
+ CPU_HOST_NOIMPL(name, complex32, complex128, complex128) \
1542
+ \
1543
+ CPU_HOST_BINARY(name, complex64, uint8, complex64) \
1544
+ CPU_HOST_BINARY(name, complex64, uint16, complex64) \
1545
+ CPU_HOST_BINARY(name, complex64, uint32, complex128) \
1546
+ CPU_HOST_BINARY(name, complex64, int8, complex64) \
1547
+ CPU_HOST_BINARY(name, complex64, int16, complex64) \
1548
+ CPU_HOST_BINARY(name, complex64, int32, complex128) \
1549
+ CPU_HOST_BINARY(name, complex64, bfloat16, complex64) \
1550
+ CPU_HOST_NOIMPL(name, complex64, float16, complex64) \
1551
+ CPU_HOST_BINARY(name, complex64, float32, complex64) \
1552
+ CPU_HOST_BINARY(name, complex64, float64, complex128) \
1553
+ CPU_HOST_NOIMPL(name, complex64, complex32, complex64) \
1554
+ CPU_HOST_BINARY(name, complex64, complex64, complex64) \
1555
+ CPU_HOST_BINARY(name, complex64, complex128, complex128) \
1556
+ \
1557
+ CPU_HOST_BINARY(name, complex128, uint8, complex128) \
1558
+ CPU_HOST_BINARY(name, complex128, uint16, complex128) \
1559
+ CPU_HOST_BINARY(name, complex128, uint32, complex128) \
1560
+ CPU_HOST_BINARY(name, complex128, int8, complex128) \
1561
+ CPU_HOST_BINARY(name, complex128, int16, complex128) \
1562
+ CPU_HOST_BINARY(name, complex128, int32, complex128) \
1563
+ CPU_HOST_BINARY(name, complex128, bfloat16, complex128) \
1564
+ CPU_HOST_NOIMPL(name, complex128, float16, complex128) \
1565
+ CPU_HOST_BINARY(name, complex128, float32, complex128) \
1566
+ CPU_HOST_BINARY(name, complex128, float64, complex128) \
1567
+ CPU_HOST_NOIMPL(name, complex128, complex32, complex128) \
1568
+ CPU_HOST_BINARY(name, complex128, complex64, complex128) \
1569
+ CPU_HOST_BINARY(name, complex128, complex128, complex128)
1570
+
1571
/*
 * CPU_HOST_ALL_ARITHMETIC_INIT(name): expands to a comma-separated list of
 * CPU_HOST_BINARY_INIT(name, t0, t1, t2) entries, one per type triple listed
 * below. The final entry has no trailing comma.
 *
 * NOTE(review): t0 and t1 look like the two operand types and t2 like the
 * result type; CPU_HOST_BINARY_INIT is defined outside this section, so
 * confirm against its definition.
 *
 * Coverage visible in the list: uint64 appears only together with unsigned
 * integer types, and int64 only together with integer types other than
 * uint64. There are no entries combining a 64-bit integer type with a float
 * or complex type.
 *
 * Keep the entry order as it is unless the consumer of this list is known
 * to be order-independent.
 */
+ #define CPU_HOST_ALL_ARITHMETIC_INIT(name) \
1572
+ CPU_HOST_BINARY_INIT(name, uint8, uint8, uint8), \
1573
+ CPU_HOST_BINARY_INIT(name, uint8, uint16, uint16), \
1574
+ CPU_HOST_BINARY_INIT(name, uint8, uint32, uint32), \
1575
+ CPU_HOST_BINARY_INIT(name, uint8, uint64, uint64), \
1576
+ CPU_HOST_BINARY_INIT(name, uint8, int8, int16), \
1577
+ CPU_HOST_BINARY_INIT(name, uint8, int16, int16), \
1578
+ CPU_HOST_BINARY_INIT(name, uint8, int32, int32), \
1579
+ CPU_HOST_BINARY_INIT(name, uint8, int64, int64), \
1580
+ CPU_HOST_BINARY_INIT(name, uint8, bfloat16, bfloat16), \
1581
+ CPU_HOST_BINARY_INIT(name, uint8, float16, float16), \
1582
+ CPU_HOST_BINARY_INIT(name, uint8, float32, float32), \
1583
+ CPU_HOST_BINARY_INIT(name, uint8, float64, float64), \
1584
+ CPU_HOST_BINARY_INIT(name, uint8, complex32, complex32), \
1585
+ CPU_HOST_BINARY_INIT(name, uint8, complex64, complex64), \
1586
+ CPU_HOST_BINARY_INIT(name, uint8, complex128, complex128), \
1587
+ \
1588
+ CPU_HOST_BINARY_INIT(name, uint16, uint8, uint16), \
1589
+ CPU_HOST_BINARY_INIT(name, uint16, uint16, uint16), \
1590
+ CPU_HOST_BINARY_INIT(name, uint16, uint32, uint32), \
1591
+ CPU_HOST_BINARY_INIT(name, uint16, uint64, uint64), \
1592
+ CPU_HOST_BINARY_INIT(name, uint16, int8, int32), \
1593
+ CPU_HOST_BINARY_INIT(name, uint16, int16, int32), \
1594
+ CPU_HOST_BINARY_INIT(name, uint16, int32, int32), \
1595
+ CPU_HOST_BINARY_INIT(name, uint16, int64, int64), \
1596
+ CPU_HOST_BINARY_INIT(name, uint16, bfloat16, float32), \
1597
+ CPU_HOST_BINARY_INIT(name, uint16, float16, float32), \
1598
+ CPU_HOST_BINARY_INIT(name, uint16, float32, float32), \
1599
+ CPU_HOST_BINARY_INIT(name, uint16, float64, float64), \
1600
+ CPU_HOST_BINARY_INIT(name, uint16, complex32, complex64), \
1601
+ CPU_HOST_BINARY_INIT(name, uint16, complex64, complex64), \
1602
+ CPU_HOST_BINARY_INIT(name, uint16, complex128, complex128), \
1603
+ \
1604
+ CPU_HOST_BINARY_INIT(name, uint32, uint8, uint32), \
1605
+ CPU_HOST_BINARY_INIT(name, uint32, uint16, uint32), \
1606
+ CPU_HOST_BINARY_INIT(name, uint32, uint32, uint32), \
1607
+ CPU_HOST_BINARY_INIT(name, uint32, uint64, uint64), \
1608
+ CPU_HOST_BINARY_INIT(name, uint32, int8, int64), \
1609
+ CPU_HOST_BINARY_INIT(name, uint32, int16, int64), \
1610
+ CPU_HOST_BINARY_INIT(name, uint32, int32, int64), \
1611
+ CPU_HOST_BINARY_INIT(name, uint32, int64, int64), \
1612
+ CPU_HOST_BINARY_INIT(name, uint32, bfloat16, float64), \
1613
+ CPU_HOST_BINARY_INIT(name, uint32, float16, float64), \
1614
+ CPU_HOST_BINARY_INIT(name, uint32, float32, float64), \
1615
+ CPU_HOST_BINARY_INIT(name, uint32, float64, float64), \
1616
+ CPU_HOST_BINARY_INIT(name, uint32, complex32, complex128), \
1617
+ CPU_HOST_BINARY_INIT(name, uint32, complex64, complex128), \
1618
+ CPU_HOST_BINARY_INIT(name, uint32, complex128, complex128), \
1619
+ \
1620
+ CPU_HOST_BINARY_INIT(name, uint64, uint8, uint64), \
1621
+ CPU_HOST_BINARY_INIT(name, uint64, uint16, uint64), \
1622
+ CPU_HOST_BINARY_INIT(name, uint64, uint32, uint64), \
1623
+ CPU_HOST_BINARY_INIT(name, uint64, uint64, uint64), \
1624
+ \
1625
+ CPU_HOST_BINARY_INIT(name, int8, uint8, int16), \
1626
+ CPU_HOST_BINARY_INIT(name, int8, uint16, int32), \
1627
+ CPU_HOST_BINARY_INIT(name, int8, uint32, int64), \
1628
+ CPU_HOST_BINARY_INIT(name, int8, int8, int8), \
1629
+ CPU_HOST_BINARY_INIT(name, int8, int16, int16), \
1630
+ CPU_HOST_BINARY_INIT(name, int8, int32, int32), \
1631
+ CPU_HOST_BINARY_INIT(name, int8, int64, int64), \
1632
+ CPU_HOST_BINARY_INIT(name, int8, bfloat16, bfloat16), \
1633
+ CPU_HOST_BINARY_INIT(name, int8, float16, float16), \
1634
+ CPU_HOST_BINARY_INIT(name, int8, float32, float32), \
1635
+ CPU_HOST_BINARY_INIT(name, int8, float64, float64), \
1636
+ CPU_HOST_BINARY_INIT(name, int8, complex32, complex32), \
1637
+ CPU_HOST_BINARY_INIT(name, int8, complex64, complex64), \
1638
+ CPU_HOST_BINARY_INIT(name, int8, complex128, complex128), \
1639
+ \
1640
+ CPU_HOST_BINARY_INIT(name, int16, uint8, int16), \
1641
+ CPU_HOST_BINARY_INIT(name, int16, uint16, int32), \
1642
+ CPU_HOST_BINARY_INIT(name, int16, uint32, int64), \
1643
+ CPU_HOST_BINARY_INIT(name, int16, int8, int16), \
1644
+ CPU_HOST_BINARY_INIT(name, int16, int16, int16), \
1645
+ CPU_HOST_BINARY_INIT(name, int16, int32, int32), \
1646
+ CPU_HOST_BINARY_INIT(name, int16, int64, int64), \
1647
+ CPU_HOST_BINARY_INIT(name, int16, bfloat16, float32), \
1648
+ CPU_HOST_BINARY_INIT(name, int16, float16, float32), \
1649
+ CPU_HOST_BINARY_INIT(name, int16, float32, float32), \
1650
+ CPU_HOST_BINARY_INIT(name, int16, float64, float64), \
1651
+ CPU_HOST_BINARY_INIT(name, int16, complex32, complex64), \
1652
+ CPU_HOST_BINARY_INIT(name, int16, complex64, complex64), \
1653
+ CPU_HOST_BINARY_INIT(name, int16, complex128, complex128), \
1654
+ \
1655
+ CPU_HOST_BINARY_INIT(name, int32, uint8, int32), \
1656
+ CPU_HOST_BINARY_INIT(name, int32, uint16, int32), \
1657
+ CPU_HOST_BINARY_INIT(name, int32, uint32, int64), \
1658
+ CPU_HOST_BINARY_INIT(name, int32, int8, int32), \
1659
+ CPU_HOST_BINARY_INIT(name, int32, int16, int32), \
1660
+ CPU_HOST_BINARY_INIT(name, int32, int32, int32), \
1661
+ CPU_HOST_BINARY_INIT(name, int32, int64, int64), \
1662
+ CPU_HOST_BINARY_INIT(name, int32, bfloat16, float64), \
1663
+ CPU_HOST_BINARY_INIT(name, int32, float16, float64), \
1664
+ CPU_HOST_BINARY_INIT(name, int32, float32, float64), \
1665
+ CPU_HOST_BINARY_INIT(name, int32, float64, float64), \
1666
+ CPU_HOST_BINARY_INIT(name, int32, complex32, complex128), \
1667
+ CPU_HOST_BINARY_INIT(name, int32, complex64, complex128), \
1668
+ CPU_HOST_BINARY_INIT(name, int32, complex128, complex128), \
1669
+ \
1670
+ CPU_HOST_BINARY_INIT(name, int64, uint8, int64), \
1671
+ CPU_HOST_BINARY_INIT(name, int64, uint16, int64), \
1672
+ CPU_HOST_BINARY_INIT(name, int64, uint32, int64), \
1673
+ CPU_HOST_BINARY_INIT(name, int64, int8, int64), \
1674
+ CPU_HOST_BINARY_INIT(name, int64, int16, int64), \
1675
+ CPU_HOST_BINARY_INIT(name, int64, int32, int64), \
1676
+ CPU_HOST_BINARY_INIT(name, int64, int64, int64), \
1677
+ \
1678
+ CPU_HOST_BINARY_INIT(name, bfloat16, uint8, bfloat16), \
1679
+ CPU_HOST_BINARY_INIT(name, bfloat16, uint16, float32), \
1680
+ CPU_HOST_BINARY_INIT(name, bfloat16, uint32, float64), \
1681
+ CPU_HOST_BINARY_INIT(name, bfloat16, int8, bfloat16), \
1682
+ CPU_HOST_BINARY_INIT(name, bfloat16, int16, float32), \
1683
+ CPU_HOST_BINARY_INIT(name, bfloat16, int32, float64), \
1684
+ CPU_HOST_BINARY_INIT(name, bfloat16, bfloat16, bfloat16), \
1685
+ CPU_HOST_BINARY_INIT(name, bfloat16, float16, float32), \
1686
+ CPU_HOST_BINARY_INIT(name, bfloat16, float32, float32), \
1687
+ CPU_HOST_BINARY_INIT(name, bfloat16, float64, float64), \
1688
+ CPU_HOST_BINARY_INIT(name, bfloat16, complex32, complex64), \
1689
+ CPU_HOST_BINARY_INIT(name, bfloat16, complex64, complex64), \
1690
+ CPU_HOST_BINARY_INIT(name, bfloat16, complex128, complex128), \
1691
+ \
1692
+ CPU_HOST_BINARY_INIT(name, float16, uint8, float16), \
1693
+ CPU_HOST_BINARY_INIT(name, float16, uint16, float32), \
1694
+ CPU_HOST_BINARY_INIT(name, float16, uint32, float64), \
1695
+ CPU_HOST_BINARY_INIT(name, float16, int8, float16), \
1696
+ CPU_HOST_BINARY_INIT(name, float16, int16, float32), \
1697
+ CPU_HOST_BINARY_INIT(name, float16, int32, float64), \
1698
+ CPU_HOST_BINARY_INIT(name, float16, bfloat16, float32), \
1699
+ CPU_HOST_BINARY_INIT(name, float16, float16, float16), \
1700
+ CPU_HOST_BINARY_INIT(name, float16, float32, float32), \
1701
+ CPU_HOST_BINARY_INIT(name, float16, float64, float64), \
1702
+ CPU_HOST_BINARY_INIT(name, float16, complex32, complex32), \
1703
+ CPU_HOST_BINARY_INIT(name, float16, complex64, complex64), \
1704
+ CPU_HOST_BINARY_INIT(name, float16, complex128, complex128), \
1705
+ \
1706
+ CPU_HOST_BINARY_INIT(name, float32, uint8, float32), \
1707
+ CPU_HOST_BINARY_INIT(name, float32, uint16, float32), \
1708
+ CPU_HOST_BINARY_INIT(name, float32, uint32, float64), \
1709
+ CPU_HOST_BINARY_INIT(name, float32, int8, float32), \
1710
+ CPU_HOST_BINARY_INIT(name, float32, int16, float32), \
1711
+ CPU_HOST_BINARY_INIT(name, float32, int32, float64), \
1712
+ CPU_HOST_BINARY_INIT(name, float32, bfloat16, float32), \
1713
+ CPU_HOST_BINARY_INIT(name, float32, float16, float32), \
1714
+ CPU_HOST_BINARY_INIT(name, float32, float32, float32), \
1715
+ CPU_HOST_BINARY_INIT(name, float32, float64, float64), \
1716
+ CPU_HOST_BINARY_INIT(name, float32, complex32, complex64), \
1717
+ CPU_HOST_BINARY_INIT(name, float32, complex64, complex64), \
1718
+ CPU_HOST_BINARY_INIT(name, float32, complex128, complex128), \
1719
+ \
1720
+ CPU_HOST_BINARY_INIT(name, float64, uint8, float64), \
1721
+ CPU_HOST_BINARY_INIT(name, float64, uint16, float64), \
1722
+ CPU_HOST_BINARY_INIT(name, float64, uint32, float64), \
1723
+ CPU_HOST_BINARY_INIT(name, float64, int8, float64), \
1724
+ CPU_HOST_BINARY_INIT(name, float64, int16, float64), \
1725
+ CPU_HOST_BINARY_INIT(name, float64, int32, float64), \
1726
+ CPU_HOST_BINARY_INIT(name, float64, bfloat16, float64), \
1727
+ CPU_HOST_BINARY_INIT(name, float64, float16, float64), \
1728
+ CPU_HOST_BINARY_INIT(name, float64, float32, float64), \
1729
+ CPU_HOST_BINARY_INIT(name, float64, float64, float64), \
1730
+ CPU_HOST_BINARY_INIT(name, float64, complex32, complex128), \
1731
+ CPU_HOST_BINARY_INIT(name, float64, complex64, complex128), \
1732
+ CPU_HOST_BINARY_INIT(name, float64, complex128, complex128), \
1733
+ \
1734
+ CPU_HOST_BINARY_INIT(name, complex32, uint8, complex32), \
1735
+ CPU_HOST_BINARY_INIT(name, complex32, uint16, complex64), \
1736
+ CPU_HOST_BINARY_INIT(name, complex32, uint32, complex128), \
1737
+ CPU_HOST_BINARY_INIT(name, complex32, int8, complex32), \
1738
+ CPU_HOST_BINARY_INIT(name, complex32, int16, complex64), \
1739
+ CPU_HOST_BINARY_INIT(name, complex32, int32, complex128), \
1740
+ CPU_HOST_BINARY_INIT(name, complex32, bfloat16, complex64), \
1741
+ CPU_HOST_BINARY_INIT(name, complex32, float16, complex32), \
1742
+ CPU_HOST_BINARY_INIT(name, complex32, float32, complex64), \
1743
+ CPU_HOST_BINARY_INIT(name, complex32, float64, complex128), \
1744
+ CPU_HOST_BINARY_INIT(name, complex32, complex32, complex32), \
1745
+ CPU_HOST_BINARY_INIT(name, complex32, complex64, complex64), \
1746
+ CPU_HOST_BINARY_INIT(name, complex32, complex128, complex128), \
1747
+ \
1748
+ CPU_HOST_BINARY_INIT(name, complex64, uint8, complex64), \
1749
+ CPU_HOST_BINARY_INIT(name, complex64, uint16, complex64), \
1750
+ CPU_HOST_BINARY_INIT(name, complex64, uint32, complex128), \
1751
+ CPU_HOST_BINARY_INIT(name, complex64, int8, complex64), \
1752
+ CPU_HOST_BINARY_INIT(name, complex64, int16, complex64), \
1753
+ CPU_HOST_BINARY_INIT(name, complex64, int32, complex128), \
1754
+ CPU_HOST_BINARY_INIT(name, complex64, bfloat16, complex64), \
1755
+ CPU_HOST_BINARY_INIT(name, complex64, float16, complex64), \
1756
+ CPU_HOST_BINARY_INIT(name, complex64, float32, complex64), \
1757
+ CPU_HOST_BINARY_INIT(name, complex64, float64, complex128), \
1758
+ CPU_HOST_BINARY_INIT(name, complex64, complex32, complex64), \
1759
+ CPU_HOST_BINARY_INIT(name, complex64, complex64, complex64), \
1760
+ CPU_HOST_BINARY_INIT(name, complex64, complex128, complex128), \
1761
+ \
1762
+ CPU_HOST_BINARY_INIT(name, complex128, uint8, complex128), \
1763
+ CPU_HOST_BINARY_INIT(name, complex128, uint16, complex128), \
1764
+ CPU_HOST_BINARY_INIT(name, complex128, uint32, complex128), \
1765
+ CPU_HOST_BINARY_INIT(name, complex128, int8, complex128), \
1766
+ CPU_HOST_BINARY_INIT(name, complex128, int16, complex128), \
1767
+ CPU_HOST_BINARY_INIT(name, complex128, int32, complex128), \
1768
+ CPU_HOST_BINARY_INIT(name, complex128, bfloat16, complex128), \
1769
+ CPU_HOST_BINARY_INIT(name, complex128, float16, complex128), \
1770
+ CPU_HOST_BINARY_INIT(name, complex128, float32, complex128), \
1771
+ CPU_HOST_BINARY_INIT(name, complex128, float64, complex128), \
1772
+ CPU_HOST_BINARY_INIT(name, complex128, complex32, complex128), \
1773
+ CPU_HOST_BINARY_INIT(name, complex128, complex64, complex128), \
1774
+ CPU_HOST_BINARY_INIT(name, complex128, complex128, complex128)
1775
+
1776
/*
 * CPU_HOST_ALL_ARITHMETIC_FLOAT_RETURN_INIT(name): expands to a
 * comma-separated list of CPU_HOST_BINARY_INIT(name, t0, t1, t2) entries,
 * one per type triple listed below. The final entry has no trailing comma.
 *
 * NOTE(review): t0 and t1 look like the two operand types and t2 like the
 * result type; CPU_HOST_BINARY_INIT is defined outside this section, so
 * confirm against its definition.
 *
 * In this table, entries whose first two types are both integers of at most
 * 32 bits list float16, float32 or float64 as the third type. Entries that
 * involve uint64 or int64 keep an integer third type (uint64 or int64).
 *
 * Coverage visible in the list: uint64 appears only together with unsigned
 * integer types, and int64 only together with integer types other than
 * uint64. There are no entries combining a 64-bit integer type with a float
 * or complex type.
 *
 * Keep the entry order as it is unless the consumer of this list is known
 * to be order-independent.
 */
+ #define CPU_HOST_ALL_ARITHMETIC_FLOAT_RETURN_INIT(name) \
1777
+ CPU_HOST_BINARY_INIT(name, uint8, uint8, float16), \
1778
+ CPU_HOST_BINARY_INIT(name, uint8, uint16, float32), \
1779
+ CPU_HOST_BINARY_INIT(name, uint8, uint32, float64), \
1780
+ CPU_HOST_BINARY_INIT(name, uint8, uint64, uint64), \
1781
+ CPU_HOST_BINARY_INIT(name, uint8, int8, float16), \
1782
+ CPU_HOST_BINARY_INIT(name, uint8, int16, float32), \
1783
+ CPU_HOST_BINARY_INIT(name, uint8, int32, float64), \
1784
+ CPU_HOST_BINARY_INIT(name, uint8, int64, int64), \
1785
+ CPU_HOST_BINARY_INIT(name, uint8, bfloat16, bfloat16), \
1786
+ CPU_HOST_BINARY_INIT(name, uint8, float16, float16), \
1787
+ CPU_HOST_BINARY_INIT(name, uint8, float32, float32), \
1788
+ CPU_HOST_BINARY_INIT(name, uint8, float64, float64), \
1789
+ CPU_HOST_BINARY_INIT(name, uint8, complex32, complex32), \
1790
+ CPU_HOST_BINARY_INIT(name, uint8, complex64, complex64), \
1791
+ CPU_HOST_BINARY_INIT(name, uint8, complex128, complex128), \
1792
+ \
1793
+ CPU_HOST_BINARY_INIT(name, uint16, uint8, float32), \
1794
+ CPU_HOST_BINARY_INIT(name, uint16, uint16, float32), \
1795
+ CPU_HOST_BINARY_INIT(name, uint16, uint32, float64), \
1796
+ CPU_HOST_BINARY_INIT(name, uint16, uint64, uint64), \
1797
+ CPU_HOST_BINARY_INIT(name, uint16, int8, float32), \
1798
+ CPU_HOST_BINARY_INIT(name, uint16, int16, float32), \
1799
+ CPU_HOST_BINARY_INIT(name, uint16, int32, float64), \
1800
+ CPU_HOST_BINARY_INIT(name, uint16, int64, int64), \
1801
+ CPU_HOST_BINARY_INIT(name, uint16, bfloat16, float32), \
1802
+ CPU_HOST_BINARY_INIT(name, uint16, float16, float32), \
1803
+ CPU_HOST_BINARY_INIT(name, uint16, float32, float32), \
1804
+ CPU_HOST_BINARY_INIT(name, uint16, float64, float64), \
1805
+ CPU_HOST_BINARY_INIT(name, uint16, complex32, complex64), \
1806
+ CPU_HOST_BINARY_INIT(name, uint16, complex64, complex64), \
1807
+ CPU_HOST_BINARY_INIT(name, uint16, complex128, complex128), \
1808
+ \
1809
+ CPU_HOST_BINARY_INIT(name, uint32, uint8, float64), \
1810
+ CPU_HOST_BINARY_INIT(name, uint32, uint16, float64), \
1811
+ CPU_HOST_BINARY_INIT(name, uint32, uint32, float64), \
1812
+ CPU_HOST_BINARY_INIT(name, uint32, uint64, uint64), \
1813
+ CPU_HOST_BINARY_INIT(name, uint32, int8, float64), \
1814
+ CPU_HOST_BINARY_INIT(name, uint32, int16, float64), \
1815
+ CPU_HOST_BINARY_INIT(name, uint32, int32, float64), \
1816
+ CPU_HOST_BINARY_INIT(name, uint32, int64, int64), \
1817
+ CPU_HOST_BINARY_INIT(name, uint32, bfloat16, float64), \
1818
+ CPU_HOST_BINARY_INIT(name, uint32, float16, float64), \
1819
+ CPU_HOST_BINARY_INIT(name, uint32, float32, float64), \
1820
+ CPU_HOST_BINARY_INIT(name, uint32, float64, float64), \
1821
+ CPU_HOST_BINARY_INIT(name, uint32, complex32, complex128), \
1822
+ CPU_HOST_BINARY_INIT(name, uint32, complex64, complex128), \
1823
+ CPU_HOST_BINARY_INIT(name, uint32, complex128, complex128), \
1824
+ \
1825
+ CPU_HOST_BINARY_INIT(name, uint64, uint8, uint64), \
1826
+ CPU_HOST_BINARY_INIT(name, uint64, uint16, uint64), \
1827
+ CPU_HOST_BINARY_INIT(name, uint64, uint32, uint64), \
1828
+ CPU_HOST_BINARY_INIT(name, uint64, uint64, uint64), \
1829
+ \
1830
+ CPU_HOST_BINARY_INIT(name, int8, uint8, float16), \
1831
+ CPU_HOST_BINARY_INIT(name, int8, uint16, float32), \
1832
+ CPU_HOST_BINARY_INIT(name, int8, uint32, float64), \
1833
+ CPU_HOST_BINARY_INIT(name, int8, int8, float16), \
1834
+ CPU_HOST_BINARY_INIT(name, int8, int16, float32), \
1835
+ CPU_HOST_BINARY_INIT(name, int8, int32, float64), \
1836
+ CPU_HOST_BINARY_INIT(name, int8, int64, int64), \
1837
+ CPU_HOST_BINARY_INIT(name, int8, bfloat16, bfloat16), \
1838
+ CPU_HOST_BINARY_INIT(name, int8, float16, float16), \
1839
+ CPU_HOST_BINARY_INIT(name, int8, float32, float32), \
1840
+ CPU_HOST_BINARY_INIT(name, int8, float64, float64), \
1841
+ CPU_HOST_BINARY_INIT(name, int8, complex32, complex32), \
1842
+ CPU_HOST_BINARY_INIT(name, int8, complex64, complex64), \
1843
+ CPU_HOST_BINARY_INIT(name, int8, complex128, complex128), \
1844
+ \
1845
+ CPU_HOST_BINARY_INIT(name, int16, uint8, float32), \
1846
+ CPU_HOST_BINARY_INIT(name, int16, uint16, float32), \
1847
+ CPU_HOST_BINARY_INIT(name, int16, uint32, float64), \
1848
+ CPU_HOST_BINARY_INIT(name, int16, int8, float32), \
1849
+ CPU_HOST_BINARY_INIT(name, int16, int16, float32), \
1850
+ CPU_HOST_BINARY_INIT(name, int16, int32, float64), \
1851
+ CPU_HOST_BINARY_INIT(name, int16, int64, int64), \
1852
+ CPU_HOST_BINARY_INIT(name, int16, bfloat16, float32), \
1853
+ CPU_HOST_BINARY_INIT(name, int16, float16, float32), \
1854
+ CPU_HOST_BINARY_INIT(name, int16, float32, float32), \
1855
+ CPU_HOST_BINARY_INIT(name, int16, float64, float64), \
1856
+ CPU_HOST_BINARY_INIT(name, int16, complex32, complex64), \
1857
+ CPU_HOST_BINARY_INIT(name, int16, complex64, complex64), \
1858
+ CPU_HOST_BINARY_INIT(name, int16, complex128, complex128), \
1859
+ \
1860
+ CPU_HOST_BINARY_INIT(name, int32, uint8, float64), \
1861
+ CPU_HOST_BINARY_INIT(name, int32, uint16, float64), \
1862
+ CPU_HOST_BINARY_INIT(name, int32, uint32, float64), \
1863
+ CPU_HOST_BINARY_INIT(name, int32, int8, float64), \
1864
+ CPU_HOST_BINARY_INIT(name, int32, int16, float64), \
1865
+ CPU_HOST_BINARY_INIT(name, int32, int32, float64), \
1866
+ CPU_HOST_BINARY_INIT(name, int32, int64, int64), \
1867
+ CPU_HOST_BINARY_INIT(name, int32, bfloat16, float64), \
1868
+ CPU_HOST_BINARY_INIT(name, int32, float16, float64), \
1869
+ CPU_HOST_BINARY_INIT(name, int32, float32, float64), \
1870
+ CPU_HOST_BINARY_INIT(name, int32, float64, float64), \
1871
+ CPU_HOST_BINARY_INIT(name, int32, complex32, complex128), \
1872
+ CPU_HOST_BINARY_INIT(name, int32, complex64, complex128), \
1873
+ CPU_HOST_BINARY_INIT(name, int32, complex128, complex128), \
1874
+ \
1875
+ CPU_HOST_BINARY_INIT(name, int64, uint8, int64), \
1876
+ CPU_HOST_BINARY_INIT(name, int64, uint16, int64), \
1877
+ CPU_HOST_BINARY_INIT(name, int64, uint32, int64), \
1878
+ CPU_HOST_BINARY_INIT(name, int64, int8, int64), \
1879
+ CPU_HOST_BINARY_INIT(name, int64, int16, int64), \
1880
+ CPU_HOST_BINARY_INIT(name, int64, int32, int64), \
1881
+ CPU_HOST_BINARY_INIT(name, int64, int64, int64), \
1882
+ \
1883
+ CPU_HOST_BINARY_INIT(name, bfloat16, uint8, bfloat16), \
1884
+ CPU_HOST_BINARY_INIT(name, bfloat16, uint16, float32), \
1885
+ CPU_HOST_BINARY_INIT(name, bfloat16, uint32, float64), \
1886
+ CPU_HOST_BINARY_INIT(name, bfloat16, int8, bfloat16), \
1887
+ CPU_HOST_BINARY_INIT(name, bfloat16, int16, float32), \
1888
+ CPU_HOST_BINARY_INIT(name, bfloat16, int32, float64), \
1889
+ CPU_HOST_BINARY_INIT(name, bfloat16, bfloat16, bfloat16), \
1890
+ CPU_HOST_BINARY_INIT(name, bfloat16, float16, float32), \
1891
+ CPU_HOST_BINARY_INIT(name, bfloat16, float32, float32), \
1892
+ CPU_HOST_BINARY_INIT(name, bfloat16, float64, float64), \
1893
+ CPU_HOST_BINARY_INIT(name, bfloat16, complex32, complex64), \
1894
+ CPU_HOST_BINARY_INIT(name, bfloat16, complex64, complex64), \
1895
+ CPU_HOST_BINARY_INIT(name, bfloat16, complex128, complex128), \
1896
+ \
1897
+ CPU_HOST_BINARY_INIT(name, float16, uint8, float16), \
1898
+ CPU_HOST_BINARY_INIT(name, float16, uint16, float32), \
1899
+ CPU_HOST_BINARY_INIT(name, float16, uint32, float64), \
1900
+ CPU_HOST_BINARY_INIT(name, float16, int8, float16), \
1901
+ CPU_HOST_BINARY_INIT(name, float16, int16, float32), \
1902
+ CPU_HOST_BINARY_INIT(name, float16, int32, float64), \
1903
+ CPU_HOST_BINARY_INIT(name, float16, bfloat16, float32), \
1904
+ CPU_HOST_BINARY_INIT(name, float16, float16, float16), \
1905
+ CPU_HOST_BINARY_INIT(name, float16, float32, float32), \
1906
+ CPU_HOST_BINARY_INIT(name, float16, float64, float64), \
1907
+ CPU_HOST_BINARY_INIT(name, float16, complex32, complex32), \
1908
+ CPU_HOST_BINARY_INIT(name, float16, complex64, complex64), \
1909
+ CPU_HOST_BINARY_INIT(name, float16, complex128, complex128), \
1910
+ \
1911
+ CPU_HOST_BINARY_INIT(name, float32, uint8, float32), \
1912
+ CPU_HOST_BINARY_INIT(name, float32, uint16, float32), \
1913
+ CPU_HOST_BINARY_INIT(name, float32, uint32, float64), \
1914
+ CPU_HOST_BINARY_INIT(name, float32, int8, float32), \
1915
+ CPU_HOST_BINARY_INIT(name, float32, int16, float32), \
1916
+ CPU_HOST_BINARY_INIT(name, float32, int32, float64), \
1917
+ CPU_HOST_BINARY_INIT(name, float32, bfloat16, float32), \
1918
+ CPU_HOST_BINARY_INIT(name, float32, float16, float32), \
1919
+ CPU_HOST_BINARY_INIT(name, float32, float32, float32), \
1920
+ CPU_HOST_BINARY_INIT(name, float32, float64, float64), \
1921
+ CPU_HOST_BINARY_INIT(name, float32, complex32, complex64), \
1922
+ CPU_HOST_BINARY_INIT(name, float32, complex64, complex64), \
1923
+ CPU_HOST_BINARY_INIT(name, float32, complex128, complex128), \
1924
+ \
1925
+ CPU_HOST_BINARY_INIT(name, float64, uint8, float64), \
1926
+ CPU_HOST_BINARY_INIT(name, float64, uint16, float64), \
1927
+ CPU_HOST_BINARY_INIT(name, float64, uint32, float64), \
1928
+ CPU_HOST_BINARY_INIT(name, float64, int8, float64), \
1929
+ CPU_HOST_BINARY_INIT(name, float64, int16, float64), \
1930
+ CPU_HOST_BINARY_INIT(name, float64, int32, float64), \
1931
+ CPU_HOST_BINARY_INIT(name, float64, bfloat16, float64), \
1932
+ CPU_HOST_BINARY_INIT(name, float64, float16, float64), \
1933
+ CPU_HOST_BINARY_INIT(name, float64, float32, float64), \
1934
+ CPU_HOST_BINARY_INIT(name, float64, float64, float64), \
1935
+ CPU_HOST_BINARY_INIT(name, float64, complex32, complex128), \
1936
+ CPU_HOST_BINARY_INIT(name, float64, complex64, complex128), \
1937
+ CPU_HOST_BINARY_INIT(name, float64, complex128, complex128), \
1938
+ \
1939
+ CPU_HOST_BINARY_INIT(name, complex32, uint8, complex32), \
1940
+ CPU_HOST_BINARY_INIT(name, complex32, uint16, complex64), \
1941
+ CPU_HOST_BINARY_INIT(name, complex32, uint32, complex128), \
1942
+ CPU_HOST_BINARY_INIT(name, complex32, int8, complex32), \
1943
+ CPU_HOST_BINARY_INIT(name, complex32, int16, complex64), \
1944
+ CPU_HOST_BINARY_INIT(name, complex32, int32, complex128), \
1945
+ CPU_HOST_BINARY_INIT(name, complex32, bfloat16, complex64), \
1946
+ CPU_HOST_BINARY_INIT(name, complex32, float16, complex32), \
1947
+ CPU_HOST_BINARY_INIT(name, complex32, float32, complex64), \
1948
+ CPU_HOST_BINARY_INIT(name, complex32, float64, complex128), \
1949
+ CPU_HOST_BINARY_INIT(name, complex32, complex32, complex32), \
1950
+ CPU_HOST_BINARY_INIT(name, complex32, complex64, complex64), \
1951
+ CPU_HOST_BINARY_INIT(name, complex32, complex128, complex128), \
1952
+ \
1953
+ CPU_HOST_BINARY_INIT(name, complex64, uint8, complex64), \
1954
+ CPU_HOST_BINARY_INIT(name, complex64, uint16, complex64), \
1955
+ CPU_HOST_BINARY_INIT(name, complex64, uint32, complex128), \
1956
+ CPU_HOST_BINARY_INIT(name, complex64, int8, complex64), \
1957
+ CPU_HOST_BINARY_INIT(name, complex64, int16, complex64), \
1958
+ CPU_HOST_BINARY_INIT(name, complex64, int32, complex128), \
1959
+ CPU_HOST_BINARY_INIT(name, complex64, bfloat16, complex64), \
1960
+ CPU_HOST_BINARY_INIT(name, complex64, float16, complex64), \
1961
+ CPU_HOST_BINARY_INIT(name, complex64, float32, complex64), \
1962
+ CPU_HOST_BINARY_INIT(name, complex64, float64, complex128), \
1963
+ CPU_HOST_BINARY_INIT(name, complex64, complex32, complex64), \
1964
+ CPU_HOST_BINARY_INIT(name, complex64, complex64, complex64), \
1965
+ CPU_HOST_BINARY_INIT(name, complex64, complex128, complex128), \
1966
+ \
1967
+ CPU_HOST_BINARY_INIT(name, complex128, uint8, complex128), \
1968
+ CPU_HOST_BINARY_INIT(name, complex128, uint16, complex128), \
1969
+ CPU_HOST_BINARY_INIT(name, complex128, uint32, complex128), \
1970
+ CPU_HOST_BINARY_INIT(name, complex128, int8, complex128), \
1971
+ CPU_HOST_BINARY_INIT(name, complex128, int16, complex128), \
1972
+ CPU_HOST_BINARY_INIT(name, complex128, int32, complex128), \
1973
+ CPU_HOST_BINARY_INIT(name, complex128, bfloat16, complex128), \
1974
+ CPU_HOST_BINARY_INIT(name, complex128, float16, complex128), \
1975
+ CPU_HOST_BINARY_INIT(name, complex128, float32, complex128), \
1976
+ CPU_HOST_BINARY_INIT(name, complex128, float64, complex128), \
1977
+ CPU_HOST_BINARY_INIT(name, complex128, complex32, complex128), \
1978
+ CPU_HOST_BINARY_INIT(name, complex128, complex64, complex128), \
1979
+ CPU_HOST_BINARY_INIT(name, complex128, complex128, complex128)
1980
+
1981
+
1982
/*
 * Expand the arithmetic macro tables once per operation name.
 * add, subtract, multiply and power use CPU_HOST_ALL_ARITHMETIC;
 * floor_divide and remainder use CPU_HOST_ALL_ARITHMETIC_NO_COMPLEX;
 * divide uses CPU_HOST_ALL_ARITHMETIC_FLOAT_RETURN.
 *
 * NOTE(review): the three table macros are defined outside this section;
 * presumably each expansion emits the per-type-combination kernels for the
 * named operation -- confirm against their definitions.
 */
+ CPU_HOST_ALL_ARITHMETIC(add)
1983
+ CPU_HOST_ALL_ARITHMETIC(subtract)
1984
+ CPU_HOST_ALL_ARITHMETIC(multiply)
1985
+ CPU_HOST_ALL_ARITHMETIC_NO_COMPLEX(floor_divide)
1986
+ CPU_HOST_ALL_ARITHMETIC_NO_COMPLEX(remainder)
1987
+ CPU_HOST_ALL_ARITHMETIC_FLOAT_RETURN(divide)
1988
+ CPU_HOST_ALL_ARITHMETIC(power)
1989
+
1990
+
1991
+ /*****************************************************************************/
1992
+ /* Comparison */
1993
+ /*****************************************************************************/
1994
+
1995
+ #define CPU_HOST_ALL_COMPARISON(name) \
1996
+ CPU_HOST_BINARY(name, uint8, uint8, bool) \
1997
+ CPU_HOST_BINARY(name, uint8, uint16, bool) \
1998
+ CPU_HOST_BINARY(name, uint8, uint32, bool) \
1999
+ CPU_HOST_BINARY(name, uint8, uint64, bool) \
2000
+ CPU_HOST_BINARY(name, uint8, int8, bool) \
2001
+ CPU_HOST_BINARY(name, uint8, int16, bool) \
2002
+ CPU_HOST_BINARY(name, uint8, int32, bool) \
2003
+ CPU_HOST_BINARY(name, uint8, int64, bool) \
2004
+ CPU_HOST_BINARY(name, uint8, bfloat16, bool) \
2005
+ CPU_HOST_NOIMPL(name, uint8, float16, bool) \
2006
+ CPU_HOST_BINARY(name, uint8, float32, bool) \
2007
+ CPU_HOST_BINARY(name, uint8, float64, bool) \
2008
+ CPU_HOST_NOIMPL(name, uint8, complex32, bool) \
2009
+ CPU_HOST_BINARY(name, uint8, complex64, bool) \
2010
+ CPU_HOST_BINARY(name, uint8, complex128, bool) \
2011
+ \
2012
+ CPU_HOST_BINARY(name, uint16, uint8, bool) \
2013
+ CPU_HOST_BINARY(name, uint16, uint16, bool) \
2014
+ CPU_HOST_BINARY(name, uint16, uint32, bool) \
2015
+ CPU_HOST_BINARY(name, uint16, uint64, bool) \
2016
+ CPU_HOST_BINARY(name, uint16, int8, bool) \
2017
+ CPU_HOST_BINARY(name, uint16, int16, bool) \
2018
+ CPU_HOST_BINARY(name, uint16, int32, bool) \
2019
+ CPU_HOST_BINARY(name, uint16, int64, bool) \
2020
+ CPU_HOST_BINARY(name, uint16, bfloat16, bool) \
2021
+ CPU_HOST_NOIMPL(name, uint16, float16, bool) \
2022
+ CPU_HOST_BINARY(name, uint16, float32, bool) \
2023
+ CPU_HOST_BINARY(name, uint16, float64, bool) \
2024
+ CPU_HOST_NOIMPL(name, uint16, complex32, bool) \
2025
+ CPU_HOST_BINARY(name, uint16, complex64, bool) \
2026
+ CPU_HOST_BINARY(name, uint16, complex128, bool) \
2027
+ \
2028
+ CPU_HOST_BINARY(name, uint32, uint8, bool) \
2029
+ CPU_HOST_BINARY(name, uint32, uint16, bool) \
2030
+ CPU_HOST_BINARY(name, uint32, uint32, bool) \
2031
+ CPU_HOST_BINARY(name, uint32, uint64, bool) \
2032
+ CPU_HOST_BINARY(name, uint32, int8, bool) \
2033
+ CPU_HOST_BINARY(name, uint32, int16, bool) \
2034
+ CPU_HOST_BINARY(name, uint32, int32, bool) \
2035
+ CPU_HOST_BINARY(name, uint32, int64, bool) \
2036
+ CPU_HOST_BINARY(name, uint32, bfloat16, bool) \
2037
+ CPU_HOST_NOIMPL(name, uint32, float16, bool) \
2038
+ CPU_HOST_BINARY(name, uint32, float32, bool) \
2039
+ CPU_HOST_BINARY(name, uint32, float64, bool) \
2040
+ CPU_HOST_NOIMPL(name, uint32, complex32, bool) \
2041
+ CPU_HOST_BINARY(name, uint32, complex64, bool) \
2042
+ CPU_HOST_BINARY(name, uint32, complex128, bool) \
2043
+ \
2044
+ CPU_HOST_BINARY(name, uint64, uint8, bool) \
2045
+ CPU_HOST_BINARY(name, uint64, uint16, bool) \
2046
+ CPU_HOST_BINARY(name, uint64, uint32, bool) \
2047
+ CPU_HOST_BINARY(name, uint64, uint64, bool) \
2048
+ \
2049
+ CPU_HOST_BINARY(name, int8, uint8, bool) \
2050
+ CPU_HOST_BINARY(name, int8, uint16, bool) \
2051
+ CPU_HOST_BINARY(name, int8, uint32, bool) \
2052
+ CPU_HOST_BINARY(name, int8, int8, bool) \
2053
+ CPU_HOST_BINARY(name, int8, int16, bool) \
2054
+ CPU_HOST_BINARY(name, int8, int32, bool) \
2055
+ CPU_HOST_BINARY(name, int8, int64, bool) \
2056
+ CPU_HOST_BINARY(name, int8, bfloat16, bool) \
2057
+ CPU_HOST_NOIMPL(name, int8, float16, bool) \
2058
+ CPU_HOST_BINARY(name, int8, float32, bool) \
2059
+ CPU_HOST_BINARY(name, int8, float64, bool) \
2060
+ CPU_HOST_NOIMPL(name, int8, complex32, bool) \
2061
+ CPU_HOST_BINARY(name, int8, complex64, bool) \
2062
+ CPU_HOST_BINARY(name, int8, complex128, bool) \
2063
+ \
2064
+ CPU_HOST_BINARY(name, int16, uint8, bool) \
2065
+ CPU_HOST_BINARY(name, int16, uint16, bool) \
2066
+ CPU_HOST_BINARY(name, int16, uint32, bool) \
2067
+ CPU_HOST_BINARY(name, int16, int8, bool) \
2068
+ CPU_HOST_BINARY(name, int16, int16, bool) \
2069
+ CPU_HOST_BINARY(name, int16, int32, bool) \
2070
+ CPU_HOST_BINARY(name, int16, int64, bool) \
2071
+ CPU_HOST_BINARY(name, int16, bfloat16, bool) \
2072
+ CPU_HOST_NOIMPL(name, int16, float16, bool) \
2073
+ CPU_HOST_BINARY(name, int16, float32, bool) \
2074
+ CPU_HOST_BINARY(name, int16, float64, bool) \
2075
+ CPU_HOST_NOIMPL(name, int16, complex32, bool) \
2076
+ CPU_HOST_BINARY(name, int16, complex64, bool) \
2077
+ CPU_HOST_BINARY(name, int16, complex128, bool) \
2078
+ \
2079
+ CPU_HOST_BINARY(name, int32, uint8, bool) \
2080
+ CPU_HOST_BINARY(name, int32, uint16, bool) \
2081
+ CPU_HOST_BINARY(name, int32, uint32, bool) \
2082
+ CPU_HOST_BINARY(name, int32, int8, bool) \
2083
+ CPU_HOST_BINARY(name, int32, int16, bool) \
2084
+ CPU_HOST_BINARY(name, int32, int32, bool) \
2085
+ CPU_HOST_BINARY(name, int32, int64, bool) \
2086
+ CPU_HOST_BINARY(name, int32, bfloat16, bool) \
2087
+ CPU_HOST_NOIMPL(name, int32, float16, bool) \
2088
+ CPU_HOST_BINARY(name, int32, float32, bool) \
2089
+ CPU_HOST_BINARY(name, int32, float64, bool) \
2090
+ CPU_HOST_NOIMPL(name, int32, complex32, bool) \
2091
+ CPU_HOST_BINARY(name, int32, complex64, bool) \
2092
+ CPU_HOST_BINARY(name, int32, complex128, bool) \
2093
+ \
2094
+ CPU_HOST_BINARY(name, int64, uint8, bool) \
2095
+ CPU_HOST_BINARY(name, int64, uint16, bool) \
2096
+ CPU_HOST_BINARY(name, int64, uint32, bool) \
2097
+ CPU_HOST_BINARY(name, int64, int8, bool) \
2098
+ CPU_HOST_BINARY(name, int64, int16, bool) \
2099
+ CPU_HOST_BINARY(name, int64, int32, bool) \
2100
+ CPU_HOST_BINARY(name, int64, int64, bool) \
2101
+ \
2102
+ CPU_HOST_BINARY(name, bfloat16, uint8, bool) \
2103
+ CPU_HOST_BINARY(name, bfloat16, uint16, bool) \
2104
+ CPU_HOST_BINARY(name, bfloat16, uint32, bool) \
2105
+ CPU_HOST_BINARY(name, bfloat16, int8, bool) \
2106
+ CPU_HOST_BINARY(name, bfloat16, int16, bool) \
2107
+ CPU_HOST_BINARY(name, bfloat16, int32, bool) \
2108
+ CPU_HOST_BINARY(name, bfloat16, bfloat16, bool) \
2109
+ CPU_HOST_NOIMPL(name, bfloat16, float16, bool) \
2110
+ CPU_HOST_BINARY(name, bfloat16, float32, bool) \
2111
+ CPU_HOST_BINARY(name, bfloat16, float64, bool) \
2112
+ CPU_HOST_NOIMPL(name, bfloat16, complex32, bool) \
2113
+ CPU_HOST_BINARY(name, bfloat16, complex64, bool) \
2114
+ CPU_HOST_BINARY(name, bfloat16, complex128, bool) \
2115
+ \
2116
+ CPU_HOST_NOIMPL(name, float16, uint8, bool) \
2117
+ CPU_HOST_NOIMPL(name, float16, uint16, bool) \
2118
+ CPU_HOST_NOIMPL(name, float16, uint32, bool) \
2119
+ CPU_HOST_NOIMPL(name, float16, int8, bool) \
2120
+ CPU_HOST_NOIMPL(name, float16, int16, bool) \
2121
+ CPU_HOST_NOIMPL(name, float16, int32, bool) \
2122
+ CPU_HOST_NOIMPL(name, float16, bfloat16, bool) \
2123
+ CPU_HOST_NOIMPL(name, float16, float16, bool) \
2124
+ CPU_HOST_NOIMPL(name, float16, float32, bool) \
2125
+ CPU_HOST_NOIMPL(name, float16, float64, bool) \
2126
+ CPU_HOST_NOIMPL(name, float16, complex32, bool) \
2127
+ CPU_HOST_NOIMPL(name, float16, complex64, bool) \
2128
+ CPU_HOST_NOIMPL(name, float16, complex128, bool) \
2129
+ \
2130
+ CPU_HOST_BINARY(name, float32, uint8, bool) \
2131
+ CPU_HOST_BINARY(name, float32, uint16, bool) \
2132
+ CPU_HOST_BINARY(name, float32, uint32, bool) \
2133
+ CPU_HOST_BINARY(name, float32, int8, bool) \
2134
+ CPU_HOST_BINARY(name, float32, int16, bool) \
2135
+ CPU_HOST_BINARY(name, float32, int32, bool) \
2136
+ CPU_HOST_BINARY(name, float32, bfloat16, bool) \
2137
+ CPU_HOST_NOIMPL(name, float32, float16, bool) \
2138
+ CPU_HOST_BINARY(name, float32, float32, bool) \
2139
+ CPU_HOST_BINARY(name, float32, float64, bool) \
2140
+ CPU_HOST_NOIMPL(name, float32, complex32, bool) \
2141
+ CPU_HOST_BINARY(name, float32, complex64, bool) \
2142
+ CPU_HOST_BINARY(name, float32, complex128, bool) \
2143
+ \
2144
+ CPU_HOST_BINARY(name, float64, uint8, bool) \
2145
+ CPU_HOST_BINARY(name, float64, uint16, bool) \
2146
+ CPU_HOST_BINARY(name, float64, uint32, bool) \
2147
+ CPU_HOST_BINARY(name, float64, int8, bool) \
2148
+ CPU_HOST_BINARY(name, float64, int16, bool) \
2149
+ CPU_HOST_BINARY(name, float64, int32, bool) \
2150
+ CPU_HOST_BINARY(name, float64, bfloat16, bool) \
2151
+ CPU_HOST_NOIMPL(name, float64, float16, bool) \
2152
+ CPU_HOST_BINARY(name, float64, float32, bool) \
2153
+ CPU_HOST_BINARY(name, float64, float64, bool) \
2154
+ CPU_HOST_NOIMPL(name, float64, complex32, bool) \
2155
+ CPU_HOST_BINARY(name, float64, complex64, bool) \
2156
+ CPU_HOST_BINARY(name, float64, complex128, bool) \
2157
+ \
2158
+ CPU_HOST_NOIMPL(name, complex32, uint8, bool) \
2159
+ CPU_HOST_NOIMPL(name, complex32, uint16, bool) \
2160
+ CPU_HOST_NOIMPL(name, complex32, uint32, bool) \
2161
+ CPU_HOST_NOIMPL(name, complex32, int8, bool) \
2162
+ CPU_HOST_NOIMPL(name, complex32, int16, bool) \
2163
+ CPU_HOST_NOIMPL(name, complex32, int32, bool) \
2164
+ CPU_HOST_NOIMPL(name, complex32, bfloat16, bool) \
2165
+ CPU_HOST_NOIMPL(name, complex32, float16, bool) \
2166
+ CPU_HOST_NOIMPL(name, complex32, float32, bool) \
2167
+ CPU_HOST_NOIMPL(name, complex32, float64, bool) \
2168
+ CPU_HOST_NOIMPL(name, complex32, complex32, bool) \
2169
+ CPU_HOST_NOIMPL(name, complex32, complex64, bool) \
2170
+ CPU_HOST_NOIMPL(name, complex32, complex128, bool) \
2171
+ \
2172
+ CPU_HOST_BINARY(name, complex64, uint8, bool) \
2173
+ CPU_HOST_BINARY(name, complex64, uint16, bool) \
2174
+ CPU_HOST_BINARY(name, complex64, uint32, bool) \
2175
+ CPU_HOST_BINARY(name, complex64, int8, bool) \
2176
+ CPU_HOST_BINARY(name, complex64, int16, bool) \
2177
+ CPU_HOST_BINARY(name, complex64, int32, bool) \
2178
+ CPU_HOST_BINARY(name, complex64, bfloat16, bool) \
2179
+ CPU_HOST_NOIMPL(name, complex64, float16, bool) \
2180
+ CPU_HOST_BINARY(name, complex64, float32, bool) \
2181
+ CPU_HOST_BINARY(name, complex64, float64, bool) \
2182
+ CPU_HOST_NOIMPL(name, complex64, complex32, bool) \
2183
+ CPU_HOST_BINARY(name, complex64, complex64, bool) \
2184
+ CPU_HOST_BINARY(name, complex64, complex128, bool) \
2185
+ \
2186
+ CPU_HOST_BINARY(name, complex128, uint8, bool) \
2187
+ CPU_HOST_BINARY(name, complex128, uint16, bool) \
2188
+ CPU_HOST_BINARY(name, complex128, uint32, bool) \
2189
+ CPU_HOST_BINARY(name, complex128, int8, bool) \
2190
+ CPU_HOST_BINARY(name, complex128, int16, bool) \
2191
+ CPU_HOST_BINARY(name, complex128, int32, bool) \
2192
+ CPU_HOST_BINARY(name, complex128, bfloat16, bool) \
2193
+ CPU_HOST_NOIMPL(name, complex128, float16, bool) \
2194
+ CPU_HOST_BINARY(name, complex128, float32, bool) \
2195
+ CPU_HOST_BINARY(name, complex128, float64, bool) \
2196
+ CPU_HOST_NOIMPL(name, complex128, complex32, bool) \
2197
+ CPU_HOST_BINARY(name, complex128, complex64, bool) \
2198
+ CPU_HOST_BINARY(name, complex128, complex128, bool)
2199
+
2200
+ #define CPU_HOST_ALL_COMPARISON_INIT(name) /* Initializer list with one CPU_HOST_BINARY_INIT(name, t0, t1, bool) entry per listed operand-type pair; expanded inside binary_kernels[]. float16 and complex32 pairs get ordinary entries here even though the kernel-definition list preceding this macro marks them CPU_HOST_NOIMPL. */ \
2201
+ CPU_HOST_BINARY_INIT(name, uint8, uint8, bool), \
2202
+ CPU_HOST_BINARY_INIT(name, uint8, uint16, bool), \
2203
+ CPU_HOST_BINARY_INIT(name, uint8, uint32, bool), \
2204
+ CPU_HOST_BINARY_INIT(name, uint8, uint64, bool), \
2205
+ CPU_HOST_BINARY_INIT(name, uint8, int8, bool), \
2206
+ CPU_HOST_BINARY_INIT(name, uint8, int16, bool), \
2207
+ CPU_HOST_BINARY_INIT(name, uint8, int32, bool), \
2208
+ CPU_HOST_BINARY_INIT(name, uint8, int64, bool), \
2209
+ CPU_HOST_BINARY_INIT(name, uint8, bfloat16, bool), \
2210
+ CPU_HOST_BINARY_INIT(name, uint8, float16, bool), \
2211
+ CPU_HOST_BINARY_INIT(name, uint8, float32, bool), \
2212
+ CPU_HOST_BINARY_INIT(name, uint8, float64, bool), \
2213
+ CPU_HOST_BINARY_INIT(name, uint8, complex32, bool), \
2214
+ CPU_HOST_BINARY_INIT(name, uint8, complex64, bool), \
2215
+ CPU_HOST_BINARY_INIT(name, uint8, complex128, bool), \
2216
+ \
2217
+ CPU_HOST_BINARY_INIT(name, uint16, uint8, bool), \
2218
+ CPU_HOST_BINARY_INIT(name, uint16, uint16, bool), \
2219
+ CPU_HOST_BINARY_INIT(name, uint16, uint32, bool), \
2220
+ CPU_HOST_BINARY_INIT(name, uint16, uint64, bool), \
2221
+ CPU_HOST_BINARY_INIT(name, uint16, int8, bool), \
2222
+ CPU_HOST_BINARY_INIT(name, uint16, int16, bool), \
2223
+ CPU_HOST_BINARY_INIT(name, uint16, int32, bool), \
2224
+ CPU_HOST_BINARY_INIT(name, uint16, int64, bool), \
2225
+ CPU_HOST_BINARY_INIT(name, uint16, bfloat16, bool), \
2226
+ CPU_HOST_BINARY_INIT(name, uint16, float16, bool), \
2227
+ CPU_HOST_BINARY_INIT(name, uint16, float32, bool), \
2228
+ CPU_HOST_BINARY_INIT(name, uint16, float64, bool), \
2229
+ CPU_HOST_BINARY_INIT(name, uint16, complex32, bool), \
2230
+ CPU_HOST_BINARY_INIT(name, uint16, complex64, bool), \
2231
+ CPU_HOST_BINARY_INIT(name, uint16, complex128, bool), \
2232
+ \
2233
+ CPU_HOST_BINARY_INIT(name, uint32, uint8, bool), \
2234
+ CPU_HOST_BINARY_INIT(name, uint32, uint16, bool), \
2235
+ CPU_HOST_BINARY_INIT(name, uint32, uint32, bool), \
2236
+ CPU_HOST_BINARY_INIT(name, uint32, uint64, bool), \
2237
+ CPU_HOST_BINARY_INIT(name, uint32, int8, bool), \
2238
+ CPU_HOST_BINARY_INIT(name, uint32, int16, bool), \
2239
+ CPU_HOST_BINARY_INIT(name, uint32, int32, bool), \
2240
+ CPU_HOST_BINARY_INIT(name, uint32, int64, bool), \
2241
+ CPU_HOST_BINARY_INIT(name, uint32, bfloat16, bool), \
2242
+ CPU_HOST_BINARY_INIT(name, uint32, float16, bool), \
2243
+ CPU_HOST_BINARY_INIT(name, uint32, float32, bool), \
2244
+ CPU_HOST_BINARY_INIT(name, uint32, float64, bool), \
2245
+ CPU_HOST_BINARY_INIT(name, uint32, complex32, bool), \
2246
+ CPU_HOST_BINARY_INIT(name, uint32, complex64, bool), \
2247
+ CPU_HOST_BINARY_INIT(name, uint32, complex128, bool), \
2248
+ \
2249
+ CPU_HOST_BINARY_INIT(name, uint64, uint8, bool), \
2250
+ CPU_HOST_BINARY_INIT(name, uint64, uint16, bool), \
2251
+ CPU_HOST_BINARY_INIT(name, uint64, uint32, bool), \
2252
+ CPU_HOST_BINARY_INIT(name, uint64, uint64, bool), \
2253
+ \
2254
+ CPU_HOST_BINARY_INIT(name, int8, uint8, bool), \
2255
+ CPU_HOST_BINARY_INIT(name, int8, uint16, bool), \
2256
+ CPU_HOST_BINARY_INIT(name, int8, uint32, bool), \
2257
+ CPU_HOST_BINARY_INIT(name, int8, int8, bool), \
2258
+ CPU_HOST_BINARY_INIT(name, int8, int16, bool), \
2259
+ CPU_HOST_BINARY_INIT(name, int8, int32, bool), \
2260
+ CPU_HOST_BINARY_INIT(name, int8, int64, bool), \
2261
+ CPU_HOST_BINARY_INIT(name, int8, bfloat16, bool), \
2262
+ CPU_HOST_BINARY_INIT(name, int8, float16, bool), \
2263
+ CPU_HOST_BINARY_INIT(name, int8, float32, bool), \
2264
+ CPU_HOST_BINARY_INIT(name, int8, float64, bool), \
2265
+ CPU_HOST_BINARY_INIT(name, int8, complex32, bool), \
2266
+ CPU_HOST_BINARY_INIT(name, int8, complex64, bool), \
2267
+ CPU_HOST_BINARY_INIT(name, int8, complex128, bool), \
2268
+ \
2269
+ CPU_HOST_BINARY_INIT(name, int16, uint8, bool), \
2270
+ CPU_HOST_BINARY_INIT(name, int16, uint16, bool), \
2271
+ CPU_HOST_BINARY_INIT(name, int16, uint32, bool), \
2272
+ CPU_HOST_BINARY_INIT(name, int16, int8, bool), \
2273
+ CPU_HOST_BINARY_INIT(name, int16, int16, bool), \
2274
+ CPU_HOST_BINARY_INIT(name, int16, int32, bool), \
2275
+ CPU_HOST_BINARY_INIT(name, int16, int64, bool), \
2276
+ CPU_HOST_BINARY_INIT(name, int16, bfloat16, bool), \
2277
+ CPU_HOST_BINARY_INIT(name, int16, float16, bool), \
2278
+ CPU_HOST_BINARY_INIT(name, int16, float32, bool), \
2279
+ CPU_HOST_BINARY_INIT(name, int16, float64, bool), \
2280
+ CPU_HOST_BINARY_INIT(name, int16, complex32, bool), \
2281
+ CPU_HOST_BINARY_INIT(name, int16, complex64, bool), \
2282
+ CPU_HOST_BINARY_INIT(name, int16, complex128, bool), \
2283
+ \
2284
+ CPU_HOST_BINARY_INIT(name, int32, uint8, bool), \
2285
+ CPU_HOST_BINARY_INIT(name, int32, uint16, bool), \
2286
+ CPU_HOST_BINARY_INIT(name, int32, uint32, bool), \
2287
+ CPU_HOST_BINARY_INIT(name, int32, int8, bool), \
2288
+ CPU_HOST_BINARY_INIT(name, int32, int16, bool), \
2289
+ CPU_HOST_BINARY_INIT(name, int32, int32, bool), \
2290
+ CPU_HOST_BINARY_INIT(name, int32, int64, bool), \
2291
+ CPU_HOST_BINARY_INIT(name, int32, bfloat16, bool), \
2292
+ CPU_HOST_BINARY_INIT(name, int32, float16, bool), \
2293
+ CPU_HOST_BINARY_INIT(name, int32, float32, bool), \
2294
+ CPU_HOST_BINARY_INIT(name, int32, float64, bool), \
2295
+ CPU_HOST_BINARY_INIT(name, int32, complex32, bool), \
2296
+ CPU_HOST_BINARY_INIT(name, int32, complex64, bool), \
2297
+ CPU_HOST_BINARY_INIT(name, int32, complex128, bool), \
2298
+ \
2299
+ CPU_HOST_BINARY_INIT(name, int64, uint8, bool), \
2300
+ CPU_HOST_BINARY_INIT(name, int64, uint16, bool), \
2301
+ CPU_HOST_BINARY_INIT(name, int64, uint32, bool), \
2302
+ CPU_HOST_BINARY_INIT(name, int64, int8, bool), \
2303
+ CPU_HOST_BINARY_INIT(name, int64, int16, bool), \
2304
+ CPU_HOST_BINARY_INIT(name, int64, int32, bool), \
2305
+ CPU_HOST_BINARY_INIT(name, int64, int64, bool), \
2306
+ \
2307
+ CPU_HOST_BINARY_INIT(name, bfloat16, uint8, bool), \
2308
+ CPU_HOST_BINARY_INIT(name, bfloat16, uint16, bool), \
2309
+ CPU_HOST_BINARY_INIT(name, bfloat16, uint32, bool), \
2310
+ CPU_HOST_BINARY_INIT(name, bfloat16, int8, bool), \
2311
+ CPU_HOST_BINARY_INIT(name, bfloat16, int16, bool), \
2312
+ CPU_HOST_BINARY_INIT(name, bfloat16, int32, bool), \
2313
+ CPU_HOST_BINARY_INIT(name, bfloat16, bfloat16, bool), \
2314
+ CPU_HOST_BINARY_INIT(name, bfloat16, float16, bool), \
2315
+ CPU_HOST_BINARY_INIT(name, bfloat16, float32, bool), \
2316
+ CPU_HOST_BINARY_INIT(name, bfloat16, float64, bool), \
2317
+ CPU_HOST_BINARY_INIT(name, bfloat16, complex32, bool), \
2318
+ CPU_HOST_BINARY_INIT(name, bfloat16, complex64, bool), \
2319
+ CPU_HOST_BINARY_INIT(name, bfloat16, complex128, bool), \
2320
+ \
2321
+ CPU_HOST_BINARY_INIT(name, float16, uint8, bool), \
2322
+ CPU_HOST_BINARY_INIT(name, float16, uint16, bool), \
2323
+ CPU_HOST_BINARY_INIT(name, float16, uint32, bool), \
2324
+ CPU_HOST_BINARY_INIT(name, float16, int8, bool), \
2325
+ CPU_HOST_BINARY_INIT(name, float16, int16, bool), \
2326
+ CPU_HOST_BINARY_INIT(name, float16, int32, bool), \
2327
+ CPU_HOST_BINARY_INIT(name, float16, bfloat16, bool), \
2328
+ CPU_HOST_BINARY_INIT(name, float16, float16, bool), \
2329
+ CPU_HOST_BINARY_INIT(name, float16, float32, bool), \
2330
+ CPU_HOST_BINARY_INIT(name, float16, float64, bool), \
2331
+ CPU_HOST_BINARY_INIT(name, float16, complex32, bool), \
2332
+ CPU_HOST_BINARY_INIT(name, float16, complex64, bool), \
2333
+ CPU_HOST_BINARY_INIT(name, float16, complex128, bool), \
2334
+ \
2335
+ CPU_HOST_BINARY_INIT(name, float32, uint8, bool), \
2336
+ CPU_HOST_BINARY_INIT(name, float32, uint16, bool), \
2337
+ CPU_HOST_BINARY_INIT(name, float32, uint32, bool), \
2338
+ CPU_HOST_BINARY_INIT(name, float32, int8, bool), \
2339
+ CPU_HOST_BINARY_INIT(name, float32, int16, bool), \
2340
+ CPU_HOST_BINARY_INIT(name, float32, int32, bool), \
2341
+ CPU_HOST_BINARY_INIT(name, float32, bfloat16, bool), \
2342
+ CPU_HOST_BINARY_INIT(name, float32, float16, bool), \
2343
+ CPU_HOST_BINARY_INIT(name, float32, float32, bool), \
2344
+ CPU_HOST_BINARY_INIT(name, float32, float64, bool), \
2345
+ CPU_HOST_BINARY_INIT(name, float32, complex32, bool), \
2346
+ CPU_HOST_BINARY_INIT(name, float32, complex64, bool), \
2347
+ CPU_HOST_BINARY_INIT(name, float32, complex128, bool), \
2348
+ \
2349
+ CPU_HOST_BINARY_INIT(name, float64, uint8, bool), \
2350
+ CPU_HOST_BINARY_INIT(name, float64, uint16, bool), \
2351
+ CPU_HOST_BINARY_INIT(name, float64, uint32, bool), \
2352
+ CPU_HOST_BINARY_INIT(name, float64, int8, bool), \
2353
+ CPU_HOST_BINARY_INIT(name, float64, int16, bool), \
2354
+ CPU_HOST_BINARY_INIT(name, float64, int32, bool), \
2355
+ CPU_HOST_BINARY_INIT(name, float64, bfloat16, bool), \
2356
+ CPU_HOST_BINARY_INIT(name, float64, float16, bool), \
2357
+ CPU_HOST_BINARY_INIT(name, float64, float32, bool), \
2358
+ CPU_HOST_BINARY_INIT(name, float64, float64, bool), \
2359
+ CPU_HOST_BINARY_INIT(name, float64, complex32, bool), \
2360
+ CPU_HOST_BINARY_INIT(name, float64, complex64, bool), \
2361
+ CPU_HOST_BINARY_INIT(name, float64, complex128, bool), \
2362
+ \
2363
+ CPU_HOST_BINARY_INIT(name, complex32, uint8, bool), \
2364
+ CPU_HOST_BINARY_INIT(name, complex32, uint16, bool), \
2365
+ CPU_HOST_BINARY_INIT(name, complex32, uint32, bool), \
2366
+ CPU_HOST_BINARY_INIT(name, complex32, int8, bool), \
2367
+ CPU_HOST_BINARY_INIT(name, complex32, int16, bool), \
2368
+ CPU_HOST_BINARY_INIT(name, complex32, int32, bool), \
2369
+ CPU_HOST_BINARY_INIT(name, complex32, bfloat16, bool), \
2370
+ CPU_HOST_BINARY_INIT(name, complex32, float16, bool), \
2371
+ CPU_HOST_BINARY_INIT(name, complex32, float32, bool), \
2372
+ CPU_HOST_BINARY_INIT(name, complex32, float64, bool), \
2373
+ CPU_HOST_BINARY_INIT(name, complex32, complex32, bool), \
2374
+ CPU_HOST_BINARY_INIT(name, complex32, complex64, bool), \
2375
+ CPU_HOST_BINARY_INIT(name, complex32, complex128, bool), \
2376
+ \
2377
+ CPU_HOST_BINARY_INIT(name, complex64, uint8, bool), \
2378
+ CPU_HOST_BINARY_INIT(name, complex64, uint16, bool), \
2379
+ CPU_HOST_BINARY_INIT(name, complex64, uint32, bool), \
2380
+ CPU_HOST_BINARY_INIT(name, complex64, int8, bool), \
2381
+ CPU_HOST_BINARY_INIT(name, complex64, int16, bool), \
2382
+ CPU_HOST_BINARY_INIT(name, complex64, int32, bool), \
2383
+ CPU_HOST_BINARY_INIT(name, complex64, bfloat16, bool), \
2384
+ CPU_HOST_BINARY_INIT(name, complex64, float16, bool), \
2385
+ CPU_HOST_BINARY_INIT(name, complex64, float32, bool), \
2386
+ CPU_HOST_BINARY_INIT(name, complex64, float64, bool), \
2387
+ CPU_HOST_BINARY_INIT(name, complex64, complex32, bool), \
2388
+ CPU_HOST_BINARY_INIT(name, complex64, complex64, bool), \
2389
+ CPU_HOST_BINARY_INIT(name, complex64, complex128, bool), \
2390
+ \
2391
+ CPU_HOST_BINARY_INIT(name, complex128, uint8, bool), \
2392
+ CPU_HOST_BINARY_INIT(name, complex128, uint16, bool), \
2393
+ CPU_HOST_BINARY_INIT(name, complex128, uint32, bool), \
2394
+ CPU_HOST_BINARY_INIT(name, complex128, int8, bool), \
2395
+ CPU_HOST_BINARY_INIT(name, complex128, int16, bool), \
2396
+ CPU_HOST_BINARY_INIT(name, complex128, int32, bool), \
2397
+ CPU_HOST_BINARY_INIT(name, complex128, bfloat16, bool), \
2398
+ CPU_HOST_BINARY_INIT(name, complex128, float16, bool), \
2399
+ CPU_HOST_BINARY_INIT(name, complex128, float32, bool), \
2400
+ CPU_HOST_BINARY_INIT(name, complex128, float64, bool), \
2401
+ CPU_HOST_BINARY_INIT(name, complex128, complex32, bool), \
2402
+ CPU_HOST_BINARY_INIT(name, complex128, complex64, bool), \
2403
+ CPU_HOST_BINARY_INIT(name, complex128, complex128, bool)
2404
+
2405
+ #define CPU_HOST_ALL_EQUALN_INIT(name) /* Initializer list covering the same operand-type pairs as CPU_HOST_ALL_COMPARISON_INIT, but each entry is built with CPU_HOST_EQUALN_INIT(name, t0, t1, bool); binary_kernels[] expands it only for equaln. */ \
2406
+ CPU_HOST_EQUALN_INIT(name, uint8, uint8, bool), \
2407
+ CPU_HOST_EQUALN_INIT(name, uint8, uint16, bool), \
2408
+ CPU_HOST_EQUALN_INIT(name, uint8, uint32, bool), \
2409
+ CPU_HOST_EQUALN_INIT(name, uint8, uint64, bool), \
2410
+ CPU_HOST_EQUALN_INIT(name, uint8, int8, bool), \
2411
+ CPU_HOST_EQUALN_INIT(name, uint8, int16, bool), \
2412
+ CPU_HOST_EQUALN_INIT(name, uint8, int32, bool), \
2413
+ CPU_HOST_EQUALN_INIT(name, uint8, int64, bool), \
2414
+ CPU_HOST_EQUALN_INIT(name, uint8, bfloat16, bool), \
2415
+ CPU_HOST_EQUALN_INIT(name, uint8, float16, bool), \
2416
+ CPU_HOST_EQUALN_INIT(name, uint8, float32, bool), \
2417
+ CPU_HOST_EQUALN_INIT(name, uint8, float64, bool), \
2418
+ CPU_HOST_EQUALN_INIT(name, uint8, complex32, bool), \
2419
+ CPU_HOST_EQUALN_INIT(name, uint8, complex64, bool), \
2420
+ CPU_HOST_EQUALN_INIT(name, uint8, complex128, bool), \
2421
+ \
2422
+ CPU_HOST_EQUALN_INIT(name, uint16, uint8, bool), \
2423
+ CPU_HOST_EQUALN_INIT(name, uint16, uint16, bool), \
2424
+ CPU_HOST_EQUALN_INIT(name, uint16, uint32, bool), \
2425
+ CPU_HOST_EQUALN_INIT(name, uint16, uint64, bool), \
2426
+ CPU_HOST_EQUALN_INIT(name, uint16, int8, bool), \
2427
+ CPU_HOST_EQUALN_INIT(name, uint16, int16, bool), \
2428
+ CPU_HOST_EQUALN_INIT(name, uint16, int32, bool), \
2429
+ CPU_HOST_EQUALN_INIT(name, uint16, int64, bool), \
2430
+ CPU_HOST_EQUALN_INIT(name, uint16, bfloat16, bool), \
2431
+ CPU_HOST_EQUALN_INIT(name, uint16, float16, bool), \
2432
+ CPU_HOST_EQUALN_INIT(name, uint16, float32, bool), \
2433
+ CPU_HOST_EQUALN_INIT(name, uint16, float64, bool), \
2434
+ CPU_HOST_EQUALN_INIT(name, uint16, complex32, bool), \
2435
+ CPU_HOST_EQUALN_INIT(name, uint16, complex64, bool), \
2436
+ CPU_HOST_EQUALN_INIT(name, uint16, complex128, bool), \
2437
+ \
2438
+ CPU_HOST_EQUALN_INIT(name, uint32, uint8, bool), \
2439
+ CPU_HOST_EQUALN_INIT(name, uint32, uint16, bool), \
2440
+ CPU_HOST_EQUALN_INIT(name, uint32, uint32, bool), \
2441
+ CPU_HOST_EQUALN_INIT(name, uint32, uint64, bool), \
2442
+ CPU_HOST_EQUALN_INIT(name, uint32, int8, bool), \
2443
+ CPU_HOST_EQUALN_INIT(name, uint32, int16, bool), \
2444
+ CPU_HOST_EQUALN_INIT(name, uint32, int32, bool), \
2445
+ CPU_HOST_EQUALN_INIT(name, uint32, int64, bool), \
2446
+ CPU_HOST_EQUALN_INIT(name, uint32, bfloat16, bool), \
2447
+ CPU_HOST_EQUALN_INIT(name, uint32, float16, bool), \
2448
+ CPU_HOST_EQUALN_INIT(name, uint32, float32, bool), \
2449
+ CPU_HOST_EQUALN_INIT(name, uint32, float64, bool), \
2450
+ CPU_HOST_EQUALN_INIT(name, uint32, complex32, bool), \
2451
+ CPU_HOST_EQUALN_INIT(name, uint32, complex64, bool), \
2452
+ CPU_HOST_EQUALN_INIT(name, uint32, complex128, bool), \
2453
+ \
2454
+ CPU_HOST_EQUALN_INIT(name, uint64, uint8, bool), \
2455
+ CPU_HOST_EQUALN_INIT(name, uint64, uint16, bool), \
2456
+ CPU_HOST_EQUALN_INIT(name, uint64, uint32, bool), \
2457
+ CPU_HOST_EQUALN_INIT(name, uint64, uint64, bool), \
2458
+ \
2459
+ CPU_HOST_EQUALN_INIT(name, int8, uint8, bool), \
2460
+ CPU_HOST_EQUALN_INIT(name, int8, uint16, bool), \
2461
+ CPU_HOST_EQUALN_INIT(name, int8, uint32, bool), \
2462
+ CPU_HOST_EQUALN_INIT(name, int8, int8, bool), \
2463
+ CPU_HOST_EQUALN_INIT(name, int8, int16, bool), \
2464
+ CPU_HOST_EQUALN_INIT(name, int8, int32, bool), \
2465
+ CPU_HOST_EQUALN_INIT(name, int8, int64, bool), \
2466
+ CPU_HOST_EQUALN_INIT(name, int8, bfloat16, bool), \
2467
+ CPU_HOST_EQUALN_INIT(name, int8, float16, bool), \
2468
+ CPU_HOST_EQUALN_INIT(name, int8, float32, bool), \
2469
+ CPU_HOST_EQUALN_INIT(name, int8, float64, bool), \
2470
+ CPU_HOST_EQUALN_INIT(name, int8, complex32, bool), \
2471
+ CPU_HOST_EQUALN_INIT(name, int8, complex64, bool), \
2472
+ CPU_HOST_EQUALN_INIT(name, int8, complex128, bool), \
2473
+ \
2474
+ CPU_HOST_EQUALN_INIT(name, int16, uint8, bool), \
2475
+ CPU_HOST_EQUALN_INIT(name, int16, uint16, bool), \
2476
+ CPU_HOST_EQUALN_INIT(name, int16, uint32, bool), \
2477
+ CPU_HOST_EQUALN_INIT(name, int16, int8, bool), \
2478
+ CPU_HOST_EQUALN_INIT(name, int16, int16, bool), \
2479
+ CPU_HOST_EQUALN_INIT(name, int16, int32, bool), \
2480
+ CPU_HOST_EQUALN_INIT(name, int16, int64, bool), \
2481
+ CPU_HOST_EQUALN_INIT(name, int16, bfloat16, bool), \
2482
+ CPU_HOST_EQUALN_INIT(name, int16, float16, bool), \
2483
+ CPU_HOST_EQUALN_INIT(name, int16, float32, bool), \
2484
+ CPU_HOST_EQUALN_INIT(name, int16, float64, bool), \
2485
+ CPU_HOST_EQUALN_INIT(name, int16, complex32, bool), \
2486
+ CPU_HOST_EQUALN_INIT(name, int16, complex64, bool), \
2487
+ CPU_HOST_EQUALN_INIT(name, int16, complex128, bool), \
2488
+ \
2489
+ CPU_HOST_EQUALN_INIT(name, int32, uint8, bool), \
2490
+ CPU_HOST_EQUALN_INIT(name, int32, uint16, bool), \
2491
+ CPU_HOST_EQUALN_INIT(name, int32, uint32, bool), \
2492
+ CPU_HOST_EQUALN_INIT(name, int32, int8, bool), \
2493
+ CPU_HOST_EQUALN_INIT(name, int32, int16, bool), \
2494
+ CPU_HOST_EQUALN_INIT(name, int32, int32, bool), \
2495
+ CPU_HOST_EQUALN_INIT(name, int32, int64, bool), \
2496
+ CPU_HOST_EQUALN_INIT(name, int32, bfloat16, bool), \
2497
+ CPU_HOST_EQUALN_INIT(name, int32, float16, bool), \
2498
+ CPU_HOST_EQUALN_INIT(name, int32, float32, bool), \
2499
+ CPU_HOST_EQUALN_INIT(name, int32, float64, bool), \
2500
+ CPU_HOST_EQUALN_INIT(name, int32, complex32, bool), \
2501
+ CPU_HOST_EQUALN_INIT(name, int32, complex64, bool), \
2502
+ CPU_HOST_EQUALN_INIT(name, int32, complex128, bool), \
2503
+ \
2504
+ CPU_HOST_EQUALN_INIT(name, int64, uint8, bool), \
2505
+ CPU_HOST_EQUALN_INIT(name, int64, uint16, bool), \
2506
+ CPU_HOST_EQUALN_INIT(name, int64, uint32, bool), \
2507
+ CPU_HOST_EQUALN_INIT(name, int64, int8, bool), \
2508
+ CPU_HOST_EQUALN_INIT(name, int64, int16, bool), \
2509
+ CPU_HOST_EQUALN_INIT(name, int64, int32, bool), \
2510
+ CPU_HOST_EQUALN_INIT(name, int64, int64, bool), \
2511
+ \
2512
+ CPU_HOST_EQUALN_INIT(name, bfloat16, uint8, bool), \
2513
+ CPU_HOST_EQUALN_INIT(name, bfloat16, uint16, bool), \
2514
+ CPU_HOST_EQUALN_INIT(name, bfloat16, uint32, bool), \
2515
+ CPU_HOST_EQUALN_INIT(name, bfloat16, int8, bool), \
2516
+ CPU_HOST_EQUALN_INIT(name, bfloat16, int16, bool), \
2517
+ CPU_HOST_EQUALN_INIT(name, bfloat16, int32, bool), \
2518
+ CPU_HOST_EQUALN_INIT(name, bfloat16, bfloat16, bool), \
2519
+ CPU_HOST_EQUALN_INIT(name, bfloat16, float16, bool), \
2520
+ CPU_HOST_EQUALN_INIT(name, bfloat16, float32, bool), \
2521
+ CPU_HOST_EQUALN_INIT(name, bfloat16, float64, bool), \
2522
+ CPU_HOST_EQUALN_INIT(name, bfloat16, complex32, bool), \
2523
+ CPU_HOST_EQUALN_INIT(name, bfloat16, complex64, bool), \
2524
+ CPU_HOST_EQUALN_INIT(name, bfloat16, complex128, bool), \
2525
+ \
2526
+ CPU_HOST_EQUALN_INIT(name, float16, uint8, bool), \
2527
+ CPU_HOST_EQUALN_INIT(name, float16, uint16, bool), \
2528
+ CPU_HOST_EQUALN_INIT(name, float16, uint32, bool), \
2529
+ CPU_HOST_EQUALN_INIT(name, float16, int8, bool), \
2530
+ CPU_HOST_EQUALN_INIT(name, float16, int16, bool), \
2531
+ CPU_HOST_EQUALN_INIT(name, float16, int32, bool), \
2532
+ CPU_HOST_EQUALN_INIT(name, float16, bfloat16, bool), \
2533
+ CPU_HOST_EQUALN_INIT(name, float16, float16, bool), \
2534
+ CPU_HOST_EQUALN_INIT(name, float16, float32, bool), \
2535
+ CPU_HOST_EQUALN_INIT(name, float16, float64, bool), \
2536
+ CPU_HOST_EQUALN_INIT(name, float16, complex32, bool), \
2537
+ CPU_HOST_EQUALN_INIT(name, float16, complex64, bool), \
2538
+ CPU_HOST_EQUALN_INIT(name, float16, complex128, bool), \
2539
+ \
2540
+ CPU_HOST_EQUALN_INIT(name, float32, uint8, bool), \
2541
+ CPU_HOST_EQUALN_INIT(name, float32, uint16, bool), \
2542
+ CPU_HOST_EQUALN_INIT(name, float32, uint32, bool), \
2543
+ CPU_HOST_EQUALN_INIT(name, float32, int8, bool), \
2544
+ CPU_HOST_EQUALN_INIT(name, float32, int16, bool), \
2545
+ CPU_HOST_EQUALN_INIT(name, float32, int32, bool), \
2546
+ CPU_HOST_EQUALN_INIT(name, float32, bfloat16, bool), \
2547
+ CPU_HOST_EQUALN_INIT(name, float32, float16, bool), \
2548
+ CPU_HOST_EQUALN_INIT(name, float32, float32, bool), \
2549
+ CPU_HOST_EQUALN_INIT(name, float32, float64, bool), \
2550
+ CPU_HOST_EQUALN_INIT(name, float32, complex32, bool), \
2551
+ CPU_HOST_EQUALN_INIT(name, float32, complex64, bool), \
2552
+ CPU_HOST_EQUALN_INIT(name, float32, complex128, bool), \
2553
+ \
2554
+ CPU_HOST_EQUALN_INIT(name, float64, uint8, bool), \
2555
+ CPU_HOST_EQUALN_INIT(name, float64, uint16, bool), \
2556
+ CPU_HOST_EQUALN_INIT(name, float64, uint32, bool), \
2557
+ CPU_HOST_EQUALN_INIT(name, float64, int8, bool), \
2558
+ CPU_HOST_EQUALN_INIT(name, float64, int16, bool), \
2559
+ CPU_HOST_EQUALN_INIT(name, float64, int32, bool), \
2560
+ CPU_HOST_EQUALN_INIT(name, float64, bfloat16, bool), \
2561
+ CPU_HOST_EQUALN_INIT(name, float64, float16, bool), \
2562
+ CPU_HOST_EQUALN_INIT(name, float64, float32, bool), \
2563
+ CPU_HOST_EQUALN_INIT(name, float64, float64, bool), \
2564
+ CPU_HOST_EQUALN_INIT(name, float64, complex32, bool), \
2565
+ CPU_HOST_EQUALN_INIT(name, float64, complex64, bool), \
2566
+ CPU_HOST_EQUALN_INIT(name, float64, complex128, bool), \
2567
+ \
2568
+ CPU_HOST_EQUALN_INIT(name, complex32, uint8, bool), \
2569
+ CPU_HOST_EQUALN_INIT(name, complex32, uint16, bool), \
2570
+ CPU_HOST_EQUALN_INIT(name, complex32, uint32, bool), \
2571
+ CPU_HOST_EQUALN_INIT(name, complex32, int8, bool), \
2572
+ CPU_HOST_EQUALN_INIT(name, complex32, int16, bool), \
2573
+ CPU_HOST_EQUALN_INIT(name, complex32, int32, bool), \
2574
+ CPU_HOST_EQUALN_INIT(name, complex32, bfloat16, bool), \
2575
+ CPU_HOST_EQUALN_INIT(name, complex32, float16, bool), \
2576
+ CPU_HOST_EQUALN_INIT(name, complex32, float32, bool), \
2577
+ CPU_HOST_EQUALN_INIT(name, complex32, float64, bool), \
2578
+ CPU_HOST_EQUALN_INIT(name, complex32, complex32, bool), \
2579
+ CPU_HOST_EQUALN_INIT(name, complex32, complex64, bool), \
2580
+ CPU_HOST_EQUALN_INIT(name, complex32, complex128, bool), \
2581
+ \
2582
+ CPU_HOST_EQUALN_INIT(name, complex64, uint8, bool), \
2583
+ CPU_HOST_EQUALN_INIT(name, complex64, uint16, bool), \
2584
+ CPU_HOST_EQUALN_INIT(name, complex64, uint32, bool), \
2585
+ CPU_HOST_EQUALN_INIT(name, complex64, int8, bool), \
2586
+ CPU_HOST_EQUALN_INIT(name, complex64, int16, bool), \
2587
+ CPU_HOST_EQUALN_INIT(name, complex64, int32, bool), \
2588
+ CPU_HOST_EQUALN_INIT(name, complex64, bfloat16, bool), \
2589
+ CPU_HOST_EQUALN_INIT(name, complex64, float16, bool), \
2590
+ CPU_HOST_EQUALN_INIT(name, complex64, float32, bool), \
2591
+ CPU_HOST_EQUALN_INIT(name, complex64, float64, bool), \
2592
+ CPU_HOST_EQUALN_INIT(name, complex64, complex32, bool), \
2593
+ CPU_HOST_EQUALN_INIT(name, complex64, complex64, bool), \
2594
+ CPU_HOST_EQUALN_INIT(name, complex64, complex128, bool), \
2595
+ \
2596
+ CPU_HOST_EQUALN_INIT(name, complex128, uint8, bool), \
2597
+ CPU_HOST_EQUALN_INIT(name, complex128, uint16, bool), \
2598
+ CPU_HOST_EQUALN_INIT(name, complex128, uint32, bool), \
2599
+ CPU_HOST_EQUALN_INIT(name, complex128, int8, bool), \
2600
+ CPU_HOST_EQUALN_INIT(name, complex128, int16, bool), \
2601
+ CPU_HOST_EQUALN_INIT(name, complex128, int32, bool), \
2602
+ CPU_HOST_EQUALN_INIT(name, complex128, bfloat16, bool), \
2603
+ CPU_HOST_EQUALN_INIT(name, complex128, float16, bool), \
2604
+ CPU_HOST_EQUALN_INIT(name, complex128, float32, bool), \
2605
+ CPU_HOST_EQUALN_INIT(name, complex128, float64, bool), \
2606
+ CPU_HOST_EQUALN_INIT(name, complex128, complex32, bool), \
2607
+ CPU_HOST_EQUALN_INIT(name, complex128, complex64, bool), \
2608
+ CPU_HOST_EQUALN_INIT(name, complex128, complex128, bool)
2609
+
2610
+
2611
+ CPU_HOST_ALL_COMPARISON(less) /* Expand the comparison list once per operator name; the same seven names get table entries in binary_kernels[]. Presumably each expansion emits that operator's kernel functions (CPU_HOST_BINARY / CPU_HOST_NOIMPL are defined outside this view - confirm). */
2612
+ CPU_HOST_ALL_COMPARISON(less_equal)
2613
+ CPU_HOST_ALL_COMPARISON(greater_equal)
2614
+ CPU_HOST_ALL_COMPARISON(greater)
2615
+ CPU_HOST_ALL_COMPARISON(equal)
2616
+ CPU_HOST_ALL_COMPARISON(not_equal)
2617
+ CPU_HOST_ALL_COMPARISON(equaln) /* expanded with the same list as the other comparisons; only its table entries use a separate macro (CPU_HOST_ALL_EQUALN_INIT) */
2618
+
2619
+
2620
+ static const gm_kernel_init_t binary_kernels[] = { /* File-local table of binary kernel entries: arithmetic operators first, then comparisons, closed by an all-NULL entry. */
2621
+ CPU_HOST_ALL_ARITHMETIC_INIT(add),
2622
+ CPU_HOST_ALL_ARITHMETIC_INIT(subtract),
2623
+ CPU_HOST_ALL_ARITHMETIC_INIT(multiply),
2624
+ CPU_HOST_ALL_ARITHMETIC_INIT(floor_divide),
2625
+ CPU_HOST_ALL_ARITHMETIC_INIT(remainder),
2626
+ CPU_HOST_ALL_ARITHMETIC_FLOAT_RETURN_INIT(divide), /* divide is the only operator using the FLOAT_RETURN list; that macro is defined outside this view - presumably it selects floating-point result types, confirm */
2627
+ CPU_HOST_ALL_ARITHMETIC_INIT(power),
2628
+ CPU_HOST_ALL_COMPARISON_INIT(less),
2629
+ CPU_HOST_ALL_COMPARISON_INIT(less_equal),
2630
+ CPU_HOST_ALL_COMPARISON_INIT(greater_equal),
2631
+ CPU_HOST_ALL_COMPARISON_INIT(greater),
2632
+ CPU_HOST_ALL_COMPARISON_INIT(equal),
2633
+ CPU_HOST_ALL_COMPARISON_INIT(not_equal),
2634
+ CPU_HOST_ALL_EQUALN_INIT(equaln), /* equaln entries are built with CPU_HOST_EQUALN_INIT instead of CPU_HOST_BINARY_INIT */
2635
+
2636
+ { .name = NULL, .sig = NULL } /* final entry with NULL name and sig - presumably the end-of-table marker consumers stop at; verify against the code that walks this array */
2637
+ };
2638
+
2639
+
2640
+ /*****************************************************************************/
2641
+ /* Bitwise */
2642
+ /*****************************************************************************/
2643
+
2644
/*
 * CPU_HOST_ALL_BITWISE(name)
 *
 * Expands CPU_HOST_BINARY(name, t0, t1, t2) for every bitwise signature,
 * 73 in total.  The list is split into one row macro per t0 so each row can
 * be read on its own; rows are expanded in the order bool, uint8, uint16,
 * uint32, uint64, int8, int16, int32, int64.
 *
 * NOTE(review): t0/t1 are presumably the two operand types and t2 the result
 * type -- CPU_HOST_BINARY is defined elsewhere, confirm there.
 *
 * The table has no entry that pairs uint64 with a signed integer type.
 */
#define CPU_HOST_BITWISE_ROW_BOOL(name) \
    CPU_HOST_BINARY(name, bool, bool, bool) \
    CPU_HOST_BINARY(name, bool, uint8, uint8) \
    CPU_HOST_BINARY(name, bool, uint16, uint16) \
    CPU_HOST_BINARY(name, bool, uint32, uint32) \
    CPU_HOST_BINARY(name, bool, uint64, uint64) \
    CPU_HOST_BINARY(name, bool, int8, int8) \
    CPU_HOST_BINARY(name, bool, int16, int16) \
    CPU_HOST_BINARY(name, bool, int32, int32) \
    CPU_HOST_BINARY(name, bool, int64, int64)

#define CPU_HOST_BITWISE_ROW_UINT8(name) \
    CPU_HOST_BINARY(name, uint8, bool, uint8) \
    CPU_HOST_BINARY(name, uint8, uint8, uint8) \
    CPU_HOST_BINARY(name, uint8, uint16, uint16) \
    CPU_HOST_BINARY(name, uint8, uint32, uint32) \
    CPU_HOST_BINARY(name, uint8, uint64, uint64) \
    CPU_HOST_BINARY(name, uint8, int8, int16) \
    CPU_HOST_BINARY(name, uint8, int16, int16) \
    CPU_HOST_BINARY(name, uint8, int32, int32) \
    CPU_HOST_BINARY(name, uint8, int64, int64)

#define CPU_HOST_BITWISE_ROW_UINT16(name) \
    CPU_HOST_BINARY(name, uint16, bool, uint16) \
    CPU_HOST_BINARY(name, uint16, uint8, uint16) \
    CPU_HOST_BINARY(name, uint16, uint16, uint16) \
    CPU_HOST_BINARY(name, uint16, uint32, uint32) \
    CPU_HOST_BINARY(name, uint16, uint64, uint64) \
    CPU_HOST_BINARY(name, uint16, int8, int32) \
    CPU_HOST_BINARY(name, uint16, int16, int32) \
    CPU_HOST_BINARY(name, uint16, int32, int32) \
    CPU_HOST_BINARY(name, uint16, int64, int64)

#define CPU_HOST_BITWISE_ROW_UINT32(name) \
    CPU_HOST_BINARY(name, uint32, bool, uint32) \
    CPU_HOST_BINARY(name, uint32, uint8, uint32) \
    CPU_HOST_BINARY(name, uint32, uint16, uint32) \
    CPU_HOST_BINARY(name, uint32, uint32, uint32) \
    CPU_HOST_BINARY(name, uint32, uint64, uint64) \
    CPU_HOST_BINARY(name, uint32, int8, int64) \
    CPU_HOST_BINARY(name, uint32, int16, int64) \
    CPU_HOST_BINARY(name, uint32, int32, int64) \
    CPU_HOST_BINARY(name, uint32, int64, int64)

#define CPU_HOST_BITWISE_ROW_UINT64(name) \
    CPU_HOST_BINARY(name, uint64, bool, uint64) \
    CPU_HOST_BINARY(name, uint64, uint8, uint64) \
    CPU_HOST_BINARY(name, uint64, uint16, uint64) \
    CPU_HOST_BINARY(name, uint64, uint32, uint64) \
    CPU_HOST_BINARY(name, uint64, uint64, uint64)

#define CPU_HOST_BITWISE_ROW_INT8(name) \
    CPU_HOST_BINARY(name, int8, bool, int8) \
    CPU_HOST_BINARY(name, int8, uint8, int16) \
    CPU_HOST_BINARY(name, int8, uint16, int32) \
    CPU_HOST_BINARY(name, int8, uint32, int64) \
    CPU_HOST_BINARY(name, int8, int8, int8) \
    CPU_HOST_BINARY(name, int8, int16, int16) \
    CPU_HOST_BINARY(name, int8, int32, int32) \
    CPU_HOST_BINARY(name, int8, int64, int64)

#define CPU_HOST_BITWISE_ROW_INT16(name) \
    CPU_HOST_BINARY(name, int16, bool, int16) \
    CPU_HOST_BINARY(name, int16, uint8, int16) \
    CPU_HOST_BINARY(name, int16, uint16, int32) \
    CPU_HOST_BINARY(name, int16, uint32, int64) \
    CPU_HOST_BINARY(name, int16, int8, int16) \
    CPU_HOST_BINARY(name, int16, int16, int16) \
    CPU_HOST_BINARY(name, int16, int32, int32) \
    CPU_HOST_BINARY(name, int16, int64, int64)

#define CPU_HOST_BITWISE_ROW_INT32(name) \
    CPU_HOST_BINARY(name, int32, bool, int32) \
    CPU_HOST_BINARY(name, int32, uint8, int32) \
    CPU_HOST_BINARY(name, int32, uint16, int32) \
    CPU_HOST_BINARY(name, int32, uint32, int64) \
    CPU_HOST_BINARY(name, int32, int8, int32) \
    CPU_HOST_BINARY(name, int32, int16, int32) \
    CPU_HOST_BINARY(name, int32, int32, int32) \
    CPU_HOST_BINARY(name, int32, int64, int64)

#define CPU_HOST_BITWISE_ROW_INT64(name) \
    CPU_HOST_BINARY(name, int64, bool, int64) \
    CPU_HOST_BINARY(name, int64, uint8, int64) \
    CPU_HOST_BINARY(name, int64, uint16, int64) \
    CPU_HOST_BINARY(name, int64, uint32, int64) \
    CPU_HOST_BINARY(name, int64, int8, int64) \
    CPU_HOST_BINARY(name, int64, int16, int64) \
    CPU_HOST_BINARY(name, int64, int32, int64) \
    CPU_HOST_BINARY(name, int64, int64, int64)

#define CPU_HOST_ALL_BITWISE(name) \
    CPU_HOST_BITWISE_ROW_BOOL(name) \
    CPU_HOST_BITWISE_ROW_UINT8(name) \
    CPU_HOST_BITWISE_ROW_UINT16(name) \
    CPU_HOST_BITWISE_ROW_UINT32(name) \
    CPU_HOST_BITWISE_ROW_UINT64(name) \
    CPU_HOST_BITWISE_ROW_INT8(name) \
    CPU_HOST_BITWISE_ROW_INT16(name) \
    CPU_HOST_BITWISE_ROW_INT32(name) \
    CPU_HOST_BITWISE_ROW_INT64(name)
2726
+
2727
/*
 * CPU_HOST_ALL_BITWISE_INIT(name)
 *
 * Expands to a comma-separated list of CPU_HOST_BINARY_INIT(name, t0, t1, t2)
 * entries, one per bitwise signature (73 in total), with no trailing comma.
 * The signatures and their order mirror CPU_HOST_ALL_BITWISE.
 *
 * The list is split into one row macro per t0; each row is itself
 * comma-separated without a trailing comma, and the rows are joined with
 * commas in the final macro.
 */
#define CPU_HOST_BITWISE_INIT_ROW_BOOL(name) \
    CPU_HOST_BINARY_INIT(name, bool, bool, bool), \
    CPU_HOST_BINARY_INIT(name, bool, uint8, uint8), \
    CPU_HOST_BINARY_INIT(name, bool, uint16, uint16), \
    CPU_HOST_BINARY_INIT(name, bool, uint32, uint32), \
    CPU_HOST_BINARY_INIT(name, bool, uint64, uint64), \
    CPU_HOST_BINARY_INIT(name, bool, int8, int8), \
    CPU_HOST_BINARY_INIT(name, bool, int16, int16), \
    CPU_HOST_BINARY_INIT(name, bool, int32, int32), \
    CPU_HOST_BINARY_INIT(name, bool, int64, int64)

#define CPU_HOST_BITWISE_INIT_ROW_UINT8(name) \
    CPU_HOST_BINARY_INIT(name, uint8, bool, uint8), \
    CPU_HOST_BINARY_INIT(name, uint8, uint8, uint8), \
    CPU_HOST_BINARY_INIT(name, uint8, uint16, uint16), \
    CPU_HOST_BINARY_INIT(name, uint8, uint32, uint32), \
    CPU_HOST_BINARY_INIT(name, uint8, uint64, uint64), \
    CPU_HOST_BINARY_INIT(name, uint8, int8, int16), \
    CPU_HOST_BINARY_INIT(name, uint8, int16, int16), \
    CPU_HOST_BINARY_INIT(name, uint8, int32, int32), \
    CPU_HOST_BINARY_INIT(name, uint8, int64, int64)

#define CPU_HOST_BITWISE_INIT_ROW_UINT16(name) \
    CPU_HOST_BINARY_INIT(name, uint16, bool, uint16), \
    CPU_HOST_BINARY_INIT(name, uint16, uint8, uint16), \
    CPU_HOST_BINARY_INIT(name, uint16, uint16, uint16), \
    CPU_HOST_BINARY_INIT(name, uint16, uint32, uint32), \
    CPU_HOST_BINARY_INIT(name, uint16, uint64, uint64), \
    CPU_HOST_BINARY_INIT(name, uint16, int8, int32), \
    CPU_HOST_BINARY_INIT(name, uint16, int16, int32), \
    CPU_HOST_BINARY_INIT(name, uint16, int32, int32), \
    CPU_HOST_BINARY_INIT(name, uint16, int64, int64)

#define CPU_HOST_BITWISE_INIT_ROW_UINT32(name) \
    CPU_HOST_BINARY_INIT(name, uint32, bool, uint32), \
    CPU_HOST_BINARY_INIT(name, uint32, uint8, uint32), \
    CPU_HOST_BINARY_INIT(name, uint32, uint16, uint32), \
    CPU_HOST_BINARY_INIT(name, uint32, uint32, uint32), \
    CPU_HOST_BINARY_INIT(name, uint32, uint64, uint64), \
    CPU_HOST_BINARY_INIT(name, uint32, int8, int64), \
    CPU_HOST_BINARY_INIT(name, uint32, int16, int64), \
    CPU_HOST_BINARY_INIT(name, uint32, int32, int64), \
    CPU_HOST_BINARY_INIT(name, uint32, int64, int64)

#define CPU_HOST_BITWISE_INIT_ROW_UINT64(name) \
    CPU_HOST_BINARY_INIT(name, uint64, bool, uint64), \
    CPU_HOST_BINARY_INIT(name, uint64, uint8, uint64), \
    CPU_HOST_BINARY_INIT(name, uint64, uint16, uint64), \
    CPU_HOST_BINARY_INIT(name, uint64, uint32, uint64), \
    CPU_HOST_BINARY_INIT(name, uint64, uint64, uint64)

#define CPU_HOST_BITWISE_INIT_ROW_INT8(name) \
    CPU_HOST_BINARY_INIT(name, int8, bool, int8), \
    CPU_HOST_BINARY_INIT(name, int8, uint8, int16), \
    CPU_HOST_BINARY_INIT(name, int8, uint16, int32), \
    CPU_HOST_BINARY_INIT(name, int8, uint32, int64), \
    CPU_HOST_BINARY_INIT(name, int8, int8, int8), \
    CPU_HOST_BINARY_INIT(name, int8, int16, int16), \
    CPU_HOST_BINARY_INIT(name, int8, int32, int32), \
    CPU_HOST_BINARY_INIT(name, int8, int64, int64)

#define CPU_HOST_BITWISE_INIT_ROW_INT16(name) \
    CPU_HOST_BINARY_INIT(name, int16, bool, int16), \
    CPU_HOST_BINARY_INIT(name, int16, uint8, int16), \
    CPU_HOST_BINARY_INIT(name, int16, uint16, int32), \
    CPU_HOST_BINARY_INIT(name, int16, uint32, int64), \
    CPU_HOST_BINARY_INIT(name, int16, int8, int16), \
    CPU_HOST_BINARY_INIT(name, int16, int16, int16), \
    CPU_HOST_BINARY_INIT(name, int16, int32, int32), \
    CPU_HOST_BINARY_INIT(name, int16, int64, int64)

#define CPU_HOST_BITWISE_INIT_ROW_INT32(name) \
    CPU_HOST_BINARY_INIT(name, int32, bool, int32), \
    CPU_HOST_BINARY_INIT(name, int32, uint8, int32), \
    CPU_HOST_BINARY_INIT(name, int32, uint16, int32), \
    CPU_HOST_BINARY_INIT(name, int32, uint32, int64), \
    CPU_HOST_BINARY_INIT(name, int32, int8, int32), \
    CPU_HOST_BINARY_INIT(name, int32, int16, int32), \
    CPU_HOST_BINARY_INIT(name, int32, int32, int32), \
    CPU_HOST_BINARY_INIT(name, int32, int64, int64)

#define CPU_HOST_BITWISE_INIT_ROW_INT64(name) \
    CPU_HOST_BINARY_INIT(name, int64, bool, int64), \
    CPU_HOST_BINARY_INIT(name, int64, uint8, int64), \
    CPU_HOST_BINARY_INIT(name, int64, uint16, int64), \
    CPU_HOST_BINARY_INIT(name, int64, uint32, int64), \
    CPU_HOST_BINARY_INIT(name, int64, int8, int64), \
    CPU_HOST_BINARY_INIT(name, int64, int16, int64), \
    CPU_HOST_BINARY_INIT(name, int64, int32, int64), \
    CPU_HOST_BINARY_INIT(name, int64, int64, int64)

#define CPU_HOST_ALL_BITWISE_INIT(name) \
    CPU_HOST_BITWISE_INIT_ROW_BOOL(name), \
    CPU_HOST_BITWISE_INIT_ROW_UINT8(name), \
    CPU_HOST_BITWISE_INIT_ROW_UINT16(name), \
    CPU_HOST_BITWISE_INIT_ROW_UINT32(name), \
    CPU_HOST_BITWISE_INIT_ROW_UINT64(name), \
    CPU_HOST_BITWISE_INIT_ROW_INT8(name), \
    CPU_HOST_BITWISE_INIT_ROW_INT16(name), \
    CPU_HOST_BITWISE_INIT_ROW_INT32(name), \
    CPU_HOST_BITWISE_INIT_ROW_INT64(name)
2809
+
2810
+
2811
+ CPU_HOST_ALL_BITWISE(bitwise_and)
2812
+ CPU_HOST_ALL_BITWISE(bitwise_or)
2813
+ CPU_HOST_ALL_BITWISE(bitwise_xor)
2814
+
2815
+
2816
+ static const gm_kernel_init_t bitwise_kernels[] = {
2817
+ CPU_HOST_ALL_BITWISE_INIT(bitwise_and),
2818
+ CPU_HOST_ALL_BITWISE_INIT(bitwise_or),
2819
+ CPU_HOST_ALL_BITWISE_INIT(bitwise_xor),
2820
+
2821
+ { .name = NULL, .sig = NULL }
2822
+ };
2823
+
2824
+
2825
+ /****************************************************************************/
2826
+ /* Two return values */
2827
+ /****************************************************************************/
2828
+
2829
/*
 * CPU_HOST_BINARY_MV(name, t0, t1, t2, t3)
 *
 * Generates two static host wrappers for a kernel that uses four stack
 * slots.  stack[0] and stack[1] are read through const pointers; stack[2]
 * and stack[3] are passed on as writable pointers.
 *
 *   gm_cpu_host_fixed_1D_C_<name>_<t0>_<t1>_<t2>_<t3>
 *       Resolves each slot with apply_index(), takes the length from
 *       xnd_fixed_shape(&stack[0]) and forwards everything to the
 *       gm_cpu_device_fixed_1D_C_* function with the same suffix.
 *
 *   gm_cpu_host_0D_<name>_<t0>_<t1>_<t2>_<t3>
 *       Forwards the .ptr member of each slot to the gm_cpu_device_0D_*
 *       function with the same suffix.
 *
 * Both wrappers ignore ctx and always return 0.
 */
#define CPU_HOST_BINARY_MV(name, t0, t1, t2, t3) \
static int \
gm_cpu_host_fixed_1D_C_##name##_##t0##_##t1##_##t2##_##t3(xnd_t stack[], ndt_context_t *ctx) \
{ \
    const char *in0 = apply_index(&stack[0]); \
    const char *in1 = apply_index(&stack[1]); \
    char *out0 = apply_index(&stack[2]); \
    char *out1 = apply_index(&stack[3]); \
    int64_t len = xnd_fixed_shape(&stack[0]); \
 \
    (void)ctx; \
    gm_cpu_device_fixed_1D_C_##name##_##t0##_##t1##_##t2##_##t3(in0, in1, out0, out1, len); \
    return 0; \
} \
 \
static int \
gm_cpu_host_0D_##name##_##t0##_##t1##_##t2##_##t3(xnd_t stack[], ndt_context_t *ctx) \
{ \
    const char *in0 = stack[0].ptr; \
    const char *in1 = stack[1].ptr; \
    char *out0 = stack[2].ptr; \
    char *out1 = stack[3].ptr; \
 \
    (void)ctx; \
    gm_cpu_device_0D_##name##_##t0##_##t1##_##t2##_##t3(in0, in1, out0, out1); \
    return 0; \
}
2859
+
2860
/*
 * CPU_HOST_BINARY_MV_INIT(func, t0, t1, t2, t3)
 *
 * Expands to one braced initializer with four designated members:
 *   .name  STRINGIZE(func)
 *   .sig   the adjacent string literals
 *          "... * " t0 ", ... * " t1 " -> ... * " t2 ", ... * " t3
 *          with each type inserted via STRINGIZE
 *   .OptC  the gm_cpu_host_fixed_1D_C_* wrapper for this signature
 *   .Xnd   the gm_cpu_host_0D_* wrapper for this signature
 */
#define CPU_HOST_BINARY_MV_INIT(func, t0, t1, t2, t3) \
    { \
        .name = STRINGIZE(func), \
        .sig = "... * " STRINGIZE(t0) \
               ", ... * " STRINGIZE(t1) \
               " -> ... * " STRINGIZE(t2) \
               ", ... * " STRINGIZE(t3), \
        .OptC = gm_cpu_host_fixed_1D_C_##func##_##t0##_##t1##_##t2##_##t3, \
        .Xnd = gm_cpu_host_0D_##func##_##t0##_##t1##_##t2##_##t3 \
    }
2866
+
2867
+ #define CPU_HOST_ALL_BINARY_MV(name) \
2868
+ CPU_HOST_BINARY_MV(name, uint8, uint8, uint8, uint8) \
2869
+ CPU_HOST_BINARY_MV(name, uint16, uint16, uint16, uint16) \
2870
+ CPU_HOST_BINARY_MV(name, uint32, uint32, uint32, uint32) \
2871
+ CPU_HOST_BINARY_MV(name, uint64, uint64, uint64, uint64) \
2872
+ CPU_HOST_BINARY_MV(name, int8, int8, int8, int8) \
2873
+ CPU_HOST_BINARY_MV(name, int16, int16, int16, int16) \
2874
+ CPU_HOST_BINARY_MV(name, int32, int32, int32, int32) \
2875
+ CPU_HOST_BINARY_MV(name, int64, int64, int64, int64) \
2876
+ CPU_HOST_BINARY_MV(name, bfloat16, bfloat16, bfloat16, bfloat16) \
2877
+ CPU_HOST_BINARY_MV(name, float32, float32, float32, float32) \
2878
+ CPU_HOST_BINARY_MV(name, float64, float64, float64, float64)
2879
+
2880
/*
 * CPU_HOST_ALL_BINARY_MV_INIT(name)
 *
 * Comma-separated CPU_HOST_BINARY_MV_INIT entries (no trailing comma) for
 * the same 11 signatures, in the same order, as CPU_HOST_ALL_BINARY_MV.
 */
#define CPU_HOST_ALL_BINARY_MV_INIT(name) \
    CPU_HOST_BINARY_MV_INIT(name, uint8,    uint8,    uint8,    uint8),    \
    CPU_HOST_BINARY_MV_INIT(name, uint16,   uint16,   uint16,   uint16),   \
    CPU_HOST_BINARY_MV_INIT(name, uint32,   uint32,   uint32,   uint32),   \
    CPU_HOST_BINARY_MV_INIT(name, uint64,   uint64,   uint64,   uint64),   \
    CPU_HOST_BINARY_MV_INIT(name, int8,     int8,     int8,     int8),     \
    CPU_HOST_BINARY_MV_INIT(name, int16,    int16,    int16,    int16),    \
    CPU_HOST_BINARY_MV_INIT(name, int32,    int32,    int32,    int32),    \
    CPU_HOST_BINARY_MV_INIT(name, int64,    int64,    int64,    int64),    \
    CPU_HOST_BINARY_MV_INIT(name, bfloat16, bfloat16, bfloat16, bfloat16), \
    CPU_HOST_BINARY_MV_INIT(name, float32,  float32,  float32,  float32),  \
    CPU_HOST_BINARY_MV_INIT(name, float64,  float64,  float64,  float64)
2892
+
2893
+ CPU_HOST_ALL_BINARY_MV(divmod)
2894
+
2895
+
2896
+ static const gm_kernel_init_t binary_mv_kernels[] = {
2897
+ CPU_HOST_ALL_BINARY_MV_INIT(divmod),
2898
+
2899
+ { .name = NULL, .sig = NULL }
2900
+ };
2901
+
2902
+
2903
+ /****************************************************************************/
2904
+ /* Initialize kernel table */
2905
+ /****************************************************************************/
2906
+
2907
/*
 * Make "bool" available as a type name for the typecheck callbacks below.
 *
 * NOTE(review): presumably a typedef is used instead of <stdbool.h> so that
 * "bool" is not a macro; the kernel tables above pass bool as a type token
 * to STRINGIZE, which could otherwise turn it into "_Bool" -- confirm
 * against STRINGIZE's definition.
 */
typedef _Bool bool;
2908
+
2909
+ static const gm_kernel_set_t *
2910
+ binary_typecheck(ndt_apply_spec_t *spec, const gm_func_t *f, const ndt_t *types[],
2911
+ const int64_t li[], int nin, int nout, bool check_broadcast,
2912
+ ndt_context_t *ctx)
2913
+ {
2914
+ return cpu_binary_typecheck(binary_kernel_location, spec, f, types, li,
2915
+ nin, nout, check_broadcast, ctx);
2916
+ }
2917
+
2918
+ static const gm_kernel_set_t *
2919
+ bitwise_typecheck(ndt_apply_spec_t *spec, const gm_func_t *f, const ndt_t *types[],
2920
+ const int64_t li[], int nin, int nout, bool check_broadcast,
2921
+ ndt_context_t *ctx)
2922
+ {
2923
+ return cpu_binary_typecheck(bitwise_kernel_location, spec, f, types, li,
2924
+ nin, nout, check_broadcast, ctx);
2925
+ }
2926
+
2927
+
2928
+ int
2929
+ gm_init_cpu_binary_kernels(gm_tbl_t *tbl, ndt_context_t *ctx)
2930
+ {
2931
+ const gm_kernel_init_t *k;
2932
+
2933
+ for (k = binary_kernels; k->name != NULL; k++) {
2934
+ if (gm_add_kernel_typecheck(tbl, k, ctx, &binary_typecheck) < 0) {
2935
+ return -1;
2936
+ }
2937
+ }
2938
+
2939
+ for (k = bitwise_kernels; k->name != NULL; k++) {
2940
+ if (gm_add_kernel_typecheck(tbl, k, ctx, &bitwise_typecheck) < 0) {
2941
+ return -1;
2942
+ }
2943
+ }
2944
+
2945
+ for (k = binary_mv_kernels; k->name != NULL; k++) {
2946
+ if (gm_add_kernel(tbl, k, ctx) < 0) {
2947
+ return -1;
2948
+ }
2949
+ }
2950
+
2951
+ return 0;
2952
+ }