gumath 0.2.0dev5 → 0.2.0dev8

Sign up to get free protection for your applications and to get access to all the features.
Files changed (99) hide show
  1. checksums.yaml +4 -4
  2. data/CONTRIBUTING.md +7 -2
  3. data/Gemfile +0 -3
  4. data/ext/ruby_gumath/GPATH +0 -0
  5. data/ext/ruby_gumath/GRTAGS +0 -0
  6. data/ext/ruby_gumath/GTAGS +0 -0
  7. data/ext/ruby_gumath/extconf.rb +0 -5
  8. data/ext/ruby_gumath/functions.c +10 -2
  9. data/ext/ruby_gumath/gufunc_object.c +15 -4
  10. data/ext/ruby_gumath/gufunc_object.h +9 -3
  11. data/ext/ruby_gumath/gumath/Makefile +63 -0
  12. data/ext/ruby_gumath/gumath/Makefile.in +1 -0
  13. data/ext/ruby_gumath/gumath/config.h +56 -0
  14. data/ext/ruby_gumath/gumath/config.h.in +3 -0
  15. data/ext/ruby_gumath/gumath/config.log +497 -0
  16. data/ext/ruby_gumath/gumath/config.status +1034 -0
  17. data/ext/ruby_gumath/gumath/configure +375 -4
  18. data/ext/ruby_gumath/gumath/configure.ac +47 -3
  19. data/ext/ruby_gumath/gumath/libgumath/Makefile +236 -0
  20. data/ext/ruby_gumath/gumath/libgumath/Makefile.in +90 -24
  21. data/ext/ruby_gumath/gumath/libgumath/Makefile.vc +54 -15
  22. data/ext/ruby_gumath/gumath/libgumath/apply.c +92 -28
  23. data/ext/ruby_gumath/gumath/libgumath/apply.o +0 -0
  24. data/ext/ruby_gumath/gumath/libgumath/common.o +0 -0
  25. data/ext/ruby_gumath/gumath/libgumath/cpu_device_binary.o +0 -0
  26. data/ext/ruby_gumath/gumath/libgumath/cpu_device_unary.o +0 -0
  27. data/ext/ruby_gumath/gumath/libgumath/cpu_host_binary.o +0 -0
  28. data/ext/ruby_gumath/gumath/libgumath/cpu_host_unary.o +0 -0
  29. data/ext/ruby_gumath/gumath/libgumath/examples.o +0 -0
  30. data/ext/ruby_gumath/gumath/libgumath/extending/graph.c +27 -20
  31. data/ext/ruby_gumath/gumath/libgumath/extending/pdist.c +1 -1
  32. data/ext/ruby_gumath/gumath/libgumath/func.c +13 -9
  33. data/ext/ruby_gumath/gumath/libgumath/func.o +0 -0
  34. data/ext/ruby_gumath/gumath/libgumath/graph.o +0 -0
  35. data/ext/ruby_gumath/gumath/libgumath/gumath.h +55 -14
  36. data/ext/ruby_gumath/gumath/libgumath/kernels/common.c +513 -0
  37. data/ext/ruby_gumath/gumath/libgumath/kernels/common.h +155 -0
  38. data/ext/ruby_gumath/gumath/libgumath/kernels/contrib/bfloat16.h +520 -0
  39. data/ext/ruby_gumath/gumath/libgumath/kernels/cpu_device_binary.cc +1123 -0
  40. data/ext/ruby_gumath/gumath/libgumath/kernels/cpu_device_binary.h +1062 -0
  41. data/ext/ruby_gumath/gumath/libgumath/kernels/cpu_device_msvc.cc +555 -0
  42. data/ext/ruby_gumath/gumath/libgumath/kernels/cpu_device_unary.cc +368 -0
  43. data/ext/ruby_gumath/gumath/libgumath/kernels/cpu_device_unary.h +335 -0
  44. data/ext/ruby_gumath/gumath/libgumath/kernels/cpu_host_binary.c +2952 -0
  45. data/ext/ruby_gumath/gumath/libgumath/kernels/cpu_host_unary.c +1100 -0
  46. data/ext/ruby_gumath/gumath/libgumath/kernels/cuda_device_binary.cu +1143 -0
  47. data/ext/ruby_gumath/gumath/libgumath/kernels/cuda_device_binary.h +1061 -0
  48. data/ext/ruby_gumath/gumath/libgumath/kernels/cuda_device_unary.cu +528 -0
  49. data/ext/ruby_gumath/gumath/libgumath/kernels/cuda_device_unary.h +463 -0
  50. data/ext/ruby_gumath/gumath/libgumath/kernels/cuda_host_binary.c +2817 -0
  51. data/ext/ruby_gumath/gumath/libgumath/kernels/cuda_host_unary.c +1331 -0
  52. data/ext/ruby_gumath/gumath/libgumath/kernels/device.hh +614 -0
  53. data/ext/ruby_gumath/gumath/libgumath/libgumath.a +0 -0
  54. data/ext/ruby_gumath/gumath/libgumath/libgumath.so +1 -0
  55. data/ext/ruby_gumath/gumath/libgumath/libgumath.so.0 +1 -0
  56. data/ext/ruby_gumath/gumath/libgumath/libgumath.so.0.2.0dev3 +0 -0
  57. data/ext/ruby_gumath/gumath/libgumath/nploops.o +0 -0
  58. data/ext/ruby_gumath/gumath/libgumath/pdist.o +0 -0
  59. data/ext/ruby_gumath/gumath/libgumath/quaternion.o +0 -0
  60. data/ext/ruby_gumath/gumath/libgumath/tbl.o +0 -0
  61. data/ext/ruby_gumath/gumath/libgumath/thread.c +17 -4
  62. data/ext/ruby_gumath/gumath/libgumath/thread.o +0 -0
  63. data/ext/ruby_gumath/gumath/libgumath/xndloops.c +110 -0
  64. data/ext/ruby_gumath/gumath/libgumath/xndloops.o +0 -0
  65. data/ext/ruby_gumath/gumath/python/gumath/__init__.py +150 -0
  66. data/ext/ruby_gumath/gumath/python/gumath/_gumath.c +446 -80
  67. data/ext/ruby_gumath/gumath/python/gumath/cuda.c +78 -0
  68. data/ext/ruby_gumath/gumath/python/gumath/examples.c +0 -5
  69. data/ext/ruby_gumath/gumath/python/gumath/functions.c +2 -2
  70. data/ext/ruby_gumath/gumath/python/gumath/gumath.h +246 -0
  71. data/ext/ruby_gumath/gumath/python/gumath/libgumath.a +0 -0
  72. data/ext/ruby_gumath/gumath/python/gumath/libgumath.so +1 -0
  73. data/ext/ruby_gumath/gumath/python/gumath/libgumath.so.0 +1 -0
  74. data/ext/ruby_gumath/gumath/python/gumath/libgumath.so.0.2.0dev3 +0 -0
  75. data/ext/ruby_gumath/gumath/python/gumath/pygumath.h +31 -2
  76. data/ext/ruby_gumath/gumath/python/gumath_aux.py +767 -0
  77. data/ext/ruby_gumath/gumath/python/randdec.py +535 -0
  78. data/ext/ruby_gumath/gumath/python/randfloat.py +177 -0
  79. data/ext/ruby_gumath/gumath/python/test_gumath.py +1504 -24
  80. data/ext/ruby_gumath/gumath/python/test_xndarray.py +462 -0
  81. data/ext/ruby_gumath/gumath/setup.py +67 -6
  82. data/ext/ruby_gumath/gumath/tools/detect_cuda_arch.cc +35 -0
  83. data/ext/ruby_gumath/include/gumath.h +55 -14
  84. data/ext/ruby_gumath/include/ruby_gumath.h +4 -1
  85. data/ext/ruby_gumath/lib/libgumath.a +0 -0
  86. data/ext/ruby_gumath/lib/libgumath.so.0.2.0dev3 +0 -0
  87. data/ext/ruby_gumath/ruby_gumath.c +231 -70
  88. data/ext/ruby_gumath/ruby_gumath.h +4 -1
  89. data/ext/ruby_gumath/ruby_gumath_internal.h +25 -0
  90. data/ext/ruby_gumath/util.c +34 -0
  91. data/ext/ruby_gumath/util.h +9 -0
  92. data/gumath.gemspec +3 -2
  93. data/lib/gumath.rb +55 -1
  94. data/lib/gumath/version.rb +2 -2
  95. data/lib/ruby_gumath.so +0 -0
  96. metadata +63 -10
  97. data/ext/ruby_gumath/gumath/libgumath/extending/bfloat16.c +0 -130
  98. data/ext/ruby_gumath/gumath/libgumath/kernels/binary.c +0 -547
  99. data/ext/ruby_gumath/gumath/libgumath/kernels/unary.c +0 -449
@@ -0,0 +1,368 @@
1
+ /*
2
+ * BSD 3-Clause License
3
+ *
4
+ * Copyright (c) 2017-2018, plures
5
+ * All rights reserved.
6
+ *
7
+ * Redistribution and use in source and binary forms, with or without
8
+ * modification, are permitted provided that the following conditions are met:
9
+ *
10
+ * 1. Redistributions of source code must retain the above copyright notice,
11
+ * this list of conditions and the following disclaimer.
12
+ *
13
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
14
+ * this list of conditions and the following disclaimer in the documentation
15
+ * and/or other materials provided with the distribution.
16
+ *
17
+ * 3. Neither the name of the copyright holder nor the names of its
18
+ * contributors may be used to endorse or promote products derived from
19
+ * this software without specific prior written permission.
20
+ *
21
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
22
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
24
+ * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
25
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
27
+ * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
28
+ * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
29
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
30
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31
+ */
32
+
33
+
34
+ #include <cinttypes>
35
+ #include <cmath>
36
+ #include <complex>
37
+ #include "cpu_device_unary.h"
38
+ #include "contrib/bfloat16.h"
39
+
40
+
41
+ /*****************************************************************************/
42
+ /* CPU device unary kernels */
43
+ /*****************************************************************************/
44
+ /* CPU_DEVICE_UNARY(name, func, t0, t1, common): define three extern "C" kernels for `name` that read t0##_t input and write t1##_t output: a contiguous 1D loop, a strided 1D loop and a 0D scalar form. Each input element is cast to common##_t before func is applied. */
45
+ #define CPU_DEVICE_UNARY(name, func, t0, t1, common) \
46
+ extern "C" void \
47
+ gm_cpu_device_fixed_1D_C_##name##_##t0##_##t1(const char *a0, char *a1, \
48
+ const int64_t N) \
49
+ { \
50
+ const t0##_t *x0 = (const t0##_t *)a0; \
51
+ t1##_t *x1 = (t1##_t *)a1; \
52
+ \
53
+ for (int64_t i = 0; i < N; i++) { \
54
+ x1[i] = func((common##_t)x0[i]); \
55
+ } \
56
+ } \
57
+ /* Strided variant: s0 and s1 are added to the element indices into x0 and x1, i.e. they count elements, not bytes. */ \
58
+ extern "C" void \
59
+ gm_cpu_device_fixed_1D_S_##name##_##t0##_##t1(const char *a0, char *a1, \
60
+ const int64_t s0, const int64_t s1, \
61
+ const int64_t N) \
62
+ { \
63
+ const t0##_t *x0 = (const t0##_t *)a0; \
64
+ t1##_t *x1 = (t1##_t *)a1; \
65
+ int64_t i, k0, k1; \
66
+ \
67
+ for (i=0, k0=0, k1=0; i < N; i++, k0+=s0, k1+=s1) { \
68
+ x1[k1] = func((common##_t)x0[k0]); \
69
+ } \
70
+ } \
71
+ /* Scalar (0D) variant: converts the single element at a0 and stores it at a1. */ \
72
+ extern "C" void \
73
+ gm_cpu_device_0D_##name##_##t0##_##t1(const char *a0, char *a1) \
74
+ { \
75
+ const t0##_t x0 = *((const t0##_t *)a0); \
76
+ t1##_t *x1 = (t1##_t *)a1; \
77
+ *x1 = func((common##_t)x0); \
78
+ }
79
+ /* CPU_DEVICE_UNARYC is the variant used for the complex64/complex128 rows: it expands to nothing when _MSC_VER is defined and forwards to CPU_DEVICE_UNARY otherwise. */
80
+ #ifdef _MSC_VER
81
+ #define CPU_DEVICE_UNARYC(name, func, t0, t1, common)
82
+ #else
83
+ #define CPU_DEVICE_UNARYC(name, func, t0, t1, common) \
84
+ CPU_DEVICE_UNARY(name, func, t0, t1, common)
85
+ #endif
86
+ /* CPU_DEVICE_NOIMPL expands to nothing: it marks a type combination for which no kernel is generated. */
87
+ #define CPU_DEVICE_NOIMPL(name, func, t0, t1, common)
88
+
89
+ /* CPU_DEVICE_ALL_UNARY(name, func, ufunc, tfunc, hfunc): instantiate `name` for every listed (input, output) type pair. func is applied for bool, signed integer, float32/float64 and complex outputs, ufunc for unsigned integer outputs and tfunc for bfloat16 outputs. hfunc is only passed to CPU_DEVICE_NOIMPL rows (float16), so it generates no code; the complex32 rows are CPU_DEVICE_NOIMPL as well. */
90
+ #define CPU_DEVICE_ALL_UNARY(name, func, ufunc, tfunc, hfunc) \
91
+ CPU_DEVICE_UNARY(name, func, bool, bool, bool) \
92
+ CPU_DEVICE_UNARY(name, ufunc, bool, uint8, uint8) \
93
+ CPU_DEVICE_UNARY(name, ufunc, bool, uint16, uint16) \
94
+ CPU_DEVICE_UNARY(name, ufunc, bool, uint32, uint32) \
95
+ CPU_DEVICE_UNARY(name, ufunc, bool, uint64, uint64) \
96
+ CPU_DEVICE_UNARY(name, func, bool, int8, int8) \
97
+ CPU_DEVICE_UNARY(name, func, bool, int16, int16) \
98
+ CPU_DEVICE_UNARY(name, func, bool, int32, int32) \
99
+ CPU_DEVICE_UNARY(name, func, bool, int64, int64) \
100
+ CPU_DEVICE_UNARY(name, tfunc, bool, bfloat16, bfloat16) \
101
+ CPU_DEVICE_NOIMPL(name, hfunc, bool, float16, float16) \
102
+ CPU_DEVICE_UNARY(name, func, bool, float32, float32) \
103
+ CPU_DEVICE_UNARY(name, func, bool, float64, float64) \
104
+ CPU_DEVICE_NOIMPL(name, func, bool, complex32, complex32) \
105
+ CPU_DEVICE_UNARYC(name, func, bool, complex64, complex64) \
106
+ CPU_DEVICE_UNARYC(name, func, bool, complex128, complex128) \
107
+ \
108
+ CPU_DEVICE_UNARY(name, ufunc, uint8, uint8, uint8) \
109
+ CPU_DEVICE_UNARY(name, ufunc, uint8, uint16, uint16) \
110
+ CPU_DEVICE_UNARY(name, ufunc, uint8, uint32, uint32) \
111
+ CPU_DEVICE_UNARY(name, ufunc, uint8, uint64, uint64) \
112
+ CPU_DEVICE_UNARY(name, func, uint8, int16, int16) \
113
+ CPU_DEVICE_UNARY(name, func, uint8, int32, int32) \
114
+ CPU_DEVICE_UNARY(name, func, uint8, int64, int64) \
115
+ CPU_DEVICE_UNARY(name, tfunc, uint8, bfloat16, bfloat16) \
116
+ CPU_DEVICE_NOIMPL(name, hfunc, uint8, float16, float16) \
117
+ CPU_DEVICE_UNARY(name, func, uint8, float32, float32) \
118
+ CPU_DEVICE_UNARY(name, func, uint8, float64, float64) \
119
+ CPU_DEVICE_NOIMPL(name, func, uint8, complex32, complex32) \
120
+ CPU_DEVICE_UNARYC(name, func, uint8, complex64, complex64) \
121
+ CPU_DEVICE_UNARYC(name, func, uint8, complex128, complex128) \
122
+ \
123
+ CPU_DEVICE_UNARY(name, ufunc, uint16, uint16, uint16) \
124
+ CPU_DEVICE_UNARY(name, ufunc, uint16, uint32, uint32) \
125
+ CPU_DEVICE_UNARY(name, ufunc, uint16, uint64, uint64) \
126
+ CPU_DEVICE_UNARY(name, func, uint16, int32, int32) \
127
+ CPU_DEVICE_UNARY(name, func, uint16, int64, int64) \
128
+ CPU_DEVICE_UNARY(name, func, uint16, float32, float32) \
129
+ CPU_DEVICE_UNARY(name, func, uint16, float64, float64) \
130
+ CPU_DEVICE_UNARYC(name, func, uint16, complex64, complex64) \
131
+ CPU_DEVICE_UNARYC(name, func, uint16, complex128, complex128) \
132
+ \
133
+ CPU_DEVICE_UNARY(name, ufunc, uint32, uint32, uint32) \
134
+ CPU_DEVICE_UNARY(name, ufunc, uint32, uint64, uint64) \
135
+ CPU_DEVICE_UNARY(name, func, uint32, int64, int64) \
136
+ CPU_DEVICE_UNARY(name, func, uint32, float64, float64) \
137
+ CPU_DEVICE_UNARYC(name, func, uint32, complex128, complex128) \
138
+ \
139
+ CPU_DEVICE_UNARY(name, ufunc, uint64, uint64, uint64) \
140
+ \
141
+ CPU_DEVICE_UNARY(name, func, int8, int8, int8) \
142
+ CPU_DEVICE_UNARY(name, func, int8, int16, int16) \
143
+ CPU_DEVICE_UNARY(name, func, int8, int32, int32) \
144
+ CPU_DEVICE_UNARY(name, func, int8, int64, int64) \
145
+ CPU_DEVICE_UNARY(name, tfunc, int8, bfloat16, bfloat16) \
146
+ CPU_DEVICE_NOIMPL(name, hfunc, int8, float16, float16) \
147
+ CPU_DEVICE_UNARY(name, func, int8, float32, float32) \
148
+ CPU_DEVICE_UNARY(name, func, int8, float64, float64) \
149
+ CPU_DEVICE_NOIMPL(name, func, int8, complex32, complex32) \
150
+ CPU_DEVICE_UNARYC(name, func, int8, complex64, complex64) \
151
+ CPU_DEVICE_UNARYC(name, func, int8, complex128, complex128) \
152
+ \
153
+ CPU_DEVICE_UNARY(name, func, int16, int16, int16) \
154
+ CPU_DEVICE_UNARY(name, func, int16, int32, int32) \
155
+ CPU_DEVICE_UNARY(name, func, int16, int64, int64) \
156
+ CPU_DEVICE_UNARY(name, func, int16, float32, float32) \
157
+ CPU_DEVICE_UNARY(name, func, int16, float64, float64) \
158
+ CPU_DEVICE_UNARYC(name, func, int16, complex64, complex64) \
159
+ CPU_DEVICE_UNARYC(name, func, int16, complex128, complex128) \
160
+ \
161
+ CPU_DEVICE_UNARY(name, func, int32, int32, int32) \
162
+ CPU_DEVICE_UNARY(name, func, int32, int64, int64) \
163
+ CPU_DEVICE_UNARY(name, func, int32, float64, float64) \
164
+ CPU_DEVICE_UNARYC(name, func, int32, complex128, complex128) \
165
+ \
166
+ CPU_DEVICE_UNARY(name, func, int64, int64, int64) \
167
+ \
168
+ CPU_DEVICE_UNARY(name, tfunc, bfloat16, bfloat16, bfloat16) \
169
+ CPU_DEVICE_UNARY(name, func, bfloat16, float32, float32) \
170
+ CPU_DEVICE_UNARY(name, func, bfloat16, float64, float64) \
171
+ CPU_DEVICE_UNARYC(name, func, bfloat16, complex64, complex64) \
172
+ CPU_DEVICE_UNARYC(name, func, bfloat16, complex128, complex128) \
173
+ \
174
+ CPU_DEVICE_NOIMPL(name, hfunc, float16, float16, float16) \
175
+ CPU_DEVICE_NOIMPL(name, func, float16, float32, float32) \
176
+ CPU_DEVICE_NOIMPL(name, func, float16, float64, float64) \
177
+ CPU_DEVICE_NOIMPL(name, func, float16, complex32, complex32) \
178
+ CPU_DEVICE_NOIMPL(name, func, float16, complex64, complex64) \
179
+ CPU_DEVICE_NOIMPL(name, func, float16, complex128, complex128) \
180
+ \
181
+ CPU_DEVICE_UNARY(name, func, float32, float32, float32) \
182
+ CPU_DEVICE_UNARY(name, func, float32, float64, float64) \
183
+ CPU_DEVICE_UNARYC(name, func, float32, complex64, complex64) \
184
+ CPU_DEVICE_UNARYC(name, func, float32, complex128, complex128) \
185
+ \
186
+ CPU_DEVICE_UNARY(name, func, float64, float64, float64) \
187
+ CPU_DEVICE_UNARYC(name, func, float64, complex128, complex128) \
188
+ \
189
+ CPU_DEVICE_NOIMPL(name, func, complex32, complex32, complex32) \
190
+ CPU_DEVICE_NOIMPL(name, func, complex32, complex64, complex64) \
191
+ CPU_DEVICE_NOIMPL(name, func, complex32, complex128, complex128) \
192
+ \
193
+ CPU_DEVICE_UNARYC(name, func, complex64, complex64, complex64) \
194
+ CPU_DEVICE_UNARYC(name, func, complex64, complex128, complex128) \
195
+ \
196
+ CPU_DEVICE_UNARYC(name, func, complex128, complex128, complex128)
197
+
198
+
199
+ /*****************************************************************************/
200
+ /* Copy */
201
+ /*****************************************************************************/
202
+ /* copy(x) is the identity operation; the replacement list is parenthesized so the expansion stays a single operand whatever expression is passed. The macro is left defined after this section and is passed again as the unsigned-output function of abs. */
203
+ #define copy(x) (x)
204
+ CPU_DEVICE_ALL_UNARY(copy, copy, copy, copy, copy)
205
+
206
+
207
+ /*****************************************************************************/
208
+ /* Abs */
209
+ /*****************************************************************************/
210
+ /* abs kernels: std::abs for bool, signed integer, float32/float64 and complex outputs, the identity macro `copy` for unsigned integer outputs, tf::fabs for bfloat16 outputs. The final argument is only forwarded to CPU_DEVICE_NOIMPL rows and generates no code. */
211
+ CPU_DEVICE_ALL_UNARY(abs, std::abs, copy, tf::fabs, std::abs)
212
+
213
+
214
+ /*****************************************************************************/
215
+ /* Bitwise NOT */
216
+ /*****************************************************************************/
217
+ /* invert kernels: logical NOT for bool, then the macro is redefined as bitwise complement for the fixed-width integer types. Both replacement lists are fully parenthesized so they bind correctly for any argument expression. */
218
+ #define invert(x) (!(x))
219
+ CPU_DEVICE_UNARY(invert, invert, bool, bool, bool)
220
+ #undef invert
221
+
222
+ #define invert(x) (~(x))
223
+ CPU_DEVICE_UNARY(invert, invert, uint8, uint8, uint8)
224
+ CPU_DEVICE_UNARY(invert, invert, uint16, uint16, uint16)
225
+ CPU_DEVICE_UNARY(invert, invert, uint32, uint32, uint32)
226
+ CPU_DEVICE_UNARY(invert, invert, uint64, uint64, uint64)
227
+
228
+ CPU_DEVICE_UNARY(invert, invert, int8, int8, int8)
229
+ CPU_DEVICE_UNARY(invert, invert, int16, int16, int16)
230
+ CPU_DEVICE_UNARY(invert, invert, int32, int32, int32)
231
+ CPU_DEVICE_UNARY(invert, invert, int64, int64, int64)
232
+
233
+
234
+ /*****************************************************************************/
235
+ /* Negative */
236
+ /*****************************************************************************/
237
+ /* negative kernels: unary minus. uint8/uint16/uint32 inputs are cast to and stored as the next wider signed type (int16/int32/int64); no kernel is instantiated here for uint64, float16 or complex32. The replacement list is fully parenthesized so it binds correctly for any argument expression. */
238
+ #define negative(x) (-(x))
239
+ CPU_DEVICE_UNARY(negative, negative, uint8, int16, int16)
240
+ CPU_DEVICE_UNARY(negative, negative, uint16, int32, int32)
241
+ CPU_DEVICE_UNARY(negative, negative, uint32, int64, int64)
242
+
243
+ CPU_DEVICE_UNARY(negative, negative, int8, int8, int8)
244
+ CPU_DEVICE_UNARY(negative, negative, int16, int16, int16)
245
+ CPU_DEVICE_UNARY(negative, negative, int32, int32, int32)
246
+ CPU_DEVICE_UNARY(negative, negative, int64, int64, int64)
247
+
248
+ CPU_DEVICE_UNARY(negative, negative, bfloat16, bfloat16, bfloat16)
249
+ CPU_DEVICE_NOIMPL(negative, negative, float16, float16, float16)
250
+ CPU_DEVICE_UNARY(negative, negative, float32, float32, float32)
251
+ CPU_DEVICE_UNARY(negative, negative, float64, float64, float64)
252
+
253
+ CPU_DEVICE_NOIMPL(negative, negative, complex32, complex32, complex32)
254
+ CPU_DEVICE_UNARYC(negative, negative, complex64, complex64, complex64)
255
+ CPU_DEVICE_UNARYC(negative, negative, complex128, complex128, complex128)
256
+
257
+
258
+ /*****************************************************************************/
259
+ /* Math */
260
+ /*****************************************************************************/
261
+ /* CPU_DEVICE_UNARY_ALL_REAL_MATH(name): real-valued kernels for a math function. uint16/int16/float32 inputs call name##f and produce float32 (kernel stem name##f), bfloat16 calls tf::name (kernel stem name##b16), and uint32/int32/float64 inputs call name and produce float64 (kernel stem name). */
262
+ #define CPU_DEVICE_UNARY_ALL_REAL_MATH(name) \
263
+ CPU_DEVICE_UNARY(name##f, name##f, uint16, float32, float32) \
264
+ CPU_DEVICE_UNARY(name##f, name##f, int16, float32, float32) \
265
+ CPU_DEVICE_UNARY(name##b16, tf::name, bfloat16, bfloat16, bfloat16) \
266
+ CPU_DEVICE_UNARY(name##f, name##f, float32, float32, float32) \
267
+ CPU_DEVICE_UNARY(name, name, uint32, float64, float64) \
268
+ CPU_DEVICE_UNARY(name, name, int32, float64, float64) \
269
+ CPU_DEVICE_UNARY(name, name, float64, float64, float64)
270
+ /* CPU_DEVICE_UNARY_ALL_COMPLEX_MATH(name): the real kernels of CPU_DEVICE_UNARY_ALL_REAL_MATH plus complex64 and complex128 kernels (complex32 generates nothing). The definition ends on its last row: no trailing line continuation, so the following line can never be spliced into the macro. */
271
+ #define CPU_DEVICE_UNARY_ALL_COMPLEX_MATH(name) \
272
+ CPU_DEVICE_UNARY_ALL_REAL_MATH(name) \
273
+ CPU_DEVICE_NOIMPL(name, name, complex32, complex32, complex32) \
274
+ CPU_DEVICE_UNARYC(name, name, complex64, complex64, complex64) \
275
+ CPU_DEVICE_UNARYC(name, name, complex128, complex128, complex128)
276
+ /* Half-precision helpers: CPU_DEVICE_UNARY_ALL_HALF_MATH instantiates name##f16 kernels with float16 output for uint8, int8 and float16 inputs using hfunc; the *_WITH_HALF macros prepend those kernels to the real or complex set. NOTE(review): no float16_t typedef and no use of these macros is visible in this part of the file -- confirm before instantiating. */
277
+ #define CPU_DEVICE_UNARY_ALL_HALF_MATH(name, hfunc) \
278
+ CPU_DEVICE_UNARY(name##f16, hfunc, uint8, float16, float16) \
279
+ CPU_DEVICE_UNARY(name##f16, hfunc, int8, float16, float16) \
280
+ CPU_DEVICE_UNARY(name##f16, hfunc, float16, float16, float16)
281
+
282
+ #define CPU_DEVICE_UNARY_ALL_REAL_MATH_WITH_HALF(name, hfunc) \
283
+ CPU_DEVICE_UNARY_ALL_HALF_MATH(name, hfunc) \
284
+ CPU_DEVICE_UNARY_ALL_REAL_MATH(name)
285
+
286
+ #define CPU_DEVICE_UNARY_ALL_COMPLEX_MATH_WITH_HALF(name, hfunc) \
287
+ CPU_DEVICE_UNARY_ALL_HALF_MATH(name, hfunc) \
288
+ CPU_DEVICE_UNARY_ALL_COMPLEX_MATH(name)
289
+
290
+
291
+ /*****************************************************************************/
292
+ /* Abs functions */
293
+ /*****************************************************************************/
294
+ /* fabs: real-valued kernels only, with kernel name stems fabsf (float32 output), fabsb16 (bfloat16) and fabs (float64 output). */
295
+ CPU_DEVICE_UNARY_ALL_REAL_MATH(fabs)
296
+
297
+
298
+ /*****************************************************************************/
299
+ /* Exponential functions */
300
+ /*****************************************************************************/
301
+ /* exp additionally gets complex64/complex128 kernels (not generated here when _MSC_VER is defined); exp2 and expm1 are real-only. */
302
+ CPU_DEVICE_UNARY_ALL_COMPLEX_MATH(exp)
303
+ CPU_DEVICE_UNARY_ALL_REAL_MATH(exp2)
304
+ CPU_DEVICE_UNARY_ALL_REAL_MATH(expm1)
305
+
306
+
307
+ /*****************************************************************************/
308
+ /* Logarithm functions */
309
+ /*****************************************************************************/
310
+ /* log and log10 include complex64/complex128 kernels; log2, log1p and logb are instantiated for real types only. */
311
+ CPU_DEVICE_UNARY_ALL_COMPLEX_MATH(log)
312
+ CPU_DEVICE_UNARY_ALL_COMPLEX_MATH(log10)
313
+ CPU_DEVICE_UNARY_ALL_REAL_MATH(log2)
314
+ CPU_DEVICE_UNARY_ALL_REAL_MATH(log1p)
315
+ CPU_DEVICE_UNARY_ALL_REAL_MATH(logb)
316
+
317
+
318
+ /*****************************************************************************/
319
+ /* Power functions */
320
+ /*****************************************************************************/
321
+ /* sqrt includes complex64/complex128 kernels; cbrt is instantiated for real types only. */
322
+ CPU_DEVICE_UNARY_ALL_COMPLEX_MATH(sqrt)
323
+ CPU_DEVICE_UNARY_ALL_REAL_MATH(cbrt)
324
+
325
+
326
+ /*****************************************************************************/
327
+ /* Trigonometric functions */
328
+ /*****************************************************************************/
329
+ /* All six trigonometric functions get the real kernels plus complex64/complex128 kernels. */
330
+ CPU_DEVICE_UNARY_ALL_COMPLEX_MATH(sin)
331
+ CPU_DEVICE_UNARY_ALL_COMPLEX_MATH(cos)
332
+ CPU_DEVICE_UNARY_ALL_COMPLEX_MATH(tan)
333
+ CPU_DEVICE_UNARY_ALL_COMPLEX_MATH(asin)
334
+ CPU_DEVICE_UNARY_ALL_COMPLEX_MATH(acos)
335
+ CPU_DEVICE_UNARY_ALL_COMPLEX_MATH(atan)
336
+
337
+
338
+ /*****************************************************************************/
339
+ /* Hyperbolic functions */
340
+ /*****************************************************************************/
341
+ /* All six hyperbolic functions get the real kernels plus complex64/complex128 kernels. */
342
+ CPU_DEVICE_UNARY_ALL_COMPLEX_MATH(sinh)
343
+ CPU_DEVICE_UNARY_ALL_COMPLEX_MATH(cosh)
344
+ CPU_DEVICE_UNARY_ALL_COMPLEX_MATH(tanh)
345
+ CPU_DEVICE_UNARY_ALL_COMPLEX_MATH(asinh)
346
+ CPU_DEVICE_UNARY_ALL_COMPLEX_MATH(acosh)
347
+ CPU_DEVICE_UNARY_ALL_COMPLEX_MATH(atanh)
348
+
349
+
350
+ /*****************************************************************************/
351
+ /* Error and gamma functions */
352
+ /*****************************************************************************/
353
+ /* Error and gamma functions are instantiated for real types only; no complex kernels are generated. */
354
+ CPU_DEVICE_UNARY_ALL_REAL_MATH(erf)
355
+ CPU_DEVICE_UNARY_ALL_REAL_MATH(erfc)
356
+ CPU_DEVICE_UNARY_ALL_REAL_MATH(lgamma)
357
+ CPU_DEVICE_UNARY_ALL_REAL_MATH(tgamma)
358
+
359
+
360
+ /*****************************************************************************/
361
+ /* Ceiling, floor, trunc, round, nearbyint */
362
+ /*****************************************************************************/
363
+ /* Rounding functions are instantiated for real types only; outputs are stored as float32, bfloat16 or float64, never as an integer type. */
364
+ CPU_DEVICE_UNARY_ALL_REAL_MATH(ceil)
365
+ CPU_DEVICE_UNARY_ALL_REAL_MATH(floor)
366
+ CPU_DEVICE_UNARY_ALL_REAL_MATH(trunc)
367
+ CPU_DEVICE_UNARY_ALL_REAL_MATH(round)
368
+ CPU_DEVICE_UNARY_ALL_REAL_MATH(nearbyint)
@@ -0,0 +1,335 @@
1
+ /*
2
+ * BSD 3-Clause License
3
+ *
4
+ * Copyright (c) 2017-2018, plures
5
+ * All rights reserved.
6
+ *
7
+ * Redistribution and use in source and binary forms, with or without
8
+ * modification, are permitted provided that the following conditions are met:
9
+ *
10
+ * 1. Redistributions of source code must retain the above copyright notice,
11
+ * this list of conditions and the following disclaimer.
12
+ *
13
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
14
+ * this list of conditions and the following disclaimer in the documentation
15
+ * and/or other materials provided with the distribution.
16
+ *
17
+ * 3. Neither the name of the copyright holder nor the names of its
18
+ * contributors may be used to endorse or promote products derived from
19
+ * this software without specific prior written permission.
20
+ *
21
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
22
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
24
+ * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
25
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
27
+ * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
28
+ * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
29
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
30
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31
+ */
32
+
33
+
34
+ #ifndef CPU_DEVICE_UNARY_H
35
+ #define CPU_DEVICE_UNARY_H
36
+
37
+ /* Element type aliases named <type>_t. The bfloat16 and complex aliases are only defined when this header is compiled as C++; bool_t, float32_t and float64_t are defined for both C and C++. */
38
+ #ifdef __cplusplus
39
+ #include <cinttypes>
40
+ #include <complex>
41
+ #include "contrib/bfloat16.h"
42
+ typedef tf::bfloat16 bfloat16_t;
43
+ typedef std::complex<float> complex64_t;
44
+ typedef std::complex<double> complex128_t;
45
+ #else
46
+ #include <stdint.h>
47
+ #endif
48
+
49
+ /* NOTE(review): the C branch above includes only <stdint.h>, so `bool` below relies on <stdbool.h> having been included before this header -- confirm that every C includer does so. */
50
+ typedef bool bool_t;
51
+ typedef float float32_t;
52
+ typedef double float64_t;
53
+
54
+
55
+ /*****************************************************************************/
56
+ /* CPU device kernel signature */
57
+ /*****************************************************************************/
58
+ /* CPU_DEVICE_UNARY_DECL(name, t0, t1) declares the three kernels of one (input, output) type pair: contiguous 1D, strided 1D and 0D. The C++ branch adds extern "C"; the C branch declares the same names without it. */
59
+ #ifdef __cplusplus
60
+ #define CPU_DEVICE_UNARY_DECL(name, t0, t1) \
61
+ extern "C" void gm_cpu_device_fixed_1D_C_##name##_##t0##_##t1(const char *a0, char *a1, \
62
+ const int64_t N); \
63
+ extern "C" void gm_cpu_device_fixed_1D_S_##name##_##t0##_##t1(const char *a0, char *a1, \
64
+ const int64_t s0, const int64_t s1, \
65
+ const int64_t N); \
66
+ extern "C" void gm_cpu_device_0D_##name##_##t0##_##t1(const char *a0, char *a1);
67
+ #else
68
+ #define CPU_DEVICE_UNARY_DECL(name, t0, t1) \
69
+ void gm_cpu_device_fixed_1D_C_##name##_##t0##_##t1(const char *a0, char *a1, \
70
+ const int64_t N); \
71
+ void gm_cpu_device_fixed_1D_S_##name##_##t0##_##t1(const char *a0, char *a1, \
72
+ const int64_t s0, const int64_t s1, \
73
+ const int64_t N); \
74
+ void gm_cpu_device_0D_##name##_##t0##_##t1(const char *a0, char *a1);
75
+ #endif
76
+ /* CPU_DEVICE_UNARY_NOIMPL_DECL expands to nothing: it marks a type pair for which no kernel is declared. */
77
+ #define CPU_DEVICE_UNARY_NOIMPL_DECL(name, t0, t1)
78
+
79
+
80
+ /*****************************************************************************/
81
+ /* Copy, Abs */
82
+ /*****************************************************************************/
83
+ /* CPU_DEVICE_ALL_UNARY_ALL_DECL(name) declares the kernels of `name` for every listed (input, output) type pair; the float16 and complex32 rows use CPU_DEVICE_UNARY_NOIMPL_DECL and therefore declare nothing. */
84
+ #define CPU_DEVICE_ALL_UNARY_ALL_DECL(name) \
85
+ CPU_DEVICE_UNARY_DECL(name, bool, bool) \
86
+ CPU_DEVICE_UNARY_DECL(name, bool, uint8) \
87
+ CPU_DEVICE_UNARY_DECL(name, bool, uint16) \
88
+ CPU_DEVICE_UNARY_DECL(name, bool, uint32) \
89
+ CPU_DEVICE_UNARY_DECL(name, bool, uint64) \
90
+ CPU_DEVICE_UNARY_DECL(name, bool, int8) \
91
+ CPU_DEVICE_UNARY_DECL(name, bool, int16) \
92
+ CPU_DEVICE_UNARY_DECL(name, bool, int32) \
93
+ CPU_DEVICE_UNARY_DECL(name, bool, int64) \
94
+ CPU_DEVICE_UNARY_DECL(name, bool, bfloat16) \
95
+ CPU_DEVICE_UNARY_NOIMPL_DECL(name, bool, float16) \
96
+ CPU_DEVICE_UNARY_DECL(name, bool, float32) \
97
+ CPU_DEVICE_UNARY_DECL(name, bool, float64) \
98
+ CPU_DEVICE_UNARY_NOIMPL_DECL(name, bool, complex32) \
99
+ CPU_DEVICE_UNARY_DECL(name, bool, complex64) \
100
+ CPU_DEVICE_UNARY_DECL(name, bool, complex128) \
101
+ CPU_DEVICE_UNARY_DECL(name, uint8, uint8) \
102
+ CPU_DEVICE_UNARY_DECL(name, uint8, uint16) \
103
+ CPU_DEVICE_UNARY_DECL(name, uint8, uint32) \
104
+ CPU_DEVICE_UNARY_DECL(name, uint8, uint64) \
105
+ CPU_DEVICE_UNARY_DECL(name, uint8, int16) \
106
+ CPU_DEVICE_UNARY_DECL(name, uint8, int32) \
107
+ CPU_DEVICE_UNARY_DECL(name, uint8, int64) \
108
+ CPU_DEVICE_UNARY_DECL(name, uint8, bfloat16) \
109
+ CPU_DEVICE_UNARY_NOIMPL_DECL(name, uint8, float16) \
110
+ CPU_DEVICE_UNARY_DECL(name, uint8, float32) \
111
+ CPU_DEVICE_UNARY_DECL(name, uint8, float64) \
112
+ CPU_DEVICE_UNARY_NOIMPL_DECL(name, uint8, complex32) \
113
+ CPU_DEVICE_UNARY_DECL(name, uint8, complex64) \
114
+ CPU_DEVICE_UNARY_DECL(name, uint8, complex128) \
115
+ CPU_DEVICE_UNARY_DECL(name, uint16, uint16) \
116
+ CPU_DEVICE_UNARY_DECL(name, uint16, uint32) \
117
+ CPU_DEVICE_UNARY_DECL(name, uint16, uint64) \
118
+ CPU_DEVICE_UNARY_DECL(name, uint16, int32) \
119
+ CPU_DEVICE_UNARY_DECL(name, uint16, int64) \
120
+ CPU_DEVICE_UNARY_DECL(name, uint16, float32) \
121
+ CPU_DEVICE_UNARY_DECL(name, uint16, float64) \
122
+ CPU_DEVICE_UNARY_DECL(name, uint16, complex64) \
123
+ CPU_DEVICE_UNARY_DECL(name, uint16, complex128) \
124
+ CPU_DEVICE_UNARY_DECL(name, uint32, uint32) \
125
+ CPU_DEVICE_UNARY_DECL(name, uint32, uint64) \
126
+ CPU_DEVICE_UNARY_DECL(name, uint32, int64) \
127
+ CPU_DEVICE_UNARY_DECL(name, uint32, float64) \
128
+ CPU_DEVICE_UNARY_DECL(name, uint32, complex128) \
129
+ CPU_DEVICE_UNARY_DECL(name, uint64, uint64) \
130
+ CPU_DEVICE_UNARY_DECL(name, int8, int8) \
131
+ CPU_DEVICE_UNARY_DECL(name, int8, int16) \
132
+ CPU_DEVICE_UNARY_DECL(name, int8, int32) \
133
+ CPU_DEVICE_UNARY_DECL(name, int8, int64) \
134
+ CPU_DEVICE_UNARY_DECL(name, int8, bfloat16) \
135
+ CPU_DEVICE_UNARY_NOIMPL_DECL(name, int8, float16) \
136
+ CPU_DEVICE_UNARY_DECL(name, int8, float32) \
137
+ CPU_DEVICE_UNARY_DECL(name, int8, float64) \
138
+ CPU_DEVICE_UNARY_NOIMPL_DECL(name, int8, complex32) \
139
+ CPU_DEVICE_UNARY_DECL(name, int8, complex64) \
140
+ CPU_DEVICE_UNARY_DECL(name, int8, complex128) \
141
+ CPU_DEVICE_UNARY_DECL(name, int16, int16) \
142
+ CPU_DEVICE_UNARY_DECL(name, int16, int32) \
143
+ CPU_DEVICE_UNARY_DECL(name, int16, int64) \
144
+ CPU_DEVICE_UNARY_DECL(name, int16, float32) \
145
+ CPU_DEVICE_UNARY_DECL(name, int16, float64) \
146
+ CPU_DEVICE_UNARY_DECL(name, int16, complex64) \
147
+ CPU_DEVICE_UNARY_DECL(name, int16, complex128) \
148
+ CPU_DEVICE_UNARY_DECL(name, int32, int32) \
149
+ CPU_DEVICE_UNARY_DECL(name, int32, int64) \
150
+ CPU_DEVICE_UNARY_DECL(name, int32, float64) \
151
+ CPU_DEVICE_UNARY_DECL(name, int32, complex128) \
152
+ CPU_DEVICE_UNARY_DECL(name, int64, int64) \
153
+ CPU_DEVICE_UNARY_DECL(name, bfloat16, bfloat16) \
154
+ CPU_DEVICE_UNARY_DECL(name, bfloat16, float32) \
155
+ CPU_DEVICE_UNARY_DECL(name, bfloat16, float64) \
156
+ CPU_DEVICE_UNARY_DECL(name, bfloat16, complex64) \
157
+ CPU_DEVICE_UNARY_DECL(name, bfloat16, complex128) \
158
+ CPU_DEVICE_UNARY_NOIMPL_DECL(name, float16, float16) \
159
+ CPU_DEVICE_UNARY_NOIMPL_DECL(name, float16, float32) \
160
+ CPU_DEVICE_UNARY_NOIMPL_DECL(name, float16, float64) \
161
+ CPU_DEVICE_UNARY_NOIMPL_DECL(name, float16, complex32) \
162
+ CPU_DEVICE_UNARY_NOIMPL_DECL(name, float16, complex64) \
163
+ CPU_DEVICE_UNARY_NOIMPL_DECL(name, float16, complex128) \
164
+ CPU_DEVICE_UNARY_DECL(name, float32, float32) \
165
+ CPU_DEVICE_UNARY_DECL(name, float32, float64) \
166
+ CPU_DEVICE_UNARY_DECL(name, float32, complex64) \
167
+ CPU_DEVICE_UNARY_DECL(name, float32, complex128) \
168
+ CPU_DEVICE_UNARY_DECL(name, float64, float64) \
169
+ CPU_DEVICE_UNARY_DECL(name, float64, complex128) \
170
+ CPU_DEVICE_UNARY_NOIMPL_DECL(name, complex32, complex32) \
171
+ CPU_DEVICE_UNARY_NOIMPL_DECL(name, complex32, complex64) \
172
+ CPU_DEVICE_UNARY_NOIMPL_DECL(name, complex32, complex128) \
173
+ CPU_DEVICE_UNARY_DECL(name, complex64, complex64) \
174
+ CPU_DEVICE_UNARY_DECL(name, complex64, complex128) \
175
+ CPU_DEVICE_UNARY_DECL(name, complex128, complex128)
176
+
177
+ /* Declarations for the two functions that are instantiated over the full type table: copy and abs. */
178
+ CPU_DEVICE_ALL_UNARY_ALL_DECL(copy)
179
+ CPU_DEVICE_ALL_UNARY_ALL_DECL(abs)
180
+
181
+
182
+ /*****************************************************************************/
183
+ /* Bitwise NOT */
184
+ /*****************************************************************************/
185
+ /* invert is declared only for same-type pairs: bool and the signed and unsigned 8/16/32/64-bit integers. */
186
+ CPU_DEVICE_UNARY_DECL(invert, bool, bool)
187
+
188
+ CPU_DEVICE_UNARY_DECL(invert, uint8, uint8)
189
+ CPU_DEVICE_UNARY_DECL(invert, uint16, uint16)
190
+ CPU_DEVICE_UNARY_DECL(invert, uint32, uint32)
191
+ CPU_DEVICE_UNARY_DECL(invert, uint64, uint64)
192
+
193
+ CPU_DEVICE_UNARY_DECL(invert, int8, int8)
194
+ CPU_DEVICE_UNARY_DECL(invert, int16, int16)
195
+ CPU_DEVICE_UNARY_DECL(invert, int32, int32)
196
+ CPU_DEVICE_UNARY_DECL(invert, int64, int64)
197
+
198
+
199
+ /*****************************************************************************/
200
+ /* Negative: unsigned entries pair each type with the next wider signed type; signed, floating-point and complex entries pass the same type twice. */
201
+ /*****************************************************************************/
202
+
203
+ CPU_DEVICE_UNARY_DECL(negative, uint8, int16) /* uint8 is paired with int16, not uint8 */
204
+ CPU_DEVICE_UNARY_DECL(negative, uint16, int32)
205
+ CPU_DEVICE_UNARY_DECL(negative, uint32, int64) /* NOTE(review): no uint64 entry is declared -- presumably because no wider signed type exists; confirm */
206
+
207
+ CPU_DEVICE_UNARY_DECL(negative, int8, int8)
208
+ CPU_DEVICE_UNARY_DECL(negative, int16, int16)
209
+ CPU_DEVICE_UNARY_DECL(negative, int32, int32)
210
+ CPU_DEVICE_UNARY_DECL(negative, int64, int64)
211
+
212
+ CPU_DEVICE_UNARY_DECL(negative, bfloat16, bfloat16)
213
+ CPU_DEVICE_UNARY_NOIMPL_DECL(negative, float16, float16) /* float16 goes through the NOIMPL variant, as in the type-pair list above */
214
+ CPU_DEVICE_UNARY_DECL(negative, float32, float32)
215
+ CPU_DEVICE_UNARY_DECL(negative, float64, float64)
216
+
217
+ CPU_DEVICE_UNARY_NOIMPL_DECL(negative, complex32, complex32) /* complex32 likewise uses the NOIMPL variant */
218
+ CPU_DEVICE_UNARY_DECL(negative, complex64, complex64)
219
+ CPU_DEVICE_UNARY_DECL(negative, complex128, complex128)
220
+
221
+
222
+ /*****************************************************************************/
223
+ /* Math */
224
+ /*****************************************************************************/
225
+
226
+ #define CPU_DEVICE_UNARY_ALL_REAL_MATH_DECL(name) /* real-valued entries for one math function; the pasted suffix selects the variant: name##f with float32, name##b16 with bfloat16, plain name with float64 */ \
227
+ CPU_DEVICE_UNARY_DECL(name##f, uint16, float32) /* 16-bit integers are paired with float32 */ \
228
+ CPU_DEVICE_UNARY_DECL(name##f, int16, float32) \
229
+ CPU_DEVICE_UNARY_DECL(name##b16, bfloat16, bfloat16) \
230
+ CPU_DEVICE_UNARY_DECL(name##f, float32, float32) \
231
+ CPU_DEVICE_UNARY_DECL(name, uint32, float64) /* 32-bit integers are paired with float64; no 8-bit or 64-bit integer entries appear in this list */ \
232
+ CPU_DEVICE_UNARY_DECL(name, int32, float64) \
233
+ CPU_DEVICE_UNARY_DECL(name, float64, float64)
234
+
235
+ #define CPU_DEVICE_UNARY_ALL_COMPLEX_MATH_DECL(name) /* every real-valued entry plus complex32/complex64/complex128 entries, all under the unsuffixed name */ \
236
+ CPU_DEVICE_UNARY_ALL_REAL_MATH_DECL(name) \
237
+ CPU_DEVICE_UNARY_DECL(name, complex32, complex32) /* NOTE(review): complex32 uses the plain DECL here, whereas `negative` and the all-types list use CPU_DEVICE_UNARY_NOIMPL_DECL for complex32 -- confirm this is intended */ \
238
+ CPU_DEVICE_UNARY_DECL(name, complex64, complex64) \
239
+ CPU_DEVICE_UNARY_DECL(name, complex128, complex128)
240
+
241
+ #define CPU_DEVICE_UNARY_ALL_HALF_MATH_DECL(name) /* float16 entries for one math function, declared as name##f16; 8-bit integers are paired with float16. NOTE(review): float16 uses the plain DECL here but the NOIMPL variant elsewhere in this header -- confirm */ \
242
+ CPU_DEVICE_UNARY_DECL(name##f16, uint8, float16) \
243
+ CPU_DEVICE_UNARY_DECL(name##f16, int8, float16) \
244
+ CPU_DEVICE_UNARY_DECL(name##f16, float16, float16)
245
+
246
+ #define CPU_DEVICE_UNARY_ALL_REAL_MATH_WITH_HALF_DECL(name) /* half-precision (name##f16) entries followed by all real-valued entries for one math function; the last body line deliberately carries no continuation so the macro ends here regardless of what follows */ \
247
+ CPU_DEVICE_UNARY_ALL_HALF_MATH_DECL(name) \
248
+ CPU_DEVICE_UNARY_ALL_REAL_MATH_DECL(name)
249
+
250
+ #define CPU_DEVICE_UNARY_ALL_COMPLEX_MATH_WITH_HALF_DECL(name) /* half-precision (name##f16) entries followed by all real-valued and complex entries for one math function; the last body line deliberately carries no continuation so the macro ends here regardless of what follows */ \
251
+ CPU_DEVICE_UNARY_ALL_HALF_MATH_DECL(name) \
252
+ CPU_DEVICE_UNARY_ALL_COMPLEX_MATH_DECL(name)
253
+
254
+
255
+ /*****************************************************************************/
256
+ /* Abs functions: `fabs` receives the real-valued entries only (fabsf, fabsb16, fabs). */
257
+ /*****************************************************************************/
258
+
259
+ CPU_DEVICE_UNARY_ALL_REAL_MATH_DECL(fabs)
260
+
261
+
262
+ /*****************************************************************************/
263
+ /* Exponential functions: `exp` receives real and complex entries; `exp2` and `expm1` receive real entries only. */
264
+ /*****************************************************************************/
265
+
266
+ CPU_DEVICE_UNARY_ALL_COMPLEX_MATH_DECL(exp)
267
+ CPU_DEVICE_UNARY_ALL_REAL_MATH_DECL(exp2)
268
+ CPU_DEVICE_UNARY_ALL_REAL_MATH_DECL(expm1)
269
+
270
+
271
+ /*****************************************************************************/
272
+ /* Logarithm functions: `log` and `log10` receive real and complex entries; `log2`, `log1p` and `logb` receive real entries only. */
273
+ /*****************************************************************************/
274
+
275
+ CPU_DEVICE_UNARY_ALL_COMPLEX_MATH_DECL(log)
276
+ CPU_DEVICE_UNARY_ALL_COMPLEX_MATH_DECL(log10)
277
+ CPU_DEVICE_UNARY_ALL_REAL_MATH_DECL(log2)
278
+ CPU_DEVICE_UNARY_ALL_REAL_MATH_DECL(log1p)
279
+ CPU_DEVICE_UNARY_ALL_REAL_MATH_DECL(logb)
280
+
281
+
282
+ /*****************************************************************************/
283
+ /* Power functions: `sqrt` receives real and complex entries; `cbrt` receives real entries only. */
284
+ /*****************************************************************************/
285
+
286
+ CPU_DEVICE_UNARY_ALL_COMPLEX_MATH_DECL(sqrt)
287
+ CPU_DEVICE_UNARY_ALL_REAL_MATH_DECL(cbrt)
288
+
289
+
290
+ /*****************************************************************************/
291
+ /* Trigonometric functions: all six (sin, cos, tan and their inverses) receive real and complex entries. */
292
+ /*****************************************************************************/
293
+
294
+ CPU_DEVICE_UNARY_ALL_COMPLEX_MATH_DECL(sin)
295
+ CPU_DEVICE_UNARY_ALL_COMPLEX_MATH_DECL(cos)
296
+ CPU_DEVICE_UNARY_ALL_COMPLEX_MATH_DECL(tan)
297
+ CPU_DEVICE_UNARY_ALL_COMPLEX_MATH_DECL(asin)
298
+ CPU_DEVICE_UNARY_ALL_COMPLEX_MATH_DECL(acos)
299
+ CPU_DEVICE_UNARY_ALL_COMPLEX_MATH_DECL(atan)
300
+
301
+
302
+ /*****************************************************************************/
303
+ /* Hyperbolic functions: all six (sinh, cosh, tanh and their inverses) receive real and complex entries. */
304
+ /*****************************************************************************/
305
+
306
+ CPU_DEVICE_UNARY_ALL_COMPLEX_MATH_DECL(sinh)
307
+ CPU_DEVICE_UNARY_ALL_COMPLEX_MATH_DECL(cosh)
308
+ CPU_DEVICE_UNARY_ALL_COMPLEX_MATH_DECL(tanh)
309
+ CPU_DEVICE_UNARY_ALL_COMPLEX_MATH_DECL(asinh)
310
+ CPU_DEVICE_UNARY_ALL_COMPLEX_MATH_DECL(acosh)
311
+ CPU_DEVICE_UNARY_ALL_COMPLEX_MATH_DECL(atanh)
312
+
313
+
314
+ /*****************************************************************************/
315
+ /* Error and gamma functions: erf, erfc, lgamma and tgamma receive real entries only. */
316
+ /*****************************************************************************/
317
+
318
+ CPU_DEVICE_UNARY_ALL_REAL_MATH_DECL(erf)
319
+ CPU_DEVICE_UNARY_ALL_REAL_MATH_DECL(erfc)
320
+ CPU_DEVICE_UNARY_ALL_REAL_MATH_DECL(lgamma)
321
+ CPU_DEVICE_UNARY_ALL_REAL_MATH_DECL(tgamma)
322
+
323
+
324
+ /*****************************************************************************/
325
+ /* Ceiling, floor, trunc, round, nearbyint: all five receive real entries only. */
326
+ /*****************************************************************************/
327
+
328
+ CPU_DEVICE_UNARY_ALL_REAL_MATH_DECL(ceil)
329
+ CPU_DEVICE_UNARY_ALL_REAL_MATH_DECL(floor)
330
+ CPU_DEVICE_UNARY_ALL_REAL_MATH_DECL(trunc)
331
+ CPU_DEVICE_UNARY_ALL_REAL_MATH_DECL(round)
332
+ CPU_DEVICE_UNARY_ALL_REAL_MATH_DECL(nearbyint)
333
+
334
+
335
+ #endif /* CPU_DEVICE_UNARY_H */