gumath 0.2.0dev5
This diff shows the content of publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/CONTRIBUTING.md +61 -0
- data/Gemfile +5 -0
- data/History.md +0 -0
- data/README.md +5 -0
- data/Rakefile +105 -0
- data/ext/ruby_gumath/examples.c +126 -0
- data/ext/ruby_gumath/extconf.rb +97 -0
- data/ext/ruby_gumath/functions.c +106 -0
- data/ext/ruby_gumath/gufunc_object.c +79 -0
- data/ext/ruby_gumath/gufunc_object.h +55 -0
- data/ext/ruby_gumath/gumath/AUTHORS.txt +5 -0
- data/ext/ruby_gumath/gumath/INSTALL.txt +42 -0
- data/ext/ruby_gumath/gumath/LICENSE.txt +29 -0
- data/ext/ruby_gumath/gumath/MANIFEST.in +3 -0
- data/ext/ruby_gumath/gumath/Makefile.in +62 -0
- data/ext/ruby_gumath/gumath/README.rst +20 -0
- data/ext/ruby_gumath/gumath/config.guess +1530 -0
- data/ext/ruby_gumath/gumath/config.h.in +52 -0
- data/ext/ruby_gumath/gumath/config.sub +1782 -0
- data/ext/ruby_gumath/gumath/configure +5049 -0
- data/ext/ruby_gumath/gumath/configure.ac +167 -0
- data/ext/ruby_gumath/gumath/doc/_static/copybutton.js +66 -0
- data/ext/ruby_gumath/gumath/doc/conf.py +26 -0
- data/ext/ruby_gumath/gumath/doc/gumath/functions.rst +62 -0
- data/ext/ruby_gumath/gumath/doc/gumath/index.rst +26 -0
- data/ext/ruby_gumath/gumath/doc/index.rst +45 -0
- data/ext/ruby_gumath/gumath/doc/libgumath/data-structures.rst +130 -0
- data/ext/ruby_gumath/gumath/doc/libgumath/functions.rst +78 -0
- data/ext/ruby_gumath/gumath/doc/libgumath/index.rst +25 -0
- data/ext/ruby_gumath/gumath/doc/libgumath/kernels.rst +41 -0
- data/ext/ruby_gumath/gumath/doc/releases/index.rst +11 -0
- data/ext/ruby_gumath/gumath/install-sh +527 -0
- data/ext/ruby_gumath/gumath/libgumath/Makefile.in +170 -0
- data/ext/ruby_gumath/gumath/libgumath/Makefile.vc +160 -0
- data/ext/ruby_gumath/gumath/libgumath/apply.c +201 -0
- data/ext/ruby_gumath/gumath/libgumath/extending/bfloat16.c +130 -0
- data/ext/ruby_gumath/gumath/libgumath/extending/examples.c +176 -0
- data/ext/ruby_gumath/gumath/libgumath/extending/graph.c +393 -0
- data/ext/ruby_gumath/gumath/libgumath/extending/pdist.c +140 -0
- data/ext/ruby_gumath/gumath/libgumath/extending/quaternion.c +156 -0
- data/ext/ruby_gumath/gumath/libgumath/func.c +177 -0
- data/ext/ruby_gumath/gumath/libgumath/gumath.h +205 -0
- data/ext/ruby_gumath/gumath/libgumath/kernels/binary.c +547 -0
- data/ext/ruby_gumath/gumath/libgumath/kernels/unary.c +449 -0
- data/ext/ruby_gumath/gumath/libgumath/nploops.c +219 -0
- data/ext/ruby_gumath/gumath/libgumath/tbl.c +223 -0
- data/ext/ruby_gumath/gumath/libgumath/thread.c +175 -0
- data/ext/ruby_gumath/gumath/libgumath/xndloops.c +130 -0
- data/ext/ruby_gumath/gumath/python/extending.py +24 -0
- data/ext/ruby_gumath/gumath/python/gumath/__init__.py +74 -0
- data/ext/ruby_gumath/gumath/python/gumath/_gumath.c +577 -0
- data/ext/ruby_gumath/gumath/python/gumath/examples.c +93 -0
- data/ext/ruby_gumath/gumath/python/gumath/functions.c +77 -0
- data/ext/ruby_gumath/gumath/python/gumath/pygumath.h +95 -0
- data/ext/ruby_gumath/gumath/python/test_gumath.py +405 -0
- data/ext/ruby_gumath/gumath/setup.py +298 -0
- data/ext/ruby_gumath/gumath/vcbuild/INSTALL.txt +36 -0
- data/ext/ruby_gumath/gumath/vcbuild/vcbuild32.bat +21 -0
- data/ext/ruby_gumath/gumath/vcbuild/vcbuild64.bat +21 -0
- data/ext/ruby_gumath/gumath/vcbuild/vcclean.bat +10 -0
- data/ext/ruby_gumath/gumath/vcbuild/vcdistclean.bat +11 -0
- data/ext/ruby_gumath/include/gumath.h +205 -0
- data/ext/ruby_gumath/include/ruby_gumath.h +41 -0
- data/ext/ruby_gumath/lib/libgumath.a +0 -0
- data/ext/ruby_gumath/lib/libgumath.so +1 -0
- data/ext/ruby_gumath/lib/libgumath.so.0 +1 -0
- data/ext/ruby_gumath/lib/libgumath.so.0.2.0dev3 +0 -0
- data/ext/ruby_gumath/ruby_gumath.c +295 -0
- data/ext/ruby_gumath/ruby_gumath.h +41 -0
- data/ext/ruby_gumath/ruby_gumath_internal.h +45 -0
- data/ext/ruby_gumath/util.c +68 -0
- data/ext/ruby_gumath/util.h +48 -0
- data/gumath.gemspec +47 -0
- data/lib/gumath.rb +7 -0
- data/lib/gumath/version.rb +5 -0
- data/lib/ruby_gumath.so +0 -0
- metadata +206 -0
data/ext/ruby_gumath/gumath/libgumath/kernels/unary.c
@@ -0,0 +1,449 @@
+/*
+ * BSD 3-Clause License
+ *
+ * Copyright (c) 2017-2018, plures
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice,
+ *    this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ *    this list of conditions and the following disclaimer in the documentation
+ *    and/or other materials provided with the distribution.
+ *
+ * 3. Neither the name of the copyright holder nor the names of its
+ *    contributors may be used to endorse or promote products derived from
+ *    this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+ * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+ * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+
+#include <stdlib.h>
+#include <stdint.h>
+#include <string.h>
+#include <math.h>
+#include <complex.h>
+#include <inttypes.h>
+#include "ndtypes.h"
+#include "xnd.h"
+#include "gumath.h"
+
+
+/****************************************************************************/
+/* Optimized dispatch (T -> T) */
+/****************************************************************************/
+
+/* Structured kernel locations for fast lookup. */
+static ndt_t *
+infer_id_return(int *base, const ndt_t *in, ndt_context_t *ctx)
+{
+    ndt_t *dtype;
+    enum ndt tag;
+
+    switch (ndt_dtype(in)->tag) {
+    case Int8: *base = 0; tag = Int8; break;
+    case Int16: *base = 2; tag = Int16; break;
+    case Int32: *base = 4; tag = Int32; break;
+    case Int64: *base = 6; tag = Int64; break;
+    case Uint8: *base = 8; tag = Uint8; break;
+    case Uint16: *base = 10; tag = Uint16; break;
+    case Uint32: *base = 12; tag = Uint32; break;
+    case Uint64: *base = 14; tag = Uint64; break;
+    case Float32: *base = 16; tag = Float32; break;
+    case Float64: *base = 18; tag = Float64; break;
+    default:
+        ndt_err_format(ctx, NDT_RuntimeError, "invalid dtype");
+        return NULL;
+    }
+
+    dtype = ndt_primitive(tag, 0, ctx);
+    if (dtype == NULL) {
+        return NULL;
+    }
+
+    return ndt_copy_contiguous_dtype(in, dtype, ctx);
+}
+
+
+/****************************************************************************/
+/* Optimized dispatch (float return values) */
+/****************************************************************************/
+
+/* Structured kernel locations for fast lookup. */
+static ndt_t *
+infer_float_return(int *base, const ndt_t *in, ndt_context_t *ctx)
+{
+    ndt_t *dtype;
+    enum ndt tag;
+
+    switch (ndt_dtype(in)->tag) {
+    case Int8: *base = 0; tag = Float32; break;
+    case Int16: *base = 2; tag = Float32; break;
+    case Uint8: *base = 4; tag = Float32; break;
+    case Uint16: *base = 6; tag = Float32; break;
+    case Float32: *base = 8; tag = Float32; break;
+    case Int32: *base = 10; tag = Float64; break;
+    case Uint32: *base = 12; tag = Float64; break;
+    case Float64: *base = 14; tag = Float64; break;
+    default:
+        ndt_err_format(ctx, NDT_RuntimeError, "invalid dtype");
+        return NULL;
+    }
+
+    dtype = ndt_primitive(tag, 0, ctx);
+    if (dtype == NULL) {
+        return NULL;
+    }
+
+    return ndt_copy_contiguous_dtype(in, dtype, ctx);
+}
+
+
+/****************************************************************************/
+/* Optimized typecheck */
+/****************************************************************************/
+
+static const gm_kernel_set_t *
+unary_typecheck(ndt_apply_spec_t *spec, const gm_func_t *f,
+                const ndt_t *in[], int nin,
+                ndt_t *(*infer)(int *, const ndt_t *, ndt_context_t *),
+                ndt_context_t *ctx)
+{
+    const ndt_t *t;
+    int n;
+
+    if (nin != 1) {
+        ndt_err_format(ctx, NDT_ValueError,
+            "invalid number of arguments for %s(x): expected 1, got %d",
+            f->name, nin);
+        return NULL;
+    }
+    t = in[0];
+    assert(ndt_is_concrete(t));
+
+    spec->out[0] = infer(&n, t, ctx);
+    if (spec->out[0] == NULL) {
+        return NULL;
+    }
+    spec->nout = 1;
+    spec->nbroadcast = 0;
+
+    switch (t->tag) {
+    case FixedDim:
+        spec->flags = NDT_C|NDT_STRIDED;
+        spec->outer_dims = t->ndim;
+        if (ndt_is_c_contiguous(ndt_dim_at(t, t->ndim-1))) {
+            spec->flags |= NDT_ELEMWISE_1D;
+        }
+        return &f->kernels[n];
+    case VarDim:
+        spec->flags = NDT_C;
+        spec->outer_dims = t->ndim;
+        return &f->kernels[n+1];
+    default:
+        assert(t->ndim == 0);
+        spec->flags = NDT_C|NDT_STRIDED;
+        spec->outer_dims = 0;
+        return &f->kernels[n];
+    }
+}
+
+static const gm_kernel_set_t *
+unary_id_typecheck(ndt_apply_spec_t *spec, const gm_func_t *f,
+                   const ndt_t *in[], int nin,
+                   ndt_context_t *ctx)
+{
+    return unary_typecheck(spec, f, in, nin, infer_id_return, ctx);
+}
+
+static const gm_kernel_set_t *
+unary_float_typecheck(ndt_apply_spec_t *spec, const gm_func_t *f,
+                      const ndt_t *in[], int nin,
+                      ndt_context_t *ctx)
+{
+    return unary_typecheck(spec, f, in, nin, infer_float_return, ctx);
+}
+
+
+/****************************************************************************/
+/* Generated Xnd kernels */
+/****************************************************************************/
+
+#define XSTRINGIZE(v) #v
+#define STRINGIZE(v) XSTRINGIZE(v)
+
+static inline char *
+apply_index(const xnd_t *x)
+{
+    return xnd_fixed_apply_index(x);
+}
+
+
+#define XND_UNARY(func, t0, t1) \
+static int \
+gm_##func##_0D_##t0##_##t1(xnd_t stack[], ndt_context_t *ctx) \
+{ \
+    const xnd_t *in0 = &stack[0]; \
+    xnd_t *out = &stack[1]; \
+    (void)ctx; \
+    \
+    const t0##_t x = *(const t0##_t *)in0->ptr; \
+    *(t1##_t *)out->ptr = func(x); \
+    \
+    return 0; \
+} \
+    \
+static int \
+gm_fixed_##func##_1D_C_##t0##_##t1(xnd_t stack[], ndt_context_t *ctx) \
+{ \
+    const t0##_t *in0 = (const t0##_t *)apply_index(&stack[0]); \
+    t1##_t *out = (t1##_t *)apply_index(&stack[1]); \
+    int64_t N = xnd_fixed_shape(&stack[0]); \
+    (void)ctx; \
+    \
+    for (int64_t i = 0; i < N; i++) { \
+        out[i] = func(in0[i]); \
+    } \
+    \
+    return 0; \
+}
+
+#define XND_UNARY_INIT(funcname, func, t0, t1) \
+  { .name = STRINGIZE(funcname), \
+    .sig = "... * " STRINGIZE(t0) " -> ... * " STRINGIZE(t1), \
+    .Opt = gm_fixed_##func##_1D_C_##t0##_##t1, \
+    .C = gm_##func##_0D_##t0##_##t1 }, \
+    \
+  { .name = STRINGIZE(funcname), \
+    .sig = "var... * " STRINGIZE(t0) " -> var... * " STRINGIZE(t1), \
+    .C = gm_##func##_0D_##t0##_##t1 }
+
+
+/*****************************************************************************/
+/* Copy */
+/*****************************************************************************/
+
+#define copy(x) x
+XND_UNARY(copy, int8, int8)
+XND_UNARY(copy, int16, int16)
+XND_UNARY(copy, int32, int32)
+XND_UNARY(copy, int64, int64)
+XND_UNARY(copy, uint8, uint8)
+XND_UNARY(copy, uint16, uint16)
+XND_UNARY(copy, uint32, uint32)
+XND_UNARY(copy, uint64, uint64)
+XND_UNARY(copy, float32, float32)
+XND_UNARY(copy, float64, float64)
+
+
+static const gm_kernel_init_t unary_id[] = {
+  /* COPY */
+  XND_UNARY_INIT(copy, copy, int8, int8),
+  XND_UNARY_INIT(copy, copy, int16, int16),
+  XND_UNARY_INIT(copy, copy, int32, int32),
+  XND_UNARY_INIT(copy, copy, int64, int64),
+  XND_UNARY_INIT(copy, copy, uint8, uint8),
+  XND_UNARY_INIT(copy, copy, uint16, uint16),
+  XND_UNARY_INIT(copy, copy, uint32, uint32),
+  XND_UNARY_INIT(copy, copy, uint64, uint64),
+  XND_UNARY_INIT(copy, copy, float32, float32),
+  XND_UNARY_INIT(copy, copy, float64, float64),
+
+  { .name = NULL, .sig = NULL }
+};
+
+
+/*****************************************************************************/
+/* Math */
+/*****************************************************************************/
+
+#define XND_ALL_UNARY_FLOAT(name) \
+    XND_UNARY(name##f, int8, float32) \
+    XND_UNARY(name##f, int16, float32) \
+    XND_UNARY(name##f, uint8, float32) \
+    XND_UNARY(name##f, uint16, float32) \
+    XND_UNARY(name##f, float32, float32) \
+    XND_UNARY(name, int32, float64) \
+    XND_UNARY(name, uint32, float64) \
+    XND_UNARY(name, float64, float64)
+
+#define XND_ALL_UNARY_FLOAT_INIT(name) \
+    XND_UNARY_INIT(name, name##f, int8, float32), \
+    XND_UNARY_INIT(name, name##f, int16, float32), \
+    XND_UNARY_INIT(name, name##f, uint8, float32), \
+    XND_UNARY_INIT(name, name##f, uint16, float32), \
+    XND_UNARY_INIT(name, name##f, float32, float32), \
+    XND_UNARY_INIT(name, name, uint32, float64), \
+    XND_UNARY_INIT(name, name, int32, float64), \
+    XND_UNARY_INIT(name, name, float64, float64)
+
+
+/*****************************************************************************/
+/* Abs functions */
+/*****************************************************************************/
+
+XND_ALL_UNARY_FLOAT(fabs)
+
+
+/*****************************************************************************/
+/* Exponential functions */
+/*****************************************************************************/
+
+XND_ALL_UNARY_FLOAT(exp)
+XND_ALL_UNARY_FLOAT(exp2)
+XND_ALL_UNARY_FLOAT(expm1)
+
+
+/*****************************************************************************/
+/* Logarithm functions */
+/*****************************************************************************/
+
+XND_ALL_UNARY_FLOAT(log)
+XND_ALL_UNARY_FLOAT(log2)
+XND_ALL_UNARY_FLOAT(log10)
+XND_ALL_UNARY_FLOAT(log1p)
+XND_ALL_UNARY_FLOAT(logb)
+
+
+/*****************************************************************************/
+/* Power functions */
+/*****************************************************************************/
+
+XND_ALL_UNARY_FLOAT(sqrt)
+XND_ALL_UNARY_FLOAT(cbrt)
+
+
+/*****************************************************************************/
+/* Trigonometric functions */
+/*****************************************************************************/
+
+XND_ALL_UNARY_FLOAT(sin)
+XND_ALL_UNARY_FLOAT(cos)
+XND_ALL_UNARY_FLOAT(tan)
+XND_ALL_UNARY_FLOAT(asin)
+XND_ALL_UNARY_FLOAT(acos)
+XND_ALL_UNARY_FLOAT(atan)
+
+
+/*****************************************************************************/
+/* Hyperbolic functions */
+/*****************************************************************************/
+
+XND_ALL_UNARY_FLOAT(sinh)
+XND_ALL_UNARY_FLOAT(cosh)
+XND_ALL_UNARY_FLOAT(tanh)
+XND_ALL_UNARY_FLOAT(asinh)
+XND_ALL_UNARY_FLOAT(acosh)
+XND_ALL_UNARY_FLOAT(atanh)
+
+
+/*****************************************************************************/
+/* Error and gamma functions */
+/*****************************************************************************/
+
+XND_ALL_UNARY_FLOAT(erf)
+XND_ALL_UNARY_FLOAT(erfc)
+XND_ALL_UNARY_FLOAT(lgamma)
+XND_ALL_UNARY_FLOAT(tgamma)
+
+
+/*****************************************************************************/
+/* Ceiling, floor, trunc */
+/*****************************************************************************/
+
+XND_ALL_UNARY_FLOAT(ceil)
+XND_ALL_UNARY_FLOAT(floor)
+XND_ALL_UNARY_FLOAT(trunc)
+XND_ALL_UNARY_FLOAT(round)
+XND_ALL_UNARY_FLOAT(nearbyint)
+
+
+static const gm_kernel_init_t unary_float[] = {
+  /* ABS */
+  XND_ALL_UNARY_FLOAT_INIT(fabs),
+
+  /* EXPONENTIAL */
+  XND_ALL_UNARY_FLOAT_INIT(exp),
+  XND_ALL_UNARY_FLOAT_INIT(exp2),
+  XND_ALL_UNARY_FLOAT_INIT(expm1),
+
+  /* LOGARITHM */
+  XND_ALL_UNARY_FLOAT_INIT(log),
+  XND_ALL_UNARY_FLOAT_INIT(log2),
+  XND_ALL_UNARY_FLOAT_INIT(log10),
+  XND_ALL_UNARY_FLOAT_INIT(log1p),
+  XND_ALL_UNARY_FLOAT_INIT(logb),
+
+  /* POWER */
+  XND_ALL_UNARY_FLOAT_INIT(sqrt),
+  XND_ALL_UNARY_FLOAT_INIT(cbrt),
+
+  /* TRIGONOMETRIC */
+  XND_ALL_UNARY_FLOAT_INIT(sin),
+  XND_ALL_UNARY_FLOAT_INIT(cos),
+  XND_ALL_UNARY_FLOAT_INIT(tan),
+  XND_ALL_UNARY_FLOAT_INIT(asin),
+  XND_ALL_UNARY_FLOAT_INIT(acos),
+  XND_ALL_UNARY_FLOAT_INIT(atan),
+
+  /* HYPERBOLIC */
+  XND_ALL_UNARY_FLOAT_INIT(sinh),
+  XND_ALL_UNARY_FLOAT_INIT(cosh),
+  XND_ALL_UNARY_FLOAT_INIT(tanh),
+  XND_ALL_UNARY_FLOAT_INIT(asinh),
+  XND_ALL_UNARY_FLOAT_INIT(acosh),
+  XND_ALL_UNARY_FLOAT_INIT(atanh),
+
+  /* ERROR AND GAMMA */
+  XND_ALL_UNARY_FLOAT_INIT(erf),
+  XND_ALL_UNARY_FLOAT_INIT(erfc),
+  XND_ALL_UNARY_FLOAT_INIT(lgamma),
+  XND_ALL_UNARY_FLOAT_INIT(tgamma),
+
+  /* CEILING, FLOOR, TRUNC */
+  XND_ALL_UNARY_FLOAT_INIT(ceil),
+  XND_ALL_UNARY_FLOAT_INIT(floor),
+  XND_ALL_UNARY_FLOAT_INIT(trunc),
+  XND_ALL_UNARY_FLOAT_INIT(round),
+  XND_ALL_UNARY_FLOAT_INIT(nearbyint),
+
+  { .name = NULL, .sig = NULL }
+};
+
+
+/****************************************************************************/
+/* Initialize kernel table */
+/****************************************************************************/
+
+int
+gm_init_unary_kernels(gm_tbl_t *tbl, ndt_context_t *ctx)
+{
+    const gm_kernel_init_t *k;
+
+    for (k = unary_id; k->name != NULL; k++) {
+        if (gm_add_kernel_typecheck(tbl, k, ctx, &unary_id_typecheck) < 0) {
+            return -1;
+        }
+    }
+
+    for (k = unary_float; k->name != NULL; k++) {
+        if (gm_add_kernel_typecheck(tbl, k, ctx, &unary_float_typecheck) < 0) {
+            return -1;
+        }
+    }
+
+    return 0;
+}
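For orientation: every kernel body in the file above is generated by the XND_UNARY and XND_UNARY_INIT macros. Hand-expanding one pairing taken from XND_ALL_UNARY_FLOAT — sinf with the int8 -> float32 case — gives roughly the following sketch (written out by hand, not text copied from the package): a 0D scalar kernel, an optimized C-contiguous 1D kernel, and the two gm_kernel_init_t entries that register them under the name "sin".

/* XND_UNARY(sinf, int8, float32) expands to approximately: */

static int
gm_sinf_0D_int8_float32(xnd_t stack[], ndt_context_t *ctx)
{
    /* scalar kernel: one input at stack[0], one output at stack[1] */
    const xnd_t *in0 = &stack[0];
    xnd_t *out = &stack[1];
    (void)ctx;

    const int8_t x = *(const int8_t *)in0->ptr;
    *(float32_t *)out->ptr = sinf(x);

    return 0;
}

static int
gm_fixed_sinf_1D_C_int8_float32(xnd_t stack[], ndt_context_t *ctx)
{
    /* optimized kernel for a C-contiguous innermost dimension */
    const int8_t *in0 = (const int8_t *)apply_index(&stack[0]);
    float32_t *out = (float32_t *)apply_index(&stack[1]);
    int64_t N = xnd_fixed_shape(&stack[0]);
    (void)ctx;

    for (int64_t i = 0; i < N; i++) {
        out[i] = sinf(in0[i]);
    }

    return 0;
}

/* XND_UNARY_INIT(sin, sinf, int8, float32) expands to the two table entries: */

{ .name = "sin",
  .sig = "... * int8 -> ... * float32",
  .Opt = gm_fixed_sinf_1D_C_int8_float32,
  .C = gm_sinf_0D_int8_float32 },

{ .name = "sin",
  .sig = "var... * int8 -> var... * float32",
  .C = gm_sinf_0D_int8_float32 }

unary_typecheck() above is what makes the .Opt entry usable: for fixed dimensions with a C-contiguous innermost dimension it sets NDT_ELEMWISE_1D, while var dimensions always fall back to the scalar kernel in .C (the actual kernel selection happens in parts of libgumath not shown in this hunk).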
data/ext/ruby_gumath/gumath/libgumath/nploops.c
@@ -0,0 +1,219 @@
+/*
+ * BSD 3-Clause License
+ *
+ * Copyright (c) 2017-2018, plures
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice,
+ *    this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ *    this list of conditions and the following disclaimer in the documentation
+ *    and/or other materials provided with the distribution.
+ *
+ * 3. Neither the name of the copyright holder nor the names of its
+ *    contributors may be used to endorse or promote products derived from
+ *    this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+ * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+ * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+
+#include <stdlib.h>
+#include <stdint.h>
+#include <string.h>
+#include <inttypes.h>
+#include "ndtypes.h"
+#include "xnd.h"
+#include "gumath.h"
+
+
+/* Loops and functions for NumPy strided kernels. */
+
+
+#define ASSIGN_OVERFLOW(array, index, maxindex, value, maxvalue, ctx) \
+    do { \
+        if (index >= maxindex) { \
+            ndt_err_format(ctx, NDT_RuntimeError, "unexpected array overflow"); \
+            return -1; \
+        } \
+        if (value >= maxvalue) { \
+            ndt_err_format(ctx, NDT_RuntimeError, "unexpected intptr overflow"); \
+            return -1; \
+        } \
+        array[index++] = (intptr_t)value; \
+    } while (0)
+
+
+typedef struct {
+    int ndim;
+    int64_t itemsize;
+    int64_t nelem;
+    int64_t shape[NDT_MAX_DIM];
+    int64_t strides[NDT_MAX_DIM];
+    char *ptr;
+} gm_ndarray_t;
+
+
+static int
+gm_as_ndarray(gm_ndarray_t *a, const xnd_t *x, ndt_context_t *ctx)
+{
+    const ndt_t *t = x->type;
+    int i;
+
+    assert(t->ndim <= NDT_MAX_DIM);
+
+    if (ndt_is_abstract(t)) {
+        ndt_err_format(ctx, NDT_TypeError, "type is not an ndarray");
+        return -1;
+    }
+
+    if (!ndt_is_ndarray(t)) {
+        ndt_err_format(ctx, NDT_TypeError, "type is not an ndarray");
+        return -1;
+    }
+
+    if (t->ndim == 0) {
+        a->ndim = 1;
+        a->itemsize = t->datasize;
+        a->nelem = 1;
+        a->shape[0] = 1;
+        a->strides[0] = 0;
+        a->ptr = x->ptr + x->index * t->datasize;
+        return 0;
+    }
+
+    a->ndim = t->ndim;
+    a->itemsize = t->Concrete.FixedDim.itemsize;
+    a->nelem = t->datasize / t->Concrete.FixedDim.itemsize;
+    a->ptr = x->ptr + x->index * a->itemsize;
+
+    for (i=0; t->ndim > 0; i++, t=t->FixedDim.type) {
+        a->shape[i] = t->FixedDim.shape;
+        a->strides[i] = t->Concrete.FixedDim.step * a->itemsize;
+    }
+
+    return 0;
+}
+
+/*
+ * Convert an xnd container into the {args, dimensions, strides} representation.
+ */
+int
+gm_np_convert_xnd(char **args, const int nargs,
+                  intptr_t *dimensions, const int dims_size,
+                  intptr_t *steps, const int steps_size,
+                  xnd_t stack[], const int outer_dims,
+                  ndt_context_t *ctx)
+{
+    ALLOCA(gm_ndarray_t, nd, nargs);
+    int64_t shape;
+    int n = 0, m = 0;
+    int i, k;
+
+    if (nargs == 0) {
+        return 0;
+    }
+
+    for (i = 0; i < nargs; i++) {
+        if (gm_as_ndarray(&nd[i], &stack[i], ctx) < 0) {
+            return -1;
+        }
+        args[i] = nd[i].ptr;
+    }
+
+    for (i = 0; i < outer_dims; i++) {
+        shape = nd[0].shape[i];
+        ASSIGN_OVERFLOW(dimensions, n, dims_size, shape, INTPTR_MAX, ctx);
+
+        for (k = 0; k < nargs; k++) {
+            if (nd[k].shape[i] != shape) {
+                ndt_err_format(ctx, NDT_RuntimeError,
+                    "unexpected shape mismatch in outer dimensions");
+                return -1;
+            }
+
+            ASSIGN_OVERFLOW(steps, m, steps_size, nd[k].strides[i], INTPTR_MAX, ctx);
+        }
+    }
+
+    for (i = 0; i < nargs; i++) {
+        for (k = outer_dims; k < nd[i].ndim; k++) {
+            ASSIGN_OVERFLOW(dimensions, n, dims_size, nd[i].shape[k], INTPTR_MAX, ctx);
+            ASSIGN_OVERFLOW(steps, m, steps_size, nd[i].strides[k], INTPTR_MAX, ctx);
+        }
+    }
+
+    return 0;
+}
+
+/*
+ * Flatten an xnd container into a 1D representation for direct elementwise
+ * kernel application. A scalar is expanded into a 1D array of size 1.
+ */
+int
+gm_np_flatten(char **args, const int nargs,
+              int64_t *dimensions,
+              int64_t *steps,
+              const xnd_t stack[],
+              ndt_context_t *ctx)
+{
+    gm_ndarray_t nd;
+    int i;
+
+    for (i = 0; i < nargs; i++) {
+        if (gm_as_ndarray(&nd, &stack[i], ctx) < 0) {
+            return -1;
+        }
+        args[i] = nd.ptr;
+        dimensions[i] = nd.nelem;
+        steps[i] = nd.itemsize;
+    }
+
+    return 0;
+}
+
+int
+gm_np_map(const gm_strided_kernel_t f,
+          char **args, int nargs,
+          intptr_t *dimensions,
+          intptr_t *steps,
+          void *data,
+          int outer_dims)
+{
+    ALLOCA(char *, next, nargs);
+    intptr_t shape, i;
+    int ret, k;
+
+    if (outer_dims <= 1) {
+        return f(args, dimensions, steps, data);
+    }
+
+    shape = dimensions[0];
+
+    for (i = 0; i < shape; i++) {
+        for (k = 0; k < nargs; k++) {
+            next[k] = args[k] + i * steps[k];
+        }
+
+        ret = gm_np_map(f, next, nargs, dimensions+1, steps+nargs, data,
+                        outer_dims-1);
+        if (ret != 0) {
+            return ret;
+        }
+    }
+
+    return 0;
+}