gumath 0.2.0dev5
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/CONTRIBUTING.md +61 -0
- data/Gemfile +5 -0
- data/History.md +0 -0
- data/README.md +5 -0
- data/Rakefile +105 -0
- data/ext/ruby_gumath/examples.c +126 -0
- data/ext/ruby_gumath/extconf.rb +97 -0
- data/ext/ruby_gumath/functions.c +106 -0
- data/ext/ruby_gumath/gufunc_object.c +79 -0
- data/ext/ruby_gumath/gufunc_object.h +55 -0
- data/ext/ruby_gumath/gumath/AUTHORS.txt +5 -0
- data/ext/ruby_gumath/gumath/INSTALL.txt +42 -0
- data/ext/ruby_gumath/gumath/LICENSE.txt +29 -0
- data/ext/ruby_gumath/gumath/MANIFEST.in +3 -0
- data/ext/ruby_gumath/gumath/Makefile.in +62 -0
- data/ext/ruby_gumath/gumath/README.rst +20 -0
- data/ext/ruby_gumath/gumath/config.guess +1530 -0
- data/ext/ruby_gumath/gumath/config.h.in +52 -0
- data/ext/ruby_gumath/gumath/config.sub +1782 -0
- data/ext/ruby_gumath/gumath/configure +5049 -0
- data/ext/ruby_gumath/gumath/configure.ac +167 -0
- data/ext/ruby_gumath/gumath/doc/_static/copybutton.js +66 -0
- data/ext/ruby_gumath/gumath/doc/conf.py +26 -0
- data/ext/ruby_gumath/gumath/doc/gumath/functions.rst +62 -0
- data/ext/ruby_gumath/gumath/doc/gumath/index.rst +26 -0
- data/ext/ruby_gumath/gumath/doc/index.rst +45 -0
- data/ext/ruby_gumath/gumath/doc/libgumath/data-structures.rst +130 -0
- data/ext/ruby_gumath/gumath/doc/libgumath/functions.rst +78 -0
- data/ext/ruby_gumath/gumath/doc/libgumath/index.rst +25 -0
- data/ext/ruby_gumath/gumath/doc/libgumath/kernels.rst +41 -0
- data/ext/ruby_gumath/gumath/doc/releases/index.rst +11 -0
- data/ext/ruby_gumath/gumath/install-sh +527 -0
- data/ext/ruby_gumath/gumath/libgumath/Makefile.in +170 -0
- data/ext/ruby_gumath/gumath/libgumath/Makefile.vc +160 -0
- data/ext/ruby_gumath/gumath/libgumath/apply.c +201 -0
- data/ext/ruby_gumath/gumath/libgumath/extending/bfloat16.c +130 -0
- data/ext/ruby_gumath/gumath/libgumath/extending/examples.c +176 -0
- data/ext/ruby_gumath/gumath/libgumath/extending/graph.c +393 -0
- data/ext/ruby_gumath/gumath/libgumath/extending/pdist.c +140 -0
- data/ext/ruby_gumath/gumath/libgumath/extending/quaternion.c +156 -0
- data/ext/ruby_gumath/gumath/libgumath/func.c +177 -0
- data/ext/ruby_gumath/gumath/libgumath/gumath.h +205 -0
- data/ext/ruby_gumath/gumath/libgumath/kernels/binary.c +547 -0
- data/ext/ruby_gumath/gumath/libgumath/kernels/unary.c +449 -0
- data/ext/ruby_gumath/gumath/libgumath/nploops.c +219 -0
- data/ext/ruby_gumath/gumath/libgumath/tbl.c +223 -0
- data/ext/ruby_gumath/gumath/libgumath/thread.c +175 -0
- data/ext/ruby_gumath/gumath/libgumath/xndloops.c +130 -0
- data/ext/ruby_gumath/gumath/python/extending.py +24 -0
- data/ext/ruby_gumath/gumath/python/gumath/__init__.py +74 -0
- data/ext/ruby_gumath/gumath/python/gumath/_gumath.c +577 -0
- data/ext/ruby_gumath/gumath/python/gumath/examples.c +93 -0
- data/ext/ruby_gumath/gumath/python/gumath/functions.c +77 -0
- data/ext/ruby_gumath/gumath/python/gumath/pygumath.h +95 -0
- data/ext/ruby_gumath/gumath/python/test_gumath.py +405 -0
- data/ext/ruby_gumath/gumath/setup.py +298 -0
- data/ext/ruby_gumath/gumath/vcbuild/INSTALL.txt +36 -0
- data/ext/ruby_gumath/gumath/vcbuild/vcbuild32.bat +21 -0
- data/ext/ruby_gumath/gumath/vcbuild/vcbuild64.bat +21 -0
- data/ext/ruby_gumath/gumath/vcbuild/vcclean.bat +10 -0
- data/ext/ruby_gumath/gumath/vcbuild/vcdistclean.bat +11 -0
- data/ext/ruby_gumath/include/gumath.h +205 -0
- data/ext/ruby_gumath/include/ruby_gumath.h +41 -0
- data/ext/ruby_gumath/lib/libgumath.a +0 -0
- data/ext/ruby_gumath/lib/libgumath.so +1 -0
- data/ext/ruby_gumath/lib/libgumath.so.0 +1 -0
- data/ext/ruby_gumath/lib/libgumath.so.0.2.0dev3 +0 -0
- data/ext/ruby_gumath/ruby_gumath.c +295 -0
- data/ext/ruby_gumath/ruby_gumath.h +41 -0
- data/ext/ruby_gumath/ruby_gumath_internal.h +45 -0
- data/ext/ruby_gumath/util.c +68 -0
- data/ext/ruby_gumath/util.h +48 -0
- data/gumath.gemspec +47 -0
- data/lib/gumath.rb +7 -0
- data/lib/gumath/version.rb +5 -0
- data/lib/ruby_gumath.so +0 -0
- metadata +206 -0
@@ -0,0 +1,449 @@
|
|
1
|
+
/*
|
2
|
+
* BSD 3-Clause License
|
3
|
+
*
|
4
|
+
* Copyright (c) 2017-2018, plures
|
5
|
+
* All rights reserved.
|
6
|
+
*
|
7
|
+
* Redistribution and use in source and binary forms, with or without
|
8
|
+
* modification, are permitted provided that the following conditions are met:
|
9
|
+
*
|
10
|
+
* 1. Redistributions of source code must retain the above copyright notice,
|
11
|
+
* this list of conditions and the following disclaimer.
|
12
|
+
*
|
13
|
+
* 2. Redistributions in binary form must reproduce the above copyright notice,
|
14
|
+
* this list of conditions and the following disclaimer in the documentation
|
15
|
+
* and/or other materials provided with the distribution.
|
16
|
+
*
|
17
|
+
* 3. Neither the name of the copyright holder nor the names of its
|
18
|
+
* contributors may be used to endorse or promote products derived from
|
19
|
+
* this software without specific prior written permission.
|
20
|
+
*
|
21
|
+
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
22
|
+
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
23
|
+
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
24
|
+
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
25
|
+
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
26
|
+
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
27
|
+
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
28
|
+
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
29
|
+
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
30
|
+
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
31
|
+
*/
|
32
|
+
|
33
|
+
|
34
|
+
#include <stdlib.h>
|
35
|
+
#include <stdint.h>
|
36
|
+
#include <string.h>
|
37
|
+
#include <math.h>
|
38
|
+
#include <complex.h>
|
39
|
+
#include <inttypes.h>
|
40
|
+
#include "ndtypes.h"
|
41
|
+
#include "xnd.h"
|
42
|
+
#include "gumath.h"
|
43
|
+
|
44
|
+
|
45
|
+
/****************************************************************************/
|
46
|
+
/* Optimized dispatch (T -> T) */
|
47
|
+
/****************************************************************************/
|
48
|
+
|
49
|
+
/* Structured kernel locations for fast lookup. */
|
50
|
+
static ndt_t *
|
51
|
+
infer_id_return(int *base, const ndt_t *in, ndt_context_t *ctx)
|
52
|
+
{
|
53
|
+
ndt_t *dtype;
|
54
|
+
enum ndt tag;
|
55
|
+
|
56
|
+
switch (ndt_dtype(in)->tag) {
|
57
|
+
case Int8: *base = 0; tag = Int8; break;
|
58
|
+
case Int16: *base = 2; tag = Int16; break;
|
59
|
+
case Int32: *base = 4; tag = Int32; break;
|
60
|
+
case Int64: *base = 6; tag = Int64; break;
|
61
|
+
case Uint8: *base = 8; tag = Uint8; break;
|
62
|
+
case Uint16: *base = 10; tag = Uint16; break;
|
63
|
+
case Uint32: *base = 12; tag = Uint32; break;
|
64
|
+
case Uint64: *base = 14; tag = Uint64; break;
|
65
|
+
case Float32: *base = 16; tag = Float32; break;
|
66
|
+
case Float64: *base = 18; tag = Float64; break;
|
67
|
+
default:
|
68
|
+
ndt_err_format(ctx, NDT_RuntimeError, "invalid dtype");
|
69
|
+
return NULL;
|
70
|
+
}
|
71
|
+
|
72
|
+
dtype = ndt_primitive(tag, 0, ctx);
|
73
|
+
if (dtype == NULL) {
|
74
|
+
return NULL;
|
75
|
+
}
|
76
|
+
|
77
|
+
return ndt_copy_contiguous_dtype(in, dtype, ctx);
|
78
|
+
}
|
79
|
+
|
80
|
+
|
81
|
+
/****************************************************************************/
|
82
|
+
/* Optimized dispatch (float return values) */
|
83
|
+
/****************************************************************************/
|
84
|
+
|
85
|
+
/* Structured kernel locations for fast lookup. */
|
86
|
+
static ndt_t *
|
87
|
+
infer_float_return(int *base, const ndt_t *in, ndt_context_t *ctx)
|
88
|
+
{
|
89
|
+
ndt_t *dtype;
|
90
|
+
enum ndt tag;
|
91
|
+
|
92
|
+
switch (ndt_dtype(in)->tag) {
|
93
|
+
case Int8: *base = 0; tag = Float32; break;
|
94
|
+
case Int16: *base = 2; tag = Float32; break;
|
95
|
+
case Uint8: *base = 4; tag = Float32; break;
|
96
|
+
case Uint16: *base = 6; tag = Float32; break;
|
97
|
+
case Float32: *base = 8; tag = Float32; break;
|
98
|
+
case Int32: *base = 10; tag = Float64; break;
|
99
|
+
case Uint32: *base = 12; tag = Float64; break;
|
100
|
+
case Float64: *base = 14; tag = Float64; break;
|
101
|
+
default:
|
102
|
+
ndt_err_format(ctx, NDT_RuntimeError, "invalid dtype");
|
103
|
+
return NULL;
|
104
|
+
}
|
105
|
+
|
106
|
+
dtype = ndt_primitive(tag, 0, ctx);
|
107
|
+
if (dtype == NULL) {
|
108
|
+
return NULL;
|
109
|
+
}
|
110
|
+
|
111
|
+
return ndt_copy_contiguous_dtype(in, dtype, ctx);
|
112
|
+
}
|
113
|
+
|
114
|
+
|
115
|
+
/****************************************************************************/
|
116
|
+
/* Optimized typecheck */
|
117
|
+
/****************************************************************************/
|
118
|
+
|
119
|
+
static const gm_kernel_set_t *
|
120
|
+
unary_typecheck(ndt_apply_spec_t *spec, const gm_func_t *f,
|
121
|
+
const ndt_t *in[], int nin,
|
122
|
+
ndt_t *(*infer)(int *, const ndt_t *, ndt_context_t *),
|
123
|
+
ndt_context_t *ctx)
|
124
|
+
{
|
125
|
+
const ndt_t *t;
|
126
|
+
int n;
|
127
|
+
|
128
|
+
if (nin != 1) {
|
129
|
+
ndt_err_format(ctx, NDT_ValueError,
|
130
|
+
"invalid number of arguments for %s(x): expected 1, got %d",
|
131
|
+
f->name, nin);
|
132
|
+
return NULL;
|
133
|
+
}
|
134
|
+
t = in[0];
|
135
|
+
assert(ndt_is_concrete(t));
|
136
|
+
|
137
|
+
spec->out[0] = infer(&n, t, ctx);
|
138
|
+
if (spec->out[0] == NULL) {
|
139
|
+
return NULL;
|
140
|
+
}
|
141
|
+
spec->nout = 1;
|
142
|
+
spec->nbroadcast = 0;
|
143
|
+
|
144
|
+
switch (t->tag) {
|
145
|
+
case FixedDim:
|
146
|
+
spec->flags = NDT_C|NDT_STRIDED;
|
147
|
+
spec->outer_dims = t->ndim;
|
148
|
+
if (ndt_is_c_contiguous(ndt_dim_at(t, t->ndim-1))) {
|
149
|
+
spec->flags |= NDT_ELEMWISE_1D;
|
150
|
+
}
|
151
|
+
return &f->kernels[n];
|
152
|
+
case VarDim:
|
153
|
+
spec->flags = NDT_C;
|
154
|
+
spec->outer_dims = t->ndim;
|
155
|
+
return &f->kernels[n+1];
|
156
|
+
default:
|
157
|
+
assert(t->ndim == 0);
|
158
|
+
spec->flags = NDT_C|NDT_STRIDED;
|
159
|
+
spec->outer_dims = 0;
|
160
|
+
return &f->kernels[n];
|
161
|
+
}
|
162
|
+
}
|
163
|
+
|
164
|
+
static const gm_kernel_set_t *
|
165
|
+
unary_id_typecheck(ndt_apply_spec_t *spec, const gm_func_t *f,
|
166
|
+
const ndt_t *in[], int nin,
|
167
|
+
ndt_context_t *ctx)
|
168
|
+
{
|
169
|
+
return unary_typecheck(spec, f, in, nin, infer_id_return, ctx);
|
170
|
+
}
|
171
|
+
|
172
|
+
static const gm_kernel_set_t *
|
173
|
+
unary_float_typecheck(ndt_apply_spec_t *spec, const gm_func_t *f,
|
174
|
+
const ndt_t *in[], int nin,
|
175
|
+
ndt_context_t *ctx)
|
176
|
+
{
|
177
|
+
return unary_typecheck(spec, f, in, nin, infer_float_return, ctx);
|
178
|
+
}
|
179
|
+
|
180
|
+
|
181
|
+
/****************************************************************************/
|
182
|
+
/* Generated Xnd kernels */
|
183
|
+
/****************************************************************************/
|
184
|
+
|
185
|
+
/*
 * Two-level stringification: STRINGIZE(v) macro-expands its argument
 * before XSTRINGIZE turns the result into a string literal.
 */
#define XSTRINGIZE(v) #v
|
186
|
+
#define STRINGIZE(v) XSTRINGIZE(v)
|
187
|
+
|
188
|
+
static inline char *
|
189
|
+
apply_index(const xnd_t *x)
|
190
|
+
{
|
191
|
+
return xnd_fixed_apply_index(x);
|
192
|
+
}
|
193
|
+
|
194
|
+
|
195
|
+
/*
 * Generate the two kernels for the unary function 'func' that maps
 * elements of type <t0>_t to elements of type <t1>_t:
 *
 *   gm_<func>_0D_<t0>_<t1>
 *       Apply func to the single element at stack[0].ptr and store the
 *       result at stack[1].ptr.
 *
 *   gm_fixed_<func>_1D_C_<t0>_<t1>
 *       Apply func to xnd_fixed_shape(&stack[0]) consecutive elements,
 *       starting at the addresses returned by apply_index() for the
 *       input and the output.
 *
 * Both kernels ignore ctx and always return 0.
 */
#define XND_UNARY(func, t0, t1) \
static int                                                              \
gm_##func##_0D_##t0##_##t1(xnd_t stack[], ndt_context_t *ctx)           \
{                                                                       \
    const t0##_t *src = (const t0##_t *)stack[0].ptr;                   \
    t1##_t *dst = (t1##_t *)stack[1].ptr;                               \
    (void)ctx;                                                          \
                                                                        \
    *dst = func(*src);                                                  \
                                                                        \
    return 0;                                                           \
}                                                                       \
                                                                        \
static int                                                              \
gm_fixed_##func##_1D_C_##t0##_##t1(xnd_t stack[], ndt_context_t *ctx)   \
{                                                                       \
    const t0##_t *src = (const t0##_t *)apply_index(&stack[0]);         \
    t1##_t *dst = (t1##_t *)apply_index(&stack[1]);                     \
    const int64_t count = xnd_fixed_shape(&stack[0]);                   \
    int64_t k;                                                          \
    (void)ctx;                                                          \
                                                                        \
    for (k = 0; k < count; k++) {                                       \
        dst[k] = func(src[k]);                                          \
    }                                                                   \
                                                                        \
    return 0;                                                           \
}
|
223
|
+
|
224
|
+
/*
 * Kernel table entries for the kernels generated by XND_UNARY(func, t0, t1),
 * registered under the name 'funcname'.  Expands to two initializers:
 *
 *   1. the "... * t0 -> ... * t1" signature with both the element kernel
 *      (.C) and the 1D kernel (.Opt);
 *   2. the "var... * t0 -> var... * t1" signature with the element kernel
 *      only.
 */
#define XND_UNARY_INIT(funcname, func, t0, t1) \
  { .name = STRINGIZE(funcname),                                       \
    .sig = "... * " STRINGIZE(t0) " -> ... * " STRINGIZE(t1),          \
    .C = gm_##func##_0D_##t0##_##t1,                                   \
    .Opt = gm_fixed_##func##_1D_C_##t0##_##t1 },                       \
                                                                       \
  { .name = STRINGIZE(funcname),                                       \
    .sig = "var... * " STRINGIZE(t0) " -> var... * " STRINGIZE(t1),    \
    .C = gm_##func##_0D_##t0##_##t1 }
|
233
|
+
|
234
|
+
|
235
|
+
/*****************************************************************************/
|
236
|
+
/* Copy */
|
237
|
+
/*****************************************************************************/
|
238
|
+
|
239
|
+
#define copy(x) x
|
240
|
+
XND_UNARY(copy, int8, int8)
|
241
|
+
XND_UNARY(copy, int16, int16)
|
242
|
+
XND_UNARY(copy, int32, int32)
|
243
|
+
XND_UNARY(copy, int64, int64)
|
244
|
+
XND_UNARY(copy, uint8, uint8)
|
245
|
+
XND_UNARY(copy, uint16, uint16)
|
246
|
+
XND_UNARY(copy, uint32, uint32)
|
247
|
+
XND_UNARY(copy, uint64, uint64)
|
248
|
+
XND_UNARY(copy, float32, float32)
|
249
|
+
XND_UNARY(copy, float64, float64)
|
250
|
+
|
251
|
+
|
252
|
+
static const gm_kernel_init_t unary_id[] = {
|
253
|
+
/* COPY */
|
254
|
+
XND_UNARY_INIT(copy, copy, int8, int8),
|
255
|
+
XND_UNARY_INIT(copy, copy, int16, int16),
|
256
|
+
XND_UNARY_INIT(copy, copy, int32, int32),
|
257
|
+
XND_UNARY_INIT(copy, copy, int64, int64),
|
258
|
+
XND_UNARY_INIT(copy, copy, uint8, uint8),
|
259
|
+
XND_UNARY_INIT(copy, copy, uint16, uint16),
|
260
|
+
XND_UNARY_INIT(copy, copy, uint32, uint32),
|
261
|
+
XND_UNARY_INIT(copy, copy, uint64, uint64),
|
262
|
+
XND_UNARY_INIT(copy, copy, float32, float32),
|
263
|
+
XND_UNARY_INIT(copy, copy, float64, float64),
|
264
|
+
|
265
|
+
{ .name = NULL, .sig = NULL }
|
266
|
+
};
|
267
|
+
|
268
|
+
|
269
|
+
/*****************************************************************************/
|
270
|
+
/* Math */
|
271
|
+
/*****************************************************************************/
|
272
|
+
|
273
|
+
/*
 * Generate all float-returning kernels for the math function 'name'.
 * int8, int16, uint8, uint16 and float32 inputs call the single precision
 * variant name##f and return float32; int32, uint32 and float64 inputs
 * call 'name' and return float64.
 */
#define XND_ALL_UNARY_FLOAT(name) \
    XND_UNARY(name##f, int8, float32)    \
    XND_UNARY(name##f, int16, float32)   \
    XND_UNARY(name##f, uint8, float32)   \
    XND_UNARY(name##f, uint16, float32)  \
    XND_UNARY(name##f, float32, float32) \
    XND_UNARY(name, int32, float64)      \
    XND_UNARY(name, uint32, float64)     \
    XND_UNARY(name, float64, float64)

/*
 * Kernel table entries for 'name'.
 *
 * unary_typecheck() selects a kernel with f->kernels[offset], where the
 * offset comes from infer_float_return().  The entries therefore have to
 * be listed in exactly the order of those offsets (two slots per dtype):
 *
 *   int8: 0, int16: 2, uint8: 4, uint16: 6, float32: 8,
 *   int32: 10, uint32: 12, float64: 14
 *
 * int32 must precede uint32; otherwise int32 input is dispatched to the
 * uint32 kernel and vice versa.  NOTE(review): this assumes that
 * gm_add_kernel_typecheck() stores kernels in table order -- confirm.
 */
#define XND_ALL_UNARY_FLOAT_INIT(name) \
    XND_UNARY_INIT(name, name##f, int8, float32),    \
    XND_UNARY_INIT(name, name##f, int16, float32),   \
    XND_UNARY_INIT(name, name##f, uint8, float32),   \
    XND_UNARY_INIT(name, name##f, uint16, float32),  \
    XND_UNARY_INIT(name, name##f, float32, float32), \
    XND_UNARY_INIT(name, name, int32, float64),      \
    XND_UNARY_INIT(name, name, uint32, float64),     \
    XND_UNARY_INIT(name, name, float64, float64)
|
292
|
+
|
293
|
+
|
294
|
+
/*****************************************************************************/
|
295
|
+
/* Abs functions */
|
296
|
+
/*****************************************************************************/
|
297
|
+
|
298
|
+
XND_ALL_UNARY_FLOAT(fabs)
|
299
|
+
|
300
|
+
|
301
|
+
/*****************************************************************************/
|
302
|
+
/* Exponential functions */
|
303
|
+
/*****************************************************************************/
|
304
|
+
|
305
|
+
XND_ALL_UNARY_FLOAT(exp)
|
306
|
+
XND_ALL_UNARY_FLOAT(exp2)
|
307
|
+
XND_ALL_UNARY_FLOAT(expm1)
|
308
|
+
|
309
|
+
|
310
|
+
/*****************************************************************************/
|
311
|
+
/* Logarithm functions */
|
312
|
+
/*****************************************************************************/
|
313
|
+
|
314
|
+
XND_ALL_UNARY_FLOAT(log)
|
315
|
+
XND_ALL_UNARY_FLOAT(log2)
|
316
|
+
XND_ALL_UNARY_FLOAT(log10)
|
317
|
+
XND_ALL_UNARY_FLOAT(log1p)
|
318
|
+
XND_ALL_UNARY_FLOAT(logb)
|
319
|
+
|
320
|
+
|
321
|
+
/*****************************************************************************/
|
322
|
+
/* Power functions */
|
323
|
+
/*****************************************************************************/
|
324
|
+
|
325
|
+
XND_ALL_UNARY_FLOAT(sqrt)
|
326
|
+
XND_ALL_UNARY_FLOAT(cbrt)
|
327
|
+
|
328
|
+
|
329
|
+
/*****************************************************************************/
|
330
|
+
/* Trigonometric functions */
|
331
|
+
/*****************************************************************************/
|
332
|
+
|
333
|
+
XND_ALL_UNARY_FLOAT(sin)
|
334
|
+
XND_ALL_UNARY_FLOAT(cos)
|
335
|
+
XND_ALL_UNARY_FLOAT(tan)
|
336
|
+
XND_ALL_UNARY_FLOAT(asin)
|
337
|
+
XND_ALL_UNARY_FLOAT(acos)
|
338
|
+
XND_ALL_UNARY_FLOAT(atan)
|
339
|
+
|
340
|
+
|
341
|
+
/*****************************************************************************/
|
342
|
+
/* Hyperbolic functions */
|
343
|
+
/*****************************************************************************/
|
344
|
+
|
345
|
+
XND_ALL_UNARY_FLOAT(sinh)
|
346
|
+
XND_ALL_UNARY_FLOAT(cosh)
|
347
|
+
XND_ALL_UNARY_FLOAT(tanh)
|
348
|
+
XND_ALL_UNARY_FLOAT(asinh)
|
349
|
+
XND_ALL_UNARY_FLOAT(acosh)
|
350
|
+
XND_ALL_UNARY_FLOAT(atanh)
|
351
|
+
|
352
|
+
|
353
|
+
/*****************************************************************************/
|
354
|
+
/* Error and gamma functions */
|
355
|
+
/*****************************************************************************/
|
356
|
+
|
357
|
+
XND_ALL_UNARY_FLOAT(erf)
|
358
|
+
XND_ALL_UNARY_FLOAT(erfc)
|
359
|
+
XND_ALL_UNARY_FLOAT(lgamma)
|
360
|
+
XND_ALL_UNARY_FLOAT(tgamma)
|
361
|
+
|
362
|
+
|
363
|
+
/*****************************************************************************/
|
364
|
+
/* Ceiling, floor, trunc */
|
365
|
+
/*****************************************************************************/
|
366
|
+
|
367
|
+
XND_ALL_UNARY_FLOAT(ceil)
|
368
|
+
XND_ALL_UNARY_FLOAT(floor)
|
369
|
+
XND_ALL_UNARY_FLOAT(trunc)
|
370
|
+
XND_ALL_UNARY_FLOAT(round)
|
371
|
+
XND_ALL_UNARY_FLOAT(nearbyint)
|
372
|
+
|
373
|
+
|
374
|
+
static const gm_kernel_init_t unary_float[] = {
|
375
|
+
/* ABS */
|
376
|
+
XND_ALL_UNARY_FLOAT_INIT(fabs),
|
377
|
+
|
378
|
+
/* EXPONENTIAL */
|
379
|
+
XND_ALL_UNARY_FLOAT_INIT(exp),
|
380
|
+
XND_ALL_UNARY_FLOAT_INIT(exp2),
|
381
|
+
XND_ALL_UNARY_FLOAT_INIT(expm1),
|
382
|
+
|
383
|
+
/* LOGARITHM */
|
384
|
+
XND_ALL_UNARY_FLOAT_INIT(log),
|
385
|
+
XND_ALL_UNARY_FLOAT_INIT(log2),
|
386
|
+
XND_ALL_UNARY_FLOAT_INIT(log10),
|
387
|
+
XND_ALL_UNARY_FLOAT_INIT(log1p),
|
388
|
+
XND_ALL_UNARY_FLOAT_INIT(logb),
|
389
|
+
|
390
|
+
/* POWER */
|
391
|
+
XND_ALL_UNARY_FLOAT_INIT(sqrt),
|
392
|
+
XND_ALL_UNARY_FLOAT_INIT(cbrt),
|
393
|
+
|
394
|
+
/* TRIGONOMETRIC */
|
395
|
+
XND_ALL_UNARY_FLOAT_INIT(sin),
|
396
|
+
XND_ALL_UNARY_FLOAT_INIT(cos),
|
397
|
+
XND_ALL_UNARY_FLOAT_INIT(tan),
|
398
|
+
XND_ALL_UNARY_FLOAT_INIT(asin),
|
399
|
+
XND_ALL_UNARY_FLOAT_INIT(acos),
|
400
|
+
XND_ALL_UNARY_FLOAT_INIT(atan),
|
401
|
+
|
402
|
+
/* HYPERBOLIC */
|
403
|
+
XND_ALL_UNARY_FLOAT_INIT(sinh),
|
404
|
+
XND_ALL_UNARY_FLOAT_INIT(cosh),
|
405
|
+
XND_ALL_UNARY_FLOAT_INIT(tanh),
|
406
|
+
XND_ALL_UNARY_FLOAT_INIT(asinh),
|
407
|
+
XND_ALL_UNARY_FLOAT_INIT(acosh),
|
408
|
+
XND_ALL_UNARY_FLOAT_INIT(atanh),
|
409
|
+
|
410
|
+
/* ERROR AND GAMMA */
|
411
|
+
XND_ALL_UNARY_FLOAT_INIT(erf),
|
412
|
+
XND_ALL_UNARY_FLOAT_INIT(erfc),
|
413
|
+
XND_ALL_UNARY_FLOAT_INIT(lgamma),
|
414
|
+
XND_ALL_UNARY_FLOAT_INIT(tgamma),
|
415
|
+
|
416
|
+
/* CEILING, FLOOR, TRUNC */
|
417
|
+
XND_ALL_UNARY_FLOAT_INIT(ceil),
|
418
|
+
XND_ALL_UNARY_FLOAT_INIT(floor),
|
419
|
+
XND_ALL_UNARY_FLOAT_INIT(trunc),
|
420
|
+
XND_ALL_UNARY_FLOAT_INIT(round),
|
421
|
+
XND_ALL_UNARY_FLOAT_INIT(nearbyint),
|
422
|
+
|
423
|
+
{ .name = NULL, .sig = NULL }
|
424
|
+
};
|
425
|
+
|
426
|
+
|
427
|
+
/****************************************************************************/
|
428
|
+
/* Initialize kernel table */
|
429
|
+
/****************************************************************************/
|
430
|
+
|
431
|
+
int
|
432
|
+
gm_init_unary_kernels(gm_tbl_t *tbl, ndt_context_t *ctx)
|
433
|
+
{
|
434
|
+
const gm_kernel_init_t *k;
|
435
|
+
|
436
|
+
for (k = unary_id; k->name != NULL; k++) {
|
437
|
+
if (gm_add_kernel_typecheck(tbl, k, ctx, &unary_id_typecheck) < 0) {
|
438
|
+
return -1;
|
439
|
+
}
|
440
|
+
}
|
441
|
+
|
442
|
+
for (k = unary_float; k->name != NULL; k++) {
|
443
|
+
if (gm_add_kernel_typecheck(tbl, k, ctx, &unary_float_typecheck) < 0) {
|
444
|
+
return -1;
|
445
|
+
}
|
446
|
+
}
|
447
|
+
|
448
|
+
return 0;
|
449
|
+
}
|
@@ -0,0 +1,219 @@
|
|
1
|
+
/*
|
2
|
+
* BSD 3-Clause License
|
3
|
+
*
|
4
|
+
* Copyright (c) 2017-2018, plures
|
5
|
+
* All rights reserved.
|
6
|
+
*
|
7
|
+
* Redistribution and use in source and binary forms, with or without
|
8
|
+
* modification, are permitted provided that the following conditions are met:
|
9
|
+
*
|
10
|
+
* 1. Redistributions of source code must retain the above copyright notice,
|
11
|
+
* this list of conditions and the following disclaimer.
|
12
|
+
*
|
13
|
+
* 2. Redistributions in binary form must reproduce the above copyright notice,
|
14
|
+
* this list of conditions and the following disclaimer in the documentation
|
15
|
+
* and/or other materials provided with the distribution.
|
16
|
+
*
|
17
|
+
* 3. Neither the name of the copyright holder nor the names of its
|
18
|
+
* contributors may be used to endorse or promote products derived from
|
19
|
+
* this software without specific prior written permission.
|
20
|
+
*
|
21
|
+
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
22
|
+
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
23
|
+
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
24
|
+
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
25
|
+
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
26
|
+
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
27
|
+
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
28
|
+
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
29
|
+
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
30
|
+
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
31
|
+
*/
|
32
|
+
|
33
|
+
|
34
|
+
#include <stdlib.h>
|
35
|
+
#include <stdint.h>
|
36
|
+
#include <string.h>
|
37
|
+
#include <inttypes.h>
|
38
|
+
#include "ndtypes.h"
|
39
|
+
#include "xnd.h"
|
40
|
+
#include "gumath.h"
|
41
|
+
|
42
|
+
|
43
|
+
/* Loops and functions for NumPy strided kernels. */
|
44
|
+
|
45
|
+
|
46
|
+
/*
 * Append 'value' to 'array' at position 'index' and increment 'index'.
 *
 * Must be used inside a function returning int: if index >= maxindex or
 * value >= maxvalue, an NDT_RuntimeError is set in ctx and the enclosing
 * function returns -1.
 *
 * 'index' must be a modifiable lvalue.  'value' is evaluated more than
 * once, so it must not have side effects.  All arguments are parenthesized
 * in the expansion so that compound expressions are compared and cast as
 * a whole.
 */
#define ASSIGN_OVERFLOW(array, index, maxindex, value, maxvalue, ctx) \
    do {                                                                           \
        if ((index) >= (maxindex)) {                                               \
            ndt_err_format((ctx), NDT_RuntimeError, "unexpected array overflow");  \
            return -1;                                                             \
        }                                                                          \
        if ((value) >= (maxvalue)) {                                               \
            ndt_err_format((ctx), NDT_RuntimeError, "unexpected intptr overflow"); \
            return -1;                                                             \
        }                                                                          \
        (array)[(index)++] = (intptr_t)(value);                                    \
    } while (0)
|
58
|
+
|
59
|
+
|
60
|
+
/*
 * Flat array description of an xnd container, filled in by gm_as_ndarray():
 *
 *   ndim:     number of entries used in shape[] and strides[]
 *   itemsize: size of one element in bytes
 *   nelem:    number of elements (datasize / itemsize; 1 for a scalar)
 *   shape:    extent of each dimension
 *   strides:  byte stride of each dimension (step * itemsize)
 *   ptr:      data pointer, already advanced by index * itemsize
 */
typedef struct {
|
61
|
+
int ndim;
|
62
|
+
int64_t itemsize;
|
63
|
+
int64_t nelem;
|
64
|
+
int64_t shape[NDT_MAX_DIM];
|
65
|
+
int64_t strides[NDT_MAX_DIM];
|
66
|
+
char *ptr;
|
67
|
+
} gm_ndarray_t;
|
68
|
+
|
69
|
+
|
70
|
+
static int
|
71
|
+
gm_as_ndarray(gm_ndarray_t *a, const xnd_t *x, ndt_context_t *ctx)
|
72
|
+
{
|
73
|
+
const ndt_t *t = x->type;
|
74
|
+
int i;
|
75
|
+
|
76
|
+
assert(t->ndim <= NDT_MAX_DIM);
|
77
|
+
|
78
|
+
if (ndt_is_abstract(t)) {
|
79
|
+
ndt_err_format(ctx, NDT_TypeError, "type is not an ndarray");
|
80
|
+
return -1;
|
81
|
+
}
|
82
|
+
|
83
|
+
if (!ndt_is_ndarray(t)) {
|
84
|
+
ndt_err_format(ctx, NDT_TypeError, "type is not an ndarray");
|
85
|
+
return -1;
|
86
|
+
}
|
87
|
+
|
88
|
+
if (t->ndim == 0) {
|
89
|
+
a->ndim = 1;
|
90
|
+
a->itemsize = t->datasize;
|
91
|
+
a->nelem = 1;
|
92
|
+
a->shape[0] = 1;
|
93
|
+
a->strides[0] = 0;
|
94
|
+
a->ptr = x->ptr + x->index * t->datasize;
|
95
|
+
return 0;
|
96
|
+
}
|
97
|
+
|
98
|
+
a->ndim = t->ndim;
|
99
|
+
a->itemsize = t->Concrete.FixedDim.itemsize;
|
100
|
+
a->nelem = t->datasize / t->Concrete.FixedDim.itemsize;
|
101
|
+
a->ptr = x->ptr + x->index * a->itemsize;
|
102
|
+
|
103
|
+
for (i=0; t->ndim > 0; i++, t=t->FixedDim.type) {
|
104
|
+
a->shape[i] = t->FixedDim.shape;
|
105
|
+
a->strides[i] = t->Concrete.FixedDim.step * a->itemsize;
|
106
|
+
}
|
107
|
+
|
108
|
+
return 0;
|
109
|
+
}
|
110
|
+
|
111
|
+
/*
|
112
|
+
* Convert an xnd container into the {args, dimensions, strides} representation.
|
113
|
+
*/
|
114
|
+
int
|
115
|
+
gm_np_convert_xnd(char **args, const int nargs,
|
116
|
+
intptr_t *dimensions, const int dims_size,
|
117
|
+
intptr_t *steps, const int steps_size,
|
118
|
+
xnd_t stack[], const int outer_dims,
|
119
|
+
ndt_context_t *ctx)
|
120
|
+
{
|
121
|
+
ALLOCA(gm_ndarray_t, nd, nargs);
|
122
|
+
int64_t shape;
|
123
|
+
int n = 0, m = 0;
|
124
|
+
int i, k;
|
125
|
+
|
126
|
+
if (nargs == 0) {
|
127
|
+
return 0;
|
128
|
+
}
|
129
|
+
|
130
|
+
for (i = 0; i < nargs; i++) {
|
131
|
+
if (gm_as_ndarray(&nd[i], &stack[i], ctx) < 0) {
|
132
|
+
return -1;
|
133
|
+
}
|
134
|
+
args[i] = nd[i].ptr;
|
135
|
+
}
|
136
|
+
|
137
|
+
for (i = 0; i < outer_dims; i++) {
|
138
|
+
shape = nd[0].shape[i];
|
139
|
+
ASSIGN_OVERFLOW(dimensions, n, dims_size, shape, INTPTR_MAX, ctx);
|
140
|
+
|
141
|
+
for (k = 0; k < nargs; k++) {
|
142
|
+
if (nd[k].shape[i] != shape) {
|
143
|
+
ndt_err_format(ctx, NDT_RuntimeError,
|
144
|
+
"unexpected shape mismatch in outer dimensions");
|
145
|
+
return -1;
|
146
|
+
}
|
147
|
+
|
148
|
+
ASSIGN_OVERFLOW(steps, m, steps_size, nd[k].strides[i], INTPTR_MAX, ctx);
|
149
|
+
}
|
150
|
+
}
|
151
|
+
|
152
|
+
for (i = 0; i < nargs; i++) {
|
153
|
+
for (k = outer_dims; k < nd[i].ndim; k++) {
|
154
|
+
ASSIGN_OVERFLOW(dimensions, n, dims_size, nd[i].shape[k], INTPTR_MAX, ctx);
|
155
|
+
ASSIGN_OVERFLOW(steps, m, steps_size, nd[i].strides[k], INTPTR_MAX, ctx);
|
156
|
+
}
|
157
|
+
}
|
158
|
+
|
159
|
+
return 0;
|
160
|
+
}
|
161
|
+
|
162
|
+
/*
|
163
|
+
* Flatten an xnd container into a 1D representation for direct elementwise
|
164
|
+
* kernel application. A scalar is expanded into a 1D array of size 1.
|
165
|
+
*/
|
166
|
+
int
|
167
|
+
gm_np_flatten(char **args, const int nargs,
|
168
|
+
int64_t *dimensions,
|
169
|
+
int64_t *steps,
|
170
|
+
const xnd_t stack[],
|
171
|
+
ndt_context_t *ctx)
|
172
|
+
{
|
173
|
+
gm_ndarray_t nd;
|
174
|
+
int i;
|
175
|
+
|
176
|
+
for (i = 0; i < nargs; i++) {
|
177
|
+
if (gm_as_ndarray(&nd, &stack[i], ctx) < 0) {
|
178
|
+
return -1;
|
179
|
+
}
|
180
|
+
args[i] = nd.ptr;
|
181
|
+
dimensions[i] = nd.nelem;
|
182
|
+
steps[i] = nd.itemsize;
|
183
|
+
}
|
184
|
+
|
185
|
+
return 0;
|
186
|
+
}
|
187
|
+
|
188
|
+
int
|
189
|
+
gm_np_map(const gm_strided_kernel_t f,
|
190
|
+
char **args, int nargs,
|
191
|
+
intptr_t *dimensions,
|
192
|
+
intptr_t *steps,
|
193
|
+
void *data,
|
194
|
+
int outer_dims)
|
195
|
+
{
|
196
|
+
ALLOCA(char *, next, nargs);
|
197
|
+
intptr_t shape, i;
|
198
|
+
int ret, k;
|
199
|
+
|
200
|
+
if (outer_dims <= 1) {
|
201
|
+
return f(args, dimensions, steps, data);
|
202
|
+
}
|
203
|
+
|
204
|
+
shape = dimensions[0];
|
205
|
+
|
206
|
+
for (i = 0; i < shape; i++) {
|
207
|
+
for (k = 0; k < nargs; k++) {
|
208
|
+
next[k] = args[k] + i * steps[k];
|
209
|
+
}
|
210
|
+
|
211
|
+
ret = gm_np_map(f, next, nargs, dimensions+1, steps+nargs, data,
|
212
|
+
outer_dims-1);
|
213
|
+
if (ret != 0) {
|
214
|
+
return ret;
|
215
|
+
}
|
216
|
+
}
|
217
|
+
|
218
|
+
return 0;
|
219
|
+
}
|