xnd 0.2.0dev6 → 0.2.0dev7
- checksums.yaml +4 -4
- data/README.md +2 -0
- data/Rakefile +1 -1
- data/ext/ruby_xnd/GPATH +0 -0
- data/ext/ruby_xnd/GRTAGS +0 -0
- data/ext/ruby_xnd/GTAGS +0 -0
- data/ext/ruby_xnd/extconf.rb +8 -5
- data/ext/ruby_xnd/gc_guard.c +53 -2
- data/ext/ruby_xnd/gc_guard.h +8 -2
- data/ext/ruby_xnd/include/overflow.h +147 -0
- data/ext/ruby_xnd/include/ruby_xnd.h +62 -0
- data/ext/ruby_xnd/include/xnd.h +590 -0
- data/ext/ruby_xnd/lib/libxnd.a +0 -0
- data/ext/ruby_xnd/lib/libxnd.so +1 -0
- data/ext/ruby_xnd/lib/libxnd.so.0 +1 -0
- data/ext/ruby_xnd/lib/libxnd.so.0.2.0dev3 +0 -0
- data/ext/ruby_xnd/ruby_xnd.c +556 -47
- data/ext/ruby_xnd/ruby_xnd.h +2 -1
- data/ext/ruby_xnd/xnd/Makefile +80 -0
- data/ext/ruby_xnd/xnd/config.h +26 -0
- data/ext/ruby_xnd/xnd/config.h.in +3 -0
- data/ext/ruby_xnd/xnd/config.log +421 -0
- data/ext/ruby_xnd/xnd/config.status +1023 -0
- data/ext/ruby_xnd/xnd/configure +376 -8
- data/ext/ruby_xnd/xnd/configure.ac +48 -7
- data/ext/ruby_xnd/xnd/doc/xnd/index.rst +3 -1
- data/ext/ruby_xnd/xnd/doc/xnd/{types.rst → xnd.rst} +3 -18
- data/ext/ruby_xnd/xnd/libxnd/Makefile +142 -0
- data/ext/ruby_xnd/xnd/libxnd/Makefile.in +43 -3
- data/ext/ruby_xnd/xnd/libxnd/Makefile.vc +19 -3
- data/ext/ruby_xnd/xnd/libxnd/bitmaps.c +42 -3
- data/ext/ruby_xnd/xnd/libxnd/bitmaps.o +0 -0
- data/ext/ruby_xnd/xnd/libxnd/bounds.c +366 -0
- data/ext/ruby_xnd/xnd/libxnd/bounds.o +0 -0
- data/ext/ruby_xnd/xnd/libxnd/contrib.h +98 -0
- data/ext/ruby_xnd/xnd/libxnd/contrib/bfloat16.h +213 -0
- data/ext/ruby_xnd/xnd/libxnd/copy.c +155 -4
- data/ext/ruby_xnd/xnd/libxnd/copy.o +0 -0
- data/ext/ruby_xnd/xnd/libxnd/cuda/cuda_memory.cu +121 -0
- data/ext/ruby_xnd/xnd/libxnd/cuda/cuda_memory.h +58 -0
- data/ext/ruby_xnd/xnd/libxnd/equal.c +195 -7
- data/ext/ruby_xnd/xnd/libxnd/equal.o +0 -0
- data/ext/ruby_xnd/xnd/libxnd/inline.h +32 -0
- data/ext/ruby_xnd/xnd/libxnd/libxnd.a +0 -0
- data/ext/ruby_xnd/xnd/libxnd/libxnd.so +1 -0
- data/ext/ruby_xnd/xnd/libxnd/libxnd.so.0 +1 -0
- data/ext/ruby_xnd/xnd/libxnd/libxnd.so.0.2.0dev3 +0 -0
- data/ext/ruby_xnd/xnd/libxnd/shape.c +207 -0
- data/ext/ruby_xnd/xnd/libxnd/shape.o +0 -0
- data/ext/ruby_xnd/xnd/libxnd/split.c +2 -2
- data/ext/ruby_xnd/xnd/libxnd/split.o +0 -0
- data/ext/ruby_xnd/xnd/libxnd/tests/Makefile +39 -0
- data/ext/ruby_xnd/xnd/libxnd/xnd.c +613 -91
- data/ext/ruby_xnd/xnd/libxnd/xnd.h +145 -4
- data/ext/ruby_xnd/xnd/libxnd/xnd.o +0 -0
- data/ext/ruby_xnd/xnd/python/test_xnd.py +1125 -50
- data/ext/ruby_xnd/xnd/python/xnd/__init__.py +609 -124
- data/ext/ruby_xnd/xnd/python/xnd/_version.py +1 -0
- data/ext/ruby_xnd/xnd/python/xnd/_xnd.c +1652 -101
- data/ext/ruby_xnd/xnd/python/xnd/libxnd.a +0 -0
- data/ext/ruby_xnd/xnd/python/xnd/libxnd.so +1 -0
- data/ext/ruby_xnd/xnd/python/xnd/libxnd.so.0 +1 -0
- data/ext/ruby_xnd/xnd/python/xnd/libxnd.so.0.2.0dev3 +0 -0
- data/ext/ruby_xnd/xnd/python/xnd/pyxnd.h +1 -1
- data/ext/ruby_xnd/xnd/python/xnd/util.h +25 -0
- data/ext/ruby_xnd/xnd/python/xnd/xnd.h +590 -0
- data/ext/ruby_xnd/xnd/python/xnd_randvalue.py +106 -6
- data/ext/ruby_xnd/xnd/python/xnd_support.py +4 -0
- data/ext/ruby_xnd/xnd/setup.py +46 -4
- data/lib/ruby_xnd.so +0 -0
- data/lib/xnd.rb +39 -3
- data/lib/xnd/version.rb +2 -2
- data/xnd.gemspec +2 -1
- metadata +58 -5
@@ -80,6 +80,23 @@ _var_dim_next(const xnd_t *x, const int64_t start, const int64_t step,
     return next;
 }
 
+static inline xnd_t
+_array_next(const xnd_t *x, const int64_t i)
+{
+    const ndt_t *t = x->type;
+    const ndt_t *u = t->Array.type;
+    xnd_t next;
+
+    assert(t->tag == Array);
+
+    next.bitmap = xnd_bitmap_empty;
+    next.index = 0;
+    next.type = u;
+    next.ptr = XND_ARRAY_DATA(x->ptr) + i * next.type->datasize;
+
+    return next;
+}
+
 static inline xnd_t
 _tuple_next(const xnd_t *x, const int64_t i)
 {
@@ -108,6 +125,21 @@ _record_next(const xnd_t *x, const int64_t i)
     return next;
 }
 
+static inline xnd_t
+_union_next(const xnd_t *x)
+{
+    uint8_t i = XND_UNION_TAG(x->ptr);
+    const ndt_t *t = x->type;
+    xnd_t next;
+
+    next.bitmap = xnd_bitmap_empty;
+    next.index = 0;
+    next.type = t->Union.types[i];
+    next.ptr = x->ptr+1;
+
+    return next;
+}
+
 static inline xnd_t
 _ref_next(const xnd_t *x)
 {
@@ -0,0 +1 @@
+ext/ruby_xnd/xnd/libxnd/libxnd.so.0.2.0dev3
@@ -0,0 +1 @@
+ext/ruby_xnd/xnd/libxnd/libxnd.so.0.2.0dev3
@@ -0,0 +1,207 @@
+/*
+ * BSD 3-Clause License
+ *
+ * Copyright (c) 2017-2018, plures
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice,
+ *    this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ *    this list of conditions and the following disclaimer in the documentation
+ *    and/or other materials provided with the distribution.
+ *
+ * 3. Neither the name of the copyright holder nor the names of its
+ *    contributors may be used to endorse or promote products derived from
+ *    this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+ * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+ * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+
+#include <stdlib.h>
+#include <stdint.h>
+#include <string.h>
+#include <inttypes.h>
+#include "ndtypes.h"
+#include "xnd.h"
+#include "contrib.h"
+#include "overflow.h"
+
+
+static bool
+shape_equal(const ndt_ndarray_t *dest, const ndt_ndarray_t *src)
+{
+    if (dest->ndim != src->ndim) {
+        return false;
+    }
+
+    for (int i = 0; i < src->ndim; i++) {
+        if (dest->shape[i] != src->shape[i]) {
+            return false;
+        }
+    }
+
+    return true;
+}
+
+static int64_t
+prod(const int64_t shape[], int N)
+{
+    bool overflow = false;
+    int64_t p = 1;
+
+    for (int64_t i = 0; i < N; i++) {
+        p = MULi64(p, shape[i], &overflow);
+        if (overflow) {
+            return -1;
+        }
+    }
+
+    return p;
+}
+
+static inline bool
+zero_in_shape(const ndt_ndarray_t *x)
+{
+    for (int i = 0; i < x->ndim; i++) {
+        if (x->shape[i] == 0) {
+            return true;
+        }
+    }
+
+    return false;
+}
+
+static void
+init_contiguous_c_strides(ndt_ndarray_t *dest, const ndt_ndarray_t *src)
+{
+    int64_t q;
+    int64_t i;
+
+    if (src->ndim == 0 && dest->ndim == 0) {
+        return;
+    }
+
+    q = 1;
+    for (i = dest->ndim-1; i >= 0; i--) {
+        dest->steps[i] = q;
+        q *= dest->shape[i];
+    }
+}
+
+static void
+init_contiguous_f_strides(ndt_ndarray_t *dest, const ndt_ndarray_t *src)
+{
+    int64_t q;
+    int64_t i;
+
+    if (src->ndim == 0 && dest->ndim == 0) {
+        return;
+    }
+
+    q = 1;
+    for (i = 0; i < dest->ndim; i++) {
+        dest->steps[i] = q;
+        q *= dest->shape[i];
+    }
+}
+
+xnd_t
+xnd_reshape(const xnd_t *x, int64_t shape[], int ndim, char order,
+            ndt_context_t *ctx)
+{
+    const ndt_t *t = x->type;
+    ndt_ndarray_t src, dest;
+    int64_t p, q;
+    int ret;
+    int use_fortran = 0;
+
+    if (order == 'F') {
+        use_fortran = 1;
+    }
+    else if (order == 'A') {
+        use_fortran = ndt_is_f_contiguous(t);
+    }
+    else if (order != 'C') {
+        ndt_err_format(ctx, NDT_ValueError, "'order' must be 'C', 'F' or 'A'");
+        return xnd_error;
+    }
+
+    if (ndt_as_ndarray(&src, t, ctx) < 0) {
+        return xnd_error;
+    }
+
+    dest.ndim = ndim;
+    dest.itemsize = src.itemsize;
+    for (int i = 0; i < ndim; i++) {
+        dest.shape[i] = shape[i];
+        dest.steps[i] = 0;
+        dest.strides[i] = 0;
+    }
+
+    p = prod(src.shape, src.ndim);
+    q = prod(dest.shape, dest.ndim);
+    if (p < 0 || q < 0) {
+        ndt_err_format(ctx, NDT_ValueError,
+            "reshaped array has too many elements");
+        return xnd_error;
+    }
+    if (p != q) {
+        ndt_err_format(ctx, NDT_ValueError,
+            "shapes do not have the same number of elements");
+        return xnd_error;
+    }
+
+    if (shape_equal(&dest, &src)) {
+        dest = src;
+    }
+    else if (zero_in_shape(&dest)) {
+        ;
+    }
+    else if (!use_fortran && ndt_is_c_contiguous(t)) {
+        init_contiguous_c_strides(&dest, &src);
+    }
+    else if (use_fortran && ndt_is_f_contiguous(t)) {
+        init_contiguous_f_strides(&dest, &src);
+    }
+    else {
+        ret = xnd_nocopy_reshape(dest.shape, dest.steps, dest.ndim,
+                                 src.shape, src.steps, src.ndim, use_fortran);
+        if (!ret) {
+            ndt_err_format(ctx, NDT_ValueError, "inplace reshape not possible");
+            return xnd_error;
+        }
+    }
+
+    xnd_t res = *x;
+
+    const ndt_t *u = ndt_copy(ndt_dtype(t), ctx);
+    if (u == NULL) {
+        return xnd_error;
+    }
+
+    for (int i = dest.ndim-1; i >= 0; i--) {
+        const ndt_t *v = ndt_fixed_dim(u, dest.shape[i], dest.steps[i], ctx);
+        ndt_decref(u);
+        if (v == NULL) {
+            return xnd_error;
+        }
+        u = v;
+    }
+
+    res.type = u;
+    return res;
+}
@@ -56,7 +56,7 @@ static void
 free_slices(xnd_t *lst, int64_t len)
 {
     for (int64_t i = 0; i < len; i++) {
-
+        ndt_decref(lst[i].type);
     }
 
     ndt_free(lst);
@@ -269,7 +269,7 @@ xnd_split(const xnd_t *x, int64_t *nparts, int max_outer, ndt_context_t *ctx)
     }
 
     for (int64_t i = 0; i < nrows; i++) {
-        result[i] =
+        result[i] = xnd_subscript(x, indices+(i*ncols), nindices[i], ctx);
         if (ndt_err_occurred(ctx)) {
             ndt_free(nindices);
             ndt_free(indices);
@@ -0,0 +1,39 @@
+
+SRCDIR = ..
+
+CC = gcc
+LIBSTATIC = libxnd.a
+LIBSHARED = libxnd.so.0.2.0dev3
+
+INCLUDES = /home/sameer/.rvm/gems/ruby-2.4.1/gems/ndtypes-0.2.0dev6/ext/ruby_ndtypes/include
+LIBS = ../../ndtypes/libndtypes
+
+CONFIGURE_CFLAGS = -Wall -Wextra -std=c11 -pedantic -O2 -g
+XND_CFLAGS = $(strip $(CONFIGURE_CFLAGS) $(CFLAGS))
+
+
+default: runtest runtest_shared
+
+
+runtest:\
+Makefile runtest.c test_fixed.c test.h $(SRCDIR)/xnd.h $(SRCDIR)/$(LIBSTATIC)
+	$(CC) -I$(SRCDIR) -I$(INCLUDES) $(XND_CFLAGS) \
+	    -o runtest runtest.c test_fixed.c $(SRCDIR)/libxnd.a \
+	    $(LIBS)/libndtypes.a
+
+runtest_shared:\
+Makefile runtest.c test_fixed.c test.h $(SRCDIR)/xnd.h $(SRCDIR)/$(LIBSHARED)
+	$(CC) -I$(SRCDIR) -I$(INCLUDES) -L$(SRCDIR) -L$(LIBS) \
+	    $(XND_CFLAGS) -o runtest_shared runtest.c test_fixed.c -lxnd -lndtypes
+
+
+FORCE:
+
+clean: FORCE
+	rm -f *.o *.gch *.gcda *.gcno *.gcov *.dyn *.dpi *.lock
+	rm -f runtest runtest_shared
+
+distclean: clean
+	rm -rf Makefile
+
+
@@ -35,15 +35,20 @@
 #include <stdint.h>
 #include <inttypes.h>
 #include <string.h>
+#include <math.h>
 #include <assert.h>
 #include "ndtypes.h"
 #include "xnd.h"
 #include "inline.h"
 #include "contrib.h"
+#include "contrib/bfloat16.h"
+#include "cuda/cuda_memory.h"
+#ifndef _MSC_VER
+#include "config.h"
+#endif
 
 
 static int xnd_init(xnd_t * const x, const uint32_t flags, ndt_context_t *ctx);
-static void xnd_clear(xnd_t * const x, const uint32_t flags);
 
 
 /*****************************************************************************/
@@ -72,32 +77,104 @@ xnd_err_occurred(const xnd_t *x)
 static bool
 requires_init(const ndt_t * const t)
 {
-
+    return !ndt_is_ref_free(t);
+}
 
-
-
-
-
-
-
-
-
-
+static bool
+is_primary_type(const ndt_t * const t, ndt_context_t *ctx)
+{
+    if (ndt_is_abstract(t)) {
+        ndt_err_format(ctx, NDT_ValueError,
+            "cannot create xnd container from abstract type");
+        return false;
+    }
+
+    if (t->flags & NDT_CHAR) {
+        ndt_err_format(ctx, NDT_NotImplementedError, "char is not implemented");
         return false;
+    }
+
+    switch (t->tag) {
+    case FixedDim: {
+        if (!ndt_is_c_contiguous(t) && !ndt_is_f_contiguous(t)) {
+            ndt_err_format(ctx, NDT_ValueError,
+                "cannot create xnd container from non-contiguous type");
+            return false;
+        }
+        return true;
+    }
+    case VarDim: case VarDimElem: {
+        if (!ndt_is_var_contiguous(t)) {
+            ndt_err_format(ctx, NDT_ValueError,
+                "cannot create xnd container from non-contiguous type");
+            return false;
+        }
+        return true;
+    }
+    case Array: {
+        if (requires_init(t)) {
+            ndt_err_format(ctx, NDT_ValueError,
+                "flexible arrays cannot have dtypes that require "
+                "initialization");
+            return false;
+        }
+        return true;
+    }
     default:
         return true;
     }
+
+    ndt_err_format(ctx, NDT_ValueError,
+        "cannot create xnd container from non-contiguous type");
+    return false;
 }
 
+
 /* Create and initialize memory with type 't'. */
+#ifdef HAVE_CUDA
+static char *
+xnd_cuda_new(const ndt_t * const t, ndt_context_t *ctx)
+{
+    void *ptr;
+
+    if (!is_primary_type(t, ctx)) {
+        return NULL;
+    }
+
+    if (!ndt_is_pointer_free(t)) {
+        ndt_err_format(ctx, NDT_ValueError,
+            "only pointer-free types are supported on cuda");
+        return NULL;
+    }
+
+    ptr = xnd_cuda_calloc_managed(t->align, t->datasize, ctx);
+    if (ptr == NULL) {
+        return NULL;
+    }
+
+    return ptr;
+}
+#else
+static char *
+xnd_cuda_new(const ndt_t * const t, ndt_context_t *ctx)
+{
+    (void)t;
+
+    ndt_err_format(ctx, NDT_ValueError, "xnd compiled without cuda support");
+    return NULL;
+}
+#endif
+
 static char *
 xnd_new(const ndt_t * const t, const uint32_t flags, ndt_context_t *ctx)
 {
     xnd_t x;
 
-    if (
-
-
+    if (flags & XND_CUDA_MANAGED) {
+        return xnd_cuda_new(t, ctx);
+    }
+
+    if (!is_primary_type(t, ctx)) {
         return NULL;
     }
 
@@ -136,6 +213,13 @@ xnd_init(xnd_t * const x, const uint32_t flags, ndt_context_t *ctx)
 {
     const ndt_t * const t = x->type;
 
+    if (flags & XND_CUDA_MANAGED) {
+        ndt_err_format(ctx, NDT_RuntimeError,
+            "internal error: cannot initialize cuda memory with a type "
+            "that contains pointers");
+        return -1;
+    }
+
     if (ndt_is_abstract(t)) {
         ndt_err_format(ctx, NDT_ValueError,
             "cannot initialize concrete memory from abstract type");
@@ -199,6 +283,16 @@ xnd_init(xnd_t * const x, const uint32_t flags, ndt_context_t *ctx)
         return 0;
     }
 
+    case Union: {
+        xnd_t next = _union_next(x);
+        if (xnd_init(&next, flags, ctx) < 0) {
+            xnd_clear(&next, flags);
+            return -1;
+        }
+
+        return 0;
+    }
+
     /*
      * Ref represents a pointer to an explicit type. If XND_OWN_POINTERS
      * is set, allocate memory for that type and set the pointer.
@@ -247,10 +341,19 @@ xnd_init(xnd_t * const x, const uint32_t flags, ndt_context_t *ctx)
         return 0;
     }
 
+    /* Array is already initialized by calloc(). */
+    case Array:
+        return 0;
+
     /* Categorical is already initialized by calloc(). */
     case Categorical:
         return 0;
 
+    case VarDimElem:
+        ndt_err_format(ctx, NDT_ValueError,
+            "cannot initialize var elem dimension");
+        return -1;
+
     case Char:
         ndt_err_format(ctx, NDT_NotImplementedError, "char not implemented");
         return -1;
@@ -259,8 +362,8 @@ xnd_init(xnd_t * const x, const uint32_t flags, ndt_context_t *ctx)
     case Bool:
     case Int8: case Int16: case Int32: case Int64:
     case Uint8: case Uint16: case Uint32: case Uint64:
-    case Float16: case Float32: case Float64:
-    case Complex32: case Complex64: case Complex128:
+    case BFloat16: case Float16: case Float32: case Float64:
+    case BComplex32: case Complex32: case Complex64: case Complex128:
     case FixedString: case FixedBytes:
     case String: case Bytes:
         return 0;
@@ -288,7 +391,7 @@ xnd_empty_from_string(const char *s, uint32_t flags, ndt_context_t *ctx)
 {
     xnd_bitmap_t b = {.data=NULL, .size=0, .next=NULL};
     xnd_master_t *x;
-    ndt_t *t;
+    const ndt_t *t;
     char *ptr;
 
     if (!(flags & XND_OWN_TYPE)) {
@@ -310,13 +413,13 @@ xnd_empty_from_string(const char *s, uint32_t flags, ndt_context_t *ctx)
 
     if (!ndt_is_concrete(t)) {
         ndt_err_format(ctx, NDT_ValueError, "type must be concrete");
-
+        ndt_decref(t);
         ndt_free(x);
         return NULL;
     }
 
     if (xnd_bitmap_init(&b, t,ctx) < 0) {
-
+        ndt_decref(t);
         ndt_free(x);
         return NULL;
     }
@@ -324,7 +427,7 @@ xnd_empty_from_string(const char *s, uint32_t flags, ndt_context_t *ctx)
     ptr = xnd_new(t, flags, ctx);
     if (ptr == NULL) {
         xnd_bitmap_clear(&b);
-
+        ndt_decref(t);
         ndt_free(x);
         return NULL;
     }
@@ -401,10 +504,13 @@ xnd_from_xnd(xnd_t *src, uint32_t flags, ndt_context_t *ctx)
 {
     xnd_master_t *x;
 
+    /* XXX xnd_from_xnd() will probably be replaced. */
+    assert(!(flags & XND_CUDA_MANAGED));
+
     x = ndt_alloc(1, sizeof *x);
     if (x == NULL) {
         xnd_clear(src, XND_OWN_ALL);
-
+        ndt_decref(src->type);
         ndt_aligned_free(src->ptr);
         xnd_bitmap_clear(&src->bitmap);
         return ndt_memory_error(ctx);
@@ -424,6 +530,10 @@ xnd_from_xnd(xnd_t *src, uint32_t flags, ndt_context_t *ctx)
 static bool
 requires_clear(const ndt_t * const t)
 {
+    if (t->tag == Array) {
+        return true;
+    }
+
     const ndt_t *dtype = ndt_dtype(t);
 
     switch (dtype->tag) {
@@ -431,8 +541,8 @@ requires_clear(const ndt_t * const t)
     case Bool:
     case Int8: case Int16: case Int32: case Int64:
     case Uint8: case Uint16: case Uint32: case Uint64:
-    case Float16: case Float32: case Float64:
-    case Complex32: case Complex64: case Complex128:
+    case BFloat16: case Float16: case Float32: case Float64:
+    case BComplex32: case Complex32: case Complex64: case Complex128:
     case FixedString: case FixedBytes:
         return false;
     default:
@@ -445,6 +555,7 @@ static void
 xnd_clear_ref(xnd_t *x, const uint32_t flags)
 {
     assert(x->type->tag == Ref);
+    assert(!(flags & XND_CUDA_MANAGED));
 
     if (flags & XND_OWN_POINTERS) {
         ndt_aligned_free(XND_POINTER_DATA(x->ptr));
@@ -457,6 +568,7 @@ static void
 xnd_clear_string(xnd_t *x, const uint32_t flags)
 {
     assert(x->type->tag == String);
+    assert(!(flags & XND_CUDA_MANAGED));
 
     if (flags & XND_OWN_STRINGS) {
         ndt_free(XND_POINTER_DATA(x->ptr));
@@ -469,21 +581,38 @@ static void
 xnd_clear_bytes(xnd_t *x, const uint32_t flags)
 {
     assert(x->type->tag == Bytes);
+    assert(!(flags & XND_CUDA_MANAGED));
 
     if (flags & XND_OWN_BYTES) {
         ndt_aligned_free(XND_BYTES_DATA(x->ptr));
+        XND_BYTES_SIZE(x->ptr) = 0;
         XND_BYTES_DATA(x->ptr) = NULL;
     }
 }
 
-/*
+/* Flexible array data must always be allocated by aligned allocators. */
 static void
+xnd_clear_array(xnd_t *x, const uint32_t flags)
+{
+    assert(x->type->tag == Array);
+    assert(!(flags & XND_CUDA_MANAGED));
+
+    if (flags & XND_OWN_ARRAYS) {
+        ndt_aligned_free(XND_ARRAY_DATA(x->ptr));
+        XND_ARRAY_SHAPE(x->ptr) = 0;
+        XND_ARRAY_DATA(x->ptr) = NULL;
+    }
+}
+
+/* Clear embedded pointers in the data according to flags. */
+void
 xnd_clear(xnd_t * const x, const uint32_t flags)
 {
     NDT_STATIC_CONTEXT(ctx);
     const ndt_t * const t = x->type;
 
     assert(ndt_is_concrete(t));
+    assert(!(flags & XND_CUDA_MANAGED));
 
     switch (t->tag) {
     case FixedDim: {
@@ -516,6 +645,23 @@ xnd_clear(xnd_t * const x, const uint32_t flags)
         return;
     }
 
+    case VarDimElem: {
+        fprintf(stderr, "xnd_clear: internal error: unexpected var elem dimension\n");
+        return;
+    }
+
+    case Array: {
+        const int64_t shape = XND_ARRAY_SHAPE(x->ptr);
+
+        for (int64_t i = 0; i < shape; i++) {
+            xnd_t next = _array_next(x, i);
+            xnd_clear(&next, flags);
+        }
+
+        xnd_clear_array(x, flags);
+        return;
+    }
+
     case Tuple: {
         for (int64_t i = 0; i < t->Tuple.shape; i++) {
             xnd_t next = _tuple_next(x, i);
@@ -534,6 +680,12 @@ xnd_clear(xnd_t * const x, const uint32_t flags)
         return;
     }
 
+    case Union: {
+        xnd_t next = _union_next(x);
+        xnd_clear(&next, flags);
+        return;
+    }
+
     case Ref: {
         if (flags & XND_OWN_POINTERS) {
             xnd_t next = _ref_next(x);
@@ -559,8 +711,8 @@ xnd_clear(xnd_t * const x, const uint32_t flags)
     case Bool:
     case Int8: case Int16: case Int32: case Int64:
     case Uint8: case Uint16: case Uint32: case Uint64:
-    case Float16: case Float32: case Float64:
-    case Complex32: case Complex64: case Complex128:
+    case BFloat16: case Float16: case Float32: case Float64:
+    case BComplex32: case Complex32: case Complex64: case Complex128:
     case FixedString: case FixedBytes:
         return;
 
@@ -603,11 +755,22 @@ xnd_del_buffer(xnd_t *x, uint32_t flags)
     }
 
     if (flags & XND_OWN_TYPE) {
-
+        ndt_decref(x->type);
     }
 
     if (flags & XND_OWN_DATA) {
-
+        if (flags & XND_CUDA_MANAGED) {
+        #ifdef HAVE_CUDA
+            xnd_cuda_free(x->ptr);
+        #else
+            fprintf(stderr,
+                "xnd_del_buffer: internal error: XND_CUDA_MANAGED set "
+                "without cuda support\n");
+        #endif
+        }
+        else {
+            ndt_aligned_free(x->ptr);
+        }
     }
 }
 
@@ -632,23 +795,48 @@ xnd_del(xnd_master_t *x)
 
 
 /*****************************************************************************/
-/*
+/*                              Index checks                                 */
 /*****************************************************************************/
 
 static int64_t
 get_index(const xnd_index_t *key, int64_t shape, ndt_context_t *ctx)
+{
+    switch (key->tag) {
+    case Index:
+        return adjust_index(key->Index, shape, ctx);
+
+    case FieldName:
+        ndt_err_format(ctx, NDT_ValueError,
+            "expected integer index, got field name: '%s'", key->FieldName);
+        return -1;
+
+    case Slice:
+        ndt_err_format(ctx, NDT_ValueError,
+            "expected integer index, got slice");
+        return -1;
+    }
+
+    /* NOT REACHED: tags should be exhaustive */
+    ndt_err_format(ctx, NDT_RuntimeError, "invalid index tag");
+    return -1;
+}
+
+/*
+ * Ragged arrays have multiple shapes in a single dimension that are not known
+ * when a VarDimElem is created. Adjusting the index must be done when the
+ * VarDimElem is accessed and the slices have been applied.
+ */
+static int64_t
+get_index_var_elem(const xnd_index_t *key, ndt_context_t *ctx)
 {
     switch (key->tag) {
     case Index: {
         int64_t i = key->Index;
-        if (i < 0) {
-            i += shape;
-        }
 
-        if (i <
+        if (i < INT32_MIN || i > INT32_MAX) {
             ndt_err_format(ctx, NDT_IndexError,
                 "index with value %" PRIi64 " out of bounds", key->Index);
-            return
+            return INT64_MIN;
         }
 
         return i;
@@ -657,17 +845,17 @@ get_index(const xnd_index_t *key, int64_t shape, ndt_context_t *ctx)
     case FieldName:
         ndt_err_format(ctx, NDT_ValueError,
             "expected integer index, got field name: '%s'", key->FieldName);
-        return
+        return INT64_MIN;
 
     case Slice:
         ndt_err_format(ctx, NDT_ValueError,
             "expected integer index, got slice");
-        return
+        return INT64_MIN;
     }
 
     /* NOT REACHED: tags should be exhaustive */
     ndt_err_format(ctx, NDT_RuntimeError, "invalid index tag");
-    return
+    return INT64_MIN;
 }
 
 static int64_t
@@ -698,6 +886,34 @@ get_index_record(const ndt_t *t, const xnd_index_t *key, ndt_context_t *ctx)
     return -1;
 }
 
+static int64_t
+get_index_union(const ndt_t *t, const xnd_index_t *key, ndt_context_t *ctx)
+{
+    assert(t->tag == Union);
+
+    switch (key->tag) {
+    case FieldName: {
+        int64_t i;
+
+        for (i = 0; i < t->Union.ntags; i++) {
+            if (strcmp(key->FieldName, t->Union.tags[i]) == 0) {
+                return i;
+            }
+        }
+
+        ndt_err_format(ctx, NDT_ValueError,
+            "invalid field name '%s'", key->FieldName);
+        return -1;
+    }
+    case Index: case Slice:
+        return get_index(key, t->Union.ntags, ctx);
+    }
+
+    /* NOT REACHED: tags should be exhaustive */
+    ndt_err_format(ctx, NDT_RuntimeError, "invalid index tag");
+    return -1;
+}
+
 static void
 set_index_exception(bool indexable, ndt_context_t *ctx)
 {
@@ -709,11 +925,72 @@ set_index_exception(bool indexable, ndt_context_t *ctx)
     }
 }
 
-
+
+/*****************************************************************************/
+/*                              Stored indices                               */
+/*****************************************************************************/
+
+bool
+have_stored_index(const ndt_t *t)
+{
+    return t->tag == VarDimElem;
+}
+
+int64_t
+get_stored_index(const ndt_t *t)
+{
+    return t->VarDimElem.index;
+}
+
+/* skip stored indices */
 xnd_t
-
+apply_stored_index(const xnd_t *x, ndt_context_t *ctx)
 {
     const ndt_t * const t = x->type;
+    int64_t start, step, shape;
+
+    if (t->tag != VarDimElem) {
+        ndt_err_format(ctx, NDT_RuntimeError,
+            "apply_stored_index: need VarDimElem");
+        return xnd_error;
+    }
+
+    shape = ndt_var_indices(&start, &step, t, x->index, ctx);
+    if (shape < 0) {
+        return xnd_error;
+    }
+
+    const int64_t i = adjust_index(t->VarDimElem.index, shape, ctx);
+    if (i < 0) {
+        return xnd_error;
+    }
+
+    return xnd_var_dim_next(x, start, step, i);
+}
+
+xnd_t
+apply_stored_indices(const xnd_t *x, ndt_context_t *ctx)
+{
+    xnd_t tl = *x;
+
+    while (tl.type->tag == VarDimElem) {
+        tl = apply_stored_index(&tl, ctx);
+    }
+
+    return tl;
+}
+
+
+/*****************************************************************************/
+/*            Subtrees (single elements are a special case)                  */
+/*****************************************************************************/
+
+/* Return a typed subtree of a memory block */
+static xnd_t
+_xnd_subtree_index(const xnd_t *x, const int64_t *indices, int len, ndt_context_t *ctx)
+{
+    APPLY_STORED_INDICES_XND(x)
+    const ndt_t * const t = x->type;
 
     assert(ndt_is_concrete(t));
 
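A short sketch of how the stored-index machinery above is meant to be driven; the "var * var * int64" example type is an assumption, while apply_stored_indices and xnd_err_occurred come from the hunks. Subscripting a ragged container with a fixed index (for example, the second element of every row) leaves a VarDimElem with the index stored in the type; the index is only resolved once the enclosing slices fix each row's shape.

/* Sketch: resolve any stored VarDimElem indices before reading elements.
 * `view` is assumed to be e.g. a "var * var * int64" container sliced
 * with something like [:, 1]. */
xnd_t tail = apply_stored_indices(&view, &ctx);
if (xnd_err_occurred(&tail)) {
    return xnd_error;
}
/* tail.type no longer contains VarDimElem dimensions. */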
@@ -731,14 +1008,13 @@ xnd_subtree_index(const xnd_t *x, const int64_t *indices, int len, ndt_context_t *ctx)
 
     switch (t->tag) {
     case FixedDim: {
-
-
-            "fixed dim index out of bounds");
+        const int64_t k = adjust_index(i, t->FixedDim.shape, ctx);
+        if (k < 0) {
             return xnd_error;
         }
 
-        const xnd_t next = xnd_fixed_dim_next(x,
-        return
+        const xnd_t next = xnd_fixed_dim_next(x, k);
+        return _xnd_subtree_index(&next, indices+1, len-1, ctx);
     }
 
     case VarDim: {
@@ -749,41 +1025,74 @@ xnd_subtree_index(const xnd_t *x, const int64_t *indices, int len, ndt_context_t *ctx)
             return xnd_error;
         }
 
-
-
+        const int64_t k = adjust_index(i, shape, ctx);
+        if (k < 0) {
             return xnd_error;
         }
 
-        const xnd_t next = xnd_var_dim_next(x, start, step,
-        return
+        const xnd_t next = xnd_var_dim_next(x, start, step, k);
+        return _xnd_subtree_index(&next, indices+1, len-1, ctx);
     }
 
     case Tuple: {
-
-
+        const int64_t k = adjust_index(i, t->Tuple.shape, ctx);
+        if (k < 0) {
             return xnd_error;
         }
 
-        const xnd_t next = xnd_tuple_next(x,
+        const xnd_t next = xnd_tuple_next(x, k, ctx);
         if (next.ptr == NULL) {
             return xnd_error;
         }
 
-        return
+        return _xnd_subtree_index(&next, indices+1, len-1, ctx);
     }
 
     case Record: {
-
-
+        const int64_t k = adjust_index(i, t->Record.shape, ctx);
+        if (k < 0) {
             return xnd_error;
         }
 
-        const xnd_t next = xnd_record_next(x,
+        const xnd_t next = xnd_record_next(x, k, ctx);
+        if (next.ptr == NULL) {
+            return xnd_error;
+        }
+
+        return _xnd_subtree_index(&next, indices+1, len-1, ctx);
+    }
+
+    case Union: {
+        const int64_t k = adjust_index(i, t->Union.ntags, ctx);
+        if (k < 0) {
+            return xnd_error;
+        }
+
+        const uint8_t l = XND_UNION_TAG(x->ptr);
+        if (k != l) {
+            ndt_err_format(ctx, NDT_ValueError,
+                "tag mismatch in union addressing: expected '%s', got '%s'",
+                t->Union.tags[l], t->Union.tags[k]);
+            return xnd_error;
+        }
+
+        const xnd_t next = xnd_union_next(x, ctx);
         if (next.ptr == NULL) {
             return xnd_error;
         }
 
-        return
+        return _xnd_subtree_index(&next, indices+1, len-1, ctx);
+    }
+
+    case Array: {
+        const int64_t shape = XND_ARRAY_SHAPE(x->ptr);
+        const int64_t k = adjust_index(i, shape, ctx);
+        if (k < 0) {
+            return xnd_error;
+        }
+
+        const xnd_t next = xnd_array_next(x, k);
+        return _xnd_subtree_index(&next, indices+1, len-1, ctx);
     }
 
     case Ref: {
@@ -792,7 +1101,7 @@ xnd_subtree_index(const xnd_t *x, const int64_t *indices, int len, ndt_context_t *ctx)
             return xnd_error;
         }
 
-        return
+        return _xnd_subtree_index(&next, indices, len, ctx);
     }
 
     case Constr: {
@@ -801,16 +1110,16 @@ xnd_subtree_index(const xnd_t *x, const int64_t *indices, int len, ndt_context_t *ctx)
             return xnd_error;
         }
 
-        return
+        return _xnd_subtree_index(&next, indices, len, ctx);
     }
 
-
+    case Nominal: {
         const xnd_t next = xnd_nominal_next(x, ctx);
         if (next.ptr == NULL) {
             return xnd_error;
         }
 
-        return
+        return _xnd_subtree_index(&next, indices, len, ctx);
     }
 
     default:
@@ -819,6 +1128,17 @@ xnd_subtree_index(const xnd_t *x, const int64_t *indices, int len, ndt_context_t *ctx)
     }
 }
 
+xnd_t
+xnd_subtree_index(const xnd_t *x, const int64_t *indices, int len, ndt_context_t *ctx)
+{
+    if (len < 0 || len > NDT_MAX_DIM) {
+        ndt_err_format(ctx, NDT_IndexError, "too many indices");
+        return xnd_error;
+    }
+
+    return _xnd_subtree_index(x, indices, len, ctx);
+}
+
 /*
  * Return a zero copy view of an xnd object. If a dtype is indexable,
  * descend into the dtype.
@@ -827,6 +1147,7 @@ static xnd_t
 _xnd_subtree(const xnd_t *x, const xnd_index_t indices[], int len, bool indexable,
              ndt_context_t *ctx)
 {
+    APPLY_STORED_INDICES_XND(x)
     const ndt_t *t = x->type;
     const xnd_index_t *key;
 
@@ -846,7 +1167,7 @@ _xnd_subtree(const xnd_t *x, const xnd_index_t indices[], int len, bool indexable,
 
     switch (t->tag) {
     case FixedDim: {
-        int64_t i = get_index(key, t->FixedDim.shape, ctx);
+        const int64_t i = get_index(key, t->FixedDim.shape, ctx);
         if (i < 0) {
             return xnd_error;
         }
@@ -857,14 +1178,13 @@ _xnd_subtree(const xnd_t *x, const xnd_index_t indices[], int len, bool indexable,
 
     case VarDim: {
         int64_t start, step, shape;
-        int64_t i;
 
         shape = ndt_var_indices(&start, &step, t, x->index, ctx);
         if (shape < 0) {
             return xnd_error;
         }
 
-        i = get_index(key, shape, ctx);
+        const int64_t i = get_index(key, shape, ctx);
         if (i < 0) {
             return xnd_error;
         }
@@ -888,7 +1208,7 @@ _xnd_subtree(const xnd_t *x, const xnd_index_t indices[], int len, bool indexable,
     }
 
     case Record: {
-        int64_t i = get_index_record(t, key, ctx);
+        const int64_t i = get_index_record(t, key, ctx);
         if (i < 0) {
             return xnd_error;
         }
@@ -901,6 +1221,39 @@ _xnd_subtree(const xnd_t *x, const xnd_index_t indices[], int len, bool indexable,
         return _xnd_subtree(&next, indices+1, len-1, true, ctx);
     }
 
+    case Union: {
+        const int64_t i = get_index_union(t, key, ctx);
+        if (i < 0) {
+            return xnd_error;
+        }
+
+        const uint8_t k = XND_UNION_TAG(x->ptr);
+        if (i != k) {
+            ndt_err_format(ctx, NDT_ValueError,
+                "tag mismatch in union addressing: expected '%s', got '%s'",
+                t->Union.tags[k], t->Union.tags[i]);
+            return xnd_error;
+        }
+
+        const xnd_t next = xnd_union_next(x, ctx);
+        if (next.ptr == NULL) {
+            return xnd_error;
+        }
+
+        return _xnd_subtree(&next, indices+1, len-1, true, ctx);
+    }
+
+    case Array: {
+        const int64_t shape = XND_ARRAY_SHAPE(x->ptr);
+        const int64_t i = get_index(key, shape, ctx);
+        if (i < 0) {
+            return xnd_error;
+        }
+
+        const xnd_t next = xnd_array_next(x, i);
+        return _xnd_subtree(&next, indices+1, len-1, true, ctx);
+    }
+
     case Ref: {
         const xnd_t next = xnd_ref_next(x, ctx);
         if (next.ptr == NULL) {
@@ -941,13 +1294,18 @@ _xnd_subtree(const xnd_t *x, const xnd_index_t indices[], int len, bool indexable,
 xnd_t
 xnd_subtree(const xnd_t *x, const xnd_index_t indices[], int len, ndt_context_t *ctx)
 {
+    if (len < 0 || len > NDT_MAX_DIM) {
+        ndt_err_format(ctx, NDT_IndexError, "too many indices");
+        return xnd_error;
+    }
+
     return _xnd_subtree(x, indices, len, false, ctx);
 }
 
 static xnd_t xnd_index(const xnd_t *x, const xnd_index_t indices[], int len, ndt_context_t *ctx);
 static xnd_t xnd_slice(const xnd_t *x, const xnd_index_t indices[], int len, ndt_context_t *ctx);
 
-xnd_t
+static xnd_t
 xnd_multikey(const xnd_t *x, const xnd_index_t indices[], int len, ndt_context_t *ctx)
 {
     const ndt_t *t = x->type;
@@ -957,18 +1315,14 @@ xnd_multikey(const xnd_t *x, const xnd_index_t indices[], int len, ndt_context_t *ctx)
     assert(ndt_is_concrete(t));
     assert(x->ptr != NULL);
 
-    if (len > t
+    if (len > ndt_logical_ndim(t)) {
         ndt_err_format(ctx, NDT_IndexError, "too many indices");
         return xnd_error;
     }
 
     if (len == 0) {
         xnd_t next = *x;
-        next.type
-        if (next.type == NULL) {
-            return xnd_error;
-        }
-
+        ndt_incref(next.type);
         return next;
     }
 
@@ -997,6 +1351,7 @@ static xnd_t
 xnd_index(const xnd_t *x, const xnd_index_t indices[], int len, ndt_context_t *ctx)
 {
+    xnd_index_t xindices[NDT_MAX_DIM+1];
     const ndt_t *t = x->type;
     const xnd_index_t *key;
 
@@ -1004,6 +1359,17 @@ xnd_index(const xnd_t *x, const xnd_index_t indices[], int len, ndt_context_t *ctx)
     assert(ndt_is_concrete(t));
     assert(x->ptr != NULL);
 
+    /* Hidden element type, insert the stored index. */
+    if (have_stored_index(t)) {
+        xindices[0].tag = Index;
+        xindices[0].Index = get_stored_index(t);
+        for (int k = 0; k < len; k++) {
+            xindices[k+1] = indices[k];
+        }
+        indices = xindices;
+        len = len+1;
+    }
+
     key = &indices[0];
     assert(key->tag == Index);
 
@@ -1018,10 +1384,35 @@ xnd_index(const xnd_t *x, const xnd_index_t indices[], int len, ndt_context_t *ctx)
         return xnd_multikey(&next, indices+1, len-1, ctx);
     }
 
-    case VarDim: {
-
-
-
+    case VarDim: case VarDimElem: {
+        const ndt_t *u;
+
+        if (ndt_is_optional(t)) {
+            ndt_err_format(ctx, NDT_NotImplementedError,
+                "optional dimensions are temporarily disabled");
+            return xnd_error;
+        }
+
+        const int64_t i = get_index_var_elem(key, ctx);
+        if (i == INT64_MIN) {
+            return xnd_error;
+        }
+
+        const xnd_t next = xnd_var_dim_next(x, 0, 1, 0);
+        const xnd_t tail = xnd_multikey(&next, indices+1, len-1, ctx);
+        if (xnd_err_occurred(&tail)) {
+            return xnd_error;
+        }
+
+        u = ndt_convert_to_var_elem(t, tail.type, i, ctx);
+        ndt_decref(tail.type);
+        if (u == NULL) {
+            return xnd_error;
+        }
+
+        xnd_t ret = *x;
+        ret.type = u;
+        return ret;
     }
 
     default:
@@ -1059,9 +1450,10 @@ xnd_slice(const xnd_t *x, const xnd_index_t indices[], int len, ndt_context_t *ctx)
     }
 
     xnd_t ret = *x;
-    ret.type = ndt_fixed_dim(
+    ret.type = ndt_fixed_dim(sliced.type, shape,
                              t->Concrete.FixedDim.step * step,
                              ctx);
+    ndt_decref(sliced.type);
     if (ret.type == NULL) {
         return xnd_error;
     }
@@ -1093,15 +1485,14 @@ xnd_slice(const xnd_t *x, const xnd_index_t indices[], int len, ndt_context_t *ctx)
 
     slices = ndt_var_add_slice(&nslices, t, start, stop, step, ctx);
     if (slices == NULL) {
+        ndt_decref(next.type);
         return xnd_error;
     }
 
     xnd_t ret = *x;
-    ret.type = ndt_var_dim(
-
-
-                           nslices, slices,
-                           ctx);
+    ret.type = ndt_var_dim(next.type, t->Concrete.VarDim.offsets,
+                           nslices, slices, false, ctx);
+    ndt_decref(next.type);
     if (ret.type == NULL) {
         return xnd_error;
     }
@@ -1111,6 +1502,32 @@ xnd_slice(const xnd_t *x, const xnd_index_t indices[], int len, ndt_context_t *ctx)
     return ret;
 }
 
+    case VarDimElem: {
+        int64_t i = t->VarDimElem.index;
+
+        if (ndt_is_optional(t)) {
+            ndt_err_format(ctx, NDT_NotImplementedError,
+                "optional dimensions are temporarily disabled");
+            return xnd_error;
+        }
+
+        const xnd_t next = xnd_var_dim_next(x, 0, 1, 0);
+        const xnd_t tail = xnd_multikey(&next, indices, len, ctx);
+        if (xnd_err_occurred(&tail)) {
+            return xnd_error;
+        }
+
+        const ndt_t *u = ndt_convert_to_var_elem(t, tail.type, i, ctx);
+        ndt_decref(tail.type);
+        if (u == NULL) {
+            return xnd_error;
+        }
+
+        xnd_t ret = *x;
+        ret.type = u;
+        return ret;
+    }
+
     case Tuple: {
         ndt_err_format(ctx, NDT_NotImplementedError,
             "slicing tuples is not supported");
@@ -1123,42 +1540,108 @@ xnd_slice(const xnd_t *x, const xnd_index_t indices[], int len, ndt_context_t *ctx)
         return xnd_error;
     }
 
+    case Union: {
+        ndt_err_format(ctx, NDT_NotImplementedError,
+            "slicing unions is not supported");
+        return xnd_error;
+    }
+
     default:
         ndt_err_format(ctx, NDT_IndexError, "type not sliceable");
         return xnd_error;
     }
 }
 
+/* Validate indices for mixed indexed/sliced var dimensions. */
+static bool
+validate_indices(const xnd_t *x, ndt_context_t *ctx)
+{
+    const ndt_t * const t = x->type;
+
+    assert(ndt_is_concrete(t));
+
+    switch (t->tag) {
+    case VarDim: {
+        int64_t start, step, shape;
+
+        shape = ndt_var_indices_non_empty(&start, &step, t, x->index, ctx);
+        if (shape < 0) {
+            return false;
+        }
+
+        for (int64_t i = 0; i < shape; i++) {
+            const xnd_t next = xnd_var_dim_next(x, start, step, i);
+            if (!validate_indices(&next, ctx)) {
+                return false;
+            }
+        }
+
+        return true;
+    }
+
+    case VarDimElem: {
+        int64_t start, step, shape;
+
+        shape = ndt_var_indices(&start, &step, t, x->index, ctx);
+        if (shape < 0) {
+            return false;
+        }
+
+        const int64_t k = adjust_index(t->VarDimElem.index, shape, ctx);
+        if (k < 0) {
+            return false;
+        }
+
+        const xnd_t next = xnd_var_dim_next(x, start, step, k);
+        return validate_indices(&next, ctx);
+    }
+
+    default:
+        return true;
+    }
+}
+
 xnd_t
 xnd_subscript(const xnd_t *x, const xnd_index_t indices[], int len,
               ndt_context_t *ctx)
 {
+    bool have_index = false;
     bool have_slice = false;
 
+    if (len < 0 || len > NDT_MAX_DIM) {
+        ndt_err_format(ctx, NDT_IndexError, "too many indices");
+        return xnd_error;
+    }
+
     for (int i = 0; i < len; i++) {
+        if (indices[i].tag == Index) {
+            have_index = true;
+        }
         if (indices[i].tag == Slice) {
             have_slice = true;
-            break;
         }
     }
 
     if (have_slice) {
-
-
-
-
-        const ndt_t *t;
+        xnd_t res = xnd_multikey(x, indices, len, ctx);
+        if (xnd_err_occurred(&res)) {
+            return xnd_error;
+        }
 
-        if (res
+        if (have_index && !validate_indices(&res, ctx)) {
+            ndt_decref(res.type);
             return xnd_error;
         }
 
-
-
+        return res;
+    }
+    else {
+        xnd_t res = xnd_subtree(x, indices, len, ctx);
+        if (res.ptr == NULL) {
             return xnd_error;
         }
 
-        res.type
+        ndt_incref(res.type);
         return res;
     }
 }
@@ -1302,3 +1785,42 @@ xnd_double_is_big_endian(void)
 {
     return xnd_double_format==IEEE_BIG_ENDIAN;
 }
+
+static float
+bfloat16_to_float(uint16_t b)
+{
+    float f = 0;
+    uint16_t *p = (uint16_t *)((char *)&f);
+
+    if (xnd_float_is_big_endian()) {
+        p[0] = b;
+    }
+    else {
+        p[1] = b;
+    }
+
+    return f;
+}
+
+/*
+ * Unlike the corresponding Python conversion functions, Tensorflow does
+ * not raise OverflowError.
+ */
+void
+xnd_bfloat_pack(char *p, double x)
+{
+    float f = (float)x;
+    uint16_t u16;
+
+    u16 = xnd_round_to_bfloat16(f);
+    PACK_SINGLE(p, u16, uint16_t, 0);
+}
+
+double
+xnd_bfloat_unpack(char *p)
+{
+    uint16_t u16;
+
+    UNPACK_SINGLE(u16, p, uint16_t, 0);
+    return bfloat16_to_float(u16);
+}