xnd 0.2.0dev6 → 0.2.0dev7
This diff shows the changes between publicly available package versions as they appear in their respective public registries. It is provided for informational purposes only.
- checksums.yaml +4 -4
- data/README.md +2 -0
- data/Rakefile +1 -1
- data/ext/ruby_xnd/GPATH +0 -0
- data/ext/ruby_xnd/GRTAGS +0 -0
- data/ext/ruby_xnd/GTAGS +0 -0
- data/ext/ruby_xnd/extconf.rb +8 -5
- data/ext/ruby_xnd/gc_guard.c +53 -2
- data/ext/ruby_xnd/gc_guard.h +8 -2
- data/ext/ruby_xnd/include/overflow.h +147 -0
- data/ext/ruby_xnd/include/ruby_xnd.h +62 -0
- data/ext/ruby_xnd/include/xnd.h +590 -0
- data/ext/ruby_xnd/lib/libxnd.a +0 -0
- data/ext/ruby_xnd/lib/libxnd.so +1 -0
- data/ext/ruby_xnd/lib/libxnd.so.0 +1 -0
- data/ext/ruby_xnd/lib/libxnd.so.0.2.0dev3 +0 -0
- data/ext/ruby_xnd/ruby_xnd.c +556 -47
- data/ext/ruby_xnd/ruby_xnd.h +2 -1
- data/ext/ruby_xnd/xnd/Makefile +80 -0
- data/ext/ruby_xnd/xnd/config.h +26 -0
- data/ext/ruby_xnd/xnd/config.h.in +3 -0
- data/ext/ruby_xnd/xnd/config.log +421 -0
- data/ext/ruby_xnd/xnd/config.status +1023 -0
- data/ext/ruby_xnd/xnd/configure +376 -8
- data/ext/ruby_xnd/xnd/configure.ac +48 -7
- data/ext/ruby_xnd/xnd/doc/xnd/index.rst +3 -1
- data/ext/ruby_xnd/xnd/doc/xnd/{types.rst → xnd.rst} +3 -18
- data/ext/ruby_xnd/xnd/libxnd/Makefile +142 -0
- data/ext/ruby_xnd/xnd/libxnd/Makefile.in +43 -3
- data/ext/ruby_xnd/xnd/libxnd/Makefile.vc +19 -3
- data/ext/ruby_xnd/xnd/libxnd/bitmaps.c +42 -3
- data/ext/ruby_xnd/xnd/libxnd/bitmaps.o +0 -0
- data/ext/ruby_xnd/xnd/libxnd/bounds.c +366 -0
- data/ext/ruby_xnd/xnd/libxnd/bounds.o +0 -0
- data/ext/ruby_xnd/xnd/libxnd/contrib.h +98 -0
- data/ext/ruby_xnd/xnd/libxnd/contrib/bfloat16.h +213 -0
- data/ext/ruby_xnd/xnd/libxnd/copy.c +155 -4
- data/ext/ruby_xnd/xnd/libxnd/copy.o +0 -0
- data/ext/ruby_xnd/xnd/libxnd/cuda/cuda_memory.cu +121 -0
- data/ext/ruby_xnd/xnd/libxnd/cuda/cuda_memory.h +58 -0
- data/ext/ruby_xnd/xnd/libxnd/equal.c +195 -7
- data/ext/ruby_xnd/xnd/libxnd/equal.o +0 -0
- data/ext/ruby_xnd/xnd/libxnd/inline.h +32 -0
- data/ext/ruby_xnd/xnd/libxnd/libxnd.a +0 -0
- data/ext/ruby_xnd/xnd/libxnd/libxnd.so +1 -0
- data/ext/ruby_xnd/xnd/libxnd/libxnd.so.0 +1 -0
- data/ext/ruby_xnd/xnd/libxnd/libxnd.so.0.2.0dev3 +0 -0
- data/ext/ruby_xnd/xnd/libxnd/shape.c +207 -0
- data/ext/ruby_xnd/xnd/libxnd/shape.o +0 -0
- data/ext/ruby_xnd/xnd/libxnd/split.c +2 -2
- data/ext/ruby_xnd/xnd/libxnd/split.o +0 -0
- data/ext/ruby_xnd/xnd/libxnd/tests/Makefile +39 -0
- data/ext/ruby_xnd/xnd/libxnd/xnd.c +613 -91
- data/ext/ruby_xnd/xnd/libxnd/xnd.h +145 -4
- data/ext/ruby_xnd/xnd/libxnd/xnd.o +0 -0
- data/ext/ruby_xnd/xnd/python/test_xnd.py +1125 -50
- data/ext/ruby_xnd/xnd/python/xnd/__init__.py +609 -124
- data/ext/ruby_xnd/xnd/python/xnd/_version.py +1 -0
- data/ext/ruby_xnd/xnd/python/xnd/_xnd.c +1652 -101
- data/ext/ruby_xnd/xnd/python/xnd/libxnd.a +0 -0
- data/ext/ruby_xnd/xnd/python/xnd/libxnd.so +1 -0
- data/ext/ruby_xnd/xnd/python/xnd/libxnd.so.0 +1 -0
- data/ext/ruby_xnd/xnd/python/xnd/libxnd.so.0.2.0dev3 +0 -0
- data/ext/ruby_xnd/xnd/python/xnd/pyxnd.h +1 -1
- data/ext/ruby_xnd/xnd/python/xnd/util.h +25 -0
- data/ext/ruby_xnd/xnd/python/xnd/xnd.h +590 -0
- data/ext/ruby_xnd/xnd/python/xnd_randvalue.py +106 -6
- data/ext/ruby_xnd/xnd/python/xnd_support.py +4 -0
- data/ext/ruby_xnd/xnd/setup.py +46 -4
- data/lib/ruby_xnd.so +0 -0
- data/lib/xnd.rb +39 -3
- data/lib/xnd/version.rb +2 -2
- data/xnd.gemspec +2 -1
- metadata +58 -5
Binary file

@@ -80,6 +80,23 @@ _var_dim_next(const xnd_t *x, const int64_t start, const int64_t step,
     return next;
 }
 
+static inline xnd_t
+_array_next(const xnd_t *x, const int64_t i)
+{
+    const ndt_t *t = x->type;
+    const ndt_t *u = t->Array.type;
+    xnd_t next;
+
+    assert(t->tag == Array);
+
+    next.bitmap = xnd_bitmap_empty;
+    next.index = 0;
+    next.type = u;
+    next.ptr = XND_ARRAY_DATA(x->ptr) + i * next.type->datasize;
+
+    return next;
+}
+
 static inline xnd_t
 _tuple_next(const xnd_t *x, const int64_t i)
 {

@@ -108,6 +125,21 @@ _record_next(const xnd_t *x, const int64_t i)
     return next;
 }
 
+static inline xnd_t
+_union_next(const xnd_t *x)
+{
+    uint8_t i = XND_UNION_TAG(x->ptr);
+    const ndt_t *t = x->type;
+    xnd_t next;
+
+    next.bitmap = xnd_bitmap_empty;
+    next.index = 0;
+    next.type = t->Union.types[i];
+    next.ptr = x->ptr+1;
+
+    return next;
+}
+
 static inline xnd_t
 _ref_next(const xnd_t *x)
 {
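The two new inline helpers added above both derive a child view directly from the parent's pointer: for the flexible Array type the element data starts at XND_ARRAY_DATA(x->ptr), and for Union the first byte of the value is the tag, so the selected member's payload begins at x->ptr+1. A rough, hypothetical illustration of that union layout (not the actual libxnd macros or structs):

```c
#include <stdint.h>
#include <string.h>

/* Conceptual layout of a libxnd union value: one leading tag byte that
 * selects t->Union.types[tag], followed by the member payload.  The real
 * code works on a raw char pointer via XND_UNION_TAG(); this struct and
 * helper exist only to illustrate why _union_next() uses x->ptr+1. */
struct union_value_sketch {
    uint8_t tag;        /* which member is active */
    char payload[16];   /* member data, starts one byte after the tag */
};

static int64_t
read_i64_member(const char *ptr)
{
    int64_t v;
    memcpy(&v, ptr + 1, sizeof v);  /* skip the tag byte, like _union_next() */
    return v;
}
```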
Binary file

@@ -0,0 +1 @@
+ext/ruby_xnd/xnd/libxnd/libxnd.so.0.2.0dev3

@@ -0,0 +1 @@
+ext/ruby_xnd/xnd/libxnd/libxnd.so.0.2.0dev3
Binary file

@@ -0,0 +1,207 @@
+/*
+ * BSD 3-Clause License
+ *
+ * Copyright (c) 2017-2018, plures
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice,
+ *    this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ *    this list of conditions and the following disclaimer in the documentation
+ *    and/or other materials provided with the distribution.
+ *
+ * 3. Neither the name of the copyright holder nor the names of its
+ *    contributors may be used to endorse or promote products derived from
+ *    this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+ * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+ * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+
+#include <stdlib.h>
+#include <stdint.h>
+#include <string.h>
+#include <inttypes.h>
+#include "ndtypes.h"
+#include "xnd.h"
+#include "contrib.h"
+#include "overflow.h"
+
+
+static bool
+shape_equal(const ndt_ndarray_t *dest, const ndt_ndarray_t *src)
+{
+    if (dest->ndim != src->ndim) {
+        return false;
+    }
+
+    for (int i = 0; i < src->ndim; i++) {
+        if (dest->shape[i] != src->shape[i]) {
+            return false;
+        }
+    }
+
+    return true;
+}
+
+static int64_t
+prod(const int64_t shape[], int N)
+{
+    bool overflow = false;
+    int64_t p = 1;
+
+    for (int64_t i = 0; i < N; i++) {
+        p = MULi64(p, shape[i], &overflow);
+        if (overflow) {
+            return -1;
+        }
+    }
+
+    return p;
+}
+
+static inline bool
+zero_in_shape(const ndt_ndarray_t *x)
+{
+    for (int i = 0; i < x->ndim; i++) {
+        if (x->shape[i] == 0) {
+            return true;
+        }
+    }
+
+    return false;
+}
+
+static void
+init_contiguous_c_strides(ndt_ndarray_t *dest, const ndt_ndarray_t *src)
+{
+    int64_t q;
+    int64_t i;
+
+    if (src->ndim == 0 && dest->ndim == 0) {
+        return;
+    }
+
+    q = 1;
+    for (i = dest->ndim-1; i >= 0; i--) {
+        dest->steps[i] = q;
+        q *= dest->shape[i];
+    }
+}
+
+static void
+init_contiguous_f_strides(ndt_ndarray_t *dest, const ndt_ndarray_t *src)
+{
+    int64_t q;
+    int64_t i;
+
+    if (src->ndim == 0 && dest->ndim == 0) {
+        return;
+    }
+
+    q = 1;
+    for (i = 0; i < dest->ndim; i++) {
+        dest->steps[i] = q;
+        q *= dest->shape[i];
+    }
+}
+
+xnd_t
+xnd_reshape(const xnd_t *x, int64_t shape[], int ndim, char order,
+            ndt_context_t *ctx)
+{
+    const ndt_t *t = x->type;
+    ndt_ndarray_t src, dest;
+    int64_t p, q;
+    int ret;
+    int use_fortran = 0;
+
+    if (order == 'F') {
+        use_fortran = 1;
+    }
+    else if (order == 'A') {
+        use_fortran = ndt_is_f_contiguous(t);
+    }
+    else if (order != 'C') {
+        ndt_err_format(ctx, NDT_ValueError, "'order' must be 'C', 'F' or 'A'");
+        return xnd_error;
+    }
+
+    if (ndt_as_ndarray(&src, t, ctx) < 0) {
+        return xnd_error;
+    }
+
+    dest.ndim = ndim;
+    dest.itemsize = src.itemsize;
+    for (int i = 0; i < ndim; i++) {
+        dest.shape[i] = shape[i];
+        dest.steps[i] = 0;
+        dest.strides[i] = 0;
+    }
+
+    p = prod(src.shape, src.ndim);
+    q = prod(dest.shape, dest.ndim);
+    if (p < 0 || q < 0) {
+        ndt_err_format(ctx, NDT_ValueError,
+            "reshaped array has too many elements");
+        return xnd_error;
+    }
+    if (p != q) {
+        ndt_err_format(ctx, NDT_ValueError,
+            "shapes do not have the same number of elements");
+        return xnd_error;
+    }
+
+    if (shape_equal(&dest, &src)) {
+        dest = src;
+    }
+    else if (zero_in_shape(&dest)) {
+        ;
+    }
+    else if (!use_fortran && ndt_is_c_contiguous(t)) {
+        init_contiguous_c_strides(&dest, &src);
+    }
+    else if (use_fortran && ndt_is_f_contiguous(t)) {
+        init_contiguous_f_strides(&dest, &src);
+    }
+    else {
+        ret = xnd_nocopy_reshape(dest.shape, dest.steps, dest.ndim,
+                                 src.shape, src.steps, src.ndim, use_fortran);
+        if (!ret) {
+            ndt_err_format(ctx, NDT_ValueError, "inplace reshape not possible");
+            return xnd_error;
+        }
+    }
+
+    xnd_t res = *x;
+
+    const ndt_t *u = ndt_copy(ndt_dtype(t), ctx);
+    if (u == NULL) {
+        return xnd_error;
+    }
+
+    for (int i = dest.ndim-1; i >= 0; i--) {
+        const ndt_t *v = ndt_fixed_dim(u, dest.shape[i], dest.steps[i], ctx);
+        ndt_decref(u);
+        if (v == NULL) {
+            return xnd_error;
+        }
+        u = v;
+    }
+
+    res.type = u;
+    return res;
+}
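The 207-line file added above (shape.c, per the file list) implements xnd_reshape(): it validates the requested shape, reuses the existing buffer when the data is already contiguous in the requested order, and rebuilds a fixed-dim type around a copy of the dtype. A hypothetical usage sketch, assuming the libxnd/libndtypes headers, ndt_err_fprint(), and the XND_OWN_EMBEDDED ownership flag are available as in the Python and Ruby bindings:

```c
#include <stdio.h>
#include <stdint.h>
#include "ndtypes.h"
#include "xnd.h"

int main(void)
{
    NDT_STATIC_CONTEXT(ctx);

    /* Allocate an empty 2x3 int64 container. */
    xnd_master_t *x = xnd_empty_from_string("2 * 3 * int64", XND_OWN_EMBEDDED, &ctx);
    if (x == NULL) {
        ndt_err_fprint(stderr, &ctx);
        return 1;
    }

    /* Request a 3x2 view of the same buffer. */
    int64_t shape[2] = {3, 2};
    xnd_t view = xnd_reshape(&x->master, shape, 2, 'C', &ctx);
    if (xnd_err_occurred(&view)) {
        ndt_err_fprint(stderr, &ctx);
        xnd_del(x);
        return 1;
    }

    ndt_decref(view.type);  /* the reshaped type is a fresh reference owned by the caller */
    xnd_del(x);
    return 0;
}
```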
Binary file

@@ -56,7 +56,7 @@ static void
 free_slices(xnd_t *lst, int64_t len)
 {
     for (int64_t i = 0; i < len; i++) {
-
+        ndt_decref(lst[i].type);
     }
 
     ndt_free(lst);

@@ -269,7 +269,7 @@ xnd_split(const xnd_t *x, int64_t *nparts, int max_outer, ndt_context_t *ctx)
     }
 
     for (int64_t i = 0; i < nrows; i++) {
-        result[i] =
+        result[i] = xnd_subscript(x, indices+(i*ncols), nindices[i], ctx);
         if (ndt_err_occurred(ctx)) {
             ndt_free(nindices);
             ndt_free(indices);
Binary file

@@ -0,0 +1,39 @@
+
+SRCDIR = ..
+
+CC = gcc
+LIBSTATIC = libxnd.a
+LIBSHARED = libxnd.so.0.2.0dev3
+
+INCLUDES = /home/sameer/.rvm/gems/ruby-2.4.1/gems/ndtypes-0.2.0dev6/ext/ruby_ndtypes/include
+LIBS = ../../ndtypes/libndtypes
+
+CONFIGURE_CFLAGS = -Wall -Wextra -std=c11 -pedantic -O2 -g
+XND_CFLAGS = $(strip $(CONFIGURE_CFLAGS) $(CFLAGS))
+
+
+default: runtest runtest_shared
+
+
+runtest:\
+Makefile runtest.c test_fixed.c test.h $(SRCDIR)/xnd.h $(SRCDIR)/$(LIBSTATIC)
+	$(CC) -I$(SRCDIR) -I$(INCLUDES) $(XND_CFLAGS) \
+	    -o runtest runtest.c test_fixed.c $(SRCDIR)/libxnd.a \
+	    $(LIBS)/libndtypes.a
+
+runtest_shared:\
+Makefile runtest.c test_fixed.c test.h $(SRCDIR)/xnd.h $(SRCDIR)/$(LIBSHARED)
+	$(CC) -I$(SRCDIR) -I$(INCLUDES) -L$(SRCDIR) -L$(LIBS) \
+	    $(XND_CFLAGS) -o runtest_shared runtest.c test_fixed.c -lxnd -lndtypes
+
+
+FORCE:
+
+clean: FORCE
+	rm -f *.o *.gch *.gcda *.gcno *.gcov *.dyn *.dpi *.lock
+	rm -f runtest runtest_shared
+
+distclean: clean
+	rm -rf Makefile
+
+
@@ -35,15 +35,20 @@
 #include <stdint.h>
 #include <inttypes.h>
 #include <string.h>
+#include <math.h>
 #include <assert.h>
 #include "ndtypes.h"
 #include "xnd.h"
 #include "inline.h"
 #include "contrib.h"
+#include "contrib/bfloat16.h"
+#include "cuda/cuda_memory.h"
+#ifndef _MSC_VER
+#include "config.h"
+#endif
 
 
 static int xnd_init(xnd_t * const x, const uint32_t flags, ndt_context_t *ctx);
-static void xnd_clear(xnd_t * const x, const uint32_t flags);
 
 
 /*****************************************************************************/

@@ -72,32 +77,104 @@ xnd_err_occurred(const xnd_t *x)
 static bool
 requires_init(const ndt_t * const t)
 {
-
+    return !ndt_is_ref_free(t);
+}
 
-
-
-
-
-
-
-
-
-
+static bool
+is_primary_type(const ndt_t * const t, ndt_context_t *ctx)
+{
+    if (ndt_is_abstract(t)) {
+        ndt_err_format(ctx, NDT_ValueError,
+            "cannot create xnd container from abstract type");
+        return false;
+    }
+
+    if (t->flags & NDT_CHAR) {
+        ndt_err_format(ctx, NDT_NotImplementedError, "char is not implemented");
         return false;
+    }
+
+    switch (t->tag) {
+    case FixedDim: {
+        if (!ndt_is_c_contiguous(t) && !ndt_is_f_contiguous(t)) {
+            ndt_err_format(ctx, NDT_ValueError,
+                "cannot create xnd container from non-contiguous type");
+            return false;
+        }
+        return true;
+    }
+    case VarDim: case VarDimElem: {
+        if (!ndt_is_var_contiguous(t)) {
+            ndt_err_format(ctx, NDT_ValueError,
+                "cannot create xnd container from non-contiguous type");
+            return false;
+        }
+        return true;
+    }
+    case Array: {
+        if (requires_init(t)) {
+            ndt_err_format(ctx, NDT_ValueError,
+                "flexible arrays cannot have dtypes that require "
+                "initialization");
+            return false;
+        }
+        return true;
+    }
     default:
         return true;
     }
+
+    ndt_err_format(ctx, NDT_ValueError,
+        "cannot create xnd container from non-contiguous type");
+    return false;
 }
 
+
 /* Create and initialize memory with type 't'. */
+#ifdef HAVE_CUDA
+static char *
+xnd_cuda_new(const ndt_t * const t, ndt_context_t *ctx)
+{
+    void *ptr;
+
+    if (!is_primary_type(t, ctx)) {
+        return NULL;
+    }
+
+    if (!ndt_is_pointer_free(t)) {
+        ndt_err_format(ctx, NDT_ValueError,
+            "only pointer-free types are supported on cuda");
+        return NULL;
+    }
+
+    ptr = xnd_cuda_calloc_managed(t->align, t->datasize, ctx);
+    if (ptr == NULL) {
+        return NULL;
+    }
+
+    return ptr;
+}
+#else
+static char *
+xnd_cuda_new(const ndt_t * const t, ndt_context_t *ctx)
+{
+    (void)t;
+
+    ndt_err_format(ctx, NDT_ValueError, "xnd compiled without cuda support");
+    return NULL;
+}
+#endif
+
 static char *
 xnd_new(const ndt_t * const t, const uint32_t flags, ndt_context_t *ctx)
 {
     xnd_t x;
 
-    if (
-
-
+    if (flags & XND_CUDA_MANAGED) {
+        return xnd_cuda_new(t, ctx);
+    }
+
+    if (!is_primary_type(t, ctx)) {
         return NULL;
     }
 

@@ -136,6 +213,13 @@ xnd_init(xnd_t * const x, const uint32_t flags, ndt_context_t *ctx)
 {
     const ndt_t * const t = x->type;
 
+    if (flags & XND_CUDA_MANAGED) {
+        ndt_err_format(ctx, NDT_RuntimeError,
+            "internal error: cannot initialize cuda memory with a type "
+            "that contains pointers");
+        return -1;
+    }
+
     if (ndt_is_abstract(t)) {
         ndt_err_format(ctx, NDT_ValueError,
             "cannot initialize concrete memory from abstract type");

@@ -199,6 +283,16 @@ xnd_init(xnd_t * const x, const uint32_t flags, ndt_context_t *ctx)
         return 0;
     }
 
+    case Union: {
+        xnd_t next = _union_next(x);
+        if (xnd_init(&next, flags, ctx) < 0) {
+            xnd_clear(&next, flags);
+            return -1;
+        }
+
+        return 0;
+    }
+
     /*
      * Ref represents a pointer to an explicit type. If XND_OWN_POINTERS
      * is set, allocate memory for that type and set the pointer.

@@ -247,10 +341,19 @@ xnd_init(xnd_t * const x, const uint32_t flags, ndt_context_t *ctx)
         return 0;
     }
 
+    /* Array is already initialized by calloc(). */
+    case Array:
+        return 0;
+
     /* Categorical is already initialized by calloc(). */
     case Categorical:
         return 0;
 
+    case VarDimElem:
+        ndt_err_format(ctx, NDT_ValueError,
+            "cannot initialize var elem dimension");
+        return -1;
+
     case Char:
         ndt_err_format(ctx, NDT_NotImplementedError, "char not implemented");
         return -1;
@@ -259,8 +362,8 @@ xnd_init(xnd_t * const x, const uint32_t flags, ndt_context_t *ctx)
     case Bool:
     case Int8: case Int16: case Int32: case Int64:
     case Uint8: case Uint16: case Uint32: case Uint64:
-    case Float16: case Float32: case Float64:
-    case Complex32: case Complex64: case Complex128:
+    case BFloat16: case Float16: case Float32: case Float64:
+    case BComplex32: case Complex32: case Complex64: case Complex128:
     case FixedString: case FixedBytes:
     case String: case Bytes:
         return 0;

@@ -288,7 +391,7 @@ xnd_empty_from_string(const char *s, uint32_t flags, ndt_context_t *ctx)
 {
     xnd_bitmap_t b = {.data=NULL, .size=0, .next=NULL};
     xnd_master_t *x;
-    ndt_t *t;
+    const ndt_t *t;
     char *ptr;
 
     if (!(flags & XND_OWN_TYPE)) {

@@ -310,13 +413,13 @@ xnd_empty_from_string(const char *s, uint32_t flags, ndt_context_t *ctx)
 
     if (!ndt_is_concrete(t)) {
         ndt_err_format(ctx, NDT_ValueError, "type must be concrete");
-
+        ndt_decref(t);
         ndt_free(x);
         return NULL;
     }
 
     if (xnd_bitmap_init(&b, t,ctx) < 0) {
-
+        ndt_decref(t);
         ndt_free(x);
         return NULL;
     }

@@ -324,7 +427,7 @@ xnd_empty_from_string(const char *s, uint32_t flags, ndt_context_t *ctx)
     ptr = xnd_new(t, flags, ctx);
     if (ptr == NULL) {
         xnd_bitmap_clear(&b);
-
+        ndt_decref(t);
         ndt_free(x);
         return NULL;
     }

@@ -401,10 +504,13 @@ xnd_from_xnd(xnd_t *src, uint32_t flags, ndt_context_t *ctx)
 {
     xnd_master_t *x;
 
+    /* XXX xnd_from_xnd() will probably be replaced. */
+    assert(!(flags & XND_CUDA_MANAGED));
+
     x = ndt_alloc(1, sizeof *x);
     if (x == NULL) {
         xnd_clear(src, XND_OWN_ALL);
-
+        ndt_decref(src->type);
         ndt_aligned_free(src->ptr);
         xnd_bitmap_clear(&src->bitmap);
         return ndt_memory_error(ctx);

@@ -424,6 +530,10 @@ xnd_from_xnd(xnd_t *src, uint32_t flags, ndt_context_t *ctx)
 static bool
 requires_clear(const ndt_t * const t)
 {
+    if (t->tag == Array) {
+        return true;
+    }
+
     const ndt_t *dtype = ndt_dtype(t);
 
     switch (dtype->tag) {

@@ -431,8 +541,8 @@ requires_clear(const ndt_t * const t)
     case Bool:
     case Int8: case Int16: case Int32: case Int64:
     case Uint8: case Uint16: case Uint32: case Uint64:
-    case Float16: case Float32: case Float64:
-    case Complex32: case Complex64: case Complex128:
+    case BFloat16: case Float16: case Float32: case Float64:
+    case BComplex32: case Complex32: case Complex64: case Complex128:
     case FixedString: case FixedBytes:
         return false;
     default:

@@ -445,6 +555,7 @@ static void
 xnd_clear_ref(xnd_t *x, const uint32_t flags)
 {
     assert(x->type->tag == Ref);
+    assert(!(flags & XND_CUDA_MANAGED));
 
     if (flags & XND_OWN_POINTERS) {
         ndt_aligned_free(XND_POINTER_DATA(x->ptr));

@@ -457,6 +568,7 @@ static void
 xnd_clear_string(xnd_t *x, const uint32_t flags)
 {
     assert(x->type->tag == String);
+    assert(!(flags & XND_CUDA_MANAGED));
 
     if (flags & XND_OWN_STRINGS) {
         ndt_free(XND_POINTER_DATA(x->ptr));
@@ -469,21 +581,38 @@ static void
 xnd_clear_bytes(xnd_t *x, const uint32_t flags)
 {
     assert(x->type->tag == Bytes);
+    assert(!(flags & XND_CUDA_MANAGED));
 
     if (flags & XND_OWN_BYTES) {
         ndt_aligned_free(XND_BYTES_DATA(x->ptr));
+        XND_BYTES_SIZE(x->ptr) = 0;
         XND_BYTES_DATA(x->ptr) = NULL;
     }
 }
 
-/*
+/* Flexible array data must always be allocated by aligned allocators. */
 static void
+xnd_clear_array(xnd_t *x, const uint32_t flags)
+{
+    assert(x->type->tag == Array);
+    assert(!(flags & XND_CUDA_MANAGED));
+
+    if (flags & XND_OWN_ARRAYS) {
+        ndt_aligned_free(XND_ARRAY_DATA(x->ptr));
+        XND_ARRAY_SHAPE(x->ptr) = 0;
+        XND_ARRAY_DATA(x->ptr) = NULL;
+    }
+}
+
+/* Clear embedded pointers in the data according to flags. */
+void
 xnd_clear(xnd_t * const x, const uint32_t flags)
 {
     NDT_STATIC_CONTEXT(ctx);
     const ndt_t * const t = x->type;
 
     assert(ndt_is_concrete(t));
+    assert(!(flags & XND_CUDA_MANAGED));
 
     switch (t->tag) {
     case FixedDim: {

@@ -516,6 +645,23 @@ xnd_clear(xnd_t * const x, const uint32_t flags)
         return;
     }
 
+    case VarDimElem: {
+        fprintf(stderr, "xnd_clear: internal error: unexpected var elem dimension\n");
+        return;
+    }
+
+    case Array: {
+        const int64_t shape = XND_ARRAY_SHAPE(x->ptr);
+
+        for (int64_t i = 0; i < shape; i++) {
+            xnd_t next = _array_next(x, i);
+            xnd_clear(&next, flags);
+        }
+
+        xnd_clear_array(x, flags);
+        return;
+    }
+
     case Tuple: {
         for (int64_t i = 0; i < t->Tuple.shape; i++) {
             xnd_t next = _tuple_next(x, i);

@@ -534,6 +680,12 @@ xnd_clear(xnd_t * const x, const uint32_t flags)
         return;
     }
 
+    case Union: {
+        xnd_t next = _union_next(x);
+        xnd_clear(&next, flags);
+        return;
+    }
+
     case Ref: {
         if (flags & XND_OWN_POINTERS) {
             xnd_t next = _ref_next(x);

@@ -559,8 +711,8 @@ xnd_clear(xnd_t * const x, const uint32_t flags)
     case Bool:
     case Int8: case Int16: case Int32: case Int64:
     case Uint8: case Uint16: case Uint32: case Uint64:
-    case Float16: case Float32: case Float64:
-    case Complex32: case Complex64: case Complex128:
+    case BFloat16: case Float16: case Float32: case Float64:
+    case BComplex32: case Complex32: case Complex64: case Complex128:
     case FixedString: case FixedBytes:
         return;
 

@@ -603,11 +755,22 @@ xnd_del_buffer(xnd_t *x, uint32_t flags)
     }
 
     if (flags & XND_OWN_TYPE) {
-
+        ndt_decref(x->type);
     }
 
     if (flags & XND_OWN_DATA) {
-
+        if (flags & XND_CUDA_MANAGED) {
+#ifdef HAVE_CUDA
+            xnd_cuda_free(x->ptr);
+#else
+            fprintf(stderr,
+                "xnd_del_buffer: internal error: XND_CUDA_MANAGED set "
+                "without cuda support\n");
+#endif
+        }
+        else {
+            ndt_aligned_free(x->ptr);
+        }
     }
 }
 
@@ -632,23 +795,48 @@ xnd_del(xnd_master_t *x)
 
 
 /*****************************************************************************/
-/*
+/* Index checks */
 /*****************************************************************************/
 
 static int64_t
 get_index(const xnd_index_t *key, int64_t shape, ndt_context_t *ctx)
+{
+    switch (key->tag) {
+    case Index:
+        return adjust_index(key->Index, shape, ctx);
+
+    case FieldName:
+        ndt_err_format(ctx, NDT_ValueError,
+            "expected integer index, got field name: '%s'", key->FieldName);
+        return -1;
+
+    case Slice:
+        ndt_err_format(ctx, NDT_ValueError,
+            "expected integer index, got slice");
+        return -1;
+    }
+
+    /* NOT REACHED: tags should be exhaustive */
+    ndt_err_format(ctx, NDT_RuntimeError, "invalid index tag");
+    return -1;
+}
+
+/*
+ * Ragged arrays have multiple shapes in a single dimension that are not known
+ * when a VarDimElem is created. Adjusting the index must be done when the
+ * VarDimElem is accessed and the slices have been applied.
+ */
+static int64_t
+get_index_var_elem(const xnd_index_t *key, ndt_context_t *ctx)
 {
     switch (key->tag) {
     case Index: {
         int64_t i = key->Index;
-        if (i < 0) {
-            i += shape;
-        }
 
-        if (i <
+        if (i < INT32_MIN || i > INT32_MAX) {
             ndt_err_format(ctx, NDT_IndexError,
                 "index with value %" PRIi64 " out of bounds", key->Index);
-            return
+            return INT64_MIN;
         }
 
         return i;

@@ -657,17 +845,17 @@ get_index(const xnd_index_t *key, int64_t shape, ndt_context_t *ctx)
     case FieldName:
         ndt_err_format(ctx, NDT_ValueError,
             "expected integer index, got field name: '%s'", key->FieldName);
-        return
+        return INT64_MIN;
 
     case Slice:
         ndt_err_format(ctx, NDT_ValueError,
             "expected integer index, got slice");
-        return
+        return INT64_MIN;
     }
 
     /* NOT REACHED: tags should be exhaustive */
     ndt_err_format(ctx, NDT_RuntimeError, "invalid index tag");
-    return
+    return INT64_MIN;
 }
 
 static int64_t

@@ -698,6 +886,34 @@ get_index_record(const ndt_t *t, const xnd_index_t *key, ndt_context_t *ctx)
     return -1;
 }
 
+static int64_t
+get_index_union(const ndt_t *t, const xnd_index_t *key, ndt_context_t *ctx)
+{
+    assert(t->tag == Union);
+
+    switch (key->tag) {
+    case FieldName: {
+        int64_t i;
+
+        for (i = 0; i < t->Union.ntags; i++) {
+            if (strcmp(key->FieldName, t->Union.tags[i]) == 0) {
+                return i;
+            }
+        }
+
+        ndt_err_format(ctx, NDT_ValueError,
+            "invalid field name '%s'", key->FieldName);
+        return -1;
+    }
+    case Index: case Slice:
+        return get_index(key, t->Union.ntags, ctx);
+    }
+
+    /* NOT REACHED: tags should be exhaustive */
+    ndt_err_format(ctx, NDT_RuntimeError, "invalid index tag");
+    return -1;
+}
+
 static void
 set_index_exception(bool indexable, ndt_context_t *ctx)
 {
@@ -709,11 +925,72 @@ set_index_exception(bool indexable, ndt_context_t *ctx)
     }
 }
 
-
+
+/*****************************************************************************/
+/* Stored indices */
+/*****************************************************************************/
+
+bool
+have_stored_index(const ndt_t *t)
+{
+    return t->tag == VarDimElem;
+}
+
+int64_t
+get_stored_index(const ndt_t *t)
+{
+    return t->VarDimElem.index;
+}
+
+/* skip stored indices */
 xnd_t
-
+apply_stored_index(const xnd_t *x, ndt_context_t *ctx)
 {
     const ndt_t * const t = x->type;
+    int64_t start, step, shape;
+
+    if (t->tag != VarDimElem) {
+        ndt_err_format(ctx, NDT_RuntimeError,
+            "apply_stored_index: need VarDimElem");
+        return xnd_error;
+    }
+
+    shape = ndt_var_indices(&start, &step, t, x->index, ctx);
+    if (shape < 0) {
+        return xnd_error;
+    }
+
+    const int64_t i = adjust_index(t->VarDimElem.index, shape, ctx);
+    if (i < 0) {
+        return xnd_error;
+    }
+
+    return xnd_var_dim_next(x, start, step, i);
+}
+
+xnd_t
+apply_stored_indices(const xnd_t *x, ndt_context_t *ctx)
+{
+    xnd_t tl = *x;
+
+    while (tl.type->tag == VarDimElem) {
+        tl = apply_stored_index(&tl, ctx);
+    }
+
+    return tl;
+}
+
+
+/*****************************************************************************/
+/* Subtrees (single elements are a special case) */
+/*****************************************************************************/
+
+/* Return a typed subtree of a memory block */
+static xnd_t
+_xnd_subtree_index(const xnd_t *x, const int64_t *indices, int len, ndt_context_t *ctx)
+{
+    APPLY_STORED_INDICES_XND(x)
+    const ndt_t * const t = x->type;
 
     assert(ndt_is_concrete(t));
 

@@ -731,14 +1008,13 @@ xnd_subtree_index(const xnd_t *x, const int64_t *indices, int len, ndt_context_t
 
     switch (t->tag) {
     case FixedDim: {
-
-
-            "fixed dim index out of bounds");
+        const int64_t k = adjust_index(i, t->FixedDim.shape, ctx);
+        if (k < 0) {
             return xnd_error;
         }
 
-        const xnd_t next = xnd_fixed_dim_next(x,
-        return
+        const xnd_t next = xnd_fixed_dim_next(x, k);
+        return _xnd_subtree_index(&next, indices+1, len-1, ctx);
     }
 
     case VarDim: {

@@ -749,41 +1025,74 @@ xnd_subtree_index(const xnd_t *x, const int64_t *indices, int len, ndt_context_t
             return xnd_error;
         }
 
-
-
+        const int64_t k = adjust_index(i, shape, ctx);
+        if (k < 0) {
             return xnd_error;
         }
 
-        const xnd_t next = xnd_var_dim_next(x, start, step,
-        return
+        const xnd_t next = xnd_var_dim_next(x, start, step, k);
+        return _xnd_subtree_index(&next, indices+1, len-1, ctx);
     }
 
     case Tuple: {
-
-
+        const int64_t k = adjust_index(i, t->Tuple.shape, ctx);
+        if (k < 0) {
             return xnd_error;
         }
 
-        const xnd_t next = xnd_tuple_next(x,
+        const xnd_t next = xnd_tuple_next(x, k, ctx);
         if (next.ptr == NULL) {
             return xnd_error;
         }
 
-        return
+        return _xnd_subtree_index(&next, indices+1, len-1, ctx);
     }
 
     case Record: {
-
-
+        const int64_t k = adjust_index(i, t->Record.shape, ctx);
+        if (k < 0) {
             return xnd_error;
         }
 
-        const xnd_t next = xnd_record_next(x,
+        const xnd_t next = xnd_record_next(x, k, ctx);
+        if (next.ptr == NULL) {
+            return xnd_error;
+        }
+
+        return _xnd_subtree_index(&next, indices+1, len-1, ctx);
+    }
+
+    case Union: {
+        const int64_t k = adjust_index(i, t->Union.ntags, ctx);
+        if (k < 0) {
+            return xnd_error;
+        }
+
+        const uint8_t l = XND_UNION_TAG(x->ptr);
+        if (k != l) {
+            ndt_err_format(ctx, NDT_ValueError,
+                "tag mismatch in union addressing: expected '%s', got '%s'",
+                t->Union.tags[l], t->Union.tags[k]);
+            return xnd_error;
+        }
+
+        const xnd_t next = xnd_union_next(x, ctx);
         if (next.ptr == NULL) {
             return xnd_error;
         }
 
-        return
+        return _xnd_subtree_index(&next, indices+1, len-1, ctx);
+    }
+
+    case Array: {
+        const int64_t shape = XND_ARRAY_SHAPE(x->ptr);
+        const int64_t k = adjust_index(i, shape, ctx);
+        if (k < 0) {
+            return xnd_error;
+        }
+
+        const xnd_t next = xnd_array_next(x, k);
+        return _xnd_subtree_index(&next, indices+1, len-1, ctx);
     }
 
     case Ref: {
@@ -792,7 +1101,7 @@ xnd_subtree_index(const xnd_t *x, const int64_t *indices, int len, ndt_context_t
             return xnd_error;
         }
 
-        return
+        return _xnd_subtree_index(&next, indices, len, ctx);
     }
 
     case Constr: {

@@ -801,16 +1110,16 @@ xnd_subtree_index(const xnd_t *x, const int64_t *indices, int len, ndt_context_t
             return xnd_error;
         }
 
-        return
+        return _xnd_subtree_index(&next, indices, len, ctx);
     }
 
-
+    case Nominal: {
         const xnd_t next = xnd_nominal_next(x, ctx);
         if (next.ptr == NULL) {
             return xnd_error;
         }
 
-        return
+        return _xnd_subtree_index(&next, indices, len, ctx);
     }
 
     default:

@@ -819,6 +1128,17 @@ xnd_subtree_index(const xnd_t *x, const int64_t *indices, int len, ndt_context_t
     }
 }
 
+xnd_t
+xnd_subtree_index(const xnd_t *x, const int64_t *indices, int len, ndt_context_t *ctx)
+{
+    if (len < 0 || len > NDT_MAX_DIM) {
+        ndt_err_format(ctx, NDT_IndexError, "too many indices");
+        return xnd_error;
+    }
+
+    return _xnd_subtree_index(x, indices, len, ctx);
+}
+
 /*
  * Return a zero copy view of an xnd object. If a dtype is indexable,
  * descend into the dtype.

@@ -827,6 +1147,7 @@ static xnd_t
 _xnd_subtree(const xnd_t *x, const xnd_index_t indices[], int len, bool indexable,
              ndt_context_t *ctx)
 {
+    APPLY_STORED_INDICES_XND(x)
     const ndt_t *t = x->type;
     const xnd_index_t *key;
 

@@ -846,7 +1167,7 @@ _xnd_subtree(const xnd_t *x, const xnd_index_t indices[], int len, bool indexabl
 
     switch (t->tag) {
     case FixedDim: {
-        int64_t i = get_index(key, t->FixedDim.shape, ctx);
+        const int64_t i = get_index(key, t->FixedDim.shape, ctx);
         if (i < 0) {
             return xnd_error;
         }

@@ -857,14 +1178,13 @@ _xnd_subtree(const xnd_t *x, const xnd_index_t indices[], int len, bool indexabl
 
     case VarDim: {
         int64_t start, step, shape;
-        int64_t i;
 
         shape = ndt_var_indices(&start, &step, t, x->index, ctx);
         if (shape < 0) {
             return xnd_error;
         }
 
-        i = get_index(key, shape, ctx);
+        const int64_t i = get_index(key, shape, ctx);
         if (i < 0) {
             return xnd_error;
         }

@@ -888,7 +1208,7 @@ _xnd_subtree(const xnd_t *x, const xnd_index_t indices[], int len, bool indexabl
     }
 
     case Record: {
-        int64_t i = get_index_record(t, key, ctx);
+        const int64_t i = get_index_record(t, key, ctx);
         if (i < 0) {
             return xnd_error;
         }

@@ -901,6 +1221,39 @@ _xnd_subtree(const xnd_t *x, const xnd_index_t indices[], int len, bool indexabl
         return _xnd_subtree(&next, indices+1, len-1, true, ctx);
     }
 
+    case Union: {
+        const int64_t i = get_index_union(t, key, ctx);
+        if (i < 0) {
+            return xnd_error;
+        }
+
+        const uint8_t k = XND_UNION_TAG(x->ptr);
+        if (i != k) {
+            ndt_err_format(ctx, NDT_ValueError,
+                "tag mismatch in union addressing: expected '%s', got '%s'",
+                t->Union.tags[k], t->Union.tags[i]);
+            return xnd_error;
+        }
+
+        const xnd_t next = xnd_union_next(x, ctx);
+        if (next.ptr == NULL) {
+            return xnd_error;
+        }
+
+        return _xnd_subtree(&next, indices+1, len-1, true, ctx);
+    }
+
+    case Array: {
+        const int64_t shape = XND_ARRAY_SHAPE(x->ptr);
+        const int64_t i = get_index(key, shape, ctx);
+        if (i < 0) {
+            return xnd_error;
+        }
+
+        const xnd_t next = xnd_array_next(x, i);
+        return _xnd_subtree(&next, indices+1, len-1, true, ctx);
+    }
+
     case Ref: {
         const xnd_t next = xnd_ref_next(x, ctx);
         if (next.ptr == NULL) {
@@ -941,13 +1294,18 @@ _xnd_subtree(const xnd_t *x, const xnd_index_t indices[], int len, bool indexabl
 xnd_t
 xnd_subtree(const xnd_t *x, const xnd_index_t indices[], int len, ndt_context_t *ctx)
 {
+    if (len < 0 || len > NDT_MAX_DIM) {
+        ndt_err_format(ctx, NDT_IndexError, "too many indices");
+        return xnd_error;
+    }
+
     return _xnd_subtree(x, indices, len, false, ctx);
 }
 
 static xnd_t xnd_index(const xnd_t *x, const xnd_index_t indices[], int len, ndt_context_t *ctx);
 static xnd_t xnd_slice(const xnd_t *x, const xnd_index_t indices[], int len, ndt_context_t *ctx);
 
-xnd_t
+static xnd_t
 xnd_multikey(const xnd_t *x, const xnd_index_t indices[], int len, ndt_context_t *ctx)
 {
     const ndt_t *t = x->type;

@@ -957,18 +1315,14 @@ xnd_multikey(const xnd_t *x, const xnd_index_t indices[], int len, ndt_context_t
     assert(ndt_is_concrete(t));
     assert(x->ptr != NULL);
 
-    if (len > t
+    if (len > ndt_logical_ndim(t)) {
         ndt_err_format(ctx, NDT_IndexError, "too many indices");
         return xnd_error;
     }
 
     if (len == 0) {
         xnd_t next = *x;
-        next.type
-        if (next.type == NULL) {
-            return xnd_error;
-        }
-
+        ndt_incref(next.type);
         return next;
     }
 

@@ -997,6 +1351,7 @@ static xnd_t
 static xnd_t
 xnd_index(const xnd_t *x, const xnd_index_t indices[], int len, ndt_context_t *ctx)
 {
+    xnd_index_t xindices[NDT_MAX_DIM+1];
     const ndt_t *t = x->type;
     const xnd_index_t *key;
 

@@ -1004,6 +1359,17 @@ xnd_index(const xnd_t *x, const xnd_index_t indices[], int len, ndt_context_t *c
     assert(ndt_is_concrete(t));
     assert(x->ptr != NULL);
 
+    /* Hidden element type, insert the stored index. */
+    if (have_stored_index(t)) {
+        xindices[0].tag = Index;
+        xindices[0].Index = get_stored_index(t);
+        for (int k = 0; k < len; k++) {
+            xindices[k+1] = indices[k];
+        }
+        indices = xindices;
+        len = len+1;
+    }
+
     key = &indices[0];
     assert(key->tag == Index);
 

@@ -1018,10 +1384,35 @@ xnd_index(const xnd_t *x, const xnd_index_t indices[], int len, ndt_context_t *c
         return xnd_multikey(&next, indices+1, len-1, ctx);
     }
 
-    case VarDim: {
-
-
-
+    case VarDim: case VarDimElem: {
+        const ndt_t *u;
+
+        if (ndt_is_optional(t)) {
+            ndt_err_format(ctx, NDT_NotImplementedError,
+                "optional dimensions are temporarily disabled");
+            return xnd_error;
+        }
+
+        const int64_t i = get_index_var_elem(key, ctx);
+        if (i == INT64_MIN) {
+            return xnd_error;
+        }
+
+        const xnd_t next = xnd_var_dim_next(x, 0, 1, 0);
+        const xnd_t tail = xnd_multikey(&next, indices+1, len-1, ctx);
+        if (xnd_err_occurred(&tail)) {
+            return xnd_error;
+        }
+
+        u = ndt_convert_to_var_elem(t, tail.type, i, ctx);
+        ndt_decref(tail.type);
+        if (u == NULL) {
+            return xnd_error;
+        }
+
+        xnd_t ret = *x;
+        ret.type = u;
+        return ret;
     }
 
     default:
@@ -1059,9 +1450,10 @@ xnd_slice(const xnd_t *x, const xnd_index_t indices[], int len, ndt_context_t *c
         }
 
         xnd_t ret = *x;
-        ret.type = ndt_fixed_dim(
+        ret.type = ndt_fixed_dim(sliced.type, shape,
                                  t->Concrete.FixedDim.step * step,
                                  ctx);
+        ndt_decref(sliced.type);
         if (ret.type == NULL) {
             return xnd_error;
         }

@@ -1093,15 +1485,14 @@ xnd_slice(const xnd_t *x, const xnd_index_t indices[], int len, ndt_context_t *c
 
         slices = ndt_var_add_slice(&nslices, t, start, stop, step, ctx);
         if (slices == NULL) {
+            ndt_decref(next.type);
             return xnd_error;
         }
 
         xnd_t ret = *x;
-        ret.type = ndt_var_dim(
-
-
-                               nslices, slices,
-                               ctx);
+        ret.type = ndt_var_dim(next.type, t->Concrete.VarDim.offsets,
+                               nslices, slices, false, ctx);
+        ndt_decref(next.type);
         if (ret.type == NULL) {
             return xnd_error;
         }

@@ -1111,6 +1502,32 @@ xnd_slice(const xnd_t *x, const xnd_index_t indices[], int len, ndt_context_t *c
         return ret;
     }
 
+    case VarDimElem: {
+        int64_t i = t->VarDimElem.index;
+
+        if (ndt_is_optional(t)) {
+            ndt_err_format(ctx, NDT_NotImplementedError,
+                "optional dimensions are temporarily disabled");
+            return xnd_error;
+        }
+
+        const xnd_t next = xnd_var_dim_next(x, 0, 1, 0);
+        const xnd_t tail = xnd_multikey(&next, indices, len, ctx);
+        if (xnd_err_occurred(&tail)) {
+            return xnd_error;
+        }
+
+        const ndt_t *u = ndt_convert_to_var_elem(t, tail.type, i, ctx);
+        ndt_decref(tail.type);
+        if (u == NULL) {
+            return xnd_error;
+        }
+
+        xnd_t ret = *x;
+        ret.type = u;
+        return ret;
+    }
+
     case Tuple: {
         ndt_err_format(ctx, NDT_NotImplementedError,
             "slicing tuples is not supported");

@@ -1123,42 +1540,108 @@ xnd_slice(const xnd_t *x, const xnd_index_t indices[], int len, ndt_context_t *c
         return xnd_error;
     }
 
+    case Union: {
+        ndt_err_format(ctx, NDT_NotImplementedError,
+            "slicing unions is not supported");
+        return xnd_error;
+    }
+
     default:
         ndt_err_format(ctx, NDT_IndexError, "type not sliceable");
         return xnd_error;
     }
 }
 
+/* Validate indices for mixed indexed/sliced var dimensions. */
+static bool
+validate_indices(const xnd_t *x, ndt_context_t *ctx)
+{
+    const ndt_t * const t = x->type;
+
+    assert(ndt_is_concrete(t));
+
+    switch (t->tag) {
+    case VarDim: {
+        int64_t start, step, shape;
+
+        shape = ndt_var_indices_non_empty(&start, &step, t, x->index, ctx);
+        if (shape < 0) {
+            return false;
+        }
+
+        for (int64_t i = 0; i < shape; i++) {
+            const xnd_t next = xnd_var_dim_next(x, start, step, i);
+            if (!validate_indices(&next, ctx)) {
+                return false;
+            }
+        }
+
+        return true;
+    }
+
+    case VarDimElem: {
+        int64_t start, step, shape;
+
+        shape = ndt_var_indices(&start, &step, t, x->index, ctx);
+        if (shape < 0) {
+            return false;
+        }
+
+        const int64_t k = adjust_index(t->VarDimElem.index, shape, ctx);
+        if (k < 0) {
+            return false;
+        }
+
+        const xnd_t next = xnd_var_dim_next(x, start, step, k);
+        return validate_indices(&next, ctx);
+    }
+
+    default:
+        return true;
+    }
+}
+
 xnd_t
 xnd_subscript(const xnd_t *x, const xnd_index_t indices[], int len,
               ndt_context_t *ctx)
 {
+    bool have_index = false;
     bool have_slice = false;
 
+    if (len < 0 || len > NDT_MAX_DIM) {
+        ndt_err_format(ctx, NDT_IndexError, "too many indices");
+        return xnd_error;
+    }
+
     for (int i = 0; i < len; i++) {
+        if (indices[i].tag == Index) {
+            have_index = true;
+        }
         if (indices[i].tag == Slice) {
             have_slice = true;
-            break;
         }
     }
 
     if (have_slice) {
-
-
-
-
-        const ndt_t *t;
+        xnd_t res = xnd_multikey(x, indices, len, ctx);
+        if (xnd_err_occurred(&res)) {
+            return xnd_error;
+        }
 
-        if (res
+        if (have_index && !validate_indices(&res, ctx)) {
+            ndt_decref(res.type);
             return xnd_error;
         }
 
-
-
+        return res;
+    }
+    else {
+        xnd_t res = xnd_subtree(x, indices, len, ctx);
+        if (res.ptr == NULL) {
             return xnd_error;
         }
 
-        res.type
+        ndt_incref(res.type);
         return res;
     }
 }
@@ -1302,3 +1785,42 @@ xnd_double_is_big_endian(void)
 {
     return xnd_double_format==IEEE_BIG_ENDIAN;
 }
+
+static float
+bfloat16_to_float(uint16_t b)
+{
+    float f = 0;
+    uint16_t *p = (uint16_t *)((char *)&f);
+
+    if (xnd_float_is_big_endian()) {
+        p[0] = b;
+    }
+    else {
+        p[1] = b;
+    }
+
+    return f;
+}
+
+/*
+ * Unlike the corresponding Python conversion functions, Tensorflow does
+ * not raise OverflowError.
+ */
+void
+xnd_bfloat_pack(char *p, double x)
+{
+    float f = (float)x;
+    uint16_t u16;
+
+    u16 = xnd_round_to_bfloat16(f);
+    PACK_SINGLE(p, u16, uint16_t, 0);
+}
+
+double
+xnd_bfloat_unpack(char *p)
+{
+    uint16_t u16;
+
+    UNPACK_SINGLE(u16, p, uint16_t, 0);
+    return bfloat16_to_float(u16);
+}