xnd 0.2.0dev6 → 0.2.0dev7
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +2 -0
- data/Rakefile +1 -1
- data/ext/ruby_xnd/GPATH +0 -0
- data/ext/ruby_xnd/GRTAGS +0 -0
- data/ext/ruby_xnd/GTAGS +0 -0
- data/ext/ruby_xnd/extconf.rb +8 -5
- data/ext/ruby_xnd/gc_guard.c +53 -2
- data/ext/ruby_xnd/gc_guard.h +8 -2
- data/ext/ruby_xnd/include/overflow.h +147 -0
- data/ext/ruby_xnd/include/ruby_xnd.h +62 -0
- data/ext/ruby_xnd/include/xnd.h +590 -0
- data/ext/ruby_xnd/lib/libxnd.a +0 -0
- data/ext/ruby_xnd/lib/libxnd.so +1 -0
- data/ext/ruby_xnd/lib/libxnd.so.0 +1 -0
- data/ext/ruby_xnd/lib/libxnd.so.0.2.0dev3 +0 -0
- data/ext/ruby_xnd/ruby_xnd.c +556 -47
- data/ext/ruby_xnd/ruby_xnd.h +2 -1
- data/ext/ruby_xnd/xnd/Makefile +80 -0
- data/ext/ruby_xnd/xnd/config.h +26 -0
- data/ext/ruby_xnd/xnd/config.h.in +3 -0
- data/ext/ruby_xnd/xnd/config.log +421 -0
- data/ext/ruby_xnd/xnd/config.status +1023 -0
- data/ext/ruby_xnd/xnd/configure +376 -8
- data/ext/ruby_xnd/xnd/configure.ac +48 -7
- data/ext/ruby_xnd/xnd/doc/xnd/index.rst +3 -1
- data/ext/ruby_xnd/xnd/doc/xnd/{types.rst → xnd.rst} +3 -18
- data/ext/ruby_xnd/xnd/libxnd/Makefile +142 -0
- data/ext/ruby_xnd/xnd/libxnd/Makefile.in +43 -3
- data/ext/ruby_xnd/xnd/libxnd/Makefile.vc +19 -3
- data/ext/ruby_xnd/xnd/libxnd/bitmaps.c +42 -3
- data/ext/ruby_xnd/xnd/libxnd/bitmaps.o +0 -0
- data/ext/ruby_xnd/xnd/libxnd/bounds.c +366 -0
- data/ext/ruby_xnd/xnd/libxnd/bounds.o +0 -0
- data/ext/ruby_xnd/xnd/libxnd/contrib.h +98 -0
- data/ext/ruby_xnd/xnd/libxnd/contrib/bfloat16.h +213 -0
- data/ext/ruby_xnd/xnd/libxnd/copy.c +155 -4
- data/ext/ruby_xnd/xnd/libxnd/copy.o +0 -0
- data/ext/ruby_xnd/xnd/libxnd/cuda/cuda_memory.cu +121 -0
- data/ext/ruby_xnd/xnd/libxnd/cuda/cuda_memory.h +58 -0
- data/ext/ruby_xnd/xnd/libxnd/equal.c +195 -7
- data/ext/ruby_xnd/xnd/libxnd/equal.o +0 -0
- data/ext/ruby_xnd/xnd/libxnd/inline.h +32 -0
- data/ext/ruby_xnd/xnd/libxnd/libxnd.a +0 -0
- data/ext/ruby_xnd/xnd/libxnd/libxnd.so +1 -0
- data/ext/ruby_xnd/xnd/libxnd/libxnd.so.0 +1 -0
- data/ext/ruby_xnd/xnd/libxnd/libxnd.so.0.2.0dev3 +0 -0
- data/ext/ruby_xnd/xnd/libxnd/shape.c +207 -0
- data/ext/ruby_xnd/xnd/libxnd/shape.o +0 -0
- data/ext/ruby_xnd/xnd/libxnd/split.c +2 -2
- data/ext/ruby_xnd/xnd/libxnd/split.o +0 -0
- data/ext/ruby_xnd/xnd/libxnd/tests/Makefile +39 -0
- data/ext/ruby_xnd/xnd/libxnd/xnd.c +613 -91
- data/ext/ruby_xnd/xnd/libxnd/xnd.h +145 -4
- data/ext/ruby_xnd/xnd/libxnd/xnd.o +0 -0
- data/ext/ruby_xnd/xnd/python/test_xnd.py +1125 -50
- data/ext/ruby_xnd/xnd/python/xnd/__init__.py +609 -124
- data/ext/ruby_xnd/xnd/python/xnd/_version.py +1 -0
- data/ext/ruby_xnd/xnd/python/xnd/_xnd.c +1652 -101
- data/ext/ruby_xnd/xnd/python/xnd/libxnd.a +0 -0
- data/ext/ruby_xnd/xnd/python/xnd/libxnd.so +1 -0
- data/ext/ruby_xnd/xnd/python/xnd/libxnd.so.0 +1 -0
- data/ext/ruby_xnd/xnd/python/xnd/libxnd.so.0.2.0dev3 +0 -0
- data/ext/ruby_xnd/xnd/python/xnd/pyxnd.h +1 -1
- data/ext/ruby_xnd/xnd/python/xnd/util.h +25 -0
- data/ext/ruby_xnd/xnd/python/xnd/xnd.h +590 -0
- data/ext/ruby_xnd/xnd/python/xnd_randvalue.py +106 -6
- data/ext/ruby_xnd/xnd/python/xnd_support.py +4 -0
- data/ext/ruby_xnd/xnd/setup.py +46 -4
- data/lib/ruby_xnd.so +0 -0
- data/lib/xnd.rb +39 -3
- data/lib/xnd/version.rb +2 -2
- data/xnd.gemspec +2 -1
- metadata +58 -5
@@ -0,0 +1,213 @@
|
|
1
|
+
/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
|
2
|
+
|
3
|
+
Licensed under the Apache License, Version 2.0 (the "License");
|
4
|
+
you may not use this file except in compliance with the License.
|
5
|
+
You may obtain a copy of the License at
|
6
|
+
|
7
|
+
http://www.apache.org/licenses/LICENSE-2.0
|
8
|
+
|
9
|
+
Unless required by applicable law or agreed to in writing, software
|
10
|
+
distributed under the License is distributed on an "AS IS" BASIS,
|
11
|
+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12
|
+
See the License for the specific language governing permissions and
|
13
|
+
limitations under the License.
|
14
|
+
==============================================================================*/
|
15
|
+
|
16
|
+
/* Modified and adapted for gumath. */
|
17
|
+
|
18
|
+
#ifndef BFLOAT16_H
|
19
|
+
#define BFLOAT16_H
|
20
|
+
|
21
|
+
|
22
|
+
#include <stdint.h>
|
23
|
+
#include <math.h>
|
24
|
+
|
25
|
+
|
26
|
+
// Overlay of a float and its raw bit pattern, used to read the bits of a
// float without pointer casts. The integer member is a uint32_t so that it
// is exactly 32 bits wide on every platform (a plain "unsigned int" carries
// no such guarantee).
union FP32 {
    uint32_t u;
    float f;
};

// Converts a floating-point value to bfloat16, with round-to-nearest-even as
// the rounding method.
//
// v: the value to convert.
//
// Returns the 16-bit bfloat16 pattern, i.e. the upper 16 bits of the rounded
// float32 bit pattern. Any NaN input (regardless of sign or payload) yields
// the quiet NaN 0x7fc0.
//
// TODO: There is a slightly faster implementation (8% faster on CPU)
// than this (documented in cl/175987786), that is exponentially harder to
// understand and document. Switch to the faster version when converting to
// BF16 becomes compute-bound.
static inline uint16_t
xnd_round_to_bfloat16(float v)
{
    union FP32 f;
    f.f = v;
    uint32_t input = f.u;
    uint16_t output;

    if (isnan(v)) {
        // If the value is a NaN, squash it to a qNaN with msb of fraction set,
        // this makes sure after truncation we don't end up with an inf.
        //
        // qNaN magic: All exponent bits set + most significant bit of fraction
        // set.
        output = 0x7fc0;
    } else {
        // Fast rounding algorithm that rounds a half value to nearest even. This
        // reduces expected error when we convert a large number of floats. Here
        // is how it works:
        //
        // Definitions:
        // To convert a float 32 to bfloat16, a float 32 can be viewed as 32 bits
        // with the following tags:
        //
        // Sign |  Exp (8 bits) | Frac (23 bits)
        //  S     EEEEEEEE         FFFFFFLRTTTTTTTTTTTTTTT
        //
        //  S: Sign bit.
        //  E: Exponent bits.
        //  F: First 6 bits of fraction.
        //  L: Least significant bit of resulting bfloat16 if we truncate away the
        //  rest of the float32. This is also the 7th bit of fraction.
        //  R: Rounding bit, 8th bit of fraction.
        //  T: Sticky bits, rest of fraction, 15 bits.
        //
        // To round half to nearest even, there are 3 cases where we want to round
        // down (simply truncate the result of the bits away, which consists of
        // rounding bit and sticky bits) and two cases where we want to round up
        // (truncate then add one to the result).
        //
        // The fast converting algorithm simply adds lsb (L) to 0x7fff (15 bits of
        // 1s) as the rounding bias, adds the rounding bias to the input, then
        // truncates the last 16 bits away.
        //
        // To understand how it works, we can analyze this algorithm case by case:
        //
        // 1. L = 0, R = 0:
        //   Expect: round down, this is less than half value.
        //
        //   Algorithm:
        //   - Rounding bias: 0x7fff + 0 = 0x7fff
        //   - Adding rounding bias to input may create a carry, depending on
        //   whether there is any value set to 1 in T bits.
        //   - R may be set to 1 if there is a carry.
        //   - L remains 0.
        //   - Note that this case also handles Inf and -Inf, where all fraction
        //   bits, including L, R and Ts are all 0. The output remains Inf after
        //   this algorithm.
        //
        // 2. L = 1, R = 0:
        //   Expect: round down, this is less than half value.
        //
        //   Algorithm:
        //   - Rounding bias: 0x7fff + 1 = 0x8000
        //   - Adding rounding bias to input doesn't change sticky bits but
        //   adds 1 to rounding bit.
        //   - L remains 1.
        //
        // 3. L = 0, R = 1, all of T are 0:
        //   Expect: round down, this is exactly at half, the result is already
        //   even (L=0).
        //
        //   Algorithm:
        //   - Rounding bias: 0x7fff + 0 = 0x7fff
        //   - Adding rounding bias to input sets all sticky bits to 1, but
        //   doesn't create a carry.
        //   - R remains 1.
        //   - L remains 0.
        //
        // 4. L = 1, R = 1:
        //   Expect: round up, this is exactly at half, the result needs to be
        //   rounded to the next even number.
        //
        //   Algorithm:
        //   - Rounding bias: 0x7fff + 1 = 0x8000
        //   - Adding rounding bias to input doesn't change sticky bits, but
        //   creates a carry from rounding bit.
        //   - The carry sets L to 0, creates another carry bit and propagates
        //   forward to F bits.
        //   - If all the F bits are 1, a carry then propagates to the exponent
        //   bits, which then creates the minimum value with the next exponent
        //   value. Note that we won't have the case where exponents are all 1,
        //   since that's either a NaN (handled in the other if condition) or inf
        //   (handled in case 1).
        //
        // 5. L = 0, R = 1, any of T is 1:
        //   Expect: round up, this is greater than half.
        //
        //   Algorithm:
        //   - Rounding bias: 0x7fff + 0 = 0x7fff
        //   - Adding rounding bias to input creates a carry from sticky bits,
        //   sets rounding bit to 0, then creates another carry.
        //   - The second carry sets L to 1.
        //
        // Examples:
        //
        //  Exact half value that is already even:
        //    Input:
        //    Sign |  Exp (8 bit)     | Frac (first 7 bit) | Frac (last 16 bit)
        //     S     E E E E E E E E      F F F F F F L     RTTTTTTTTTTTTTTT
        //     0     0 0 0 0 0 0 0 0      0 0 0 0 0 1 0     1000000000000000
        //
        //     This falls into case 3. We truncate the rest of 16 bits and no
        //     carry is created into F and L:
        //
        //    Output:
        //    Sign |  Exp (8 bit)     | Frac (first 7 bit)
        //     S     E E E E E E E E      F F F F F F L
        //     0     0 0 0 0 0 0 0 0      0 0 0 0 0 1 0
        //
        //  Exact half value, round to next even number:
        //    Input:
        //    Sign |  Exp (8 bit)     | Frac (first 7 bit) | Frac (last 16 bit)
        //     S     E E E E E E E E      F F F F F F L     RTTTTTTTTTTTTTTT
        //     0     0 0 0 0 0 0 0 0      0 0 0 0 0 0 1     1000000000000000
        //
        //     This falls into case 4. We create a carry from R and T,
        //     which then propagates into L and F:
        //
        //    Output:
        //    Sign |  Exp (8 bit)     | Frac (first 7 bit)
        //     S     E E E E E E E E      F F F F F F L
        //     0     0 0 0 0 0 0 0 0      0 0 0 0 0 1 0
        //
        //
        //  Max denormal value round to min normal value:
        //    Input:
        //    Sign |  Exp (8 bit)     | Frac (first 7 bit) | Frac (last 16 bit)
        //     S     E E E E E E E E      F F F F F F L     RTTTTTTTTTTTTTTT
        //     0     0 0 0 0 0 0 0 0      1 1 1 1 1 1 1     1111111111111111
        //
        //     This falls into case 4. We create a carry from R and T,
        //     propagate into L and F, which then propagates into exponent
        //     bits:
        //
        //    Output:
        //    Sign |  Exp (8 bit)     | Frac (first 7 bit)
        //     S     E E E E E E E E      F F F F F F L
        //     0     0 0 0 0 0 0 0 1      0 0 0 0 0 0 0
        //
        //  Max normal value round to Inf:
        //    Input:
        //    Sign |  Exp (8 bit)     | Frac (first 7 bit) | Frac (last 16 bit)
        //     S     E E E E E E E E      F F F F F F L     RTTTTTTTTTTTTTTT
        //     0     1 1 1 1 1 1 1 0      1 1 1 1 1 1 1     1111111111111111
        //
        //     This falls into case 4. We create a carry from R and T,
        //     propagate into L and F, which then propagates into exponent
        //     bits:
        //
        //    Sign |  Exp (8 bit)     | Frac (first 7 bit)
        //     S     E E E E E E E E      F F F F F F L
        //     0     1 1 1 1 1 1 1 1      0 0 0 0 0 0 0
        //
        //
        // Least significant bit of resulting bfloat.
        uint32_t lsb = (input >> 16) & 1;
        uint32_t rounding_bias = 0x7fff + lsb;
        input += rounding_bias;
        output = (uint16_t)(input >> 16);
    }
    return output;
}
|
211
|
+
|
212
|
+
|
213
|
+
#endif // BFLOAT16_H
|
@@ -38,6 +38,7 @@
|
|
38
38
|
#include <assert.h>
|
39
39
|
#include "ndtypes.h"
|
40
40
|
#include "xnd.h"
|
41
|
+
#include "overflow.h"
|
41
42
|
#include "contrib.h"
|
42
43
|
|
43
44
|
|
@@ -164,6 +165,15 @@ copy_int64(xnd_t * const x, const int64_t i64, ndt_context_t *ctx)
|
|
164
165
|
return 0;
|
165
166
|
}
|
166
167
|
|
168
|
+
case BFloat16: {
|
169
|
+
if (i64 < -4503599627370496LL || i64 > 4503599627370496LL) {
|
170
|
+
return value_error(ctx);
|
171
|
+
}
|
172
|
+
double real = (double)i64;
|
173
|
+
xnd_bfloat_pack(x->ptr, real);
|
174
|
+
return 0;
|
175
|
+
}
|
176
|
+
|
167
177
|
case Float16: {
|
168
178
|
if (i64 < -4503599627370496LL || i64 > 4503599627370496LL) {
|
169
179
|
return value_error(ctx);
|
@@ -189,6 +199,19 @@ copy_int64(xnd_t * const x, const int64_t i64, ndt_context_t *ctx)
|
|
189
199
|
return 0;
|
190
200
|
}
|
191
201
|
|
202
|
+
case BComplex32: {
|
203
|
+
if (i64 < -4503599627370496LL || i64 > 4503599627370496LL) {
|
204
|
+
return value_error(ctx);
|
205
|
+
}
|
206
|
+
|
207
|
+
double real = (double)i64;
|
208
|
+
double imag = 0.0;
|
209
|
+
|
210
|
+
xnd_bfloat_pack(x->ptr, real);
|
211
|
+
xnd_bfloat_pack(x->ptr+2, imag);
|
212
|
+
return 0;
|
213
|
+
}
|
214
|
+
|
192
215
|
case Complex32: {
|
193
216
|
if (i64 < -4503599627370496LL || i64 > 4503599627370496LL) {
|
194
217
|
return value_error(ctx);
|
@@ -312,6 +335,15 @@ copy_uint64(xnd_t * const x, const uint64_t u64, ndt_context_t *ctx)
|
|
312
335
|
return 0;
|
313
336
|
}
|
314
337
|
|
338
|
+
case BFloat16: {
|
339
|
+
if (u64 > 4503599627370496LL) {
|
340
|
+
return value_error(ctx);
|
341
|
+
}
|
342
|
+
double real = (double)u64;
|
343
|
+
xnd_bfloat_pack(x->ptr, real);
|
344
|
+
return 0;
|
345
|
+
}
|
346
|
+
|
315
347
|
case Float16: {
|
316
348
|
if (u64 > 4503599627370496LL) {
|
317
349
|
return value_error(ctx);
|
@@ -337,6 +369,19 @@ copy_uint64(xnd_t * const x, const uint64_t u64, ndt_context_t *ctx)
|
|
337
369
|
return 0;
|
338
370
|
}
|
339
371
|
|
372
|
+
case BComplex32: {
|
373
|
+
if (u64 > 4503599627370496LL) {
|
374
|
+
return value_error(ctx);
|
375
|
+
}
|
376
|
+
|
377
|
+
double real = (double)u64;
|
378
|
+
double imag = 0.0;
|
379
|
+
|
380
|
+
xnd_bfloat_pack(x->ptr, real);
|
381
|
+
xnd_bfloat_pack(x->ptr+2, imag);
|
382
|
+
return 0;
|
383
|
+
}
|
384
|
+
|
340
385
|
case Complex32: {
|
341
386
|
if (u64 > 4503599627370496LL) {
|
342
387
|
return value_error(ctx);
|
@@ -474,6 +519,12 @@ copy_float64(xnd_t * const x, const double real, ndt_context_t *ctx)
|
|
474
519
|
}
|
475
520
|
uint64_t u64 = (uint64_t)real;
|
476
521
|
PACK_SINGLE(x->ptr, u64, uint64_t, t->flags);
|
522
|
+
return 0;
|
523
|
+
}
|
524
|
+
|
525
|
+
case BFloat16: {
|
526
|
+
xnd_bfloat_pack(x->ptr, real);
|
527
|
+
return 0;
|
477
528
|
}
|
478
529
|
|
479
530
|
case Float16: {
|
@@ -489,6 +540,14 @@ copy_float64(xnd_t * const x, const double real, ndt_context_t *ctx)
|
|
489
540
|
return 0;
|
490
541
|
}
|
491
542
|
|
543
|
+
case BComplex32: {
|
544
|
+
double imag = 0.0;
|
545
|
+
|
546
|
+
xnd_bfloat_pack(x->ptr, real);
|
547
|
+
xnd_bfloat_pack(x->ptr+2, imag);
|
548
|
+
return 0;
|
549
|
+
}
|
550
|
+
|
492
551
|
case Complex32: {
|
493
552
|
double imag = 0.0;
|
494
553
|
|
@@ -532,7 +591,7 @@ copy_complex128(xnd_t * const x, const double real, const double imag,
|
|
532
591
|
switch (t->tag) {
|
533
592
|
case Int8: case Int16: case Int32: case Int64:
|
534
593
|
case Uint8: case Uint16: case Uint32: case Uint64:
|
535
|
-
case Float16: case Float32: case Float64: {
|
594
|
+
case BFloat16: case Float16: case Float32: case Float64: {
|
536
595
|
if (imag == 0.0) {
|
537
596
|
return copy_float64(x, real, ctx);
|
538
597
|
}
|
@@ -540,6 +599,12 @@ copy_complex128(xnd_t * const x, const double real, const double imag,
|
|
540
599
|
return type_error(ctx);
|
541
600
|
}
|
542
601
|
|
602
|
+
case BComplex32: {
|
603
|
+
xnd_bfloat_pack(x->ptr, real);
|
604
|
+
xnd_bfloat_pack(x->ptr+2, imag);
|
605
|
+
return 0;
|
606
|
+
}
|
607
|
+
|
543
608
|
case Complex32: {
|
544
609
|
if (xnd_float_pack2(real, (unsigned char *)x->ptr, le(t->flags), ctx) < 0) {
|
545
610
|
return -1;
|
@@ -568,6 +633,8 @@ copy_complex128(xnd_t * const x, const double real, const double imag,
|
|
568
633
|
int
|
569
634
|
xnd_copy(xnd_t *y, const xnd_t *x, uint32_t flags, ndt_context_t *ctx)
|
570
635
|
{
|
636
|
+
APPLY_STORED_INDICES_INT(x)
|
637
|
+
APPLY_STORED_INDICES_INT(y)
|
571
638
|
const ndt_t * const t = x->type;
|
572
639
|
const ndt_t * const u = y->type;
|
573
640
|
int n;
|
@@ -583,6 +650,10 @@ xnd_copy(xnd_t *y, const xnd_t *x, uint32_t flags, ndt_context_t *ctx)
|
|
583
650
|
return 0;
|
584
651
|
}
|
585
652
|
|
653
|
+
if (ndt_is_optional(u)) {
|
654
|
+
xnd_set_valid(y);
|
655
|
+
}
|
656
|
+
|
586
657
|
if (t->tag == Ref || u->tag == Ref) {
|
587
658
|
return copy_ref(y, x, flags, ctx);
|
588
659
|
}
|
@@ -688,6 +759,25 @@ xnd_copy(xnd_t *y, const xnd_t *x, uint32_t flags, ndt_context_t *ctx)
|
|
688
759
|
return 0;
|
689
760
|
}
|
690
761
|
|
762
|
+
case Union: {
|
763
|
+
if (!ndt_equal(u, t)) {
|
764
|
+
return type_error(ctx);
|
765
|
+
}
|
766
|
+
|
767
|
+
const xnd_t xnext = xnd_union_next(x, ctx);
|
768
|
+
if (xnext.ptr == NULL) {
|
769
|
+
return -1;
|
770
|
+
}
|
771
|
+
|
772
|
+
XND_UNION_TAG(y->ptr) = XND_UNION_TAG(x->ptr);
|
773
|
+
xnd_t ynext = xnd_union_next(y, ctx);
|
774
|
+
if (ynext.ptr == NULL) {
|
775
|
+
return -1;
|
776
|
+
}
|
777
|
+
|
778
|
+
return xnd_copy(&ynext, &xnext, flags, ctx);
|
779
|
+
}
|
780
|
+
|
691
781
|
case Constr: {
|
692
782
|
if (u->tag != Constr || strcmp(u->Constr.name, t->Constr.name) != 0) {
|
693
783
|
return type_error(ctx);
|
@@ -805,6 +895,11 @@ xnd_copy(xnd_t *y, const xnd_t *x, uint32_t flags, ndt_context_t *ctx)
|
|
805
895
|
return copy_uint64(y, u64, ctx);
|
806
896
|
}
|
807
897
|
|
898
|
+
case BFloat16: {
|
899
|
+
double real = xnd_bfloat_unpack(x->ptr);
|
900
|
+
return copy_float64(y, real, ctx);
|
901
|
+
}
|
902
|
+
|
808
903
|
case Float16: {
|
809
904
|
double real = xnd_float_unpack2((unsigned char *)x->ptr, le(t->flags));
|
810
905
|
return copy_float64(y, real, ctx);
|
@@ -820,6 +915,15 @@ xnd_copy(xnd_t *y, const xnd_t *x, uint32_t flags, ndt_context_t *ctx)
|
|
820
915
|
return copy_float64(y, real, ctx);
|
821
916
|
}
|
822
917
|
|
918
|
+
case BComplex32: {
|
919
|
+
double real, imag;
|
920
|
+
|
921
|
+
real = xnd_bfloat_unpack(x->ptr);
|
922
|
+
imag = xnd_bfloat_unpack(x->ptr+2);
|
923
|
+
|
924
|
+
return copy_complex128(y, real, imag, ctx);
|
925
|
+
}
|
926
|
+
|
823
927
|
case Complex32: {
|
824
928
|
double real, imag;
|
825
929
|
|
@@ -875,7 +979,7 @@ xnd_copy(xnd_t *y, const xnd_t *x, uint32_t flags, ndt_context_t *ctx)
|
|
875
979
|
return type_error(ctx);
|
876
980
|
}
|
877
981
|
|
878
|
-
s = ndt_strdup(
|
982
|
+
s = ndt_strdup(XND_STRING_DATA(x->ptr), ctx);
|
879
983
|
if (s == NULL) {
|
880
984
|
return -1;
|
881
985
|
}
|
@@ -912,7 +1016,7 @@ xnd_copy(xnd_t *y, const xnd_t *x, uint32_t flags, ndt_context_t *ctx)
|
|
912
1016
|
memcpy(s, XND_BYTES_DATA(x->ptr), (size_t)size);
|
913
1017
|
|
914
1018
|
if (XND_BYTES_DATA(y->ptr) != NULL) {
|
915
|
-
if (!(flags &
|
1019
|
+
if (!(flags & XND_OWN_BYTES)) {
|
916
1020
|
ndt_err_format(ctx, NDT_RuntimeError,
|
917
1021
|
"cannot free string pointer, xnd does not own it");
|
918
1022
|
ndt_aligned_free(s);
|
@@ -926,8 +1030,55 @@ xnd_copy(xnd_t *y, const xnd_t *x, uint32_t flags, ndt_context_t *ctx)
|
|
926
1030
|
return 0;
|
927
1031
|
}
|
928
1032
|
|
929
|
-
|
1033
|
+
case Array: {
|
1034
|
+
bool overflow = false;
|
1035
|
+
|
1036
|
+
if (u->tag != Array) {
|
1037
|
+
return type_error(ctx);
|
1038
|
+
}
|
1039
|
+
|
1040
|
+
const int64_t shape = XND_ARRAY_SHAPE(x->ptr);
|
1041
|
+
const int64_t size = MULi64(shape, t->Array.itemsize, &overflow);
|
1042
|
+
if (overflow) {
|
1043
|
+
ndt_err_format(ctx, NDT_ValueError, "flexible array too large");
|
1044
|
+
return -1;
|
1045
|
+
}
|
1046
|
+
|
1047
|
+
char *data = ndt_aligned_calloc(u->align, size);
|
1048
|
+
if (data == NULL) {
|
1049
|
+
(void)ndt_memory_error(ctx);
|
1050
|
+
return -1;
|
1051
|
+
}
|
1052
|
+
|
1053
|
+
if (XND_ARRAY_DATA(y->ptr) != NULL) {
|
1054
|
+
if (!(flags & XND_OWN_ARRAYS)) {
|
1055
|
+
ndt_err_format(ctx, NDT_RuntimeError,
|
1056
|
+
"cannot free array data pointer, xnd does not own it");
|
1057
|
+
ndt_aligned_free(data);
|
1058
|
+
return -1;
|
1059
|
+
}
|
1060
|
+
ndt_aligned_free(XND_ARRAY_DATA(y->ptr));
|
1061
|
+
}
|
1062
|
+
|
1063
|
+
XND_ARRAY_SHAPE(y->ptr) = shape;
|
1064
|
+
XND_ARRAY_DATA(y->ptr) = data;
|
1065
|
+
|
1066
|
+
for (int64_t i = 0; i < shape; i++) {
|
1067
|
+
const xnd_t xnext = xnd_array_next(x, i);
|
1068
|
+
xnd_t ynext = xnd_array_next(y, i);
|
1069
|
+
n = xnd_copy(&ynext, &xnext, flags, ctx);
|
1070
|
+
if (n < 0) return n;
|
1071
|
+
}
|
1072
|
+
|
1073
|
+
return 0;
|
1074
|
+
}
|
1075
|
+
|
1076
|
+
/* NOT REACHED: intercepted by apply_stored_indices(). */
|
1077
|
+
case VarDimElem:
|
1078
|
+
/* NOT REACHED: intercepted by copy_ref(). */
|
930
1079
|
case Ref:
|
1080
|
+
ndt_err_format(ctx, NDT_RuntimeError, "unexpected VarDimElem or Ref");
|
1081
|
+
return -1;
|
931
1082
|
|
932
1083
|
/* NOT REACHED: xnd types must be concrete. */
|
933
1084
|
case Module: case Function:
|