xnd 0.2.0dev6 → 0.2.0dev7

Sign up to get free protection for your applications and to get access to all the features.
Files changed (74) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +2 -0
  3. data/Rakefile +1 -1
  4. data/ext/ruby_xnd/GPATH +0 -0
  5. data/ext/ruby_xnd/GRTAGS +0 -0
  6. data/ext/ruby_xnd/GTAGS +0 -0
  7. data/ext/ruby_xnd/extconf.rb +8 -5
  8. data/ext/ruby_xnd/gc_guard.c +53 -2
  9. data/ext/ruby_xnd/gc_guard.h +8 -2
  10. data/ext/ruby_xnd/include/overflow.h +147 -0
  11. data/ext/ruby_xnd/include/ruby_xnd.h +62 -0
  12. data/ext/ruby_xnd/include/xnd.h +590 -0
  13. data/ext/ruby_xnd/lib/libxnd.a +0 -0
  14. data/ext/ruby_xnd/lib/libxnd.so +1 -0
  15. data/ext/ruby_xnd/lib/libxnd.so.0 +1 -0
  16. data/ext/ruby_xnd/lib/libxnd.so.0.2.0dev3 +0 -0
  17. data/ext/ruby_xnd/ruby_xnd.c +556 -47
  18. data/ext/ruby_xnd/ruby_xnd.h +2 -1
  19. data/ext/ruby_xnd/xnd/Makefile +80 -0
  20. data/ext/ruby_xnd/xnd/config.h +26 -0
  21. data/ext/ruby_xnd/xnd/config.h.in +3 -0
  22. data/ext/ruby_xnd/xnd/config.log +421 -0
  23. data/ext/ruby_xnd/xnd/config.status +1023 -0
  24. data/ext/ruby_xnd/xnd/configure +376 -8
  25. data/ext/ruby_xnd/xnd/configure.ac +48 -7
  26. data/ext/ruby_xnd/xnd/doc/xnd/index.rst +3 -1
  27. data/ext/ruby_xnd/xnd/doc/xnd/{types.rst → xnd.rst} +3 -18
  28. data/ext/ruby_xnd/xnd/libxnd/Makefile +142 -0
  29. data/ext/ruby_xnd/xnd/libxnd/Makefile.in +43 -3
  30. data/ext/ruby_xnd/xnd/libxnd/Makefile.vc +19 -3
  31. data/ext/ruby_xnd/xnd/libxnd/bitmaps.c +42 -3
  32. data/ext/ruby_xnd/xnd/libxnd/bitmaps.o +0 -0
  33. data/ext/ruby_xnd/xnd/libxnd/bounds.c +366 -0
  34. data/ext/ruby_xnd/xnd/libxnd/bounds.o +0 -0
  35. data/ext/ruby_xnd/xnd/libxnd/contrib.h +98 -0
  36. data/ext/ruby_xnd/xnd/libxnd/contrib/bfloat16.h +213 -0
  37. data/ext/ruby_xnd/xnd/libxnd/copy.c +155 -4
  38. data/ext/ruby_xnd/xnd/libxnd/copy.o +0 -0
  39. data/ext/ruby_xnd/xnd/libxnd/cuda/cuda_memory.cu +121 -0
  40. data/ext/ruby_xnd/xnd/libxnd/cuda/cuda_memory.h +58 -0
  41. data/ext/ruby_xnd/xnd/libxnd/equal.c +195 -7
  42. data/ext/ruby_xnd/xnd/libxnd/equal.o +0 -0
  43. data/ext/ruby_xnd/xnd/libxnd/inline.h +32 -0
  44. data/ext/ruby_xnd/xnd/libxnd/libxnd.a +0 -0
  45. data/ext/ruby_xnd/xnd/libxnd/libxnd.so +1 -0
  46. data/ext/ruby_xnd/xnd/libxnd/libxnd.so.0 +1 -0
  47. data/ext/ruby_xnd/xnd/libxnd/libxnd.so.0.2.0dev3 +0 -0
  48. data/ext/ruby_xnd/xnd/libxnd/shape.c +207 -0
  49. data/ext/ruby_xnd/xnd/libxnd/shape.o +0 -0
  50. data/ext/ruby_xnd/xnd/libxnd/split.c +2 -2
  51. data/ext/ruby_xnd/xnd/libxnd/split.o +0 -0
  52. data/ext/ruby_xnd/xnd/libxnd/tests/Makefile +39 -0
  53. data/ext/ruby_xnd/xnd/libxnd/xnd.c +613 -91
  54. data/ext/ruby_xnd/xnd/libxnd/xnd.h +145 -4
  55. data/ext/ruby_xnd/xnd/libxnd/xnd.o +0 -0
  56. data/ext/ruby_xnd/xnd/python/test_xnd.py +1125 -50
  57. data/ext/ruby_xnd/xnd/python/xnd/__init__.py +609 -124
  58. data/ext/ruby_xnd/xnd/python/xnd/_version.py +1 -0
  59. data/ext/ruby_xnd/xnd/python/xnd/_xnd.c +1652 -101
  60. data/ext/ruby_xnd/xnd/python/xnd/libxnd.a +0 -0
  61. data/ext/ruby_xnd/xnd/python/xnd/libxnd.so +1 -0
  62. data/ext/ruby_xnd/xnd/python/xnd/libxnd.so.0 +1 -0
  63. data/ext/ruby_xnd/xnd/python/xnd/libxnd.so.0.2.0dev3 +0 -0
  64. data/ext/ruby_xnd/xnd/python/xnd/pyxnd.h +1 -1
  65. data/ext/ruby_xnd/xnd/python/xnd/util.h +25 -0
  66. data/ext/ruby_xnd/xnd/python/xnd/xnd.h +590 -0
  67. data/ext/ruby_xnd/xnd/python/xnd_randvalue.py +106 -6
  68. data/ext/ruby_xnd/xnd/python/xnd_support.py +4 -0
  69. data/ext/ruby_xnd/xnd/setup.py +46 -4
  70. data/lib/ruby_xnd.so +0 -0
  71. data/lib/xnd.rb +39 -3
  72. data/lib/xnd/version.rb +2 -2
  73. data/xnd.gemspec +2 -1
  74. metadata +58 -5
@@ -0,0 +1,213 @@
1
+ /* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
2
+
3
+ Licensed under the Apache License, Version 2.0 (the "License");
4
+ you may not use this file except in compliance with the License.
5
+ You may obtain a copy of the License at
6
+
7
+ http://www.apache.org/licenses/LICENSE-2.0
8
+
9
+ Unless required by applicable law or agreed to in writing, software
10
+ distributed under the License is distributed on an "AS IS" BASIS,
11
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ See the License for the specific language governing permissions and
13
+ limitations under the License.
14
+ ==============================================================================*/
15
+
16
+ /* Modified and adapted for gumath. */
17
+
18
+ #ifndef BFLOAT16_H
19
+ #define BFLOAT16_H
20
+
21
+
22
+ #include <stdint.h>
23
+ #include <math.h>
24
+
25
+
26
+ union FP32 { // overlays a float and an unsigned int on the same storage
27
+ unsigned int u; // integer view of the stored bits; NOTE(review): assumed to be 32 bits wide -- confirm
28
+ float f; // floating-point view of the stored bits
29
+ };
30
+
31
+ // Converts a single-precision float to bfloat16, with round-nearest-to-even
32
+ // as rounding method. The result is returned as a raw 16-bit pattern.
33
+ // TODO: There is a slightly faster implementation (8% faster on CPU)
34
+ // than this (documented in cl/175987786), that is exponentially harder to
35
+ // understand and document. Switch to the faster version when converting to
36
+ // BF16 becomes compute-bound.
37
+ static inline uint16_t
38
+ xnd_round_to_bfloat16(float v)
39
+ {
40
+ uint32_t input;
41
+ union FP32 f;
42
+ f.f = v; // store the value through the float member ...
43
+ input = f.u; // ... and read the same storage back as an integer
44
+ uint16_t output;
45
+
46
+ if (isnan(v)) {
47
+ // If the value is a NaN, squash it to a qNaN with msb of fraction set,
48
+ // this makes sure after truncation we don't end up with an inf.
49
+ //
50
+ // qNaN magic: All exponent bits set + most significant bit of fraction
51
+ // set. The result is a constant, so the input NaN's sign and payload are dropped.
52
+ output = 0x7fc0;
53
+ } else {
54
+ // Fast rounding algorithm that rounds a half value to nearest even. This
55
+ // reduces expected error when we convert a large number of floats. Here
56
+ // is how it works:
57
+ //
58
+ // Definitions:
59
+ // To convert a float 32 to bfloat16, a float 32 can be viewed as 32 bits
60
+ // with the following tags:
61
+ //
62
+ // Sign | Exp (8 bits) | Frac (23 bits)
63
+ // S EEEEEEEE FFFFFFLRTTTTTTTTTTTTTTT
64
+ //
65
+ // S: Sign bit.
66
+ // E: Exponent bits.
67
+ // F: First 6 bits of fraction.
68
+ // L: Least significant bit of resulting bfloat16 if we truncate away the
69
+ // rest of the float32. This is also the 7th bit of fraction.
70
+ // R: Rounding bit, 8th bit of fraction.
71
+ // T: Sticky bits, rest of fraction, 15 bits.
72
+ //
73
+ // To round half to nearest even, there are three cases where we want to
74
+ // round down (simply truncate the low 16 bits away, which consist of the
75
+ // rounding bit and sticky bits) and two cases where we want to round up
76
+ // (truncate, then add one to the result).
77
+ //
78
+ // The fast converting algorithm simply adds lsb (L) to 0x7fff (15 bits of
79
+ // 1s) as the rounding bias, adds the rounding bias to the input, then
80
+ // truncates the last 16 bits away.
81
+ //
82
+ // To understand how it works, we can analyze this algorithm case by case:
83
+ //
84
+ // 1. L = 0, R = 0:
85
+ // Expect: round down, this is less than half value.
86
+ //
87
+ // Algorithm:
88
+ // - Rounding bias: 0x7fff + 0 = 0x7fff
89
+ // - Adding rounding bias to input may create a carry out of the T bits,
90
+ // depending on whether there is any value set to 1 in T bits.
91
+ // - R may be set to 1 if there is a carry.
92
+ // - L remains 0.
93
+ // - Note that this case also handles Inf and -Inf, where all fraction
94
+ // bits, including L, R and Ts are all 0. The output remains Inf after
95
+ // this algorithm.
96
+ //
97
+ // 2. L = 1, R = 0:
98
+ // Expect: round down, this is less than half value.
99
+ //
100
+ // Algorithm:
101
+ // - Rounding bias: 0x7fff + 1 = 0x8000
102
+ // - Adding rounding bias to input doesn't change sticky bits but
103
+ // adds 1 to rounding bit.
104
+ // - L remains 1.
105
+ //
106
+ // 3. L = 0, R = 1, all of T are 0:
107
+ // Expect: round down, this is exactly at half, the result is already
108
+ // even (L=0).
109
+ //
110
+ // Algorithm:
111
+ // - Rounding bias: 0x7fff + 0 = 0x7fff
112
+ // - Adding rounding bias to input sets all sticky bits to 1, but
113
+ // doesn't create a carry.
114
+ // - R remains 1.
115
+ // - L remains 0.
116
+ //
117
+ // 4. L = 1, R = 1:
118
+ // Expect: round up, this is at or above half; at exactly half the result
119
+ // needs to be rounded to the next even number.
120
+ //
121
+ // Algorithm:
122
+ // - Rounding bias: 0x7fff + 1 = 0x8000
123
+ // - Adding rounding bias to input doesn't change sticky bits, but
124
+ // creates a carry from rounding bit.
125
+ // - The carry sets L to 0, creates another carry bit and propagates
126
+ // forward to F bits.
127
+ // - If all the F bits are 1, a carry then propagates to the exponent
128
+ // bits, which then creates the minimum value with the next exponent
129
+ // value. Note that we won't have the case where exponents are all 1,
130
+ // since that's either a NaN (handled in the other if condition) or inf
131
+ // (handled in case 1).
132
+ //
133
+ // 5. L = 0, R = 1, any of T is 1:
134
+ // Expect: round up, this is greater than half.
135
+ //
136
+ // Algorithm:
137
+ // - Rounding bias: 0x7fff + 0 = 0x7fff
138
+ // - Adding rounding bias to input creates a carry from sticky bits,
139
+ // sets rounding bit to 0, then creates another carry.
140
+ // - The second carry sets L to 1.
141
+ //
142
+ // Examples:
143
+ //
144
+ // Exact half value that is already even:
145
+ // Input:
146
+ // Sign | Exp (8 bit) | Frac (first 7 bit) | Frac (last 16 bit)
147
+ // S E E E E E E E E F F F F F F L RTTTTTTTTTTTTTTT
148
+ // 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 1000000000000000
149
+ //
150
+ // This falls into case 3. We truncate the rest of 16 bits and no
151
+ // carry is created into F and L:
152
+ //
153
+ // Output:
154
+ // Sign | Exp (8 bit) | Frac (first 7 bit)
155
+ // S E E E E E E E E F F F F F F L
156
+ // 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0
157
+ //
158
+ // Exact half value, round to next even number:
159
+ // Input:
160
+ // Sign | Exp (8 bit) | Frac (first 7 bit) | Frac (last 16 bit)
161
+ // S E E E E E E E E F F F F F F L RTTTTTTTTTTTTTTT
162
+ // 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1000000000000000
163
+ //
164
+ // This falls into case 4. We create a carry from R,
165
+ // which then propagates into L and F:
166
+ //
167
+ // Output:
168
+ // Sign | Exp (8 bit) | Frac (first 7 bit)
169
+ // S E E E E E E E E F F F F F F L
170
+ // 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0
171
+ //
172
+ //
173
+ // Max denormal value round to min normal value:
174
+ // Input:
175
+ // Sign | Exp (8 bit) | Frac (first 7 bit) | Frac (last 16 bit)
176
+ // S E E E E E E E E F F F F F F L RTTTTTTTTTTTTTTT
177
+ // 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1111111111111111
178
+ //
179
+ // This falls into case 4. We create a carry from R, which
180
+ // propagates into L and F, which then propagates into exponent
181
+ // bits:
182
+ //
183
+ // Output:
184
+ // Sign | Exp (8 bit) | Frac (first 7 bit)
185
+ // S E E E E E E E E F F F F F F L
186
+ // 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0
187
+ //
188
+ // Max normal value round to Inf:
189
+ // Input:
190
+ // Sign | Exp (8 bit) | Frac (first 7 bit) | Frac (last 16 bit)
191
+ // S E E E E E E E E F F F F F F L RTTTTTTTTTTTTTTT
192
+ // 0 1 1 1 1 1 1 1 0 1 1 1 1 1 1 1 1111111111111111
193
+ //
194
+ // This falls into case 4. We create a carry from R, which
195
+ // propagates into L and F, which then propagates into exponent
196
+ // bits:
197
+ // Output:
198
+ // Sign | Exp (8 bit) | Frac (first 7 bit)
199
+ // S E E E E E E E E F F F F F F L
200
+ // 0 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0
201
+ //
202
+ //
203
+ // Least significant bit of resulting bfloat.
204
+ uint32_t lsb = (input >> 16) & 1;
205
+ uint32_t rounding_bias = 0x7fff + lsb; // 0x7fff when L is 0, 0x8000 when L is 1
206
+ input += rounding_bias; // carries into the upper 16 bits in the round-up cases
207
+ output = (uint16_t)(input >> 16); // truncate: keep only the upper 16 bits
208
+ }
209
+ return output;
210
+ }
211
+
212
+
213
+ #endif // BFLOAT16_H
@@ -38,6 +38,7 @@
38
38
  #include <assert.h>
39
39
  #include "ndtypes.h"
40
40
  #include "xnd.h"
41
+ #include "overflow.h"
41
42
  #include "contrib.h"
42
43
 
43
44
 
@@ -164,6 +165,15 @@ copy_int64(xnd_t * const x, const int64_t i64, ndt_context_t *ctx)
164
165
  return 0;
165
166
  }
166
167
 
168
+ case BFloat16: {
169
+ if (i64 < -4503599627370496LL || i64 > 4503599627370496LL) {
170
+ return value_error(ctx);
171
+ }
172
+ double real = (double)i64;
173
+ xnd_bfloat_pack(x->ptr, real);
174
+ return 0;
175
+ }
176
+
167
177
  case Float16: {
168
178
  if (i64 < -4503599627370496LL || i64 > 4503599627370496LL) {
169
179
  return value_error(ctx);
@@ -189,6 +199,19 @@ copy_int64(xnd_t * const x, const int64_t i64, ndt_context_t *ctx)
189
199
  return 0;
190
200
  }
191
201
 
202
+ case BComplex32: {
203
+ if (i64 < -4503599627370496LL || i64 > 4503599627370496LL) {
204
+ return value_error(ctx);
205
+ }
206
+
207
+ double real = (double)i64;
208
+ double imag = 0.0;
209
+
210
+ xnd_bfloat_pack(x->ptr, real);
211
+ xnd_bfloat_pack(x->ptr+2, imag);
212
+ return 0;
213
+ }
214
+
192
215
  case Complex32: {
193
216
  if (i64 < -4503599627370496LL || i64 > 4503599627370496LL) {
194
217
  return value_error(ctx);
@@ -312,6 +335,15 @@ copy_uint64(xnd_t * const x, const uint64_t u64, ndt_context_t *ctx)
312
335
  return 0;
313
336
  }
314
337
 
338
+ case BFloat16: {
339
+ if (u64 > 4503599627370496LL) {
340
+ return value_error(ctx);
341
+ }
342
+ double real = (double)u64;
343
+ xnd_bfloat_pack(x->ptr, real);
344
+ return 0;
345
+ }
346
+
315
347
  case Float16: {
316
348
  if (u64 > 4503599627370496LL) {
317
349
  return value_error(ctx);
@@ -337,6 +369,19 @@ copy_uint64(xnd_t * const x, const uint64_t u64, ndt_context_t *ctx)
337
369
  return 0;
338
370
  }
339
371
 
372
+ case BComplex32: {
373
+ if (u64 > 4503599627370496LL) {
374
+ return value_error(ctx);
375
+ }
376
+
377
+ double real = (double)u64;
378
+ double imag = 0.0;
379
+
380
+ xnd_bfloat_pack(x->ptr, real);
381
+ xnd_bfloat_pack(x->ptr+2, imag);
382
+ return 0;
383
+ }
384
+
340
385
  case Complex32: {
341
386
  if (u64 > 4503599627370496LL) {
342
387
  return value_error(ctx);
@@ -474,6 +519,12 @@ copy_float64(xnd_t * const x, const double real, ndt_context_t *ctx)
474
519
  }
475
520
  uint64_t u64 = (uint64_t)real;
476
521
  PACK_SINGLE(x->ptr, u64, uint64_t, t->flags);
522
+ return 0;
523
+ }
524
+
525
+ case BFloat16: {
526
+ xnd_bfloat_pack(x->ptr, real);
527
+ return 0;
477
528
  }
478
529
 
479
530
  case Float16: {
@@ -489,6 +540,14 @@ copy_float64(xnd_t * const x, const double real, ndt_context_t *ctx)
489
540
  return 0;
490
541
  }
491
542
 
543
+ case BComplex32: {
544
+ double imag = 0.0;
545
+
546
+ xnd_bfloat_pack(x->ptr, real);
547
+ xnd_bfloat_pack(x->ptr+2, imag);
548
+ return 0;
549
+ }
550
+
492
551
  case Complex32: {
493
552
  double imag = 0.0;
494
553
 
@@ -532,7 +591,7 @@ copy_complex128(xnd_t * const x, const double real, const double imag,
532
591
  switch (t->tag) {
533
592
  case Int8: case Int16: case Int32: case Int64:
534
593
  case Uint8: case Uint16: case Uint32: case Uint64:
535
- case Float16: case Float32: case Float64: {
594
+ case BFloat16: case Float16: case Float32: case Float64: {
536
595
  if (imag == 0.0) {
537
596
  return copy_float64(x, real, ctx);
538
597
  }
@@ -540,6 +599,12 @@ copy_complex128(xnd_t * const x, const double real, const double imag,
540
599
  return type_error(ctx);
541
600
  }
542
601
 
602
+ case BComplex32: {
603
+ xnd_bfloat_pack(x->ptr, real);
604
+ xnd_bfloat_pack(x->ptr+2, imag);
605
+ return 0;
606
+ }
607
+
543
608
  case Complex32: {
544
609
  if (xnd_float_pack2(real, (unsigned char *)x->ptr, le(t->flags), ctx) < 0) {
545
610
  return -1;
@@ -568,6 +633,8 @@ copy_complex128(xnd_t * const x, const double real, const double imag,
568
633
  int
569
634
  xnd_copy(xnd_t *y, const xnd_t *x, uint32_t flags, ndt_context_t *ctx)
570
635
  {
636
+ APPLY_STORED_INDICES_INT(x)
637
+ APPLY_STORED_INDICES_INT(y)
571
638
  const ndt_t * const t = x->type;
572
639
  const ndt_t * const u = y->type;
573
640
  int n;
@@ -583,6 +650,10 @@ xnd_copy(xnd_t *y, const xnd_t *x, uint32_t flags, ndt_context_t *ctx)
583
650
  return 0;
584
651
  }
585
652
 
653
+ if (ndt_is_optional(u)) {
654
+ xnd_set_valid(y);
655
+ }
656
+
586
657
  if (t->tag == Ref || u->tag == Ref) {
587
658
  return copy_ref(y, x, flags, ctx);
588
659
  }
@@ -688,6 +759,25 @@ xnd_copy(xnd_t *y, const xnd_t *x, uint32_t flags, ndt_context_t *ctx)
688
759
  return 0;
689
760
  }
690
761
 
762
+ case Union: {
763
+ if (!ndt_equal(u, t)) {
764
+ return type_error(ctx);
765
+ }
766
+
767
+ const xnd_t xnext = xnd_union_next(x, ctx);
768
+ if (xnext.ptr == NULL) {
769
+ return -1;
770
+ }
771
+
772
+ XND_UNION_TAG(y->ptr) = XND_UNION_TAG(x->ptr);
773
+ xnd_t ynext = xnd_union_next(y, ctx);
774
+ if (ynext.ptr == NULL) {
775
+ return -1;
776
+ }
777
+
778
+ return xnd_copy(&ynext, &xnext, flags, ctx);
779
+ }
780
+
691
781
  case Constr: {
692
782
  if (u->tag != Constr || strcmp(u->Constr.name, t->Constr.name) != 0) {
693
783
  return type_error(ctx);
@@ -805,6 +895,11 @@ xnd_copy(xnd_t *y, const xnd_t *x, uint32_t flags, ndt_context_t *ctx)
805
895
  return copy_uint64(y, u64, ctx);
806
896
  }
807
897
 
898
+ case BFloat16: {
899
+ double real = xnd_bfloat_unpack(x->ptr);
900
+ return copy_float64(y, real, ctx);
901
+ }
902
+
808
903
  case Float16: {
809
904
  double real = xnd_float_unpack2((unsigned char *)x->ptr, le(t->flags));
810
905
  return copy_float64(y, real, ctx);
@@ -820,6 +915,15 @@ xnd_copy(xnd_t *y, const xnd_t *x, uint32_t flags, ndt_context_t *ctx)
820
915
  return copy_float64(y, real, ctx);
821
916
  }
822
917
 
918
+ case BComplex32: {
919
+ double real, imag;
920
+
921
+ real = xnd_bfloat_unpack(x->ptr);
922
+ imag = xnd_bfloat_unpack(x->ptr+2);
923
+
924
+ return copy_complex128(y, real, imag, ctx);
925
+ }
926
+
823
927
  case Complex32: {
824
928
  double real, imag;
825
929
 
@@ -875,7 +979,7 @@ xnd_copy(xnd_t *y, const xnd_t *x, uint32_t flags, ndt_context_t *ctx)
875
979
  return type_error(ctx);
876
980
  }
877
981
 
878
- s = ndt_strdup(XND_POINTER_DATA(x->ptr), ctx);
982
+ s = ndt_strdup(XND_STRING_DATA(x->ptr), ctx);
879
983
  if (s == NULL) {
880
984
  return -1;
881
985
  }
@@ -912,7 +1016,7 @@ xnd_copy(xnd_t *y, const xnd_t *x, uint32_t flags, ndt_context_t *ctx)
912
1016
  memcpy(s, XND_BYTES_DATA(x->ptr), (size_t)size);
913
1017
 
914
1018
  if (XND_BYTES_DATA(y->ptr) != NULL) {
915
- if (!(flags & XND_OWN_EMBEDDED)) {
1019
+ if (!(flags & XND_OWN_BYTES)) {
916
1020
  ndt_err_format(ctx, NDT_RuntimeError,
917
1021
  "cannot free string pointer, xnd does not own it");
918
1022
  ndt_aligned_free(s);
@@ -926,8 +1030,55 @@ xnd_copy(xnd_t *y, const xnd_t *x, uint32_t flags, ndt_context_t *ctx)
926
1030
  return 0;
927
1031
  }
928
1032
 
929
- /* NOT REACHED: intercepted by equal_ref(). */
1033
+ case Array: {
1034
+ bool overflow = false;
1035
+
1036
+ if (u->tag != Array) {
1037
+ return type_error(ctx);
1038
+ }
1039
+
1040
+ const int64_t shape = XND_ARRAY_SHAPE(x->ptr);
1041
+ const int64_t size = MULi64(shape, t->Array.itemsize, &overflow);
1042
+ if (overflow) {
1043
+ ndt_err_format(ctx, NDT_ValueError, "flexible array too large");
1044
+ return -1;
1045
+ }
1046
+
1047
+ char *data = ndt_aligned_calloc(u->align, size);
1048
+ if (data == NULL) {
1049
+ (void)ndt_memory_error(ctx);
1050
+ return -1;
1051
+ }
1052
+
1053
+ if (XND_ARRAY_DATA(y->ptr) != NULL) {
1054
+ if (!(flags & XND_OWN_ARRAYS)) {
1055
+ ndt_err_format(ctx, NDT_RuntimeError,
1056
+ "cannot free array data pointer, xnd does not own it");
1057
+ ndt_aligned_free(data);
1058
+ return -1;
1059
+ }
1060
+ ndt_aligned_free(XND_ARRAY_DATA(y->ptr));
1061
+ }
1062
+
1063
+ XND_ARRAY_SHAPE(y->ptr) = shape;
1064
+ XND_ARRAY_DATA(y->ptr) = data;
1065
+
1066
+ for (int64_t i = 0; i < shape; i++) {
1067
+ const xnd_t xnext = xnd_array_next(x, i);
1068
+ xnd_t ynext = xnd_array_next(y, i);
1069
+ n = xnd_copy(&ynext, &xnext, flags, ctx);
1070
+ if (n < 0) return n;
1071
+ }
1072
+
1073
+ return 0;
1074
+ }
1075
+
1076
+ /* NOT REACHED: intercepted by apply_stored_indices(). */
1077
+ case VarDimElem:
1078
+ /* NOT REACHED: intercepted by copy_ref(). */
930
1079
  case Ref:
1080
+ ndt_err_format(ctx, NDT_RuntimeError, "unexpected VarDimElem or Ref");
1081
+ return -1;
931
1082
 
932
1083
  /* NOT REACHED: xnd types must be concrete. */
933
1084
  case Module: case Function: