duckdb 0.8.2-dev1573.0 → 0.8.2-dev1724.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -2,7 +2,7 @@
2
2
  "name": "duckdb",
3
3
  "main": "./lib/duckdb.js",
4
4
  "types": "./lib/duckdb.d.ts",
5
- "version": "0.8.2-dev1573.0",
5
+ "version": "0.8.2-dev1724.0",
6
6
  "description": "DuckDB node.js API",
7
7
  "gypfile": true,
8
8
  "dependencies": {
@@ -333,8 +333,22 @@ static void CreateValuesMap(const StructNames &names, yyjson_mut_doc *doc, yyjso
333
333
  static void CreateValuesUnion(const StructNames &names, yyjson_mut_doc *doc, yyjson_mut_val *vals[], Vector &value_v,
334
334
  idx_t count) {
335
335
  // Structs become values, therefore we initialize vals to JSON values
336
- for (idx_t i = 0; i < count; i++) {
337
- vals[i] = yyjson_mut_obj(doc);
336
+ UnifiedVectorFormat value_data;
337
+ value_v.ToUnifiedFormat(count, value_data);
338
+ if (value_data.validity.AllValid()) {
339
+ for (idx_t i = 0; i < count; i++) {
340
+ vals[i] = yyjson_mut_obj(doc);
341
+ }
342
+ } else {
343
+ for (idx_t i = 0; i < count; i++) {
344
+ auto index = value_data.sel->get_index(i);
345
+ if (!value_data.validity.RowIsValid(index)) {
346
+ // Make the entry NULL if the Union value is NULL
347
+ vals[i] = yyjson_mut_null(doc);
348
+ } else {
349
+ vals[i] = yyjson_mut_obj(doc);
350
+ }
351
+ }
338
352
  }
339
353
 
340
354
  // Initialize re-usable array for the nested values
@@ -361,6 +375,11 @@ static void CreateValuesUnion(const StructNames &names, yyjson_mut_doc *doc, yyj
361
375
  auto keys = UnifiedVectorFormat::GetData<string_t>(key_data);
362
376
 
363
377
  for (idx_t i = 0; i < count; i++) {
378
+ auto value_index = value_data.sel->get_index(i);
379
+ if (!value_data.validity.RowIsValid(value_index)) {
380
+ // This entry is just NULL in its entirety
381
+ continue;
382
+ }
364
383
  auto tag_idx = tag_data.sel->get_index(i);
365
384
  if (!tag_data.validity.RowIsValid(tag_idx)) {
366
385
  continue;
@@ -669,6 +669,77 @@ bool TransformToJSON(yyjson_val *vals[], yyjson_alc *alc, Vector &result, const
669
669
  return true;
670
670
  }
671
671
 
672
+ bool TransformValueIntoUnion(yyjson_val **vals, yyjson_alc *alc, Vector &result, const idx_t count,
673
+ JSONTransformOptions &options) {
674
+ auto type = result.GetType();
675
+
676
+ auto fields = UnionType::CopyMemberTypes(type);
677
+ vector<string> names;
678
+ for (const auto &field : fields) {
679
+ names.push_back(field.first);
680
+ }
681
+
682
+ bool success = true;
683
+
684
+ auto &validity = FlatVector::Validity(result);
685
+
686
+ auto set_error = [&](idx_t i, const string &message) {
687
+ validity.SetInvalid(i);
688
+ result.SetValue(i, Value(nullptr));
689
+ if (success && options.strict_cast) {
690
+ options.error_message = message;
691
+ options.object_index = i;
692
+ success = false;
693
+ }
694
+ };
695
+
696
+ for (idx_t i = 0; i < count; i++) {
697
+ const auto &obj = vals[i];
698
+
699
+ if (!obj || unsafe_yyjson_is_null(vals[i])) {
700
+ validity.SetInvalid(i);
701
+ result.SetValue(i, Value(nullptr));
702
+ continue;
703
+ }
704
+
705
+ if (!unsafe_yyjson_is_obj(obj)) {
706
+ set_error(i,
707
+ StringUtil::Format("Expected an object representing a union, got %s", yyjson_get_type_desc(obj)));
708
+ continue;
709
+ }
710
+
711
+ auto len = unsafe_yyjson_get_len(obj);
712
+ if (len > 1) {
713
+ set_error(i, "Found object containing more than one key, instead of union");
714
+ continue;
715
+ } else if (len == 0) {
716
+ set_error(i, "Found empty object, instead of union");
717
+ continue;
718
+ }
719
+
720
+ auto key = unsafe_yyjson_get_first(obj);
721
+ auto val = yyjson_obj_iter_get_val(key);
722
+
723
+ auto tag = std::find(names.begin(), names.end(), unsafe_yyjson_get_str(key));
724
+ if (tag == names.end()) {
725
+ set_error(i, StringUtil::Format("Found object containing unknown key, instead of union: %s",
726
+ unsafe_yyjson_get_str(key)));
727
+ continue;
728
+ }
729
+
730
+ idx_t actual_tag = tag - names.begin();
731
+
732
+ Vector single(UnionType::GetMemberType(type, actual_tag), 1);
733
+ if (!JSONTransform::Transform(&val, alc, single, 1, options)) {
734
+ success = false;
735
+ }
736
+
737
+ result.SetValue(i, Value::UNION(fields, actual_tag, single.GetValue(0)));
738
+ }
739
+
740
+ return success;
741
+ }
742
+
672
743
  bool JSONTransform::Transform(yyjson_val *vals[], yyjson_alc *alc, Vector &result, const idx_t count,
673
744
  JSONTransformOptions &options) {
674
745
  auto result_type = result.GetType();
@@ -747,8 +818,10 @@ bool JSONTransform::Transform(yyjson_val *vals[], yyjson_alc *alc, Vector &resul
747
818
  return TransformArray(vals, alc, result, count, options);
748
819
  case LogicalTypeId::MAP:
749
820
  return TransformObjectToMap(vals, alc, result, count, options);
821
+ case LogicalTypeId::UNION:
822
+ return TransformValueIntoUnion(vals, alc, result, count, options);
750
823
  default:
751
- throw InternalException("Unexpected type at JSON Transform %s", result_type.ToString());
824
+ throw NotImplementedException("Cannot read a value of type %s from a json file", result_type.ToString());
752
825
  }
753
826
  }
754
827
 
@@ -152,10 +152,12 @@ BindInfo ParquetGetBatchInfo(const FunctionData *bind_data) {
152
152
  for (auto &path : parquet_bind.files) {
153
153
  file_path.emplace_back(path);
154
154
  }
155
+ // LCOV_EXCL_START
155
156
  bind_info.InsertOption("file_path", Value::LIST(LogicalType::VARCHAR, file_path));
156
157
  bind_info.InsertOption("binary_as_string", Value::BOOLEAN(parquet_bind.parquet_options.binary_as_string));
157
158
  bind_info.InsertOption("file_row_number", Value::BOOLEAN(parquet_bind.parquet_options.file_row_number));
158
159
  parquet_bind.parquet_options.file_options.AddBatchInfo(bind_info);
160
+ // LCOV_EXCL_STOP
159
161
  return bind_info;
160
162
  }
161
163
 
@@ -394,6 +394,76 @@ struct ArrowVarcharData {
394
394
  }
395
395
  };
396
396
 
397
+ //===--------------------------------------------------------------------===//
398
+ // Unions
399
+ //===--------------------------------------------------------------------===//
400
+ /**
401
+ * Based on https://arrow.apache.org/docs/format/Columnar.html#union-layout &
402
+ * https://arrow.apache.org/docs/format/CDataInterface.html
403
+ */
404
+ struct ArrowUnionData {
405
+ static void Initialize(ArrowAppendData &result, const LogicalType &type, idx_t capacity) {
406
+ result.main_buffer.reserve(capacity * sizeof(int8_t));
407
+
408
+ for (auto &child : UnionType::CopyMemberTypes(type)) {
409
+ auto child_buffer = InitializeArrowChild(child.second, capacity, result.options);
410
+ result.child_data.push_back(std::move(child_buffer));
411
+ }
412
+ }
413
+
414
+ static void Append(ArrowAppendData &append_data, Vector &input, idx_t from, idx_t to, idx_t input_size) {
415
+ UnifiedVectorFormat format;
416
+ input.ToUnifiedFormat(input_size, format);
417
+ idx_t size = to - from;
418
+
419
+ auto &types_buffer = append_data.main_buffer;
420
+
421
+ duckdb::vector<Vector> child_vectors;
422
+ for (const auto &child : UnionType::CopyMemberTypes(input.GetType())) {
423
+ child_vectors.emplace_back(child.second);
424
+ }
425
+
426
+ for (idx_t input_idx = from; input_idx < to; input_idx++) {
427
+ const auto &val = input.GetValue(input_idx);
428
+
429
+ idx_t tag = 0;
430
+ Value resolved_value(nullptr);
431
+ if (!val.IsNull()) {
432
+ tag = UnionValue::GetTag(val);
433
+
434
+ resolved_value = UnionValue::GetValue(val);
435
+ }
436
+
437
+ for (idx_t child_idx = 0; child_idx < child_vectors.size(); child_idx++) {
438
+ child_vectors[child_idx].SetValue(input_idx, child_idx == tag ? resolved_value : Value(nullptr));
439
+ }
440
+
441
+ types_buffer.data()[input_idx] = tag;
442
+ }
443
+
444
+ for (idx_t child_idx = 0; child_idx < child_vectors.size(); child_idx++) {
445
+ auto &child_buffer = append_data.child_data[child_idx];
446
+ auto &child = child_vectors[child_idx];
447
+ child_buffer->append_vector(*child_buffer, child, from, to, size);
448
+ }
449
+ append_data.row_count += size;
450
+ }
451
+
452
+ static void Finalize(ArrowAppendData &append_data, const LogicalType &type, ArrowArray *result) {
453
+ result->n_buffers = 2;
454
+ result->buffers[1] = append_data.main_buffer.data();
455
+
456
+ auto &child_types = UnionType::CopyMemberTypes(type);
457
+ append_data.child_pointers.resize(child_types.size());
458
+ result->children = append_data.child_pointers.data();
459
+ result->n_children = child_types.size();
460
+ for (idx_t i = 0; i < child_types.size(); i++) {
461
+ auto &child_type = child_types[i].second;
462
+ append_data.child_pointers[i] = FinalizeArrowChild(child_type, *append_data.child_data[i]);
463
+ }
464
+ }
465
+ };
466
+
397
467
  //===--------------------------------------------------------------------===//
398
468
  // Structs
399
469
  //===--------------------------------------------------------------------===//
@@ -716,6 +786,9 @@ static void InitializeFunctionPointers(ArrowAppendData &append_data, const Logic
716
786
  case LogicalTypeId::INTERVAL:
717
787
  InitializeFunctionPointers<ArrowScalarData<ArrowInterval, interval_t, ArrowIntervalConverter>>(append_data);
718
788
  break;
789
+ case LogicalTypeId::UNION:
790
+ InitializeFunctionPointers<ArrowUnionData>(append_data);
791
+ break;
719
792
  case LogicalTypeId::STRUCT:
720
793
  InitializeFunctionPointers<ArrowStructData>(append_data);
721
794
  break;
@@ -726,7 +799,7 @@ static void InitializeFunctionPointers(ArrowAppendData &append_data, const Logic
726
799
  InitializeFunctionPointers<ArrowMapData>(append_data);
727
800
  break;
728
801
  default:
729
- throw InternalException("Unsupported type in DuckDB -> Arrow Conversion: %s\n", type.ToString());
802
+ throw NotImplementedException("Unsupported type in DuckDB -> Arrow Conversion: %s\n", type.ToString());
730
803
  }
731
804
  }
732
805
 
@@ -76,6 +76,15 @@ void SetArrowMapFormat(DuckDBArrowSchemaHolder &root_holder, ArrowSchema &child,
76
76
  SetArrowFormat(root_holder, **child.children, ListType::GetChildType(type), options);
77
77
  }
78
78
 
79
+ unsafe_unique_array<char> AddName(const string &name) {
80
+ auto name_ptr = make_unsafe_uniq_array<char>(name.size() + 1);
81
+ for (size_t i = 0; i < name.size(); i++) {
82
+ name_ptr[i] = name[i];
83
+ }
84
+ name_ptr[name.size()] = '\0';
85
+ return name_ptr;
86
+ }
87
+
79
88
  void SetArrowFormat(DuckDBArrowSchemaHolder &root_holder, ArrowSchema &child, const LogicalType &type,
80
89
  const ArrowOptions &options) {
81
90
  switch (type.id()) {
@@ -135,12 +144,7 @@ void SetArrowFormat(DuckDBArrowSchemaHolder &root_holder, ArrowSchema &child, co
135
144
  break;
136
145
  case LogicalTypeId::TIMESTAMP_TZ: {
137
146
  string format = "tsu:" + options.time_zone;
138
- auto format_ptr = make_unsafe_uniq_array<char>(format.size() + 1);
139
- for (size_t i = 0; i < format.size(); i++) {
140
- format_ptr[i] = format[i];
141
- }
142
- format_ptr[format.size()] = '\0';
143
- root_holder.owned_type_names.push_back(std::move(format_ptr));
147
+ root_holder.owned_type_names.push_back(AddName(format));
144
148
  child.format = root_holder.owned_type_names.back().get();
145
149
  break;
146
150
  }
@@ -160,12 +164,7 @@ void SetArrowFormat(DuckDBArrowSchemaHolder &root_holder, ArrowSchema &child, co
160
164
  uint8_t width, scale;
161
165
  type.GetDecimalProperties(width, scale);
162
166
  string format = "d:" + to_string(width) + "," + to_string(scale);
163
- auto format_ptr = make_unsafe_uniq_array<char>(format.size() + 1);
164
- for (size_t i = 0; i < format.size(); i++) {
165
- format_ptr[i] = format[i];
166
- }
167
- format_ptr[format.size()] = '\0';
168
- root_holder.owned_type_names.push_back(std::move(format_ptr));
167
+ root_holder.owned_type_names.push_back(AddName(format));
169
168
  child.format = root_holder.owned_type_names.back().get();
170
169
  break;
171
170
  }
@@ -211,13 +210,7 @@ void SetArrowFormat(DuckDBArrowSchemaHolder &root_holder, ArrowSchema &child, co
211
210
 
212
211
  InitializeChild(*child.children[type_idx]);
213
212
 
214
- auto &struct_col_name = child_types[type_idx].first;
215
- auto name_ptr = make_unsafe_uniq_array<char>(struct_col_name.size() + 1);
216
- for (size_t i = 0; i < struct_col_name.size(); i++) {
217
- name_ptr[i] = struct_col_name[i];
218
- }
219
- name_ptr[struct_col_name.size()] = '\0';
220
- root_holder.owned_type_names.push_back(std::move(name_ptr));
213
+ root_holder.owned_type_names.push_back(AddName(child_types[type_idx].first));
221
214
 
222
215
  child.children[type_idx]->name = root_holder.owned_type_names.back().get();
223
216
  SetArrowFormat(root_holder, *child.children[type_idx], child_types[type_idx].second, options);
@@ -228,6 +221,38 @@ void SetArrowFormat(DuckDBArrowSchemaHolder &root_holder, ArrowSchema &child, co
228
221
  SetArrowMapFormat(root_holder, child, type, options);
229
222
  break;
230
223
  }
224
+ case LogicalTypeId::UNION: {
225
+ std::string format = "+us:";
226
+
227
+ auto &child_types = UnionType::CopyMemberTypes(type);
228
+ child.n_children = child_types.size();
229
+ root_holder.nested_children.emplace_back();
230
+ root_holder.nested_children.back().resize(child_types.size());
231
+ root_holder.nested_children_ptr.emplace_back();
232
+ root_holder.nested_children_ptr.back().resize(child_types.size());
233
+ for (idx_t type_idx = 0; type_idx < child_types.size(); type_idx++) {
234
+ root_holder.nested_children_ptr.back()[type_idx] = &root_holder.nested_children.back()[type_idx];
235
+ }
236
+ child.children = &root_holder.nested_children_ptr.back()[0];
237
+ for (size_t type_idx = 0; type_idx < child_types.size(); type_idx++) {
238
+
239
+ InitializeChild(*child.children[type_idx]);
240
+
241
+ root_holder.owned_type_names.push_back(AddName(child_types[type_idx].first));
242
+
243
+ child.children[type_idx]->name = root_holder.owned_type_names.back().get();
244
+ SetArrowFormat(root_holder, *child.children[type_idx], child_types[type_idx].second, options);
245
+
246
+ format += to_string(type_idx) + ",";
247
+ }
248
+
249
+ format.pop_back();
250
+
251
+ root_holder.owned_type_names.push_back(AddName(format));
252
+ child.format = root_holder.owned_type_names.back().get();
253
+
254
+ break;
255
+ }
231
256
  case LogicalTypeId::ENUM: {
232
257
  // TODO what do we do with pointer enums here?
233
258
  switch (EnumType::GetPhysicalType(type)) {
@@ -678,7 +678,7 @@ Value Value::UNION(child_list_t<LogicalType> members, uint8_t tag, Value value)
678
678
  result.is_null = false;
679
679
  // add the tag to the front of the struct
680
680
  vector<Value> union_values;
681
- union_values.emplace_back(Value::TINYINT(tag));
681
+ union_values.emplace_back(Value::UTINYINT(tag));
682
682
  for (idx_t i = 0; i < members.size(); i++) {
683
683
  if (i != tag) {
684
684
  union_values.emplace_back(members[i].second);
@@ -1465,20 +1465,25 @@ const vector<Value> &ListValue::GetChildren(const Value &value) {
1465
1465
  }
1466
1466
 
1467
1467
  const Value &UnionValue::GetValue(const Value &value) {
1468
- D_ASSERT(value.type() == LogicalTypeId::UNION);
1468
+ D_ASSERT(value.type().id() == LogicalTypeId::UNION);
1469
1469
  auto &children = StructValue::GetChildren(value);
1470
- auto tag = children[0].GetValueUnsafe<uint8_t>();
1470
+ auto tag = children[0].GetValueUnsafe<union_tag_t>();
1471
1471
  D_ASSERT(tag < children.size() - 1);
1472
1472
  return children[tag + 1];
1473
1473
  }
1474
1474
 
1475
- uint8_t UnionValue::GetTag(const Value &value) {
1476
- D_ASSERT(value.type() == LogicalTypeId::UNION);
1475
+ union_tag_t UnionValue::GetTag(const Value &value) {
1476
+ D_ASSERT(value.type().id() == LogicalTypeId::UNION);
1477
1477
  auto children = StructValue::GetChildren(value);
1478
- auto tag = children[0].GetValueUnsafe<uint8_t>();
1478
+ auto tag = children[0].GetValueUnsafe<union_tag_t>();
1479
+ D_ASSERT(tag < children.size() - 1);
1479
1480
  return tag;
1480
1481
  }
1481
1482
 
1483
+ const LogicalType &UnionValue::GetType(const Value &value) {
1484
+ return UnionType::GetMemberType(value.type(), UnionValue::GetTag(value));
1485
+ }
1486
+
1482
1487
  hugeint_t IntegralValue::Get(const Value &value) {
1483
1488
  switch (value.type().InternalType()) {
1484
1489
  case PhysicalType::INT8:
@@ -948,7 +948,7 @@ LogicalType LogicalType::UNION(child_list_t<LogicalType> members) {
948
948
  D_ASSERT(!members.empty());
949
949
  D_ASSERT(members.size() <= UnionType::MAX_UNION_MEMBERS);
950
950
  // union types always have a hidden "tag" field in front
951
- members.insert(members.begin(), {"", LogicalType::TINYINT});
951
+ members.insert(members.begin(), {"", LogicalType::UTINYINT});
952
952
  auto info = make_shared<StructTypeInfo>(std::move(members));
953
953
  return LogicalType(LogicalTypeId::UNION, std::move(info));
954
954
  }
@@ -127,6 +127,25 @@ LogicalType ArrowTableFunction::GetArrowLogicalType(
127
127
  }
128
128
  return LogicalType::STRUCT(child_types);
129
129
 
130
+ } else if (format[0] == '+' && format[1] == 'u') {
131
+ if (format[2] != 's') {
132
+ throw NotImplementedException("Unsupported Internal Arrow Type: \"%c\" Union", format[2]);
133
+ }
134
+ D_ASSERT(format[3] == ':');
135
+
136
+ std::string prefix = "+us:";
137
+ // TODO: what are these type ids actually for?
138
+ auto type_ids = StringUtil::Split(format.substr(prefix.size()), ',');
139
+
140
+ child_list_t<LogicalType> members;
141
+ for (idx_t type_idx = 0; type_idx < (idx_t)schema.n_children; type_idx++) {
142
+ auto type = schema.children[type_idx];
143
+
144
+ members.emplace_back(type->name, GetArrowLogicalType(*type, arrow_convert_data, col_idx));
145
+ }
146
+
147
+ return LogicalType::UNION(members);
148
+
130
149
  } else if (format == "+m") {
131
150
  convert_data.variable_sz_type.emplace_back(ArrowVariableSizeType::NORMAL, 0);
132
151
 
@@ -644,8 +644,42 @@ static void ColumnArrowToDuckDB(Vector &vector, ArrowArray &array, ArrowScanLoca
644
644
  }
645
645
  break;
646
646
  }
647
+ case LogicalTypeId::UNION: {
648
+ auto type_ids = ArrowBufferData<int8_t>(array, array.n_buffers == 1 ? 0 : 1);
649
+ D_ASSERT(type_ids);
650
+ auto members = UnionType::CopyMemberTypes(vector.GetType());
651
+
652
+ auto &validity_mask = FlatVector::Validity(vector);
653
+
654
+ duckdb::vector<Vector> children;
655
+ for (idx_t type_idx = 0; type_idx < (::idx_t)array.n_children; type_idx++) {
656
+ Vector child(members[type_idx].second);
657
+ auto arrow_array = array.children[type_idx];
658
+
659
+ SetValidityMask(child, *arrow_array, scan_state, size, nested_offset);
660
+
661
+ ColumnArrowToDuckDB(child, *arrow_array, scan_state, size, arrow_convert_data, col_idx, arrow_convert_idx,
662
+ nested_offset, &validity_mask);
663
+
664
+ children.push_back(std::move(child));
665
+ }
666
+
667
+ for (idx_t row_idx = 0; row_idx < size; row_idx++) {
668
+ auto tag = type_ids[row_idx];
669
+
670
+ auto out_of_range = tag < 0 || tag >= array.n_children;
671
+ if (out_of_range) {
672
+ throw InvalidInputException("Arrow union tag out of range: %d", tag);
673
+ }
674
+
675
+ const Value &value = children[tag].GetValue(row_idx);
676
+ vector.SetValue(row_idx, value.IsNull() ? Value() : Value::UNION(members, tag, value));
677
+ }
678
+
679
+ break;
680
+ }
647
681
  default:
648
- throw NotImplementedException("Unsupported type %s", vector.GetType().ToString());
682
+ throw NotImplementedException("Unsupported type for arrow conversion: %s", vector.GetType().ToString());
649
683
  }
650
684
  }
651
685
 
@@ -204,6 +204,13 @@ vector<TestType> TestAllTypesFun::GetTestTypes(bool use_large_enum) {
204
204
  auto max_map_value = Value::MAP(ListType::GetChildType(map_type), map_values);
205
205
  result.emplace_back(map_type, "map", std::move(min_map_value), std::move(max_map_value));
206
206
 
207
+ // union
208
+ child_list_t<LogicalType> members = {{"name", LogicalType::VARCHAR}, {"age", LogicalType::SMALLINT}};
209
+ auto union_type = LogicalType::UNION(members);
210
+ const Value &min = Value::UNION(members, 0, Value("Frank"));
211
+ const Value &max = Value::UNION(members, 1, Value::SMALLINT(5));
212
+ result.emplace_back(union_type, "union", min, max);
213
+
207
214
  return result;
208
215
  }
209
216
 
@@ -1,8 +1,8 @@
1
1
  #ifndef DUCKDB_VERSION
2
- #define DUCKDB_VERSION "0.8.2-dev1573"
2
+ #define DUCKDB_VERSION "0.8.2-dev1724"
3
3
  #endif
4
4
  #ifndef DUCKDB_SOURCE_ID
5
- #define DUCKDB_SOURCE_ID "c277db819b"
5
+ #define DUCKDB_SOURCE_ID "0e0fd210cd"
6
6
  #endif
7
7
  #include "duckdb/function/table/system_functions.hpp"
8
8
  #include "duckdb/main/database.hpp"
@@ -99,11 +99,11 @@ public:
99
99
  bool IsPipe(const string &filename) override {
100
100
  return GetFileSystem().IsPipe(filename);
101
101
  }
102
- virtual void RemoveFile(const string &filename) override {
102
+ void RemoveFile(const string &filename) override {
103
103
  GetFileSystem().RemoveFile(filename);
104
104
  }
105
105
 
106
- virtual vector<string> Glob(const string &path, FileOpener *opener = nullptr) override {
106
+ vector<string> Glob(const string &path, FileOpener *opener = nullptr) override {
107
107
  if (opener) {
108
108
  throw InternalException("OpenerFileSystem cannot take an opener - the opener is pushed automatically");
109
109
  }
@@ -394,6 +394,7 @@ struct ListValue {
394
394
  struct UnionValue {
395
395
  DUCKDB_API static const Value &GetValue(const Value &value);
396
396
  DUCKDB_API static uint8_t GetTag(const Value &value);
397
+ DUCKDB_API static const LogicalType &GetType(const Value &value);
397
398
  };
398
399
 
399
400
  //! Return the internal integral value for any type that is stored as an integral value internally
@@ -0,0 +1,22 @@
1
+ //===----------------------------------------------------------------------===//
2
+ // DuckDB
3
+ //
4
+ // duckdb/main/extension/generated_extension_loader.hpp
5
+ //
6
+ //
7
+ //===----------------------------------------------------------------------===//
8
+
9
+ #pragma once
10
+
11
+ #if defined(GENERATED_EXTENSION_HEADERS) and !defined(DUCKDB_AMALGAMATION)
12
+ #include "generated_extension_headers.hpp"
13
+
14
+ namespace duckdb {
15
+
16
+ //! Looks through the CMake-generated list of extensions that are linked into DuckDB currently to try to load <extension>
17
+ bool TryLoadLinkedExtension(DuckDB &db, const std::string &extension);
18
+ extern vector<string> linked_extensions;
19
+ extern vector<string> loaded_extension_test_paths;
20
+
21
+ } // namespace duckdb
22
+ #endif
@@ -58,6 +58,7 @@ static constexpr ExtensionEntry EXTENSION_FUNCTIONS[] = {{"->>", "json"},
58
58
  {"json_deserialize_sql", "json"},
59
59
  {"json_serialize_sql", "json"},
60
60
  {"json_execute_serialized_sql", "json"},
61
+ {"load_aws_credentials", "aws"},
61
62
  {"make_timestamptz", "icu"},
62
63
  {"parquet_metadata", "parquet"},
63
64
  {"parquet_scan", "parquet"},
@@ -161,6 +162,7 @@ static constexpr ExtensionEntry EXTENSION_FUNCTIONS[] = {{"->>", "json"},
161
162
  {"st_numpoints", "spatial"}};
162
163
 
163
164
  static constexpr ExtensionEntry EXTENSION_SETTINGS[] = {
165
+ {"azure_storage_connection_string", "azure"},
164
166
  {"binary_as_string", "parquet"},
165
167
  {"calendar", "icu"},
166
168
  {"http_retries", "httpfs"},
@@ -46,7 +46,7 @@
46
46
 
47
47
  // Load the generated header file containing our list of extension headers
48
48
  #if defined(GENERATED_EXTENSION_HEADERS) && GENERATED_EXTENSION_HEADERS && !defined(DUCKDB_AMALGAMATION)
49
- #include "generated_extension_loader.hpp"
49
+ #include "duckdb/main/extension/generated_extension_loader.hpp"
50
50
  #else
51
51
  // TODO: rewrite package_build.py to allow also loading out-of-tree extensions in non-cmake builds, after that
52
52
  // these can be removed
@@ -150,10 +150,8 @@ void ExtensionHelper::LoadAllExtensions(DuckDB &db) {
150
150
  }
151
151
 
152
152
  #if defined(GENERATED_EXTENSION_HEADERS) && GENERATED_EXTENSION_HEADERS
153
- for (auto &ext : LINKED_EXTENSIONS) {
154
- if (extensions.find(ext) != extensions.end()) {
155
- LoadExtensionInternal(db, ext, true);
156
- }
153
+ for (auto &ext : linked_extensions) {
154
+ LoadExtensionInternal(db, ext, true);
157
155
  }
158
156
  #endif
159
157
  }
@@ -12,7 +12,7 @@ describe('Column Types', function() {
12
12
 
13
13
  let cols = stmt.columns();
14
14
 
15
- assert.equal(cols.length, 41);
15
+ assert.equal(cols.length, 42);
16
16
 
17
17
  var expected = [
18
18
  { name: 'bool', type: { id: 'BOOLEAN', sql_type: 'BOOLEAN' } },
@@ -232,6 +232,29 @@ describe('Column Types', function() {
232
232
  sql_type: 'VARCHAR'
233
233
  }
234
234
  }
235
+ },
236
+ {
237
+ name: "union",
238
+ type: {
239
+ id: "UNION",
240
+ sql_type: "UNION(name VARCHAR, age SMALLINT)",
241
+ children: [
242
+ {
243
+ name: "name",
244
+ type: {
245
+ id: "VARCHAR",
246
+ sql_type: "VARCHAR"
247
+ }
248
+ },
249
+ {
250
+ name: "age",
251
+ type: {
252
+ id: "SMALLINT",
253
+ sql_type: "SMALLINT",
254
+ }
255
+ }
256
+ ],
257
+ }
235
258
  }
236
259
  ]
237
260
 
@@ -157,6 +157,7 @@ const correct_answer_map: Record<string, any[]> = {
157
157
  null,
158
158
  ],
159
159
  map: ["{}", "{key1=🦆🦆🦆🦆🦆🦆, key2=goose}", null],
160
+ union: ['Frank', '5', null],
160
161
 
161
162
  time_tz: ["00:00:00+00", "23:59:59.999999+00", null],
162
163
  interval: [