duckdb 0.8.2-dev1573.0 → 0.8.2-dev1724.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/duckdb/extension/json/json_functions/json_create.cpp +21 -2
- package/src/duckdb/extension/json/json_functions/json_transform.cpp +74 -1
- package/src/duckdb/extension/parquet/parquet_extension.cpp +2 -0
- package/src/duckdb/src/common/arrow/arrow_appender.cpp +74 -1
- package/src/duckdb/src/common/arrow/arrow_converter.cpp +44 -19
- package/src/duckdb/src/common/types/value.cpp +11 -6
- package/src/duckdb/src/common/types.cpp +1 -1
- package/src/duckdb/src/function/table/arrow.cpp +19 -0
- package/src/duckdb/src/function/table/arrow_conversion.cpp +35 -1
- package/src/duckdb/src/function/table/system/test_all_types.cpp +7 -0
- package/src/duckdb/src/function/table/version/pragma_version.cpp +2 -2
- package/src/duckdb/src/include/duckdb/common/opener_file_system.hpp +2 -2
- package/src/duckdb/src/include/duckdb/common/types/value.hpp +1 -0
- package/src/duckdb/src/include/duckdb/main/extension/generated_extension_loader.hpp +22 -0
- package/src/duckdb/src/include/duckdb/main/extension_entries.hpp +2 -0
- package/src/duckdb/src/main/extension/extension_helper.cpp +3 -5
- package/test/columns.test.ts +24 -1
- package/test/test_all_types.test.ts +1 -0
package/package.json
CHANGED
@@ -333,8 +333,22 @@ static void CreateValuesMap(const StructNames &names, yyjson_mut_doc *doc, yyjso
|
|
333
333
|
static void CreateValuesUnion(const StructNames &names, yyjson_mut_doc *doc, yyjson_mut_val *vals[], Vector &value_v,
|
334
334
|
idx_t count) {
|
335
335
|
// Structs become values, therefore we initialize vals to JSON values
|
336
|
-
|
337
|
-
|
336
|
+
UnifiedVectorFormat value_data;
|
337
|
+
value_v.ToUnifiedFormat(count, value_data);
|
338
|
+
if (value_data.validity.AllValid()) {
|
339
|
+
for (idx_t i = 0; i < count; i++) {
|
340
|
+
vals[i] = yyjson_mut_obj(doc);
|
341
|
+
}
|
342
|
+
} else {
|
343
|
+
for (idx_t i = 0; i < count; i++) {
|
344
|
+
auto index = value_data.sel->get_index(i);
|
345
|
+
if (!value_data.validity.RowIsValid(index)) {
|
346
|
+
// Make the entry NULL if the Union value is NULL
|
347
|
+
vals[i] = yyjson_mut_null(doc);
|
348
|
+
} else {
|
349
|
+
vals[i] = yyjson_mut_obj(doc);
|
350
|
+
}
|
351
|
+
}
|
338
352
|
}
|
339
353
|
|
340
354
|
// Initialize re-usable array for the nested values
|
@@ -361,6 +375,11 @@ static void CreateValuesUnion(const StructNames &names, yyjson_mut_doc *doc, yyj
|
|
361
375
|
auto keys = UnifiedVectorFormat::GetData<string_t>(key_data);
|
362
376
|
|
363
377
|
for (idx_t i = 0; i < count; i++) {
|
378
|
+
auto value_index = value_data.sel->get_index(i);
|
379
|
+
if (!value_data.validity.RowIsValid(value_index)) {
|
380
|
+
// This entry is just NULL in it's entirety
|
381
|
+
continue;
|
382
|
+
}
|
364
383
|
auto tag_idx = tag_data.sel->get_index(i);
|
365
384
|
if (!tag_data.validity.RowIsValid(tag_idx)) {
|
366
385
|
continue;
|
@@ -669,6 +669,77 @@ bool TransformToJSON(yyjson_val *vals[], yyjson_alc *alc, Vector &result, const
|
|
669
669
|
return true;
|
670
670
|
}
|
671
671
|
|
672
|
+
bool TransformValueIntoUnion(yyjson_val **vals, yyjson_alc *alc, Vector &result, const idx_t count,
|
673
|
+
JSONTransformOptions &options) {
|
674
|
+
auto type = result.GetType();
|
675
|
+
|
676
|
+
auto fields = UnionType::CopyMemberTypes(type);
|
677
|
+
vector<string> names;
|
678
|
+
for (const auto &field : fields) {
|
679
|
+
names.push_back(field.first);
|
680
|
+
}
|
681
|
+
|
682
|
+
bool success = true;
|
683
|
+
|
684
|
+
auto &validity = FlatVector::Validity(result);
|
685
|
+
|
686
|
+
auto set_error = [&](idx_t i, const string &message) {
|
687
|
+
validity.SetInvalid(i);
|
688
|
+
result.SetValue(i, Value(nullptr));
|
689
|
+
if (success && options.strict_cast) {
|
690
|
+
options.error_message = message;
|
691
|
+
options.object_index = i;
|
692
|
+
success = false;
|
693
|
+
}
|
694
|
+
};
|
695
|
+
|
696
|
+
for (idx_t i = 0; i < count; i++) {
|
697
|
+
const auto &obj = vals[i];
|
698
|
+
|
699
|
+
if (!obj || unsafe_yyjson_is_null(vals[i])) {
|
700
|
+
validity.SetInvalid(i);
|
701
|
+
result.SetValue(i, Value(nullptr));
|
702
|
+
continue;
|
703
|
+
}
|
704
|
+
|
705
|
+
if (!unsafe_yyjson_is_obj(obj)) {
|
706
|
+
set_error(i,
|
707
|
+
StringUtil::Format("Expected an object representing a union, got %s", yyjson_get_type_desc(obj)));
|
708
|
+
continue;
|
709
|
+
}
|
710
|
+
|
711
|
+
auto len = unsafe_yyjson_get_len(obj);
|
712
|
+
if (len > 1) {
|
713
|
+
set_error(i, "Found object containing more than one key, instead of union");
|
714
|
+
continue;
|
715
|
+
} else if (len == 0) {
|
716
|
+
set_error(i, "Found empty object, instead of union");
|
717
|
+
continue;
|
718
|
+
}
|
719
|
+
|
720
|
+
auto key = unsafe_yyjson_get_first(obj);
|
721
|
+
auto val = yyjson_obj_iter_get_val(key);
|
722
|
+
|
723
|
+
auto tag = std::find(names.begin(), names.end(), unsafe_yyjson_get_str(key));
|
724
|
+
if (tag == names.end()) {
|
725
|
+
set_error(i, StringUtil::Format("Found object containing unknown key, instead of union: %s",
|
726
|
+
unsafe_yyjson_get_str(key)));
|
727
|
+
continue;
|
728
|
+
}
|
729
|
+
|
730
|
+
idx_t actual_tag = tag - names.begin();
|
731
|
+
|
732
|
+
Vector single(UnionType::GetMemberType(type, actual_tag), 1);
|
733
|
+
if (!JSONTransform::Transform(&val, alc, single, 1, options)) {
|
734
|
+
success = false;
|
735
|
+
}
|
736
|
+
|
737
|
+
result.SetValue(i, Value::UNION(fields, actual_tag, single.GetValue(0)));
|
738
|
+
}
|
739
|
+
|
740
|
+
return success;
|
741
|
+
}
|
742
|
+
|
672
743
|
bool JSONTransform::Transform(yyjson_val *vals[], yyjson_alc *alc, Vector &result, const idx_t count,
|
673
744
|
JSONTransformOptions &options) {
|
674
745
|
auto result_type = result.GetType();
|
@@ -747,8 +818,10 @@ bool JSONTransform::Transform(yyjson_val *vals[], yyjson_alc *alc, Vector &resul
|
|
747
818
|
return TransformArray(vals, alc, result, count, options);
|
748
819
|
case LogicalTypeId::MAP:
|
749
820
|
return TransformObjectToMap(vals, alc, result, count, options);
|
821
|
+
case LogicalTypeId::UNION:
|
822
|
+
return TransformValueIntoUnion(vals, alc, result, count, options);
|
750
823
|
default:
|
751
|
-
throw
|
824
|
+
throw NotImplementedException("Cannot read a value of type %s from a json file", result_type.ToString());
|
752
825
|
}
|
753
826
|
}
|
754
827
|
|
@@ -152,10 +152,12 @@ BindInfo ParquetGetBatchInfo(const FunctionData *bind_data) {
|
|
152
152
|
for (auto &path : parquet_bind.files) {
|
153
153
|
file_path.emplace_back(path);
|
154
154
|
}
|
155
|
+
// LCOV_EXCL_START
|
155
156
|
bind_info.InsertOption("file_path", Value::LIST(LogicalType::VARCHAR, file_path));
|
156
157
|
bind_info.InsertOption("binary_as_string", Value::BOOLEAN(parquet_bind.parquet_options.binary_as_string));
|
157
158
|
bind_info.InsertOption("file_row_number", Value::BOOLEAN(parquet_bind.parquet_options.file_row_number));
|
158
159
|
parquet_bind.parquet_options.file_options.AddBatchInfo(bind_info);
|
160
|
+
// LCOV_EXCL_STOP
|
159
161
|
return bind_info;
|
160
162
|
}
|
161
163
|
|
@@ -394,6 +394,76 @@ struct ArrowVarcharData {
|
|
394
394
|
}
|
395
395
|
};
|
396
396
|
|
397
|
+
//===--------------------------------------------------------------------===//
|
398
|
+
// Unions
|
399
|
+
//===--------------------------------------------------------------------===//
|
400
|
+
/**
|
401
|
+
* Based on https://arrow.apache.org/docs/format/Columnar.html#union-layout &
|
402
|
+
* https://arrow.apache.org/docs/format/CDataInterface.html
|
403
|
+
*/
|
404
|
+
struct ArrowUnionData {
|
405
|
+
static void Initialize(ArrowAppendData &result, const LogicalType &type, idx_t capacity) {
|
406
|
+
result.main_buffer.reserve(capacity * sizeof(int8_t));
|
407
|
+
|
408
|
+
for (auto &child : UnionType::CopyMemberTypes(type)) {
|
409
|
+
auto child_buffer = InitializeArrowChild(child.second, capacity, result.options);
|
410
|
+
result.child_data.push_back(std::move(child_buffer));
|
411
|
+
}
|
412
|
+
}
|
413
|
+
|
414
|
+
static void Append(ArrowAppendData &append_data, Vector &input, idx_t from, idx_t to, idx_t input_size) {
|
415
|
+
UnifiedVectorFormat format;
|
416
|
+
input.ToUnifiedFormat(input_size, format);
|
417
|
+
idx_t size = to - from;
|
418
|
+
|
419
|
+
auto &types_buffer = append_data.main_buffer;
|
420
|
+
|
421
|
+
duckdb::vector<Vector> child_vectors;
|
422
|
+
for (const auto &child : UnionType::CopyMemberTypes(input.GetType())) {
|
423
|
+
child_vectors.emplace_back(child.second);
|
424
|
+
}
|
425
|
+
|
426
|
+
for (idx_t input_idx = from; input_idx < to; input_idx++) {
|
427
|
+
const auto &val = input.GetValue(input_idx);
|
428
|
+
|
429
|
+
idx_t tag = 0;
|
430
|
+
Value resolved_value(nullptr);
|
431
|
+
if (!val.IsNull()) {
|
432
|
+
tag = UnionValue::GetTag(val);
|
433
|
+
|
434
|
+
resolved_value = UnionValue::GetValue(val);
|
435
|
+
}
|
436
|
+
|
437
|
+
for (idx_t child_idx = 0; child_idx < child_vectors.size(); child_idx++) {
|
438
|
+
child_vectors[child_idx].SetValue(input_idx, child_idx == tag ? resolved_value : Value(nullptr));
|
439
|
+
}
|
440
|
+
|
441
|
+
types_buffer.data()[input_idx] = tag;
|
442
|
+
}
|
443
|
+
|
444
|
+
for (idx_t child_idx = 0; child_idx < child_vectors.size(); child_idx++) {
|
445
|
+
auto &child_buffer = append_data.child_data[child_idx];
|
446
|
+
auto &child = child_vectors[child_idx];
|
447
|
+
child_buffer->append_vector(*child_buffer, child, from, to, size);
|
448
|
+
}
|
449
|
+
append_data.row_count += size;
|
450
|
+
}
|
451
|
+
|
452
|
+
static void Finalize(ArrowAppendData &append_data, const LogicalType &type, ArrowArray *result) {
|
453
|
+
result->n_buffers = 2;
|
454
|
+
result->buffers[1] = append_data.main_buffer.data();
|
455
|
+
|
456
|
+
auto &child_types = UnionType::CopyMemberTypes(type);
|
457
|
+
append_data.child_pointers.resize(child_types.size());
|
458
|
+
result->children = append_data.child_pointers.data();
|
459
|
+
result->n_children = child_types.size();
|
460
|
+
for (idx_t i = 0; i < child_types.size(); i++) {
|
461
|
+
auto &child_type = child_types[i].second;
|
462
|
+
append_data.child_pointers[i] = FinalizeArrowChild(child_type, *append_data.child_data[i]);
|
463
|
+
}
|
464
|
+
}
|
465
|
+
};
|
466
|
+
|
397
467
|
//===--------------------------------------------------------------------===//
|
398
468
|
// Structs
|
399
469
|
//===--------------------------------------------------------------------===//
|
@@ -716,6 +786,9 @@ static void InitializeFunctionPointers(ArrowAppendData &append_data, const Logic
|
|
716
786
|
case LogicalTypeId::INTERVAL:
|
717
787
|
InitializeFunctionPointers<ArrowScalarData<ArrowInterval, interval_t, ArrowIntervalConverter>>(append_data);
|
718
788
|
break;
|
789
|
+
case LogicalTypeId::UNION:
|
790
|
+
InitializeFunctionPointers<ArrowUnionData>(append_data);
|
791
|
+
break;
|
719
792
|
case LogicalTypeId::STRUCT:
|
720
793
|
InitializeFunctionPointers<ArrowStructData>(append_data);
|
721
794
|
break;
|
@@ -726,7 +799,7 @@ static void InitializeFunctionPointers(ArrowAppendData &append_data, const Logic
|
|
726
799
|
InitializeFunctionPointers<ArrowMapData>(append_data);
|
727
800
|
break;
|
728
801
|
default:
|
729
|
-
throw
|
802
|
+
throw NotImplementedException("Unsupported type in DuckDB -> Arrow Conversion: %s\n", type.ToString());
|
730
803
|
}
|
731
804
|
}
|
732
805
|
|
@@ -76,6 +76,15 @@ void SetArrowMapFormat(DuckDBArrowSchemaHolder &root_holder, ArrowSchema &child,
|
|
76
76
|
SetArrowFormat(root_holder, **child.children, ListType::GetChildType(type), options);
|
77
77
|
}
|
78
78
|
|
79
|
+
unsafe_unique_array<char> AddName(const string &name) {
|
80
|
+
auto name_ptr = make_unsafe_uniq_array<char>(name.size() + 1);
|
81
|
+
for (size_t i = 0; i < name.size(); i++) {
|
82
|
+
name_ptr[i] = name[i];
|
83
|
+
}
|
84
|
+
name_ptr[name.size()] = '\0';
|
85
|
+
return name_ptr;
|
86
|
+
}
|
87
|
+
|
79
88
|
void SetArrowFormat(DuckDBArrowSchemaHolder &root_holder, ArrowSchema &child, const LogicalType &type,
|
80
89
|
const ArrowOptions &options) {
|
81
90
|
switch (type.id()) {
|
@@ -135,12 +144,7 @@ void SetArrowFormat(DuckDBArrowSchemaHolder &root_holder, ArrowSchema &child, co
|
|
135
144
|
break;
|
136
145
|
case LogicalTypeId::TIMESTAMP_TZ: {
|
137
146
|
string format = "tsu:" + options.time_zone;
|
138
|
-
|
139
|
-
for (size_t i = 0; i < format.size(); i++) {
|
140
|
-
format_ptr[i] = format[i];
|
141
|
-
}
|
142
|
-
format_ptr[format.size()] = '\0';
|
143
|
-
root_holder.owned_type_names.push_back(std::move(format_ptr));
|
147
|
+
root_holder.owned_type_names.push_back(AddName(format));
|
144
148
|
child.format = root_holder.owned_type_names.back().get();
|
145
149
|
break;
|
146
150
|
}
|
@@ -160,12 +164,7 @@ void SetArrowFormat(DuckDBArrowSchemaHolder &root_holder, ArrowSchema &child, co
|
|
160
164
|
uint8_t width, scale;
|
161
165
|
type.GetDecimalProperties(width, scale);
|
162
166
|
string format = "d:" + to_string(width) + "," + to_string(scale);
|
163
|
-
|
164
|
-
for (size_t i = 0; i < format.size(); i++) {
|
165
|
-
format_ptr[i] = format[i];
|
166
|
-
}
|
167
|
-
format_ptr[format.size()] = '\0';
|
168
|
-
root_holder.owned_type_names.push_back(std::move(format_ptr));
|
167
|
+
root_holder.owned_type_names.push_back(AddName(format));
|
169
168
|
child.format = root_holder.owned_type_names.back().get();
|
170
169
|
break;
|
171
170
|
}
|
@@ -211,13 +210,7 @@ void SetArrowFormat(DuckDBArrowSchemaHolder &root_holder, ArrowSchema &child, co
|
|
211
210
|
|
212
211
|
InitializeChild(*child.children[type_idx]);
|
213
212
|
|
214
|
-
|
215
|
-
auto name_ptr = make_unsafe_uniq_array<char>(struct_col_name.size() + 1);
|
216
|
-
for (size_t i = 0; i < struct_col_name.size(); i++) {
|
217
|
-
name_ptr[i] = struct_col_name[i];
|
218
|
-
}
|
219
|
-
name_ptr[struct_col_name.size()] = '\0';
|
220
|
-
root_holder.owned_type_names.push_back(std::move(name_ptr));
|
213
|
+
root_holder.owned_type_names.push_back(AddName(child_types[type_idx].first));
|
221
214
|
|
222
215
|
child.children[type_idx]->name = root_holder.owned_type_names.back().get();
|
223
216
|
SetArrowFormat(root_holder, *child.children[type_idx], child_types[type_idx].second, options);
|
@@ -228,6 +221,38 @@ void SetArrowFormat(DuckDBArrowSchemaHolder &root_holder, ArrowSchema &child, co
|
|
228
221
|
SetArrowMapFormat(root_holder, child, type, options);
|
229
222
|
break;
|
230
223
|
}
|
224
|
+
case LogicalTypeId::UNION: {
|
225
|
+
std::string format = "+us:";
|
226
|
+
|
227
|
+
auto &child_types = UnionType::CopyMemberTypes(type);
|
228
|
+
child.n_children = child_types.size();
|
229
|
+
root_holder.nested_children.emplace_back();
|
230
|
+
root_holder.nested_children.back().resize(child_types.size());
|
231
|
+
root_holder.nested_children_ptr.emplace_back();
|
232
|
+
root_holder.nested_children_ptr.back().resize(child_types.size());
|
233
|
+
for (idx_t type_idx = 0; type_idx < child_types.size(); type_idx++) {
|
234
|
+
root_holder.nested_children_ptr.back()[type_idx] = &root_holder.nested_children.back()[type_idx];
|
235
|
+
}
|
236
|
+
child.children = &root_holder.nested_children_ptr.back()[0];
|
237
|
+
for (size_t type_idx = 0; type_idx < child_types.size(); type_idx++) {
|
238
|
+
|
239
|
+
InitializeChild(*child.children[type_idx]);
|
240
|
+
|
241
|
+
root_holder.owned_type_names.push_back(AddName(child_types[type_idx].first));
|
242
|
+
|
243
|
+
child.children[type_idx]->name = root_holder.owned_type_names.back().get();
|
244
|
+
SetArrowFormat(root_holder, *child.children[type_idx], child_types[type_idx].second, options);
|
245
|
+
|
246
|
+
format += to_string(type_idx) + ",";
|
247
|
+
}
|
248
|
+
|
249
|
+
format.pop_back();
|
250
|
+
|
251
|
+
root_holder.owned_type_names.push_back(AddName(format));
|
252
|
+
child.format = root_holder.owned_type_names.back().get();
|
253
|
+
|
254
|
+
break;
|
255
|
+
}
|
231
256
|
case LogicalTypeId::ENUM: {
|
232
257
|
// TODO what do we do with pointer enums here?
|
233
258
|
switch (EnumType::GetPhysicalType(type)) {
|
@@ -678,7 +678,7 @@ Value Value::UNION(child_list_t<LogicalType> members, uint8_t tag, Value value)
|
|
678
678
|
result.is_null = false;
|
679
679
|
// add the tag to the front of the struct
|
680
680
|
vector<Value> union_values;
|
681
|
-
union_values.emplace_back(Value::
|
681
|
+
union_values.emplace_back(Value::UTINYINT(tag));
|
682
682
|
for (idx_t i = 0; i < members.size(); i++) {
|
683
683
|
if (i != tag) {
|
684
684
|
union_values.emplace_back(members[i].second);
|
@@ -1465,20 +1465,25 @@ const vector<Value> &ListValue::GetChildren(const Value &value) {
|
|
1465
1465
|
}
|
1466
1466
|
|
1467
1467
|
const Value &UnionValue::GetValue(const Value &value) {
|
1468
|
-
D_ASSERT(value.type() == LogicalTypeId::UNION);
|
1468
|
+
D_ASSERT(value.type().id() == LogicalTypeId::UNION);
|
1469
1469
|
auto &children = StructValue::GetChildren(value);
|
1470
|
-
auto tag = children[0].GetValueUnsafe<
|
1470
|
+
auto tag = children[0].GetValueUnsafe<union_tag_t>();
|
1471
1471
|
D_ASSERT(tag < children.size() - 1);
|
1472
1472
|
return children[tag + 1];
|
1473
1473
|
}
|
1474
1474
|
|
1475
|
-
|
1476
|
-
D_ASSERT(value.type() == LogicalTypeId::UNION);
|
1475
|
+
union_tag_t UnionValue::GetTag(const Value &value) {
|
1476
|
+
D_ASSERT(value.type().id() == LogicalTypeId::UNION);
|
1477
1477
|
auto children = StructValue::GetChildren(value);
|
1478
|
-
auto tag = children[0].GetValueUnsafe<
|
1478
|
+
auto tag = children[0].GetValueUnsafe<union_tag_t>();
|
1479
|
+
D_ASSERT(tag < children.size() - 1);
|
1479
1480
|
return tag;
|
1480
1481
|
}
|
1481
1482
|
|
1483
|
+
const LogicalType &UnionValue::GetType(const Value &value) {
|
1484
|
+
return UnionType::GetMemberType(value.type(), UnionValue::GetTag(value));
|
1485
|
+
}
|
1486
|
+
|
1482
1487
|
hugeint_t IntegralValue::Get(const Value &value) {
|
1483
1488
|
switch (value.type().InternalType()) {
|
1484
1489
|
case PhysicalType::INT8:
|
@@ -948,7 +948,7 @@ LogicalType LogicalType::UNION(child_list_t<LogicalType> members) {
|
|
948
948
|
D_ASSERT(!members.empty());
|
949
949
|
D_ASSERT(members.size() <= UnionType::MAX_UNION_MEMBERS);
|
950
950
|
// union types always have a hidden "tag" field in front
|
951
|
-
members.insert(members.begin(), {"", LogicalType::
|
951
|
+
members.insert(members.begin(), {"", LogicalType::UTINYINT});
|
952
952
|
auto info = make_shared<StructTypeInfo>(std::move(members));
|
953
953
|
return LogicalType(LogicalTypeId::UNION, std::move(info));
|
954
954
|
}
|
@@ -127,6 +127,25 @@ LogicalType ArrowTableFunction::GetArrowLogicalType(
|
|
127
127
|
}
|
128
128
|
return LogicalType::STRUCT(child_types);
|
129
129
|
|
130
|
+
} else if (format[0] == '+' && format[1] == 'u') {
|
131
|
+
if (format[2] != 's') {
|
132
|
+
throw NotImplementedException("Unsupported Internal Arrow Type: \"%c\" Union", format[2]);
|
133
|
+
}
|
134
|
+
D_ASSERT(format[3] == ':');
|
135
|
+
|
136
|
+
std::string prefix = "+us:";
|
137
|
+
// TODO: what are these type ids actually for?
|
138
|
+
auto type_ids = StringUtil::Split(format.substr(prefix.size()), ',');
|
139
|
+
|
140
|
+
child_list_t<LogicalType> members;
|
141
|
+
for (idx_t type_idx = 0; type_idx < (idx_t)schema.n_children; type_idx++) {
|
142
|
+
auto type = schema.children[type_idx];
|
143
|
+
|
144
|
+
members.emplace_back(type->name, GetArrowLogicalType(*type, arrow_convert_data, col_idx));
|
145
|
+
}
|
146
|
+
|
147
|
+
return LogicalType::UNION(members);
|
148
|
+
|
130
149
|
} else if (format == "+m") {
|
131
150
|
convert_data.variable_sz_type.emplace_back(ArrowVariableSizeType::NORMAL, 0);
|
132
151
|
|
@@ -644,8 +644,42 @@ static void ColumnArrowToDuckDB(Vector &vector, ArrowArray &array, ArrowScanLoca
|
|
644
644
|
}
|
645
645
|
break;
|
646
646
|
}
|
647
|
+
case LogicalTypeId::UNION: {
|
648
|
+
auto type_ids = ArrowBufferData<int8_t>(array, array.n_buffers == 1 ? 0 : 1);
|
649
|
+
D_ASSERT(type_ids);
|
650
|
+
auto members = UnionType::CopyMemberTypes(vector.GetType());
|
651
|
+
|
652
|
+
auto &validity_mask = FlatVector::Validity(vector);
|
653
|
+
|
654
|
+
duckdb::vector<Vector> children;
|
655
|
+
for (idx_t type_idx = 0; type_idx < (::idx_t)array.n_children; type_idx++) {
|
656
|
+
Vector child(members[type_idx].second);
|
657
|
+
auto arrow_array = array.children[type_idx];
|
658
|
+
|
659
|
+
SetValidityMask(child, *arrow_array, scan_state, size, nested_offset);
|
660
|
+
|
661
|
+
ColumnArrowToDuckDB(child, *arrow_array, scan_state, size, arrow_convert_data, col_idx, arrow_convert_idx,
|
662
|
+
nested_offset, &validity_mask);
|
663
|
+
|
664
|
+
children.push_back(std::move(child));
|
665
|
+
}
|
666
|
+
|
667
|
+
for (idx_t row_idx = 0; row_idx < size; row_idx++) {
|
668
|
+
auto tag = type_ids[row_idx];
|
669
|
+
|
670
|
+
auto out_of_range = tag < 0 || tag >= array.n_children;
|
671
|
+
if (out_of_range) {
|
672
|
+
throw InvalidInputException("Arrow union tag out of range: %d", tag);
|
673
|
+
}
|
674
|
+
|
675
|
+
const Value &value = children[tag].GetValue(row_idx);
|
676
|
+
vector.SetValue(row_idx, value.IsNull() ? Value() : Value::UNION(members, tag, value));
|
677
|
+
}
|
678
|
+
|
679
|
+
break;
|
680
|
+
}
|
647
681
|
default:
|
648
|
-
throw NotImplementedException("Unsupported type %s", vector.GetType().ToString());
|
682
|
+
throw NotImplementedException("Unsupported type for arrow conversion: %s", vector.GetType().ToString());
|
649
683
|
}
|
650
684
|
}
|
651
685
|
|
@@ -204,6 +204,13 @@ vector<TestType> TestAllTypesFun::GetTestTypes(bool use_large_enum) {
|
|
204
204
|
auto max_map_value = Value::MAP(ListType::GetChildType(map_type), map_values);
|
205
205
|
result.emplace_back(map_type, "map", std::move(min_map_value), std::move(max_map_value));
|
206
206
|
|
207
|
+
// union
|
208
|
+
child_list_t<LogicalType> members = {{"name", LogicalType::VARCHAR}, {"age", LogicalType::SMALLINT}};
|
209
|
+
auto union_type = LogicalType::UNION(members);
|
210
|
+
const Value &min = Value::UNION(members, 0, Value("Frank"));
|
211
|
+
const Value &max = Value::UNION(members, 1, Value::SMALLINT(5));
|
212
|
+
result.emplace_back(union_type, "union", min, max);
|
213
|
+
|
207
214
|
return result;
|
208
215
|
}
|
209
216
|
|
@@ -1,8 +1,8 @@
|
|
1
1
|
#ifndef DUCKDB_VERSION
|
2
|
-
#define DUCKDB_VERSION "0.8.2-
|
2
|
+
#define DUCKDB_VERSION "0.8.2-dev1724"
|
3
3
|
#endif
|
4
4
|
#ifndef DUCKDB_SOURCE_ID
|
5
|
-
#define DUCKDB_SOURCE_ID "
|
5
|
+
#define DUCKDB_SOURCE_ID "0e0fd210cd"
|
6
6
|
#endif
|
7
7
|
#include "duckdb/function/table/system_functions.hpp"
|
8
8
|
#include "duckdb/main/database.hpp"
|
@@ -99,11 +99,11 @@ public:
|
|
99
99
|
bool IsPipe(const string &filename) override {
|
100
100
|
return GetFileSystem().IsPipe(filename);
|
101
101
|
}
|
102
|
-
|
102
|
+
void RemoveFile(const string &filename) override {
|
103
103
|
GetFileSystem().RemoveFile(filename);
|
104
104
|
}
|
105
105
|
|
106
|
-
|
106
|
+
vector<string> Glob(const string &path, FileOpener *opener = nullptr) override {
|
107
107
|
if (opener) {
|
108
108
|
throw InternalException("OpenerFileSystem cannot take an opener - the opener is pushed automatically");
|
109
109
|
}
|
@@ -394,6 +394,7 @@ struct ListValue {
|
|
394
394
|
struct UnionValue {
|
395
395
|
DUCKDB_API static const Value &GetValue(const Value &value);
|
396
396
|
DUCKDB_API static uint8_t GetTag(const Value &value);
|
397
|
+
DUCKDB_API static const LogicalType &GetType(const Value &value);
|
397
398
|
};
|
398
399
|
|
399
400
|
//! Return the internal integral value for any type that is stored as an integral value internally
|
@@ -0,0 +1,22 @@
|
|
1
|
+
//===----------------------------------------------------------------------===//
|
2
|
+
// DuckDB
|
3
|
+
//
|
4
|
+
// duckdb/main/extension/generated_extension_loader.hpp
|
5
|
+
//
|
6
|
+
//
|
7
|
+
//===----------------------------------------------------------------------===//
|
8
|
+
|
9
|
+
#pragma once
|
10
|
+
|
11
|
+
#if defined(GENERATED_EXTENSION_HEADERS) and !defined(DUCKDB_AMALGAMATION)
|
12
|
+
#include "generated_extension_headers.hpp"
|
13
|
+
|
14
|
+
namespace duckdb {
|
15
|
+
|
16
|
+
//! Looks through the CMake-generated list of extensions that are linked into DuckDB currently to try load <extension>
|
17
|
+
bool TryLoadLinkedExtension(DuckDB &db, const std::string &extension);
|
18
|
+
extern vector<string> linked_extensions;
|
19
|
+
extern vector<string> loaded_extension_test_paths;
|
20
|
+
|
21
|
+
} // namespace duckdb
|
22
|
+
#endif
|
@@ -58,6 +58,7 @@ static constexpr ExtensionEntry EXTENSION_FUNCTIONS[] = {{"->>", "json"},
|
|
58
58
|
{"json_deserialize_sql", "json"},
|
59
59
|
{"json_serialize_sql", "json"},
|
60
60
|
{"json_execute_serialized_sql", "json"},
|
61
|
+
{"load_aws_credentials", "aws"},
|
61
62
|
{"make_timestamptz", "icu"},
|
62
63
|
{"parquet_metadata", "parquet"},
|
63
64
|
{"parquet_scan", "parquet"},
|
@@ -161,6 +162,7 @@ static constexpr ExtensionEntry EXTENSION_FUNCTIONS[] = {{"->>", "json"},
|
|
161
162
|
{"st_numpoints", "spatial"}};
|
162
163
|
|
163
164
|
static constexpr ExtensionEntry EXTENSION_SETTINGS[] = {
|
165
|
+
{"azure_storage_connection_string", "azure"},
|
164
166
|
{"binary_as_string", "parquet"},
|
165
167
|
{"calendar", "icu"},
|
166
168
|
{"http_retries", "httpfs"},
|
@@ -46,7 +46,7 @@
|
|
46
46
|
|
47
47
|
// Load the generated header file containing our list of extension headers
|
48
48
|
#if defined(GENERATED_EXTENSION_HEADERS) && GENERATED_EXTENSION_HEADERS && !defined(DUCKDB_AMALGAMATION)
|
49
|
-
#include "generated_extension_loader.hpp"
|
49
|
+
#include "duckdb/main/extension/generated_extension_loader.hpp"
|
50
50
|
#else
|
51
51
|
// TODO: rewrite package_build.py to allow also loading out-of-tree extensions in non-cmake builds, after that
|
52
52
|
// these can be removed
|
@@ -150,10 +150,8 @@ void ExtensionHelper::LoadAllExtensions(DuckDB &db) {
|
|
150
150
|
}
|
151
151
|
|
152
152
|
#if defined(GENERATED_EXTENSION_HEADERS) && GENERATED_EXTENSION_HEADERS
|
153
|
-
for (auto &ext :
|
154
|
-
|
155
|
-
LoadExtensionInternal(db, ext, true);
|
156
|
-
}
|
153
|
+
for (auto &ext : linked_extensions) {
|
154
|
+
LoadExtensionInternal(db, ext, true);
|
157
155
|
}
|
158
156
|
#endif
|
159
157
|
}
|
package/test/columns.test.ts
CHANGED
@@ -12,7 +12,7 @@ describe('Column Types', function() {
|
|
12
12
|
|
13
13
|
let cols = stmt.columns();
|
14
14
|
|
15
|
-
assert.equal(cols.length,
|
15
|
+
assert.equal(cols.length, 42);
|
16
16
|
|
17
17
|
var expected = [
|
18
18
|
{ name: 'bool', type: { id: 'BOOLEAN', sql_type: 'BOOLEAN' } },
|
@@ -232,6 +232,29 @@ describe('Column Types', function() {
|
|
232
232
|
sql_type: 'VARCHAR'
|
233
233
|
}
|
234
234
|
}
|
235
|
+
},
|
236
|
+
{
|
237
|
+
name: "union",
|
238
|
+
type: {
|
239
|
+
id: "UNION",
|
240
|
+
sql_type: "UNION(name VARCHAR, age SMALLINT)",
|
241
|
+
children: [
|
242
|
+
{
|
243
|
+
name: "name",
|
244
|
+
type: {
|
245
|
+
id: "VARCHAR",
|
246
|
+
sql_type: "VARCHAR"
|
247
|
+
}
|
248
|
+
},
|
249
|
+
{
|
250
|
+
name: "age",
|
251
|
+
type: {
|
252
|
+
id: "SMALLINT",
|
253
|
+
sql_type: "SMALLINT",
|
254
|
+
}
|
255
|
+
}
|
256
|
+
],
|
257
|
+
}
|
235
258
|
}
|
236
259
|
]
|
237
260
|
|