duckdb 0.5.2-dev815.0 → 0.5.2-dev833.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/duckdb.cpp +252 -21
- package/src/duckdb.hpp +751 -737
- package/src/parquet-amalgamation.cpp +31851 -31851
package/package.json
CHANGED
package/src/duckdb.cpp
CHANGED
|
@@ -29106,7 +29106,7 @@ struct RowOperations {
|
|
|
29106
29106
|
namespace duckdb {
|
|
29107
29107
|
|
|
29108
29108
|
template <class OP, class RETURN_TYPE, typename... ARGS>
|
|
29109
|
-
RETURN_TYPE RadixBitsSwitch(idx_t radix_bits, ARGS &&... args) {
|
|
29109
|
+
RETURN_TYPE RadixBitsSwitch(idx_t radix_bits, ARGS &&...args) {
|
|
29110
29110
|
D_ASSERT(radix_bits <= sizeof(hash_t) * 8);
|
|
29111
29111
|
switch (radix_bits) {
|
|
29112
29112
|
case 1:
|
|
@@ -29135,7 +29135,7 @@ RETURN_TYPE RadixBitsSwitch(idx_t radix_bits, ARGS &&... args) {
|
|
|
29135
29135
|
}
|
|
29136
29136
|
|
|
29137
29137
|
template <class OP, class RETURN_TYPE, idx_t radix_bits_1, typename... ARGS>
|
|
29138
|
-
RETURN_TYPE DoubleRadixBitsSwitch2(idx_t radix_bits_2, ARGS &&... args) {
|
|
29138
|
+
RETURN_TYPE DoubleRadixBitsSwitch2(idx_t radix_bits_2, ARGS &&...args) {
|
|
29139
29139
|
D_ASSERT(radix_bits_2 <= sizeof(hash_t) * 8);
|
|
29140
29140
|
switch (radix_bits_2) {
|
|
29141
29141
|
case 1:
|
|
@@ -29164,7 +29164,7 @@ RETURN_TYPE DoubleRadixBitsSwitch2(idx_t radix_bits_2, ARGS &&... args) {
|
|
|
29164
29164
|
}
|
|
29165
29165
|
|
|
29166
29166
|
template <class OP, class RETURN_TYPE, typename... ARGS>
|
|
29167
|
-
RETURN_TYPE DoubleRadixBitsSwitch1(idx_t radix_bits_1, idx_t radix_bits_2, ARGS &&... args) {
|
|
29167
|
+
RETURN_TYPE DoubleRadixBitsSwitch1(idx_t radix_bits_1, idx_t radix_bits_2, ARGS &&...args) {
|
|
29168
29168
|
D_ASSERT(radix_bits_1 <= sizeof(hash_t) * 8);
|
|
29169
29169
|
switch (radix_bits_1) {
|
|
29170
29170
|
case 1:
|
|
@@ -49455,6 +49455,9 @@ void Vector::Initialize(bool zero_data, idx_t capacity) {
|
|
|
49455
49455
|
memset(data, 0, capacity * type_size);
|
|
49456
49456
|
}
|
|
49457
49457
|
}
|
|
49458
|
+
if (capacity > STANDARD_VECTOR_SIZE) {
|
|
49459
|
+
validity.Resize(STANDARD_VECTOR_SIZE, capacity);
|
|
49460
|
+
}
|
|
49458
49461
|
}
|
|
49459
49462
|
|
|
49460
49463
|
struct DataArrays {
|
|
@@ -93661,6 +93664,13 @@ unique_ptr<FunctionData> HistogramBindFunction(ClientContext &context, Aggregate
|
|
|
93661
93664
|
vector<unique_ptr<Expression>> &arguments) {
|
|
93662
93665
|
|
|
93663
93666
|
D_ASSERT(arguments.size() == 1);
|
|
93667
|
+
|
|
93668
|
+
if (arguments[0]->return_type.id() == LogicalTypeId::LIST ||
|
|
93669
|
+
arguments[0]->return_type.id() == LogicalTypeId::STRUCT ||
|
|
93670
|
+
arguments[0]->return_type.id() == LogicalTypeId::MAP) {
|
|
93671
|
+
throw NotImplementedException("Unimplemented type for histogram %s", arguments[0]->return_type.ToString());
|
|
93672
|
+
}
|
|
93673
|
+
|
|
93664
93674
|
child_list_t<LogicalType> struct_children;
|
|
93665
93675
|
struct_children.push_back({"key", LogicalType::LIST(arguments[0]->return_type)});
|
|
93666
93676
|
struct_children.push_back({"value", LogicalType::LIST(LogicalType::UBIGINT)});
|
|
@@ -95714,6 +95724,11 @@ struct VectorCastHelpers {
|
|
|
95714
95724
|
}
|
|
95715
95725
|
};
|
|
95716
95726
|
|
|
95727
|
+
struct VectorStringifiedListParser {
|
|
95728
|
+
static idx_t CountParts(const string_t &input);
|
|
95729
|
+
static bool SplitStringifiedList(const string_t &input, string_t *child_data, idx_t &child_start, Vector &child);
|
|
95730
|
+
};
|
|
95731
|
+
|
|
95717
95732
|
} // namespace duckdb
|
|
95718
95733
|
|
|
95719
95734
|
|
|
@@ -96484,20 +96499,8 @@ BoundCastInfo DefaultCasts::EnumCastSwitch(BindCastInput &input, const LogicalTy
|
|
|
96484
96499
|
|
|
96485
96500
|
namespace duckdb {
|
|
96486
96501
|
|
|
96487
|
-
|
|
96488
|
-
|
|
96489
|
-
}
|
|
96490
|
-
|
|
96491
|
-
BoundCastInfo child_cast_info;
|
|
96492
|
-
|
|
96493
|
-
public:
|
|
96494
|
-
unique_ptr<BoundCastData> Copy() const override {
|
|
96495
|
-
return make_unique<ListBoundCastData>(child_cast_info.Copy());
|
|
96496
|
-
}
|
|
96497
|
-
};
|
|
96498
|
-
|
|
96499
|
-
unique_ptr<BoundCastData> BindListToListCast(BindCastInput &input, const LogicalType &source,
|
|
96500
|
-
const LogicalType &target) {
|
|
96502
|
+
unique_ptr<BoundCastData> ListBoundCastData::BindListToListCast(BindCastInput &input, const LogicalType &source,
|
|
96503
|
+
const LogicalType &target) {
|
|
96501
96504
|
vector<BoundCastInfo> child_cast_info;
|
|
96502
96505
|
auto &source_child_type = ListType::GetChildType(source);
|
|
96503
96506
|
auto &result_child_type = ListType::GetChildType(target);
|
|
@@ -96608,11 +96611,11 @@ static bool ListToVarcharCast(Vector &source, Vector &result, idx_t count, CastP
|
|
|
96608
96611
|
BoundCastInfo DefaultCasts::ListCastSwitch(BindCastInput &input, const LogicalType &source, const LogicalType &target) {
|
|
96609
96612
|
switch (target.id()) {
|
|
96610
96613
|
case LogicalTypeId::LIST:
|
|
96611
|
-
return BoundCastInfo(ListToListCast, BindListToListCast(input, source, target));
|
|
96614
|
+
return BoundCastInfo(ListToListCast, ListBoundCastData::BindListToListCast(input, source, target));
|
|
96612
96615
|
case LogicalTypeId::VARCHAR:
|
|
96613
96616
|
case LogicalTypeId::JSON:
|
|
96614
|
-
return BoundCastInfo(ListToVarcharCast,
|
|
96615
|
-
|
|
96617
|
+
return BoundCastInfo(ListToVarcharCast, ListBoundCastData::BindListToListCast(
|
|
96618
|
+
input, source, LogicalType::LIST(LogicalType::VARCHAR)));
|
|
96616
96619
|
default:
|
|
96617
96620
|
return DefaultCasts::TryVectorNullCast;
|
|
96618
96621
|
}
|
|
@@ -96950,9 +96953,97 @@ static BoundCastInfo VectorStringCastNumericSwitch(BindCastInput &input, const L
|
|
|
96950
96953
|
}
|
|
96951
96954
|
}
|
|
96952
96955
|
|
|
96956
|
+
bool StringListCastLoop(string_t *source_data, ValidityMask &source_mask, Vector &result, ValidityMask &result_mask,
|
|
96957
|
+
idx_t count, CastParameters &parameters, const SelectionVector *sel) {
|
|
96958
|
+
|
|
96959
|
+
idx_t total_list_size = 0;
|
|
96960
|
+
for (idx_t i = 0; i < count; i++) {
|
|
96961
|
+
idx_t idx = i;
|
|
96962
|
+
if (sel) {
|
|
96963
|
+
idx = sel->get_index(i);
|
|
96964
|
+
}
|
|
96965
|
+
if (!source_mask.RowIsValid(idx)) {
|
|
96966
|
+
continue;
|
|
96967
|
+
}
|
|
96968
|
+
total_list_size += VectorStringifiedListParser::CountParts(source_data[idx]);
|
|
96969
|
+
}
|
|
96970
|
+
|
|
96971
|
+
Vector varchar_vector(LogicalType::VARCHAR, total_list_size);
|
|
96972
|
+
|
|
96973
|
+
ListVector::Reserve(result, total_list_size);
|
|
96974
|
+
ListVector::SetListSize(result, total_list_size);
|
|
96975
|
+
|
|
96976
|
+
auto list_data = ListVector::GetData(result);
|
|
96977
|
+
auto child_data = FlatVector::GetData<string_t>(varchar_vector);
|
|
96978
|
+
|
|
96979
|
+
bool all_converted = true;
|
|
96980
|
+
idx_t total = 0;
|
|
96981
|
+
for (idx_t i = 0; i < count; i++) {
|
|
96982
|
+
idx_t idx = i;
|
|
96983
|
+
if (sel) {
|
|
96984
|
+
idx = sel->get_index(i);
|
|
96985
|
+
}
|
|
96986
|
+
if (!source_mask.RowIsValid(idx)) {
|
|
96987
|
+
result_mask.SetInvalid(i);
|
|
96988
|
+
continue;
|
|
96989
|
+
}
|
|
96990
|
+
|
|
96991
|
+
list_data[i].offset = total;
|
|
96992
|
+
auto valid =
|
|
96993
|
+
VectorStringifiedListParser::SplitStringifiedList(source_data[idx], child_data, total, varchar_vector);
|
|
96994
|
+
if (!valid) {
|
|
96995
|
+
string text = "Type VARCHAR with value '" + source_data[idx].GetString() +
|
|
96996
|
+
"' can't be cast to the destination type LIST";
|
|
96997
|
+
HandleVectorCastError::Operation<string_t>(text, result_mask, idx, parameters.error_message, all_converted);
|
|
96998
|
+
}
|
|
96999
|
+
list_data[i].length = total - list_data[i].offset; // length is the amount of parts coming from this string
|
|
97000
|
+
}
|
|
97001
|
+
D_ASSERT(total_list_size == total);
|
|
97002
|
+
|
|
97003
|
+
auto &result_child = ListVector::GetEntry(result);
|
|
97004
|
+
auto &cast_data = (ListBoundCastData &)*parameters.cast_data;
|
|
97005
|
+
CastParameters child_parameters(parameters, cast_data.child_cast_info.cast_data.get());
|
|
97006
|
+
return cast_data.child_cast_info.function(varchar_vector, result_child, total_list_size, child_parameters) &&
|
|
97007
|
+
all_converted;
|
|
97008
|
+
}
|
|
97009
|
+
|
|
97010
|
+
bool StringListCast(Vector &source, Vector &result, idx_t count, CastParameters &parameters) {
|
|
97011
|
+
D_ASSERT(source.GetType().id() == LogicalTypeId::VARCHAR);
|
|
97012
|
+
D_ASSERT(result.GetType().id() == LogicalTypeId::LIST);
|
|
97013
|
+
|
|
97014
|
+
switch (source.GetVectorType()) {
|
|
97015
|
+
case VectorType::CONSTANT_VECTOR: {
|
|
97016
|
+
result.SetVectorType(VectorType::CONSTANT_VECTOR);
|
|
97017
|
+
|
|
97018
|
+
auto source_data = ConstantVector::GetData<string_t>(source);
|
|
97019
|
+
auto &source_mask = ConstantVector::Validity(source);
|
|
97020
|
+
auto &result_mask = ConstantVector::Validity(result);
|
|
97021
|
+
|
|
97022
|
+
return StringListCastLoop(source_data, source_mask, result, result_mask, 1, parameters, nullptr);
|
|
97023
|
+
}
|
|
97024
|
+
default: {
|
|
97025
|
+
UnifiedVectorFormat unified_source;
|
|
97026
|
+
result.SetVectorType(VectorType::FLAT_VECTOR);
|
|
97027
|
+
|
|
97028
|
+
source.ToUnifiedFormat(count, unified_source);
|
|
97029
|
+
auto source_sel = unified_source.sel;
|
|
97030
|
+
auto source_data = (string_t *)unified_source.data;
|
|
97031
|
+
auto &source_mask = unified_source.validity;
|
|
97032
|
+
auto &result_mask = FlatVector::Validity(result);
|
|
97033
|
+
|
|
97034
|
+
return StringListCastLoop(source_data, source_mask, result, result_mask, count, parameters, source_sel);
|
|
97035
|
+
}
|
|
97036
|
+
}
|
|
97037
|
+
}
|
|
97038
|
+
|
|
97039
|
+
BoundCastInfo StringToListCast(BindCastInput &input, const LogicalType &source, const LogicalType &target) {
|
|
97040
|
+
// second argument allows for a secondary casting function to be passed in the CastParameters
|
|
97041
|
+
return BoundCastInfo(&StringListCast,
|
|
97042
|
+
ListBoundCastData::BindListToListCast(input, LogicalType::LIST(LogicalType::VARCHAR), target));
|
|
97043
|
+
}
|
|
97044
|
+
|
|
96953
97045
|
BoundCastInfo DefaultCasts::StringCastSwitch(BindCastInput &input, const LogicalType &source,
|
|
96954
97046
|
const LogicalType &target) {
|
|
96955
|
-
// now switch on the target type
|
|
96956
97047
|
switch (target.id()) {
|
|
96957
97048
|
case LogicalTypeId::DATE:
|
|
96958
97049
|
return BoundCastInfo(&VectorCastHelpers::TryCastErrorLoop<string_t, date_t, duckdb::TryCastErrorMessage>);
|
|
@@ -96980,6 +97071,8 @@ BoundCastInfo DefaultCasts::StringCastSwitch(BindCastInput &input, const Logical
|
|
|
96980
97071
|
case LogicalTypeId::VARCHAR:
|
|
96981
97072
|
case LogicalTypeId::JSON:
|
|
96982
97073
|
return &DefaultCasts::ReinterpretCast;
|
|
97074
|
+
case LogicalTypeId::LIST:
|
|
97075
|
+
return StringToListCast(input, source, target);
|
|
96983
97076
|
default:
|
|
96984
97077
|
return VectorStringCastNumericSwitch(input, source, target);
|
|
96985
97078
|
}
|
|
@@ -97341,6 +97434,144 @@ BoundCastInfo DefaultCasts::UUIDCastSwitch(BindCastInput &input, const LogicalTy
|
|
|
97341
97434
|
} // namespace duckdb
|
|
97342
97435
|
|
|
97343
97436
|
|
|
97437
|
+
namespace duckdb {
|
|
97438
|
+
|
|
97439
|
+
struct CountPartOperation {
|
|
97440
|
+
idx_t count = 0;
|
|
97441
|
+
|
|
97442
|
+
void HandleValue(const char *buf, idx_t start_pos, idx_t pos) {
|
|
97443
|
+
count++;
|
|
97444
|
+
}
|
|
97445
|
+
};
|
|
97446
|
+
|
|
97447
|
+
struct SplitStringOperation {
|
|
97448
|
+
SplitStringOperation(string_t *child_data, idx_t &child_start, Vector &child)
|
|
97449
|
+
: child_data(child_data), child_start(child_start), child(child) {
|
|
97450
|
+
}
|
|
97451
|
+
|
|
97452
|
+
string_t *child_data;
|
|
97453
|
+
idx_t &child_start;
|
|
97454
|
+
Vector &child;
|
|
97455
|
+
|
|
97456
|
+
void HandleValue(const char *buf, idx_t start_pos, idx_t pos) {
|
|
97457
|
+
|
|
97458
|
+
if ((pos - start_pos) >= 4 && buf[start_pos] == 'N' && buf[start_pos + 1] == 'U' && buf[start_pos + 2] == 'L' &&
|
|
97459
|
+
buf[start_pos + 3] == 'L') {
|
|
97460
|
+
FlatVector::SetNull(child, child_start, true);
|
|
97461
|
+
child_start++;
|
|
97462
|
+
return;
|
|
97463
|
+
}
|
|
97464
|
+
child_data[child_start] = StringVector::AddString(child, buf + start_pos, pos - start_pos);
|
|
97465
|
+
child_start++;
|
|
97466
|
+
}
|
|
97467
|
+
};
|
|
97468
|
+
|
|
97469
|
+
static bool SkipToCloseQuotes(idx_t &pos, const char *buf, idx_t &len) {
|
|
97470
|
+
char quote = buf[pos];
|
|
97471
|
+
pos++;
|
|
97472
|
+
|
|
97473
|
+
while (pos < len) {
|
|
97474
|
+
if (buf[pos] == quote) {
|
|
97475
|
+
return true;
|
|
97476
|
+
}
|
|
97477
|
+
pos++;
|
|
97478
|
+
}
|
|
97479
|
+
return false;
|
|
97480
|
+
}
|
|
97481
|
+
|
|
97482
|
+
static bool SkipToClose(idx_t &idx, const char *buf, idx_t &len, idx_t &lvl) {
|
|
97483
|
+
while (idx < len) {
|
|
97484
|
+
if (buf[idx] == '[') {
|
|
97485
|
+
if (!SkipToClose(++idx, buf, len, lvl)) {
|
|
97486
|
+
return false;
|
|
97487
|
+
}
|
|
97488
|
+
lvl++;
|
|
97489
|
+
idx++;
|
|
97490
|
+
}
|
|
97491
|
+
if (buf[idx] == '"' || buf[idx] == '\'') {
|
|
97492
|
+
SkipToCloseQuotes(idx, buf, len);
|
|
97493
|
+
}
|
|
97494
|
+
if (buf[idx] == ']') {
|
|
97495
|
+
lvl--;
|
|
97496
|
+
return true;
|
|
97497
|
+
}
|
|
97498
|
+
idx++;
|
|
97499
|
+
}
|
|
97500
|
+
return false;
|
|
97501
|
+
}
|
|
97502
|
+
|
|
97503
|
+
template <class OP>
|
|
97504
|
+
static bool SplitStringifiedListInternal(const string_t &input, OP &state) {
|
|
97505
|
+
const char *buf = input.GetDataUnsafe();
|
|
97506
|
+
idx_t len = input.GetSize();
|
|
97507
|
+
idx_t lvl = 1;
|
|
97508
|
+
idx_t pos = 0;
|
|
97509
|
+
|
|
97510
|
+
while (pos < len && StringUtil::CharacterIsSpace(buf[pos])) {
|
|
97511
|
+
pos++;
|
|
97512
|
+
}
|
|
97513
|
+
if (pos == len || buf[pos] != '[') {
|
|
97514
|
+
return false;
|
|
97515
|
+
}
|
|
97516
|
+
pos++;
|
|
97517
|
+
while (pos < len && StringUtil::CharacterIsSpace(buf[pos])) {
|
|
97518
|
+
pos++;
|
|
97519
|
+
}
|
|
97520
|
+
|
|
97521
|
+
idx_t start_pos = pos;
|
|
97522
|
+
while (pos < len) {
|
|
97523
|
+
if (buf[pos] == '[') {
|
|
97524
|
+
if (!SkipToClose(++pos, buf, len, ++lvl)) {
|
|
97525
|
+
return false;
|
|
97526
|
+
}
|
|
97527
|
+
} else if (buf[pos] == '"' || buf[pos] == '\'') {
|
|
97528
|
+
SkipToCloseQuotes(pos, buf, len);
|
|
97529
|
+
} else if (buf[pos] == ',' || buf[pos] == ']') {
|
|
97530
|
+
idx_t trailing_whitespace = 0;
|
|
97531
|
+
while (StringUtil::CharacterIsSpace(buf[pos - trailing_whitespace - 1])) {
|
|
97532
|
+
trailing_whitespace++;
|
|
97533
|
+
}
|
|
97534
|
+
if (!(buf[pos] == ']' && start_pos == (pos))) {
|
|
97535
|
+
state.HandleValue(buf, start_pos, pos - trailing_whitespace);
|
|
97536
|
+
} // else the list is empty
|
|
97537
|
+
if (buf[pos] == ']') {
|
|
97538
|
+
lvl--;
|
|
97539
|
+
break;
|
|
97540
|
+
}
|
|
97541
|
+
while (pos + 1 < len && StringUtil::CharacterIsSpace(buf[pos + 1])) {
|
|
97542
|
+
pos++;
|
|
97543
|
+
}
|
|
97544
|
+
start_pos = pos + 1;
|
|
97545
|
+
}
|
|
97546
|
+
pos++;
|
|
97547
|
+
}
|
|
97548
|
+
pos++;
|
|
97549
|
+
while (pos < len) {
|
|
97550
|
+
if (!StringUtil::CharacterIsSpace(buf[pos])) {
|
|
97551
|
+
return false;
|
|
97552
|
+
}
|
|
97553
|
+
pos++;
|
|
97554
|
+
}
|
|
97555
|
+
if (lvl != 0) {
|
|
97556
|
+
return false;
|
|
97557
|
+
}
|
|
97558
|
+
return true;
|
|
97559
|
+
}
|
|
97560
|
+
|
|
97561
|
+
bool VectorStringifiedListParser::SplitStringifiedList(const string_t &input, string_t *child_data, idx_t &child_start,
|
|
97562
|
+
Vector &child) {
|
|
97563
|
+
SplitStringOperation state(child_data, child_start, child);
|
|
97564
|
+
return SplitStringifiedListInternal<SplitStringOperation>(input, state);
|
|
97565
|
+
}
|
|
97566
|
+
|
|
97567
|
+
idx_t VectorStringifiedListParser::CountParts(const string_t &input) {
|
|
97568
|
+
CountPartOperation state;
|
|
97569
|
+
SplitStringifiedListInternal<CountPartOperation>(input, state);
|
|
97570
|
+
return state.count;
|
|
97571
|
+
}
|
|
97572
|
+
} // namespace duckdb
|
|
97573
|
+
|
|
97574
|
+
|
|
97344
97575
|
namespace duckdb {
|
|
97345
97576
|
|
|
97346
97577
|
//! The target type determines the preferred implicit casts
|