duckdb 0.5.2-dev815.0 → 0.5.2-dev841.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/duckdb.cpp +476 -56
- package/src/duckdb.hpp +751 -737
- package/src/parquet-amalgamation.cpp +37428 -37428
package/src/duckdb.cpp
CHANGED
|
@@ -29106,7 +29106,7 @@ struct RowOperations {
|
|
|
29106
29106
|
namespace duckdb {
|
|
29107
29107
|
|
|
29108
29108
|
template <class OP, class RETURN_TYPE, typename... ARGS>
|
|
29109
|
-
RETURN_TYPE RadixBitsSwitch(idx_t radix_bits, ARGS &&... args) {
|
|
29109
|
+
RETURN_TYPE RadixBitsSwitch(idx_t radix_bits, ARGS &&...args) {
|
|
29110
29110
|
D_ASSERT(radix_bits <= sizeof(hash_t) * 8);
|
|
29111
29111
|
switch (radix_bits) {
|
|
29112
29112
|
case 1:
|
|
@@ -29135,7 +29135,7 @@ RETURN_TYPE RadixBitsSwitch(idx_t radix_bits, ARGS &&... args) {
|
|
|
29135
29135
|
}
|
|
29136
29136
|
|
|
29137
29137
|
template <class OP, class RETURN_TYPE, idx_t radix_bits_1, typename... ARGS>
|
|
29138
|
-
RETURN_TYPE DoubleRadixBitsSwitch2(idx_t radix_bits_2, ARGS &&... args) {
|
|
29138
|
+
RETURN_TYPE DoubleRadixBitsSwitch2(idx_t radix_bits_2, ARGS &&...args) {
|
|
29139
29139
|
D_ASSERT(radix_bits_2 <= sizeof(hash_t) * 8);
|
|
29140
29140
|
switch (radix_bits_2) {
|
|
29141
29141
|
case 1:
|
|
@@ -29164,7 +29164,7 @@ RETURN_TYPE DoubleRadixBitsSwitch2(idx_t radix_bits_2, ARGS &&... args) {
|
|
|
29164
29164
|
}
|
|
29165
29165
|
|
|
29166
29166
|
template <class OP, class RETURN_TYPE, typename... ARGS>
|
|
29167
|
-
RETURN_TYPE DoubleRadixBitsSwitch1(idx_t radix_bits_1, idx_t radix_bits_2, ARGS &&... args) {
|
|
29167
|
+
RETURN_TYPE DoubleRadixBitsSwitch1(idx_t radix_bits_1, idx_t radix_bits_2, ARGS &&...args) {
|
|
29168
29168
|
D_ASSERT(radix_bits_1 <= sizeof(hash_t) * 8);
|
|
29169
29169
|
switch (radix_bits_1) {
|
|
29170
29170
|
case 1:
|
|
@@ -49455,6 +49455,9 @@ void Vector::Initialize(bool zero_data, idx_t capacity) {
|
|
|
49455
49455
|
memset(data, 0, capacity * type_size);
|
|
49456
49456
|
}
|
|
49457
49457
|
}
|
|
49458
|
+
if (capacity > STANDARD_VECTOR_SIZE) {
|
|
49459
|
+
validity.Resize(STANDARD_VECTOR_SIZE, capacity);
|
|
49460
|
+
}
|
|
49458
49461
|
}
|
|
49459
49462
|
|
|
49460
49463
|
struct DataArrays {
|
|
@@ -93661,6 +93664,13 @@ unique_ptr<FunctionData> HistogramBindFunction(ClientContext &context, Aggregate
|
|
|
93661
93664
|
vector<unique_ptr<Expression>> &arguments) {
|
|
93662
93665
|
|
|
93663
93666
|
D_ASSERT(arguments.size() == 1);
|
|
93667
|
+
|
|
93668
|
+
if (arguments[0]->return_type.id() == LogicalTypeId::LIST ||
|
|
93669
|
+
arguments[0]->return_type.id() == LogicalTypeId::STRUCT ||
|
|
93670
|
+
arguments[0]->return_type.id() == LogicalTypeId::MAP) {
|
|
93671
|
+
throw NotImplementedException("Unimplemented type for histogram %s", arguments[0]->return_type.ToString());
|
|
93672
|
+
}
|
|
93673
|
+
|
|
93664
93674
|
child_list_t<LogicalType> struct_children;
|
|
93665
93675
|
struct_children.push_back({"key", LogicalType::LIST(arguments[0]->return_type)});
|
|
93666
93676
|
struct_children.push_back({"value", LogicalType::LIST(LogicalType::UBIGINT)});
|
|
@@ -95714,6 +95724,11 @@ struct VectorCastHelpers {
|
|
|
95714
95724
|
}
|
|
95715
95725
|
};
|
|
95716
95726
|
|
|
95727
|
+
struct VectorStringifiedListParser {
|
|
95728
|
+
static idx_t CountParts(const string_t &input);
|
|
95729
|
+
static bool SplitStringifiedList(const string_t &input, string_t *child_data, idx_t &child_start, Vector &child);
|
|
95730
|
+
};
|
|
95731
|
+
|
|
95717
95732
|
} // namespace duckdb
|
|
95718
95733
|
|
|
95719
95734
|
|
|
@@ -96484,20 +96499,8 @@ BoundCastInfo DefaultCasts::EnumCastSwitch(BindCastInput &input, const LogicalTy
|
|
|
96484
96499
|
|
|
96485
96500
|
namespace duckdb {
|
|
96486
96501
|
|
|
96487
|
-
|
|
96488
|
-
|
|
96489
|
-
}
|
|
96490
|
-
|
|
96491
|
-
BoundCastInfo child_cast_info;
|
|
96492
|
-
|
|
96493
|
-
public:
|
|
96494
|
-
unique_ptr<BoundCastData> Copy() const override {
|
|
96495
|
-
return make_unique<ListBoundCastData>(child_cast_info.Copy());
|
|
96496
|
-
}
|
|
96497
|
-
};
|
|
96498
|
-
|
|
96499
|
-
unique_ptr<BoundCastData> BindListToListCast(BindCastInput &input, const LogicalType &source,
|
|
96500
|
-
const LogicalType &target) {
|
|
96502
|
+
unique_ptr<BoundCastData> ListBoundCastData::BindListToListCast(BindCastInput &input, const LogicalType &source,
|
|
96503
|
+
const LogicalType &target) {
|
|
96501
96504
|
vector<BoundCastInfo> child_cast_info;
|
|
96502
96505
|
auto &source_child_type = ListType::GetChildType(source);
|
|
96503
96506
|
auto &result_child_type = ListType::GetChildType(target);
|
|
@@ -96608,11 +96611,11 @@ static bool ListToVarcharCast(Vector &source, Vector &result, idx_t count, CastP
|
|
|
96608
96611
|
BoundCastInfo DefaultCasts::ListCastSwitch(BindCastInput &input, const LogicalType &source, const LogicalType &target) {
|
|
96609
96612
|
switch (target.id()) {
|
|
96610
96613
|
case LogicalTypeId::LIST:
|
|
96611
|
-
return BoundCastInfo(ListToListCast, BindListToListCast(input, source, target));
|
|
96614
|
+
return BoundCastInfo(ListToListCast, ListBoundCastData::BindListToListCast(input, source, target));
|
|
96612
96615
|
case LogicalTypeId::VARCHAR:
|
|
96613
96616
|
case LogicalTypeId::JSON:
|
|
96614
|
-
return BoundCastInfo(ListToVarcharCast,
|
|
96615
|
-
|
|
96617
|
+
return BoundCastInfo(ListToVarcharCast, ListBoundCastData::BindListToListCast(
|
|
96618
|
+
input, source, LogicalType::LIST(LogicalType::VARCHAR)));
|
|
96616
96619
|
default:
|
|
96617
96620
|
return DefaultCasts::TryVectorNullCast;
|
|
96618
96621
|
}
|
|
@@ -96950,9 +96953,97 @@ static BoundCastInfo VectorStringCastNumericSwitch(BindCastInput &input, const L
|
|
|
96950
96953
|
}
|
|
96951
96954
|
}
|
|
96952
96955
|
|
|
96956
|
+
bool StringListCastLoop(string_t *source_data, ValidityMask &source_mask, Vector &result, ValidityMask &result_mask,
|
|
96957
|
+
idx_t count, CastParameters &parameters, const SelectionVector *sel) {
|
|
96958
|
+
|
|
96959
|
+
idx_t total_list_size = 0;
|
|
96960
|
+
for (idx_t i = 0; i < count; i++) {
|
|
96961
|
+
idx_t idx = i;
|
|
96962
|
+
if (sel) {
|
|
96963
|
+
idx = sel->get_index(i);
|
|
96964
|
+
}
|
|
96965
|
+
if (!source_mask.RowIsValid(idx)) {
|
|
96966
|
+
continue;
|
|
96967
|
+
}
|
|
96968
|
+
total_list_size += VectorStringifiedListParser::CountParts(source_data[idx]);
|
|
96969
|
+
}
|
|
96970
|
+
|
|
96971
|
+
Vector varchar_vector(LogicalType::VARCHAR, total_list_size);
|
|
96972
|
+
|
|
96973
|
+
ListVector::Reserve(result, total_list_size);
|
|
96974
|
+
ListVector::SetListSize(result, total_list_size);
|
|
96975
|
+
|
|
96976
|
+
auto list_data = ListVector::GetData(result);
|
|
96977
|
+
auto child_data = FlatVector::GetData<string_t>(varchar_vector);
|
|
96978
|
+
|
|
96979
|
+
bool all_converted = true;
|
|
96980
|
+
idx_t total = 0;
|
|
96981
|
+
for (idx_t i = 0; i < count; i++) {
|
|
96982
|
+
idx_t idx = i;
|
|
96983
|
+
if (sel) {
|
|
96984
|
+
idx = sel->get_index(i);
|
|
96985
|
+
}
|
|
96986
|
+
if (!source_mask.RowIsValid(idx)) {
|
|
96987
|
+
result_mask.SetInvalid(i);
|
|
96988
|
+
continue;
|
|
96989
|
+
}
|
|
96990
|
+
|
|
96991
|
+
list_data[i].offset = total;
|
|
96992
|
+
auto valid =
|
|
96993
|
+
VectorStringifiedListParser::SplitStringifiedList(source_data[idx], child_data, total, varchar_vector);
|
|
96994
|
+
if (!valid) {
|
|
96995
|
+
string text = "Type VARCHAR with value '" + source_data[idx].GetString() +
|
|
96996
|
+
"' can't be cast to the destination type LIST";
|
|
96997
|
+
HandleVectorCastError::Operation<string_t>(text, result_mask, idx, parameters.error_message, all_converted);
|
|
96998
|
+
}
|
|
96999
|
+
list_data[i].length = total - list_data[i].offset; // length is the amount of parts coming from this string
|
|
97000
|
+
}
|
|
97001
|
+
D_ASSERT(total_list_size == total);
|
|
97002
|
+
|
|
97003
|
+
auto &result_child = ListVector::GetEntry(result);
|
|
97004
|
+
auto &cast_data = (ListBoundCastData &)*parameters.cast_data;
|
|
97005
|
+
CastParameters child_parameters(parameters, cast_data.child_cast_info.cast_data.get());
|
|
97006
|
+
return cast_data.child_cast_info.function(varchar_vector, result_child, total_list_size, child_parameters) &&
|
|
97007
|
+
all_converted;
|
|
97008
|
+
}
|
|
97009
|
+
|
|
97010
|
+
bool StringListCast(Vector &source, Vector &result, idx_t count, CastParameters &parameters) {
|
|
97011
|
+
D_ASSERT(source.GetType().id() == LogicalTypeId::VARCHAR);
|
|
97012
|
+
D_ASSERT(result.GetType().id() == LogicalTypeId::LIST);
|
|
97013
|
+
|
|
97014
|
+
switch (source.GetVectorType()) {
|
|
97015
|
+
case VectorType::CONSTANT_VECTOR: {
|
|
97016
|
+
result.SetVectorType(VectorType::CONSTANT_VECTOR);
|
|
97017
|
+
|
|
97018
|
+
auto source_data = ConstantVector::GetData<string_t>(source);
|
|
97019
|
+
auto &source_mask = ConstantVector::Validity(source);
|
|
97020
|
+
auto &result_mask = ConstantVector::Validity(result);
|
|
97021
|
+
|
|
97022
|
+
return StringListCastLoop(source_data, source_mask, result, result_mask, 1, parameters, nullptr);
|
|
97023
|
+
}
|
|
97024
|
+
default: {
|
|
97025
|
+
UnifiedVectorFormat unified_source;
|
|
97026
|
+
result.SetVectorType(VectorType::FLAT_VECTOR);
|
|
97027
|
+
|
|
97028
|
+
source.ToUnifiedFormat(count, unified_source);
|
|
97029
|
+
auto source_sel = unified_source.sel;
|
|
97030
|
+
auto source_data = (string_t *)unified_source.data;
|
|
97031
|
+
auto &source_mask = unified_source.validity;
|
|
97032
|
+
auto &result_mask = FlatVector::Validity(result);
|
|
97033
|
+
|
|
97034
|
+
return StringListCastLoop(source_data, source_mask, result, result_mask, count, parameters, source_sel);
|
|
97035
|
+
}
|
|
97036
|
+
}
|
|
97037
|
+
}
|
|
97038
|
+
|
|
97039
|
+
BoundCastInfo StringToListCast(BindCastInput &input, const LogicalType &source, const LogicalType &target) {
|
|
97040
|
+
// second argument allows for a secondary casting function to be passed in the CastParameters
|
|
97041
|
+
return BoundCastInfo(&StringListCast,
|
|
97042
|
+
ListBoundCastData::BindListToListCast(input, LogicalType::LIST(LogicalType::VARCHAR), target));
|
|
97043
|
+
}
|
|
97044
|
+
|
|
96953
97045
|
BoundCastInfo DefaultCasts::StringCastSwitch(BindCastInput &input, const LogicalType &source,
|
|
96954
97046
|
const LogicalType &target) {
|
|
96955
|
-
// now switch on the target type
|
|
96956
97047
|
switch (target.id()) {
|
|
96957
97048
|
case LogicalTypeId::DATE:
|
|
96958
97049
|
return BoundCastInfo(&VectorCastHelpers::TryCastErrorLoop<string_t, date_t, duckdb::TryCastErrorMessage>);
|
|
@@ -96980,6 +97071,8 @@ BoundCastInfo DefaultCasts::StringCastSwitch(BindCastInput &input, const Logical
|
|
|
96980
97071
|
case LogicalTypeId::VARCHAR:
|
|
96981
97072
|
case LogicalTypeId::JSON:
|
|
96982
97073
|
return &DefaultCasts::ReinterpretCast;
|
|
97074
|
+
case LogicalTypeId::LIST:
|
|
97075
|
+
return StringToListCast(input, source, target);
|
|
96983
97076
|
default:
|
|
96984
97077
|
return VectorStringCastNumericSwitch(input, source, target);
|
|
96985
97078
|
}
|
|
@@ -97341,6 +97434,144 @@ BoundCastInfo DefaultCasts::UUIDCastSwitch(BindCastInput &input, const LogicalTy
|
|
|
97341
97434
|
} // namespace duckdb
|
|
97342
97435
|
|
|
97343
97436
|
|
|
97437
|
+
namespace duckdb {
|
|
97438
|
+
|
|
97439
|
+
struct CountPartOperation {
|
|
97440
|
+
idx_t count = 0;
|
|
97441
|
+
|
|
97442
|
+
void HandleValue(const char *buf, idx_t start_pos, idx_t pos) {
|
|
97443
|
+
count++;
|
|
97444
|
+
}
|
|
97445
|
+
};
|
|
97446
|
+
|
|
97447
|
+
struct SplitStringOperation {
|
|
97448
|
+
SplitStringOperation(string_t *child_data, idx_t &child_start, Vector &child)
|
|
97449
|
+
: child_data(child_data), child_start(child_start), child(child) {
|
|
97450
|
+
}
|
|
97451
|
+
|
|
97452
|
+
string_t *child_data;
|
|
97453
|
+
idx_t &child_start;
|
|
97454
|
+
Vector &child;
|
|
97455
|
+
|
|
97456
|
+
void HandleValue(const char *buf, idx_t start_pos, idx_t pos) {
|
|
97457
|
+
|
|
97458
|
+
if ((pos - start_pos) >= 4 && buf[start_pos] == 'N' && buf[start_pos + 1] == 'U' && buf[start_pos + 2] == 'L' &&
|
|
97459
|
+
buf[start_pos + 3] == 'L') {
|
|
97460
|
+
FlatVector::SetNull(child, child_start, true);
|
|
97461
|
+
child_start++;
|
|
97462
|
+
return;
|
|
97463
|
+
}
|
|
97464
|
+
child_data[child_start] = StringVector::AddString(child, buf + start_pos, pos - start_pos);
|
|
97465
|
+
child_start++;
|
|
97466
|
+
}
|
|
97467
|
+
};
|
|
97468
|
+
|
|
97469
|
+
static bool SkipToCloseQuotes(idx_t &pos, const char *buf, idx_t &len) {
|
|
97470
|
+
char quote = buf[pos];
|
|
97471
|
+
pos++;
|
|
97472
|
+
|
|
97473
|
+
while (pos < len) {
|
|
97474
|
+
if (buf[pos] == quote) {
|
|
97475
|
+
return true;
|
|
97476
|
+
}
|
|
97477
|
+
pos++;
|
|
97478
|
+
}
|
|
97479
|
+
return false;
|
|
97480
|
+
}
|
|
97481
|
+
|
|
97482
|
+
static bool SkipToClose(idx_t &idx, const char *buf, idx_t &len, idx_t &lvl) {
|
|
97483
|
+
while (idx < len) {
|
|
97484
|
+
if (buf[idx] == '[') {
|
|
97485
|
+
if (!SkipToClose(++idx, buf, len, lvl)) {
|
|
97486
|
+
return false;
|
|
97487
|
+
}
|
|
97488
|
+
lvl++;
|
|
97489
|
+
idx++;
|
|
97490
|
+
}
|
|
97491
|
+
if (buf[idx] == '"' || buf[idx] == '\'') {
|
|
97492
|
+
SkipToCloseQuotes(idx, buf, len);
|
|
97493
|
+
}
|
|
97494
|
+
if (buf[idx] == ']') {
|
|
97495
|
+
lvl--;
|
|
97496
|
+
return true;
|
|
97497
|
+
}
|
|
97498
|
+
idx++;
|
|
97499
|
+
}
|
|
97500
|
+
return false;
|
|
97501
|
+
}
|
|
97502
|
+
|
|
97503
|
+
template <class OP>
|
|
97504
|
+
static bool SplitStringifiedListInternal(const string_t &input, OP &state) {
|
|
97505
|
+
const char *buf = input.GetDataUnsafe();
|
|
97506
|
+
idx_t len = input.GetSize();
|
|
97507
|
+
idx_t lvl = 1;
|
|
97508
|
+
idx_t pos = 0;
|
|
97509
|
+
|
|
97510
|
+
while (pos < len && StringUtil::CharacterIsSpace(buf[pos])) {
|
|
97511
|
+
pos++;
|
|
97512
|
+
}
|
|
97513
|
+
if (pos == len || buf[pos] != '[') {
|
|
97514
|
+
return false;
|
|
97515
|
+
}
|
|
97516
|
+
pos++;
|
|
97517
|
+
while (pos < len && StringUtil::CharacterIsSpace(buf[pos])) {
|
|
97518
|
+
pos++;
|
|
97519
|
+
}
|
|
97520
|
+
|
|
97521
|
+
idx_t start_pos = pos;
|
|
97522
|
+
while (pos < len) {
|
|
97523
|
+
if (buf[pos] == '[') {
|
|
97524
|
+
if (!SkipToClose(++pos, buf, len, ++lvl)) {
|
|
97525
|
+
return false;
|
|
97526
|
+
}
|
|
97527
|
+
} else if (buf[pos] == '"' || buf[pos] == '\'') {
|
|
97528
|
+
SkipToCloseQuotes(pos, buf, len);
|
|
97529
|
+
} else if (buf[pos] == ',' || buf[pos] == ']') {
|
|
97530
|
+
idx_t trailing_whitespace = 0;
|
|
97531
|
+
while (StringUtil::CharacterIsSpace(buf[pos - trailing_whitespace - 1])) {
|
|
97532
|
+
trailing_whitespace++;
|
|
97533
|
+
}
|
|
97534
|
+
if (!(buf[pos] == ']' && start_pos == (pos))) {
|
|
97535
|
+
state.HandleValue(buf, start_pos, pos - trailing_whitespace);
|
|
97536
|
+
} // else the list is empty
|
|
97537
|
+
if (buf[pos] == ']') {
|
|
97538
|
+
lvl--;
|
|
97539
|
+
break;
|
|
97540
|
+
}
|
|
97541
|
+
while (pos + 1 < len && StringUtil::CharacterIsSpace(buf[pos + 1])) {
|
|
97542
|
+
pos++;
|
|
97543
|
+
}
|
|
97544
|
+
start_pos = pos + 1;
|
|
97545
|
+
}
|
|
97546
|
+
pos++;
|
|
97547
|
+
}
|
|
97548
|
+
pos++;
|
|
97549
|
+
while (pos < len) {
|
|
97550
|
+
if (!StringUtil::CharacterIsSpace(buf[pos])) {
|
|
97551
|
+
return false;
|
|
97552
|
+
}
|
|
97553
|
+
pos++;
|
|
97554
|
+
}
|
|
97555
|
+
if (lvl != 0) {
|
|
97556
|
+
return false;
|
|
97557
|
+
}
|
|
97558
|
+
return true;
|
|
97559
|
+
}
|
|
97560
|
+
|
|
97561
|
+
bool VectorStringifiedListParser::SplitStringifiedList(const string_t &input, string_t *child_data, idx_t &child_start,
|
|
97562
|
+
Vector &child) {
|
|
97563
|
+
SplitStringOperation state(child_data, child_start, child);
|
|
97564
|
+
return SplitStringifiedListInternal<SplitStringOperation>(input, state);
|
|
97565
|
+
}
|
|
97566
|
+
|
|
97567
|
+
idx_t VectorStringifiedListParser::CountParts(const string_t &input) {
|
|
97568
|
+
CountPartOperation state;
|
|
97569
|
+
SplitStringifiedListInternal<CountPartOperation>(input, state);
|
|
97570
|
+
return state.count;
|
|
97571
|
+
}
|
|
97572
|
+
} // namespace duckdb
|
|
97573
|
+
|
|
97574
|
+
|
|
97344
97575
|
namespace duckdb {
|
|
97345
97576
|
|
|
97346
97577
|
//! The target type determines the preferred implicit casts
|
|
@@ -146173,12 +146404,11 @@ private:
|
|
|
146173
146404
|
//! Find Joins with a DelimGet that can be removed
|
|
146174
146405
|
void FindCandidates(unique_ptr<LogicalOperator> *op_ptr, vector<unique_ptr<LogicalOperator> *> &candidates);
|
|
146175
146406
|
//! Try to remove a Join with a DelimGet, returns true if it was successful
|
|
146176
|
-
bool RemoveCandidate(unique_ptr<LogicalOperator> *
|
|
146177
|
-
|
|
146178
|
-
|
|
146179
|
-
|
|
146180
|
-
|
|
146181
|
-
bool HasChildDelimGet(LogicalOperator &op);
|
|
146407
|
+
bool RemoveCandidate(unique_ptr<LogicalOperator> *plan, unique_ptr<LogicalOperator> *candidate,
|
|
146408
|
+
DeliminatorPlanUpdater &updater);
|
|
146409
|
+
//! Try to remove an inequality Join with a DelimGet, returns true if it was successful
|
|
146410
|
+
bool RemoveInequalityCandidate(unique_ptr<LogicalOperator> *plan, unique_ptr<LogicalOperator> *candidate,
|
|
146411
|
+
DeliminatorPlanUpdater &updater);
|
|
146182
146412
|
};
|
|
146183
146413
|
|
|
146184
146414
|
} // namespace duckdb
|
|
@@ -146202,31 +146432,64 @@ public:
|
|
|
146202
146432
|
//! Update the plan after a DelimGet has been removed
|
|
146203
146433
|
void VisitOperator(LogicalOperator &op) override;
|
|
146204
146434
|
void VisitExpression(unique_ptr<Expression> *expression) override;
|
|
146205
|
-
|
|
146206
|
-
|
|
146435
|
+
|
|
146436
|
+
public:
|
|
146207
146437
|
expression_map_t<Expression *> expr_map;
|
|
146208
146438
|
column_binding_map_t<bool> projection_map;
|
|
146439
|
+
column_binding_map_t<Expression *> reverse_proj_or_agg_map;
|
|
146209
146440
|
unique_ptr<LogicalOperator> temp_ptr;
|
|
146210
146441
|
};
|
|
146211
146442
|
|
|
146443
|
+
static idx_t DelimGetCount(LogicalOperator &op) {
|
|
146444
|
+
if (op.type == LogicalOperatorType::LOGICAL_DELIM_GET) {
|
|
146445
|
+
return 1;
|
|
146446
|
+
}
|
|
146447
|
+
idx_t child_count = 0;
|
|
146448
|
+
for (auto &child : op.children) {
|
|
146449
|
+
child_count += DelimGetCount(*child);
|
|
146450
|
+
}
|
|
146451
|
+
return child_count;
|
|
146452
|
+
}
|
|
146453
|
+
|
|
146454
|
+
static bool IsEqualityJoinCondition(JoinCondition &cond) {
|
|
146455
|
+
switch (cond.comparison) {
|
|
146456
|
+
case ExpressionType::COMPARE_EQUAL:
|
|
146457
|
+
case ExpressionType::COMPARE_NOT_DISTINCT_FROM:
|
|
146458
|
+
return true;
|
|
146459
|
+
default:
|
|
146460
|
+
return false;
|
|
146461
|
+
}
|
|
146462
|
+
}
|
|
146463
|
+
|
|
146464
|
+
static bool InequalityDelimJoinCanBeEliminated(JoinType &join_type) {
|
|
146465
|
+
switch (join_type) {
|
|
146466
|
+
case JoinType::ANTI:
|
|
146467
|
+
case JoinType::MARK:
|
|
146468
|
+
case JoinType::SEMI:
|
|
146469
|
+
case JoinType::SINGLE:
|
|
146470
|
+
return true;
|
|
146471
|
+
default:
|
|
146472
|
+
return false;
|
|
146473
|
+
}
|
|
146474
|
+
}
|
|
146475
|
+
|
|
146212
146476
|
void DeliminatorPlanUpdater::VisitOperator(LogicalOperator &op) {
|
|
146213
146477
|
VisitOperatorChildren(op);
|
|
146214
146478
|
VisitOperatorExpressions(op);
|
|
146215
|
-
if (op.type == LogicalOperatorType::LOGICAL_DELIM_JOIN &&
|
|
146479
|
+
if (op.type == LogicalOperatorType::LOGICAL_DELIM_JOIN && DelimGetCount(op) == 0) {
|
|
146216
146480
|
auto &delim_join = (LogicalDelimJoin &)op;
|
|
146217
146481
|
auto decs = &delim_join.duplicate_eliminated_columns;
|
|
146218
146482
|
for (auto &cond : delim_join.conditions) {
|
|
146219
|
-
if (cond
|
|
146220
|
-
cond.comparison != ExpressionType::COMPARE_NOT_DISTINCT_FROM) {
|
|
146483
|
+
if (!IsEqualityJoinCondition(cond)) {
|
|
146221
146484
|
continue;
|
|
146222
146485
|
}
|
|
146223
|
-
|
|
146486
|
+
auto rhs = cond.right.get();
|
|
146224
146487
|
while (rhs->type == ExpressionType::OPERATOR_CAST) {
|
|
146225
146488
|
auto &cast = (BoundCastExpression &)*rhs;
|
|
146226
146489
|
rhs = cast.child.get();
|
|
146227
146490
|
}
|
|
146228
146491
|
if (rhs->type != ExpressionType::BOUND_COLUMN_REF) {
|
|
146229
|
-
throw InternalException("
|
|
146492
|
+
throw InternalException("Error in Deliminator: expected a bound column reference");
|
|
146230
146493
|
}
|
|
146231
146494
|
auto &colref = (BoundColumnRefExpression &)*rhs;
|
|
146232
146495
|
if (projection_map.find(colref.binding) != projection_map.end()) {
|
|
@@ -146257,25 +146520,13 @@ void DeliminatorPlanUpdater::VisitExpression(unique_ptr<Expression> *expression)
|
|
|
146257
146520
|
}
|
|
146258
146521
|
}
|
|
146259
146522
|
|
|
146260
|
-
bool DeliminatorPlanUpdater::HasChildDelimGet(LogicalOperator &op) {
|
|
146261
|
-
if (op.type == LogicalOperatorType::LOGICAL_DELIM_GET) {
|
|
146262
|
-
return true;
|
|
146263
|
-
}
|
|
146264
|
-
for (auto &child : op.children) {
|
|
146265
|
-
if (HasChildDelimGet(*child)) {
|
|
146266
|
-
return true;
|
|
146267
|
-
}
|
|
146268
|
-
}
|
|
146269
|
-
return false;
|
|
146270
|
-
}
|
|
146271
|
-
|
|
146272
146523
|
unique_ptr<LogicalOperator> Deliminator::Optimize(unique_ptr<LogicalOperator> op) {
|
|
146273
146524
|
vector<unique_ptr<LogicalOperator> *> candidates;
|
|
146274
146525
|
FindCandidates(&op, candidates);
|
|
146275
146526
|
|
|
146276
|
-
for (auto candidate : candidates) {
|
|
146527
|
+
for (auto &candidate : candidates) {
|
|
146277
146528
|
DeliminatorPlanUpdater updater;
|
|
146278
|
-
if (RemoveCandidate(candidate, updater)) {
|
|
146529
|
+
if (RemoveCandidate(&op, candidate, updater)) {
|
|
146279
146530
|
updater.VisitOperator(*op);
|
|
146280
146531
|
}
|
|
146281
146532
|
}
|
|
@@ -146330,10 +146581,21 @@ static bool OperatorIsDelimGet(LogicalOperator &op) {
|
|
|
146330
146581
|
return false;
|
|
146331
146582
|
}
|
|
146332
146583
|
|
|
146333
|
-
bool
|
|
146334
|
-
|
|
146584
|
+
static bool ChildJoinTypeCanBeDeliminated(JoinType &join_type) {
|
|
146585
|
+
switch (join_type) {
|
|
146586
|
+
case JoinType::INNER:
|
|
146587
|
+
case JoinType::SEMI:
|
|
146588
|
+
return true;
|
|
146589
|
+
default:
|
|
146590
|
+
return false;
|
|
146591
|
+
}
|
|
146592
|
+
}
|
|
146593
|
+
|
|
146594
|
+
bool Deliminator::RemoveCandidate(unique_ptr<LogicalOperator> *plan, unique_ptr<LogicalOperator> *candidate,
|
|
146595
|
+
DeliminatorPlanUpdater &updater) {
|
|
146596
|
+
auto &proj_or_agg = **candidate;
|
|
146335
146597
|
auto &join = (LogicalComparisonJoin &)*proj_or_agg.children[0];
|
|
146336
|
-
if (join.join_type
|
|
146598
|
+
if (!ChildJoinTypeCanBeDeliminated(join.join_type)) {
|
|
146337
146599
|
return false;
|
|
146338
146600
|
}
|
|
146339
146601
|
|
|
@@ -146351,13 +146613,10 @@ bool Deliminator::RemoveCandidate(unique_ptr<LogicalOperator> *op_ptr, Deliminat
|
|
|
146351
146613
|
return false;
|
|
146352
146614
|
}
|
|
146353
146615
|
// check if joining with the DelimGet is redundant, and collect relevant column information
|
|
146616
|
+
bool all_equality_conditions = true;
|
|
146354
146617
|
vector<Expression *> nulls_are_not_equal_exprs;
|
|
146355
146618
|
for (auto &cond : join.conditions) {
|
|
146356
|
-
|
|
146357
|
-
cond.comparison != ExpressionType::COMPARE_NOT_DISTINCT_FROM) {
|
|
146358
|
-
// non-equality join condition
|
|
146359
|
-
return false;
|
|
146360
|
-
}
|
|
146619
|
+
all_equality_conditions = all_equality_conditions && IsEqualityJoinCondition(cond);
|
|
146361
146620
|
auto delim_side = delim_idx == 0 ? cond.left.get() : cond.right.get();
|
|
146362
146621
|
auto other_side = delim_idx == 0 ? cond.right.get() : cond.left.get();
|
|
146363
146622
|
if (delim_side->type != ExpressionType::BOUND_COLUMN_REF) {
|
|
@@ -146370,10 +146629,12 @@ bool Deliminator::RemoveCandidate(unique_ptr<LogicalOperator> *op_ptr, Deliminat
|
|
|
146370
146629
|
nulls_are_not_equal_exprs.push_back(other_side);
|
|
146371
146630
|
}
|
|
146372
146631
|
}
|
|
146632
|
+
|
|
146373
146633
|
// removed DelimGet columns are assigned a new ColumnBinding by Projection/Aggregation, keep track here
|
|
146374
146634
|
if (proj_or_agg.type == LogicalOperatorType::LOGICAL_PROJECTION) {
|
|
146375
146635
|
for (auto &cb : proj_or_agg.GetColumnBindings()) {
|
|
146376
146636
|
updater.projection_map[cb] = true;
|
|
146637
|
+
updater.reverse_proj_or_agg_map[cb] = proj_or_agg.expressions[cb.column_index].get();
|
|
146377
146638
|
for (auto &expr : nulls_are_not_equal_exprs) {
|
|
146378
146639
|
if (proj_or_agg.expressions[cb.column_index]->Equals(expr)) {
|
|
146379
146640
|
updater.projection_map[cb] = false;
|
|
@@ -146383,8 +146644,19 @@ bool Deliminator::RemoveCandidate(unique_ptr<LogicalOperator> *op_ptr, Deliminat
|
|
|
146383
146644
|
}
|
|
146384
146645
|
} else {
|
|
146385
146646
|
auto &agg = (LogicalAggregate &)proj_or_agg;
|
|
146647
|
+
|
|
146648
|
+
// Create a vector of all exprs in the agg
|
|
146649
|
+
vector<Expression *> all_agg_exprs;
|
|
146650
|
+
for (auto &expr : agg.groups) {
|
|
146651
|
+
all_agg_exprs.push_back(expr.get());
|
|
146652
|
+
}
|
|
146653
|
+
for (auto &expr : agg.expressions) {
|
|
146654
|
+
all_agg_exprs.push_back(expr.get());
|
|
146655
|
+
}
|
|
146656
|
+
|
|
146386
146657
|
for (auto &cb : agg.GetColumnBindings()) {
|
|
146387
146658
|
updater.projection_map[cb] = true;
|
|
146659
|
+
updater.reverse_proj_or_agg_map[cb] = all_agg_exprs[cb.column_index];
|
|
146388
146660
|
for (auto &expr : nulls_are_not_equal_exprs) {
|
|
146389
146661
|
if ((cb.table_index == agg.group_index && agg.groups[cb.column_index]->Equals(expr)) ||
|
|
146390
146662
|
(cb.table_index == agg.aggregate_index && agg.expressions[cb.column_index]->Equals(expr))) {
|
|
@@ -146394,6 +146666,14 @@ bool Deliminator::RemoveCandidate(unique_ptr<LogicalOperator> *op_ptr, Deliminat
|
|
|
146394
146666
|
}
|
|
146395
146667
|
}
|
|
146396
146668
|
}
|
|
146669
|
+
|
|
146670
|
+
if (!all_equality_conditions) {
|
|
146671
|
+
// we can get rid of an inequality join with a DelimGet, but only under specific circumstances
|
|
146672
|
+
if (!RemoveInequalityCandidate(plan, candidate, updater)) {
|
|
146673
|
+
return false;
|
|
146674
|
+
}
|
|
146675
|
+
}
|
|
146676
|
+
|
|
146397
146677
|
// make a filter if needed
|
|
146398
146678
|
if (!nulls_are_not_equal_exprs.empty() || filter != nullptr) {
|
|
146399
146679
|
auto filter_op = make_unique<LogicalFilter>();
|
|
@@ -146421,6 +146701,146 @@ bool Deliminator::RemoveCandidate(unique_ptr<LogicalOperator> *op_ptr, Deliminat
|
|
|
146421
146701
|
return true;
|
|
146422
146702
|
}
|
|
146423
146703
|
|
|
146704
|
+
static void GetDelimJoins(LogicalOperator &op, vector<LogicalOperator *> &delim_joins) {
|
|
146705
|
+
for (auto &child : op.children) {
|
|
146706
|
+
GetDelimJoins(*child, delim_joins);
|
|
146707
|
+
}
|
|
146708
|
+
if (op.type == LogicalOperatorType::LOGICAL_DELIM_JOIN) {
|
|
146709
|
+
delim_joins.push_back(&op);
|
|
146710
|
+
}
|
|
146711
|
+
}
|
|
146712
|
+
|
|
146713
|
+
static bool HasChild(LogicalOperator *haystack, LogicalOperator *needle, idx_t &side) {
|
|
146714
|
+
if (haystack == needle) {
|
|
146715
|
+
return true;
|
|
146716
|
+
}
|
|
146717
|
+
for (idx_t i = 0; i < haystack->children.size(); i++) {
|
|
146718
|
+
auto &child = haystack->children[i];
|
|
146719
|
+
idx_t dummy_side;
|
|
146720
|
+
if (HasChild(child.get(), needle, dummy_side)) {
|
|
146721
|
+
side = i;
|
|
146722
|
+
return true;
|
|
146723
|
+
}
|
|
146724
|
+
}
|
|
146725
|
+
return false;
|
|
146726
|
+
}
|
|
146727
|
+
|
|
146728
|
+
bool Deliminator::RemoveInequalityCandidate(unique_ptr<LogicalOperator> *plan, unique_ptr<LogicalOperator> *candidate,
|
|
146729
|
+
DeliminatorPlanUpdater &updater) {
|
|
146730
|
+
auto &proj_or_agg = **candidate;
|
|
146731
|
+
// first, we find a DelimJoin in "plan" that has only one DelimGet as a child, which is in "candidate"
|
|
146732
|
+
if (DelimGetCount(proj_or_agg) != 1) {
|
|
146733
|
+
// the candidate therefore must have only a single DelimGet in its children
|
|
146734
|
+
return false;
|
|
146735
|
+
}
|
|
146736
|
+
|
|
146737
|
+
vector<LogicalOperator *> delim_joins;
|
|
146738
|
+
GetDelimJoins(**plan, delim_joins);
|
|
146739
|
+
|
|
146740
|
+
LogicalOperator *parent = nullptr;
|
|
146741
|
+
idx_t parent_delim_get_side;
|
|
146742
|
+
for (auto dj : delim_joins) {
|
|
146743
|
+
D_ASSERT(dj->type == LogicalOperatorType::LOGICAL_DELIM_JOIN);
|
|
146744
|
+
if (!HasChild(dj, &proj_or_agg, parent_delim_get_side)) {
|
|
146745
|
+
continue;
|
|
146746
|
+
}
|
|
146747
|
+
// we found a parent DelimJoin
|
|
146748
|
+
if (DelimGetCount(*dj) != 1) {
|
|
146749
|
+
// it has more than one DelimGet children
|
|
146750
|
+
continue;
|
|
146751
|
+
}
|
|
146752
|
+
|
|
146753
|
+
// we can only remove inequality join with a DelimGet if the parent DelimJoin has one of these join types
|
|
146754
|
+
auto &delim_join = (LogicalDelimJoin &)*dj;
|
|
146755
|
+
if (!InequalityDelimJoinCanBeEliminated(delim_join.join_type)) {
|
|
146756
|
+
continue;
|
|
146757
|
+
}
|
|
146758
|
+
|
|
146759
|
+
parent = dj;
|
|
146760
|
+
break;
|
|
146761
|
+
}
|
|
146762
|
+
if (!parent) {
|
|
146763
|
+
return false;
|
|
146764
|
+
}
|
|
146765
|
+
|
|
146766
|
+
// we found the parent delim join, and we may be able to remove the child DelimGet join
|
|
146767
|
+
// but we need to make sure that their conditions refer to exactly the same columns
|
|
146768
|
+
auto &parent_delim_join = (LogicalDelimJoin &)*parent;
|
|
146769
|
+
auto &join = (LogicalComparisonJoin &)*proj_or_agg.children[0];
|
|
146770
|
+
if (parent_delim_join.conditions.size() != join.conditions.size()) {
|
|
146771
|
+
// different number of conditions, can't replace
|
|
146772
|
+
return false;
|
|
146773
|
+
}
|
|
146774
|
+
|
|
146775
|
+
// we can only do this optimization under the following conditions:
|
|
146776
|
+
// 1. all join expressions coming from the DelimGet side are colrefs
|
|
146777
|
+
// 2. these expressions refer to colrefs coming from the proj/agg on top of the child DelimGet join
|
|
146778
|
+
// 3. the expression (before it was proj/agg) can be found in the conditions of the child DelimGet join
|
|
146779
|
+
for (auto &parent_cond : parent_delim_join.conditions) {
|
|
146780
|
+
auto &parent_expr = parent_delim_get_side == 0 ? parent_cond.left : parent_cond.right;
|
|
146781
|
+
if (parent_expr->type != ExpressionType::BOUND_COLUMN_REF) {
|
|
146782
|
+
// can only deal with colrefs
|
|
146783
|
+
return false;
|
|
146784
|
+
}
|
|
146785
|
+
auto &parent_colref = (BoundColumnRefExpression &)*parent_expr;
|
|
146786
|
+
auto it = updater.reverse_proj_or_agg_map.find(parent_colref.binding);
|
|
146787
|
+
if (it == updater.reverse_proj_or_agg_map.end()) {
|
|
146788
|
+
// refers to a column that was not in the child DelimGet join
|
|
146789
|
+
return false;
|
|
146790
|
+
}
|
|
146791
|
+
// try to find the corresponding child condition
|
|
146792
|
+
// TODO: can be more flexible - allow CAST
|
|
146793
|
+
auto child_expr = it->second;
|
|
146794
|
+
bool found = false;
|
|
146795
|
+
for (auto &child_cond : join.conditions) {
|
|
146796
|
+
if (child_cond.left->Equals(child_expr) || child_cond.right->Equals(child_expr)) {
|
|
146797
|
+
found = true;
|
|
146798
|
+
break;
|
|
146799
|
+
}
|
|
146800
|
+
}
|
|
146801
|
+
if (!found) {
|
|
146802
|
+
// could not find the mapped expression in the child condition expressions
|
|
146803
|
+
return false;
|
|
146804
|
+
}
|
|
146805
|
+
}
|
|
146806
|
+
|
|
146807
|
+
// TODO: we cannot perform the optimization here because our pure inequality joins don't implement
|
|
146808
|
+
// JoinType::SINGLE yet
|
|
146809
|
+
if (parent_delim_join.join_type == JoinType::SINGLE) {
|
|
146810
|
+
bool has_one_equality = false;
|
|
146811
|
+
for (auto &cond : join.conditions) {
|
|
146812
|
+
has_one_equality = has_one_equality || IsEqualityJoinCondition(cond);
|
|
146813
|
+
}
|
|
146814
|
+
if (!has_one_equality) {
|
|
146815
|
+
return false;
|
|
146816
|
+
}
|
|
146817
|
+
}
|
|
146818
|
+
|
|
146819
|
+
// we are now sure that we can remove the child DelimGet join, so we basically do the same loop as above
|
|
146820
|
+
// this time without checks because we already did them, and replace the expressions
|
|
146821
|
+
for (auto &parent_cond : parent_delim_join.conditions) {
|
|
146822
|
+
auto &parent_expr = parent_delim_get_side == 0 ? parent_cond.left : parent_cond.right;
|
|
146823
|
+
auto &parent_colref = (BoundColumnRefExpression &)*parent_expr;
|
|
146824
|
+
auto it = updater.reverse_proj_or_agg_map.find(parent_colref.binding);
|
|
146825
|
+
auto child_expr = it->second;
|
|
146826
|
+
for (auto &child_cond : join.conditions) {
|
|
146827
|
+
if (!child_cond.left->Equals(child_expr) && !child_cond.right->Equals(child_expr)) {
|
|
146828
|
+
continue;
|
|
146829
|
+
}
|
|
146830
|
+
parent_expr =
|
|
146831
|
+
make_unique<BoundColumnRefExpression>(parent_expr->alias, parent_expr->return_type, it->first);
|
|
146832
|
+
parent_cond.comparison = child_cond.comparison;
|
|
146833
|
+
break;
|
|
146834
|
+
}
|
|
146835
|
+
}
|
|
146836
|
+
|
|
146837
|
+
// no longer needs to be a delim join
|
|
146838
|
+
parent_delim_join.duplicate_eliminated_columns.clear();
|
|
146839
|
+
parent_delim_join.type = LogicalOperatorType::LOGICAL_COMPARISON_JOIN;
|
|
146840
|
+
|
|
146841
|
+
return true;
|
|
146842
|
+
}
|
|
146843
|
+
|
|
146424
146844
|
} // namespace duckdb
|
|
146425
146845
|
|
|
146426
146846
|
|