duckdb 0.3.4-dev91.0 → 0.3.5-dev116.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/duckdb.cpp +1325 -884
- package/src/duckdb.hpp +91 -16
- package/src/parquet-amalgamation.cpp +36187 -36187
package/src/duckdb.cpp
CHANGED
|
@@ -29679,6 +29679,8 @@ public:
|
|
|
29679
29679
|
//! Completes the cascaded merge sort round.
|
|
29680
29680
|
//! Pass true if you wish to use the radix data for further comparisons.
|
|
29681
29681
|
void CompleteMergeRound(bool keep_radix_data = false);
|
|
29682
|
+
//! Print the sorted data to the console.
|
|
29683
|
+
void Print();
|
|
29682
29684
|
|
|
29683
29685
|
public:
|
|
29684
29686
|
//! The lock for updating the order global state
|
|
@@ -31591,6 +31593,19 @@ void GlobalSortState::CompleteMergeRound(bool keep_radix_data) {
|
|
|
31591
31593
|
sorted_blocks[0]->blob_sorting_data = nullptr;
|
|
31592
31594
|
}
|
|
31593
31595
|
}
|
|
31596
|
+
void GlobalSortState::Print() {
|
|
31597
|
+
PayloadScanner scanner(*this, false);
|
|
31598
|
+
DataChunk chunk;
|
|
31599
|
+
chunk.Initialize(scanner.GetPayloadTypes());
|
|
31600
|
+
for (;;) {
|
|
31601
|
+
scanner.Scan(chunk);
|
|
31602
|
+
const auto count = chunk.size();
|
|
31603
|
+
if (!count) {
|
|
31604
|
+
break;
|
|
31605
|
+
}
|
|
31606
|
+
chunk.Print();
|
|
31607
|
+
}
|
|
31608
|
+
}
|
|
31594
31609
|
|
|
31595
31610
|
} // namespace duckdb
|
|
31596
31611
|
|
|
@@ -41290,7 +41305,7 @@ void Vector::Normalify(idx_t count) {
|
|
|
41290
41305
|
break;
|
|
41291
41306
|
case VectorType::DICTIONARY_VECTOR: {
|
|
41292
41307
|
// create a new flat vector of this type
|
|
41293
|
-
Vector other(GetType());
|
|
41308
|
+
Vector other(GetType(), count);
|
|
41294
41309
|
// now copy the data of this vector to the other vector, removing the selection vector in the process
|
|
41295
41310
|
VectorOperations::Copy(*this, other, count, 0, 0);
|
|
41296
41311
|
// create a reference to the data in the other vector
|
|
@@ -46757,8 +46772,8 @@ static bool ListCastSwitch(Vector &source, Vector &result, idx_t count, string *
|
|
|
46757
46772
|
}
|
|
46758
46773
|
|
|
46759
46774
|
template <class SRC_TYPE, class RES_TYPE>
|
|
46760
|
-
|
|
46761
|
-
|
|
46775
|
+
bool FillEnum(Vector &source, Vector &result, idx_t count, string *error_message) {
|
|
46776
|
+
bool all_converted = true;
|
|
46762
46777
|
result.SetVectorType(VectorType::FLAT_VECTOR);
|
|
46763
46778
|
|
|
46764
46779
|
auto &str_vec = EnumType::GetValuesInsertOrder(source.GetType());
|
|
@@ -46786,25 +46801,29 @@ void FillEnum(Vector &source, Vector &result, idx_t count) {
|
|
|
46786
46801
|
auto key = EnumType::GetPos(res_enum_type, str);
|
|
46787
46802
|
if (key == -1) {
|
|
46788
46803
|
// key doesn't exist on result enum
|
|
46789
|
-
|
|
46804
|
+
if (!error_message) {
|
|
46805
|
+
result_data[i] = HandleVectorCastError::Operation<RES_TYPE>(
|
|
46806
|
+
CastExceptionText<SRC_TYPE, RES_TYPE>(source_data[src_idx]), result_mask, i, error_message,
|
|
46807
|
+
all_converted);
|
|
46808
|
+
} else {
|
|
46809
|
+
result_mask.SetInvalid(i);
|
|
46810
|
+
}
|
|
46790
46811
|
continue;
|
|
46791
46812
|
}
|
|
46792
46813
|
result_data[i] = key;
|
|
46793
46814
|
}
|
|
46815
|
+
return all_converted;
|
|
46794
46816
|
}
|
|
46795
46817
|
|
|
46796
46818
|
template <class SRC_TYPE>
|
|
46797
|
-
|
|
46819
|
+
bool FillEnumResultTemplate(Vector &source, Vector &result, idx_t count, string *error_message) {
|
|
46798
46820
|
switch (source.GetType().InternalType()) {
|
|
46799
46821
|
case PhysicalType::UINT8:
|
|
46800
|
-
FillEnum<SRC_TYPE, uint8_t>(source, result, count);
|
|
46801
|
-
break;
|
|
46822
|
+
return FillEnum<SRC_TYPE, uint8_t>(source, result, count, error_message);
|
|
46802
46823
|
case PhysicalType::UINT16:
|
|
46803
|
-
FillEnum<SRC_TYPE, uint16_t>(source, result, count);
|
|
46804
|
-
break;
|
|
46824
|
+
return FillEnum<SRC_TYPE, uint16_t>(source, result, count, error_message);
|
|
46805
46825
|
case PhysicalType::UINT32:
|
|
46806
|
-
FillEnum<SRC_TYPE, uint32_t>(source, result, count);
|
|
46807
|
-
break;
|
|
46826
|
+
return FillEnum<SRC_TYPE, uint32_t>(source, result, count, error_message);
|
|
46808
46827
|
default:
|
|
46809
46828
|
throw InternalException("ENUM can only have unsigned integers (except UINT64) as physical types");
|
|
46810
46829
|
}
|
|
@@ -46852,18 +46871,14 @@ static bool EnumCastSwitch(Vector &source, Vector &result, idx_t count, string *
|
|
|
46852
46871
|
// This means they are both ENUMs, but of different types.
|
|
46853
46872
|
switch (enum_physical_type) {
|
|
46854
46873
|
case PhysicalType::UINT8:
|
|
46855
|
-
FillEnumResultTemplate<uint8_t>(source, result, count);
|
|
46856
|
-
break;
|
|
46874
|
+
return FillEnumResultTemplate<uint8_t>(source, result, count, error_message);
|
|
46857
46875
|
case PhysicalType::UINT16:
|
|
46858
|
-
FillEnumResultTemplate<uint16_t>(source, result, count);
|
|
46859
|
-
break;
|
|
46876
|
+
return FillEnumResultTemplate<uint16_t>(source, result, count, error_message);
|
|
46860
46877
|
case PhysicalType::UINT32:
|
|
46861
|
-
FillEnumResultTemplate<uint32_t>(source, result, count);
|
|
46862
|
-
break;
|
|
46878
|
+
return FillEnumResultTemplate<uint32_t>(source, result, count, error_message);
|
|
46863
46879
|
default:
|
|
46864
46880
|
throw InternalException("ENUM can only have unsigned integers (except UINT64) as physical types");
|
|
46865
46881
|
}
|
|
46866
|
-
break;
|
|
46867
46882
|
}
|
|
46868
46883
|
case LogicalTypeId::JSON:
|
|
46869
46884
|
case LogicalTypeId::VARCHAR: {
|
|
@@ -47105,7 +47120,20 @@ void VectorOperations::Copy(const Vector &source, Vector &target, const Selectio
|
|
|
47105
47120
|
if (smask.IsMaskSet()) {
|
|
47106
47121
|
for (idx_t i = 0; i < copy_count; i++) {
|
|
47107
47122
|
auto idx = sel->get_index(source_offset + i);
|
|
47108
|
-
|
|
47123
|
+
|
|
47124
|
+
if (smask.RowIsValid(idx)) {
|
|
47125
|
+
// set valid
|
|
47126
|
+
if (!tmask.AllValid()) {
|
|
47127
|
+
tmask.SetValidUnsafe(target_offset + i);
|
|
47128
|
+
}
|
|
47129
|
+
} else {
|
|
47130
|
+
// set invalid
|
|
47131
|
+
if (tmask.AllValid()) {
|
|
47132
|
+
auto init_size = MaxValue<idx_t>(STANDARD_VECTOR_SIZE, target_offset + copy_count);
|
|
47133
|
+
tmask.Initialize(init_size);
|
|
47134
|
+
}
|
|
47135
|
+
tmask.SetInvalidUnsafe(target_offset + i);
|
|
47136
|
+
}
|
|
47109
47137
|
}
|
|
47110
47138
|
}
|
|
47111
47139
|
}
|
|
@@ -59205,16 +59233,119 @@ void PhysicalHashJoin::GetData(ExecutionContext &context, DataChunk &chunk, Glob
|
|
|
59205
59233
|
|
|
59206
59234
|
|
|
59207
59235
|
|
|
59236
|
+
//===----------------------------------------------------------------------===//
|
|
59237
|
+
// DuckDB
|
|
59238
|
+
//
|
|
59239
|
+
// duckdb/execution/operator/join/physical_piecewise_merge_join.hpp
|
|
59240
|
+
//
|
|
59241
|
+
//
|
|
59242
|
+
//===----------------------------------------------------------------------===//
|
|
59243
|
+
|
|
59244
|
+
|
|
59245
|
+
|
|
59246
|
+
|
|
59208
59247
|
|
|
59209
59248
|
|
|
59210
59249
|
|
|
59211
59250
|
namespace duckdb {
|
|
59212
59251
|
|
|
59213
|
-
|
|
59252
|
+
struct GlobalSortState;
|
|
59253
|
+
|
|
59254
|
+
//! PhysicalRangeJoin represents one or more inequality range join predicates between
|
|
59255
|
+
//! two tables
|
|
59256
|
+
class PhysicalRangeJoin : public PhysicalComparisonJoin {
|
|
59257
|
+
public:
|
|
59258
|
+
class LocalSortedTable {
|
|
59259
|
+
public:
|
|
59260
|
+
LocalSortedTable(const PhysicalRangeJoin &op, const idx_t child);
|
|
59261
|
+
|
|
59262
|
+
void Sink(DataChunk &input, GlobalSortState &global_sort_state);
|
|
59263
|
+
|
|
59264
|
+
inline void Sort(GlobalSortState &global_sort_state) {
|
|
59265
|
+
local_sort_state.Sort(global_sort_state, true);
|
|
59266
|
+
}
|
|
59267
|
+
|
|
59268
|
+
//! The hosting operator
|
|
59269
|
+
const PhysicalRangeJoin &op;
|
|
59270
|
+
//! The local sort state
|
|
59271
|
+
LocalSortState local_sort_state;
|
|
59272
|
+
//! Local copy of the sorting expression executor
|
|
59273
|
+
ExpressionExecutor executor;
|
|
59274
|
+
//! Holds a vector of incoming sorting columns
|
|
59275
|
+
DataChunk keys;
|
|
59276
|
+
//! The number of NULL values
|
|
59277
|
+
idx_t has_null;
|
|
59278
|
+
//! The total number of rows
|
|
59279
|
+
idx_t count;
|
|
59280
|
+
|
|
59281
|
+
private:
|
|
59282
|
+
// Merge the NULLs of all non-DISTINCT predicates into the primary so they sort to the end.
|
|
59283
|
+
idx_t MergeNulls(const vector<JoinCondition> &conditions);
|
|
59284
|
+
};
|
|
59285
|
+
|
|
59286
|
+
class GlobalSortedTable {
|
|
59287
|
+
public:
|
|
59288
|
+
GlobalSortedTable(ClientContext &context, const vector<BoundOrderByNode> &orders, RowLayout &payload_layout);
|
|
59289
|
+
|
|
59290
|
+
inline idx_t Count() const {
|
|
59291
|
+
return count;
|
|
59292
|
+
}
|
|
59293
|
+
|
|
59294
|
+
inline idx_t BlockCount() const {
|
|
59295
|
+
if (global_sort_state.sorted_blocks.empty()) {
|
|
59296
|
+
return 0;
|
|
59297
|
+
}
|
|
59298
|
+
D_ASSERT(global_sort_state.sorted_blocks.size() == 1);
|
|
59299
|
+
return global_sort_state.sorted_blocks[0]->radix_sorting_data.size();
|
|
59300
|
+
}
|
|
59301
|
+
|
|
59302
|
+
inline idx_t BlockSize(idx_t i) const {
|
|
59303
|
+
return global_sort_state.sorted_blocks[0]->radix_sorting_data[i].count;
|
|
59304
|
+
}
|
|
59305
|
+
|
|
59306
|
+
void Combine(LocalSortedTable <able);
|
|
59307
|
+
void IntializeMatches();
|
|
59308
|
+
void Print();
|
|
59309
|
+
|
|
59310
|
+
//! Starts the sorting process.
|
|
59311
|
+
void Finalize(Pipeline &pipeline, Event &event);
|
|
59312
|
+
//! Schedules tasks to merge sort the current child's data during a Finalize phase
|
|
59313
|
+
void ScheduleMergeTasks(Pipeline &pipeline, Event &event);
|
|
59314
|
+
|
|
59315
|
+
GlobalSortState global_sort_state;
|
|
59316
|
+
//! Whether or not the RHS has NULL values
|
|
59317
|
+
atomic<idx_t> has_null;
|
|
59318
|
+
//! The total number of rows in the RHS
|
|
59319
|
+
atomic<idx_t> count;
|
|
59320
|
+
//! A bool indicating for each tuple in the RHS if they found a match (only used in FULL OUTER JOIN)
|
|
59321
|
+
unique_ptr<bool[]> found_match;
|
|
59322
|
+
//! Memory usage per thread
|
|
59323
|
+
idx_t memory_per_thread;
|
|
59324
|
+
};
|
|
59325
|
+
|
|
59326
|
+
public:
|
|
59327
|
+
PhysicalRangeJoin(LogicalOperator &op, PhysicalOperatorType type, unique_ptr<PhysicalOperator> left,
|
|
59328
|
+
unique_ptr<PhysicalOperator> right, vector<JoinCondition> cond, JoinType join_type,
|
|
59329
|
+
idx_t estimated_cardinality);
|
|
59330
|
+
|
|
59331
|
+
public:
|
|
59332
|
+
// Gather the result values and slice the payload columns to those values.
|
|
59333
|
+
static void SliceSortedPayload(DataChunk &payload, GlobalSortState &state, const idx_t block_idx,
|
|
59334
|
+
const SelectionVector &result, const idx_t result_count, const idx_t left_cols = 0);
|
|
59335
|
+
// Apply a tail condition to the current selection
|
|
59336
|
+
static idx_t SelectJoinTail(const ExpressionType &condition, Vector &left, Vector &right,
|
|
59337
|
+
const SelectionVector *sel, idx_t count, SelectionVector *true_sel);
|
|
59338
|
+
};
|
|
59339
|
+
|
|
59340
|
+
} // namespace duckdb
|
|
59341
|
+
|
|
59342
|
+
|
|
59343
|
+
|
|
59344
|
+
namespace duckdb {
|
|
59214
59345
|
|
|
59215
59346
|
//! PhysicalIEJoin represents a two inequality range join between
|
|
59216
59347
|
//! two tables
|
|
59217
|
-
class PhysicalIEJoin : public
|
|
59348
|
+
class PhysicalIEJoin : public PhysicalRangeJoin {
|
|
59218
59349
|
public:
|
|
59219
59350
|
PhysicalIEJoin(LogicalOperator &op, unique_ptr<PhysicalOperator> left, unique_ptr<PhysicalOperator> right,
|
|
59220
59351
|
vector<JoinCondition> cond, JoinType join_type, idx_t estimated_cardinality);
|
|
@@ -59253,9 +59384,6 @@ public:
|
|
|
59253
59384
|
SinkFinalizeType Finalize(Pipeline &pipeline, Event &event, ClientContext &context,
|
|
59254
59385
|
GlobalSinkState &gstate) const override;
|
|
59255
59386
|
|
|
59256
|
-
//! Schedules tasks to merge sort the current child's data during a Finalize phase
|
|
59257
|
-
static void ScheduleMergeTasks(Pipeline &pipeline, Event &event, IEJoinSortedTable &table);
|
|
59258
|
-
|
|
59259
59387
|
bool IsSink() const override {
|
|
59260
59388
|
return true;
|
|
59261
59389
|
}
|
|
@@ -59289,35 +59417,8 @@ namespace duckdb {
|
|
|
59289
59417
|
PhysicalIEJoin::PhysicalIEJoin(LogicalOperator &op, unique_ptr<PhysicalOperator> left,
|
|
59290
59418
|
unique_ptr<PhysicalOperator> right, vector<JoinCondition> cond, JoinType join_type,
|
|
59291
59419
|
idx_t estimated_cardinality)
|
|
59292
|
-
:
|
|
59293
|
-
|
|
59294
|
-
// TODO: use stats to improve the choice?
|
|
59295
|
-
// TODO: Prefer fixed length types?
|
|
59296
|
-
auto conditions_p = std::move(conditions);
|
|
59297
|
-
conditions.resize(conditions_p.size());
|
|
59298
|
-
idx_t range_position = 0;
|
|
59299
|
-
idx_t other_position = conditions_p.size();
|
|
59300
|
-
for (idx_t i = 0; i < conditions_p.size(); ++i) {
|
|
59301
|
-
switch (conditions_p[i].comparison) {
|
|
59302
|
-
case ExpressionType::COMPARE_LESSTHAN:
|
|
59303
|
-
case ExpressionType::COMPARE_LESSTHANOREQUALTO:
|
|
59304
|
-
case ExpressionType::COMPARE_GREATERTHAN:
|
|
59305
|
-
case ExpressionType::COMPARE_GREATERTHANOREQUALTO:
|
|
59306
|
-
conditions[range_position++] = std::move(conditions_p[i]);
|
|
59307
|
-
break;
|
|
59308
|
-
case ExpressionType::COMPARE_NOTEQUAL:
|
|
59309
|
-
case ExpressionType::COMPARE_DISTINCT_FROM:
|
|
59310
|
-
// Allowed in multi-predicate joins, but can't be first/sort.
|
|
59311
|
-
conditions[--other_position] = std::move(conditions_p[i]);
|
|
59312
|
-
break;
|
|
59313
|
-
default:
|
|
59314
|
-
// COMPARE EQUAL not supported with iejoin join
|
|
59315
|
-
throw NotImplementedException("Unimplemented join type for IEJoin");
|
|
59316
|
-
}
|
|
59317
|
-
}
|
|
59318
|
-
|
|
59319
|
-
// IEJoin requires at least two comparisons.
|
|
59320
|
-
D_ASSERT(range_position > 1);
|
|
59420
|
+
: PhysicalRangeJoin(op, PhysicalOperatorType::IE_JOIN, move(left), move(right), move(cond), join_type,
|
|
59421
|
+
estimated_cardinality) {
|
|
59321
59422
|
|
|
59322
59423
|
// 1. let L1 (resp. L2) be the array of column X (resp. Y)
|
|
59323
59424
|
D_ASSERT(conditions.size() >= 2);
|
|
@@ -59342,9 +59443,12 @@ PhysicalIEJoin::PhysicalIEJoin(LogicalOperator &op, unique_ptr<PhysicalOperator>
|
|
|
59342
59443
|
case ExpressionType::COMPARE_GREATERTHANOREQUALTO:
|
|
59343
59444
|
sense = i ? OrderType::ASCENDING : OrderType::DESCENDING;
|
|
59344
59445
|
break;
|
|
59345
|
-
|
|
59446
|
+
case ExpressionType::COMPARE_LESSTHAN:
|
|
59447
|
+
case ExpressionType::COMPARE_LESSTHANOREQUALTO:
|
|
59346
59448
|
sense = i ? OrderType::DESCENDING : OrderType::ASCENDING;
|
|
59347
59449
|
break;
|
|
59450
|
+
default:
|
|
59451
|
+
throw NotImplementedException("Unimplemented join type for IEJoin");
|
|
59348
59452
|
}
|
|
59349
59453
|
lhs_orders[i].emplace_back(BoundOrderByNode(sense, OrderByNullType::NULLS_LAST, move(left)));
|
|
59350
59454
|
rhs_orders[i].emplace_back(BoundOrderByNode(sense, OrderByNullType::NULLS_LAST, move(right)));
|
|
@@ -59355,9 +59459,6 @@ PhysicalIEJoin::PhysicalIEJoin(LogicalOperator &op, unique_ptr<PhysicalOperator>
|
|
|
59355
59459
|
D_ASSERT(cond.left->return_type == cond.right->return_type);
|
|
59356
59460
|
join_key_types.push_back(cond.left->return_type);
|
|
59357
59461
|
}
|
|
59358
|
-
|
|
59359
|
-
children.push_back(move(left));
|
|
59360
|
-
children.push_back(move(right));
|
|
59361
59462
|
}
|
|
59362
59463
|
|
|
59363
59464
|
//===--------------------------------------------------------------------===//
|
|
@@ -59365,193 +59466,19 @@ PhysicalIEJoin::PhysicalIEJoin(LogicalOperator &op, unique_ptr<PhysicalOperator>
|
|
|
59365
59466
|
//===--------------------------------------------------------------------===//
|
|
59366
59467
|
class IEJoinLocalState : public LocalSinkState {
|
|
59367
59468
|
public:
|
|
59368
|
-
|
|
59369
|
-
// Initialize order clause expression executor and key DataChunk
|
|
59370
|
-
vector<LogicalType> types;
|
|
59371
|
-
for (const auto &cond : conditions) {
|
|
59372
|
-
comparisons.emplace_back(cond.comparison);
|
|
59469
|
+
using LocalSortedTable = PhysicalRangeJoin::LocalSortedTable;
|
|
59373
59470
|
|
|
59374
|
-
|
|
59375
|
-
executor.AddExpression(*expr);
|
|
59376
|
-
|
|
59377
|
-
types.push_back(expr->return_type);
|
|
59378
|
-
}
|
|
59379
|
-
keys.Initialize(types);
|
|
59471
|
+
IEJoinLocalState(const PhysicalRangeJoin &op, const idx_t child) : table(op, child) {
|
|
59380
59472
|
}
|
|
59381
59473
|
|
|
59382
59474
|
//! The local sort state
|
|
59383
|
-
|
|
59384
|
-
//! Local copy of the sorting expression executor
|
|
59385
|
-
ExpressionExecutor executor;
|
|
59386
|
-
//! Holds a vector of incoming sorting columns
|
|
59387
|
-
DataChunk keys;
|
|
59388
|
-
//! The comparison list (for null merging)
|
|
59389
|
-
vector<ExpressionType> comparisons;
|
|
59390
|
-
//! The number of NULL values
|
|
59391
|
-
idx_t has_null;
|
|
59392
|
-
//! The total number of rows
|
|
59393
|
-
idx_t count;
|
|
59394
|
-
|
|
59395
|
-
idx_t MergeKeyNulls();
|
|
59396
|
-
|
|
59397
|
-
void Sink(DataChunk &input, GlobalSortState &global_sort_state) {
|
|
59398
|
-
// Initialize local state (if necessary)
|
|
59399
|
-
if (!local_sort_state.initialized) {
|
|
59400
|
-
local_sort_state.Initialize(global_sort_state, global_sort_state.buffer_manager);
|
|
59401
|
-
}
|
|
59402
|
-
|
|
59403
|
-
// Obtain sorting columns
|
|
59404
|
-
keys.Reset();
|
|
59405
|
-
executor.Execute(input, keys);
|
|
59406
|
-
|
|
59407
|
-
// Count the NULLs so we can exclude them later
|
|
59408
|
-
has_null += MergeKeyNulls();
|
|
59409
|
-
count += keys.size();
|
|
59410
|
-
|
|
59411
|
-
// Sink the data into the local sort state
|
|
59412
|
-
D_ASSERT(keys.ColumnCount() > 1);
|
|
59413
|
-
// Only sort the primary key
|
|
59414
|
-
DataChunk join_head;
|
|
59415
|
-
join_head.data.emplace_back(Vector(keys.data[0]));
|
|
59416
|
-
join_head.SetCardinality(keys.size());
|
|
59417
|
-
|
|
59418
|
-
local_sort_state.SinkChunk(join_head, input);
|
|
59419
|
-
}
|
|
59420
|
-
|
|
59421
|
-
void Sort(GlobalSortState &gss) {
|
|
59422
|
-
local_sort_state.Sort(gss, true);
|
|
59423
|
-
}
|
|
59424
|
-
void Reset() {
|
|
59425
|
-
has_null = 0;
|
|
59426
|
-
count = 0;
|
|
59427
|
-
}
|
|
59475
|
+
LocalSortedTable table;
|
|
59428
59476
|
};
|
|
59429
59477
|
|
|
59430
|
-
|
|
59431
|
-
// Merge the validity masks of the comparison keys into the primary
|
|
59432
|
-
// Return the number of NULLs in the resulting chunk
|
|
59433
|
-
D_ASSERT(keys.ColumnCount() > 0);
|
|
59434
|
-
const auto count = keys.size();
|
|
59435
|
-
|
|
59436
|
-
size_t all_constant = 0;
|
|
59437
|
-
for (auto &v : keys.data) {
|
|
59438
|
-
all_constant += int(v.GetVectorType() == VectorType::CONSTANT_VECTOR);
|
|
59439
|
-
}
|
|
59440
|
-
|
|
59441
|
-
auto &primary = keys.data[0];
|
|
59442
|
-
if (all_constant == keys.data.size()) {
|
|
59443
|
-
// Either all NULL or no NULLs
|
|
59444
|
-
for (auto &v : keys.data) {
|
|
59445
|
-
if (ConstantVector::IsNull(v)) {
|
|
59446
|
-
ConstantVector::SetNull(primary, true);
|
|
59447
|
-
return count;
|
|
59448
|
-
}
|
|
59449
|
-
}
|
|
59450
|
-
return 0;
|
|
59451
|
-
} else if (keys.ColumnCount() > 1) {
|
|
59452
|
-
// Normalify the primary, as it will need to merge arbitrary validity masks
|
|
59453
|
-
primary.Normalify(count);
|
|
59454
|
-
auto &pvalidity = FlatVector::Validity(primary);
|
|
59455
|
-
D_ASSERT(keys.ColumnCount() == comparisons.size());
|
|
59456
|
-
for (size_t c = 1; c < keys.data.size(); ++c) {
|
|
59457
|
-
// Skip comparisons that accept NULLs
|
|
59458
|
-
if (comparisons[c] == ExpressionType::COMPARE_DISTINCT_FROM) {
|
|
59459
|
-
continue;
|
|
59460
|
-
}
|
|
59461
|
-
// Orrify the rest, as the sort code will do this anyway.
|
|
59462
|
-
auto &v = keys.data[c];
|
|
59463
|
-
VectorData vdata;
|
|
59464
|
-
v.Orrify(count, vdata);
|
|
59465
|
-
auto &vvalidity = vdata.validity;
|
|
59466
|
-
if (vvalidity.AllValid()) {
|
|
59467
|
-
continue;
|
|
59468
|
-
}
|
|
59469
|
-
pvalidity.EnsureWritable();
|
|
59470
|
-
auto pmask = pvalidity.GetData();
|
|
59471
|
-
if (v.GetVectorType() == VectorType::FLAT_VECTOR) {
|
|
59472
|
-
// Merge entire entries
|
|
59473
|
-
const auto entry_count = pvalidity.EntryCount(count);
|
|
59474
|
-
for (idx_t entry_idx = 0; entry_idx < entry_count; ++entry_idx) {
|
|
59475
|
-
pmask[entry_idx] &= vvalidity.GetValidityEntry(entry_idx);
|
|
59476
|
-
}
|
|
59477
|
-
}
|
|
59478
|
-
}
|
|
59479
|
-
return count - pvalidity.CountValid(count);
|
|
59480
|
-
} else {
|
|
59481
|
-
return count - VectorOperations::CountNotNull(primary, count);
|
|
59482
|
-
}
|
|
59483
|
-
}
|
|
59484
|
-
|
|
59485
|
-
class IEJoinSortedTable {
|
|
59478
|
+
class IEJoinGlobalState : public GlobalSinkState {
|
|
59486
59479
|
public:
|
|
59487
|
-
|
|
59488
|
-
: global_sort_state(BufferManager::GetBufferManager(context), orders, payload_layout), has_null(0), count(0),
|
|
59489
|
-
memory_per_thread(0) {
|
|
59490
|
-
D_ASSERT(orders.size() == 1);
|
|
59491
|
-
|
|
59492
|
-
// Set external (can be force with the PRAGMA)
|
|
59493
|
-
auto &config = ClientConfig::GetConfig(context);
|
|
59494
|
-
global_sort_state.external = config.force_external;
|
|
59495
|
-
// Memory usage per thread should scale with max mem / num threads
|
|
59496
|
-
// We take 1/4th of this, to be conservative
|
|
59497
|
-
idx_t max_memory = global_sort_state.buffer_manager.GetMaxMemory();
|
|
59498
|
-
idx_t num_threads = TaskScheduler::GetScheduler(context).NumberOfThreads();
|
|
59499
|
-
memory_per_thread = (max_memory / num_threads) / 4;
|
|
59500
|
-
}
|
|
59501
|
-
|
|
59502
|
-
inline idx_t Count() const {
|
|
59503
|
-
return count;
|
|
59504
|
-
}
|
|
59505
|
-
|
|
59506
|
-
inline idx_t BlockCount() const {
|
|
59507
|
-
if (global_sort_state.sorted_blocks.empty()) {
|
|
59508
|
-
return 0;
|
|
59509
|
-
}
|
|
59510
|
-
D_ASSERT(global_sort_state.sorted_blocks.size() == 1);
|
|
59511
|
-
return global_sort_state.sorted_blocks[0]->radix_sorting_data.size();
|
|
59512
|
-
}
|
|
59480
|
+
using GlobalSortedTable = PhysicalRangeJoin::GlobalSortedTable;
|
|
59513
59481
|
|
|
59514
|
-
inline idx_t BlockSize(idx_t i) const {
|
|
59515
|
-
return global_sort_state.sorted_blocks[0]->radix_sorting_data[i].count;
|
|
59516
|
-
}
|
|
59517
|
-
|
|
59518
|
-
inline void Combine(IEJoinLocalState &lstate) {
|
|
59519
|
-
global_sort_state.AddLocalState(lstate.local_sort_state);
|
|
59520
|
-
has_null += lstate.has_null;
|
|
59521
|
-
count += lstate.count;
|
|
59522
|
-
}
|
|
59523
|
-
|
|
59524
|
-
inline void IntializeMatches() {
|
|
59525
|
-
found_match = unique_ptr<bool[]>(new bool[Count()]);
|
|
59526
|
-
memset(found_match.get(), 0, sizeof(bool) * Count());
|
|
59527
|
-
}
|
|
59528
|
-
|
|
59529
|
-
void Print() {
|
|
59530
|
-
PayloadScanner scanner(global_sort_state, false);
|
|
59531
|
-
DataChunk chunk;
|
|
59532
|
-
chunk.Initialize(scanner.GetPayloadTypes());
|
|
59533
|
-
for (;;) {
|
|
59534
|
-
scanner.Scan(chunk);
|
|
59535
|
-
const auto count = chunk.size();
|
|
59536
|
-
if (!count) {
|
|
59537
|
-
break;
|
|
59538
|
-
}
|
|
59539
|
-
chunk.Print();
|
|
59540
|
-
}
|
|
59541
|
-
}
|
|
59542
|
-
|
|
59543
|
-
GlobalSortState global_sort_state;
|
|
59544
|
-
//! Whether or not the RHS has NULL values
|
|
59545
|
-
atomic<idx_t> has_null;
|
|
59546
|
-
//! The total number of rows in the RHS
|
|
59547
|
-
atomic<idx_t> count;
|
|
59548
|
-
//! A bool indicating for each tuple in the RHS if they found a match (only used in FULL OUTER JOIN)
|
|
59549
|
-
unique_ptr<bool[]> found_match;
|
|
59550
|
-
//! Memory usage per thread
|
|
59551
|
-
idx_t memory_per_thread;
|
|
59552
|
-
};
|
|
59553
|
-
|
|
59554
|
-
class IEJoinGlobalState : public GlobalSinkState {
|
|
59555
59482
|
public:
|
|
59556
59483
|
IEJoinGlobalState(ClientContext &context, const PhysicalIEJoin &op) : child(0) {
|
|
59557
59484
|
tables.resize(2);
|
|
@@ -59559,13 +59486,13 @@ public:
|
|
|
59559
59486
|
lhs_layout.Initialize(op.children[0]->types);
|
|
59560
59487
|
vector<BoundOrderByNode> lhs_order;
|
|
59561
59488
|
lhs_order.emplace_back(op.lhs_orders[0][0].Copy());
|
|
59562
|
-
tables[0] = make_unique<
|
|
59489
|
+
tables[0] = make_unique<GlobalSortedTable>(context, lhs_order, lhs_layout);
|
|
59563
59490
|
|
|
59564
59491
|
RowLayout rhs_layout;
|
|
59565
59492
|
rhs_layout.Initialize(op.children[1]->types);
|
|
59566
59493
|
vector<BoundOrderByNode> rhs_order;
|
|
59567
59494
|
rhs_order.emplace_back(op.rhs_orders[0][0].Copy());
|
|
59568
|
-
tables[1] = make_unique<
|
|
59495
|
+
tables[1] = make_unique<GlobalSortedTable>(context, rhs_order, rhs_layout);
|
|
59569
59496
|
}
|
|
59570
59497
|
|
|
59571
59498
|
IEJoinGlobalState(IEJoinGlobalState &prev)
|
|
@@ -59575,10 +59502,10 @@ public:
|
|
|
59575
59502
|
void Sink(DataChunk &input, IEJoinLocalState &lstate) {
|
|
59576
59503
|
auto &table = *tables[child];
|
|
59577
59504
|
auto &global_sort_state = table.global_sort_state;
|
|
59578
|
-
auto &local_sort_state = lstate.local_sort_state;
|
|
59505
|
+
auto &local_sort_state = lstate.table.local_sort_state;
|
|
59579
59506
|
|
|
59580
59507
|
// Sink the data into the local sort state
|
|
59581
|
-
lstate.Sink(input, global_sort_state);
|
|
59508
|
+
lstate.table.Sink(input, global_sort_state);
|
|
59582
59509
|
|
|
59583
59510
|
// When sorting data reaches a certain size, we sort it
|
|
59584
59511
|
if (local_sort_state.SizeInBytes() >= table.memory_per_thread) {
|
|
@@ -59586,7 +59513,7 @@ public:
|
|
|
59586
59513
|
}
|
|
59587
59514
|
}
|
|
59588
59515
|
|
|
59589
|
-
vector<unique_ptr<
|
|
59516
|
+
vector<unique_ptr<GlobalSortedTable>> tables;
|
|
59590
59517
|
size_t child;
|
|
59591
59518
|
};
|
|
59592
59519
|
|
|
@@ -59601,7 +59528,7 @@ unique_ptr<LocalSinkState> PhysicalIEJoin::GetLocalSinkState(ExecutionContext &c
|
|
|
59601
59528
|
const auto &ie_sink = (IEJoinGlobalState &)*sink_state;
|
|
59602
59529
|
sink_child = ie_sink.child;
|
|
59603
59530
|
}
|
|
59604
|
-
return make_unique<IEJoinLocalState>(
|
|
59531
|
+
return make_unique<IEJoinLocalState>(*this, sink_child);
|
|
59605
59532
|
}
|
|
59606
59533
|
|
|
59607
59534
|
SinkResultType PhysicalIEJoin::Sink(ExecutionContext &context, GlobalSinkState &gstate_p, LocalSinkState &lstate_p,
|
|
@@ -59617,80 +59544,16 @@ SinkResultType PhysicalIEJoin::Sink(ExecutionContext &context, GlobalSinkState &
|
|
|
59617
59544
|
void PhysicalIEJoin::Combine(ExecutionContext &context, GlobalSinkState &gstate_p, LocalSinkState &lstate_p) const {
|
|
59618
59545
|
auto &gstate = (IEJoinGlobalState &)gstate_p;
|
|
59619
59546
|
auto &lstate = (IEJoinLocalState &)lstate_p;
|
|
59620
|
-
gstate.tables[gstate.child]->Combine(lstate);
|
|
59547
|
+
gstate.tables[gstate.child]->Combine(lstate.table);
|
|
59621
59548
|
auto &client_profiler = QueryProfiler::Get(context.client);
|
|
59622
59549
|
|
|
59623
|
-
context.thread.profiler.Flush(this, &lstate.executor, gstate.child ? "rhs_executor" : "lhs_executor", 1);
|
|
59550
|
+
context.thread.profiler.Flush(this, &lstate.table.executor, gstate.child ? "rhs_executor" : "lhs_executor", 1);
|
|
59624
59551
|
client_profiler.Flush(context.thread.profiler);
|
|
59625
59552
|
}
|
|
59626
59553
|
|
|
59627
59554
|
//===--------------------------------------------------------------------===//
|
|
59628
59555
|
// Finalize
|
|
59629
59556
|
//===--------------------------------------------------------------------===//
|
|
59630
|
-
class IEJoinFinalizeTask : public ExecutorTask {
|
|
59631
|
-
public:
|
|
59632
|
-
IEJoinFinalizeTask(shared_ptr<Event> event_p, ClientContext &context, IEJoinSortedTable &table)
|
|
59633
|
-
: ExecutorTask(context), event(move(event_p)), context(context), table(table) {
|
|
59634
|
-
}
|
|
59635
|
-
|
|
59636
|
-
TaskExecutionResult ExecuteTask(TaskExecutionMode mode) override {
|
|
59637
|
-
// Initialize iejoin sorted and iterate until done
|
|
59638
|
-
auto &global_sort_state = table.global_sort_state;
|
|
59639
|
-
MergeSorter merge_sorter(global_sort_state, BufferManager::GetBufferManager(context));
|
|
59640
|
-
merge_sorter.PerformInMergeRound();
|
|
59641
|
-
event->FinishTask();
|
|
59642
|
-
|
|
59643
|
-
return TaskExecutionResult::TASK_FINISHED;
|
|
59644
|
-
}
|
|
59645
|
-
|
|
59646
|
-
private:
|
|
59647
|
-
shared_ptr<Event> event;
|
|
59648
|
-
ClientContext &context;
|
|
59649
|
-
IEJoinSortedTable &table;
|
|
59650
|
-
};
|
|
59651
|
-
|
|
59652
|
-
class IEJoinFinalizeEvent : public Event {
|
|
59653
|
-
public:
|
|
59654
|
-
IEJoinFinalizeEvent(IEJoinSortedTable &table_p, Pipeline &pipeline_p)
|
|
59655
|
-
: Event(pipeline_p.executor), table(table_p), pipeline(pipeline_p) {
|
|
59656
|
-
}
|
|
59657
|
-
|
|
59658
|
-
IEJoinSortedTable &table;
|
|
59659
|
-
Pipeline &pipeline;
|
|
59660
|
-
|
|
59661
|
-
public:
|
|
59662
|
-
void Schedule() override {
|
|
59663
|
-
auto &context = pipeline.GetClientContext();
|
|
59664
|
-
|
|
59665
|
-
// Schedule tasks equal to the number of threads, which will each iejoin multiple partitions
|
|
59666
|
-
auto &ts = TaskScheduler::GetScheduler(context);
|
|
59667
|
-
idx_t num_threads = ts.NumberOfThreads();
|
|
59668
|
-
|
|
59669
|
-
vector<unique_ptr<Task>> iejoin_tasks;
|
|
59670
|
-
for (idx_t tnum = 0; tnum < num_threads; tnum++) {
|
|
59671
|
-
iejoin_tasks.push_back(make_unique<IEJoinFinalizeTask>(shared_from_this(), context, table));
|
|
59672
|
-
}
|
|
59673
|
-
SetTasks(move(iejoin_tasks));
|
|
59674
|
-
}
|
|
59675
|
-
|
|
59676
|
-
void FinishEvent() override {
|
|
59677
|
-
auto &global_sort_state = table.global_sort_state;
|
|
59678
|
-
|
|
59679
|
-
global_sort_state.CompleteMergeRound(true);
|
|
59680
|
-
if (global_sort_state.sorted_blocks.size() > 1) {
|
|
59681
|
-
// Multiple blocks remaining: Schedule the next round
|
|
59682
|
-
PhysicalIEJoin::ScheduleMergeTasks(pipeline, *this, table);
|
|
59683
|
-
}
|
|
59684
|
-
}
|
|
59685
|
-
};
|
|
59686
|
-
|
|
59687
|
-
void PhysicalIEJoin::ScheduleMergeTasks(Pipeline &pipeline, Event &event, IEJoinSortedTable &table) {
|
|
59688
|
-
// Initialize global sort state for a round of merging
|
|
59689
|
-
table.global_sort_state.InitializeMergeRound();
|
|
59690
|
-
auto new_event = make_shared<IEJoinFinalizeEvent>(table, pipeline);
|
|
59691
|
-
event.InsertEvent(move(new_event));
|
|
59692
|
-
}
|
|
59693
|
-
|
|
59694
59557
|
SinkFinalizeType PhysicalIEJoin::Finalize(Pipeline &pipeline, Event &event, ClientContext &context,
|
|
59695
59558
|
GlobalSinkState &gstate_p) const {
|
|
59696
59559
|
auto &gstate = (IEJoinGlobalState &)gstate_p;
|
|
@@ -59706,14 +59569,10 @@ SinkFinalizeType PhysicalIEJoin::Finalize(Pipeline &pipeline, Event &event, Clie
|
|
|
59706
59569
|
return SinkFinalizeType::NO_OUTPUT_POSSIBLE;
|
|
59707
59570
|
}
|
|
59708
59571
|
|
|
59709
|
-
//
|
|
59710
|
-
|
|
59711
|
-
|
|
59712
|
-
// Start the iejoin phase or finish if a iejoin is not necessary
|
|
59713
|
-
if (global_sort_state.sorted_blocks.size() > 1) {
|
|
59714
|
-
PhysicalIEJoin::ScheduleMergeTasks(pipeline, event, table);
|
|
59715
|
-
}
|
|
59572
|
+
// Sort the current input child
|
|
59573
|
+
table.Finalize(pipeline, event);
|
|
59716
59574
|
|
|
59575
|
+
// Move to the next input child
|
|
59717
59576
|
++gstate.child;
|
|
59718
59577
|
|
|
59719
59578
|
return SinkFinalizeType::READY;
|
|
@@ -59722,6 +59581,14 @@ SinkFinalizeType PhysicalIEJoin::Finalize(Pipeline &pipeline, Event &event, Clie
|
|
|
59722
59581
|
//===--------------------------------------------------------------------===//
|
|
59723
59582
|
// Operator
|
|
59724
59583
|
//===--------------------------------------------------------------------===//
|
|
59584
|
+
OperatorResultType PhysicalIEJoin::Execute(ExecutionContext &context, DataChunk &input, DataChunk &chunk,
|
|
59585
|
+
GlobalOperatorState &gstate, OperatorState &state) const {
|
|
59586
|
+
return OperatorResultType::FINISHED;
|
|
59587
|
+
}
|
|
59588
|
+
|
|
59589
|
+
//===--------------------------------------------------------------------===//
|
|
59590
|
+
// Source
|
|
59591
|
+
//===--------------------------------------------------------------------===//
|
|
59725
59592
|
struct SBIterator {
|
|
59726
59593
|
static int ComparisonValue(ExpressionType comparison) {
|
|
59727
59594
|
switch (comparison) {
|
|
@@ -59821,7 +59688,7 @@ struct SBIterator {
|
|
|
59821
59688
|
};
|
|
59822
59689
|
|
|
59823
59690
|
struct IEJoinUnion {
|
|
59824
|
-
using SortedTable =
|
|
59691
|
+
using SortedTable = PhysicalRangeJoin::GlobalSortedTable;
|
|
59825
59692
|
|
|
59826
59693
|
static idx_t AppendKey(SortedTable &table, ExpressionExecutor &executor, SortedTable &marked, int64_t increment,
|
|
59827
59694
|
int64_t base, const idx_t block_idx);
|
|
@@ -60262,61 +60129,11 @@ idx_t IEJoinUnion::JoinComplexBlocks(SelectionVector &lsel, SelectionVector &rse
|
|
|
60262
60129
|
|
|
60263
60130
|
class IEJoinState : public OperatorState {
|
|
60264
60131
|
public:
|
|
60265
|
-
explicit IEJoinState(const PhysicalIEJoin &op) : local_left(op
|
|
60132
|
+
explicit IEJoinState(const PhysicalIEJoin &op) : local_left(op, 0) {};
|
|
60266
60133
|
|
|
60267
60134
|
IEJoinLocalState local_left;
|
|
60268
60135
|
};
|
|
60269
60136
|
|
|
60270
|
-
static void SliceSortedPayload(DataChunk &payload, GlobalSortState &state, const idx_t block_idx,
|
|
60271
|
-
const SelectionVector &result, const idx_t result_count, const idx_t left_cols = 0) {
|
|
60272
|
-
// There should only be one sorted block if they have been sorted
|
|
60273
|
-
D_ASSERT(state.sorted_blocks.size() == 1);
|
|
60274
|
-
SBScanState read_state(state.buffer_manager, state);
|
|
60275
|
-
read_state.sb = state.sorted_blocks[0].get();
|
|
60276
|
-
auto &sorted_data = *read_state.sb->payload_data;
|
|
60277
|
-
|
|
60278
|
-
read_state.SetIndices(block_idx, 0);
|
|
60279
|
-
read_state.PinData(sorted_data);
|
|
60280
|
-
const auto data_ptr = read_state.DataPtr(sorted_data);
|
|
60281
|
-
|
|
60282
|
-
// Set up a batch of pointers to scan data from
|
|
60283
|
-
Vector addresses(LogicalType::POINTER, result_count);
|
|
60284
|
-
auto data_pointers = FlatVector::GetData<data_ptr_t>(addresses);
|
|
60285
|
-
|
|
60286
|
-
// Set up the data pointers for the values that are actually referenced
|
|
60287
|
-
const idx_t &row_width = sorted_data.layout.GetRowWidth();
|
|
60288
|
-
|
|
60289
|
-
auto prev_idx = result.get_index(0);
|
|
60290
|
-
SelectionVector gsel(result_count);
|
|
60291
|
-
idx_t addr_count = 0;
|
|
60292
|
-
gsel.set_index(0, addr_count);
|
|
60293
|
-
data_pointers[addr_count] = data_ptr + prev_idx * row_width;
|
|
60294
|
-
for (idx_t i = 1; i < result_count; ++i) {
|
|
60295
|
-
const auto row_idx = result.get_index(i);
|
|
60296
|
-
if (row_idx != prev_idx) {
|
|
60297
|
-
data_pointers[++addr_count] = data_ptr + row_idx * row_width;
|
|
60298
|
-
prev_idx = row_idx;
|
|
60299
|
-
}
|
|
60300
|
-
gsel.set_index(i, addr_count);
|
|
60301
|
-
}
|
|
60302
|
-
++addr_count;
|
|
60303
|
-
|
|
60304
|
-
// Unswizzle the offsets back to pointers (if needed)
|
|
60305
|
-
if (!sorted_data.layout.AllConstant() && state.external) {
|
|
60306
|
-
RowOperations::UnswizzlePointers(sorted_data.layout, data_ptr, read_state.payload_heap_handle->Ptr(),
|
|
60307
|
-
addr_count);
|
|
60308
|
-
}
|
|
60309
|
-
|
|
60310
|
-
// Deserialize the payload data
|
|
60311
|
-
auto sel = FlatVector::IncrementalSelectionVector();
|
|
60312
|
-
for (idx_t col_idx = 0; col_idx < sorted_data.layout.ColumnCount(); col_idx++) {
|
|
60313
|
-
const auto col_offset = sorted_data.layout.GetOffsets()[col_idx];
|
|
60314
|
-
auto &col = payload.data[left_cols + col_idx];
|
|
60315
|
-
RowOperations::Gather(addresses, *sel, col, *sel, addr_count, col_offset, col_idx);
|
|
60316
|
-
col.Slice(gsel, result_count);
|
|
60317
|
-
}
|
|
60318
|
-
}
|
|
60319
|
-
|
|
60320
60137
|
class IEJoinLocalSourceState : public LocalSourceState {
|
|
60321
60138
|
public:
|
|
60322
60139
|
explicit IEJoinLocalSourceState(const PhysicalIEJoin &op)
|
|
@@ -60342,9 +60159,6 @@ public:
|
|
|
60342
60159
|
right_keys.Initialize(right_types);
|
|
60343
60160
|
}
|
|
60344
60161
|
|
|
60345
|
-
idx_t SelectJoinTail(const ExpressionType &condition, Vector &left, Vector &right, const SelectionVector *sel,
|
|
60346
|
-
idx_t count);
|
|
60347
|
-
|
|
60348
60162
|
idx_t SelectOuterRows(bool *matches) {
|
|
60349
60163
|
idx_t count = 0;
|
|
60350
60164
|
for (; outer_idx < outer_count; ++outer_idx) {
|
|
@@ -60386,30 +60200,6 @@ public:
|
|
|
60386
60200
|
bool *right_matches;
|
|
60387
60201
|
};
|
|
60388
60202
|
|
|
60389
|
-
idx_t IEJoinLocalSourceState::SelectJoinTail(const ExpressionType &condition, Vector &left, Vector &right,
|
|
60390
|
-
const SelectionVector *sel, idx_t count) {
|
|
60391
|
-
switch (condition) {
|
|
60392
|
-
case ExpressionType::COMPARE_NOTEQUAL:
|
|
60393
|
-
return VectorOperations::NotEquals(left, right, sel, count, &true_sel, nullptr);
|
|
60394
|
-
case ExpressionType::COMPARE_LESSTHAN:
|
|
60395
|
-
return VectorOperations::LessThan(left, right, sel, count, &true_sel, nullptr);
|
|
60396
|
-
case ExpressionType::COMPARE_GREATERTHAN:
|
|
60397
|
-
return VectorOperations::GreaterThan(left, right, sel, count, &true_sel, nullptr);
|
|
60398
|
-
case ExpressionType::COMPARE_LESSTHANOREQUALTO:
|
|
60399
|
-
return VectorOperations::LessThanEquals(left, right, sel, count, &true_sel, nullptr);
|
|
60400
|
-
case ExpressionType::COMPARE_GREATERTHANOREQUALTO:
|
|
60401
|
-
return VectorOperations::GreaterThanEquals(left, right, sel, count, &true_sel, nullptr);
|
|
60402
|
-
case ExpressionType::COMPARE_DISTINCT_FROM:
|
|
60403
|
-
return VectorOperations::DistinctFrom(left, right, sel, count, &true_sel, nullptr);
|
|
60404
|
-
case ExpressionType::COMPARE_NOT_DISTINCT_FROM:
|
|
60405
|
-
case ExpressionType::COMPARE_EQUAL:
|
|
60406
|
-
default:
|
|
60407
|
-
throw InternalException("Unsupported comparison type for PhysicalIEJoin");
|
|
60408
|
-
}
|
|
60409
|
-
|
|
60410
|
-
return count;
|
|
60411
|
-
}
|
|
60412
|
-
|
|
60413
60203
|
void PhysicalIEJoin::ResolveComplexJoin(ExecutionContext &context, DataChunk &chunk, LocalSourceState &state_p) const {
|
|
60414
60204
|
auto &state = (IEJoinLocalSourceState &)state_p;
|
|
60415
60205
|
auto &ie_sink = (IEJoinGlobalState &)*sink_state;
|
|
@@ -60446,6 +60236,7 @@ void PhysicalIEJoin::ResolveComplexJoin(ExecutionContext &context, DataChunk &ch
|
|
|
60446
60236
|
state.right_executor.SetChunk(right_chunk);
|
|
60447
60237
|
|
|
60448
60238
|
auto tail_count = result_count;
|
|
60239
|
+
auto true_sel = &state.true_sel;
|
|
60449
60240
|
for (size_t cmp_idx = 0; cmp_idx < tail_cols; ++cmp_idx) {
|
|
60450
60241
|
auto &left = state.left_keys.data[cmp_idx];
|
|
60451
60242
|
state.left_executor.ExecuteExpression(cmp_idx, left);
|
|
@@ -60457,8 +60248,8 @@ void PhysicalIEJoin::ResolveComplexJoin(ExecutionContext &context, DataChunk &ch
|
|
|
60457
60248
|
left.Slice(*sel, tail_count);
|
|
60458
60249
|
right.Slice(*sel, tail_count);
|
|
60459
60250
|
}
|
|
60460
|
-
tail_count =
|
|
60461
|
-
sel =
|
|
60251
|
+
tail_count = SelectJoinTail(conditions[cmp_idx + 2].comparison, left, right, sel, tail_count, true_sel);
|
|
60252
|
+
sel = true_sel;
|
|
60462
60253
|
}
|
|
60463
60254
|
chunk.Fuse(right_chunk);
|
|
60464
60255
|
|
|
@@ -60483,14 +60274,6 @@ void PhysicalIEJoin::ResolveComplexJoin(ExecutionContext &context, DataChunk &ch
|
|
|
60483
60274
|
} while (chunk.size() == 0);
|
|
60484
60275
|
}
|
|
60485
60276
|
|
|
60486
|
-
OperatorResultType PhysicalIEJoin::Execute(ExecutionContext &context, DataChunk &input, DataChunk &chunk,
|
|
60487
|
-
GlobalOperatorState &gstate, OperatorState &state) const {
|
|
60488
|
-
return OperatorResultType::FINISHED;
|
|
60489
|
-
}
|
|
60490
|
-
|
|
60491
|
-
//===--------------------------------------------------------------------===//
|
|
60492
|
-
// Source
|
|
60493
|
-
//===--------------------------------------------------------------------===//
|
|
60494
60277
|
class IEJoinGlobalSourceState : public GlobalSourceState {
|
|
60495
60278
|
public:
|
|
60496
60279
|
explicit IEJoinGlobalSourceState(const PhysicalIEJoin &op)
|
|
@@ -61631,7 +61414,7 @@ class MergeJoinGlobalState;
|
|
|
61631
61414
|
|
|
61632
61415
|
//! PhysicalPiecewiseMergeJoin represents a piecewise merge loop join between
|
|
61633
61416
|
//! two tables
|
|
61634
|
-
class PhysicalPiecewiseMergeJoin : public
|
|
61417
|
+
class PhysicalPiecewiseMergeJoin : public PhysicalRangeJoin {
|
|
61635
61418
|
public:
|
|
61636
61419
|
PhysicalPiecewiseMergeJoin(LogicalOperator &op, unique_ptr<PhysicalOperator> left,
|
|
61637
61420
|
unique_ptr<PhysicalOperator> right, vector<JoinCondition> cond, JoinType join_type,
|
|
@@ -61678,9 +61461,6 @@ public:
|
|
|
61678
61461
|
SinkFinalizeType Finalize(Pipeline &pipeline, Event &event, ClientContext &context,
|
|
61679
61462
|
GlobalSinkState &gstate) const override;
|
|
61680
61463
|
|
|
61681
|
-
//! Schedules tasks to merge sort the RHS data during the Finalize phase
|
|
61682
|
-
static void ScheduleMergeTasks(Pipeline &pipeline, Event &event, MergeJoinGlobalState &state);
|
|
61683
|
-
|
|
61684
61464
|
bool IsSink() const override {
|
|
61685
61465
|
return true;
|
|
61686
61466
|
}
|
|
@@ -61715,29 +61495,8 @@ namespace duckdb {
|
|
|
61715
61495
|
PhysicalPiecewiseMergeJoin::PhysicalPiecewiseMergeJoin(LogicalOperator &op, unique_ptr<PhysicalOperator> left,
|
|
61716
61496
|
unique_ptr<PhysicalOperator> right, vector<JoinCondition> cond,
|
|
61717
61497
|
JoinType join_type, idx_t estimated_cardinality)
|
|
61718
|
-
:
|
|
61719
|
-
|
|
61720
|
-
// Reorder the conditions so that ranges are at the front.
|
|
61721
|
-
// TODO: use stats to improve the choice?
|
|
61722
|
-
if (conditions.size() > 1) {
|
|
61723
|
-
auto conditions_p = std::move(conditions);
|
|
61724
|
-
conditions.resize(conditions_p.size());
|
|
61725
|
-
idx_t range_position = 0;
|
|
61726
|
-
idx_t other_position = conditions_p.size();
|
|
61727
|
-
for (idx_t i = 0; i < conditions_p.size(); ++i) {
|
|
61728
|
-
switch (conditions_p[i].comparison) {
|
|
61729
|
-
case ExpressionType::COMPARE_LESSTHAN:
|
|
61730
|
-
case ExpressionType::COMPARE_LESSTHANOREQUALTO:
|
|
61731
|
-
case ExpressionType::COMPARE_GREATERTHAN:
|
|
61732
|
-
case ExpressionType::COMPARE_GREATERTHANOREQUALTO:
|
|
61733
|
-
conditions[range_position++] = std::move(conditions_p[i]);
|
|
61734
|
-
break;
|
|
61735
|
-
default:
|
|
61736
|
-
conditions[--other_position] = std::move(conditions_p[i]);
|
|
61737
|
-
break;
|
|
61738
|
-
}
|
|
61739
|
-
}
|
|
61740
|
-
}
|
|
61498
|
+
: PhysicalRangeJoin(op, PhysicalOperatorType::PIECEWISE_MERGE_JOIN, move(left), move(right), move(cond), join_type,
|
|
61499
|
+
estimated_cardinality) {
|
|
61741
61500
|
|
|
61742
61501
|
for (auto &cond : conditions) {
|
|
61743
61502
|
D_ASSERT(cond.left->return_type == cond.right->return_type);
|
|
@@ -61770,172 +61529,60 @@ PhysicalPiecewiseMergeJoin::PhysicalPiecewiseMergeJoin(LogicalOperator &op, uniq
|
|
|
61770
61529
|
throw NotImplementedException("Unimplemented join type for merge join");
|
|
61771
61530
|
}
|
|
61772
61531
|
}
|
|
61773
|
-
children.push_back(move(left));
|
|
61774
|
-
children.push_back(move(right));
|
|
61775
61532
|
}
|
|
61776
61533
|
|
|
61777
61534
|
//===--------------------------------------------------------------------===//
|
|
61778
61535
|
// Sink
|
|
61779
61536
|
//===--------------------------------------------------------------------===//
|
|
61780
|
-
class
|
|
61537
|
+
class MergeJoinLocalState : public LocalSinkState {
|
|
61781
61538
|
public:
|
|
61782
|
-
|
|
61783
|
-
: rhs_global_sort_state(buffer_manager, orders, rhs_layout), rhs_has_null(0), rhs_count(0),
|
|
61784
|
-
memory_per_thread(0) {
|
|
61785
|
-
D_ASSERT(orders.size() == 1);
|
|
61786
|
-
}
|
|
61787
|
-
|
|
61788
|
-
inline idx_t Count() const {
|
|
61789
|
-
return rhs_count;
|
|
61539
|
+
explicit MergeJoinLocalState(const PhysicalRangeJoin &op, const idx_t child) : table(op, child) {
|
|
61790
61540
|
}
|
|
61791
61541
|
|
|
61792
|
-
//! The
|
|
61793
|
-
|
|
61794
|
-
//! Global sort state
|
|
61795
|
-
GlobalSortState rhs_global_sort_state;
|
|
61796
|
-
//! Whether or not the RHS has NULL values
|
|
61797
|
-
idx_t rhs_has_null;
|
|
61798
|
-
//! The total number of rows in the RHS
|
|
61799
|
-
idx_t rhs_count;
|
|
61800
|
-
//! A bool indicating for each tuple in the RHS if they found a match (only used in FULL OUTER JOIN)
|
|
61801
|
-
unique_ptr<bool[]> rhs_found_match;
|
|
61802
|
-
//! Memory usage per thread
|
|
61803
|
-
idx_t memory_per_thread;
|
|
61542
|
+
//! The local sort state
|
|
61543
|
+
PhysicalRangeJoin::LocalSortedTable table;
|
|
61804
61544
|
};
|
|
61805
61545
|
|
|
61806
|
-
|
|
61807
|
-
|
|
61808
|
-
|
|
61809
|
-
rhs_layout.Initialize(children[1]->types);
|
|
61810
|
-
vector<BoundOrderByNode> rhs_order;
|
|
61811
|
-
rhs_order.emplace_back(rhs_orders[0].Copy());
|
|
61812
|
-
auto state = make_unique<MergeJoinGlobalState>(BufferManager::GetBufferManager(context), rhs_order, rhs_layout);
|
|
61813
|
-
// Set external (can be force with the PRAGMA)
|
|
61814
|
-
auto &config = ClientConfig::GetConfig(context);
|
|
61815
|
-
state->rhs_global_sort_state.external = config.force_external;
|
|
61816
|
-
// Memory usage per thread should scale with max mem / num threads
|
|
61817
|
-
// We take 1/4th of this, to be conservative
|
|
61818
|
-
idx_t max_memory = BufferManager::GetBufferManager(context).GetMaxMemory();
|
|
61819
|
-
idx_t num_threads = TaskScheduler::GetScheduler(context).NumberOfThreads();
|
|
61820
|
-
state->memory_per_thread = (max_memory / num_threads) / 4;
|
|
61821
|
-
return move(state);
|
|
61822
|
-
}
|
|
61546
|
+
class MergeJoinGlobalState : public GlobalSinkState {
|
|
61547
|
+
public:
|
|
61548
|
+
using GlobalSortedTable = PhysicalRangeJoin::GlobalSortedTable;
|
|
61823
61549
|
|
|
61824
|
-
class MergeJoinLocalState : public LocalSinkState {
|
|
61825
61550
|
public:
|
|
61826
|
-
|
|
61551
|
+
MergeJoinGlobalState(ClientContext &context, const PhysicalPiecewiseMergeJoin &op) {
|
|
61552
|
+
RowLayout rhs_layout;
|
|
61553
|
+
rhs_layout.Initialize(op.children[1]->types);
|
|
61554
|
+
vector<BoundOrderByNode> rhs_order;
|
|
61555
|
+
rhs_order.emplace_back(op.rhs_orders[0].Copy());
|
|
61556
|
+
table = make_unique<GlobalSortedTable>(context, rhs_order, rhs_layout);
|
|
61827
61557
|
}
|
|
61828
61558
|
|
|
61829
|
-
|
|
61830
|
-
|
|
61831
|
-
//! Local copy of the sorting expression executor
|
|
61832
|
-
ExpressionExecutor rhs_executor;
|
|
61833
|
-
//! Holds a vector of incoming sorting columns
|
|
61834
|
-
DataChunk rhs_keys;
|
|
61835
|
-
//! Whether or not the RHS has NULL values
|
|
61836
|
-
idx_t rhs_has_null;
|
|
61837
|
-
//! The total number of rows in the RHS
|
|
61838
|
-
idx_t rhs_count;
|
|
61839
|
-
};
|
|
61840
|
-
|
|
61841
|
-
unique_ptr<LocalSinkState> PhysicalPiecewiseMergeJoin::GetLocalSinkState(ExecutionContext &context) const {
|
|
61842
|
-
auto result = make_unique<MergeJoinLocalState>();
|
|
61843
|
-
// Initialize order clause expression executor and DataChunk
|
|
61844
|
-
vector<LogicalType> types;
|
|
61845
|
-
for (auto &order : rhs_orders) {
|
|
61846
|
-
types.push_back(order.expression->return_type);
|
|
61847
|
-
result->rhs_executor.AddExpression(*order.expression);
|
|
61559
|
+
inline idx_t Count() const {
|
|
61560
|
+
return table->count;
|
|
61848
61561
|
}
|
|
61849
|
-
result->rhs_keys.Initialize(types);
|
|
61850
|
-
return move(result);
|
|
61851
|
-
}
|
|
61852
61562
|
|
|
61853
|
-
|
|
61854
|
-
|
|
61855
|
-
|
|
61856
|
-
D_ASSERT(keys.ColumnCount() > 0);
|
|
61857
|
-
const auto count = keys.size();
|
|
61563
|
+
void Sink(DataChunk &input, MergeJoinLocalState &lstate) {
|
|
61564
|
+
auto &global_sort_state = table->global_sort_state;
|
|
61565
|
+
auto &local_sort_state = lstate.table.local_sort_state;
|
|
61858
61566
|
|
|
61859
|
-
|
|
61860
|
-
|
|
61861
|
-
if (v.GetVectorType() == VectorType::CONSTANT_VECTOR) {
|
|
61862
|
-
++all_constant;
|
|
61863
|
-
}
|
|
61864
|
-
}
|
|
61567
|
+
// Sink the data into the local sort state
|
|
61568
|
+
lstate.table.Sink(input, global_sort_state);
|
|
61865
61569
|
|
|
61866
|
-
|
|
61867
|
-
|
|
61868
|
-
|
|
61869
|
-
for (auto &v : keys.data) {
|
|
61870
|
-
if (ConstantVector::IsNull(v)) {
|
|
61871
|
-
ConstantVector::SetNull(primary, true);
|
|
61872
|
-
return count;
|
|
61873
|
-
}
|
|
61874
|
-
}
|
|
61875
|
-
return 0;
|
|
61876
|
-
} else if (keys.ColumnCount() > 1) {
|
|
61877
|
-
// Normalify the primary, as it will need to merge arbitrary validity masks
|
|
61878
|
-
primary.Normalify(count);
|
|
61879
|
-
auto &pvalidity = FlatVector::Validity(primary);
|
|
61880
|
-
for (size_t c = 1; c < keys.data.size(); ++c) {
|
|
61881
|
-
// Skip comparisons that accept NULLs
|
|
61882
|
-
if (conditions[c].comparison == ExpressionType::COMPARE_DISTINCT_FROM) {
|
|
61883
|
-
continue;
|
|
61884
|
-
}
|
|
61885
|
-
// Orrify the rest, as the sort code will do this anyway.
|
|
61886
|
-
auto &v = keys.data[c];
|
|
61887
|
-
VectorData vdata;
|
|
61888
|
-
v.Orrify(count, vdata);
|
|
61889
|
-
auto &vvalidity = vdata.validity;
|
|
61890
|
-
if (vvalidity.AllValid()) {
|
|
61891
|
-
continue;
|
|
61892
|
-
}
|
|
61893
|
-
pvalidity.EnsureWritable();
|
|
61894
|
-
switch (v.GetVectorType()) {
|
|
61895
|
-
case VectorType::FLAT_VECTOR: {
|
|
61896
|
-
// Merge entire entries
|
|
61897
|
-
auto pmask = pvalidity.GetData();
|
|
61898
|
-
const auto entry_count = pvalidity.EntryCount(count);
|
|
61899
|
-
for (idx_t entry_idx = 0; entry_idx < entry_count; ++entry_idx) {
|
|
61900
|
-
pmask[entry_idx] &= vvalidity.GetValidityEntry(entry_idx);
|
|
61901
|
-
}
|
|
61902
|
-
break;
|
|
61903
|
-
}
|
|
61904
|
-
case VectorType::CONSTANT_VECTOR:
|
|
61905
|
-
// All or nothing
|
|
61906
|
-
if (ConstantVector::IsNull(v)) {
|
|
61907
|
-
pvalidity.SetAllInvalid(count);
|
|
61908
|
-
return count;
|
|
61909
|
-
}
|
|
61910
|
-
break;
|
|
61911
|
-
default:
|
|
61912
|
-
// One by one
|
|
61913
|
-
for (idx_t i = 0; i < count; ++i) {
|
|
61914
|
-
const auto idx = vdata.sel->get_index(i);
|
|
61915
|
-
if (!vvalidity.RowIsValidUnsafe(idx)) {
|
|
61916
|
-
pvalidity.SetInvalidUnsafe(i);
|
|
61917
|
-
}
|
|
61918
|
-
}
|
|
61919
|
-
break;
|
|
61920
|
-
}
|
|
61570
|
+
// When sorting data reaches a certain size, we sort it
|
|
61571
|
+
if (local_sort_state.SizeInBytes() >= table->memory_per_thread) {
|
|
61572
|
+
local_sort_state.Sort(global_sort_state, true);
|
|
61921
61573
|
}
|
|
61922
|
-
return count - pvalidity.CountValid(count);
|
|
61923
|
-
} else {
|
|
61924
|
-
return count - VectorOperations::CountNotNull(primary, count);
|
|
61925
61574
|
}
|
|
61926
|
-
}
|
|
61927
61575
|
|
|
61928
|
-
|
|
61929
|
-
|
|
61930
|
-
// Only sort the first key
|
|
61931
|
-
DataChunk join_head;
|
|
61932
|
-
join_head.data.emplace_back(Vector(join_keys.data[0]));
|
|
61933
|
-
join_head.SetCardinality(join_keys.size());
|
|
61576
|
+
unique_ptr<GlobalSortedTable> table;
|
|
61577
|
+
};
|
|
61934
61578
|
|
|
61935
|
-
|
|
61936
|
-
|
|
61937
|
-
|
|
61938
|
-
|
|
61579
|
+
unique_ptr<GlobalSinkState> PhysicalPiecewiseMergeJoin::GetGlobalSinkState(ClientContext &context) const {
|
|
61580
|
+
return make_unique<MergeJoinGlobalState>(context, *this);
|
|
61581
|
+
}
|
|
61582
|
+
|
|
61583
|
+
unique_ptr<LocalSinkState> PhysicalPiecewiseMergeJoin::GetLocalSinkState(ExecutionContext &context) const {
|
|
61584
|
+
// We only sink the RHS
|
|
61585
|
+
return make_unique<MergeJoinLocalState>(*this, 1);
|
|
61939
61586
|
}
|
|
61940
61587
|
|
|
61941
61588
|
SinkResultType PhysicalPiecewiseMergeJoin::Sink(ExecutionContext &context, GlobalSinkState &gstate_p,
|
|
@@ -61943,30 +61590,8 @@ SinkResultType PhysicalPiecewiseMergeJoin::Sink(ExecutionContext &context, Globa
|
|
|
61943
61590
|
auto &gstate = (MergeJoinGlobalState &)gstate_p;
|
|
61944
61591
|
auto &lstate = (MergeJoinLocalState &)lstate_p;
|
|
61945
61592
|
|
|
61946
|
-
|
|
61947
|
-
auto &local_sort_state = lstate.rhs_local_sort_state;
|
|
61948
|
-
|
|
61949
|
-
// Initialize local state (if necessary)
|
|
61950
|
-
if (!local_sort_state.initialized) {
|
|
61951
|
-
local_sort_state.Initialize(global_sort_state, BufferManager::GetBufferManager(context.client));
|
|
61952
|
-
}
|
|
61953
|
-
|
|
61954
|
-
// Obtain sorting columns
|
|
61955
|
-
auto &join_keys = lstate.rhs_keys;
|
|
61956
|
-
join_keys.Reset();
|
|
61957
|
-
lstate.rhs_executor.Execute(input, join_keys);
|
|
61958
|
-
|
|
61959
|
-
// Count the NULLs so we can exclude them later
|
|
61960
|
-
lstate.rhs_has_null += PiecewiseMergeNulls(join_keys, conditions);
|
|
61961
|
-
lstate.rhs_count += join_keys.size();
|
|
61962
|
-
|
|
61963
|
-
// Sink the data into the local sort state
|
|
61964
|
-
SinkPiecewiseMergeChunk(local_sort_state, join_keys, input);
|
|
61593
|
+
gstate.Sink(input, lstate);
|
|
61965
61594
|
|
|
61966
|
-
// When sorting data reaches a certain size, we sort it
|
|
61967
|
-
if (local_sort_state.SizeInBytes() >= gstate.memory_per_thread) {
|
|
61968
|
-
local_sort_state.Sort(global_sort_state, true);
|
|
61969
|
-
}
|
|
61970
61595
|
return SinkResultType::NEED_MORE_INPUT;
|
|
61971
61596
|
}
|
|
61972
61597
|
|
|
@@ -61974,105 +61599,33 @@ void PhysicalPiecewiseMergeJoin::Combine(ExecutionContext &context, GlobalSinkSt
|
|
|
61974
61599
|
LocalSinkState &lstate_p) const {
|
|
61975
61600
|
auto &gstate = (MergeJoinGlobalState &)gstate_p;
|
|
61976
61601
|
auto &lstate = (MergeJoinLocalState &)lstate_p;
|
|
61977
|
-
gstate.
|
|
61978
|
-
lock_guard<mutex> locked(gstate.lock);
|
|
61979
|
-
gstate.rhs_has_null += lstate.rhs_has_null;
|
|
61980
|
-
gstate.rhs_count += lstate.rhs_count;
|
|
61602
|
+
gstate.table->Combine(lstate.table);
|
|
61981
61603
|
auto &client_profiler = QueryProfiler::Get(context.client);
|
|
61982
61604
|
|
|
61983
|
-
context.thread.profiler.Flush(this, &lstate.
|
|
61605
|
+
context.thread.profiler.Flush(this, &lstate.table.executor, "rhs_executor", 1);
|
|
61984
61606
|
client_profiler.Flush(context.thread.profiler);
|
|
61985
61607
|
}
|
|
61986
61608
|
|
|
61987
61609
|
//===--------------------------------------------------------------------===//
|
|
61988
61610
|
// Finalize
|
|
61989
61611
|
//===--------------------------------------------------------------------===//
|
|
61990
|
-
class MergeJoinFinalizeTask : public ExecutorTask {
|
|
61991
|
-
public:
|
|
61992
|
-
MergeJoinFinalizeTask(shared_ptr<Event> event_p, ClientContext &context, MergeJoinGlobalState &state)
|
|
61993
|
-
: ExecutorTask(context), event(move(event_p)), context(context), state(state) {
|
|
61994
|
-
}
|
|
61995
|
-
|
|
61996
|
-
TaskExecutionResult ExecuteTask(TaskExecutionMode mode) override {
|
|
61997
|
-
// Initialize merge sorted and iterate until done
|
|
61998
|
-
auto &global_sort_state = state.rhs_global_sort_state;
|
|
61999
|
-
MergeSorter merge_sorter(global_sort_state, BufferManager::GetBufferManager(context));
|
|
62000
|
-
merge_sorter.PerformInMergeRound();
|
|
62001
|
-
event->FinishTask();
|
|
62002
|
-
|
|
62003
|
-
return TaskExecutionResult::TASK_FINISHED;
|
|
62004
|
-
}
|
|
62005
|
-
|
|
62006
|
-
private:
|
|
62007
|
-
shared_ptr<Event> event;
|
|
62008
|
-
ClientContext &context;
|
|
62009
|
-
MergeJoinGlobalState &state;
|
|
62010
|
-
};
|
|
62011
|
-
|
|
62012
|
-
class MergeJoinFinalizeEvent : public Event {
|
|
62013
|
-
public:
|
|
62014
|
-
MergeJoinFinalizeEvent(MergeJoinGlobalState &gstate_p, Pipeline &pipeline_p)
|
|
62015
|
-
: Event(pipeline_p.executor), gstate(gstate_p), pipeline(pipeline_p) {
|
|
62016
|
-
}
|
|
62017
|
-
|
|
62018
|
-
MergeJoinGlobalState &gstate;
|
|
62019
|
-
Pipeline &pipeline;
|
|
62020
|
-
|
|
62021
|
-
public:
|
|
62022
|
-
void Schedule() override {
|
|
62023
|
-
auto &context = pipeline.GetClientContext();
|
|
62024
|
-
|
|
62025
|
-
// Schedule tasks equal to the number of threads, which will each merge multiple partitions
|
|
62026
|
-
auto &ts = TaskScheduler::GetScheduler(context);
|
|
62027
|
-
idx_t num_threads = ts.NumberOfThreads();
|
|
62028
|
-
|
|
62029
|
-
vector<unique_ptr<Task>> merge_tasks;
|
|
62030
|
-
for (idx_t tnum = 0; tnum < num_threads; tnum++) {
|
|
62031
|
-
merge_tasks.push_back(make_unique<MergeJoinFinalizeTask>(shared_from_this(), context, gstate));
|
|
62032
|
-
}
|
|
62033
|
-
SetTasks(move(merge_tasks));
|
|
62034
|
-
}
|
|
62035
|
-
|
|
62036
|
-
void FinishEvent() override {
|
|
62037
|
-
auto &global_sort_state = gstate.rhs_global_sort_state;
|
|
62038
|
-
|
|
62039
|
-
global_sort_state.CompleteMergeRound(true);
|
|
62040
|
-
if (global_sort_state.sorted_blocks.size() > 1) {
|
|
62041
|
-
// Multiple blocks remaining: Schedule the next round
|
|
62042
|
-
PhysicalPiecewiseMergeJoin::ScheduleMergeTasks(pipeline, *this, gstate);
|
|
62043
|
-
}
|
|
62044
|
-
}
|
|
62045
|
-
};
|
|
62046
|
-
|
|
62047
|
-
void PhysicalPiecewiseMergeJoin::ScheduleMergeTasks(Pipeline &pipeline, Event &event, MergeJoinGlobalState &gstate) {
|
|
62048
|
-
// Initialize global sort state for a round of merging
|
|
62049
|
-
gstate.rhs_global_sort_state.InitializeMergeRound();
|
|
62050
|
-
auto new_event = make_shared<MergeJoinFinalizeEvent>(gstate, pipeline);
|
|
62051
|
-
event.InsertEvent(move(new_event));
|
|
62052
|
-
}
|
|
62053
|
-
|
|
62054
61612
|
SinkFinalizeType PhysicalPiecewiseMergeJoin::Finalize(Pipeline &pipeline, Event &event, ClientContext &context,
|
|
62055
61613
|
GlobalSinkState &gstate_p) const {
|
|
62056
61614
|
auto &gstate = (MergeJoinGlobalState &)gstate_p;
|
|
62057
|
-
auto &global_sort_state = gstate.
|
|
61615
|
+
auto &global_sort_state = gstate.table->global_sort_state;
|
|
62058
61616
|
|
|
62059
61617
|
if (IsRightOuterJoin(join_type)) {
|
|
62060
61618
|
// for FULL/RIGHT OUTER JOIN, initialize found_match to false for every tuple
|
|
62061
|
-
gstate.
|
|
62062
|
-
memset(gstate.rhs_found_match.get(), 0, sizeof(bool) * gstate.Count());
|
|
61619
|
+
gstate.table->IntializeMatches();
|
|
62063
61620
|
}
|
|
62064
61621
|
if (global_sort_state.sorted_blocks.empty() && EmptyResultIfRHSIsEmpty()) {
|
|
62065
61622
|
// Empty input!
|
|
62066
61623
|
return SinkFinalizeType::NO_OUTPUT_POSSIBLE;
|
|
62067
61624
|
}
|
|
62068
61625
|
|
|
62069
|
-
//
|
|
62070
|
-
|
|
61626
|
+
// Sort the current input child
|
|
61627
|
+
gstate.table->Finalize(pipeline, event);
|
|
62071
61628
|
|
|
62072
|
-
// Start the merge phase or finish if a merge is not necessary
|
|
62073
|
-
if (global_sort_state.sorted_blocks.size() > 1) {
|
|
62074
|
-
PhysicalPiecewiseMergeJoin::ScheduleMergeTasks(pipeline, event, gstate);
|
|
62075
|
-
}
|
|
62076
61629
|
return SinkFinalizeType::READY;
|
|
62077
61630
|
}
|
|
62078
61631
|
|
|
@@ -62081,16 +61634,16 @@ SinkFinalizeType PhysicalPiecewiseMergeJoin::Finalize(Pipeline &pipeline, Event
|
|
|
62081
61634
|
//===--------------------------------------------------------------------===//
|
|
62082
61635
|
class PiecewiseMergeJoinState : public OperatorState {
|
|
62083
61636
|
public:
|
|
61637
|
+
using LocalSortedTable = PhysicalRangeJoin::LocalSortedTable;
|
|
61638
|
+
|
|
62084
61639
|
explicit PiecewiseMergeJoinState(const PhysicalPiecewiseMergeJoin &op, BufferManager &buffer_manager,
|
|
62085
61640
|
bool force_external)
|
|
62086
61641
|
: op(op), buffer_manager(buffer_manager), force_external(force_external), left_position(0), first_fetch(true),
|
|
62087
61642
|
finished(true), right_position(0), right_chunk_index(0) {
|
|
62088
61643
|
vector<LogicalType> condition_types;
|
|
62089
61644
|
for (auto &order : op.lhs_orders) {
|
|
62090
|
-
lhs_executor.AddExpression(*order.expression);
|
|
62091
61645
|
condition_types.push_back(order.expression->return_type);
|
|
62092
61646
|
}
|
|
62093
|
-
lhs_keys.Initialize(condition_types);
|
|
62094
61647
|
if (IsLeftOuterJoin(op.join_type)) {
|
|
62095
61648
|
lhs_found_match = unique_ptr<bool[]>(new bool[STANDARD_VECTOR_SIZE]);
|
|
62096
61649
|
memset(lhs_found_match.get(), 0, sizeof(bool) * STANDARD_VECTOR_SIZE);
|
|
@@ -62115,16 +61668,12 @@ public:
|
|
|
62115
61668
|
bool force_external;
|
|
62116
61669
|
|
|
62117
61670
|
// Block sorting
|
|
62118
|
-
DataChunk lhs_keys;
|
|
62119
61671
|
DataChunk lhs_payload;
|
|
62120
|
-
ExpressionExecutor lhs_executor;
|
|
62121
61672
|
unique_ptr<bool[]> lhs_found_match;
|
|
62122
61673
|
vector<BoundOrderByNode> lhs_order;
|
|
62123
61674
|
RowLayout lhs_layout;
|
|
62124
|
-
unique_ptr<
|
|
61675
|
+
unique_ptr<LocalSortedTable> lhs_local_table;
|
|
62125
61676
|
unique_ptr<GlobalSortState> lhs_global_state;
|
|
62126
|
-
idx_t lhs_count;
|
|
62127
|
-
idx_t lhs_has_null;
|
|
62128
61677
|
|
|
62129
61678
|
// Simple scans
|
|
62130
61679
|
idx_t left_position;
|
|
@@ -62144,23 +61693,14 @@ public:
|
|
|
62144
61693
|
|
|
62145
61694
|
public:
|
|
62146
61695
|
void ResolveJoinKeys(DataChunk &input) {
|
|
62147
|
-
// resolve the join keys for the input
|
|
62148
|
-
lhs_keys.Reset();
|
|
62149
|
-
lhs_executor.Execute(input, lhs_keys);
|
|
62150
|
-
|
|
62151
|
-
// Count the NULLs so we can exclude them later
|
|
62152
|
-
lhs_count = lhs_keys.size();
|
|
62153
|
-
lhs_has_null = PiecewiseMergeNulls(lhs_keys, op.conditions);
|
|
62154
|
-
|
|
62155
61696
|
// sort by join key
|
|
62156
61697
|
lhs_global_state = make_unique<GlobalSortState>(buffer_manager, lhs_order, lhs_layout);
|
|
62157
|
-
|
|
62158
|
-
|
|
62159
|
-
SinkPiecewiseMergeChunk(*lhs_local_state, lhs_keys, input);
|
|
61698
|
+
lhs_local_table = make_unique<LocalSortedTable>(op, 0);
|
|
61699
|
+
lhs_local_table->Sink(input, *lhs_global_state);
|
|
62160
61700
|
|
|
62161
|
-
// Set external (can be
|
|
61701
|
+
// Set external (can be forced with the PRAGMA)
|
|
62162
61702
|
lhs_global_state->external = force_external;
|
|
62163
|
-
lhs_global_state->AddLocalState(
|
|
61703
|
+
lhs_global_state->AddLocalState(lhs_local_table->local_sort_state);
|
|
62164
61704
|
lhs_global_state->PrepareMergePhase();
|
|
62165
61705
|
while (lhs_global_state->sorted_blocks.size() > 1) {
|
|
62166
61706
|
MergeSorter merge_sorter(*lhs_global_state, buffer_manager);
|
|
@@ -62176,12 +61716,14 @@ public:
|
|
|
62176
61716
|
scanner.Scan(lhs_payload);
|
|
62177
61717
|
|
|
62178
61718
|
// Recompute the sorted keys from the sorted input
|
|
62179
|
-
|
|
62180
|
-
|
|
61719
|
+
lhs_local_table->keys.Reset();
|
|
61720
|
+
lhs_local_table->executor.Execute(lhs_payload, lhs_local_table->keys);
|
|
62181
61721
|
}
|
|
62182
61722
|
|
|
62183
61723
|
void Finalize(PhysicalOperator *op, ExecutionContext &context) override {
|
|
62184
|
-
|
|
61724
|
+
if (lhs_local_table) {
|
|
61725
|
+
context.thread.profiler.Flush(op, &lhs_local_table->executor, "lhs_executor", 0);
|
|
61726
|
+
}
|
|
62185
61727
|
}
|
|
62186
61728
|
};
|
|
62187
61729
|
|
|
@@ -62212,76 +61754,17 @@ struct BlockMergeInfo {
|
|
|
62212
61754
|
GlobalSortState &state;
|
|
62213
61755
|
//! The block being scanned
|
|
62214
61756
|
const idx_t block_idx;
|
|
62215
|
-
//! The start position being read from the block
|
|
62216
|
-
const idx_t base_idx;
|
|
62217
61757
|
//! The number of not-NULL values in the block (they are at the end)
|
|
62218
61758
|
const idx_t not_null;
|
|
62219
61759
|
//! The current offset in the block
|
|
62220
61760
|
idx_t &entry_idx;
|
|
62221
61761
|
SelectionVector result;
|
|
62222
61762
|
|
|
62223
|
-
BlockMergeInfo(GlobalSortState &state, idx_t block_idx, idx_t
|
|
62224
|
-
: state(state), block_idx(block_idx),
|
|
62225
|
-
result(STANDARD_VECTOR_SIZE) {
|
|
61763
|
+
BlockMergeInfo(GlobalSortState &state, idx_t block_idx, idx_t &entry_idx, idx_t not_null)
|
|
61764
|
+
: state(state), block_idx(block_idx), not_null(not_null), entry_idx(entry_idx), result(STANDARD_VECTOR_SIZE) {
|
|
62226
61765
|
}
|
|
62227
61766
|
};
|
|
62228
61767
|
|
|
62229
|
-
static idx_t SliceSortedPayload(DataChunk &payload, BlockMergeInfo &info, const idx_t result_count,
|
|
62230
|
-
const idx_t left_cols = 0) {
|
|
62231
|
-
// There should only be one sorted block if they have been sorted
|
|
62232
|
-
D_ASSERT(info.state.sorted_blocks.size() == 1);
|
|
62233
|
-
SBScanState read_state(info.state.buffer_manager, info.state);
|
|
62234
|
-
read_state.sb = info.state.sorted_blocks[0].get();
|
|
62235
|
-
auto &sorted_data = *read_state.sb->payload_data;
|
|
62236
|
-
|
|
62237
|
-
// We have to create pointers for the entire block
|
|
62238
|
-
// because unswizzle works on ranges not selections.
|
|
62239
|
-
const auto first_idx = info.result.get_index(0);
|
|
62240
|
-
read_state.SetIndices(info.block_idx, info.base_idx + first_idx);
|
|
62241
|
-
read_state.PinData(sorted_data);
|
|
62242
|
-
const auto data_ptr = read_state.DataPtr(sorted_data);
|
|
62243
|
-
|
|
62244
|
-
// Set up a batch of pointers to scan data from
|
|
62245
|
-
Vector addresses(LogicalType::POINTER, result_count);
|
|
62246
|
-
auto data_pointers = FlatVector::GetData<data_ptr_t>(addresses);
|
|
62247
|
-
|
|
62248
|
-
// Set up the data pointers for the values that are actually referenced
|
|
62249
|
-
// and normalise the selection vector to zero
|
|
62250
|
-
data_ptr_t row_ptr = data_ptr;
|
|
62251
|
-
const idx_t &row_width = sorted_data.layout.GetRowWidth();
|
|
62252
|
-
|
|
62253
|
-
auto prev_idx = first_idx;
|
|
62254
|
-
info.result.set_index(0, 0);
|
|
62255
|
-
idx_t addr_count = 0;
|
|
62256
|
-
data_pointers[addr_count++] = row_ptr;
|
|
62257
|
-
for (idx_t i = 1; i < result_count; ++i) {
|
|
62258
|
-
const auto row_idx = info.result.get_index(i);
|
|
62259
|
-
info.result.set_index(i, row_idx - first_idx);
|
|
62260
|
-
if (row_idx == prev_idx) {
|
|
62261
|
-
continue;
|
|
62262
|
-
}
|
|
62263
|
-
row_ptr += (row_idx - prev_idx) * row_width;
|
|
62264
|
-
data_pointers[addr_count++] = row_ptr;
|
|
62265
|
-
prev_idx = row_idx;
|
|
62266
|
-
}
|
|
62267
|
-
// Unswizzle the offsets back to pointers (if needed)
|
|
62268
|
-
if (!sorted_data.layout.AllConstant() && info.state.external) {
|
|
62269
|
-
const auto next = prev_idx + 1;
|
|
62270
|
-
RowOperations::UnswizzlePointers(sorted_data.layout, data_ptr, read_state.payload_heap_handle->Ptr(), next);
|
|
62271
|
-
}
|
|
62272
|
-
|
|
62273
|
-
// Deserialize the payload data
|
|
62274
|
-
auto sel = FlatVector::IncrementalSelectionVector();
|
|
62275
|
-
for (idx_t col_idx = 0; col_idx < sorted_data.layout.ColumnCount(); col_idx++) {
|
|
62276
|
-
const auto col_offset = sorted_data.layout.GetOffsets()[col_idx];
|
|
62277
|
-
auto &col = payload.data[left_cols + col_idx];
|
|
62278
|
-
RowOperations::Gather(addresses, *sel, col, *sel, addr_count, col_offset, col_idx);
|
|
62279
|
-
col.Slice(info.result, result_count);
|
|
62280
|
-
}
|
|
62281
|
-
|
|
62282
|
-
return first_idx;
|
|
62283
|
-
}
|
|
62284
|
-
|
|
62285
61768
|
static void MergeJoinPinSortingBlock(SBScanState &scan, const idx_t block_idx) {
|
|
62286
61769
|
scan.SetIndices(block_idx, 0);
|
|
62287
61770
|
scan.PinRadix(block_idx);
|
|
@@ -62303,7 +61786,7 @@ static idx_t MergeJoinSimpleBlocks(PiecewiseMergeJoinState &lstate, MergeJoinGlo
|
|
|
62303
61786
|
|
|
62304
61787
|
// The sort parameters should all be the same
|
|
62305
61788
|
auto &lsort = *lstate.lhs_global_state;
|
|
62306
|
-
auto &rsort = rstate.
|
|
61789
|
+
auto &rsort = rstate.table->global_sort_state;
|
|
62307
61790
|
D_ASSERT(lsort.sort_layout.all_constant == rsort.sort_layout.all_constant);
|
|
62308
61791
|
const auto all_constant = lsort.sort_layout.all_constant;
|
|
62309
61792
|
D_ASSERT(lsort.external == rsort.external);
|
|
@@ -62316,7 +61799,7 @@ static idx_t MergeJoinSimpleBlocks(PiecewiseMergeJoinState &lstate, MergeJoinGlo
|
|
|
62316
61799
|
|
|
62317
61800
|
const idx_t l_block_idx = 0;
|
|
62318
61801
|
idx_t l_entry_idx = 0;
|
|
62319
|
-
const auto lhs_not_null = lstate.
|
|
61802
|
+
const auto lhs_not_null = lstate.lhs_local_table->count - lstate.lhs_local_table->has_null;
|
|
62320
61803
|
MergeJoinPinSortingBlock(lread, l_block_idx);
|
|
62321
61804
|
auto l_ptr = MergeJoinRadixPtr(lread, l_entry_idx);
|
|
62322
61805
|
|
|
@@ -62335,7 +61818,8 @@ static idx_t MergeJoinSimpleBlocks(PiecewiseMergeJoinState &lstate, MergeJoinGlo
|
|
|
62335
61818
|
MergeJoinPinSortingBlock(rread, r_block_idx);
|
|
62336
61819
|
|
|
62337
61820
|
auto &rblock = rread.sb->radix_sorting_data[r_block_idx];
|
|
62338
|
-
const auto r_not_null =
|
|
61821
|
+
const auto r_not_null =
|
|
61822
|
+
SortedBlockNotNull(right_base, rblock.count, rstate.table->count - rstate.table->has_null);
|
|
62339
61823
|
if (r_not_null == 0) {
|
|
62340
61824
|
break;
|
|
62341
61825
|
}
|
|
@@ -62381,6 +61865,7 @@ void PhysicalPiecewiseMergeJoin::ResolveSimpleJoin(ExecutionContext &context, Da
|
|
|
62381
61865
|
auto &gstate = (MergeJoinGlobalState &)*sink_state;
|
|
62382
61866
|
|
|
62383
61867
|
state.ResolveJoinKeys(input);
|
|
61868
|
+
auto &lhs_table = *state.lhs_local_table;
|
|
62384
61869
|
|
|
62385
61870
|
// perform the actual join
|
|
62386
61871
|
bool found_match[STANDARD_VECTOR_SIZE];
|
|
@@ -62388,7 +61873,7 @@ void PhysicalPiecewiseMergeJoin::ResolveSimpleJoin(ExecutionContext &context, Da
|
|
|
62388
61873
|
MergeJoinSimpleBlocks(state, gstate, found_match, conditions[0].comparison);
|
|
62389
61874
|
|
|
62390
61875
|
// use the sorted payload
|
|
62391
|
-
const auto lhs_not_null =
|
|
61876
|
+
const auto lhs_not_null = lhs_table.count - lhs_table.has_null;
|
|
62392
61877
|
auto &payload = state.lhs_payload;
|
|
62393
61878
|
|
|
62394
61879
|
// now construct the result based on the join result
|
|
@@ -62396,19 +61881,19 @@ void PhysicalPiecewiseMergeJoin::ResolveSimpleJoin(ExecutionContext &context, Da
|
|
|
62396
61881
|
case JoinType::MARK: {
|
|
62397
61882
|
// The only part of the join keys that is actually used is the validity mask.
|
|
62398
61883
|
// Since the payload is sorted, we can just set the tail end of the validity masks to invalid.
|
|
62399
|
-
for (auto &key :
|
|
62400
|
-
key.Normalify(
|
|
61884
|
+
for (auto &key : lhs_table.keys.data) {
|
|
61885
|
+
key.Normalify(lhs_table.keys.size());
|
|
62401
61886
|
auto &mask = FlatVector::Validity(key);
|
|
62402
61887
|
if (mask.AllValid()) {
|
|
62403
61888
|
continue;
|
|
62404
61889
|
}
|
|
62405
61890
|
mask.SetAllValid(lhs_not_null);
|
|
62406
|
-
for (idx_t i = lhs_not_null; i <
|
|
61891
|
+
for (idx_t i = lhs_not_null; i < lhs_table.count; ++i) {
|
|
62407
61892
|
mask.SetInvalid(i);
|
|
62408
61893
|
}
|
|
62409
61894
|
}
|
|
62410
61895
|
// So we make a set of keys that have the validity mask set for the
|
|
62411
|
-
PhysicalJoin::ConstructMarkJoinResult(
|
|
61896
|
+
PhysicalJoin::ConstructMarkJoinResult(lhs_table.keys, payload, chunk, found_match, gstate.table->has_null);
|
|
62412
61897
|
break;
|
|
62413
61898
|
}
|
|
62414
61899
|
case JoinType::SEMI:
|
|
@@ -62468,8 +61953,8 @@ static idx_t MergeJoinComplexBlocks(BlockMergeInfo &l, BlockMergeInfo &r, const
|
|
|
62468
61953
|
|
|
62469
61954
|
if (comp_res <= cmp) {
|
|
62470
61955
|
// left side smaller: found match
|
|
62471
|
-
l.result.set_index(result_count, sel_t(l.entry_idx
|
|
62472
|
-
r.result.set_index(result_count, sel_t(r.entry_idx
|
|
61956
|
+
l.result.set_index(result_count, sel_t(l.entry_idx));
|
|
61957
|
+
r.result.set_index(result_count, sel_t(r.entry_idx));
|
|
62473
61958
|
result_count++;
|
|
62474
61959
|
// move left side forward
|
|
62475
61960
|
l.entry_idx++;
|
|
@@ -62496,35 +61981,11 @@ static idx_t MergeJoinComplexBlocks(BlockMergeInfo &l, BlockMergeInfo &r, const
|
|
|
62496
61981
|
return result_count;
|
|
62497
61982
|
}
|
|
62498
61983
|
|
|
62499
|
-
static idx_t SelectJoinTail(const ExpressionType &condition, Vector &left, Vector &right, const SelectionVector *sel,
|
|
62500
|
-
idx_t count, SelectionVector *true_sel) {
|
|
62501
|
-
switch (condition) {
|
|
62502
|
-
case ExpressionType::COMPARE_NOTEQUAL:
|
|
62503
|
-
return VectorOperations::NotEquals(left, right, sel, count, true_sel, nullptr);
|
|
62504
|
-
case ExpressionType::COMPARE_LESSTHAN:
|
|
62505
|
-
return VectorOperations::LessThan(left, right, sel, count, true_sel, nullptr);
|
|
62506
|
-
case ExpressionType::COMPARE_GREATERTHAN:
|
|
62507
|
-
return VectorOperations::GreaterThan(left, right, sel, count, true_sel, nullptr);
|
|
62508
|
-
case ExpressionType::COMPARE_LESSTHANOREQUALTO:
|
|
62509
|
-
return VectorOperations::LessThanEquals(left, right, sel, count, true_sel, nullptr);
|
|
62510
|
-
case ExpressionType::COMPARE_GREATERTHANOREQUALTO:
|
|
62511
|
-
return VectorOperations::GreaterThanEquals(left, right, sel, count, true_sel, nullptr);
|
|
62512
|
-
case ExpressionType::COMPARE_DISTINCT_FROM:
|
|
62513
|
-
return VectorOperations::DistinctFrom(left, right, sel, count, true_sel, nullptr);
|
|
62514
|
-
case ExpressionType::COMPARE_NOT_DISTINCT_FROM:
|
|
62515
|
-
case ExpressionType::COMPARE_EQUAL:
|
|
62516
|
-
default:
|
|
62517
|
-
throw InternalException("Unsupported comparison type for PhysicalPiecewiseMergeJoin");
|
|
62518
|
-
}
|
|
62519
|
-
|
|
62520
|
-
return count;
|
|
62521
|
-
}
|
|
62522
|
-
|
|
62523
61984
|
OperatorResultType PhysicalPiecewiseMergeJoin::ResolveComplexJoin(ExecutionContext &context, DataChunk &input,
|
|
62524
61985
|
DataChunk &chunk, OperatorState &state_p) const {
|
|
62525
61986
|
auto &state = (PiecewiseMergeJoinState &)state_p;
|
|
62526
61987
|
auto &gstate = (MergeJoinGlobalState &)*sink_state;
|
|
62527
|
-
auto &rsorted = *gstate.
|
|
61988
|
+
auto &rsorted = *gstate.table->global_sort_state.sorted_blocks[0];
|
|
62528
61989
|
const auto left_cols = input.ColumnCount();
|
|
62529
61990
|
const auto tail_cols = conditions.size() - 1;
|
|
62530
61991
|
do {
|
|
@@ -62550,14 +62011,15 @@ OperatorResultType PhysicalPiecewiseMergeJoin::ResolveComplexJoin(ExecutionConte
|
|
|
62550
62011
|
return OperatorResultType::NEED_MORE_INPUT;
|
|
62551
62012
|
}
|
|
62552
62013
|
|
|
62553
|
-
|
|
62554
|
-
|
|
62014
|
+
auto &lhs_table = *state.lhs_local_table;
|
|
62015
|
+
const auto lhs_not_null = lhs_table.count - lhs_table.has_null;
|
|
62016
|
+
BlockMergeInfo left_info(*state.lhs_global_state, 0, state.left_position, lhs_not_null);
|
|
62555
62017
|
|
|
62556
62018
|
const auto &rblock = rsorted.radix_sorting_data[state.right_chunk_index];
|
|
62557
62019
|
const auto rhs_not_null =
|
|
62558
|
-
SortedBlockNotNull(state.right_base, rblock.count, gstate.
|
|
62559
|
-
BlockMergeInfo right_info(gstate.
|
|
62560
|
-
|
|
62020
|
+
SortedBlockNotNull(state.right_base, rblock.count, gstate.table->count - gstate.table->has_null);
|
|
62021
|
+
BlockMergeInfo right_info(gstate.table->global_sort_state, state.right_chunk_index, state.right_position,
|
|
62022
|
+
rhs_not_null);
|
|
62561
62023
|
|
|
62562
62024
|
idx_t result_count = MergeJoinComplexBlocks(left_info, right_info, conditions[0].comparison);
|
|
62563
62025
|
if (result_count == 0) {
|
|
@@ -62576,7 +62038,8 @@ OperatorResultType PhysicalPiecewiseMergeJoin::ResolveComplexJoin(ExecutionConte
|
|
|
62576
62038
|
for (idx_t c = 0; c < state.lhs_payload.ColumnCount(); ++c) {
|
|
62577
62039
|
chunk.data[c].Slice(state.lhs_payload.data[c], left_info.result, result_count);
|
|
62578
62040
|
}
|
|
62579
|
-
|
|
62041
|
+
SliceSortedPayload(chunk, right_info.state, right_info.block_idx, right_info.result, result_count,
|
|
62042
|
+
left_cols);
|
|
62580
62043
|
chunk.SetCardinality(result_count);
|
|
62581
62044
|
|
|
62582
62045
|
auto sel = FlatVector::IncrementalSelectionVector();
|
|
@@ -62590,7 +62053,7 @@ OperatorResultType PhysicalPiecewiseMergeJoin::ResolveComplexJoin(ExecutionConte
|
|
|
62590
62053
|
|
|
62591
62054
|
auto tail_count = result_count;
|
|
62592
62055
|
for (size_t cmp_idx = 1; cmp_idx < conditions.size(); ++cmp_idx) {
|
|
62593
|
-
Vector left(
|
|
62056
|
+
Vector left(lhs_table.keys.data[cmp_idx]);
|
|
62594
62057
|
left.Slice(left_info.result, result_count);
|
|
62595
62058
|
|
|
62596
62059
|
auto &right = state.rhs_keys.data[cmp_idx];
|
|
@@ -62618,11 +62081,10 @@ OperatorResultType PhysicalPiecewiseMergeJoin::ResolveComplexJoin(ExecutionConte
|
|
|
62618
62081
|
state.lhs_found_match[left_info.result[sel->get_index(i)]] = true;
|
|
62619
62082
|
}
|
|
62620
62083
|
}
|
|
62621
|
-
if (gstate.
|
|
62084
|
+
if (gstate.table->found_match) {
|
|
62622
62085
|
// Absolute position of the block + start position inside that block
|
|
62623
|
-
const idx_t base_index = right_info.base_idx + first_idx;
|
|
62624
62086
|
for (idx_t i = 0; i < result_count; i++) {
|
|
62625
|
-
gstate.
|
|
62087
|
+
gstate.table->found_match[state.right_base + right_info.result[sel->get_index(i)]] = true;
|
|
62626
62088
|
}
|
|
62627
62089
|
}
|
|
62628
62090
|
chunk.SetCardinality(result_count);
|
|
@@ -62639,7 +62101,7 @@ OperatorResultType PhysicalPiecewiseMergeJoin::Execute(ExecutionContext &context
|
|
|
62639
62101
|
if (gstate.Count() == 0) {
|
|
62640
62102
|
// empty RHS
|
|
62641
62103
|
if (!EmptyResultIfRHSIsEmpty()) {
|
|
62642
|
-
ConstructEmptyJoinResult(join_type, gstate.
|
|
62104
|
+
ConstructEmptyJoinResult(join_type, gstate.table->has_null, input, chunk);
|
|
62643
62105
|
return OperatorResultType::NEED_MORE_INPUT;
|
|
62644
62106
|
} else {
|
|
62645
62107
|
return OperatorResultType::FINISHED;
|
|
@@ -62697,7 +62159,7 @@ void PhysicalPiecewiseMergeJoin::GetData(ExecutionContext &context, DataChunk &r
|
|
|
62697
62159
|
lock_guard<mutex> l(state.lock);
|
|
62698
62160
|
if (!state.scanner) {
|
|
62699
62161
|
// Initialize scanner (if not yet initialized)
|
|
62700
|
-
auto &sort_state = sink.
|
|
62162
|
+
auto &sort_state = sink.table->global_sort_state;
|
|
62701
62163
|
if (sort_state.sorted_blocks.empty()) {
|
|
62702
62164
|
return;
|
|
62703
62165
|
}
|
|
@@ -62706,11 +62168,12 @@ void PhysicalPiecewiseMergeJoin::GetData(ExecutionContext &context, DataChunk &r
|
|
|
62706
62168
|
|
|
62707
62169
|
// if the LHS is exhausted in a FULL/RIGHT OUTER JOIN, we scan the found_match for any chunks we
|
|
62708
62170
|
// still need to output
|
|
62709
|
-
const auto found_match = sink.
|
|
62171
|
+
const auto found_match = sink.table->found_match.get();
|
|
62710
62172
|
|
|
62711
|
-
// ConstructFullOuterJoinResult(sink.
|
|
62173
|
+
// ConstructFullOuterJoinResult(sink.table->found_match.get(), sink.right_chunks, chunk,
|
|
62174
|
+
// state.right_outer_position);
|
|
62712
62175
|
DataChunk rhs_chunk;
|
|
62713
|
-
rhs_chunk.Initialize(sink.
|
|
62176
|
+
rhs_chunk.Initialize(sink.table->global_sort_state.payload_layout.GetTypes());
|
|
62714
62177
|
SelectionVector rsel(STANDARD_VECTOR_SIZE);
|
|
62715
62178
|
for (;;) {
|
|
62716
62179
|
// Read the next sorted chunk
|
|
@@ -62749,6 +62212,353 @@ void PhysicalPiecewiseMergeJoin::GetData(ExecutionContext &context, DataChunk &r
|
|
|
62749
62212
|
}
|
|
62750
62213
|
|
|
62751
62214
|
} // namespace duckdb
|
|
62215
|
+
|
|
62216
|
+
|
|
62217
|
+
|
|
62218
|
+
|
|
62219
|
+
|
|
62220
|
+
|
|
62221
|
+
|
|
62222
|
+
|
|
62223
|
+
|
|
62224
|
+
|
|
62225
|
+
|
|
62226
|
+
|
|
62227
|
+
|
|
62228
|
+
#include <thread>
|
|
62229
|
+
|
|
62230
|
+
namespace duckdb {
|
|
62231
|
+
|
|
62232
|
+
PhysicalRangeJoin::LocalSortedTable::LocalSortedTable(const PhysicalRangeJoin &op, const idx_t child)
|
|
62233
|
+
: op(op), has_null(0), count(0) {
|
|
62234
|
+
// Initialize order clause expression executor and key DataChunk
|
|
62235
|
+
vector<LogicalType> types;
|
|
62236
|
+
for (const auto &cond : op.conditions) {
|
|
62237
|
+
const auto &expr = child ? cond.right : cond.left;
|
|
62238
|
+
executor.AddExpression(*expr);
|
|
62239
|
+
|
|
62240
|
+
types.push_back(expr->return_type);
|
|
62241
|
+
}
|
|
62242
|
+
keys.Initialize(types);
|
|
62243
|
+
}
|
|
62244
|
+
|
|
62245
|
+
void PhysicalRangeJoin::LocalSortedTable::Sink(DataChunk &input, GlobalSortState &global_sort_state) {
|
|
62246
|
+
// Initialize local state (if necessary)
|
|
62247
|
+
if (!local_sort_state.initialized) {
|
|
62248
|
+
local_sort_state.Initialize(global_sort_state, global_sort_state.buffer_manager);
|
|
62249
|
+
}
|
|
62250
|
+
|
|
62251
|
+
// Obtain sorting columns
|
|
62252
|
+
keys.Reset();
|
|
62253
|
+
executor.Execute(input, keys);
|
|
62254
|
+
|
|
62255
|
+
// Count the NULLs so we can exclude them later
|
|
62256
|
+
has_null += MergeNulls(op.conditions);
|
|
62257
|
+
count += keys.size();
|
|
62258
|
+
|
|
62259
|
+
// Only sort the primary key
|
|
62260
|
+
DataChunk join_head;
|
|
62261
|
+
join_head.data.emplace_back(Vector(keys.data[0]));
|
|
62262
|
+
join_head.SetCardinality(keys.size());
|
|
62263
|
+
|
|
62264
|
+
// Sink the data into the local sort state
|
|
62265
|
+
local_sort_state.SinkChunk(join_head, input);
|
|
62266
|
+
}
|
|
62267
|
+
|
|
62268
|
+
PhysicalRangeJoin::GlobalSortedTable::GlobalSortedTable(ClientContext &context, const vector<BoundOrderByNode> &orders,
|
|
62269
|
+
RowLayout &payload_layout)
|
|
62270
|
+
: global_sort_state(BufferManager::GetBufferManager(context), orders, payload_layout), has_null(0), count(0),
|
|
62271
|
+
memory_per_thread(0) {
|
|
62272
|
+
D_ASSERT(orders.size() == 1);
|
|
62273
|
+
|
|
62274
|
+
// Set external (can be force with the PRAGMA)
|
|
62275
|
+
auto &config = ClientConfig::GetConfig(context);
|
|
62276
|
+
global_sort_state.external = config.force_external;
|
|
62277
|
+
// Memory usage per thread should scale with max mem / num threads
|
|
62278
|
+
// We take 1/4th of this, to be conservative
|
|
62279
|
+
idx_t max_memory = global_sort_state.buffer_manager.GetMaxMemory();
|
|
62280
|
+
idx_t num_threads = TaskScheduler::GetScheduler(context).NumberOfThreads();
|
|
62281
|
+
memory_per_thread = (max_memory / num_threads) / 4;
|
|
62282
|
+
}
|
|
62283
|
+
|
|
62284
|
+
void PhysicalRangeJoin::GlobalSortedTable::Combine(LocalSortedTable <able) {
|
|
62285
|
+
global_sort_state.AddLocalState(ltable.local_sort_state);
|
|
62286
|
+
has_null += ltable.has_null;
|
|
62287
|
+
count += ltable.count;
|
|
62288
|
+
}
|
|
62289
|
+
|
|
62290
|
+
void PhysicalRangeJoin::GlobalSortedTable::IntializeMatches() {
|
|
62291
|
+
found_match = unique_ptr<bool[]>(new bool[Count()]);
|
|
62292
|
+
memset(found_match.get(), 0, sizeof(bool) * Count());
|
|
62293
|
+
}
|
|
62294
|
+
|
|
62295
|
+
void PhysicalRangeJoin::GlobalSortedTable::Print() {
|
|
62296
|
+
global_sort_state.Print();
|
|
62297
|
+
}
|
|
62298
|
+
|
|
62299
|
+
class RangeJoinMergeTask : public ExecutorTask {
|
|
62300
|
+
public:
|
|
62301
|
+
using GlobalSortedTable = PhysicalRangeJoin::GlobalSortedTable;
|
|
62302
|
+
|
|
62303
|
+
public:
|
|
62304
|
+
RangeJoinMergeTask(shared_ptr<Event> event_p, ClientContext &context, GlobalSortedTable &table)
|
|
62305
|
+
: ExecutorTask(context), event(move(event_p)), context(context), table(table) {
|
|
62306
|
+
}
|
|
62307
|
+
|
|
62308
|
+
TaskExecutionResult ExecuteTask(TaskExecutionMode mode) override {
|
|
62309
|
+
// Initialize iejoin sorted and iterate until done
|
|
62310
|
+
auto &global_sort_state = table.global_sort_state;
|
|
62311
|
+
MergeSorter merge_sorter(global_sort_state, BufferManager::GetBufferManager(context));
|
|
62312
|
+
merge_sorter.PerformInMergeRound();
|
|
62313
|
+
event->FinishTask();
|
|
62314
|
+
|
|
62315
|
+
return TaskExecutionResult::TASK_FINISHED;
|
|
62316
|
+
}
|
|
62317
|
+
|
|
62318
|
+
private:
|
|
62319
|
+
shared_ptr<Event> event;
|
|
62320
|
+
ClientContext &context;
|
|
62321
|
+
GlobalSortedTable &table;
|
|
62322
|
+
};
|
|
62323
|
+
|
|
62324
|
+
class RangeJoinMergeEvent : public Event {
|
|
62325
|
+
public:
|
|
62326
|
+
using GlobalSortedTable = PhysicalRangeJoin::GlobalSortedTable;
|
|
62327
|
+
|
|
62328
|
+
public:
|
|
62329
|
+
RangeJoinMergeEvent(GlobalSortedTable &table_p, Pipeline &pipeline_p)
|
|
62330
|
+
: Event(pipeline_p.executor), table(table_p), pipeline(pipeline_p) {
|
|
62331
|
+
}
|
|
62332
|
+
|
|
62333
|
+
GlobalSortedTable &table;
|
|
62334
|
+
Pipeline &pipeline;
|
|
62335
|
+
|
|
62336
|
+
public:
|
|
62337
|
+
void Schedule() override {
|
|
62338
|
+
auto &context = pipeline.GetClientContext();
|
|
62339
|
+
|
|
62340
|
+
// Schedule tasks equal to the number of threads, which will each merge multiple partitions
|
|
62341
|
+
auto &ts = TaskScheduler::GetScheduler(context);
|
|
62342
|
+
idx_t num_threads = ts.NumberOfThreads();
|
|
62343
|
+
|
|
62344
|
+
vector<unique_ptr<Task>> iejoin_tasks;
|
|
62345
|
+
for (idx_t tnum = 0; tnum < num_threads; tnum++) {
|
|
62346
|
+
iejoin_tasks.push_back(make_unique<RangeJoinMergeTask>(shared_from_this(), context, table));
|
|
62347
|
+
}
|
|
62348
|
+
SetTasks(move(iejoin_tasks));
|
|
62349
|
+
}
|
|
62350
|
+
|
|
62351
|
+
void FinishEvent() override {
|
|
62352
|
+
auto &global_sort_state = table.global_sort_state;
|
|
62353
|
+
|
|
62354
|
+
global_sort_state.CompleteMergeRound(true);
|
|
62355
|
+
if (global_sort_state.sorted_blocks.size() > 1) {
|
|
62356
|
+
// Multiple blocks remaining: Schedule the next round
|
|
62357
|
+
table.ScheduleMergeTasks(pipeline, *this);
|
|
62358
|
+
}
|
|
62359
|
+
}
|
|
62360
|
+
};
|
|
62361
|
+
|
|
62362
|
+
void PhysicalRangeJoin::GlobalSortedTable::ScheduleMergeTasks(Pipeline &pipeline, Event &event) {
|
|
62363
|
+
// Initialize global sort state for a round of merging
|
|
62364
|
+
global_sort_state.InitializeMergeRound();
|
|
62365
|
+
auto new_event = make_shared<RangeJoinMergeEvent>(*this, pipeline);
|
|
62366
|
+
event.InsertEvent(move(new_event));
|
|
62367
|
+
}
|
|
62368
|
+
|
|
62369
|
+
void PhysicalRangeJoin::GlobalSortedTable::Finalize(Pipeline &pipeline, Event &event) {
|
|
62370
|
+
// Prepare for merge sort phase
|
|
62371
|
+
global_sort_state.PrepareMergePhase();
|
|
62372
|
+
|
|
62373
|
+
// Start the merge phase or finish if a merge is not necessary
|
|
62374
|
+
if (global_sort_state.sorted_blocks.size() > 1) {
|
|
62375
|
+
ScheduleMergeTasks(pipeline, event);
|
|
62376
|
+
}
|
|
62377
|
+
}
|
|
62378
|
+
|
|
62379
|
+
PhysicalRangeJoin::PhysicalRangeJoin(LogicalOperator &op, PhysicalOperatorType type, unique_ptr<PhysicalOperator> left,
|
|
62380
|
+
unique_ptr<PhysicalOperator> right, vector<JoinCondition> cond, JoinType join_type,
|
|
62381
|
+
idx_t estimated_cardinality)
|
|
62382
|
+
: PhysicalComparisonJoin(op, type, move(cond), join_type, estimated_cardinality) {
|
|
62383
|
+
// Reorder the conditions so that ranges are at the front.
|
|
62384
|
+
// TODO: use stats to improve the choice?
|
|
62385
|
+
// TODO: Prefer fixed length types?
|
|
62386
|
+
if (conditions.size() > 1) {
|
|
62387
|
+
auto conditions_p = std::move(conditions);
|
|
62388
|
+
conditions.resize(conditions_p.size());
|
|
62389
|
+
idx_t range_position = 0;
|
|
62390
|
+
idx_t other_position = conditions_p.size();
|
|
62391
|
+
for (idx_t i = 0; i < conditions_p.size(); ++i) {
|
|
62392
|
+
switch (conditions_p[i].comparison) {
|
|
62393
|
+
case ExpressionType::COMPARE_LESSTHAN:
|
|
62394
|
+
case ExpressionType::COMPARE_LESSTHANOREQUALTO:
|
|
62395
|
+
case ExpressionType::COMPARE_GREATERTHAN:
|
|
62396
|
+
case ExpressionType::COMPARE_GREATERTHANOREQUALTO:
|
|
62397
|
+
conditions[range_position++] = std::move(conditions_p[i]);
|
|
62398
|
+
break;
|
|
62399
|
+
default:
|
|
62400
|
+
conditions[--other_position] = std::move(conditions_p[i]);
|
|
62401
|
+
break;
|
|
62402
|
+
}
|
|
62403
|
+
}
|
|
62404
|
+
}
|
|
62405
|
+
|
|
62406
|
+
children.push_back(move(left));
|
|
62407
|
+
children.push_back(move(right));
|
|
62408
|
+
}
|
|
62409
|
+
|
|
62410
|
+
idx_t PhysicalRangeJoin::LocalSortedTable::MergeNulls(const vector<JoinCondition> &conditions) {
|
|
62411
|
+
// Merge the validity masks of the comparison keys into the primary
|
|
62412
|
+
// Return the number of NULLs in the resulting chunk
|
|
62413
|
+
D_ASSERT(keys.ColumnCount() > 0);
|
|
62414
|
+
const auto count = keys.size();
|
|
62415
|
+
|
|
62416
|
+
size_t all_constant = 0;
|
|
62417
|
+
for (auto &v : keys.data) {
|
|
62418
|
+
if (v.GetVectorType() == VectorType::CONSTANT_VECTOR) {
|
|
62419
|
+
++all_constant;
|
|
62420
|
+
}
|
|
62421
|
+
}
|
|
62422
|
+
|
|
62423
|
+
auto &primary = keys.data[0];
|
|
62424
|
+
if (all_constant == keys.data.size()) {
|
|
62425
|
+
// Either all NULL or no NULLs
|
|
62426
|
+
for (auto &v : keys.data) {
|
|
62427
|
+
if (ConstantVector::IsNull(v)) {
|
|
62428
|
+
ConstantVector::SetNull(primary, true);
|
|
62429
|
+
return count;
|
|
62430
|
+
}
|
|
62431
|
+
}
|
|
62432
|
+
return 0;
|
|
62433
|
+
} else if (keys.ColumnCount() > 1) {
|
|
62434
|
+
// Normalify the primary, as it will need to merge arbitrary validity masks
|
|
62435
|
+
primary.Normalify(count);
|
|
62436
|
+
auto &pvalidity = FlatVector::Validity(primary);
|
|
62437
|
+
D_ASSERT(keys.ColumnCount() == conditions.size());
|
|
62438
|
+
for (size_t c = 1; c < keys.data.size(); ++c) {
|
|
62439
|
+
// Skip comparisons that accept NULLs
|
|
62440
|
+
if (conditions[c].comparison == ExpressionType::COMPARE_DISTINCT_FROM) {
|
|
62441
|
+
continue;
|
|
62442
|
+
}
|
|
62443
|
+
// Orrify the rest, as the sort code will do this anyway.
|
|
62444
|
+
auto &v = keys.data[c];
|
|
62445
|
+
VectorData vdata;
|
|
62446
|
+
v.Orrify(count, vdata);
|
|
62447
|
+
auto &vvalidity = vdata.validity;
|
|
62448
|
+
if (vvalidity.AllValid()) {
|
|
62449
|
+
continue;
|
|
62450
|
+
}
|
|
62451
|
+
pvalidity.EnsureWritable();
|
|
62452
|
+
switch (v.GetVectorType()) {
|
|
62453
|
+
case VectorType::FLAT_VECTOR: {
|
|
62454
|
+
// Merge entire entries
|
|
62455
|
+
auto pmask = pvalidity.GetData();
|
|
62456
|
+
const auto entry_count = pvalidity.EntryCount(count);
|
|
62457
|
+
for (idx_t entry_idx = 0; entry_idx < entry_count; ++entry_idx) {
|
|
62458
|
+
pmask[entry_idx] &= vvalidity.GetValidityEntry(entry_idx);
|
|
62459
|
+
}
|
|
62460
|
+
break;
|
|
62461
|
+
}
|
|
62462
|
+
case VectorType::CONSTANT_VECTOR:
|
|
62463
|
+
// All or nothing
|
|
62464
|
+
if (ConstantVector::IsNull(v)) {
|
|
62465
|
+
pvalidity.SetAllInvalid(count);
|
|
62466
|
+
return count;
|
|
62467
|
+
}
|
|
62468
|
+
break;
|
|
62469
|
+
default:
|
|
62470
|
+
// One by one
|
|
62471
|
+
for (idx_t i = 0; i < count; ++i) {
|
|
62472
|
+
const auto idx = vdata.sel->get_index(i);
|
|
62473
|
+
if (!vvalidity.RowIsValidUnsafe(idx)) {
|
|
62474
|
+
pvalidity.SetInvalidUnsafe(i);
|
|
62475
|
+
}
|
|
62476
|
+
}
|
|
62477
|
+
break;
|
|
62478
|
+
}
|
|
62479
|
+
}
|
|
62480
|
+
return count - pvalidity.CountValid(count);
|
|
62481
|
+
} else {
|
|
62482
|
+
return count - VectorOperations::CountNotNull(primary, count);
|
|
62483
|
+
}
|
|
62484
|
+
}
|
|
62485
|
+
|
|
62486
|
+
void PhysicalRangeJoin::SliceSortedPayload(DataChunk &payload, GlobalSortState &state, const idx_t block_idx,
|
|
62487
|
+
const SelectionVector &result, const idx_t result_count,
|
|
62488
|
+
const idx_t left_cols) {
|
|
62489
|
+
// There should only be one sorted block if they have been sorted
|
|
62490
|
+
D_ASSERT(state.sorted_blocks.size() == 1);
|
|
62491
|
+
SBScanState read_state(state.buffer_manager, state);
|
|
62492
|
+
read_state.sb = state.sorted_blocks[0].get();
|
|
62493
|
+
auto &sorted_data = *read_state.sb->payload_data;
|
|
62494
|
+
|
|
62495
|
+
read_state.SetIndices(block_idx, 0);
|
|
62496
|
+
read_state.PinData(sorted_data);
|
|
62497
|
+
const auto data_ptr = read_state.DataPtr(sorted_data);
|
|
62498
|
+
|
|
62499
|
+
// Set up a batch of pointers to scan data from
|
|
62500
|
+
Vector addresses(LogicalType::POINTER, result_count);
|
|
62501
|
+
auto data_pointers = FlatVector::GetData<data_ptr_t>(addresses);
|
|
62502
|
+
|
|
62503
|
+
// Set up the data pointers for the values that are actually referenced
|
|
62504
|
+
const idx_t &row_width = sorted_data.layout.GetRowWidth();
|
|
62505
|
+
|
|
62506
|
+
auto prev_idx = result.get_index(0);
|
|
62507
|
+
SelectionVector gsel(result_count);
|
|
62508
|
+
idx_t addr_count = 0;
|
|
62509
|
+
gsel.set_index(0, addr_count);
|
|
62510
|
+
data_pointers[addr_count] = data_ptr + prev_idx * row_width;
|
|
62511
|
+
for (idx_t i = 1; i < result_count; ++i) {
|
|
62512
|
+
const auto row_idx = result.get_index(i);
|
|
62513
|
+
if (row_idx != prev_idx) {
|
|
62514
|
+
data_pointers[++addr_count] = data_ptr + row_idx * row_width;
|
|
62515
|
+
prev_idx = row_idx;
|
|
62516
|
+
}
|
|
62517
|
+
gsel.set_index(i, addr_count);
|
|
62518
|
+
}
|
|
62519
|
+
++addr_count;
|
|
62520
|
+
|
|
62521
|
+
// Unswizzle the offsets back to pointers (if needed)
|
|
62522
|
+
if (!sorted_data.layout.AllConstant() && state.external) {
|
|
62523
|
+
RowOperations::UnswizzlePointers(sorted_data.layout, data_ptr, read_state.payload_heap_handle->Ptr(),
|
|
62524
|
+
addr_count);
|
|
62525
|
+
}
|
|
62526
|
+
|
|
62527
|
+
// Deserialize the payload data
|
|
62528
|
+
auto sel = FlatVector::IncrementalSelectionVector();
|
|
62529
|
+
for (idx_t col_idx = 0; col_idx < sorted_data.layout.ColumnCount(); col_idx++) {
|
|
62530
|
+
const auto col_offset = sorted_data.layout.GetOffsets()[col_idx];
|
|
62531
|
+
auto &col = payload.data[left_cols + col_idx];
|
|
62532
|
+
RowOperations::Gather(addresses, *sel, col, *sel, addr_count, col_offset, col_idx);
|
|
62533
|
+
col.Slice(gsel, result_count);
|
|
62534
|
+
}
|
|
62535
|
+
}
|
|
62536
|
+
|
|
62537
|
+
idx_t PhysicalRangeJoin::SelectJoinTail(const ExpressionType &condition, Vector &left, Vector &right,
|
|
62538
|
+
const SelectionVector *sel, idx_t count, SelectionVector *true_sel) {
|
|
62539
|
+
switch (condition) {
|
|
62540
|
+
case ExpressionType::COMPARE_NOTEQUAL:
|
|
62541
|
+
return VectorOperations::NotEquals(left, right, sel, count, true_sel, nullptr);
|
|
62542
|
+
case ExpressionType::COMPARE_LESSTHAN:
|
|
62543
|
+
return VectorOperations::LessThan(left, right, sel, count, true_sel, nullptr);
|
|
62544
|
+
case ExpressionType::COMPARE_GREATERTHAN:
|
|
62545
|
+
return VectorOperations::GreaterThan(left, right, sel, count, true_sel, nullptr);
|
|
62546
|
+
case ExpressionType::COMPARE_LESSTHANOREQUALTO:
|
|
62547
|
+
return VectorOperations::LessThanEquals(left, right, sel, count, true_sel, nullptr);
|
|
62548
|
+
case ExpressionType::COMPARE_GREATERTHANOREQUALTO:
|
|
62549
|
+
return VectorOperations::GreaterThanEquals(left, right, sel, count, true_sel, nullptr);
|
|
62550
|
+
case ExpressionType::COMPARE_DISTINCT_FROM:
|
|
62551
|
+
return VectorOperations::DistinctFrom(left, right, sel, count, true_sel, nullptr);
|
|
62552
|
+
case ExpressionType::COMPARE_NOT_DISTINCT_FROM:
|
|
62553
|
+
case ExpressionType::COMPARE_EQUAL:
|
|
62554
|
+
default:
|
|
62555
|
+
throw InternalException("Unsupported comparison type for PhysicalRangeJoin");
|
|
62556
|
+
}
|
|
62557
|
+
|
|
62558
|
+
return count;
|
|
62559
|
+
}
|
|
62560
|
+
|
|
62561
|
+
} // namespace duckdb
|
|
62752
62562
|
//===----------------------------------------------------------------------===//
|
|
62753
62563
|
// DuckDB
|
|
62754
62564
|
//
|
|
@@ -63751,7 +63561,7 @@ std::string BufferedCSVReaderOptions::ToString() const {
|
|
|
63751
63561
|
", HEADER=" + std::to_string(header) +
|
|
63752
63562
|
(has_header ? "" : (auto_detect ? " (auto detected)" : "' (default)")) +
|
|
63753
63563
|
", SAMPLE_SIZE=" + std::to_string(sample_chunk_size * sample_chunks) +
|
|
63754
|
-
", ALL_VARCHAR=" + std::to_string(all_varchar);
|
|
63564
|
+
", IGNORE_ERRORS=" + std::to_string(ignore_errors) + ", ALL_VARCHAR=" + std::to_string(all_varchar);
|
|
63755
63565
|
}
|
|
63756
63566
|
|
|
63757
63567
|
static string GetLineNumberStr(idx_t linenr, bool linenr_estimated) {
|
|
@@ -65214,9 +65024,14 @@ void BufferedCSVReader::AddValue(char *str_val, idx_t length, idx_t &column, vec
|
|
|
65214
65024
|
return;
|
|
65215
65025
|
}
|
|
65216
65026
|
if (column >= sql_types.size()) {
|
|
65217
|
-
|
|
65218
|
-
|
|
65219
|
-
|
|
65027
|
+
if (options.ignore_errors) {
|
|
65028
|
+
error_column_overflow = true;
|
|
65029
|
+
return;
|
|
65030
|
+
} else {
|
|
65031
|
+
throw InvalidInputException("Error on line %s: expected %lld values per row, but got more. (%s)",
|
|
65032
|
+
GetLineNumberStr(linenr, linenr_estimated).c_str(), sql_types.size(),
|
|
65033
|
+
options.ToString());
|
|
65034
|
+
}
|
|
65220
65035
|
}
|
|
65221
65036
|
|
|
65222
65037
|
// insert the line number into the chunk
|
|
@@ -65268,10 +65083,23 @@ bool BufferedCSVReader::AddRow(DataChunk &insert_chunk, idx_t &column) {
|
|
|
65268
65083
|
}
|
|
65269
65084
|
}
|
|
65270
65085
|
|
|
65086
|
+
// Error forwarded by 'ignore_errors' - originally encountered in 'AddValue'
|
|
65087
|
+
if (error_column_overflow) {
|
|
65088
|
+
D_ASSERT(options.ignore_errors);
|
|
65089
|
+
error_column_overflow = false;
|
|
65090
|
+
column = 0;
|
|
65091
|
+
return false;
|
|
65092
|
+
}
|
|
65093
|
+
|
|
65271
65094
|
if (column < sql_types.size() && mode != ParserMode::SNIFFING_DIALECT) {
|
|
65272
|
-
|
|
65273
|
-
|
|
65274
|
-
|
|
65095
|
+
if (options.ignore_errors) {
|
|
65096
|
+
column = 0;
|
|
65097
|
+
return false;
|
|
65098
|
+
} else {
|
|
65099
|
+
throw InvalidInputException("Error on line %s: expected %lld values per row, but got %d. (%s)",
|
|
65100
|
+
GetLineNumberStr(linenr, linenr_estimated).c_str(), sql_types.size(), column,
|
|
65101
|
+
options.ToString());
|
|
65102
|
+
}
|
|
65275
65103
|
}
|
|
65276
65104
|
|
|
65277
65105
|
if (mode == ParserMode::SNIFFING_DIALECT) {
|
|
@@ -65305,6 +65133,9 @@ void BufferedCSVReader::Flush(DataChunk &insert_chunk) {
|
|
|
65305
65133
|
if (parse_chunk.size() == 0) {
|
|
65306
65134
|
return;
|
|
65307
65135
|
}
|
|
65136
|
+
|
|
65137
|
+
bool conversion_error_ignored = false;
|
|
65138
|
+
|
|
65308
65139
|
// convert the columns in the parsed chunk to the types of the table
|
|
65309
65140
|
insert_chunk.SetCardinality(parse_chunk);
|
|
65310
65141
|
for (idx_t col_idx = 0; col_idx < sql_types.size(); col_idx++) {
|
|
@@ -65346,26 +65177,56 @@ void BufferedCSVReader::Flush(DataChunk &insert_chunk) {
|
|
|
65346
65177
|
success = VectorOperations::TryCast(parse_chunk.data[col_idx], insert_chunk.data[col_idx],
|
|
65347
65178
|
parse_chunk.size(), &error_message);
|
|
65348
65179
|
}
|
|
65349
|
-
if (
|
|
65350
|
-
|
|
65351
|
-
|
|
65352
|
-
|
|
65353
|
-
|
|
65180
|
+
if (success) {
|
|
65181
|
+
continue;
|
|
65182
|
+
}
|
|
65183
|
+
if (options.ignore_errors) {
|
|
65184
|
+
conversion_error_ignored = true;
|
|
65185
|
+
continue;
|
|
65186
|
+
}
|
|
65187
|
+
string col_name = to_string(col_idx);
|
|
65188
|
+
if (col_idx < col_names.size()) {
|
|
65189
|
+
col_name = "\"" + col_names[col_idx] + "\"";
|
|
65190
|
+
}
|
|
65354
65191
|
|
|
65355
|
-
|
|
65356
|
-
|
|
65357
|
-
|
|
65358
|
-
|
|
65359
|
-
|
|
65360
|
-
|
|
65361
|
-
|
|
65362
|
-
|
|
65363
|
-
|
|
65364
|
-
|
|
65365
|
-
|
|
65192
|
+
if (options.auto_detect) {
|
|
65193
|
+
throw InvalidInputException("%s in column %s, between line %llu and %llu. Parser "
|
|
65194
|
+
"options: %s. Consider either increasing the sample size "
|
|
65195
|
+
"(SAMPLE_SIZE=X [X rows] or SAMPLE_SIZE=-1 [all rows]), "
|
|
65196
|
+
"or skipping column conversion (ALL_VARCHAR=1)",
|
|
65197
|
+
error_message, col_name, linenr - parse_chunk.size() + 1, linenr,
|
|
65198
|
+
options.ToString());
|
|
65199
|
+
} else {
|
|
65200
|
+
throw InvalidInputException("%s between line %llu and %llu in column %s. Parser options: %s ",
|
|
65201
|
+
error_message, linenr - parse_chunk.size(), linenr, col_name,
|
|
65202
|
+
options.ToString());
|
|
65203
|
+
}
|
|
65204
|
+
}
|
|
65205
|
+
}
|
|
65206
|
+
if (conversion_error_ignored) {
|
|
65207
|
+
D_ASSERT(options.ignore_errors);
|
|
65208
|
+
SelectionVector succesful_rows;
|
|
65209
|
+
succesful_rows.Initialize(parse_chunk.size());
|
|
65210
|
+
idx_t sel_size = 0;
|
|
65211
|
+
|
|
65212
|
+
for (idx_t row_idx = 0; row_idx < parse_chunk.size(); row_idx++) {
|
|
65213
|
+
bool failed = false;
|
|
65214
|
+
for (idx_t column_idx = 0; column_idx < sql_types.size(); column_idx++) {
|
|
65215
|
+
|
|
65216
|
+
auto &inserted_column = insert_chunk.data[column_idx];
|
|
65217
|
+
auto &parsed_column = parse_chunk.data[column_idx];
|
|
65218
|
+
|
|
65219
|
+
bool was_already_null = FlatVector::IsNull(parsed_column, row_idx);
|
|
65220
|
+
if (!was_already_null && FlatVector::IsNull(inserted_column, row_idx)) {
|
|
65221
|
+
failed = true;
|
|
65222
|
+
break;
|
|
65366
65223
|
}
|
|
65367
65224
|
}
|
|
65225
|
+
if (!failed) {
|
|
65226
|
+
succesful_rows.set_index(sel_size++, row_idx);
|
|
65227
|
+
}
|
|
65368
65228
|
}
|
|
65229
|
+
insert_chunk.Slice(succesful_rows, sel_size);
|
|
65369
65230
|
}
|
|
65370
65231
|
parse_chunk.Reset();
|
|
65371
65232
|
}
|
|
@@ -79724,6 +79585,11 @@ struct ListAggregateFun {
|
|
|
79724
79585
|
static void RegisterFunction(BuiltinFunctions &set);
|
|
79725
79586
|
};
|
|
79726
79587
|
|
|
79588
|
+
struct ListSortFun {
|
|
79589
|
+
static ScalarFunction GetFunction();
|
|
79590
|
+
static void RegisterFunction(BuiltinFunctions &set);
|
|
79591
|
+
};
|
|
79592
|
+
|
|
79727
79593
|
struct CardinalityFun {
|
|
79728
79594
|
static void RegisterFunction(BuiltinFunctions &set);
|
|
79729
79595
|
};
|
|
@@ -84973,9 +84839,6 @@ void DateSubFun::RegisterFunction(BuiltinFunctions &set) {
|
|
|
84973
84839
|
|
|
84974
84840
|
|
|
84975
84841
|
|
|
84976
|
-
// TODO date_trunc function should also handle interval data type when it is implemented. See
|
|
84977
|
-
// https://www.postgresql.org/docs/9.1/functions-datetime.html
|
|
84978
|
-
|
|
84979
84842
|
namespace duckdb {
|
|
84980
84843
|
|
|
84981
84844
|
struct DateTrunc {
|
|
@@ -85186,6 +85049,101 @@ timestamp_t DateTrunc::MicrosecondOperator::Operation(date_t input) {
|
|
|
85186
85049
|
return DayOperator::Operation<date_t, timestamp_t>(input);
|
|
85187
85050
|
}
|
|
85188
85051
|
|
|
85052
|
+
// INTERVAL specialisations
|
|
85053
|
+
template <>
|
|
85054
|
+
interval_t DateTrunc::MillenniumOperator::Operation(interval_t input) {
|
|
85055
|
+
input.days = 0;
|
|
85056
|
+
input.micros = 0;
|
|
85057
|
+
input.months = (input.months / Interval::MONTHS_PER_MILLENIUM) * Interval::MONTHS_PER_MILLENIUM;
|
|
85058
|
+
return input;
|
|
85059
|
+
}
|
|
85060
|
+
|
|
85061
|
+
template <>
|
|
85062
|
+
interval_t DateTrunc::CenturyOperator::Operation(interval_t input) {
|
|
85063
|
+
input.days = 0;
|
|
85064
|
+
input.micros = 0;
|
|
85065
|
+
input.months = (input.months / Interval::MONTHS_PER_CENTURY) * Interval::MONTHS_PER_CENTURY;
|
|
85066
|
+
return input;
|
|
85067
|
+
}
|
|
85068
|
+
|
|
85069
|
+
template <>
|
|
85070
|
+
interval_t DateTrunc::DecadeOperator::Operation(interval_t input) {
|
|
85071
|
+
input.days = 0;
|
|
85072
|
+
input.micros = 0;
|
|
85073
|
+
input.months = (input.months / Interval::MONTHS_PER_DECADE) * Interval::MONTHS_PER_DECADE;
|
|
85074
|
+
return input;
|
|
85075
|
+
}
|
|
85076
|
+
|
|
85077
|
+
template <>
|
|
85078
|
+
interval_t DateTrunc::YearOperator::Operation(interval_t input) {
|
|
85079
|
+
input.days = 0;
|
|
85080
|
+
input.micros = 0;
|
|
85081
|
+
input.months = (input.months / Interval::MONTHS_PER_YEAR) * Interval::MONTHS_PER_YEAR;
|
|
85082
|
+
return input;
|
|
85083
|
+
}
|
|
85084
|
+
|
|
85085
|
+
template <>
|
|
85086
|
+
interval_t DateTrunc::QuarterOperator::Operation(interval_t input) {
|
|
85087
|
+
input.days = 0;
|
|
85088
|
+
input.micros = 0;
|
|
85089
|
+
input.months = (input.months / Interval::MONTHS_PER_QUARTER) * Interval::MONTHS_PER_QUARTER;
|
|
85090
|
+
return input;
|
|
85091
|
+
}
|
|
85092
|
+
|
|
85093
|
+
template <>
|
|
85094
|
+
interval_t DateTrunc::MonthOperator::Operation(interval_t input) {
|
|
85095
|
+
input.days = 0;
|
|
85096
|
+
input.micros = 0;
|
|
85097
|
+
return input;
|
|
85098
|
+
}
|
|
85099
|
+
|
|
85100
|
+
template <>
|
|
85101
|
+
interval_t DateTrunc::WeekOperator::Operation(interval_t input) {
|
|
85102
|
+
input.micros = 0;
|
|
85103
|
+
input.days = (input.days / Interval::DAYS_PER_WEEK) * Interval::DAYS_PER_WEEK;
|
|
85104
|
+
return input;
|
|
85105
|
+
}
|
|
85106
|
+
|
|
85107
|
+
template <>
|
|
85108
|
+
interval_t DateTrunc::ISOYearOperator::Operation(interval_t input) {
|
|
85109
|
+
return YearOperator::Operation<interval_t, interval_t>(input);
|
|
85110
|
+
}
|
|
85111
|
+
|
|
85112
|
+
template <>
|
|
85113
|
+
interval_t DateTrunc::DayOperator::Operation(interval_t input) {
|
|
85114
|
+
input.micros = 0;
|
|
85115
|
+
return input;
|
|
85116
|
+
}
|
|
85117
|
+
|
|
85118
|
+
template <>
|
|
85119
|
+
interval_t DateTrunc::HourOperator::Operation(interval_t input) {
|
|
85120
|
+
input.micros = (input.micros / Interval::MICROS_PER_HOUR) * Interval::MICROS_PER_HOUR;
|
|
85121
|
+
return input;
|
|
85122
|
+
}
|
|
85123
|
+
|
|
85124
|
+
template <>
|
|
85125
|
+
interval_t DateTrunc::MinuteOperator::Operation(interval_t input) {
|
|
85126
|
+
input.micros = (input.micros / Interval::MICROS_PER_MINUTE) * Interval::MICROS_PER_MINUTE;
|
|
85127
|
+
return input;
|
|
85128
|
+
}
|
|
85129
|
+
|
|
85130
|
+
template <>
|
|
85131
|
+
interval_t DateTrunc::SecondOperator::Operation(interval_t input) {
|
|
85132
|
+
input.micros = (input.micros / Interval::MICROS_PER_SEC) * Interval::MICROS_PER_SEC;
|
|
85133
|
+
return input;
|
|
85134
|
+
}
|
|
85135
|
+
|
|
85136
|
+
template <>
|
|
85137
|
+
interval_t DateTrunc::MillisecondOperator::Operation(interval_t input) {
|
|
85138
|
+
input.micros = (input.micros / Interval::MICROS_PER_MSEC) * Interval::MICROS_PER_MSEC;
|
|
85139
|
+
return input;
|
|
85140
|
+
}
|
|
85141
|
+
|
|
85142
|
+
template <>
|
|
85143
|
+
interval_t DateTrunc::MicrosecondOperator::Operation(interval_t input) {
|
|
85144
|
+
return input;
|
|
85145
|
+
}
|
|
85146
|
+
|
|
85189
85147
|
template <class TA, class TR>
|
|
85190
85148
|
static TR TruncateElement(DatePartSpecifier type, TA element) {
|
|
85191
85149
|
switch (type) {
|
|
@@ -85289,7 +85247,7 @@ static void DateTruncUnaryExecutor(DatePartSpecifier type, Vector &left, Vector
|
|
|
85289
85247
|
}
|
|
85290
85248
|
}
|
|
85291
85249
|
|
|
85292
|
-
template <typename
|
|
85250
|
+
template <typename TA, typename TR>
|
|
85293
85251
|
static void DateTruncFunction(DataChunk &args, ExpressionState &state, Vector &result) {
|
|
85294
85252
|
D_ASSERT(args.ColumnCount() == 2);
|
|
85295
85253
|
auto &part_arg = args.data[0];
|
|
@@ -85302,20 +85260,22 @@ static void DateTruncFunction(DataChunk &args, ExpressionState &state, Vector &r
|
|
|
85302
85260
|
ConstantVector::SetNull(result, true);
|
|
85303
85261
|
} else {
|
|
85304
85262
|
const auto type = GetDatePartSpecifier(ConstantVector::GetData<string_t>(part_arg)->GetString());
|
|
85305
|
-
DateTruncUnaryExecutor<
|
|
85263
|
+
DateTruncUnaryExecutor<TA, TR>(type, date_arg, result, args.size());
|
|
85306
85264
|
}
|
|
85307
85265
|
} else {
|
|
85308
|
-
BinaryExecutor::ExecuteStandard<string_t,
|
|
85309
|
-
|
|
85266
|
+
BinaryExecutor::ExecuteStandard<string_t, TA, TR, DateTruncBinaryOperator>(part_arg, date_arg, result,
|
|
85267
|
+
args.size());
|
|
85310
85268
|
}
|
|
85311
85269
|
}
|
|
85312
85270
|
|
|
85313
85271
|
void DateTruncFun::RegisterFunction(BuiltinFunctions &set) {
|
|
85314
85272
|
ScalarFunctionSet date_trunc("date_trunc");
|
|
85315
85273
|
date_trunc.AddFunction(ScalarFunction({LogicalType::VARCHAR, LogicalType::TIMESTAMP}, LogicalType::TIMESTAMP,
|
|
85316
|
-
DateTruncFunction<timestamp_t>));
|
|
85317
|
-
date_trunc.AddFunction(
|
|
85318
|
-
|
|
85274
|
+
DateTruncFunction<timestamp_t, timestamp_t>));
|
|
85275
|
+
date_trunc.AddFunction(ScalarFunction({LogicalType::VARCHAR, LogicalType::DATE}, LogicalType::TIMESTAMP,
|
|
85276
|
+
DateTruncFunction<date_t, timestamp_t>));
|
|
85277
|
+
date_trunc.AddFunction(ScalarFunction({LogicalType::VARCHAR, LogicalType::INTERVAL}, LogicalType::INTERVAL,
|
|
85278
|
+
DateTruncFunction<interval_t, interval_t>));
|
|
85319
85279
|
set.AddFunction(date_trunc);
|
|
85320
85280
|
date_trunc.name = "datetrunc";
|
|
85321
85281
|
set.AddFunction(date_trunc);
|
|
@@ -85647,7 +85607,8 @@ void StrfTimeFormat::AddFormatSpecifier(string preceding_literal, StrTimeSpecifi
|
|
|
85647
85607
|
StrTimeFormat::AddFormatSpecifier(move(preceding_literal), specifier);
|
|
85648
85608
|
}
|
|
85649
85609
|
|
|
85650
|
-
idx_t StrfTimeFormat::GetSpecifierLength(StrTimeSpecifier specifier, date_t date, dtime_t time
|
|
85610
|
+
idx_t StrfTimeFormat::GetSpecifierLength(StrTimeSpecifier specifier, date_t date, dtime_t time, int32_t utc_offset,
|
|
85611
|
+
const char *tz_name) {
|
|
85651
85612
|
switch (specifier) {
|
|
85652
85613
|
case StrTimeSpecifier::FULL_WEEKDAY_NAME:
|
|
85653
85614
|
return Date::DAY_NAMES[Date::ExtractISODayOfTheWeek(date) % 7].GetSize();
|
|
@@ -85664,9 +85625,12 @@ idx_t StrfTimeFormat::GetSpecifierLength(StrTimeSpecifier specifier, date_t date
|
|
|
85664
85625
|
return len;
|
|
85665
85626
|
}
|
|
85666
85627
|
case StrTimeSpecifier::UTC_OFFSET:
|
|
85667
|
-
//
|
|
85668
|
-
return 3;
|
|
85628
|
+
// ±HH or ±HH:MM
|
|
85629
|
+
return (utc_offset % 60) ? 6 : 3;
|
|
85669
85630
|
case StrTimeSpecifier::TZ_NAME:
|
|
85631
|
+
if (tz_name) {
|
|
85632
|
+
return strlen(tz_name);
|
|
85633
|
+
}
|
|
85670
85634
|
// empty for now
|
|
85671
85635
|
return 0;
|
|
85672
85636
|
case StrTimeSpecifier::HOUR_24_DECIMAL:
|
|
@@ -85711,11 +85675,11 @@ idx_t StrfTimeFormat::GetSpecifierLength(StrTimeSpecifier specifier, date_t date
|
|
|
85711
85675
|
}
|
|
85712
85676
|
|
|
85713
85677
|
//! Returns the total length of the date formatted by this format specifier
|
|
85714
|
-
idx_t StrfTimeFormat::GetLength(date_t date, dtime_t time) {
|
|
85678
|
+
idx_t StrfTimeFormat::GetLength(date_t date, dtime_t time, int32_t utc_offset, const char *tz_name) {
|
|
85715
85679
|
idx_t size = constant_size;
|
|
85716
85680
|
if (!var_length_specifiers.empty()) {
|
|
85717
85681
|
for (auto &specifier : var_length_specifiers) {
|
|
85718
|
-
size += GetSpecifierLength(specifier, date, time);
|
|
85682
|
+
size += GetSpecifierLength(specifier, date, time, utc_offset, tz_name);
|
|
85719
85683
|
}
|
|
85720
85684
|
}
|
|
85721
85685
|
return size;
|
|
@@ -85828,8 +85792,9 @@ char *StrfTimeFormat::WriteDateSpecifier(StrTimeSpecifier specifier, date_t date
|
|
|
85828
85792
|
return target;
|
|
85829
85793
|
}
|
|
85830
85794
|
|
|
85831
|
-
char *StrfTimeFormat::WriteStandardSpecifier(StrTimeSpecifier specifier, int32_t data[], char *
|
|
85832
|
-
|
|
85795
|
+
char *StrfTimeFormat::WriteStandardSpecifier(StrTimeSpecifier specifier, int32_t data[], const char *tz_name,
|
|
85796
|
+
char *target) {
|
|
85797
|
+
// data contains [0] year, [1] month, [2] day, [3] hour, [4] minute, [5] second, [6] msec, [7] utc
|
|
85833
85798
|
switch (specifier) {
|
|
85834
85799
|
case StrTimeSpecifier::DAY_OF_MONTH_PADDED:
|
|
85835
85800
|
target = WritePadded2(target, data[2]);
|
|
@@ -85892,13 +85857,24 @@ char *StrfTimeFormat::WriteStandardSpecifier(StrTimeSpecifier specifier, int32_t
|
|
|
85892
85857
|
case StrTimeSpecifier::MILLISECOND_PADDED:
|
|
85893
85858
|
target = WritePadded3(target, data[6] / 1000);
|
|
85894
85859
|
break;
|
|
85895
|
-
case StrTimeSpecifier::UTC_OFFSET:
|
|
85896
|
-
*target++ = '+';
|
|
85897
|
-
|
|
85898
|
-
|
|
85860
|
+
case StrTimeSpecifier::UTC_OFFSET: {
|
|
85861
|
+
*target++ = (data[7] < 0) ? '-' : '+';
|
|
85862
|
+
|
|
85863
|
+
auto offset = abs(data[7]);
|
|
85864
|
+
auto offset_hours = offset / Interval::MINS_PER_HOUR;
|
|
85865
|
+
auto offset_minutes = offset % Interval::MINS_PER_HOUR;
|
|
85866
|
+
target = WritePadded2(target, offset_hours);
|
|
85867
|
+
if (offset_minutes) {
|
|
85868
|
+
*target++ = ':';
|
|
85869
|
+
target = WritePadded2(target, offset_minutes);
|
|
85870
|
+
}
|
|
85899
85871
|
break;
|
|
85872
|
+
}
|
|
85900
85873
|
case StrTimeSpecifier::TZ_NAME:
|
|
85901
|
-
|
|
85874
|
+
if (tz_name) {
|
|
85875
|
+
strcpy(target, tz_name);
|
|
85876
|
+
target += strlen(tz_name);
|
|
85877
|
+
}
|
|
85902
85878
|
break;
|
|
85903
85879
|
case StrTimeSpecifier::DAY_OF_MONTH: {
|
|
85904
85880
|
target = Write2(target, data[2] % 100);
|
|
@@ -85938,7 +85914,7 @@ char *StrfTimeFormat::WriteStandardSpecifier(StrTimeSpecifier specifier, int32_t
|
|
|
85938
85914
|
return target;
|
|
85939
85915
|
}
|
|
85940
85916
|
|
|
85941
|
-
void StrfTimeFormat::FormatString(date_t date, int32_t data[
|
|
85917
|
+
void StrfTimeFormat::FormatString(date_t date, int32_t data[8], const char *tz_name, char *target) {
|
|
85942
85918
|
D_ASSERT(specifiers.size() + 1 == literals.size());
|
|
85943
85919
|
idx_t i;
|
|
85944
85920
|
for (i = 0; i < specifiers.size(); i++) {
|
|
@@ -85949,7 +85925,7 @@ void StrfTimeFormat::FormatString(date_t date, int32_t data[7], char *target) {
|
|
|
85949
85925
|
if (is_date_specifier[i]) {
|
|
85950
85926
|
target = WriteDateSpecifier(specifiers[i], date, target);
|
|
85951
85927
|
} else {
|
|
85952
|
-
target = WriteStandardSpecifier(specifiers[i], data, target);
|
|
85928
|
+
target = WriteStandardSpecifier(specifiers[i], data, tz_name, target);
|
|
85953
85929
|
}
|
|
85954
85930
|
}
|
|
85955
85931
|
// copy the final literal into the target
|
|
@@ -85957,11 +85933,12 @@ void StrfTimeFormat::FormatString(date_t date, int32_t data[7], char *target) {
|
|
|
85957
85933
|
}
|
|
85958
85934
|
|
|
85959
85935
|
void StrfTimeFormat::FormatString(date_t date, dtime_t time, char *target) {
|
|
85960
|
-
int32_t data[
|
|
85936
|
+
int32_t data[8]; // year, month, day, hour, min, sec, µs, offset
|
|
85961
85937
|
Date::Convert(date, data[0], data[1], data[2]);
|
|
85962
85938
|
Time::Convert(time, data[3], data[4], data[5], data[6]);
|
|
85939
|
+
data[7] = 0;
|
|
85963
85940
|
|
|
85964
|
-
FormatString(date, data, target);
|
|
85941
|
+
FormatString(date, data, nullptr, target);
|
|
85965
85942
|
}
|
|
85966
85943
|
|
|
85967
85944
|
string StrfTimeFormat::Format(timestamp_t timestamp, const string &format_str) {
|
|
@@ -85971,7 +85948,7 @@ string StrfTimeFormat::Format(timestamp_t timestamp, const string &format_str) {
|
|
|
85971
85948
|
auto date = Timestamp::GetDate(timestamp);
|
|
85972
85949
|
auto time = Timestamp::GetTime(timestamp);
|
|
85973
85950
|
|
|
85974
|
-
auto len = format.GetLength(date, time);
|
|
85951
|
+
auto len = format.GetLength(date, time, 0, nullptr);
|
|
85975
85952
|
auto result = unique_ptr<char[]>(new char[len]);
|
|
85976
85953
|
format.FormatString(date, time, result.get());
|
|
85977
85954
|
return string(result.get(), len);
|
|
@@ -86187,7 +86164,7 @@ static void StrfTimeFunctionDate(DataChunk &args, ExpressionState &state, Vector
|
|
|
86187
86164
|
}
|
|
86188
86165
|
UnaryExecutor::Execute<date_t, string_t>(args.data[REVERSED ? 1 : 0], result, args.size(), [&](date_t input) {
|
|
86189
86166
|
dtime_t time(0);
|
|
86190
|
-
idx_t len = info.format.GetLength(input, time);
|
|
86167
|
+
idx_t len = info.format.GetLength(input, time, 0, nullptr);
|
|
86191
86168
|
string_t target = StringVector::EmptyString(result, len);
|
|
86192
86169
|
info.format.FormatString(input, time, target.GetDataWriteable());
|
|
86193
86170
|
target.Finalize();
|
|
@@ -86211,7 +86188,7 @@ static void StrfTimeFunctionTimestamp(DataChunk &args, ExpressionState &state, V
|
|
|
86211
86188
|
date_t date;
|
|
86212
86189
|
dtime_t time;
|
|
86213
86190
|
Timestamp::Convert(input, date, time);
|
|
86214
|
-
idx_t len = info.format.GetLength(date, time);
|
|
86191
|
+
idx_t len = info.format.GetLength(date, time, 0, nullptr);
|
|
86215
86192
|
string_t target = StringVector::EmptyString(result, len);
|
|
86216
86193
|
info.format.FormatString(date, time, target.GetDataWriteable());
|
|
86217
86194
|
target.Finalize();
|
|
@@ -86319,6 +86296,7 @@ bool StrpTimeFormat::Parse(string_t str, ParseResult &result) {
|
|
|
86319
86296
|
result_data[4] = 0;
|
|
86320
86297
|
result_data[5] = 0;
|
|
86321
86298
|
result_data[6] = 0;
|
|
86299
|
+
result_data[7] = 0;
|
|
86322
86300
|
|
|
86323
86301
|
auto data = str.GetDataUnsafe();
|
|
86324
86302
|
idx_t size = str.GetSize();
|
|
@@ -86591,8 +86569,7 @@ bool StrpTimeFormat::Parse(string_t str, ParseResult &result) {
|
|
|
86591
86569
|
error_position = pos;
|
|
86592
86570
|
return false;
|
|
86593
86571
|
}
|
|
86594
|
-
result_data[
|
|
86595
|
-
result_data[4] -= minute_offset;
|
|
86572
|
+
result_data[7] = hour_offset * Interval::MINS_PER_HOUR + minute_offset;
|
|
86596
86573
|
break;
|
|
86597
86574
|
}
|
|
86598
86575
|
case StrTimeSpecifier::TZ_NAME: {
|
|
@@ -86600,11 +86577,20 @@ bool StrpTimeFormat::Parse(string_t str, ParseResult &result) {
|
|
|
86600
86577
|
while (pos < size && StringUtil::CharacterIsSpace(data[pos])) {
|
|
86601
86578
|
pos++;
|
|
86602
86579
|
}
|
|
86580
|
+
const auto tz_begin = data + pos;
|
|
86603
86581
|
// stop when we encounter a space or the end of the string
|
|
86604
86582
|
while (pos < size && !StringUtil::CharacterIsSpace(data[pos])) {
|
|
86605
86583
|
pos++;
|
|
86606
86584
|
}
|
|
86607
|
-
|
|
86585
|
+
const auto tz_end = data + pos;
|
|
86586
|
+
// Can't fully validate without a list - caller's responsibility.
|
|
86587
|
+
// But tz must not be empty.
|
|
86588
|
+
if (tz_end == tz_begin) {
|
|
86589
|
+
error_message = "Empty Time Zone name";
|
|
86590
|
+
error_position = tz_begin - data;
|
|
86591
|
+
return false;
|
|
86592
|
+
}
|
|
86593
|
+
result.tz.assign(tz_begin, tz_end);
|
|
86608
86594
|
break;
|
|
86609
86595
|
}
|
|
86610
86596
|
default:
|
|
@@ -86701,6 +86687,9 @@ static unique_ptr<FunctionData> StrpTimeBindFunction(ClientContext &context, Sca
|
|
|
86701
86687
|
if (!error.empty()) {
|
|
86702
86688
|
throw InvalidInputException("Failed to parse format specifier %s: %s", format_string, error);
|
|
86703
86689
|
}
|
|
86690
|
+
if (format.HasFormatSpecifier(StrTimeSpecifier::UTC_OFFSET)) {
|
|
86691
|
+
bound_function.return_type = LogicalType::TIMESTAMP_TZ;
|
|
86692
|
+
}
|
|
86704
86693
|
}
|
|
86705
86694
|
return make_unique<StrpTimeBindData>(format);
|
|
86706
86695
|
}
|
|
@@ -86732,7 +86721,9 @@ date_t StrpTimeFormat::ParseResult::ToDate() {
|
|
|
86732
86721
|
|
|
86733
86722
|
timestamp_t StrpTimeFormat::ParseResult::ToTimestamp() {
|
|
86734
86723
|
date_t date = Date::FromDate(data[0], data[1], data[2]);
|
|
86735
|
-
|
|
86724
|
+
const auto hour_offset = data[7] / Interval::MINS_PER_HOUR;
|
|
86725
|
+
const auto mins_offset = data[7] % Interval::MINS_PER_HOUR;
|
|
86726
|
+
dtime_t time = Time::FromTime(data[3] - hour_offset, data[4] - mins_offset, data[5], data[6]);
|
|
86736
86727
|
return Timestamp::FromDatetime(date, time);
|
|
86737
86728
|
}
|
|
86738
86729
|
|
|
@@ -88774,6 +88765,371 @@ void ListExtractFun::RegisterFunction(BuiltinFunctions &set) {
|
|
|
88774
88765
|
|
|
88775
88766
|
|
|
88776
88767
|
|
|
88768
|
+
namespace duckdb {
|
|
88769
|
+
|
|
88770
|
+
struct ListSortBindData : public FunctionData {
|
|
88771
|
+
ListSortBindData(OrderType order_type_p, OrderByNullType null_order_p, LogicalType &return_type_p,
|
|
88772
|
+
LogicalType &child_type_p, ClientContext &context_p);
|
|
88773
|
+
~ListSortBindData() override;
|
|
88774
|
+
|
|
88775
|
+
OrderType order_type;
|
|
88776
|
+
OrderByNullType null_order;
|
|
88777
|
+
LogicalType return_type;
|
|
88778
|
+
LogicalType child_type;
|
|
88779
|
+
|
|
88780
|
+
vector<LogicalType> types;
|
|
88781
|
+
vector<LogicalType> payload_types;
|
|
88782
|
+
|
|
88783
|
+
ClientContext &context;
|
|
88784
|
+
unique_ptr<GlobalSortState> global_sort_state;
|
|
88785
|
+
RowLayout payload_layout;
|
|
88786
|
+
vector<BoundOrderByNode> orders;
|
|
88787
|
+
|
|
88788
|
+
unique_ptr<FunctionData> Copy() override;
|
|
88789
|
+
};
|
|
88790
|
+
|
|
88791
|
+
ListSortBindData::ListSortBindData(OrderType order_type_p, OrderByNullType null_order_p, LogicalType &return_type_p,
|
|
88792
|
+
LogicalType &child_type_p, ClientContext &context_p)
|
|
88793
|
+
: order_type(order_type_p), null_order(null_order_p), return_type(return_type_p), child_type(child_type_p),
|
|
88794
|
+
context(context_p) {
|
|
88795
|
+
|
|
88796
|
+
// get the vector types
|
|
88797
|
+
types.emplace_back(LogicalType::USMALLINT);
|
|
88798
|
+
types.emplace_back(child_type);
|
|
88799
|
+
D_ASSERT(types.size() == 2);
|
|
88800
|
+
|
|
88801
|
+
// get the payload types
|
|
88802
|
+
payload_types.emplace_back(LogicalType::UINTEGER);
|
|
88803
|
+
D_ASSERT(payload_types.size() == 1);
|
|
88804
|
+
|
|
88805
|
+
// initialize the payload layout
|
|
88806
|
+
payload_layout.Initialize(payload_types);
|
|
88807
|
+
|
|
88808
|
+
// get the BoundOrderByNode
|
|
88809
|
+
auto idx_col_expr = make_unique_base<Expression, BoundReferenceExpression>(LogicalType::USMALLINT, 0);
|
|
88810
|
+
auto lists_col_expr = make_unique_base<Expression, BoundReferenceExpression>(child_type, 1);
|
|
88811
|
+
orders.emplace_back(OrderType::ASCENDING, OrderByNullType::ORDER_DEFAULT, move(idx_col_expr));
|
|
88812
|
+
orders.emplace_back(order_type, null_order, move(lists_col_expr));
|
|
88813
|
+
}
|
|
88814
|
+
|
|
88815
|
+
unique_ptr<FunctionData> ListSortBindData::Copy() {
|
|
88816
|
+
return make_unique<ListSortBindData>(order_type, null_order, return_type, child_type, context);
|
|
88817
|
+
}
|
|
88818
|
+
|
|
88819
|
+
ListSortBindData::~ListSortBindData() {
|
|
88820
|
+
}
|
|
88821
|
+
|
|
88822
|
+
// create the key_chunk and the payload_chunk and sink them into the local_sort_state
|
|
88823
|
+
void SinkDataChunk(Vector *child_vector, SelectionVector &sel, idx_t offset_lists_indices, vector<LogicalType> &types,
|
|
88824
|
+
vector<LogicalType> &payload_types, Vector &payload_vector, LocalSortState &local_sort_state,
|
|
88825
|
+
bool &data_to_sort, Vector &lists_indices) {
|
|
88826
|
+
|
|
88827
|
+
// slice the child vector
|
|
88828
|
+
Vector slice(*child_vector, sel, offset_lists_indices);
|
|
88829
|
+
|
|
88830
|
+
// initialize and fill key_chunk
|
|
88831
|
+
DataChunk key_chunk;
|
|
88832
|
+
key_chunk.InitializeEmpty(types);
|
|
88833
|
+
key_chunk.data[0].Reference(lists_indices);
|
|
88834
|
+
key_chunk.data[1].Reference(slice);
|
|
88835
|
+
key_chunk.SetCardinality(offset_lists_indices);
|
|
88836
|
+
|
|
88837
|
+
// initialize and fill key_chunk and payload_chunk
|
|
88838
|
+
DataChunk payload_chunk;
|
|
88839
|
+
payload_chunk.InitializeEmpty(payload_types);
|
|
88840
|
+
payload_chunk.data[0].Reference(payload_vector);
|
|
88841
|
+
payload_chunk.SetCardinality(offset_lists_indices);
|
|
88842
|
+
|
|
88843
|
+
// sink
|
|
88844
|
+
local_sort_state.SinkChunk(key_chunk, payload_chunk);
|
|
88845
|
+
data_to_sort = true;
|
|
88846
|
+
}
|
|
88847
|
+
|
|
88848
|
+
static void ListSortFunction(DataChunk &args, ExpressionState &state, Vector &result) {
|
|
88849
|
+
|
|
88850
|
+
D_ASSERT(args.ColumnCount() >= 1 && args.ColumnCount() <= 3);
|
|
88851
|
+
auto count = args.size();
|
|
88852
|
+
Vector &lists = args.data[0];
|
|
88853
|
+
|
|
88854
|
+
result.SetVectorType(VectorType::FLAT_VECTOR);
|
|
88855
|
+
auto &result_validity = FlatVector::Validity(result);
|
|
88856
|
+
|
|
88857
|
+
if (lists.GetType().id() == LogicalTypeId::SQLNULL) {
|
|
88858
|
+
result_validity.SetInvalid(0);
|
|
88859
|
+
return;
|
|
88860
|
+
}
|
|
88861
|
+
|
|
88862
|
+
auto &func_expr = (BoundFunctionExpression &)state.expr;
|
|
88863
|
+
auto &info = (ListSortBindData &)*func_expr.bind_info;
|
|
88864
|
+
|
|
88865
|
+
// initialize the global and local sorting state
|
|
88866
|
+
auto &buffer_manager = BufferManager::GetBufferManager(info.context);
|
|
88867
|
+
info.global_sort_state = make_unique<GlobalSortState>(buffer_manager, info.orders, info.payload_layout);
|
|
88868
|
+
auto &global_sort_state = *info.global_sort_state;
|
|
88869
|
+
LocalSortState local_sort_state;
|
|
88870
|
+
local_sort_state.Initialize(global_sort_state, buffer_manager);
|
|
88871
|
+
|
|
88872
|
+
// get the child vector
|
|
88873
|
+
auto lists_size = ListVector::GetListSize(lists);
|
|
88874
|
+
auto &child_vector = ListVector::GetEntry(lists);
|
|
88875
|
+
VectorData child_data;
|
|
88876
|
+
child_vector.Orrify(lists_size, child_data);
|
|
88877
|
+
|
|
88878
|
+
// get the lists data
|
|
88879
|
+
VectorData lists_data;
|
|
88880
|
+
lists.Orrify(count, lists_data);
|
|
88881
|
+
auto list_entries = (list_entry_t *)lists_data.data;
|
|
88882
|
+
|
|
88883
|
+
// create the lists_indices vector, this contains an element for each list's entry,
|
|
88884
|
+
// the element corresponds to the list's index, e.g. for [1, 2, 4], [5, 4]
|
|
88885
|
+
// lists_indices contains [0, 0, 0, 1, 1]
|
|
88886
|
+
Vector lists_indices(LogicalType::USMALLINT);
|
|
88887
|
+
auto lists_indices_data = FlatVector::GetData<uint16_t>(lists_indices);
|
|
88888
|
+
|
|
88889
|
+
// create the payload_vector, this is just a vector containing incrementing integers
|
|
88890
|
+
// this will later be used as the 'new' selection vector of the child_vector, after
|
|
88891
|
+
// rearranging the payload according to the sorting order
|
|
88892
|
+
Vector payload_vector(LogicalType::UINTEGER);
|
|
88893
|
+
auto payload_vector_data = FlatVector::GetData<uint32_t>(payload_vector);
|
|
88894
|
+
|
|
88895
|
+
// selection vector pointing to the data of the child vector,
|
|
88896
|
+
// used for slicing the child_vector correctly
|
|
88897
|
+
SelectionVector sel(STANDARD_VECTOR_SIZE);
|
|
88898
|
+
|
|
88899
|
+
idx_t offset_lists_indices = 0;
|
|
88900
|
+
uint32_t incr_payload_count = 0;
|
|
88901
|
+
bool data_to_sort = false;
|
|
88902
|
+
|
|
88903
|
+
for (idx_t i = 0; i < count; i++) {
|
|
88904
|
+
|
|
88905
|
+
auto lists_index = lists_data.sel->get_index(i);
|
|
88906
|
+
const auto &list_entry = list_entries[lists_index];
|
|
88907
|
+
|
|
88908
|
+
// nothing to do for this list
|
|
88909
|
+
if (!lists_data.validity.RowIsValid(lists_index)) {
|
|
88910
|
+
result_validity.SetInvalid(i);
|
|
88911
|
+
continue;
|
|
88912
|
+
}
|
|
88913
|
+
|
|
88914
|
+
// empty list, no sorting required
|
|
88915
|
+
if (list_entry.length == 0) {
|
|
88916
|
+
continue;
|
|
88917
|
+
}
|
|
88918
|
+
|
|
88919
|
+
for (idx_t child_idx = 0; child_idx < list_entry.length; child_idx++) {
|
|
88920
|
+
|
|
88921
|
+
// lists_indices vector is full, sink
|
|
88922
|
+
if (offset_lists_indices == STANDARD_VECTOR_SIZE) {
|
|
88923
|
+
SinkDataChunk(&child_vector, sel, offset_lists_indices, info.types, info.payload_types, payload_vector,
|
|
88924
|
+
local_sort_state, data_to_sort, lists_indices);
|
|
88925
|
+
offset_lists_indices = 0;
|
|
88926
|
+
}
|
|
88927
|
+
|
|
88928
|
+
auto source_idx = child_data.sel->get_index(list_entry.offset + child_idx);
|
|
88929
|
+
sel.set_index(offset_lists_indices, source_idx);
|
|
88930
|
+
lists_indices_data[offset_lists_indices] = (uint32_t)i;
|
|
88931
|
+
payload_vector_data[offset_lists_indices] = incr_payload_count;
|
|
88932
|
+
offset_lists_indices++;
|
|
88933
|
+
incr_payload_count++;
|
|
88934
|
+
}
|
|
88935
|
+
}
|
|
88936
|
+
|
|
88937
|
+
if (offset_lists_indices != 0) {
|
|
88938
|
+
SinkDataChunk(&child_vector, sel, offset_lists_indices, info.types, info.payload_types, payload_vector,
|
|
88939
|
+
local_sort_state, data_to_sort, lists_indices);
|
|
88940
|
+
}
|
|
88941
|
+
|
|
88942
|
+
if (data_to_sort) {
|
|
88943
|
+
|
|
88944
|
+
// add local state to global state, which sorts the data
|
|
88945
|
+
global_sort_state.AddLocalState(local_sort_state);
|
|
88946
|
+
global_sort_state.PrepareMergePhase();
|
|
88947
|
+
|
|
88948
|
+
// selection vector that is to be filled with the 'sorted' payload
|
|
88949
|
+
SelectionVector sel_sorted(incr_payload_count);
|
|
88950
|
+
idx_t sel_sorted_idx = 0;
|
|
88951
|
+
|
|
88952
|
+
// scan the sorted row data
|
|
88953
|
+
PayloadScanner scanner(*global_sort_state.sorted_blocks[0]->payload_data, global_sort_state);
|
|
88954
|
+
for (;;) {
|
|
88955
|
+
DataChunk result_chunk;
|
|
88956
|
+
result_chunk.Initialize(info.payload_types);
|
|
88957
|
+
result_chunk.SetCardinality(0);
|
|
88958
|
+
scanner.Scan(result_chunk);
|
|
88959
|
+
if (result_chunk.size() == 0) {
|
|
88960
|
+
break;
|
|
88961
|
+
}
|
|
88962
|
+
|
|
88963
|
+
// construct the selection vector with the new order from the result vectors
|
|
88964
|
+
Vector result_vector(result_chunk.data[0]);
|
|
88965
|
+
auto result_data = FlatVector::GetData<uint32_t>(result_vector);
|
|
88966
|
+
auto row_count = result_chunk.size();
|
|
88967
|
+
|
|
88968
|
+
for (idx_t i = 0; i < row_count; i++) {
|
|
88969
|
+
sel_sorted.set_index(sel_sorted_idx, result_data[i]);
|
|
88970
|
+
sel_sorted_idx++;
|
|
88971
|
+
}
|
|
88972
|
+
}
|
|
88973
|
+
|
|
88974
|
+
D_ASSERT(sel_sorted_idx == incr_payload_count);
|
|
88975
|
+
child_vector.Slice(sel_sorted, sel_sorted_idx);
|
|
88976
|
+
child_vector.Normalify(sel_sorted_idx);
|
|
88977
|
+
}
|
|
88978
|
+
|
|
88979
|
+
result.Reference(lists);
|
|
88980
|
+
}
|
|
88981
|
+
|
|
88982
|
+
// Shared bind logic for list_sort/array_sort and their reverse variants.
// Fixes up the bound function's argument/return types and packages the already-resolved
// 'order' and 'null_order' into the bind data used at execution time.
static unique_ptr<FunctionData> ListSortBind(ClientContext &context, ScalarFunction &bound_function,
                                             vector<unique_ptr<Expression>> &arguments, OrderType &order,
                                             OrderByNullType &null_order) {

	// a constant NULL argument short-circuits: the function maps NULL -> NULL
	if (arguments[0]->return_type.id() == LogicalTypeId::SQLNULL) {
		bound_function.arguments[0] = LogicalType::SQLNULL;
		bound_function.return_type = LogicalType::SQLNULL;
		return make_unique<VariableReturnBindData>(bound_function.return_type);
	}

	// sorting a list returns a list of the same (child) type as the input
	bound_function.arguments[0] = arguments[0]->return_type;
	bound_function.return_type = arguments[0]->return_type;
	auto child_type = ListType::GetChildType(arguments[0]->return_type);

	return make_unique<ListSortBindData>(order, null_order, bound_function.return_type, child_type, context);
}
|
|
88998
|
+
|
|
88999
|
+
// Extracts the NULL sorting order from the constant argument at position 'idx'.
// Accepts (case-insensitively) "NULLS FIRST" or "NULLS LAST"; anything else is an error.
OrderByNullType GetNullOrder(vector<unique_ptr<Expression>> &arguments, idx_t idx) {

	if (!arguments[idx]->IsFoldable()) {
		throw InvalidInputException("Null sorting order must be a constant");
	}
	// evaluate the constant and normalize it to upper case before comparing
	auto null_order_name = ExpressionExecutor::EvaluateScalar(*arguments[idx]).ToString();
	std::transform(null_order_name.begin(), null_order_name.end(), null_order_name.begin(), ::toupper);

	if (null_order_name == "NULLS LAST") {
		return OrderByNullType::NULLS_LAST;
	}
	if (null_order_name == "NULLS FIRST") {
		return OrderByNullType::NULLS_FIRST;
	}
	throw InvalidInputException("Null sorting order must be either NULLS FIRST or NULLS LAST");
}
|
|
89016
|
+
|
|
89017
|
+
// Bind for list_sort/array_sort: resolves the optional constant 'order' (ASC/DESC)
// and 'null order' (NULLS FIRST/LAST) arguments, falling back to the database defaults.
static unique_ptr<FunctionData> ListNormalSortBind(ClientContext &context, ScalarFunction &bound_function,
                                                   vector<unique_ptr<Expression>> &arguments) {

	D_ASSERT(bound_function.arguments.size() >= 1 && bound_function.arguments.size() <= 3);
	D_ASSERT(arguments.size() >= 1 && arguments.size() <= 3);

	// start from the configured defaults
	auto &config = DBConfig::GetConfig(context);
	auto order = config.default_order_type;
	auto null_order = config.default_null_order;

	// optional second argument: the sorting order, which must be a constant
	if (arguments.size() >= 2) {
		if (!arguments[1]->IsFoldable()) {
			throw InvalidInputException("Sorting order must be a constant");
		}
		auto order_name = ExpressionExecutor::EvaluateScalar(*arguments[1]).ToString();
		std::transform(order_name.begin(), order_name.end(), order_name.begin(), ::toupper);
		if (order_name == "DESC") {
			order = OrderType::DESCENDING;
		} else if (order_name == "ASC") {
			order = OrderType::ASCENDING;
		} else {
			throw InvalidInputException("Sorting order must be either ASC or DESC");
		}
	}

	// optional third argument: the NULL sorting order
	if (arguments.size() == 3) {
		null_order = GetNullOrder(arguments, 2);
	}

	return ListSortBind(context, bound_function, arguments, order, null_order);
}
|
|
89054
|
+
|
|
89055
|
+
// Bind for list_reverse_sort/array_reverse_sort: sorts with the *inverse* of the
// configured default order. An optional second argument sets the NULL sorting order.
static unique_ptr<FunctionData> ListReverseSortBind(ClientContext &context, ScalarFunction &bound_function,
                                                    vector<unique_ptr<Expression>> &arguments) {

	D_ASSERT(bound_function.arguments.size() == 1 || bound_function.arguments.size() == 2);
	D_ASSERT(arguments.size() == 1 || arguments.size() == 2);

	auto &config = DBConfig::GetConfig(context);

	// invert the configured default sorting order
	auto order = OrderType::ASCENDING;
	if (config.default_order_type == OrderType::ASCENDING) {
		order = OrderType::DESCENDING;
	}

	// optional second argument: the NULL sorting order; otherwise use the default
	auto null_order = (arguments.size() == 2) ? GetNullOrder(arguments, 1) : config.default_null_order;

	return ListSortBind(context, bound_function, arguments, order, null_order);
}
|
|
89073
|
+
|
|
89074
|
+
// Registers the list_sort/array_sort and list_reverse_sort/array_reverse_sort
// overload sets. NOTE: overloads are tried in registration order, so the order of
// AddFunction calls below is part of the behavior.
void ListSortFun::RegisterFunction(BuiltinFunctions &set) {

	// normal sort

	// one parameter: list
	ScalarFunction sort({LogicalType::LIST(LogicalType::ANY)}, LogicalType::LIST(LogicalType::ANY), ListSortFunction,
	                    false, false, ListNormalSortBind);

	// two parameters: list, order
	ScalarFunction sort_order({LogicalType::LIST(LogicalType::ANY), LogicalType::VARCHAR},
	                          LogicalType::LIST(LogicalType::ANY), ListSortFunction, false, false, ListNormalSortBind);

	// three parameters: list, order, null order
	ScalarFunction sort_orders({LogicalType::LIST(LogicalType::ANY), LogicalType::VARCHAR, LogicalType::VARCHAR},
	                           LogicalType::LIST(LogicalType::ANY), ListSortFunction, false, false, ListNormalSortBind);

	ScalarFunctionSet list_sort("list_sort");
	list_sort.AddFunction(sort);
	list_sort.AddFunction(sort_order);
	list_sort.AddFunction(sort_orders);
	set.AddFunction(list_sort);

	// "array_sort" exposes the same overloads under an alias
	ScalarFunctionSet array_sort("array_sort");
	array_sort.AddFunction(sort);
	array_sort.AddFunction(sort_order);
	array_sort.AddFunction(sort_orders);
	set.AddFunction(array_sort);

	// reverse sort

	// one parameter: list
	ScalarFunction sort_reverse({LogicalType::LIST(LogicalType::ANY)}, LogicalType::LIST(LogicalType::ANY),
	                            ListSortFunction, false, false, ListReverseSortBind);

	// two parameters: list, null order (the sort direction itself is the inverted default)
	ScalarFunction sort_reverse_null_order({LogicalType::LIST(LogicalType::ANY), LogicalType::VARCHAR},
	                                       LogicalType::LIST(LogicalType::ANY), ListSortFunction, false, false,
	                                       ListReverseSortBind);

	ScalarFunctionSet list_reverse_sort("list_reverse_sort");
	list_reverse_sort.AddFunction(sort_reverse);
	list_reverse_sort.AddFunction(sort_reverse_null_order);
	set.AddFunction(list_reverse_sort);

	// "array_reverse_sort" is the alias for the reverse variants
	ScalarFunctionSet array_reverse_sort("array_reverse_sort");
	array_reverse_sort.AddFunction(sort_reverse);
	array_reverse_sort.AddFunction(sort_reverse_null_order);
	set.AddFunction(array_reverse_sort);
}
|
|
89123
|
+
|
|
89124
|
+
} // namespace duckdb
|
|
89125
|
+
|
|
89126
|
+
|
|
89127
|
+
|
|
89128
|
+
|
|
89129
|
+
|
|
89130
|
+
|
|
89131
|
+
|
|
89132
|
+
|
|
88777
89133
|
namespace duckdb {
|
|
88778
89134
|
|
|
88779
89135
|
static void ListValueFunction(DataChunk &args, ExpressionState &state, Vector &result) {
|
|
@@ -90668,6 +91024,7 @@ void BuiltinFunctions::RegisterNestedFunctions() {
|
|
|
90668
91024
|
Register<ListAggregateFun>();
|
|
90669
91025
|
Register<ListValueFun>();
|
|
90670
91026
|
Register<ListExtractFun>();
|
|
91027
|
+
Register<ListSortFun>();
|
|
90671
91028
|
Register<ListRangeFun>();
|
|
90672
91029
|
Register<ListFlattenFun>();
|
|
90673
91030
|
Register<MapFun>();
|
|
@@ -98561,6 +98918,8 @@ static bool ParseBaseOption(BufferedCSVReaderOptions &options, string &loption,
|
|
|
98561
98918
|
options.skip_rows = ParseInteger(set);
|
|
98562
98919
|
} else if (loption == "max_line_size" || loption == "maximum_line_size") {
|
|
98563
98920
|
options.maximum_line_size = ParseInteger(set);
|
|
98921
|
+
} else if (loption == "ignore_errors") {
|
|
98922
|
+
options.ignore_errors = ParseBoolean(set);
|
|
98564
98923
|
} else {
|
|
98565
98924
|
// unrecognized option in base CSV
|
|
98566
98925
|
return false;
|
|
@@ -103872,44 +104231,74 @@ void BaseAppender::AppendValueInternal(T input) {
|
|
|
103872
104231
|
throw InvalidInputException("Too many appends for chunk!");
|
|
103873
104232
|
}
|
|
103874
104233
|
auto &col = chunk->data[column];
|
|
103875
|
-
switch (col.GetType().
|
|
103876
|
-
case
|
|
104234
|
+
switch (col.GetType().id()) {
|
|
104235
|
+
case LogicalTypeId::BOOLEAN:
|
|
103877
104236
|
AppendValueInternal<T, bool>(col, input);
|
|
103878
104237
|
break;
|
|
103879
|
-
case
|
|
104238
|
+
case LogicalTypeId::UTINYINT:
|
|
103880
104239
|
AppendValueInternal<T, uint8_t>(col, input);
|
|
103881
104240
|
break;
|
|
103882
|
-
case
|
|
104241
|
+
case LogicalTypeId::TINYINT:
|
|
103883
104242
|
AppendValueInternal<T, int8_t>(col, input);
|
|
103884
104243
|
break;
|
|
103885
|
-
case
|
|
104244
|
+
case LogicalTypeId::USMALLINT:
|
|
103886
104245
|
AppendValueInternal<T, uint16_t>(col, input);
|
|
103887
104246
|
break;
|
|
103888
|
-
case
|
|
104247
|
+
case LogicalTypeId::SMALLINT:
|
|
103889
104248
|
AppendValueInternal<T, int16_t>(col, input);
|
|
103890
104249
|
break;
|
|
103891
|
-
case
|
|
104250
|
+
case LogicalTypeId::UINTEGER:
|
|
103892
104251
|
AppendValueInternal<T, uint32_t>(col, input);
|
|
103893
104252
|
break;
|
|
103894
|
-
case
|
|
104253
|
+
case LogicalTypeId::INTEGER:
|
|
103895
104254
|
AppendValueInternal<T, int32_t>(col, input);
|
|
103896
104255
|
break;
|
|
103897
|
-
case
|
|
104256
|
+
case LogicalTypeId::UBIGINT:
|
|
103898
104257
|
AppendValueInternal<T, uint64_t>(col, input);
|
|
103899
104258
|
break;
|
|
103900
|
-
case
|
|
104259
|
+
case LogicalTypeId::BIGINT:
|
|
103901
104260
|
AppendValueInternal<T, int64_t>(col, input);
|
|
103902
104261
|
break;
|
|
103903
|
-
case
|
|
104262
|
+
case LogicalTypeId::HUGEINT:
|
|
103904
104263
|
AppendValueInternal<T, hugeint_t>(col, input);
|
|
103905
104264
|
break;
|
|
103906
|
-
case
|
|
104265
|
+
case LogicalTypeId::FLOAT:
|
|
103907
104266
|
AppendValueInternal<T, float>(col, input);
|
|
103908
104267
|
break;
|
|
103909
|
-
case
|
|
104268
|
+
case LogicalTypeId::DOUBLE:
|
|
103910
104269
|
AppendValueInternal<T, double>(col, input);
|
|
103911
104270
|
break;
|
|
103912
|
-
case
|
|
104271
|
+
case LogicalTypeId::DECIMAL:
|
|
104272
|
+
switch (col.GetType().InternalType()) {
|
|
104273
|
+
case PhysicalType::INT8:
|
|
104274
|
+
AppendValueInternal<T, int8_t>(col, input);
|
|
104275
|
+
break;
|
|
104276
|
+
case PhysicalType::INT16:
|
|
104277
|
+
AppendValueInternal<T, int16_t>(col, input);
|
|
104278
|
+
break;
|
|
104279
|
+
case PhysicalType::INT32:
|
|
104280
|
+
AppendValueInternal<T, int32_t>(col, input);
|
|
104281
|
+
break;
|
|
104282
|
+
default:
|
|
104283
|
+
AppendValueInternal<T, int64_t>(col, input);
|
|
104284
|
+
break;
|
|
104285
|
+
}
|
|
104286
|
+
break;
|
|
104287
|
+
case LogicalTypeId::DATE:
|
|
104288
|
+
AppendValueInternal<T, date_t>(col, input);
|
|
104289
|
+
break;
|
|
104290
|
+
case LogicalTypeId::TIMESTAMP:
|
|
104291
|
+
case LogicalTypeId::TIMESTAMP_TZ:
|
|
104292
|
+
AppendValueInternal<T, timestamp_t>(col, input);
|
|
104293
|
+
break;
|
|
104294
|
+
case LogicalTypeId::TIME:
|
|
104295
|
+
case LogicalTypeId::TIME_TZ:
|
|
104296
|
+
AppendValueInternal<T, dtime_t>(col, input);
|
|
104297
|
+
break;
|
|
104298
|
+
case LogicalTypeId::INTERVAL:
|
|
104299
|
+
AppendValueInternal<T, interval_t>(col, input);
|
|
104300
|
+
break;
|
|
104301
|
+
case LogicalTypeId::VARCHAR:
|
|
103913
104302
|
FlatVector::GetData<string_t>(col)[chunk->size()] = StringCast::Operation<T>(input, col);
|
|
103914
104303
|
break;
|
|
103915
104304
|
default:
|
|
@@ -103995,17 +104384,17 @@ void BaseAppender::Append(double value) {
|
|
|
103995
104384
|
|
|
103996
104385
|
// Explicit specializations routing temporal appends through AppendValueInternal,
// which dispatches on the target column's logical type (see the switch in
// AppendValueInternal<T>).
template <>
void BaseAppender::Append(date_t value) {
	AppendValueInternal<date_t>(value);
}

template <>
void BaseAppender::Append(dtime_t value) {
	AppendValueInternal<dtime_t>(value);
}

template <>
void BaseAppender::Append(timestamp_t value) {
	AppendValueInternal<timestamp_t>(value);
}
|
|
104010
104399
|
|
|
104011
104400
|
template <>
|
|
@@ -105104,6 +105493,24 @@ duckdb_logical_type duckdb_create_logical_type(duckdb_type type) {
|
|
|
105104
105493
|
return new duckdb::LogicalType(duckdb::ConvertCTypeToCPP(type));
|
|
105105
105494
|
}
|
|
105106
105495
|
|
|
105496
|
+
// C API: wraps 'type' in a LIST logical type. Returns a heap-allocated type that
// the caller must destroy, or nullptr when the child type handle is missing.
duckdb_logical_type duckdb_create_list_type(duckdb_logical_type type) {
	if (!type) {
		return nullptr;
	}
	auto &child_type = *((duckdb::LogicalType *)type);
	return new duckdb::LogicalType(duckdb::LogicalType::LIST(child_type));
}
|
|
105504
|
+
|
|
105505
|
+
// C API: builds a MAP logical type from key/value type handles. Returns a
// heap-allocated type the caller must destroy, or nullptr if either handle is missing.
duckdb_logical_type duckdb_create_map_type(duckdb_logical_type key_type, duckdb_logical_type value_type) {
	if (!key_type || !value_type) {
		return nullptr;
	}
	auto &key = *((duckdb::LogicalType *)key_type);
	auto &value = *((duckdb::LogicalType *)value_type);
	return new duckdb::LogicalType(duckdb::LogicalType::MAP(key, value));
}
|
|
105513
|
+
|
|
105107
105514
|
duckdb_logical_type duckdb_create_decimal_type(uint8_t width, uint8_t scale) {
|
|
105108
105515
|
return new duckdb::LogicalType(duckdb::LogicalType::DECIMAL(width, scale));
|
|
105109
105516
|
}
|
|
@@ -105223,6 +105630,28 @@ duckdb_logical_type duckdb_list_type_child_type(duckdb_logical_type type) {
|
|
|
105223
105630
|
return new duckdb::LogicalType(duckdb::ListType::GetChildType(ltype));
|
|
105224
105631
|
}
|
|
105225
105632
|
|
|
105633
|
+
// C API: returns a newly allocated LogicalType holding the key type of a MAP,
// or nullptr when 'type' is null or not a MAP. The caller owns the returned type.
duckdb_logical_type duckdb_map_type_key_type(duckdb_logical_type type) {
	if (!type) {
		return nullptr;
	}
	auto &mtype = *((duckdb::LogicalType *)type);
	if (mtype.id() != duckdb::LogicalTypeId::MAP) {
		return nullptr;
	}
	return new duckdb::LogicalType(duckdb::MapType::KeyType(mtype));
}
|
|
105643
|
+
|
|
105644
|
+
// C API: returns a newly allocated LogicalType holding the value type of a MAP,
// or nullptr when 'type' is null or not a MAP. The caller owns the returned type.
duckdb_logical_type duckdb_map_type_value_type(duckdb_logical_type type) {
	if (!type) {
		return nullptr;
	}
	auto &mtype = *((duckdb::LogicalType *)type);
	if (mtype.id() != duckdb::LogicalTypeId::MAP) {
		return nullptr;
	}
	return new duckdb::LogicalType(duckdb::MapType::ValueType(mtype));
}
|
|
105654
|
+
|
|
105226
105655
|
idx_t duckdb_struct_type_child_count(duckdb_logical_type type) {
|
|
105227
105656
|
if (!type) {
|
|
105228
105657
|
return 0;
|
|
@@ -121694,6 +122123,18 @@ string Relation::RenderWhitespace(idx_t depth) {
|
|
|
121694
122123
|
return string(depth * 2, ' ');
|
|
121695
122124
|
}
|
|
121696
122125
|
|
|
122126
|
+
// Collects the external dependencies attached to this relation and to every
// relation below it in the child chain.
vector<shared_ptr<ExternalDependency>> Relation::GetAllDependencies() {
	vector<shared_ptr<ExternalDependency>> all_dependencies;
	Relation *cur = this;
	while (cur) {
		if (cur->extra_dependencies) {
			all_dependencies.push_back(cur->extra_dependencies);
		}
		// BUG FIX: advance via the *current* node. The original `cur = ChildRelation();`
		// implicitly calls this->ChildRelation() on every iteration, so cur never moves
		// past the first child: an infinite loop for chains deeper than one level, and
		// dependencies of deeper relations are never collected.
		cur = cur->ChildRelation();
	}
	return all_dependencies;
}
|
|
122137
|
+
|
|
121697
122138
|
} // namespace duckdb
|
|
121698
122139
|
|
|
121699
122140
|
|
|
@@ -129244,7 +129685,7 @@ unique_ptr<Expression> EnumComparisonRule::Apply(LogicalOperator &op, vector<Exp
|
|
|
129244
129685
|
}
|
|
129245
129686
|
|
|
129246
129687
|
auto cast_left_to_right =
|
|
129247
|
-
make_unique<BoundCastExpression>(move(left_child->child), right_child->child->return_type);
|
|
129688
|
+
make_unique<BoundCastExpression>(move(left_child->child), right_child->child->return_type, true);
|
|
129248
129689
|
|
|
129249
129690
|
return make_unique<BoundComparisonExpression>(root->type, move(cast_left_to_right), move(right_child->child));
|
|
129250
129691
|
}
|