duckdb 0.4.1-dev801.0 → 0.4.1-dev815.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/duckdb.cpp +148 -149
- package/src/duckdb.hpp +554 -554
- package/src/parquet-amalgamation.cpp +36436 -36436
package/package.json
CHANGED
package/src/duckdb.cpp
CHANGED
|
@@ -60336,7 +60336,6 @@ public:
|
|
|
60336
60336
|
partition_cols = wexpr->partitions.size();
|
|
60337
60337
|
for (idx_t prt_idx = 0; prt_idx < partition_cols; prt_idx++) {
|
|
60338
60338
|
auto &pexpr = wexpr->partitions[prt_idx];
|
|
60339
|
-
payload_types.push_back(pexpr->return_type);
|
|
60340
60339
|
|
|
60341
60340
|
if (wexpr->partitions_stats.empty() || !wexpr->partitions_stats[prt_idx]) {
|
|
60342
60341
|
orders.emplace_back(OrderType::ASCENDING, OrderByNullType::NULLS_FIRST, pexpr->Copy(), nullptr);
|
|
@@ -60348,8 +60347,6 @@ public:
|
|
|
60348
60347
|
}
|
|
60349
60348
|
|
|
60350
60349
|
for (const auto &order : wexpr->orders) {
|
|
60351
|
-
auto &oexpr = order.expression;
|
|
60352
|
-
payload_types.push_back(oexpr->return_type);
|
|
60353
60350
|
orders.emplace_back(order.Copy());
|
|
60354
60351
|
}
|
|
60355
60352
|
|
|
@@ -60453,16 +60450,16 @@ public:
|
|
|
60453
60450
|
idx_t count;
|
|
60454
60451
|
};
|
|
60455
60452
|
|
|
60456
|
-
bool WindowLocalHashGroup::SinkChunk(DataChunk &sort_buffer, DataChunk &
|
|
60457
|
-
D_ASSERT(sort_buffer.size() ==
|
|
60458
|
-
count +=
|
|
60453
|
+
bool WindowLocalHashGroup::SinkChunk(DataChunk &sort_buffer, DataChunk &input_chunk) {
|
|
60454
|
+
D_ASSERT(sort_buffer.size() == input_chunk.size());
|
|
60455
|
+
count += input_chunk.size();
|
|
60459
60456
|
auto &global_sort = *global_group.global_sort;
|
|
60460
60457
|
if (!local_sort) {
|
|
60461
60458
|
local_sort = make_unique<LocalSortState>();
|
|
60462
60459
|
local_sort->Initialize(global_sort, global_sort.buffer_manager);
|
|
60463
60460
|
}
|
|
60464
60461
|
|
|
60465
|
-
local_sort->SinkChunk(sort_buffer,
|
|
60462
|
+
local_sort->SinkChunk(sort_buffer, input_chunk);
|
|
60466
60463
|
|
|
60467
60464
|
if (local_sort->SizeInBytes() >= global_group.memory_per_thread) {
|
|
60468
60465
|
local_sort->Sort(global_sort, true);
|
|
@@ -60492,19 +60489,17 @@ public:
|
|
|
60492
60489
|
partition_cols = wexpr->partitions.size();
|
|
60493
60490
|
|
|
60494
60491
|
// we sort by both 1) partition by expression list and 2) order by expressions
|
|
60495
|
-
|
|
60492
|
+
auto &payload_types = op.children[0]->types;
|
|
60496
60493
|
vector<LogicalType> over_types;
|
|
60497
60494
|
for (idx_t prt_idx = 0; prt_idx < wexpr->partitions.size(); prt_idx++) {
|
|
60498
60495
|
auto &pexpr = wexpr->partitions[prt_idx];
|
|
60499
60496
|
over_types.push_back(pexpr->return_type);
|
|
60500
|
-
payload_types.push_back(pexpr->return_type);
|
|
60501
60497
|
executor.AddExpression(*pexpr);
|
|
60502
60498
|
}
|
|
60503
60499
|
|
|
60504
60500
|
for (const auto &order : wexpr->orders) {
|
|
60505
60501
|
auto &oexpr = order.expression;
|
|
60506
60502
|
over_types.push_back(oexpr->return_type);
|
|
60507
|
-
payload_types.push_back(oexpr->return_type);
|
|
60508
60503
|
executor.AddExpression(*oexpr);
|
|
60509
60504
|
}
|
|
60510
60505
|
|
|
@@ -60545,15 +60540,24 @@ public:
|
|
|
60545
60540
|
|
|
60546
60541
|
//! Switch to grouping the data
|
|
60547
60542
|
void Group(WindowGlobalSinkState &gstate);
|
|
60543
|
+
//! Compute the OVER values
|
|
60544
|
+
void Over(DataChunk &input_chunk);
|
|
60548
60545
|
//! Hash the data and group it
|
|
60549
|
-
void Hash(WindowGlobalSinkState &gstate);
|
|
60546
|
+
void Hash(WindowGlobalSinkState &gstate, DataChunk &input_chunk);
|
|
60550
60547
|
//! Sink an input chunk
|
|
60551
60548
|
void Sink(DataChunk &input_chunk, WindowGlobalSinkState &gstate);
|
|
60552
60549
|
//! Merge the state into the global state.
|
|
60553
60550
|
void Combine(WindowGlobalSinkState &gstate);
|
|
60554
60551
|
};
|
|
60555
60552
|
|
|
60556
|
-
void WindowLocalSinkState::
|
|
60553
|
+
void WindowLocalSinkState::Over(DataChunk &input_chunk) {
|
|
60554
|
+
if (over_chunk.ColumnCount() > 0) {
|
|
60555
|
+
executor.Execute(input_chunk, over_chunk);
|
|
60556
|
+
over_chunk.Verify();
|
|
60557
|
+
}
|
|
60558
|
+
}
|
|
60559
|
+
|
|
60560
|
+
void WindowLocalSinkState::Hash(WindowGlobalSinkState &gstate, DataChunk &input_chunk) {
|
|
60557
60561
|
// There are three types of hash grouping:
|
|
60558
60562
|
// 1. No partitions (no sorting)
|
|
60559
60563
|
// 2. One group (sorting, but no hash grouping)
|
|
@@ -60618,11 +60622,11 @@ void WindowLocalSinkState::Hash(WindowGlobalSinkState &gstate) {
|
|
|
60618
60622
|
}
|
|
60619
60623
|
|
|
60620
60624
|
if (counts.size() == 1) {
|
|
60621
|
-
local_group->SinkChunk(over_chunk,
|
|
60625
|
+
local_group->SinkChunk(over_chunk, input_chunk);
|
|
60622
60626
|
} else {
|
|
60623
60627
|
SelectionVector psel(sel.data() + group_offset);
|
|
60624
60628
|
over_subset.Slice(over_chunk, psel, group_size);
|
|
60625
|
-
payload_subset.Slice(
|
|
60629
|
+
payload_subset.Slice(input_chunk, psel, group_size);
|
|
60626
60630
|
local_group->SinkChunk(over_subset, payload_subset);
|
|
60627
60631
|
group_offset += group_size;
|
|
60628
60632
|
}
|
|
@@ -60653,18 +60657,12 @@ void WindowLocalSinkState::Group(WindowGlobalSinkState &gstate) {
|
|
|
60653
60657
|
|
|
60654
60658
|
RowDataCollectionScanner::SwizzleBlocks(*rows, *heap, payload_data, payload_heap, payload_layout);
|
|
60655
60659
|
RowDataCollectionScanner scanner(*rows, *heap, payload_layout);
|
|
60656
|
-
const auto input_count = payload_chunk.ColumnCount() - over_chunk.ColumnCount();
|
|
60657
60660
|
while (scanner.Remaining()) {
|
|
60658
60661
|
payload_chunk.Reset();
|
|
60659
60662
|
scanner.Scan(payload_chunk);
|
|
60660
60663
|
|
|
60661
|
-
|
|
60662
|
-
|
|
60663
|
-
over_chunk.data[c].Reference(payload_chunk.data[input_count + c]);
|
|
60664
|
-
}
|
|
60665
|
-
over_chunk.SetCardinality(payload_chunk);
|
|
60666
|
-
|
|
60667
|
-
Hash(gstate);
|
|
60664
|
+
Over(payload_chunk);
|
|
60665
|
+
Hash(gstate, payload_chunk);
|
|
60668
60666
|
}
|
|
60669
60667
|
|
|
60670
60668
|
ungrouped.reset();
|
|
@@ -60674,18 +60672,7 @@ void WindowLocalSinkState::Sink(DataChunk &input_chunk, WindowGlobalSinkState &g
|
|
|
60674
60672
|
gstate.count += input_chunk.size();
|
|
60675
60673
|
count += input_chunk.size();
|
|
60676
60674
|
|
|
60677
|
-
|
|
60678
|
-
executor.Execute(input_chunk, over_chunk);
|
|
60679
|
-
over_chunk.Verify();
|
|
60680
|
-
}
|
|
60681
|
-
|
|
60682
|
-
for (idx_t col_idx = 0; col_idx < input_chunk.ColumnCount(); ++col_idx) {
|
|
60683
|
-
payload_chunk.data[col_idx].Reference(input_chunk.data[col_idx]);
|
|
60684
|
-
}
|
|
60685
|
-
for (idx_t col_idx = 0; col_idx < over_chunk.ColumnCount(); ++col_idx) {
|
|
60686
|
-
payload_chunk.data[input_chunk.ColumnCount() + col_idx].Reference(over_chunk.data[col_idx]);
|
|
60687
|
-
}
|
|
60688
|
-
payload_chunk.SetCardinality(input_chunk);
|
|
60675
|
+
Over(input_chunk);
|
|
60689
60676
|
|
|
60690
60677
|
// OVER()
|
|
60691
60678
|
if (over_chunk.ColumnCount() == 0) {
|
|
@@ -60696,19 +60683,19 @@ void WindowLocalSinkState::Sink(DataChunk &input_chunk, WindowGlobalSinkState &g
|
|
|
60696
60683
|
rows = make_unique<RowDataCollection>(gstate.buffer_manager, capacity, entry_size);
|
|
60697
60684
|
strings = make_unique<RowDataCollection>(gstate.buffer_manager, (idx_t)Storage::BLOCK_SIZE, 1, true);
|
|
60698
60685
|
}
|
|
60699
|
-
const auto row_count =
|
|
60686
|
+
const auto row_count = input_chunk.size();
|
|
60700
60687
|
const auto row_sel = FlatVector::IncrementalSelectionVector();
|
|
60701
60688
|
Vector addresses(LogicalType::POINTER);
|
|
60702
60689
|
auto key_locations = FlatVector::GetData<data_ptr_t>(addresses);
|
|
60703
60690
|
auto handles = rows->Build(row_count, key_locations, nullptr, row_sel);
|
|
60704
|
-
vector<UnifiedVectorFormat>
|
|
60705
|
-
|
|
60706
|
-
for (idx_t i = 0; i <
|
|
60691
|
+
vector<UnifiedVectorFormat> input_data;
|
|
60692
|
+
input_data.reserve(input_chunk.ColumnCount());
|
|
60693
|
+
for (idx_t i = 0; i < input_chunk.ColumnCount(); i++) {
|
|
60707
60694
|
UnifiedVectorFormat pdata;
|
|
60708
|
-
|
|
60709
|
-
|
|
60695
|
+
input_chunk.data[i].ToUnifiedFormat(row_count, pdata);
|
|
60696
|
+
input_data.emplace_back(move(pdata));
|
|
60710
60697
|
}
|
|
60711
|
-
RowOperations::Scatter(
|
|
60698
|
+
RowOperations::Scatter(input_chunk, input_data.data(), payload_layout, addresses, *strings, *row_sel,
|
|
60712
60699
|
row_count);
|
|
60713
60700
|
return;
|
|
60714
60701
|
}
|
|
@@ -60721,14 +60708,14 @@ void WindowLocalSinkState::Sink(DataChunk &input_chunk, WindowGlobalSinkState &g
|
|
|
60721
60708
|
}
|
|
60722
60709
|
|
|
60723
60710
|
// If we pass our thread memory budget, then switch to hash grouping.
|
|
60724
|
-
if (ungrouped->SinkChunk(over_chunk,
|
|
60711
|
+
if (ungrouped->SinkChunk(over_chunk, input_chunk) || gstate.count > 100000) {
|
|
60725
60712
|
Group(gstate);
|
|
60726
60713
|
}
|
|
60727
60714
|
return;
|
|
60728
60715
|
}
|
|
60729
60716
|
|
|
60730
60717
|
// Grouped, so hash
|
|
60731
|
-
Hash(gstate);
|
|
60718
|
+
Hash(gstate, input_chunk);
|
|
60732
60719
|
}
|
|
60733
60720
|
|
|
60734
60721
|
void WindowLocalSinkState::Combine(WindowGlobalSinkState &gstate) {
|
|
@@ -60830,8 +60817,6 @@ void WindowGlobalSinkState::Finalize() {
|
|
|
60830
60817
|
|
|
60831
60818
|
// Write into the state chunks directly to hash them
|
|
60832
60819
|
auto &payload_chunk = lstate->payload_chunk;
|
|
60833
|
-
auto &over_chunk = lstate->over_chunk;
|
|
60834
|
-
const auto input_count = payload_chunk.ColumnCount() - over_chunk.ColumnCount();
|
|
60835
60820
|
|
|
60836
60821
|
// Now scan the sorted data
|
|
60837
60822
|
PayloadScanner scanner(global_sort);
|
|
@@ -60843,13 +60828,8 @@ void WindowGlobalSinkState::Finalize() {
|
|
|
60843
60828
|
}
|
|
60844
60829
|
lstate->count += payload_chunk.size();
|
|
60845
60830
|
|
|
60846
|
-
|
|
60847
|
-
|
|
60848
|
-
over_chunk.data[c].Reference(payload_chunk.data[input_count + c]);
|
|
60849
|
-
}
|
|
60850
|
-
over_chunk.SetCardinality(payload_chunk);
|
|
60851
|
-
|
|
60852
|
-
lstate->Hash(*this);
|
|
60831
|
+
lstate->Over(payload_chunk);
|
|
60832
|
+
lstate->Hash(*this, payload_chunk);
|
|
60853
60833
|
}
|
|
60854
60834
|
|
|
60855
60835
|
// Merge the grouped data in.
|
|
@@ -60927,39 +60907,52 @@ static idx_t FindPrevStart(const ValidityMask &mask, const idx_t l, idx_t r, idx
|
|
|
60927
60907
|
return l;
|
|
60928
60908
|
}
|
|
60929
60909
|
|
|
60930
|
-
static void
|
|
60931
|
-
|
|
60910
|
+
static void PrepareInputExpressions(Expression **exprs, idx_t expr_count, ChunkCollection &output,
|
|
60911
|
+
ExpressionExecutor &executor, DataChunk &chunk) {
|
|
60932
60912
|
if (expr_count == 0) {
|
|
60933
60913
|
return;
|
|
60934
60914
|
}
|
|
60935
60915
|
|
|
60936
|
-
auto &allocator = input.GetAllocator();
|
|
60937
60916
|
vector<LogicalType> types;
|
|
60938
|
-
ExpressionExecutor executor(allocator);
|
|
60939
60917
|
for (idx_t expr_idx = 0; expr_idx < expr_count; ++expr_idx) {
|
|
60940
60918
|
types.push_back(exprs[expr_idx]->return_type);
|
|
60941
60919
|
executor.AddExpression(*exprs[expr_idx]);
|
|
60942
60920
|
}
|
|
60943
60921
|
|
|
60944
|
-
|
|
60945
|
-
|
|
60946
|
-
|
|
60922
|
+
if (!types.empty()) {
|
|
60923
|
+
chunk.Initialize(executor.allocator, types);
|
|
60924
|
+
}
|
|
60925
|
+
}
|
|
60947
60926
|
|
|
60948
|
-
|
|
60927
|
+
static void PrepareInputExpression(Expression *expr, ChunkCollection &output, ExpressionExecutor &executor,
|
|
60928
|
+
DataChunk &chunk) {
|
|
60929
|
+
PrepareInputExpressions(&expr, 1, output, executor, chunk);
|
|
60930
|
+
}
|
|
60949
60931
|
|
|
60950
|
-
|
|
60951
|
-
|
|
60932
|
+
struct WindowInputExpression {
|
|
60933
|
+
WindowInputExpression(Expression *expr_p, Allocator &allocator)
|
|
60934
|
+
: expr(expr_p), scalar(false), collection(allocator), executor(allocator) {
|
|
60935
|
+
if (expr) {
|
|
60936
|
+
PrepareInputExpression(expr, collection, executor, chunk);
|
|
60937
|
+
scalar = expr->IsScalar();
|
|
60938
|
+
}
|
|
60939
|
+
}
|
|
60952
60940
|
|
|
60953
|
-
|
|
60954
|
-
|
|
60941
|
+
void Execute(DataChunk &input_chunk) {
|
|
60942
|
+
if (expr && (!scalar || collection.Count() == 0)) {
|
|
60943
|
+
chunk.Reset();
|
|
60944
|
+
executor.Execute(input_chunk, chunk);
|
|
60945
|
+
chunk.Verify();
|
|
60946
|
+
collection.Append(chunk);
|
|
60955
60947
|
}
|
|
60956
60948
|
}
|
|
60957
|
-
}
|
|
60958
60949
|
|
|
60959
|
-
|
|
60960
|
-
|
|
60961
|
-
|
|
60962
|
-
|
|
60950
|
+
Expression *expr;
|
|
60951
|
+
bool scalar;
|
|
60952
|
+
ChunkCollection collection;
|
|
60953
|
+
ExpressionExecutor executor;
|
|
60954
|
+
DataChunk chunk;
|
|
60955
|
+
};
|
|
60963
60956
|
|
|
60964
60957
|
static void ScanRowCollection(RowDataCollection &rows, RowDataCollection &heap, ChunkCollection &cols,
|
|
60965
60958
|
const vector<LogicalType> &types) {
|
|
@@ -60986,8 +60979,7 @@ static void ScanRowCollection(RowDataCollection &rows, RowDataCollection &heap,
|
|
|
60986
60979
|
}
|
|
60987
60980
|
|
|
60988
60981
|
static void ScanSortedPartition(WindowLocalSourceState &state, ChunkCollection &input,
|
|
60989
|
-
const vector<LogicalType> &
|
|
60990
|
-
const vector<LogicalType> &over_types) {
|
|
60982
|
+
const vector<LogicalType> &payload_types) {
|
|
60991
60983
|
auto &allocator = input.GetAllocator();
|
|
60992
60984
|
|
|
60993
60985
|
auto &global_sort_state = *state.hash_group->global_sort;
|
|
@@ -60995,14 +60987,10 @@ static void ScanSortedPartition(WindowLocalSourceState &state, ChunkCollection &
|
|
|
60995
60987
|
return;
|
|
60996
60988
|
}
|
|
60997
60989
|
|
|
60998
|
-
auto payload_types = input_types;
|
|
60999
|
-
payload_types.insert(payload_types.end(), over_types.begin(), over_types.end());
|
|
61000
|
-
|
|
61001
60990
|
// scan the sorted row data
|
|
61002
60991
|
D_ASSERT(global_sort_state.sorted_blocks.size() == 1);
|
|
61003
60992
|
PayloadScanner scanner(*global_sort_state.sorted_blocks[0]->payload_data, global_sort_state);
|
|
61004
60993
|
DataChunk payload_chunk;
|
|
61005
|
-
DataChunk over_chunk;
|
|
61006
60994
|
payload_chunk.Initialize(allocator, payload_types);
|
|
61007
60995
|
for (;;) {
|
|
61008
60996
|
payload_chunk.Reset();
|
|
@@ -61011,14 +60999,8 @@ static void ScanSortedPartition(WindowLocalSourceState &state, ChunkCollection &
|
|
|
61011
60999
|
break;
|
|
61012
61000
|
}
|
|
61013
61001
|
|
|
61014
|
-
// split into two
|
|
61015
|
-
payload_chunk.Split(over_chunk, input_types.size());
|
|
61016
|
-
|
|
61017
61002
|
// append back to collection
|
|
61018
61003
|
input.Append(payload_chunk);
|
|
61019
|
-
over.Append(over_chunk);
|
|
61020
|
-
|
|
61021
|
-
payload_chunk.Fuse(over_chunk);
|
|
61022
61004
|
}
|
|
61023
61005
|
|
|
61024
61006
|
state.hash_group.reset();
|
|
@@ -61219,13 +61201,10 @@ static idx_t FindOrderedRangeBound(ChunkCollection &over, const idx_t order_col,
|
|
|
61219
61201
|
}
|
|
61220
61202
|
|
|
61221
61203
|
static void UpdateWindowBoundaries(WindowBoundariesState &bounds, const idx_t input_size, const idx_t row_idx,
|
|
61222
|
-
ChunkCollection &
|
|
61204
|
+
ChunkCollection &range_collection, ChunkCollection &boundary_start_collection,
|
|
61223
61205
|
ChunkCollection &boundary_end_collection, const ValidityMask &partition_mask,
|
|
61224
61206
|
const ValidityMask &order_mask) {
|
|
61225
61207
|
|
|
61226
|
-
// RANGE sorting parameters
|
|
61227
|
-
const auto order_col = bounds.partition_count;
|
|
61228
|
-
|
|
61229
61208
|
if (bounds.partition_count + bounds.order_count > 0) {
|
|
61230
61209
|
|
|
61231
61210
|
// determine partition and peer group boundaries to ultimately figure out window size
|
|
@@ -61251,7 +61230,7 @@ static void UpdateWindowBoundaries(WindowBoundariesState &bounds, const idx_t in
|
|
|
61251
61230
|
|
|
61252
61231
|
if ((bounds.valid_start < bounds.valid_end) && bounds.has_preceding_range) {
|
|
61253
61232
|
// Exclude any leading NULLs
|
|
61254
|
-
if (CellIsNull(
|
|
61233
|
+
if (CellIsNull(range_collection, 0, bounds.valid_start)) {
|
|
61255
61234
|
idx_t n = 1;
|
|
61256
61235
|
bounds.valid_start = FindNextStart(order_mask, bounds.valid_start + 1, bounds.valid_end, n);
|
|
61257
61236
|
}
|
|
@@ -61259,7 +61238,7 @@ static void UpdateWindowBoundaries(WindowBoundariesState &bounds, const idx_t in
|
|
|
61259
61238
|
|
|
61260
61239
|
if ((bounds.valid_start < bounds.valid_end) && bounds.has_following_range) {
|
|
61261
61240
|
// Exclude any trailing NULLs
|
|
61262
|
-
if (CellIsNull(
|
|
61241
|
+
if (CellIsNull(range_collection, 0, bounds.valid_end - 1)) {
|
|
61263
61242
|
idx_t n = 1;
|
|
61264
61243
|
bounds.valid_end = FindPrevStart(order_mask, bounds.valid_start, bounds.valid_end, n);
|
|
61265
61244
|
}
|
|
@@ -61314,7 +61293,7 @@ static void UpdateWindowBoundaries(WindowBoundariesState &bounds, const idx_t in
|
|
|
61314
61293
|
bounds.window_start = bounds.peer_start;
|
|
61315
61294
|
} else {
|
|
61316
61295
|
bounds.window_start =
|
|
61317
|
-
FindOrderedRangeBound<true>(
|
|
61296
|
+
FindOrderedRangeBound<true>(range_collection, 0, bounds.range_sense, bounds.valid_start, row_idx,
|
|
61318
61297
|
boundary_start_collection, expr_idx);
|
|
61319
61298
|
}
|
|
61320
61299
|
break;
|
|
@@ -61324,7 +61303,7 @@ static void UpdateWindowBoundaries(WindowBoundariesState &bounds, const idx_t in
|
|
|
61324
61303
|
if (CellIsNull(boundary_start_collection, 0, expr_idx)) {
|
|
61325
61304
|
bounds.window_start = bounds.peer_start;
|
|
61326
61305
|
} else {
|
|
61327
|
-
bounds.window_start = FindOrderedRangeBound<true>(
|
|
61306
|
+
bounds.window_start = FindOrderedRangeBound<true>(range_collection, 0, bounds.range_sense, row_idx,
|
|
61328
61307
|
bounds.valid_end, boundary_start_collection, expr_idx);
|
|
61329
61308
|
}
|
|
61330
61309
|
break;
|
|
@@ -61356,8 +61335,8 @@ static void UpdateWindowBoundaries(WindowBoundariesState &bounds, const idx_t in
|
|
|
61356
61335
|
bounds.window_end = bounds.peer_end;
|
|
61357
61336
|
} else {
|
|
61358
61337
|
bounds.window_end =
|
|
61359
|
-
FindOrderedRangeBound<false>(
|
|
61360
|
-
|
|
61338
|
+
FindOrderedRangeBound<false>(range_collection, 0, bounds.range_sense, bounds.valid_start, row_idx,
|
|
61339
|
+
boundary_end_collection, expr_idx);
|
|
61361
61340
|
}
|
|
61362
61341
|
break;
|
|
61363
61342
|
}
|
|
@@ -61366,7 +61345,7 @@ static void UpdateWindowBoundaries(WindowBoundariesState &bounds, const idx_t in
|
|
|
61366
61345
|
if (CellIsNull(boundary_end_collection, 0, expr_idx)) {
|
|
61367
61346
|
bounds.window_end = bounds.peer_end;
|
|
61368
61347
|
} else {
|
|
61369
|
-
bounds.window_end = FindOrderedRangeBound<false>(
|
|
61348
|
+
bounds.window_end = FindOrderedRangeBound<false>(range_collection, 0, bounds.range_sense, row_idx,
|
|
61370
61349
|
bounds.valid_end, boundary_end_collection, expr_idx);
|
|
61371
61350
|
}
|
|
61372
61351
|
break;
|
|
@@ -61395,77 +61374,91 @@ static void UpdateWindowBoundaries(WindowBoundariesState &bounds, const idx_t in
|
|
|
61395
61374
|
}
|
|
61396
61375
|
|
|
61397
61376
|
static void ComputeWindowExpression(BoundWindowExpression *wexpr, ChunkCollection &input, ChunkCollection &output,
|
|
61398
|
-
|
|
61399
|
-
|
|
61377
|
+
const ValidityMask &partition_mask, const ValidityMask &order_mask,
|
|
61378
|
+
WindowAggregationMode mode) {
|
|
61400
61379
|
|
|
61401
61380
|
// TODO we could evaluate those expressions in parallel
|
|
61381
|
+
WindowBoundariesState bounds(wexpr);
|
|
61382
|
+
uint64_t dense_rank = 1, rank_equal = 0, rank = 1;
|
|
61383
|
+
|
|
61384
|
+
// Single pass over the input to produce the payload columns.
|
|
61385
|
+
// Vectorisation for the win...
|
|
61386
|
+
|
|
61402
61387
|
auto &allocator = input.GetAllocator();
|
|
61388
|
+
|
|
61403
61389
|
// evaluate inner expressions of window functions, could be more complex
|
|
61404
61390
|
ChunkCollection payload_collection(allocator);
|
|
61405
61391
|
vector<Expression *> exprs;
|
|
61406
61392
|
for (auto &child : wexpr->children) {
|
|
61407
61393
|
exprs.push_back(child.get());
|
|
61408
61394
|
}
|
|
61395
|
+
|
|
61409
61396
|
// TODO: child may be a scalar, don't need to materialize the whole collection then
|
|
61410
|
-
|
|
61397
|
+
ExpressionExecutor payload_executor(allocator);
|
|
61398
|
+
DataChunk payload_chunk;
|
|
61399
|
+
PrepareInputExpressions(exprs.data(), exprs.size(), payload_collection, payload_executor, payload_chunk);
|
|
61411
61400
|
|
|
61412
|
-
|
|
61413
|
-
|
|
61414
|
-
if (wexpr->type == ExpressionType::WINDOW_LEAD || wexpr->type == ExpressionType::WINDOW_LAG) {
|
|
61415
|
-
if (wexpr->offset_expr) {
|
|
61416
|
-
MaterializeExpression(wexpr->offset_expr.get(), input, leadlag_offset_collection,
|
|
61417
|
-
wexpr->offset_expr->IsScalar());
|
|
61418
|
-
}
|
|
61419
|
-
if (wexpr->default_expr) {
|
|
61420
|
-
MaterializeExpression(wexpr->default_expr.get(), input, leadlag_default_collection,
|
|
61421
|
-
wexpr->default_expr->IsScalar());
|
|
61422
|
-
}
|
|
61423
|
-
}
|
|
61401
|
+
WindowInputExpression leadlag_offset(wexpr->offset_expr.get(), allocator);
|
|
61402
|
+
WindowInputExpression leadlag_default(wexpr->default_expr.get(), allocator);
|
|
61424
61403
|
|
|
61425
61404
|
// evaluate the FILTER clause and stuff it into a large mask for compactness and reuse
|
|
61426
61405
|
ValidityMask filter_mask;
|
|
61427
61406
|
vector<validity_t> filter_bits;
|
|
61407
|
+
ExpressionExecutor filter_executor(allocator);
|
|
61408
|
+
SelectionVector filter_sel;
|
|
61428
61409
|
if (wexpr->filter_expr) {
|
|
61429
61410
|
// Start with all invalid and set the ones that pass
|
|
61430
61411
|
filter_bits.resize(ValidityMask::ValidityMaskSize(input.Count()), 0);
|
|
61431
61412
|
filter_mask.Initialize(filter_bits.data());
|
|
61432
|
-
|
|
61433
|
-
|
|
61434
|
-
idx_t base_idx = 0;
|
|
61435
|
-
for (auto &chunk : input.Chunks()) {
|
|
61436
|
-
const auto filtered = filter_execution.SelectExpression(*chunk, true_sel);
|
|
61437
|
-
for (idx_t f = 0; f < filtered; ++f) {
|
|
61438
|
-
filter_mask.SetValid(base_idx + true_sel[f]);
|
|
61439
|
-
}
|
|
61440
|
-
base_idx += chunk->size();
|
|
61441
|
-
}
|
|
61413
|
+
filter_executor.AddExpression(*wexpr->filter_expr);
|
|
61414
|
+
filter_sel.Initialize(STANDARD_VECTOR_SIZE);
|
|
61442
61415
|
}
|
|
61443
61416
|
|
|
61444
61417
|
// evaluate boundaries if present. Parser has checked boundary types.
|
|
61445
|
-
|
|
61446
|
-
|
|
61447
|
-
MaterializeExpression(wexpr->start_expr.get(), input, boundary_start_collection, wexpr->start_expr->IsScalar());
|
|
61448
|
-
}
|
|
61418
|
+
WindowInputExpression boundary_start(wexpr->start_expr.get(), allocator);
|
|
61419
|
+
WindowInputExpression boundary_end(wexpr->end_expr.get(), allocator);
|
|
61449
61420
|
|
|
61450
|
-
|
|
61451
|
-
|
|
61452
|
-
|
|
61421
|
+
// evaluate RANGE expressions, if needed
|
|
61422
|
+
Expression *range_expr = nullptr;
|
|
61423
|
+
if (bounds.has_preceding_range || bounds.has_following_range) {
|
|
61424
|
+
D_ASSERT(wexpr->orders.size() == 1);
|
|
61425
|
+
range_expr = wexpr->orders[0].expression.get();
|
|
61453
61426
|
}
|
|
61427
|
+
WindowInputExpression range(range_expr, allocator);
|
|
61454
61428
|
|
|
61455
61429
|
// Set up a validity mask for IGNORE NULLS
|
|
61456
61430
|
ValidityMask ignore_nulls;
|
|
61431
|
+
bool check_nulls = false;
|
|
61457
61432
|
if (wexpr->ignore_nulls) {
|
|
61458
61433
|
switch (wexpr->type) {
|
|
61459
61434
|
case ExpressionType::WINDOW_LEAD:
|
|
61460
61435
|
case ExpressionType::WINDOW_LAG:
|
|
61461
61436
|
case ExpressionType::WINDOW_FIRST_VALUE:
|
|
61462
61437
|
case ExpressionType::WINDOW_LAST_VALUE:
|
|
61463
|
-
case ExpressionType::WINDOW_NTH_VALUE:
|
|
61464
|
-
|
|
61465
|
-
|
|
61466
|
-
|
|
61438
|
+
case ExpressionType::WINDOW_NTH_VALUE:
|
|
61439
|
+
check_nulls = true;
|
|
61440
|
+
break;
|
|
61441
|
+
default:
|
|
61442
|
+
break;
|
|
61443
|
+
}
|
|
61444
|
+
}
|
|
61445
|
+
|
|
61446
|
+
// Single pass over the input to produce the payload columns.
|
|
61447
|
+
// Vectorisation for the win...
|
|
61448
|
+
idx_t input_idx = 0;
|
|
61449
|
+
for (auto &input_chunk : input.Chunks()) {
|
|
61450
|
+
const auto count = input_chunk->size();
|
|
61451
|
+
|
|
61452
|
+
if (!exprs.empty()) {
|
|
61453
|
+
payload_chunk.Reset();
|
|
61454
|
+
payload_executor.Execute(*input_chunk, payload_chunk);
|
|
61455
|
+
payload_chunk.Verify();
|
|
61456
|
+
payload_collection.Append(payload_chunk);
|
|
61457
|
+
|
|
61458
|
+
// process payload chunks while they are still piping hot
|
|
61459
|
+
if (check_nulls) {
|
|
61467
61460
|
UnifiedVectorFormat vdata;
|
|
61468
|
-
|
|
61461
|
+
payload_chunk.data[0].ToUnifiedFormat(count, vdata);
|
|
61469
61462
|
if (!vdata.validity.AllValid()) {
|
|
61470
61463
|
// Lazily materialise the contents when we find the first NULL
|
|
61471
61464
|
if (ignore_nulls.AllValid()) {
|
|
@@ -61473,19 +61466,31 @@ static void ComputeWindowExpression(BoundWindowExpression *wexpr, ChunkCollectio
|
|
|
61473
61466
|
}
|
|
61474
61467
|
// Write to the current position
|
|
61475
61468
|
// Chunks in a collection are full, so we don't have to worry about raggedness
|
|
61476
|
-
auto dst = ignore_nulls.GetData() + ignore_nulls.EntryCount(
|
|
61469
|
+
auto dst = ignore_nulls.GetData() + ignore_nulls.EntryCount(input_idx);
|
|
61477
61470
|
auto src = vdata.validity.GetData();
|
|
61478
61471
|
for (auto entry_count = vdata.validity.EntryCount(count); entry_count-- > 0;) {
|
|
61479
61472
|
*dst++ = *src++;
|
|
61480
61473
|
}
|
|
61481
61474
|
}
|
|
61482
|
-
pos += count;
|
|
61483
61475
|
}
|
|
61484
|
-
break;
|
|
61485
61476
|
}
|
|
61486
|
-
|
|
61487
|
-
|
|
61477
|
+
|
|
61478
|
+
leadlag_offset.Execute(*input_chunk);
|
|
61479
|
+
leadlag_default.Execute(*input_chunk);
|
|
61480
|
+
|
|
61481
|
+
if (wexpr->filter_expr) {
|
|
61482
|
+
const auto filtered = filter_executor.SelectExpression(*input_chunk, filter_sel);
|
|
61483
|
+
for (idx_t f = 0; f < filtered; ++f) {
|
|
61484
|
+
filter_mask.SetValid(input_idx + filter_sel[f]);
|
|
61485
|
+
}
|
|
61488
61486
|
}
|
|
61487
|
+
|
|
61488
|
+
boundary_start.Execute(*input_chunk);
|
|
61489
|
+
boundary_end.Execute(*input_chunk);
|
|
61490
|
+
|
|
61491
|
+
range.Execute(*input_chunk);
|
|
61492
|
+
|
|
61493
|
+
input_idx += count;
|
|
61489
61494
|
}
|
|
61490
61495
|
|
|
61491
61496
|
// build a segment tree for frame-adhering aggregates
|
|
@@ -61497,9 +61502,6 @@ static void ComputeWindowExpression(BoundWindowExpression *wexpr, ChunkCollectio
|
|
|
61497
61502
|
&payload_collection, filter_mask, mode);
|
|
61498
61503
|
}
|
|
61499
61504
|
|
|
61500
|
-
WindowBoundariesState bounds(wexpr);
|
|
61501
|
-
uint64_t dense_rank = 1, rank_equal = 0, rank = 1;
|
|
61502
|
-
|
|
61503
61505
|
// this is the main loop, go through all sorted rows and compute window function result
|
|
61504
61506
|
const vector<LogicalType> output_types(1, wexpr->return_type);
|
|
61505
61507
|
DataChunk output_chunk;
|
|
@@ -61515,8 +61517,8 @@ static void ComputeWindowExpression(BoundWindowExpression *wexpr, ChunkCollectio
|
|
|
61515
61517
|
auto &result = output_chunk.data[0];
|
|
61516
61518
|
|
|
61517
61519
|
// special case, OVER (), aggregate over everything
|
|
61518
|
-
UpdateWindowBoundaries(bounds, input.Count(), row_idx,
|
|
61519
|
-
partition_mask, order_mask);
|
|
61520
|
+
UpdateWindowBoundaries(bounds, input.Count(), row_idx, range.collection, boundary_start.collection,
|
|
61521
|
+
boundary_end.collection, partition_mask, order_mask);
|
|
61520
61522
|
if (WindowNeedsRank(wexpr)) {
|
|
61521
61523
|
if (!bounds.is_same_partition || row_idx == 0) { // special case for first row, need to init
|
|
61522
61524
|
dense_rank = 1;
|
|
@@ -61613,7 +61615,7 @@ static void ComputeWindowExpression(BoundWindowExpression *wexpr, ChunkCollectio
|
|
|
61613
61615
|
case ExpressionType::WINDOW_LAG: {
|
|
61614
61616
|
int64_t offset = 1;
|
|
61615
61617
|
if (wexpr->offset_expr) {
|
|
61616
|
-
offset = GetCell<int64_t>(
|
|
61618
|
+
offset = GetCell<int64_t>(leadlag_offset.collection, 0, wexpr->offset_expr->IsScalar() ? 0 : row_idx);
|
|
61617
61619
|
}
|
|
61618
61620
|
int64_t val_idx = (int64_t)row_idx;
|
|
61619
61621
|
if (wexpr->type == ExpressionType::WINDOW_LEAD) {
|
|
@@ -61637,7 +61639,7 @@ static void ComputeWindowExpression(BoundWindowExpression *wexpr, ChunkCollectio
|
|
|
61637
61639
|
payload_collection.CopyCell(0, val_idx, result, output_offset);
|
|
61638
61640
|
} else if (wexpr->default_expr) {
|
|
61639
61641
|
const auto source_row = wexpr->default_expr->IsScalar() ? 0 : row_idx;
|
|
61640
|
-
|
|
61642
|
+
leadlag_default.collection.CopyCell(0, source_row, result, output_offset);
|
|
61641
61643
|
} else {
|
|
61642
61644
|
FlatVector::SetNull(result, output_offset, true);
|
|
61643
61645
|
}
|
|
@@ -61689,9 +61691,8 @@ static void ComputeWindowExpression(BoundWindowExpression *wexpr, ChunkCollectio
|
|
|
61689
61691
|
using WindowExpressions = vector<BoundWindowExpression *>;
|
|
61690
61692
|
|
|
61691
61693
|
static void ComputeWindowExpressions(WindowExpressions &window_exprs, ChunkCollection &input,
|
|
61692
|
-
ChunkCollection &window_results,
|
|
61693
|
-
const ValidityMask &
|
|
61694
|
-
WindowAggregationMode mode) {
|
|
61694
|
+
ChunkCollection &window_results, const ValidityMask &partition_mask,
|
|
61695
|
+
const ValidityMask &order_mask, WindowAggregationMode mode) {
|
|
61695
61696
|
// Idempotency
|
|
61696
61697
|
if (input.Count() == 0) {
|
|
61697
61698
|
return;
|
|
@@ -61699,7 +61700,7 @@ static void ComputeWindowExpressions(WindowExpressions &window_exprs, ChunkColle
|
|
|
61699
61700
|
// Compute the functions columnwise
|
|
61700
61701
|
for (idx_t expr_idx = 0; expr_idx < window_exprs.size(); ++expr_idx) {
|
|
61701
61702
|
ChunkCollection output(input.GetAllocator());
|
|
61702
|
-
ComputeWindowExpression(window_exprs[expr_idx], input, output,
|
|
61703
|
+
ComputeWindowExpression(window_exprs[expr_idx], input, output, partition_mask, order_mask, mode);
|
|
61703
61704
|
window_results.Fuse(output);
|
|
61704
61705
|
}
|
|
61705
61706
|
}
|
|
@@ -61750,7 +61751,6 @@ static void GeneratePartition(WindowLocalSourceState &state, WindowGlobalSinkSta
|
|
|
61750
61751
|
// Scan the sorted data into new Collections
|
|
61751
61752
|
auto &allocator = gstate.allocator;
|
|
61752
61753
|
ChunkCollection input(allocator);
|
|
61753
|
-
ChunkCollection over(allocator);
|
|
61754
61754
|
if (gstate.rows && !hash_bin) {
|
|
61755
61755
|
// No partition - convert row collection to chunk collection
|
|
61756
61756
|
ScanRowCollection(*gstate.rows, *gstate.strings, input, input_types);
|
|
@@ -61760,14 +61760,13 @@ static void GeneratePartition(WindowLocalSourceState &state, WindowGlobalSinkSta
|
|
|
61760
61760
|
// Overwrite the collections with the sorted data
|
|
61761
61761
|
state.hash_group = move(gstate.hash_groups[hash_bin]);
|
|
61762
61762
|
state.hash_group->ComputeMasks(partition_mask, order_mask);
|
|
61763
|
-
|
|
61764
|
-
ScanSortedPartition(state, input, input_types, over, over_types);
|
|
61763
|
+
ScanSortedPartition(state, input, input_types);
|
|
61765
61764
|
} else {
|
|
61766
61765
|
return;
|
|
61767
61766
|
}
|
|
61768
61767
|
|
|
61769
61768
|
ChunkCollection output(allocator);
|
|
61770
|
-
ComputeWindowExpressions(window_exprs, input, output,
|
|
61769
|
+
ComputeWindowExpressions(window_exprs, input, output, partition_mask, order_mask, gstate.mode);
|
|
61771
61770
|
state.chunks.Merge(input);
|
|
61772
61771
|
state.window_results.Merge(output);
|
|
61773
61772
|
}
|