duckdb 0.7.2-dev1671.0 → 0.7.2-dev1734.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/duckdb/extension/icu/icu-datefunc.cpp +20 -8
- package/src/duckdb/extension/icu/icu-strptime.cpp +117 -29
- package/src/duckdb/extension/icu/include/icu-datefunc.hpp +2 -0
- package/src/duckdb/src/common/local_file_system.cpp +13 -2
- package/src/duckdb/src/common/sort/partition_state.cpp +644 -0
- package/src/duckdb/src/execution/expression_executor.cpp +1 -1
- package/src/duckdb/src/execution/expression_executor_state.cpp +2 -3
- package/src/duckdb/src/execution/operator/aggregate/physical_window.cpp +77 -849
- package/src/duckdb/src/function/table/system/duckdb_extensions.cpp +2 -2
- package/src/duckdb/src/function/table/version/pragma_version.cpp +2 -2
- package/src/duckdb/src/include/duckdb/common/sort/partition_state.hpp +247 -0
- package/src/duckdb/src/include/duckdb/execution/expression_executor_state.hpp +1 -3
- package/src/duckdb/src/include/duckdb/planner/pragma_handler.hpp +3 -2
- package/src/duckdb/src/include/duckdb/storage/buffer/block_handle.hpp +1 -2
- package/src/duckdb/src/include/duckdb/storage/buffer/buffer_pool.hpp +77 -0
- package/src/duckdb/src/include/duckdb/storage/buffer/temporary_file_information.hpp +12 -0
- package/src/duckdb/src/include/duckdb/storage/buffer_manager.hpp +3 -59
- package/src/duckdb/src/main/extension/extension_install.cpp +11 -0
- package/src/duckdb/src/main/extension/extension_load.cpp +29 -3
- package/src/duckdb/src/main/query_profiler.cpp +1 -1
- package/src/duckdb/src/planner/pragma_handler.cpp +7 -5
- package/src/duckdb/src/storage/buffer/block_handle.cpp +128 -0
- package/src/duckdb/src/storage/buffer/block_manager.cpp +81 -0
- package/src/duckdb/src/storage/buffer/buffer_pool.cpp +132 -0
- package/src/duckdb/src/storage/buffer/buffer_pool_reservation.cpp +32 -0
- package/src/duckdb/src/storage/buffer_manager.cpp +0 -351
- package/src/duckdb/third_party/libpg_query/postgres_parser.cpp +3 -5
- package/src/duckdb/ub_src_common_sort.cpp +2 -0
- package/src/duckdb/ub_src_storage_buffer.cpp +8 -0
@@ -3,7 +3,7 @@
|
|
3
3
|
#include "duckdb/common/operator/cast_operators.hpp"
|
4
4
|
#include "duckdb/common/operator/comparison_operators.hpp"
|
5
5
|
#include "duckdb/common/row_operations/row_operations.hpp"
|
6
|
-
#include "duckdb/common/sort/sort.hpp"
|
6
|
+
#include "duckdb/common/sort/partition_state.hpp"
|
7
7
|
#include "duckdb/common/types/chunk_collection.hpp"
|
8
8
|
#include "duckdb/common/types/column_data_consumer.hpp"
|
9
9
|
#include "duckdb/common/types/row_data_collection_scanner.hpp"
|
@@ -25,427 +25,41 @@
|
|
25
25
|
|
26
26
|
namespace duckdb {
|
27
27
|
|
28
|
-
class WindowGlobalHashGroup {
|
29
|
-
public:
|
30
|
-
using GlobalSortStatePtr = unique_ptr<GlobalSortState>;
|
31
|
-
using LocalSortStatePtr = unique_ptr<LocalSortState>;
|
32
|
-
using Orders = vector<BoundOrderByNode>;
|
33
|
-
using Types = vector<LogicalType>;
|
34
|
-
|
35
|
-
WindowGlobalHashGroup(BufferManager &buffer_manager, const Orders &partitions, const Orders &orders,
|
36
|
-
const Types &payload_types, bool external)
|
37
|
-
: count(0) {
|
38
|
-
|
39
|
-
RowLayout payload_layout;
|
40
|
-
payload_layout.Initialize(payload_types);
|
41
|
-
global_sort = make_uniq<GlobalSortState>(buffer_manager, orders, payload_layout);
|
42
|
-
global_sort->external = external;
|
43
|
-
|
44
|
-
partition_layout = global_sort->sort_layout.GetPrefixComparisonLayout(partitions.size());
|
45
|
-
}
|
46
|
-
|
47
|
-
void ComputeMasks(ValidityMask &partition_mask, ValidityMask &order_mask);
|
48
|
-
|
49
|
-
GlobalSortStatePtr global_sort;
|
50
|
-
atomic<idx_t> count;
|
51
|
-
|
52
|
-
// Mask computation
|
53
|
-
SortLayout partition_layout;
|
54
|
-
};
|
55
|
-
|
56
|
-
void WindowGlobalHashGroup::ComputeMasks(ValidityMask &partition_mask, ValidityMask &order_mask) {
|
57
|
-
D_ASSERT(count > 0);
|
58
|
-
|
59
|
-
// Set up a comparator for the partition subset
|
60
|
-
const auto partition_size = partition_layout.comparison_size;
|
61
|
-
|
62
|
-
SBIterator prev(*global_sort, ExpressionType::COMPARE_LESSTHAN);
|
63
|
-
SBIterator curr(*global_sort, ExpressionType::COMPARE_LESSTHAN);
|
64
|
-
|
65
|
-
partition_mask.SetValidUnsafe(0);
|
66
|
-
order_mask.SetValidUnsafe(0);
|
67
|
-
for (++curr; curr.GetIndex() < count; ++curr) {
|
68
|
-
// Compare the partition subset first because if that differs, then so does the full ordering
|
69
|
-
int part_cmp = 0;
|
70
|
-
if (partition_layout.all_constant) {
|
71
|
-
part_cmp = FastMemcmp(prev.entry_ptr, curr.entry_ptr, partition_size);
|
72
|
-
} else {
|
73
|
-
part_cmp = Comparators::CompareTuple(prev.scan, curr.scan, prev.entry_ptr, curr.entry_ptr, partition_layout,
|
74
|
-
prev.external);
|
75
|
-
}
|
76
|
-
|
77
|
-
if (part_cmp) {
|
78
|
-
partition_mask.SetValidUnsafe(curr.GetIndex());
|
79
|
-
order_mask.SetValidUnsafe(curr.GetIndex());
|
80
|
-
} else if (prev.Compare(curr)) {
|
81
|
-
order_mask.SetValidUnsafe(curr.GetIndex());
|
82
|
-
}
|
83
|
-
++prev;
|
84
|
-
}
|
85
|
-
}
|
86
|
-
|
87
28
|
// Global sink state
|
88
29
|
class WindowGlobalSinkState : public GlobalSinkState {
|
89
30
|
public:
|
90
|
-
|
91
|
-
|
92
|
-
using Types = vector<LogicalType>;
|
93
|
-
|
94
|
-
using GroupingPartition = unique_ptr<PartitionedColumnData>;
|
95
|
-
using GroupingAppend = unique_ptr<PartitionedColumnDataAppendState>;
|
96
|
-
|
97
|
-
WindowGlobalSinkState(const PhysicalWindow &op_p, ClientContext &context)
|
98
|
-
: op(op_p), context(context), buffer_manager(BufferManager::GetBufferManager(context)),
|
99
|
-
allocator(Allocator::Get(context)), payload_types(op.children[0]->types), memory_per_thread(0), count(0),
|
100
|
-
mode(DBConfig::GetConfig(context).options.window_mode) {
|
31
|
+
WindowGlobalSinkState(const PhysicalWindow &op, ClientContext &context)
|
32
|
+
: mode(DBConfig::GetConfig(context).options.window_mode) {
|
101
33
|
|
102
34
|
D_ASSERT(op.select_list[0]->GetExpressionClass() == ExpressionClass::BOUND_WINDOW);
|
103
|
-
auto wexpr = reinterpret_cast<BoundWindowExpression *>(op.select_list[0].get());
|
104
|
-
|
105
|
-
// we sort by both 1) partition by expression list and 2) order by expressions
|
106
|
-
const auto partition_cols = wexpr->partitions.size();
|
107
|
-
for (idx_t prt_idx = 0; prt_idx < partition_cols; prt_idx++) {
|
108
|
-
auto &pexpr = wexpr->partitions[prt_idx];
|
109
|
-
|
110
|
-
if (wexpr->partitions_stats.empty() || !wexpr->partitions_stats[prt_idx]) {
|
111
|
-
orders.emplace_back(OrderType::ASCENDING, OrderByNullType::NULLS_FIRST, pexpr->Copy(), nullptr);
|
112
|
-
} else {
|
113
|
-
orders.emplace_back(OrderType::ASCENDING, OrderByNullType::NULLS_FIRST, pexpr->Copy(),
|
114
|
-
wexpr->partitions_stats[prt_idx]->ToUnique());
|
115
|
-
}
|
116
|
-
partitions.emplace_back(orders.back().Copy());
|
117
|
-
}
|
118
|
-
|
119
|
-
for (const auto &order : wexpr->orders) {
|
120
|
-
orders.emplace_back(order.Copy());
|
121
|
-
}
|
122
|
-
|
123
|
-
memory_per_thread = op.GetMaxThreadMemory(context);
|
124
|
-
external = ClientConfig::GetConfig(context).force_external;
|
35
|
+
auto &wexpr = op.select_list[0]->Cast<BoundWindowExpression>();
|
125
36
|
|
126
|
-
|
127
|
-
|
128
|
-
|
129
|
-
|
130
|
-
ResizeGroupingData(op.estimated_cardinality);
|
131
|
-
}
|
37
|
+
global_partition =
|
38
|
+
make_uniq<PartitionGlobalSinkState>(context, wexpr.partitions, wexpr.orders, op.children[0]->types,
|
39
|
+
wexpr.partitions_stats, op.estimated_cardinality);
|
132
40
|
}
|
133
41
|
|
134
|
-
|
135
|
-
void CombineLocalPartition(GroupingPartition &local_partition, GroupingAppend &local_append);
|
136
|
-
|
137
|
-
void BuildSortState(ColumnDataCollection &group_data, WindowGlobalHashGroup &global_sort);
|
138
|
-
|
139
|
-
const PhysicalWindow &op;
|
140
|
-
ClientContext &context;
|
141
|
-
BufferManager &buffer_manager;
|
142
|
-
Allocator &allocator;
|
143
|
-
mutex lock;
|
144
|
-
|
145
|
-
// OVER(PARTITION BY...) (hash grouping)
|
146
|
-
unique_ptr<RadixPartitionedColumnData> grouping_data;
|
147
|
-
//! Payload plus hash column
|
148
|
-
Types grouping_types;
|
149
|
-
|
150
|
-
// OVER(...) (sorting)
|
151
|
-
Orders partitions;
|
152
|
-
Orders orders;
|
153
|
-
const Types payload_types;
|
154
|
-
vector<HashGroupPtr> hash_groups;
|
155
|
-
bool external;
|
156
|
-
|
157
|
-
// OVER() (no sorting)
|
158
|
-
unique_ptr<RowDataCollection> rows;
|
159
|
-
unique_ptr<RowDataCollection> strings;
|
160
|
-
|
161
|
-
// Threading
|
162
|
-
idx_t memory_per_thread;
|
163
|
-
atomic<idx_t> count;
|
42
|
+
unique_ptr<PartitionGlobalSinkState> global_partition;
|
164
43
|
WindowAggregationMode mode;
|
165
|
-
|
166
|
-
private:
|
167
|
-
void ResizeGroupingData(idx_t cardinality);
|
168
|
-
void SyncLocalPartition(GroupingPartition &local_partition, GroupingAppend &local_append);
|
169
44
|
};
|
170
45
|
|
171
|
-
void WindowGlobalSinkState::ResizeGroupingData(idx_t cardinality) {
|
172
|
-
// Have we started to combine? Then just live with it.
|
173
|
-
if (grouping_data && !grouping_data->GetPartitions().empty()) {
|
174
|
-
return;
|
175
|
-
}
|
176
|
-
// Is the average partition size too large?
|
177
|
-
const idx_t partition_size = STANDARD_ROW_GROUPS_SIZE;
|
178
|
-
const auto bits = grouping_data ? grouping_data->GetRadixBits() : 0;
|
179
|
-
auto new_bits = bits ? bits : 4;
|
180
|
-
while (new_bits < 10 && (cardinality / RadixPartitioning::NumberOfPartitions(new_bits)) > partition_size) {
|
181
|
-
++new_bits;
|
182
|
-
}
|
183
|
-
|
184
|
-
// Repartition the grouping data
|
185
|
-
if (new_bits != bits) {
|
186
|
-
const auto hash_col_idx = payload_types.size();
|
187
|
-
grouping_data = make_uniq<RadixPartitionedColumnData>(context, grouping_types, new_bits, hash_col_idx);
|
188
|
-
}
|
189
|
-
}
|
190
|
-
|
191
|
-
void WindowGlobalSinkState::SyncLocalPartition(GroupingPartition &local_partition, GroupingAppend &local_append) {
|
192
|
-
// We are done if the local_partition is right sized.
|
193
|
-
auto local_radix = (RadixPartitionedColumnData *)local_partition.get();
|
194
|
-
if (local_radix->GetRadixBits() == grouping_data->GetRadixBits()) {
|
195
|
-
return;
|
196
|
-
}
|
197
|
-
|
198
|
-
// If the local partition is now too small, flush it and reallocate
|
199
|
-
auto new_partition = grouping_data->CreateShared();
|
200
|
-
auto new_append = make_uniq<PartitionedColumnDataAppendState>();
|
201
|
-
new_partition->InitializeAppendState(*new_append);
|
202
|
-
|
203
|
-
local_partition->FlushAppendState(*local_append);
|
204
|
-
auto &local_groups = local_partition->GetPartitions();
|
205
|
-
for (auto &local_group : local_groups) {
|
206
|
-
ColumnDataScanState scanner;
|
207
|
-
local_group->InitializeScan(scanner);
|
208
|
-
|
209
|
-
DataChunk scan_chunk;
|
210
|
-
local_group->InitializeScanChunk(scan_chunk);
|
211
|
-
for (scan_chunk.Reset(); local_group->Scan(scanner, scan_chunk); scan_chunk.Reset()) {
|
212
|
-
new_partition->Append(*new_append, scan_chunk);
|
213
|
-
}
|
214
|
-
}
|
215
|
-
|
216
|
-
// The append state has stale pointers to the old local partition, so nuke it from orbit.
|
217
|
-
new_partition->FlushAppendState(*new_append);
|
218
|
-
|
219
|
-
local_partition = std::move(new_partition);
|
220
|
-
local_append = make_uniq<PartitionedColumnDataAppendState>();
|
221
|
-
local_partition->InitializeAppendState(*local_append);
|
222
|
-
}
|
223
|
-
|
224
|
-
void WindowGlobalSinkState::UpdateLocalPartition(GroupingPartition &local_partition, GroupingAppend &local_append) {
|
225
|
-
// Make sure grouping_data doesn't change under us.
|
226
|
-
lock_guard<mutex> guard(lock);
|
227
|
-
|
228
|
-
if (!local_partition) {
|
229
|
-
local_partition = grouping_data->CreateShared();
|
230
|
-
local_append = make_uniq<PartitionedColumnDataAppendState>();
|
231
|
-
local_partition->InitializeAppendState(*local_append);
|
232
|
-
return;
|
233
|
-
}
|
234
|
-
|
235
|
-
// Grow the groups if they are too big
|
236
|
-
ResizeGroupingData(count);
|
237
|
-
|
238
|
-
// Sync local partition to have the same bit count
|
239
|
-
SyncLocalPartition(local_partition, local_append);
|
240
|
-
}
|
241
|
-
|
242
|
-
void WindowGlobalSinkState::CombineLocalPartition(GroupingPartition &local_partition, GroupingAppend &local_append) {
|
243
|
-
if (!local_partition) {
|
244
|
-
return;
|
245
|
-
}
|
246
|
-
local_partition->FlushAppendState(*local_append);
|
247
|
-
|
248
|
-
// Make sure grouping_data doesn't change under us.
|
249
|
-
// Combine has an internal mutex, so this is single-threaded anyway.
|
250
|
-
lock_guard<mutex> guard(lock);
|
251
|
-
SyncLocalPartition(local_partition, local_append);
|
252
|
-
grouping_data->Combine(*local_partition);
|
253
|
-
}
|
254
|
-
|
255
|
-
void WindowGlobalSinkState::BuildSortState(ColumnDataCollection &group_data, WindowGlobalHashGroup &hash_group) {
|
256
|
-
auto &global_sort = *hash_group.global_sort;
|
257
|
-
|
258
|
-
// Set up the sort expression computation.
|
259
|
-
vector<LogicalType> sort_types;
|
260
|
-
ExpressionExecutor executor(context);
|
261
|
-
for (auto &order : orders) {
|
262
|
-
auto &oexpr = order.expression;
|
263
|
-
sort_types.emplace_back(oexpr->return_type);
|
264
|
-
executor.AddExpression(*oexpr);
|
265
|
-
}
|
266
|
-
DataChunk sort_chunk;
|
267
|
-
sort_chunk.Initialize(allocator, sort_types);
|
268
|
-
|
269
|
-
// Copy the data from the group into the sort code.
|
270
|
-
LocalSortState local_sort;
|
271
|
-
local_sort.Initialize(global_sort, global_sort.buffer_manager);
|
272
|
-
|
273
|
-
// Strip hash column
|
274
|
-
DataChunk payload_chunk;
|
275
|
-
payload_chunk.Initialize(allocator, payload_types);
|
276
|
-
|
277
|
-
vector<column_t> column_ids;
|
278
|
-
column_ids.reserve(payload_types.size());
|
279
|
-
for (column_t i = 0; i < payload_types.size(); ++i) {
|
280
|
-
column_ids.emplace_back(i);
|
281
|
-
}
|
282
|
-
ColumnDataConsumer scanner(group_data, column_ids);
|
283
|
-
ColumnDataConsumerScanState chunk_state;
|
284
|
-
chunk_state.current_chunk_state.properties = ColumnDataScanProperties::ALLOW_ZERO_COPY;
|
285
|
-
scanner.InitializeScan();
|
286
|
-
for (auto chunk_idx = scanner.ChunkCount(); chunk_idx-- > 0;) {
|
287
|
-
if (!scanner.AssignChunk(chunk_state)) {
|
288
|
-
break;
|
289
|
-
}
|
290
|
-
scanner.ScanChunk(chunk_state, payload_chunk);
|
291
|
-
|
292
|
-
sort_chunk.Reset();
|
293
|
-
executor.Execute(payload_chunk, sort_chunk);
|
294
|
-
|
295
|
-
local_sort.SinkChunk(sort_chunk, payload_chunk);
|
296
|
-
if (local_sort.SizeInBytes() > memory_per_thread) {
|
297
|
-
local_sort.Sort(global_sort, true);
|
298
|
-
}
|
299
|
-
scanner.FinishChunk(chunk_state);
|
300
|
-
}
|
301
|
-
|
302
|
-
global_sort.AddLocalState(local_sort);
|
303
|
-
|
304
|
-
hash_group.count += group_data.Count();
|
305
|
-
}
|
306
|
-
|
307
46
|
// Per-thread sink state
|
308
47
|
class WindowLocalSinkState : public LocalSinkState {
|
309
48
|
public:
|
310
|
-
WindowLocalSinkState(ClientContext &context, const
|
311
|
-
:
|
312
|
-
D_ASSERT(op.select_list[0]->GetExpressionClass() == ExpressionClass::BOUND_WINDOW);
|
313
|
-
auto wexpr = reinterpret_cast<BoundWindowExpression *>(op.select_list[0].get());
|
314
|
-
|
315
|
-
vector<LogicalType> group_types;
|
316
|
-
for (idx_t prt_idx = 0; prt_idx < wexpr->partitions.size(); prt_idx++) {
|
317
|
-
auto &pexpr = wexpr->partitions[prt_idx];
|
318
|
-
group_types.push_back(pexpr->return_type);
|
319
|
-
executor.AddExpression(*pexpr);
|
320
|
-
}
|
321
|
-
sort_cols = wexpr->orders.size() + group_types.size();
|
322
|
-
|
323
|
-
if (sort_cols) {
|
324
|
-
if (!group_types.empty()) {
|
325
|
-
// OVER(PARTITION BY...)
|
326
|
-
group_chunk.Initialize(allocator, group_types);
|
327
|
-
}
|
328
|
-
// OVER(...)
|
329
|
-
auto payload_types = op.children[0]->types;
|
330
|
-
payload_types.emplace_back(LogicalType::HASH);
|
331
|
-
payload_chunk.Initialize(allocator, payload_types);
|
332
|
-
} else {
|
333
|
-
// OVER()
|
334
|
-
payload_layout.Initialize(op.children[0]->types);
|
335
|
-
}
|
49
|
+
WindowLocalSinkState(ClientContext &context, const WindowGlobalSinkState &gstate)
|
50
|
+
: local_partition(context, *gstate.global_partition) {
|
336
51
|
}
|
337
52
|
|
338
|
-
|
339
|
-
|
340
|
-
Allocator &allocator;
|
341
|
-
|
342
|
-
// OVER(PARTITION BY...) (hash grouping)
|
343
|
-
ExpressionExecutor executor;
|
344
|
-
DataChunk group_chunk;
|
345
|
-
DataChunk payload_chunk;
|
346
|
-
unique_ptr<PartitionedColumnData> local_partition;
|
347
|
-
unique_ptr<PartitionedColumnDataAppendState> local_append;
|
348
|
-
|
349
|
-
// OVER(...) (sorting)
|
350
|
-
size_t sort_cols;
|
351
|
-
|
352
|
-
// OVER() (no sorting)
|
353
|
-
RowLayout payload_layout;
|
354
|
-
unique_ptr<RowDataCollection> rows;
|
355
|
-
unique_ptr<RowDataCollection> strings;
|
356
|
-
|
357
|
-
//! Compute the hash values
|
358
|
-
void Hash(DataChunk &input_chunk, Vector &hash_vector);
|
359
|
-
//! Sink an input chunk
|
360
|
-
void Sink(DataChunk &input_chunk, WindowGlobalSinkState &gstate);
|
361
|
-
//! Merge the state into the global state.
|
362
|
-
void Combine(WindowGlobalSinkState &gstate);
|
363
|
-
};
|
364
|
-
|
365
|
-
void WindowLocalSinkState::Hash(DataChunk &input_chunk, Vector &hash_vector) {
|
366
|
-
const auto count = input_chunk.size();
|
367
|
-
if (group_chunk.ColumnCount() > 0) {
|
368
|
-
// OVER(PARTITION BY...) (hash grouping)
|
369
|
-
group_chunk.Reset();
|
370
|
-
executor.Execute(input_chunk, group_chunk);
|
371
|
-
VectorOperations::Hash(group_chunk.data[0], hash_vector, count);
|
372
|
-
for (idx_t prt_idx = 1; prt_idx < group_chunk.ColumnCount(); ++prt_idx) {
|
373
|
-
VectorOperations::CombineHash(hash_vector, group_chunk.data[prt_idx], count);
|
374
|
-
}
|
375
|
-
} else {
|
376
|
-
// OVER(...) (sorting)
|
377
|
-
// Single partition => single hash value
|
378
|
-
hash_vector.SetVectorType(VectorType::CONSTANT_VECTOR);
|
379
|
-
auto hashes = ConstantVector::GetData<hash_t>(hash_vector);
|
380
|
-
hashes[0] = 0;
|
381
|
-
}
|
382
|
-
}
|
383
|
-
|
384
|
-
void WindowLocalSinkState::Sink(DataChunk &input_chunk, WindowGlobalSinkState &gstate) {
|
385
|
-
gstate.count += input_chunk.size();
|
386
|
-
|
387
|
-
// OVER()
|
388
|
-
if (sort_cols == 0) {
|
389
|
-
// No sorts, so build paged row chunks
|
390
|
-
if (!rows) {
|
391
|
-
const auto entry_size = payload_layout.GetRowWidth();
|
392
|
-
const auto capacity = MaxValue<idx_t>(STANDARD_VECTOR_SIZE, (Storage::BLOCK_SIZE / entry_size) + 1);
|
393
|
-
rows = make_uniq<RowDataCollection>(gstate.buffer_manager, capacity, entry_size);
|
394
|
-
strings = make_uniq<RowDataCollection>(gstate.buffer_manager, (idx_t)Storage::BLOCK_SIZE, 1, true);
|
395
|
-
}
|
396
|
-
const auto row_count = input_chunk.size();
|
397
|
-
const auto row_sel = FlatVector::IncrementalSelectionVector();
|
398
|
-
Vector addresses(LogicalType::POINTER);
|
399
|
-
auto key_locations = FlatVector::GetData<data_ptr_t>(addresses);
|
400
|
-
const auto prev_rows_blocks = rows->blocks.size();
|
401
|
-
auto handles = rows->Build(row_count, key_locations, nullptr, row_sel);
|
402
|
-
auto input_data = input_chunk.ToUnifiedFormat();
|
403
|
-
RowOperations::Scatter(input_chunk, input_data.get(), payload_layout, addresses, *strings, *row_sel, row_count);
|
404
|
-
// Mark that row blocks contain pointers (heap blocks are pinned)
|
405
|
-
if (!payload_layout.AllConstant()) {
|
406
|
-
D_ASSERT(strings->keep_pinned);
|
407
|
-
for (size_t i = prev_rows_blocks; i < rows->blocks.size(); ++i) {
|
408
|
-
rows->blocks[i]->block->SetSwizzling("WindowLocalSinkState::Sink");
|
409
|
-
}
|
410
|
-
}
|
411
|
-
return;
|
53
|
+
void Sink(DataChunk &input_chunk) {
|
54
|
+
local_partition.Sink(input_chunk);
|
412
55
|
}
|
413
56
|
|
414
|
-
|
415
|
-
|
416
|
-
auto &hash_vector = payload_chunk.data.back();
|
417
|
-
Hash(input_chunk, hash_vector);
|
418
|
-
for (idx_t col_idx = 0; col_idx < input_chunk.ColumnCount(); ++col_idx) {
|
419
|
-
payload_chunk.data[col_idx].Reference(input_chunk.data[col_idx]);
|
57
|
+
void Combine() {
|
58
|
+
local_partition.Combine();
|
420
59
|
}
|
421
|
-
payload_chunk.SetCardinality(input_chunk);
|
422
|
-
|
423
|
-
gstate.UpdateLocalPartition(local_partition, local_append);
|
424
|
-
local_partition->Append(*local_append, payload_chunk);
|
425
|
-
}
|
426
60
|
|
427
|
-
|
428
|
-
|
429
|
-
if (sort_cols == 0) {
|
430
|
-
// Only one partition again, so need a global lock.
|
431
|
-
lock_guard<mutex> glock(gstate.lock);
|
432
|
-
if (gstate.rows) {
|
433
|
-
if (rows) {
|
434
|
-
gstate.rows->Merge(*rows);
|
435
|
-
gstate.strings->Merge(*strings);
|
436
|
-
rows.reset();
|
437
|
-
strings.reset();
|
438
|
-
}
|
439
|
-
} else {
|
440
|
-
gstate.rows = std::move(rows);
|
441
|
-
gstate.strings = std::move(strings);
|
442
|
-
}
|
443
|
-
return;
|
444
|
-
}
|
445
|
-
|
446
|
-
// OVER(...)
|
447
|
-
gstate.CombineLocalPartition(local_partition, local_append);
|
448
|
-
}
|
61
|
+
PartitionLocalSinkState local_partition;
|
62
|
+
};
|
449
63
|
|
450
64
|
// this implements a sorted window functions variant
|
451
65
|
PhysicalWindow::PhysicalWindow(vector<LogicalType> types, vector<unique_ptr<Expression>> select_list_p,
|
@@ -1423,325 +1037,51 @@ void WindowExecutor::Evaluate(idx_t row_idx, DataChunk &input_chunk, Vector &res
|
|
1423
1037
|
//===--------------------------------------------------------------------===//
|
1424
1038
|
SinkResultType PhysicalWindow::Sink(ExecutionContext &context, GlobalSinkState &gstate_p, LocalSinkState &lstate_p,
|
1425
1039
|
DataChunk &input) const {
|
1426
|
-
auto &gstate = gstate_p.Cast<WindowGlobalSinkState>();
|
1427
1040
|
auto &lstate = lstate_p.Cast<WindowLocalSinkState>();
|
1428
1041
|
|
1429
|
-
lstate.Sink(input, gstate);
|
1042
|
+
lstate.Sink(input);
|
1430
1043
|
|
1431
1044
|
return SinkResultType::NEED_MORE_INPUT;
|
1432
1045
|
}
|
1433
1046
|
|
1434
1047
|
void PhysicalWindow::Combine(ExecutionContext &context, GlobalSinkState &gstate_p, LocalSinkState &lstate_p) const {
|
1435
|
-
auto &gstate = gstate_p.Cast<WindowGlobalSinkState>();
|
1436
1048
|
auto &lstate = lstate_p.Cast<WindowLocalSinkState>();
|
1437
|
-
lstate.Combine(gstate);
|
1049
|
+
lstate.Combine();
|
1438
1050
|
}
|
1439
1051
|
|
1440
1052
|
unique_ptr<LocalSinkState> PhysicalWindow::GetLocalSinkState(ExecutionContext &context) const {
|
1441
|
-
|
1053
|
+
auto &gstate = sink_state->Cast<WindowGlobalSinkState>();
|
1054
|
+
return make_uniq<WindowLocalSinkState>(context.client, gstate);
|
1442
1055
|
}
|
1443
1056
|
|
1444
1057
|
unique_ptr<GlobalSinkState> PhysicalWindow::GetGlobalSinkState(ClientContext &context) const {
|
1445
1058
|
return make_uniq<WindowGlobalSinkState>(*this, context);
|
1446
1059
|
}
|
1447
1060
|
|
1448
|
-
enum class WindowSortStage : uint8_t { INIT, PREPARE, MERGE, SORTED };
|
1449
|
-
|
1450
|
-
class WindowGlobalMergeState;
|
1451
|
-
|
1452
|
-
class WindowLocalMergeState {
|
1453
|
-
public:
|
1454
|
-
WindowLocalMergeState() : merge_state(nullptr), stage(WindowSortStage::INIT) {
|
1455
|
-
finished = true;
|
1456
|
-
}
|
1457
|
-
|
1458
|
-
bool TaskFinished() {
|
1459
|
-
return finished;
|
1460
|
-
}
|
1461
|
-
|
1462
|
-
void Prepare();
|
1463
|
-
void Merge();
|
1464
|
-
|
1465
|
-
void ExecuteTask();
|
1466
|
-
|
1467
|
-
WindowGlobalMergeState *merge_state;
|
1468
|
-
WindowSortStage stage;
|
1469
|
-
atomic<bool> finished;
|
1470
|
-
};
|
1471
|
-
|
1472
|
-
class WindowGlobalMergeState {
|
1473
|
-
public:
|
1474
|
-
using GroupDataPtr = unique_ptr<ColumnDataCollection>;
|
1475
|
-
|
1476
|
-
explicit WindowGlobalMergeState(WindowGlobalSinkState &sink, GroupDataPtr group_data)
|
1477
|
-
: sink(sink), group_data(std::move(group_data)), stage(WindowSortStage::INIT), total_tasks(0),
|
1478
|
-
tasks_assigned(0), tasks_completed(0) {
|
1479
|
-
|
1480
|
-
const auto group_idx = sink.hash_groups.size();
|
1481
|
-
auto new_group = make_uniq<WindowGlobalHashGroup>(sink.buffer_manager, sink.partitions, sink.orders,
|
1482
|
-
sink.payload_types, sink.external);
|
1483
|
-
sink.hash_groups.emplace_back(std::move(new_group));
|
1484
|
-
|
1485
|
-
hash_group = sink.hash_groups[group_idx].get();
|
1486
|
-
global_sort = sink.hash_groups[group_idx]->global_sort.get();
|
1487
|
-
}
|
1488
|
-
|
1489
|
-
bool IsSorted() const {
|
1490
|
-
lock_guard<mutex> guard(lock);
|
1491
|
-
return stage == WindowSortStage::SORTED;
|
1492
|
-
}
|
1493
|
-
|
1494
|
-
bool AssignTask(WindowLocalMergeState &local_state);
|
1495
|
-
bool TryPrepareNextStage();
|
1496
|
-
void CompleteTask();
|
1497
|
-
|
1498
|
-
WindowGlobalSinkState &sink;
|
1499
|
-
GroupDataPtr group_data;
|
1500
|
-
WindowGlobalHashGroup *hash_group;
|
1501
|
-
GlobalSortState *global_sort;
|
1502
|
-
|
1503
|
-
private:
|
1504
|
-
mutable mutex lock;
|
1505
|
-
WindowSortStage stage;
|
1506
|
-
idx_t total_tasks;
|
1507
|
-
idx_t tasks_assigned;
|
1508
|
-
idx_t tasks_completed;
|
1509
|
-
};
|
1510
|
-
|
1511
|
-
void WindowLocalMergeState::Prepare() {
|
1512
|
-
auto &global_sort = *merge_state->global_sort;
|
1513
|
-
merge_state->sink.BuildSortState(*merge_state->group_data, *merge_state->hash_group);
|
1514
|
-
merge_state->group_data.reset();
|
1515
|
-
|
1516
|
-
global_sort.PrepareMergePhase();
|
1517
|
-
}
|
1518
|
-
|
1519
|
-
void WindowLocalMergeState::Merge() {
|
1520
|
-
auto &global_sort = *merge_state->global_sort;
|
1521
|
-
MergeSorter merge_sorter(global_sort, global_sort.buffer_manager);
|
1522
|
-
merge_sorter.PerformInMergeRound();
|
1523
|
-
}
|
1524
|
-
|
1525
|
-
void WindowLocalMergeState::ExecuteTask() {
|
1526
|
-
switch (stage) {
|
1527
|
-
case WindowSortStage::PREPARE:
|
1528
|
-
Prepare();
|
1529
|
-
break;
|
1530
|
-
case WindowSortStage::MERGE:
|
1531
|
-
Merge();
|
1532
|
-
break;
|
1533
|
-
default:
|
1534
|
-
throw InternalException("Unexpected WindowGlobalMergeState in ExecuteTask!");
|
1535
|
-
}
|
1536
|
-
|
1537
|
-
merge_state->CompleteTask();
|
1538
|
-
finished = true;
|
1539
|
-
}
|
1540
|
-
|
1541
|
-
bool WindowGlobalMergeState::AssignTask(WindowLocalMergeState &local_state) {
|
1542
|
-
lock_guard<mutex> guard(lock);
|
1543
|
-
|
1544
|
-
if (tasks_assigned >= total_tasks) {
|
1545
|
-
return false;
|
1546
|
-
}
|
1547
|
-
|
1548
|
-
local_state.merge_state = this;
|
1549
|
-
local_state.stage = stage;
|
1550
|
-
local_state.finished = false;
|
1551
|
-
tasks_assigned++;
|
1552
|
-
|
1553
|
-
return true;
|
1554
|
-
}
|
1555
|
-
|
1556
|
-
void WindowGlobalMergeState::CompleteTask() {
|
1557
|
-
lock_guard<mutex> guard(lock);
|
1558
|
-
|
1559
|
-
++tasks_completed;
|
1560
|
-
}
|
1561
|
-
|
1562
|
-
bool WindowGlobalMergeState::TryPrepareNextStage() {
|
1563
|
-
lock_guard<mutex> guard(lock);
|
1564
|
-
|
1565
|
-
if (tasks_completed < total_tasks) {
|
1566
|
-
return false;
|
1567
|
-
}
|
1568
|
-
|
1569
|
-
tasks_assigned = tasks_completed = 0;
|
1570
|
-
|
1571
|
-
switch (stage) {
|
1572
|
-
case WindowSortStage::INIT:
|
1573
|
-
total_tasks = 1;
|
1574
|
-
stage = WindowSortStage::PREPARE;
|
1575
|
-
return true;
|
1576
|
-
|
1577
|
-
case WindowSortStage::PREPARE:
|
1578
|
-
total_tasks = global_sort->sorted_blocks.size() / 2;
|
1579
|
-
if (!total_tasks) {
|
1580
|
-
break;
|
1581
|
-
}
|
1582
|
-
stage = WindowSortStage::MERGE;
|
1583
|
-
global_sort->InitializeMergeRound();
|
1584
|
-
return true;
|
1585
|
-
|
1586
|
-
case WindowSortStage::MERGE:
|
1587
|
-
global_sort->CompleteMergeRound(true);
|
1588
|
-
total_tasks = global_sort->sorted_blocks.size() / 2;
|
1589
|
-
if (!total_tasks) {
|
1590
|
-
break;
|
1591
|
-
}
|
1592
|
-
global_sort->InitializeMergeRound();
|
1593
|
-
return true;
|
1594
|
-
|
1595
|
-
case WindowSortStage::SORTED:
|
1596
|
-
break;
|
1597
|
-
}
|
1598
|
-
|
1599
|
-
stage = WindowSortStage::SORTED;
|
1600
|
-
|
1601
|
-
return false;
|
1602
|
-
}
|
1603
|
-
|
1604
|
-
class WindowGlobalMergeStates {
|
1605
|
-
public:
|
1606
|
-
using WindowGlobalMergeStatePtr = unique_ptr<WindowGlobalMergeState>;
|
1607
|
-
|
1608
|
-
explicit WindowGlobalMergeStates(WindowGlobalSinkState &sink) {
|
1609
|
-
// Schedule all the sorts for maximum thread utilisation
|
1610
|
-
for (auto &group_data : sink.grouping_data->GetPartitions()) {
|
1611
|
-
// Prepare for merge sort phase
|
1612
|
-
if (group_data->Count()) {
|
1613
|
-
auto state = make_uniq<WindowGlobalMergeState>(sink, std::move(group_data));
|
1614
|
-
states.emplace_back(std::move(state));
|
1615
|
-
}
|
1616
|
-
}
|
1617
|
-
}
|
1618
|
-
|
1619
|
-
vector<WindowGlobalMergeStatePtr> states;
|
1620
|
-
};
|
1621
|
-
|
1622
|
-
class WindowMergeTask : public ExecutorTask {
|
1623
|
-
public:
|
1624
|
-
WindowMergeTask(shared_ptr<Event> event_p, ClientContext &context_p, WindowGlobalMergeStates &hash_groups_p)
|
1625
|
-
: ExecutorTask(context_p), event(std::move(event_p)), hash_groups(hash_groups_p) {
|
1626
|
-
}
|
1627
|
-
|
1628
|
-
TaskExecutionResult ExecuteTask(TaskExecutionMode mode) override;
|
1629
|
-
|
1630
|
-
private:
|
1631
|
-
shared_ptr<Event> event;
|
1632
|
-
WindowLocalMergeState local_state;
|
1633
|
-
WindowGlobalMergeStates &hash_groups;
|
1634
|
-
};
|
1635
|
-
|
1636
|
-
TaskExecutionResult WindowMergeTask::ExecuteTask(TaskExecutionMode mode) {
|
1637
|
-
// Loop until all hash groups are done
|
1638
|
-
size_t sorted = 0;
|
1639
|
-
while (sorted < hash_groups.states.size()) {
|
1640
|
-
// First check if there is an unfinished task for this thread
|
1641
|
-
if (executor.HasError()) {
|
1642
|
-
return TaskExecutionResult::TASK_ERROR;
|
1643
|
-
}
|
1644
|
-
if (!local_state.TaskFinished()) {
|
1645
|
-
local_state.ExecuteTask();
|
1646
|
-
continue;
|
1647
|
-
}
|
1648
|
-
|
1649
|
-
// Thread is done with its assigned task, try to fetch new work
|
1650
|
-
for (auto group = sorted; group < hash_groups.states.size(); ++group) {
|
1651
|
-
auto &global_state = hash_groups.states[group];
|
1652
|
-
if (global_state->IsSorted()) {
|
1653
|
-
// This hash group is done
|
1654
|
-
// Update the high water mark of densely completed groups
|
1655
|
-
if (sorted == group) {
|
1656
|
-
++sorted;
|
1657
|
-
}
|
1658
|
-
continue;
|
1659
|
-
}
|
1660
|
-
|
1661
|
-
// Try to assign work for this hash group to this thread
|
1662
|
-
if (global_state->AssignTask(local_state)) {
|
1663
|
-
// We assigned a task to this thread!
|
1664
|
-
// Break out of this loop to re-enter the top-level loop and execute the task
|
1665
|
-
break;
|
1666
|
-
}
|
1667
|
-
|
1668
|
-
// Hash group global state couldn't assign a task to this thread
|
1669
|
-
// Try to prepare the next stage
|
1670
|
-
if (!global_state->TryPrepareNextStage()) {
|
1671
|
-
// This current hash group is not yet done
|
1672
|
-
// But we were not able to assign a task for it to this thread
|
1673
|
-
// See if the next hash group is better
|
1674
|
-
continue;
|
1675
|
-
}
|
1676
|
-
|
1677
|
-
// We were able to prepare the next stage for this hash group!
|
1678
|
-
// Try to assign a task once more
|
1679
|
-
if (global_state->AssignTask(local_state)) {
|
1680
|
-
// We assigned a task to this thread!
|
1681
|
-
// Break out of this loop to re-enter the top-level loop and execute the task
|
1682
|
-
break;
|
1683
|
-
}
|
1684
|
-
|
1685
|
-
// We were able to prepare the next merge round,
|
1686
|
-
// but we were not able to assign a task for it to this thread
|
1687
|
-
// The tasks were assigned to other threads while this thread waited for the lock
|
1688
|
-
// Go to the next iteration to see if another hash group has a task
|
1689
|
-
}
|
1690
|
-
}
|
1691
|
-
|
1692
|
-
event->FinishTask();
|
1693
|
-
return TaskExecutionResult::TASK_FINISHED;
|
1694
|
-
}
|
1695
|
-
|
1696
|
-
class WindowMergeEvent : public BasePipelineEvent {
|
1697
|
-
public:
|
1698
|
-
WindowMergeEvent(WindowGlobalSinkState &gstate_p, Pipeline &pipeline_p)
|
1699
|
-
: BasePipelineEvent(pipeline_p), gstate(gstate_p), merge_states(gstate_p) {
|
1700
|
-
}
|
1701
|
-
|
1702
|
-
WindowGlobalSinkState &gstate;
|
1703
|
-
WindowGlobalMergeStates merge_states;
|
1704
|
-
|
1705
|
-
public:
|
1706
|
-
void Schedule() override {
|
1707
|
-
auto &context = pipeline->GetClientContext();
|
1708
|
-
|
1709
|
-
// Schedule tasks equal to the number of threads, which will each merge multiple partitions
|
1710
|
-
auto &ts = TaskScheduler::GetScheduler(context);
|
1711
|
-
idx_t num_threads = ts.NumberOfThreads();
|
1712
|
-
|
1713
|
-
vector<unique_ptr<Task>> merge_tasks;
|
1714
|
-
for (idx_t tnum = 0; tnum < num_threads; tnum++) {
|
1715
|
-
merge_tasks.push_back(make_uniq<WindowMergeTask>(shared_from_this(), context, merge_states));
|
1716
|
-
}
|
1717
|
-
SetTasks(std::move(merge_tasks));
|
1718
|
-
}
|
1719
|
-
};
|
1720
|
-
|
1721
1061
|
SinkFinalizeType PhysicalWindow::Finalize(Pipeline &pipeline, Event &event, ClientContext &context,
|
1722
1062
|
GlobalSinkState &gstate_p) const {
|
1723
1063
|
auto &state = gstate_p.Cast<WindowGlobalSinkState>();
|
1724
1064
|
|
1725
1065
|
// Did we get any data?
|
1726
|
-
if (!state.count) {
|
1066
|
+
if (!state.global_partition->count) {
|
1727
1067
|
return SinkFinalizeType::NO_OUTPUT_POSSIBLE;
|
1728
1068
|
}
|
1729
1069
|
|
1730
1070
|
// Do we have any sorting to schedule?
|
1731
|
-
if (state.rows) {
|
1732
|
-
D_ASSERT(!state.grouping_data);
|
1733
|
-
return state.rows->count ? SinkFinalizeType::READY : SinkFinalizeType::NO_OUTPUT_POSSIBLE;
|
1071
|
+
if (state.global_partition->rows) {
|
1072
|
+
D_ASSERT(!state.global_partition->grouping_data);
|
1073
|
+
return state.global_partition->rows->count ? SinkFinalizeType::READY : SinkFinalizeType::NO_OUTPUT_POSSIBLE;
|
1734
1074
|
}
|
1735
1075
|
|
1736
1076
|
// Find the first group to sort
|
1737
|
-
auto &groups = state.grouping_data->GetPartitions();
|
1077
|
+
auto &groups = state.global_partition->grouping_data->GetPartitions();
|
1738
1078
|
if (groups.empty()) {
|
1739
1079
|
// Empty input!
|
1740
1080
|
return SinkFinalizeType::NO_OUTPUT_POSSIBLE;
|
1741
1081
|
}
|
1742
1082
|
|
1743
1083
|
// Schedule all the sorts for maximum thread utilisation
|
1744
|
-
auto new_event = make_shared<
|
1084
|
+
auto new_event = make_shared<PartitionMergeEvent>(*state.global_partition, pipeline);
|
1745
1085
|
event.InsertEvent(std::move(new_event));
|
1746
1086
|
|
1747
1087
|
return SinkFinalizeType::READY;
|
@@ -1752,199 +1092,80 @@ SinkFinalizeType PhysicalWindow::Finalize(Pipeline &pipeline, Event &event, Clie
|
|
1752
1092
|
//===--------------------------------------------------------------------===//
|
1753
1093
|
class WindowGlobalSourceState : public GlobalSourceState {
|
1754
1094
|
public:
|
1755
|
-
explicit WindowGlobalSourceState(
|
1095
|
+
explicit WindowGlobalSourceState(WindowGlobalSinkState &gsink) : partition_source(*gsink.global_partition) {
|
1756
1096
|
}
|
1757
1097
|
|
1758
|
-
|
1759
|
-
//! The output read position.
|
1760
|
-
atomic<idx_t> next_bin;
|
1098
|
+
PartitionGlobalSourceState partition_source;
|
1761
1099
|
|
1762
1100
|
public:
|
1763
1101
|
idx_t MaxThreads() override {
|
1764
|
-
|
1765
|
-
|
1766
|
-
// If there is only one partition, we have to process it on one thread.
|
1767
|
-
if (!state.grouping_data) {
|
1768
|
-
return 1;
|
1769
|
-
}
|
1770
|
-
|
1771
|
-
// If there is not a lot of data, process serially.
|
1772
|
-
if (state.count < STANDARD_ROW_GROUPS_SIZE) {
|
1773
|
-
return 1;
|
1774
|
-
}
|
1775
|
-
|
1776
|
-
return state.hash_groups.size();
|
1102
|
+
return partition_source.MaxThreads();
|
1777
1103
|
}
|
1778
1104
|
};
|
1779
1105
|
|
1780
1106
|
// Per-thread read state
|
1781
1107
|
class WindowLocalSourceState : public LocalSourceState {
|
1782
1108
|
public:
|
1783
|
-
using HashGroupPtr = unique_ptr<
|
1109
|
+
using HashGroupPtr = unique_ptr<PartitionGlobalHashGroup>;
|
1784
1110
|
using WindowExecutorPtr = unique_ptr<WindowExecutor>;
|
1785
1111
|
using WindowExecutors = vector<WindowExecutorPtr>;
|
1786
1112
|
|
1787
|
-
WindowLocalSourceState(const PhysicalWindow &
|
1788
|
-
:
|
1113
|
+
WindowLocalSourceState(const PhysicalWindow &op_p, ExecutionContext &context, WindowGlobalSourceState &gsource)
|
1114
|
+
: partition_source(gsource.partition_source.gsink), context(context.client), op(op_p) {
|
1115
|
+
|
1789
1116
|
vector<LogicalType> output_types;
|
1790
1117
|
for (idx_t expr_idx = 0; expr_idx < op.select_list.size(); ++expr_idx) {
|
1791
1118
|
D_ASSERT(op.select_list[expr_idx]->GetExpressionClass() == ExpressionClass::BOUND_WINDOW);
|
1792
|
-
auto wexpr =
|
1793
|
-
output_types.emplace_back(wexpr
|
1119
|
+
auto &wexpr = op.select_list[expr_idx]->Cast<BoundWindowExpression>();
|
1120
|
+
output_types.emplace_back(wexpr.return_type);
|
1794
1121
|
}
|
1795
|
-
output_chunk.Initialize(
|
1796
|
-
|
1797
|
-
const auto &input_types = op.children[0]->types;
|
1798
|
-
layout.Initialize(input_types);
|
1799
|
-
input_chunk.Initialize(allocator, input_types);
|
1122
|
+
output_chunk.Initialize(Allocator::Get(context.client), output_types);
|
1800
1123
|
}
|
1801
1124
|
|
1802
|
-
void MaterializeSortedData();
|
1803
1125
|
void GeneratePartition(WindowGlobalSinkState &gstate, const idx_t hash_bin);
|
1804
1126
|
void Scan(DataChunk &chunk);
|
1805
1127
|
|
1806
|
-
|
1128
|
+
PartitionLocalSourceState partition_source;
|
1807
1129
|
ClientContext &context;
|
1808
|
-
|
1809
|
-
|
1810
|
-
//! The generated input chunks
|
1811
|
-
unique_ptr<RowDataCollection> rows;
|
1812
|
-
unique_ptr<RowDataCollection> heap;
|
1813
|
-
RowLayout layout;
|
1814
|
-
//! The partition boundary mask
|
1815
|
-
vector<validity_t> partition_bits;
|
1816
|
-
ValidityMask partition_mask;
|
1817
|
-
//! The order boundary mask
|
1818
|
-
vector<validity_t> order_bits;
|
1819
|
-
ValidityMask order_mask;
|
1130
|
+
const PhysicalWindow &op;
|
1131
|
+
|
1820
1132
|
//! The current execution functions
|
1821
1133
|
WindowExecutors window_execs;
|
1822
|
-
|
1823
|
-
//! The read partition
|
1824
|
-
idx_t hash_bin;
|
1825
|
-
//! The read cursor
|
1826
|
-
unique_ptr<RowDataCollectionScanner> scanner;
|
1827
|
-
//! Buffer for the inputs
|
1828
|
-
DataChunk input_chunk;
|
1829
1134
|
//! Buffer for window results
|
1830
1135
|
DataChunk output_chunk;
|
1831
1136
|
};
|
1832
1137
|
|
1833
|
-
void WindowLocalSourceState::MaterializeSortedData() {
|
1834
|
-
auto &global_sort_state = *hash_group->global_sort;
|
1835
|
-
if (global_sort_state.sorted_blocks.empty()) {
|
1836
|
-
return;
|
1837
|
-
}
|
1838
|
-
|
1839
|
-
// scan the sorted row data
|
1840
|
-
D_ASSERT(global_sort_state.sorted_blocks.size() == 1);
|
1841
|
-
auto &sb = *global_sort_state.sorted_blocks[0];
|
1842
|
-
|
1843
|
-
// Free up some memory before allocating more
|
1844
|
-
sb.radix_sorting_data.clear();
|
1845
|
-
sb.blob_sorting_data = nullptr;
|
1846
|
-
|
1847
|
-
// Move the sorting row blocks into our RDCs
|
1848
|
-
auto &buffer_manager = global_sort_state.buffer_manager;
|
1849
|
-
auto &sd = *sb.payload_data;
|
1850
|
-
|
1851
|
-
// Data blocks are required
|
1852
|
-
D_ASSERT(!sd.data_blocks.empty());
|
1853
|
-
auto &block = sd.data_blocks[0];
|
1854
|
-
rows = make_uniq<RowDataCollection>(buffer_manager, block->capacity, block->entry_size);
|
1855
|
-
rows->blocks = std::move(sd.data_blocks);
|
1856
|
-
rows->count = std::accumulate(rows->blocks.begin(), rows->blocks.end(), idx_t(0),
|
1857
|
-
[&](idx_t c, const unique_ptr<RowDataBlock> &b) { return c + b->count; });
|
1858
|
-
|
1859
|
-
// Heap blocks are optional, but we want both for iteration.
|
1860
|
-
if (!sd.heap_blocks.empty()) {
|
1861
|
-
auto &block = sd.heap_blocks[0];
|
1862
|
-
heap = make_uniq<RowDataCollection>(buffer_manager, block->capacity, block->entry_size);
|
1863
|
-
heap->blocks = std::move(sd.heap_blocks);
|
1864
|
-
hash_group.reset();
|
1865
|
-
} else {
|
1866
|
-
heap = make_uniq<RowDataCollection>(buffer_manager, (idx_t)Storage::BLOCK_SIZE, 1, true);
|
1867
|
-
}
|
1868
|
-
heap->count = std::accumulate(heap->blocks.begin(), heap->blocks.end(), idx_t(0),
|
1869
|
-
[&](idx_t c, const unique_ptr<RowDataBlock> &b) { return c + b->count; });
|
1870
|
-
}
|
1871
|
-
|
1872
1138
|
void WindowLocalSourceState::GeneratePartition(WindowGlobalSinkState &gstate, const idx_t hash_bin_p) {
|
1873
|
-
auto
|
1874
|
-
|
1875
|
-
// Get rid of any stale data
|
1876
|
-
hash_bin = hash_bin_p;
|
1877
|
-
|
1878
|
-
// There are three types of partitions:
|
1879
|
-
// 1. No partition (no sorting)
|
1880
|
-
// 2. One partition (sorting, but no hashing)
|
1881
|
-
// 3. Multiple partitions (sorting and hashing)
|
1882
|
-
|
1883
|
-
// How big is the partition?
|
1884
|
-
idx_t count = 0;
|
1885
|
-
if (hash_bin < gstate.hash_groups.size() && gstate.hash_groups[hash_bin]) {
|
1886
|
-
count = gstate.hash_groups[hash_bin]->count;
|
1887
|
-
} else if (gstate.rows && !hash_bin) {
|
1888
|
-
count = gstate.count;
|
1889
|
-
} else {
|
1890
|
-
return;
|
1891
|
-
}
|
1892
|
-
|
1893
|
-
// Initialise masks to false
|
1894
|
-
const auto bit_count = ValidityMask::ValidityMaskSize(count);
|
1895
|
-
partition_bits.clear();
|
1896
|
-
partition_bits.resize(bit_count, 0);
|
1897
|
-
partition_mask.Initialize(partition_bits.data());
|
1898
|
-
|
1899
|
-
order_bits.clear();
|
1900
|
-
order_bits.resize(bit_count, 0);
|
1901
|
-
order_mask.Initialize(order_bits.data());
|
1902
|
-
|
1903
|
-
// Scan the sorted data into new Collections
|
1904
|
-
auto external = gstate.external;
|
1905
|
-
if (gstate.rows && !hash_bin) {
|
1906
|
-
// Simple mask
|
1907
|
-
partition_mask.SetValidUnsafe(0);
|
1908
|
-
order_mask.SetValidUnsafe(0);
|
1909
|
-
// No partition - align the heap blocks with the row blocks
|
1910
|
-
rows = gstate.rows->CloneEmpty(gstate.rows->keep_pinned);
|
1911
|
-
heap = gstate.strings->CloneEmpty(gstate.strings->keep_pinned);
|
1912
|
-
RowDataCollectionScanner::AlignHeapBlocks(*rows, *heap, *gstate.rows, *gstate.strings, layout);
|
1913
|
-
external = true;
|
1914
|
-
} else if (hash_bin < gstate.hash_groups.size() && gstate.hash_groups[hash_bin]) {
|
1915
|
-
// Overwrite the collections with the sorted data
|
1916
|
-
hash_group = std::move(gstate.hash_groups[hash_bin]);
|
1917
|
-
hash_group->ComputeMasks(partition_mask, order_mask);
|
1918
|
-
MaterializeSortedData();
|
1919
|
-
} else {
|
1139
|
+
const auto count = partition_source.GeneratePartition(hash_bin_p);
|
1140
|
+
if (!count) {
|
1920
1141
|
return;
|
1921
1142
|
}
|
1922
1143
|
|
1923
1144
|
// Create the executors for each function
|
1145
|
+
auto &partition_mask = partition_source.partition_mask;
|
1924
1146
|
window_execs.clear();
|
1925
1147
|
for (idx_t expr_idx = 0; expr_idx < op.select_list.size(); ++expr_idx) {
|
1926
1148
|
D_ASSERT(op.select_list[expr_idx]->GetExpressionClass() == ExpressionClass::BOUND_WINDOW);
|
1927
|
-
auto wexpr =
|
1928
|
-
auto wexec = make_uniq<WindowExecutor>(wexpr, context, partition_mask, count);
|
1149
|
+
auto &wexpr = op.select_list[expr_idx]->Cast<BoundWindowExpression>();
|
1150
|
+
auto wexec = make_uniq<WindowExecutor>(&wexpr, context, partition_mask, count);
|
1929
1151
|
window_execs.emplace_back(std::move(wexec));
|
1930
1152
|
}
|
1931
1153
|
|
1932
1154
|
// First pass over the input without flushing
|
1933
1155
|
// TODO: Factor out the constructor data as global state
|
1934
|
-
scanner = make_uniq<RowDataCollectionScanner>(*rows, *heap, layout, external, false);
|
1935
1156
|
idx_t input_idx = 0;
|
1936
1157
|
while (true) {
|
1937
|
-
input_chunk.Reset();
|
1938
|
-
scanner->Scan(input_chunk);
|
1939
|
-
if (input_chunk.size() == 0) {
|
1158
|
+
partition_source.input_chunk.Reset();
|
1159
|
+
partition_source.scanner->Scan(partition_source.input_chunk);
|
1160
|
+
if (partition_source.input_chunk.size() == 0) {
|
1940
1161
|
break;
|
1941
1162
|
}
|
1942
1163
|
|
1943
1164
|
// TODO: Parallelization opportunity
|
1944
1165
|
for (auto &wexec : window_execs) {
|
1945
|
-
wexec->Sink(input_chunk, input_idx, scanner->Count());
|
1166
|
+
wexec->Sink(partition_source.input_chunk, input_idx, partition_source.scanner->Count());
|
1946
1167
|
}
|
1947
|
-
input_idx += input_chunk.size();
|
1168
|
+
input_idx += partition_source.input_chunk.size();
|
1948
1169
|
}
|
1949
1170
|
|
1950
1171
|
// TODO: Parallelization opportunity
|
@@ -1953,22 +1174,25 @@ void WindowLocalSourceState::GeneratePartition(WindowGlobalSinkState &gstate, co
|
|
1953
1174
|
}
|
1954
1175
|
|
1955
1176
|
// External scanning assumes all blocks are swizzled.
|
1956
|
-
scanner->ReSwizzle();
|
1177
|
+
partition_source.scanner->ReSwizzle();
|
1957
1178
|
|
1958
1179
|
// Second pass can flush
|
1959
|
-
scanner->Reset(true);
|
1180
|
+
partition_source.scanner->Reset(true);
|
1960
1181
|
}
|
1961
1182
|
|
1962
1183
|
void WindowLocalSourceState::Scan(DataChunk &result) {
|
1963
|
-
D_ASSERT(scanner);
|
1964
|
-
if (!scanner->Remaining()) {
|
1184
|
+
D_ASSERT(partition_source.scanner);
|
1185
|
+
if (!partition_source.scanner->Remaining()) {
|
1965
1186
|
return;
|
1966
1187
|
}
|
1967
1188
|
|
1968
|
-
const auto position = scanner->Scanned();
|
1189
|
+
const auto position = partition_source.scanner->Scanned();
|
1190
|
+
auto &input_chunk = partition_source.input_chunk;
|
1969
1191
|
input_chunk.Reset();
|
1970
|
-
scanner->Scan(input_chunk);
|
1192
|
+
partition_source.scanner->Scan(input_chunk);
|
1971
1193
|
|
1194
|
+
auto &partition_mask = partition_source.partition_mask;
|
1195
|
+
auto &order_mask = partition_source.order_mask;
|
1972
1196
|
output_chunk.Reset();
|
1973
1197
|
for (idx_t expr_idx = 0; expr_idx < window_execs.size(); ++expr_idx) {
|
1974
1198
|
auto &executor = *window_execs[expr_idx];
|
@@ -1995,38 +1219,42 @@ unique_ptr<LocalSourceState> PhysicalWindow::GetLocalSourceState(ExecutionContex
|
|
1995
1219
|
}
|
1996
1220
|
|
1997
1221
|
unique_ptr<GlobalSourceState> PhysicalWindow::GetGlobalSourceState(ClientContext &context) const {
|
1998
|
-
|
1222
|
+
auto &gsink = sink_state->Cast<WindowGlobalSinkState>();
|
1223
|
+
return make_uniq<WindowGlobalSourceState>(gsink);
|
1999
1224
|
}
|
2000
1225
|
|
2001
1226
|
void PhysicalWindow::GetData(ExecutionContext &context, DataChunk &chunk, GlobalSourceState &gstate_p,
|
2002
1227
|
LocalSourceState &lstate_p) const {
|
2003
|
-
auto &
|
2004
|
-
auto &
|
2005
|
-
auto &
|
1228
|
+
auto &lsource = lstate_p.Cast<WindowLocalSourceState>();
|
1229
|
+
auto &lpsource = lsource.partition_source;
|
1230
|
+
auto &gsource = gstate_p.Cast<WindowGlobalSourceState>();
|
1231
|
+
auto &gpsource = gsource.partition_source;
|
1232
|
+
auto &gsink = sink_state->Cast<WindowGlobalSinkState>();
|
2006
1233
|
|
2007
|
-
|
1234
|
+
auto &hash_groups = gsink.global_partition->hash_groups;
|
1235
|
+
const auto bin_count = hash_groups.empty() ? 1 : hash_groups.size();
|
2008
1236
|
|
2009
1237
|
while (chunk.size() == 0) {
|
2010
1238
|
// Move to the next bin if we are done.
|
2011
|
-
while (!
|
2012
|
-
|
2013
|
-
|
2014
|
-
|
2015
|
-
|
2016
|
-
auto hash_bin =
|
1239
|
+
while (!lpsource.scanner || !lpsource.scanner->Remaining()) {
|
1240
|
+
lpsource.scanner.reset();
|
1241
|
+
lpsource.rows.reset();
|
1242
|
+
lpsource.heap.reset();
|
1243
|
+
lpsource.hash_group.reset();
|
1244
|
+
auto hash_bin = gpsource.next_bin++;
|
2017
1245
|
if (hash_bin >= bin_count) {
|
2018
1246
|
return;
|
2019
1247
|
}
|
2020
1248
|
|
2021
|
-
for (; hash_bin <
|
2022
|
-
if (
|
1249
|
+
for (; hash_bin < hash_groups.size(); hash_bin = gpsource.next_bin++) {
|
1250
|
+
if (hash_groups[hash_bin]) {
|
2023
1251
|
break;
|
2024
1252
|
}
|
2025
1253
|
}
|
2026
|
-
|
1254
|
+
lsource.GeneratePartition(gsink, hash_bin);
|
2027
1255
|
}
|
2028
1256
|
|
2029
|
-
|
1257
|
+
lsource.Scan(chunk);
|
2030
1258
|
}
|
2031
1259
|
}
|
2032
1260
|
|