duckdb 0.7.2-dev1671.0 → 0.7.2-dev1734.0

This diff reflects the changes between publicly released package versions as they appear in their respective public registries. It is provided for informational purposes only.
Files changed (30)
  1. package/package.json +1 -1
  2. package/src/duckdb/extension/icu/icu-datefunc.cpp +20 -8
  3. package/src/duckdb/extension/icu/icu-strptime.cpp +117 -29
  4. package/src/duckdb/extension/icu/include/icu-datefunc.hpp +2 -0
  5. package/src/duckdb/src/common/local_file_system.cpp +13 -2
  6. package/src/duckdb/src/common/sort/partition_state.cpp +644 -0
  7. package/src/duckdb/src/execution/expression_executor.cpp +1 -1
  8. package/src/duckdb/src/execution/expression_executor_state.cpp +2 -3
  9. package/src/duckdb/src/execution/operator/aggregate/physical_window.cpp +77 -849
  10. package/src/duckdb/src/function/table/system/duckdb_extensions.cpp +2 -2
  11. package/src/duckdb/src/function/table/version/pragma_version.cpp +2 -2
  12. package/src/duckdb/src/include/duckdb/common/sort/partition_state.hpp +247 -0
  13. package/src/duckdb/src/include/duckdb/execution/expression_executor_state.hpp +1 -3
  14. package/src/duckdb/src/include/duckdb/planner/pragma_handler.hpp +3 -2
  15. package/src/duckdb/src/include/duckdb/storage/buffer/block_handle.hpp +1 -2
  16. package/src/duckdb/src/include/duckdb/storage/buffer/buffer_pool.hpp +77 -0
  17. package/src/duckdb/src/include/duckdb/storage/buffer/temporary_file_information.hpp +12 -0
  18. package/src/duckdb/src/include/duckdb/storage/buffer_manager.hpp +3 -59
  19. package/src/duckdb/src/main/extension/extension_install.cpp +11 -0
  20. package/src/duckdb/src/main/extension/extension_load.cpp +29 -3
  21. package/src/duckdb/src/main/query_profiler.cpp +1 -1
  22. package/src/duckdb/src/planner/pragma_handler.cpp +7 -5
  23. package/src/duckdb/src/storage/buffer/block_handle.cpp +128 -0
  24. package/src/duckdb/src/storage/buffer/block_manager.cpp +81 -0
  25. package/src/duckdb/src/storage/buffer/buffer_pool.cpp +132 -0
  26. package/src/duckdb/src/storage/buffer/buffer_pool_reservation.cpp +32 -0
  27. package/src/duckdb/src/storage/buffer_manager.cpp +0 -351
  28. package/src/duckdb/third_party/libpg_query/postgres_parser.cpp +3 -5
  29. package/src/duckdb/ub_src_common_sort.cpp +2 -0
  30. package/src/duckdb/ub_src_storage_buffer.cpp +8 -0
package/src/duckdb/src/common/sort/partition_state.cpp
@@ -0,0 +1,644 @@
+ #include "duckdb/common/sort/partition_state.hpp"
+
+ #include "duckdb/common/types/column_data_consumer.hpp"
+ #include "duckdb/common/row_operations/row_operations.hpp"
+ #include "duckdb/main/config.hpp"
+ #include "duckdb/parallel/event.hpp"
+
+ #include <numeric>
+
+ namespace duckdb {
+
+ PartitionGlobalHashGroup::PartitionGlobalHashGroup(BufferManager &buffer_manager, const Orders &partitions,
+ const Orders &orders, const Types &payload_types, bool external)
+ : count(0) {
+
+ RowLayout payload_layout;
+ payload_layout.Initialize(payload_types);
+ global_sort = make_uniq<GlobalSortState>(buffer_manager, orders, payload_layout);
+ global_sort->external = external;
+
+ partition_layout = global_sort->sort_layout.GetPrefixComparisonLayout(partitions.size());
+ }
+
+ void PartitionGlobalHashGroup::ComputeMasks(ValidityMask &partition_mask, ValidityMask &order_mask) {
+ D_ASSERT(count > 0);
+
+ // Set up a comparator for the partition subset
+ const auto partition_size = partition_layout.comparison_size;
+
+ SBIterator prev(*global_sort, ExpressionType::COMPARE_LESSTHAN);
+ SBIterator curr(*global_sort, ExpressionType::COMPARE_LESSTHAN);
+
+ partition_mask.SetValidUnsafe(0);
+ order_mask.SetValidUnsafe(0);
+ for (++curr; curr.GetIndex() < count; ++curr) {
+ // Compare the partition subset first because if that differs, then so does the full ordering
+ int part_cmp = 0;
+ if (partition_layout.all_constant) {
+ part_cmp = FastMemcmp(prev.entry_ptr, curr.entry_ptr, partition_size);
+ } else {
+ part_cmp = Comparators::CompareTuple(prev.scan, curr.scan, prev.entry_ptr, curr.entry_ptr, partition_layout,
+ prev.external);
+ }
+
+ if (part_cmp) {
+ partition_mask.SetValidUnsafe(curr.GetIndex());
+ order_mask.SetValidUnsafe(curr.GetIndex());
+ } else if (prev.Compare(curr)) {
+ order_mask.SetValidUnsafe(curr.GetIndex());
+ }
+ ++prev;
+ }
+ }
+
+ PartitionGlobalSinkState::PartitionGlobalSinkState(ClientContext &context,
+ const vector<unique_ptr<Expression>> &partitions_p,
+ const vector<BoundOrderByNode> &orders_p, const Types &payload_types,
+ const vector<unique_ptr<BaseStatistics>> &partitions_stats,
+ idx_t estimated_cardinality)
+ : context(context), buffer_manager(BufferManager::GetBufferManager(context)), allocator(Allocator::Get(context)),
+ payload_types(payload_types), memory_per_thread(0), count(0) {
+
+ // we sort by both 1) partition by expression list and 2) order by expressions
+ const auto partition_cols = partitions_p.size();
+ for (idx_t prt_idx = 0; prt_idx < partition_cols; prt_idx++) {
+ auto &pexpr = partitions_p[prt_idx];
+
+ if (partitions_stats.empty() || !partitions_stats[prt_idx]) {
+ orders.emplace_back(OrderType::ASCENDING, OrderByNullType::NULLS_FIRST, pexpr->Copy(), nullptr);
+ } else {
+ orders.emplace_back(OrderType::ASCENDING, OrderByNullType::NULLS_FIRST, pexpr->Copy(),
+ partitions_stats[prt_idx]->ToUnique());
+ }
+ partitions.emplace_back(orders.back().Copy());
+ }
+
+ for (const auto &order : orders_p) {
+ orders.emplace_back(order.Copy());
+ }
+
+ memory_per_thread = PhysicalOperator::GetMaxThreadMemory(context);
+ external = ClientConfig::GetConfig(context).force_external;
+
+ if (!orders.empty()) {
+ grouping_types = payload_types;
+ grouping_types.push_back(LogicalType::HASH);
+
+ ResizeGroupingData(estimated_cardinality);
+ }
+ }
+
+ void PartitionGlobalSinkState::ResizeGroupingData(idx_t cardinality) {
+ // Have we started to combine? Then just live with it.
+ if (grouping_data && !grouping_data->GetPartitions().empty()) {
+ return;
+ }
+ // Is the average partition size too large?
+ const idx_t partition_size = STANDARD_ROW_GROUPS_SIZE;
+ const auto bits = grouping_data ? grouping_data->GetRadixBits() : 0;
+ auto new_bits = bits ? bits : 4;
+ while (new_bits < 10 && (cardinality / RadixPartitioning::NumberOfPartitions(new_bits)) > partition_size) {
+ ++new_bits;
+ }
+
+ // Repartition the grouping data
+ if (new_bits != bits) {
+ const auto hash_col_idx = payload_types.size();
+ grouping_data = make_uniq<RadixPartitionedColumnData>(context, grouping_types, new_bits, hash_col_idx);
+ }
+ }
+
+ void PartitionGlobalSinkState::SyncLocalPartition(GroupingPartition &local_partition, GroupingAppend &local_append) {
+ // We are done if the local_partition is right sized.
+ auto local_radix = (RadixPartitionedColumnData *)local_partition.get();
+ if (local_radix->GetRadixBits() == grouping_data->GetRadixBits()) {
+ return;
+ }
+
+ // If the local partition is now too small, flush it and reallocate
+ auto new_partition = grouping_data->CreateShared();
+ auto new_append = make_uniq<PartitionedColumnDataAppendState>();
+ new_partition->InitializeAppendState(*new_append);
+
+ local_partition->FlushAppendState(*local_append);
+ auto &local_groups = local_partition->GetPartitions();
+ for (auto &local_group : local_groups) {
+ ColumnDataScanState scanner;
+ local_group->InitializeScan(scanner);
+
+ DataChunk scan_chunk;
+ local_group->InitializeScanChunk(scan_chunk);
+ for (scan_chunk.Reset(); local_group->Scan(scanner, scan_chunk); scan_chunk.Reset()) {
+ new_partition->Append(*new_append, scan_chunk);
+ }
+ }
+
+ // The append state has stale pointers to the old local partition, so nuke it from orbit.
+ new_partition->FlushAppendState(*new_append);
+
+ local_partition = std::move(new_partition);
+ local_append = make_uniq<PartitionedColumnDataAppendState>();
+ local_partition->InitializeAppendState(*local_append);
+ }
+
+ void PartitionGlobalSinkState::UpdateLocalPartition(GroupingPartition &local_partition, GroupingAppend &local_append) {
+ // Make sure grouping_data doesn't change under us.
+ lock_guard<mutex> guard(lock);
+
+ if (!local_partition) {
+ local_partition = grouping_data->CreateShared();
+ local_append = make_uniq<PartitionedColumnDataAppendState>();
+ local_partition->InitializeAppendState(*local_append);
+ return;
+ }
+
+ // Grow the groups if they are too big
+ ResizeGroupingData(count);
+
+ // Sync local partition to have the same bit count
+ SyncLocalPartition(local_partition, local_append);
+ }
+
+ void PartitionGlobalSinkState::CombineLocalPartition(GroupingPartition &local_partition, GroupingAppend &local_append) {
+ if (!local_partition) {
+ return;
+ }
+ local_partition->FlushAppendState(*local_append);
+
+ // Make sure grouping_data doesn't change under us.
+ // Combine has an internal mutex, so this is single-threaded anyway.
+ lock_guard<mutex> guard(lock);
+ SyncLocalPartition(local_partition, local_append);
+ grouping_data->Combine(*local_partition);
+ }
+
+ void PartitionGlobalSinkState::BuildSortState(ColumnDataCollection &group_data, PartitionGlobalHashGroup &hash_group) {
+ auto &global_sort = *hash_group.global_sort;
+
+ // Set up the sort expression computation.
+ vector<LogicalType> sort_types;
+ ExpressionExecutor executor(context);
+ for (auto &order : orders) {
+ auto &oexpr = order.expression;
+ sort_types.emplace_back(oexpr->return_type);
+ executor.AddExpression(*oexpr);
+ }
+ DataChunk sort_chunk;
+ sort_chunk.Initialize(allocator, sort_types);
+
+ // Copy the data from the group into the sort code.
+ LocalSortState local_sort;
+ local_sort.Initialize(global_sort, global_sort.buffer_manager);
+
+ // Strip hash column
+ DataChunk payload_chunk;
+ payload_chunk.Initialize(allocator, payload_types);
+
+ vector<column_t> column_ids;
+ column_ids.reserve(payload_types.size());
+ for (column_t i = 0; i < payload_types.size(); ++i) {
+ column_ids.emplace_back(i);
+ }
+ ColumnDataConsumer scanner(group_data, column_ids);
+ ColumnDataConsumerScanState chunk_state;
+ chunk_state.current_chunk_state.properties = ColumnDataScanProperties::ALLOW_ZERO_COPY;
+ scanner.InitializeScan();
+ for (auto chunk_idx = scanner.ChunkCount(); chunk_idx-- > 0;) {
+ if (!scanner.AssignChunk(chunk_state)) {
+ break;
+ }
+ scanner.ScanChunk(chunk_state, payload_chunk);
+
+ sort_chunk.Reset();
+ executor.Execute(payload_chunk, sort_chunk);
+
+ local_sort.SinkChunk(sort_chunk, payload_chunk);
+ if (local_sort.SizeInBytes() > memory_per_thread) {
+ local_sort.Sort(global_sort, true);
+ }
+ scanner.FinishChunk(chunk_state);
+ }
+
+ global_sort.AddLocalState(local_sort);
+
+ hash_group.count += group_data.Count();
+ }
+
+ // Per-thread sink state
+ PartitionLocalSinkState::PartitionLocalSinkState(ClientContext &context, PartitionGlobalSinkState &gstate_p)
+ : gstate(gstate_p), allocator(Allocator::Get(context)), executor(context) {
+
+ vector<LogicalType> group_types;
+ for (idx_t prt_idx = 0; prt_idx < gstate.partitions.size(); prt_idx++) {
+ auto &pexpr = *gstate.partitions[prt_idx].expression.get();
+ group_types.push_back(pexpr.return_type);
+ executor.AddExpression(pexpr);
+ }
+ sort_cols = gstate.orders.size() + group_types.size();
+
+ if (sort_cols) {
+ if (!group_types.empty()) {
+ // OVER(PARTITION BY...)
+ group_chunk.Initialize(allocator, group_types);
+ }
+ // OVER(...)
+ auto payload_types = gstate.payload_types;
+ payload_types.emplace_back(LogicalType::HASH);
+ payload_chunk.Initialize(allocator, payload_types);
+ } else {
+ // OVER()
+ payload_layout.Initialize(gstate.payload_types);
+ }
+ }
+
+ void PartitionLocalSinkState::Hash(DataChunk &input_chunk, Vector &hash_vector) {
+ const auto count = input_chunk.size();
+ if (group_chunk.ColumnCount() > 0) {
+ // OVER(PARTITION BY...) (hash grouping)
+ group_chunk.Reset();
+ executor.Execute(input_chunk, group_chunk);
+ VectorOperations::Hash(group_chunk.data[0], hash_vector, count);
+ for (idx_t prt_idx = 1; prt_idx < group_chunk.ColumnCount(); ++prt_idx) {
+ VectorOperations::CombineHash(hash_vector, group_chunk.data[prt_idx], count);
+ }
+ } else {
+ // OVER(...) (sorting)
+ // Single partition => single hash value
+ hash_vector.SetVectorType(VectorType::CONSTANT_VECTOR);
+ auto hashes = ConstantVector::GetData<hash_t>(hash_vector);
+ hashes[0] = 0;
+ }
+ }
+
+ void PartitionLocalSinkState::Sink(DataChunk &input_chunk) {
+ gstate.count += input_chunk.size();
+
+ // OVER()
+ if (sort_cols == 0) {
+ // No sorts, so build paged row chunks
+ if (!rows) {
+ const auto entry_size = payload_layout.GetRowWidth();
+ const auto capacity = MaxValue<idx_t>(STANDARD_VECTOR_SIZE, (Storage::BLOCK_SIZE / entry_size) + 1);
+ rows = make_uniq<RowDataCollection>(gstate.buffer_manager, capacity, entry_size);
+ strings = make_uniq<RowDataCollection>(gstate.buffer_manager, (idx_t)Storage::BLOCK_SIZE, 1, true);
+ }
+ const auto row_count = input_chunk.size();
+ const auto row_sel = FlatVector::IncrementalSelectionVector();
+ Vector addresses(LogicalType::POINTER);
+ auto key_locations = FlatVector::GetData<data_ptr_t>(addresses);
+ const auto prev_rows_blocks = rows->blocks.size();
+ auto handles = rows->Build(row_count, key_locations, nullptr, row_sel);
+ auto input_data = input_chunk.ToUnifiedFormat();
+ RowOperations::Scatter(input_chunk, input_data.get(), payload_layout, addresses, *strings, *row_sel, row_count);
+ // Mark that row blocks contain pointers (heap blocks are pinned)
+ if (!payload_layout.AllConstant()) {
+ D_ASSERT(strings->keep_pinned);
+ for (size_t i = prev_rows_blocks; i < rows->blocks.size(); ++i) {
+ rows->blocks[i]->block->SetSwizzling("PartitionLocalSinkState::Sink");
+ }
+ }
+ return;
+ }
+
+ // OVER(...)
+ payload_chunk.Reset();
+ auto &hash_vector = payload_chunk.data.back();
+ Hash(input_chunk, hash_vector);
+ for (idx_t col_idx = 0; col_idx < input_chunk.ColumnCount(); ++col_idx) {
+ payload_chunk.data[col_idx].Reference(input_chunk.data[col_idx]);
+ }
+ payload_chunk.SetCardinality(input_chunk);
+
+ gstate.UpdateLocalPartition(local_partition, local_append);
+ local_partition->Append(*local_append, payload_chunk);
+ }
+
+ void PartitionLocalSinkState::Combine() {
+ // OVER()
+ if (sort_cols == 0) {
+ // Only one partition again, so need a global lock.
+ lock_guard<mutex> glock(gstate.lock);
+ if (gstate.rows) {
+ if (rows) {
+ gstate.rows->Merge(*rows);
+ gstate.strings->Merge(*strings);
+ rows.reset();
+ strings.reset();
+ }
+ } else {
+ gstate.rows = std::move(rows);
+ gstate.strings = std::move(strings);
+ }
+ return;
+ }
+
+ // OVER(...)
+ gstate.CombineLocalPartition(local_partition, local_append);
+ }
+
+ PartitionGlobalMergeState::PartitionGlobalMergeState(PartitionGlobalSinkState &sink, GroupDataPtr group_data)
+ : sink(sink), group_data(std::move(group_data)), stage(PartitionSortStage::INIT), total_tasks(0), tasks_assigned(0),
+ tasks_completed(0) {
+
+ const auto group_idx = sink.hash_groups.size();
+ auto new_group = make_uniq<PartitionGlobalHashGroup>(sink.buffer_manager, sink.partitions, sink.orders,
+ sink.payload_types, sink.external);
+ sink.hash_groups.emplace_back(std::move(new_group));
+
+ hash_group = sink.hash_groups[group_idx].get();
+ global_sort = sink.hash_groups[group_idx]->global_sort.get();
+ }
+
+ void PartitionLocalMergeState::Prepare() {
+ auto &global_sort = *merge_state->global_sort;
+ merge_state->sink.BuildSortState(*merge_state->group_data, *merge_state->hash_group);
+ merge_state->group_data.reset();
+
+ global_sort.PrepareMergePhase();
+ }
+
+ void PartitionLocalMergeState::Merge() {
+ auto &global_sort = *merge_state->global_sort;
+ MergeSorter merge_sorter(global_sort, global_sort.buffer_manager);
+ merge_sorter.PerformInMergeRound();
+ }
+
+ void PartitionLocalMergeState::ExecuteTask() {
+ switch (stage) {
+ case PartitionSortStage::PREPARE:
+ Prepare();
+ break;
+ case PartitionSortStage::MERGE:
+ Merge();
+ break;
+ default:
+ throw InternalException("Unexpected PartitionGlobalMergeState in ExecuteTask!");
+ }
+
+ merge_state->CompleteTask();
+ finished = true;
+ }
+
+ bool PartitionGlobalMergeState::AssignTask(PartitionLocalMergeState &local_state) {
+ lock_guard<mutex> guard(lock);
+
+ if (tasks_assigned >= total_tasks) {
+ return false;
+ }
+
+ local_state.merge_state = this;
+ local_state.stage = stage;
+ local_state.finished = false;
+ tasks_assigned++;
+
+ return true;
+ }
+
+ void PartitionGlobalMergeState::CompleteTask() {
+ lock_guard<mutex> guard(lock);
+
+ ++tasks_completed;
+ }
+
+ bool PartitionGlobalMergeState::TryPrepareNextStage() {
+ lock_guard<mutex> guard(lock);
+
+ if (tasks_completed < total_tasks) {
+ return false;
+ }
+
+ tasks_assigned = tasks_completed = 0;
+
+ switch (stage) {
+ case PartitionSortStage::INIT:
+ total_tasks = 1;
+ stage = PartitionSortStage::PREPARE;
+ return true;
+
+ case PartitionSortStage::PREPARE:
+ total_tasks = global_sort->sorted_blocks.size() / 2;
+ if (!total_tasks) {
+ break;
+ }
+ stage = PartitionSortStage::MERGE;
+ global_sort->InitializeMergeRound();
+ return true;
+
+ case PartitionSortStage::MERGE:
+ global_sort->CompleteMergeRound(true);
+ total_tasks = global_sort->sorted_blocks.size() / 2;
+ if (!total_tasks) {
+ break;
+ }
+ global_sort->InitializeMergeRound();
+ return true;
+
+ case PartitionSortStage::SORTED:
+ break;
+ }
+
+ stage = PartitionSortStage::SORTED;
+
+ return false;
+ }
+
+ PartitionGlobalMergeStates::PartitionGlobalMergeStates(PartitionGlobalSinkState &sink) {
+ // Schedule all the sorts for maximum thread utilisation
+ for (auto &group_data : sink.grouping_data->GetPartitions()) {
+ // Prepare for merge sort phase
+ if (group_data->Count()) {
+ auto state = make_uniq<PartitionGlobalMergeState>(sink, std::move(group_data));
+ states.emplace_back(std::move(state));
+ }
+ }
+ }
+
+ class PartitionMergeTask : public ExecutorTask {
+ public:
+ PartitionMergeTask(shared_ptr<Event> event_p, ClientContext &context_p, PartitionGlobalMergeStates &hash_groups_p)
+ : ExecutorTask(context_p), event(std::move(event_p)), hash_groups(hash_groups_p) {
+ }
+
+ TaskExecutionResult ExecuteTask(TaskExecutionMode mode) override;
+
+ private:
+ shared_ptr<Event> event;
+ PartitionLocalMergeState local_state;
+ PartitionGlobalMergeStates &hash_groups;
+ };
+
+ TaskExecutionResult PartitionMergeTask::ExecuteTask(TaskExecutionMode mode) {
+ // Loop until all hash groups are done
+ size_t sorted = 0;
+ while (sorted < hash_groups.states.size()) {
+ // First check if there is an unfinished task for this thread
+ if (executor.HasError()) {
+ return TaskExecutionResult::TASK_ERROR;
+ }
+ if (!local_state.TaskFinished()) {
+ local_state.ExecuteTask();
+ continue;
+ }
+
+ // Thread is done with its assigned task, try to fetch new work
+ for (auto group = sorted; group < hash_groups.states.size(); ++group) {
+ auto &global_state = hash_groups.states[group];
+ if (global_state->IsSorted()) {
+ // This hash group is done
+ // Update the high water mark of densely completed groups
+ if (sorted == group) {
+ ++sorted;
+ }
+ continue;
+ }
+
+ // Try to assign work for this hash group to this thread
+ if (global_state->AssignTask(local_state)) {
+ // We assigned a task to this thread!
+ // Break out of this loop to re-enter the top-level loop and execute the task
+ break;
+ }
+
+ // Hash group global state couldn't assign a task to this thread
+ // Try to prepare the next stage
+ if (!global_state->TryPrepareNextStage()) {
+ // This current hash group is not yet done
+ // But we were not able to assign a task for it to this thread
+ // See if the next hash group is better
+ continue;
+ }
+
+ // We were able to prepare the next stage for this hash group!
+ // Try to assign a task once more
+ if (global_state->AssignTask(local_state)) {
+ // We assigned a task to this thread!
+ // Break out of this loop to re-enter the top-level loop and execute the task
+ break;
+ }
+
+ // We were able to prepare the next merge round,
+ // but we were not able to assign a task for it to this thread
+ // The tasks were assigned to other threads while this thread waited for the lock
+ // Go to the next iteration to see if another hash group has a task
+ }
+ }
+
+ event->FinishTask();
+ return TaskExecutionResult::TASK_FINISHED;
+ }
+
+ void PartitionMergeEvent::Schedule() {
+ auto &context = pipeline->GetClientContext();
+
+ // Schedule tasks equal to the number of threads, which will each merge multiple partitions
+ auto &ts = TaskScheduler::GetScheduler(context);
+ idx_t num_threads = ts.NumberOfThreads();
+
+ vector<unique_ptr<Task>> merge_tasks;
+ for (idx_t tnum = 0; tnum < num_threads; tnum++) {
+ merge_tasks.emplace_back(make_uniq<PartitionMergeTask>(shared_from_this(), context, merge_states));
+ }
+ SetTasks(std::move(merge_tasks));
+ }
+
+ PartitionLocalSourceState::PartitionLocalSourceState(PartitionGlobalSinkState &gstate_p) : gstate(gstate_p) {
+ const auto &input_types = gstate.payload_types;
+ layout.Initialize(input_types);
+ input_chunk.Initialize(gstate.allocator, input_types);
+ }
+
+ void PartitionLocalSourceState::MaterializeSortedData() {
+ auto &global_sort_state = *hash_group->global_sort;
+ if (global_sort_state.sorted_blocks.empty()) {
+ return;
+ }
+
+ // scan the sorted row data
+ D_ASSERT(global_sort_state.sorted_blocks.size() == 1);
+ auto &sb = *global_sort_state.sorted_blocks[0];
+
+ // Free up some memory before allocating more
+ sb.radix_sorting_data.clear();
+ sb.blob_sorting_data = nullptr;
+
+ // Move the sorting row blocks into our RDCs
+ auto &buffer_manager = global_sort_state.buffer_manager;
+ auto &sd = *sb.payload_data;
+
+ // Data blocks are required
+ D_ASSERT(!sd.data_blocks.empty());
+ auto &block = sd.data_blocks[0];
+ rows = make_uniq<RowDataCollection>(buffer_manager, block->capacity, block->entry_size);
+ rows->blocks = std::move(sd.data_blocks);
+ rows->count = std::accumulate(rows->blocks.begin(), rows->blocks.end(), idx_t(0),
+ [&](idx_t c, const unique_ptr<RowDataBlock> &b) { return c + b->count; });
+
+ // Heap blocks are optional, but we want both for iteration.
+ if (!sd.heap_blocks.empty()) {
+ auto &block = sd.heap_blocks[0];
+ heap = make_uniq<RowDataCollection>(buffer_manager, block->capacity, block->entry_size);
+ heap->blocks = std::move(sd.heap_blocks);
+ hash_group.reset();
+ } else {
+ heap = make_uniq<RowDataCollection>(buffer_manager, (idx_t)Storage::BLOCK_SIZE, 1, true);
+ }
+ heap->count = std::accumulate(heap->blocks.begin(), heap->blocks.end(), idx_t(0),
+ [&](idx_t c, const unique_ptr<RowDataBlock> &b) { return c + b->count; });
+ }
+
+ idx_t PartitionLocalSourceState::GeneratePartition(const idx_t hash_bin_p) {
+ // Get rid of any stale data
+ hash_bin = hash_bin_p;
+
+ // There are three types of partitions:
+ // 1. No partition (no sorting)
+ // 2. One partition (sorting, but no hashing)
+ // 3. Multiple partitions (sorting and hashing)
+
+ // How big is the partition?
+ idx_t count = 0;
+ if (hash_bin < gstate.hash_groups.size() && gstate.hash_groups[hash_bin]) {
+ count = gstate.hash_groups[hash_bin]->count;
+ } else if (gstate.rows && !hash_bin) {
+ count = gstate.count;
+ } else {
+ return count;
+ }
+
+ // Initialise masks to false
+ const auto bit_count = ValidityMask::ValidityMaskSize(count);
+ partition_bits.clear();
+ partition_bits.resize(bit_count, 0);
+ partition_mask.Initialize(partition_bits.data());
+
+ order_bits.clear();
+ order_bits.resize(bit_count, 0);
+ order_mask.Initialize(order_bits.data());
+
+ // Scan the sorted data into new Collections
+ auto external = gstate.external;
+ if (gstate.rows && !hash_bin) {
+ // Simple mask
+ partition_mask.SetValidUnsafe(0);
+ order_mask.SetValidUnsafe(0);
+ // No partition - align the heap blocks with the row blocks
+ rows = gstate.rows->CloneEmpty(gstate.rows->keep_pinned);
+ heap = gstate.strings->CloneEmpty(gstate.strings->keep_pinned);
+ RowDataCollectionScanner::AlignHeapBlocks(*rows, *heap, *gstate.rows, *gstate.strings, layout);
+ external = true;
+ } else if (hash_bin < gstate.hash_groups.size() && gstate.hash_groups[hash_bin]) {
+ // Overwrite the collections with the sorted data
+ hash_group = std::move(gstate.hash_groups[hash_bin]);
+ hash_group->ComputeMasks(partition_mask, order_mask);
+ MaterializeSortedData();
+ } else {
+ return count;
+ }
+
+ scanner = make_uniq<RowDataCollectionScanner>(*rows, *heap, layout, external, false);
+
+ return count;
+ }
+
+ } // namespace duckdb
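The new partition_state.hpp/cpp pair factors the hash-partitioned sort machinery out of physical_window.cpp, which shrinks by roughly 850 lines in this release. As orientation only, the sketch below shows the call sequence these classes appear to expose, inferred solely from the definitions in this diff; the driving operator, the scheduling of PartitionMergeEvent/PartitionMergeTask, and the inputs `chunks` and `hash_bin` are hypothetical.

#include "duckdb/common/sort/partition_state.hpp"

namespace duckdb {

// Hypothetical driver, for illustration only; not part of the package.
static void SketchPartitionedSort(ClientContext &context, PartitionGlobalSinkState &gsink,
                                  vector<DataChunk> &chunks, idx_t hash_bin) {
	// 1. Sink phase (per thread): hash-partition the input rows into local radix partitions
	//    (or into a single row collection for OVER() with no sort columns).
	PartitionLocalSinkState lsink(context, gsink);
	for (auto &chunk : chunks) {
		lsink.Sink(chunk);
	}
	lsink.Combine(); // merge thread-local partitions into gsink under its lock

	// 2. Merge phase: one merge state per non-empty hash group;
	//    PartitionMergeEvent::Schedule() would run PartitionMergeTasks over these in parallel.
	PartitionGlobalMergeStates merge_states(gsink);
	(void)merge_states;

	// 3. Source phase (per thread): materialize one sorted hash group at a time.
	PartitionLocalSourceState lsource(gsink);
	const auto rows_in_bin = lsource.GeneratePartition(hash_bin);
	(void)rows_in_bin; // the sorted rows are then read back through lsource.scanner
}

} // namespace duckdb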
package/src/duckdb/src/execution/expression_executor.cpp
@@ -56,7 +56,7 @@ Allocator &ExpressionExecutor::GetAllocator() {
 
 void ExpressionExecutor::AddExpression(const Expression &expr) {
 expressions.push_back(&expr);
- auto state = make_uniq<ExpressionExecutorState>(expr.ToString());
+ auto state = make_uniq<ExpressionExecutorState>();
 Initialize(expr, *state);
 state->Verify();
 states.push_back(std::move(state));
package/src/duckdb/src/execution/expression_executor_state.cpp
@@ -31,11 +31,10 @@ ClientContext &ExpressionState::GetContext() {
 return root.executor->GetContext();
 }
 
- ExpressionState::ExpressionState(const Expression &expr, ExpressionExecutorState &root)
- : expr(expr), root(root), name(expr.ToString()) {
+ ExpressionState::ExpressionState(const Expression &expr, ExpressionExecutorState &root) : expr(expr), root(root) {
 }
 
- ExpressionExecutorState::ExpressionExecutorState(const string &name) : profiler(), name(name) {
+ ExpressionExecutorState::ExpressionExecutorState() : profiler() {
 }
 
 void ExpressionState::Verify(ExpressionExecutorState &root_executor) {