duckdb 0.8.2-dev1724.0 → 0.8.2-dev1764.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/duckdb/src/common/sort/partition_state.cpp +30 -43
- package/src/duckdb/src/function/table/version/pragma_version.cpp +2 -2
- package/src/duckdb/src/include/duckdb/common/sort/partition_state.hpp +11 -9
- package/src/duckdb/src/include/duckdb/common/types/row/partitioned_tuple_data.hpp +5 -1
- package/src/duckdb/src/optimizer/optimizer.cpp +1 -3
package/package.json
CHANGED
package/src/duckdb/src/common/sort/partition_state.cpp
CHANGED
@@ -87,16 +87,22 @@ PartitionGlobalSinkState::PartitionGlobalSinkState(ClientContext &context,
                                                  const vector<unique_ptr<BaseStatistics>> &partition_stats,
                                                  idx_t estimated_cardinality)
     : context(context), buffer_manager(BufferManager::GetBufferManager(context)), allocator(Allocator::Get(context)),
-      fixed_bits(0), payload_types(payload_types), memory_per_thread(0), count(0) {
+      fixed_bits(0), payload_types(payload_types), memory_per_thread(0), max_bits(1), count(0) {
 
     GenerateOrderings(partitions, orders, partition_bys, order_bys, partition_stats);
 
     memory_per_thread = PhysicalOperator::GetMaxThreadMemory(context);
     external = ClientConfig::GetConfig(context).force_external;
 
+    const auto thread_pages = PreviousPowerOfTwo(memory_per_thread / (4 * idx_t(Storage::BLOCK_ALLOC_SIZE)));
+    while (max_bits < 10 && (thread_pages >> max_bits) > 1) {
+        ++max_bits;
+    }
+
     if (!orders.empty()) {
-        grouping_types = payload_types;
-        grouping_types.push_back(LogicalType::HASH);
+        auto types = payload_types;
+        types.push_back(LogicalType::HASH);
+        grouping_types.Initialize(types);
 
         ResizeGroupingData(estimated_cardinality);
     }
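The new max_bits member caps how far ResizeGroupingData (later hunks in this file) may grow the radix partitioning, derived from the per-thread memory budget. A minimal sketch of that computation, assuming PreviousPowerOfTwo rounds down to the nearest power of two and Storage::BLOCK_ALLOC_SIZE is the stock 256 KiB; the stand-in definitions and example budgets are illustrative, not taken from the package:

    #include <cstdint>
    #include <initializer_list>
    #include <iostream>

    using idx_t = uint64_t;                            // stand-in for duckdb::idx_t
    static constexpr idx_t BLOCK_ALLOC_SIZE = 262144;  // assumed 256 KiB block size

    // Assumed behaviour of PreviousPowerOfTwo: round down to a power of two (0 stays 0).
    static idx_t PreviousPowerOfTwo(idx_t v) {
        idx_t result = 1;
        while (result * 2 <= v) {
            result *= 2;
        }
        return v ? result : 0;
    }

    int main() {
        // Same loop as the new constructor code: stop growing once only a page or two per
        // partition would remain, and never exceed 10 radix bits (1024 partitions).
        for (idx_t memory_per_thread : {idx_t(64) << 20, idx_t(512) << 20, idx_t(4) << 30}) {
            const auto thread_pages = PreviousPowerOfTwo(memory_per_thread / (4 * BLOCK_ALLOC_SIZE));
            idx_t max_bits = 1;
            while (max_bits < 10 && (thread_pages >> max_bits) > 1) {
                ++max_bits;
            }
            std::cout << (memory_per_thread >> 20) << " MiB/thread -> max_bits = " << max_bits << "\n";
        }
        return 0;
    }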
@@ -108,10 +114,15 @@ void PartitionGlobalSinkState::SyncPartitioning(const PartitionGlobalSinkState &
     const auto old_bits = grouping_data ? grouping_data->GetRadixBits() : 0;
     if (fixed_bits != old_bits) {
         const auto hash_col_idx = payload_types.size();
-        grouping_data = make_uniq<RadixPartitionedColumnData>(context, grouping_types, fixed_bits, hash_col_idx);
+        grouping_data = make_uniq<RadixPartitionedTupleData>(buffer_manager, grouping_types, fixed_bits, hash_col_idx);
     }
 }
 
+unique_ptr<RadixPartitionedTupleData> PartitionGlobalSinkState::CreatePartition(idx_t new_bits) const {
+    const auto hash_col_idx = payload_types.size();
+    return make_uniq<RadixPartitionedTupleData>(buffer_manager, grouping_types, new_bits, hash_col_idx);
+}
+
 void PartitionGlobalSinkState::ResizeGroupingData(idx_t cardinality) {
     // Have we started to combine? Then just live with it.
     if (fixed_bits || (grouping_data && !grouping_data->GetPartitions().empty())) {
@@ -121,47 +132,31 @@ void PartitionGlobalSinkState::ResizeGroupingData(idx_t cardinality) {
     const idx_t partition_size = STANDARD_ROW_GROUPS_SIZE;
     const auto bits = grouping_data ? grouping_data->GetRadixBits() : 0;
     auto new_bits = bits ? bits : 4;
-    while (new_bits < 10 && (cardinality / RadixPartitioning::NumberOfPartitions(new_bits)) > partition_size) {
+    while (new_bits < max_bits && (cardinality / RadixPartitioning::NumberOfPartitions(new_bits)) > partition_size) {
         ++new_bits;
     }
 
     // Repartition the grouping data
     if (new_bits != bits) {
-        const auto hash_col_idx = payload_types.size();
-        grouping_data = make_uniq<RadixPartitionedColumnData>(context, grouping_types, new_bits, hash_col_idx);
+        grouping_data = CreatePartition(new_bits);
     }
 }
 
 void PartitionGlobalSinkState::SyncLocalPartition(GroupingPartition &local_partition, GroupingAppend &local_append) {
     // We are done if the local_partition is right sized.
-    auto &local_radix = local_partition->Cast<RadixPartitionedColumnData>();
-    if (local_radix.GetRadixBits() == grouping_data->GetRadixBits()) {
+    auto &local_radix = local_partition->Cast<RadixPartitionedTupleData>();
+    const auto new_bits = grouping_data->GetRadixBits();
+    if (local_radix.GetRadixBits() == new_bits) {
         return;
     }
 
     // If the local partition is now too small, flush it and reallocate
-    auto new_partition =
-    auto new_append = make_uniq<PartitionedColumnDataAppendState>();
-    new_partition->InitializeAppendState(*new_append);
-
+    auto new_partition = CreatePartition(new_bits);
     local_partition->FlushAppendState(*local_append);
-
-    for (auto &local_group : local_groups) {
-        ColumnDataScanState scanner;
-        local_group->InitializeScan(scanner);
-
-        DataChunk scan_chunk;
-        local_group->InitializeScanChunk(scan_chunk);
-        for (scan_chunk.Reset(); local_group->Scan(scanner, scan_chunk); scan_chunk.Reset()) {
-            new_partition->Append(*new_append, scan_chunk);
-        }
-    }
-
-    // The append state has stale pointers to the old local partition, so nuke it from orbit.
-    new_partition->FlushAppendState(*new_append);
+    local_partition->Repartition(*new_partition);
 
     local_partition = std::move(new_partition);
-    local_append = make_uniq<PartitionedColumnDataAppendState>();
+    local_append = make_uniq<PartitionedTupleDataAppendState>();
     local_partition->InitializeAppendState(*local_append);
 }
 
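Two things change in the hunk above: ResizeGroupingData now also respects the max_bits cap, and SyncLocalPartition hands the copy into the resized partition to Repartition instead of the old manual scan-and-append loop. A worked sketch of the sizing rule follows; STANDARD_ROW_GROUPS_SIZE = 122880 rows and NumberOfPartitions(bits) == 1 << bits are assumed stand-ins for the DuckDB constants, and the 10M-row cardinality is hypothetical:

    #include <cstdint>
    #include <iostream>

    using idx_t = uint64_t;

    static constexpr idx_t STANDARD_ROW_GROUPS_SIZE = 122880;  // assumed row-group size
    static idx_t NumberOfPartitions(idx_t bits) {               // assumed to be 1 << bits
        return idx_t(1) << bits;
    }

    int main() {
        const idx_t max_bits = 10;           // the upper cap derived in the constructor
        const idx_t cardinality = 10000000;  // hypothetical estimated cardinality
        // Same rule as ResizeGroupingData: grow until the average partition is at most one
        // row group, but never beyond max_bits.
        idx_t new_bits = 4;
        while (new_bits < max_bits && (cardinality / NumberOfPartitions(new_bits)) > STANDARD_ROW_GROUPS_SIZE) {
            ++new_bits;
        }
        std::cout << "new_bits = " << new_bits << " -> " << NumberOfPartitions(new_bits)
                  << " partitions, ~" << cardinality / NumberOfPartitions(new_bits) << " rows each\n";
        return 0;
    }

With these numbers the loop settles at 7 bits, that is 128 partitions of roughly 78 thousand rows, well under the 10-bit ceiling.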
@@ -170,8 +165,8 @@ void PartitionGlobalSinkState::UpdateLocalPartition(GroupingPartition &local_par
     lock_guard<mutex> guard(lock);
 
     if (!local_partition) {
-        local_partition = grouping_data->
-        local_append = make_uniq<PartitionedColumnDataAppendState>();
+        local_partition = CreatePartition(grouping_data->GetRadixBits());
+        local_append = make_uniq<PartitionedTupleDataAppendState>();
         local_partition->InitializeAppendState(*local_append);
         return;
     }
@@ -196,7 +191,7 @@ void PartitionGlobalSinkState::CombineLocalPartition(GroupingPartition &local_pa
     grouping_data->Combine(*local_partition);
 }
 
-void PartitionGlobalSinkState::BuildSortState(ColumnDataCollection &group_data, GlobalSortState &global_sort) const {
+void PartitionGlobalSinkState::BuildSortState(TupleDataCollection &group_data, GlobalSortState &global_sort) const {
     // Set up the sort expression computation.
     vector<LogicalType> sort_types;
     ExpressionExecutor executor(context);
@@ -221,16 +216,9 @@ void PartitionGlobalSinkState::BuildSortState(ColumnDataCollection &group_data,
     for (column_t i = 0; i < payload_types.size(); ++i) {
         column_ids.emplace_back(i);
     }
-
-
-    chunk_state
-    scanner.InitializeScan();
-    for (auto chunk_idx = scanner.ChunkCount(); chunk_idx-- > 0;) {
-        if (!scanner.AssignChunk(chunk_state)) {
-            break;
-        }
-        scanner.ScanChunk(chunk_state, payload_chunk);
-
+    TupleDataScanState chunk_state;
+    group_data.InitializeScan(chunk_state, column_ids);
+    while (group_data.Scan(chunk_state, payload_chunk)) {
         sort_chunk.Reset();
         executor.Execute(payload_chunk, sort_chunk);
 
@@ -238,13 +226,12 @@ void PartitionGlobalSinkState::BuildSortState(ColumnDataCollection &group_data,
         if (local_sort.SizeInBytes() > memory_per_thread) {
             local_sort.Sort(global_sort, true);
         }
-        scanner.FinishChunk(chunk_state);
     }
 
     global_sort.AddLocalState(local_sort);
 }
 
-void PartitionGlobalSinkState::BuildSortState(ColumnDataCollection &group_data, PartitionGlobalHashGroup &hash_group) {
+void PartitionGlobalSinkState::BuildSortState(TupleDataCollection &group_data, PartitionGlobalHashGroup &hash_group) {
     BuildSortState(group_data, *hash_group.global_sort);
 
     hash_group.count += group_data.Count();
package/src/duckdb/src/function/table/version/pragma_version.cpp
CHANGED
@@ -1,8 +1,8 @@
 #ifndef DUCKDB_VERSION
-#define DUCKDB_VERSION "0.8.2-dev1724"
+#define DUCKDB_VERSION "0.8.2-dev1764"
 #endif
 #ifndef DUCKDB_SOURCE_ID
-#define DUCKDB_SOURCE_ID "
+#define DUCKDB_SOURCE_ID "07b0b0a2a4"
 #endif
 #include "duckdb/function/table/system_functions.hpp"
 #include "duckdb/main/database.hpp"
package/src/duckdb/src/include/duckdb/common/sort/partition_state.hpp
CHANGED
@@ -42,8 +42,8 @@ public:
     using Orders = vector<BoundOrderByNode>;
     using Types = vector<LogicalType>;
 
-    using GroupingPartition = unique_ptr<PartitionedColumnData>;
-    using GroupingAppend = unique_ptr<PartitionedColumnDataAppendState>;
+    using GroupingPartition = unique_ptr<PartitionedTupleData>;
+    using GroupingAppend = unique_ptr<PartitionedTupleDataAppendState>;
 
     static void GenerateOrderings(Orders &partitions, Orders &orders,
                                   const vector<unique_ptr<Expression>> &partition_bys, const Orders &order_bys,
@@ -53,13 +53,14 @@ public:
                                   const vector<BoundOrderByNode> &order_bys, const Types &payload_types,
                                   const vector<unique_ptr<BaseStatistics>> &partitions_stats, idx_t estimated_cardinality);
 
+    unique_ptr<RadixPartitionedTupleData> CreatePartition(idx_t new_bits) const;
     void SyncPartitioning(const PartitionGlobalSinkState &other);
 
     void UpdateLocalPartition(GroupingPartition &local_partition, GroupingAppend &local_append);
     void CombineLocalPartition(GroupingPartition &local_partition, GroupingAppend &local_append);
 
-    void BuildSortState(ColumnDataCollection &group_data, GlobalSortState &global_sort) const;
-    void BuildSortState(ColumnDataCollection &group_data, PartitionGlobalHashGroup &global_sort);
+    void BuildSortState(TupleDataCollection &group_data, GlobalSortState &global_sort) const;
+    void BuildSortState(TupleDataCollection &group_data, PartitionGlobalHashGroup &global_sort);
 
     ClientContext &context;
     BufferManager &buffer_manager;
@@ -67,9 +68,9 @@ public:
     mutex lock;
 
     // OVER(PARTITION BY...) (hash grouping)
-    unique_ptr<RadixPartitionedColumnData> grouping_data;
+    unique_ptr<RadixPartitionedTupleData> grouping_data;
     //! Payload plus hash column
-    Types grouping_types;
+    TupleDataLayout grouping_types;
     //! The number of radix bits if this partition is being synced with another
     idx_t fixed_bits;
 
@@ -88,6 +89,7 @@ public:
 
     // Threading
     idx_t memory_per_thread;
+    idx_t max_bits;
     atomic<idx_t> count;
 
 private:
@@ -107,8 +109,8 @@ public:
     ExpressionExecutor executor;
     DataChunk group_chunk;
     DataChunk payload_chunk;
-    unique_ptr<PartitionedColumnData> local_partition;
-    unique_ptr<PartitionedColumnDataAppendState> local_append;
+    unique_ptr<PartitionedTupleData> local_partition;
+    unique_ptr<PartitionedTupleDataAppendState> local_append;
 
     // OVER(...) (sorting)
     size_t sort_cols;
@@ -132,7 +134,7 @@ class PartitionLocalMergeState;
 
 class PartitionGlobalMergeState {
 public:
-    using GroupDataPtr = unique_ptr<ColumnDataCollection>;
+    using GroupDataPtr = unique_ptr<TupleDataCollection>;
 
     PartitionGlobalMergeState(PartitionGlobalSinkState &sink, GroupDataPtr group_data, hash_t hash_bin);
 
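In the header, grouping_types turns from a plain list of types into a TupleDataLayout, which the constructor now fills once with Initialize (first partition_state.cpp hunk above). A minimal sketch of that initialization, assuming the single-argument TupleDataLayout::Initialize overload; the helper name is illustrative:

    #include "duckdb/common/types/row/tuple_data_layout.hpp"

    using namespace duckdb;

    // Sketch: build the row layout shared by every grouping partition.
    static void InitGroupingLayout(TupleDataLayout &layout, const vector<LogicalType> &payload_types) {
        auto types = payload_types;
        types.push_back(LogicalType::HASH);  // trailing hash column; its index is hash_col_idx
        layout.Initialize(types);            // fixes the row width and per-column offsets
    }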
package/src/duckdb/src/include/duckdb/common/types/row/partitioned_tuple_data.hpp
CHANGED
@@ -123,7 +123,11 @@ protected:
     void BuildBufferSpace(PartitionedTupleDataAppendState &state);
     //! Create a collection for a specific a partition
     unique_ptr<TupleDataCollection> CreatePartitionCollection(idx_t partition_index) const {
-        return make_uniq<TupleDataCollection>(buffer_manager, layout);
+        if (allocators) {
+            return make_uniq<TupleDataCollection>(allocators->allocators[partition_index]);
+        } else {
+            return make_uniq<TupleDataCollection>(buffer_manager, layout);
+        }
     }
 
 protected:
package/src/duckdb/src/optimizer/optimizer.cpp
CHANGED
@@ -81,9 +81,7 @@ unique_ptr<LogicalOperator> Optimizer::Optimize(unique_ptr<LogicalOperator> plan
 
     switch (plan_p->type) {
     case LogicalOperatorType::LOGICAL_TRANSACTION:
-
-    case LogicalOperatorType::LOGICAL_PRAGMA:
-        return plan_p;
+        return plan_p; // skip optimizing simple & often-occurring plans unaffected by rewrites
     default:
         break;
     }