duckdb 0.8.2-dev1724.0 → 0.8.2-dev1791.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +7 -0
- package/package.json +1 -1
- package/src/duckdb/extension/parquet/include/parquet_reader.hpp +1 -0
- package/src/duckdb/extension/parquet/parquet_extension.cpp +38 -22
- package/src/duckdb/extension/parquet/parquet_reader.cpp +1 -4
- package/src/duckdb/src/common/sort/partition_state.cpp +30 -43
- package/src/duckdb/src/common/sort/sort_state.cpp +1 -1
- package/src/duckdb/src/common/sort/sorted_block.cpp +1 -1
- package/src/duckdb/src/common/types/column/column_data_collection.cpp +8 -0
- package/src/duckdb/src/common/types/column/column_data_collection_segment.cpp +5 -0
- package/src/duckdb/src/common/types/string_heap.cpp +4 -0
- package/src/duckdb/src/function/table/version/pragma_version.cpp +2 -2
- package/src/duckdb/src/include/duckdb/common/sort/partition_state.hpp +11 -9
- package/src/duckdb/src/include/duckdb/common/types/column/column_data_allocator.hpp +10 -0
- package/src/duckdb/src/include/duckdb/common/types/column/column_data_collection.hpp +3 -0
- package/src/duckdb/src/include/duckdb/common/types/column/column_data_collection_segment.hpp +2 -0
- package/src/duckdb/src/include/duckdb/common/types/row/partitioned_tuple_data.hpp +5 -1
- package/src/duckdb/src/include/duckdb/common/types/string_heap.hpp +3 -0
- package/src/duckdb/src/include/duckdb/storage/arena_allocator.hpp +1 -0
- package/src/duckdb/src/optimizer/optimizer.cpp +1 -3
- package/src/duckdb/src/parser/parser.cpp +43 -38
- package/src/duckdb/src/storage/arena_allocator.cpp +12 -0
package/README.md
CHANGED
@@ -100,6 +100,13 @@ var stmt = con.prepare('select ?::INTEGER as fortytwo', function(err, stmt) {
 });
 ```
 
+## Supported Node versions
+We actively support only LTS and in-support Node versions. As of July 2023, these are: Node 16, Node 18 and Node 20.
+Release schedule for Node.js can be checked here: https://github.com/nodejs/release#release-schedule.
+
+We currently bundle and test DuckDB also for Node 10, 12, 14, 17 and 19. We plan to keep doing so as long as the tooling supports it.
+As of July 2023, Node 15 has been removed from the supported versions.
+
 ## Development
 
 ### First install:
package/package.json
CHANGED
(hunk not shown in this view; the version field moves from 0.8.2-dev1724.0 to 0.8.2-dev1791.0)

package/src/duckdb/extension/parquet/include/parquet_reader.hpp
CHANGED
@@ -93,6 +93,7 @@ public:
     shared_ptr<ParquetFileMetadataCache> metadata;
     ParquetOptions parquet_options;
     MultiFileReaderData reader_data;
+    unique_ptr<ColumnReader> root_reader;
 
 public:
     void InitializeScan(ParquetReaderScanState &state, vector<idx_t> groups_to_read);
package/src/duckdb/extension/parquet/parquet_extension.cpp
CHANGED
@@ -116,6 +116,11 @@ struct ParquetWriteBindData : public TableFunctionData {
     vector<string> column_names;
     duckdb_parquet::format::CompressionCodec::type codec = duckdb_parquet::format::CompressionCodec::SNAPPY;
     idx_t row_group_size = RowGroup::ROW_GROUP_SIZE;
+
+    //! If row_group_size_bytes is not set, we default to row_group_size * BYTES_PER_ROW
+    static constexpr const idx_t BYTES_PER_ROW = 1024;
+    idx_t row_group_size_bytes;
+
     ChildFieldIDs field_ids;
 };
 
@@ -741,33 +746,39 @@ static void GetFieldIDs(const Value &field_ids_value, ChildFieldIDs &field_ids,
 unique_ptr<FunctionData> ParquetWriteBind(ClientContext &context, CopyInfo &info, vector<string> &names,
                                           vector<LogicalType> &sql_types) {
     D_ASSERT(names.size() == sql_types.size());
+    bool row_group_size_bytes_set = false;
     auto bind_data = make_uniq<ParquetWriteBindData>();
     for (auto &option : info.options) {
-        auto loption = StringUtil::Lower(option.first);
+        const auto loption = StringUtil::Lower(option.first);
+        if (option.second.size() != 1) {
+            // All parquet write options require exactly one argument
+            throw BinderException("%s requires exactly one argument", StringUtil::Upper(loption));
+        }
         if (loption == "row_group_size" || loption == "chunk_size") {
             bind_data->row_group_size = option.second[0].GetValue<uint64_t>();
+        } else if (loption == "row_group_size_bytes") {
+            auto roption = option.second[0];
+            if (roption.GetTypeMutable().id() == LogicalTypeId::VARCHAR) {
+                bind_data->row_group_size_bytes = DBConfig::ParseMemoryLimit(roption.ToString());
+            } else {
+                bind_data->row_group_size_bytes = option.second[0].GetValue<uint64_t>();
+            }
+            row_group_size_bytes_set = true;
         } else if (loption == "compression" || loption == "codec") {
-            … (12 removed lines truncated in this view)
-            bind_data->codec = duckdb_parquet::format::CompressionCodec::ZSTD;
-            continue;
-        }
+            const auto roption = StringUtil::Lower(option.second[0].ToString());
+            if (roption == "uncompressed") {
+                bind_data->codec = duckdb_parquet::format::CompressionCodec::UNCOMPRESSED;
+            } else if (roption == "snappy") {
+                bind_data->codec = duckdb_parquet::format::CompressionCodec::SNAPPY;
+            } else if (roption == "gzip") {
+                bind_data->codec = duckdb_parquet::format::CompressionCodec::GZIP;
+            } else if (roption == "zstd") {
+                bind_data->codec = duckdb_parquet::format::CompressionCodec::ZSTD;
+            } else {
+                throw BinderException("Expected %s argument to be either [uncompressed, snappy, gzip or zstd]",
+                                      loption);
             }
-            throw BinderException("Expected %s argument to be either [uncompressed, snappy, gzip or zstd]", loption);
         } else if (loption == "field_ids") {
-            if (option.second.size() != 1) {
-                throw BinderException("FIELD_IDS requires exactly one argument");
-            }
             if (option.second[0].type().id() == LogicalTypeId::VARCHAR &&
                 StringUtil::Lower(StringValue::Get(option.second[0])) == "auto") {
                 idx_t field_id = 0;
@@ -788,6 +799,9 @@ unique_ptr<FunctionData> ParquetWriteBind(ClientContext &context, CopyInfo &info
             throw NotImplementedException("Unrecognized option for PARQUET: %s", option.first.c_str());
         }
     }
+    if (!row_group_size_bytes_set) {
+        bind_data->row_group_size_bytes = bind_data->row_group_size * ParquetWriteBindData::BYTES_PER_ROW;
+    }
     bind_data->sql_types = sql_types;
     bind_data->column_names = names;
     return std::move(bind_data);
@@ -812,8 +826,10 @@ void ParquetWriteSink(ExecutionContext &context, FunctionData &bind_data_p, Glob
 
     // append data to the local (buffered) chunk collection
     local_state.buffer.Append(local_state.append_state, input);
-    … (2 removed lines truncated in this view)
+
+    if (local_state.buffer.Count() > bind_data.row_group_size ||
+        local_state.buffer.SizeInBytes() > bind_data.row_group_size_bytes) {
+        // if the chunk collection exceeds a certain size (rows/bytes) we flush it to the parquet file
         local_state.append_state.current_chunk_state.handles.clear();
         global_state.writer->Flush(local_state.buffer);
         local_state.buffer.InitializeAppend(local_state.append_state);
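Note: combined with the bind-time fallback above (when ROW_GROUP_SIZE_BYTES is not given, row_group_size_bytes defaults to row_group_size * BYTES_PER_ROW), the sink now flushes on whichever limit is hit first. A minimal standalone sketch of that two-threshold check; the names and numbers below are illustrative stand-ins, not DuckDB API:

```cpp
#include <cstdint>
#include <cstdio>

// Hypothetical stand-ins for the bind data fields in the diff above.
struct WriteLimits {
    uint64_t row_group_size;       // e.g. set via ROW_GROUP_SIZE
    uint64_t row_group_size_bytes; // e.g. set via ROW_GROUP_SIZE_BYTES
};

// Mirrors the new condition in ParquetWriteSink: flush the buffered
// collection when EITHER the row count or the byte size is exceeded.
static bool ShouldFlush(const WriteLimits &limits, uint64_t buffered_rows, uint64_t buffered_bytes) {
    return buffered_rows > limits.row_group_size || buffered_bytes > limits.row_group_size_bytes;
}

int main() {
    // Illustrative defaulting, as in ParquetWriteBind: bytes = rows * BYTES_PER_ROW (1024).
    WriteLimits limits;
    limits.row_group_size = 100000; // hypothetical row limit
    limits.row_group_size_bytes = limits.row_group_size * 1024;

    // A buffer with few rows but very wide values can now trigger a flush by bytes alone.
    std::printf("%d\n", ShouldFlush(limits, 50000, 200000000)); // prints 1
    std::printf("%d\n", ShouldFlush(limits, 50000, 1000000));   // prints 0
}
```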
package/src/duckdb/extension/parquet/parquet_reader.cpp
CHANGED
@@ -399,8 +399,7 @@ void ParquetReader::InitializeSchema() {
     if (file_meta_data->schema.size() < 2) {
         throw FormatException("Need at least one non-root column in the file");
     }
-    … (2 removed lines truncated in this view)
+    root_reader = CreateReader();
     auto &root_type = root_reader->Type();
     auto &child_types = StructType::GetChildTypes(root_type);
     D_ASSERT(root_type.id() == LogicalTypeId::STRUCT);
@@ -450,7 +449,6 @@ ParquetReader::ParquetReader(ClientContext &context_p, string file_name_p, Parqu
             ObjectCache::GetObjectCache(context_p).Put(file_name, metadata);
         }
     }
-
     InitializeSchema();
 }
 
@@ -483,7 +481,6 @@ unique_ptr<BaseStatistics> ParquetReader::ReadStatistics(const string &name) {
 
     unique_ptr<BaseStatistics> column_stats;
     auto file_meta_data = GetFileMetadata();
-    auto root_reader = CreateReader();
     auto column_reader = root_reader->Cast<StructColumnReader>().GetChildReader(file_col_idx);
 
     for (idx_t row_group_idx = 0; row_group_idx < file_meta_data->row_groups.size(); row_group_idx++) {
package/src/duckdb/src/common/sort/partition_state.cpp
CHANGED
@@ -87,16 +87,22 @@ PartitionGlobalSinkState::PartitionGlobalSinkState(ClientContext &context,
                                                    const vector<unique_ptr<BaseStatistics>> &partition_stats,
                                                    idx_t estimated_cardinality)
     : context(context), buffer_manager(BufferManager::GetBufferManager(context)), allocator(Allocator::Get(context)),
-      fixed_bits(0), payload_types(payload_types), memory_per_thread(0), count(0) {
+      fixed_bits(0), payload_types(payload_types), memory_per_thread(0), max_bits(1), count(0) {
 
     GenerateOrderings(partitions, orders, partition_bys, order_bys, partition_stats);
 
     memory_per_thread = PhysicalOperator::GetMaxThreadMemory(context);
     external = ClientConfig::GetConfig(context).force_external;
 
+    const auto thread_pages = PreviousPowerOfTwo(memory_per_thread / (4 * idx_t(Storage::BLOCK_ALLOC_SIZE)));
+    while (max_bits < 10 && (thread_pages >> max_bits) > 1) {
+        ++max_bits;
+    }
+
     if (!orders.empty()) {
-        … (2 removed lines truncated in this view)
+        auto types = payload_types;
+        types.push_back(LogicalType::HASH);
+        grouping_types.Initialize(types);
 
         ResizeGroupingData(estimated_cardinality);
     }
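Note: max_bits is derived from the per-thread memory budget: the number of block-sized pages a thread can hold is rounded down to a power of two, and the radix bit count grows while more than two partitions' worth of pages remain, capped at 10 bits (1024 partitions). A self-contained sketch with a local PreviousPowerOfTwo stand-in; the constants are illustrative and may not match DuckDB's actual Storage::BLOCK_ALLOC_SIZE:

```cpp
#include <cstdint>
#include <cstdio>

using idx_t = uint64_t;

// Local stand-in for DuckDB's PreviousPowerOfTwo: largest power of two <= v (0 for 0).
static idx_t PreviousPowerOfTwo(idx_t v) {
    if (v == 0) {
        return 0;
    }
    idx_t result = 1;
    while ((result << 1) != 0 && (result << 1) <= v) {
        result <<= 1;
    }
    return result;
}

int main() {
    const idx_t BLOCK_ALLOC_SIZE = 262144;        // illustrative 256 KiB block
    const idx_t memory_per_thread = 512ULL << 20; // illustrative 512 MiB budget

    // As in the constructor above: pages per thread, rounded down to a power of two.
    const idx_t thread_pages = PreviousPowerOfTwo(memory_per_thread / (4 * BLOCK_ALLOC_SIZE));

    idx_t max_bits = 1;
    while (max_bits < 10 && (thread_pages >> max_bits) > 1) {
        ++max_bits;
    }
    // 512 MiB / 1 MiB = 512 pages -> max_bits = 9 (512 partitions), capped at 10.
    std::printf("thread_pages=%llu max_bits=%llu\n",
                (unsigned long long)thread_pages, (unsigned long long)max_bits);
}
```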
@@ -108,10 +114,15 @@ void PartitionGlobalSinkState::SyncPartitioning(const PartitionGlobalSinkState &
     const auto old_bits = grouping_data ? grouping_data->GetRadixBits() : 0;
     if (fixed_bits != old_bits) {
         const auto hash_col_idx = payload_types.size();
-        grouping_data = make_uniq<…
+        grouping_data = make_uniq<RadixPartitionedTupleData>(buffer_manager, grouping_types, fixed_bits, hash_col_idx);
     }
 }
 
+unique_ptr<RadixPartitionedTupleData> PartitionGlobalSinkState::CreatePartition(idx_t new_bits) const {
+    const auto hash_col_idx = payload_types.size();
+    return make_uniq<RadixPartitionedTupleData>(buffer_manager, grouping_types, new_bits, hash_col_idx);
+}
+
 void PartitionGlobalSinkState::ResizeGroupingData(idx_t cardinality) {
     // Have we started to combine? Then just live with it.
     if (fixed_bits || (grouping_data && !grouping_data->GetPartitions().empty())) {
@@ -121,47 +132,31 @@ void PartitionGlobalSinkState::ResizeGroupingData(idx_t cardinality) {
     const idx_t partition_size = STANDARD_ROW_GROUPS_SIZE;
     const auto bits = grouping_data ? grouping_data->GetRadixBits() : 0;
     auto new_bits = bits ? bits : 4;
-    while (new_bits <…
+    while (new_bits < max_bits && (cardinality / RadixPartitioning::NumberOfPartitions(new_bits)) > partition_size) {
         ++new_bits;
     }
 
     // Repartition the grouping data
     if (new_bits != bits) {
-        … (1 removed line truncated in this view)
-        grouping_data = make_uniq<RadixPartitionedColumnData>(context, grouping_types, new_bits, hash_col_idx);
+        grouping_data = CreatePartition(new_bits);
     }
 }
 
 void PartitionGlobalSinkState::SyncLocalPartition(GroupingPartition &local_partition, GroupingAppend &local_append) {
     // We are done if the local_partition is right sized.
-    auto &local_radix = local_partition->Cast<…
-    … (1 removed line truncated in this view)
+    auto &local_radix = local_partition->Cast<RadixPartitionedTupleData>();
+    const auto new_bits = grouping_data->GetRadixBits();
+    if (local_radix.GetRadixBits() == new_bits) {
         return;
     }
 
     // If the local partition is now too small, flush it and reallocate
-    auto new_partition =…
-    auto new_append = make_uniq<PartitionedColumnDataAppendState>();
-    new_partition->InitializeAppendState(*new_append);
-
+    auto new_partition = CreatePartition(new_bits);
     local_partition->FlushAppendState(*local_append);
-
-    for (auto &local_group : local_groups) {
-        ColumnDataScanState scanner;
-        local_group->InitializeScan(scanner);
-
-        DataChunk scan_chunk;
-        local_group->InitializeScanChunk(scan_chunk);
-        for (scan_chunk.Reset(); local_group->Scan(scanner, scan_chunk); scan_chunk.Reset()) {
-            new_partition->Append(*new_append, scan_chunk);
-        }
-    }
-
-    // The append state has stale pointers to the old local partition, so nuke it from orbit.
-    new_partition->FlushAppendState(*new_append);
+    local_partition->Repartition(*new_partition);
 
     local_partition = std::move(new_partition);
-    local_append = make_uniq<…
+    local_append = make_uniq<PartitionedTupleDataAppendState>();
     local_partition->InitializeAppendState(*local_append);
 }
 
@@ -170,8 +165,8 @@ void PartitionGlobalSinkState::UpdateLocalPartition(GroupingPartition &local_par
     lock_guard<mutex> guard(lock);
 
     if (!local_partition) {
-        local_partition = grouping_data->…
-        local_append = make_uniq<…
+        local_partition = CreatePartition(grouping_data->GetRadixBits());
+        local_append = make_uniq<PartitionedTupleDataAppendState>();
         local_partition->InitializeAppendState(*local_append);
         return;
     }
@@ -196,7 +191,7 @@ void PartitionGlobalSinkState::CombineLocalPartition(GroupingPartition &local_pa
     grouping_data->Combine(*local_partition);
 }
 
-void PartitionGlobalSinkState::BuildSortState(…
+void PartitionGlobalSinkState::BuildSortState(TupleDataCollection &group_data, GlobalSortState &global_sort) const {
     // Set up the sort expression computation.
     vector<LogicalType> sort_types;
     ExpressionExecutor executor(context);
@@ -221,16 +216,9 @@ void PartitionGlobalSinkState::BuildSortState(ColumnDataCollection &group_data,
     for (column_t i = 0; i < payload_types.size(); ++i) {
         column_ids.emplace_back(i);
     }
-    … (2 removed lines truncated in this view)
-    chunk_state…
-    scanner.InitializeScan();
-    for (auto chunk_idx = scanner.ChunkCount(); chunk_idx-- > 0;) {
-        if (!scanner.AssignChunk(chunk_state)) {
-            break;
-        }
-        scanner.ScanChunk(chunk_state, payload_chunk);
-
+    TupleDataScanState chunk_state;
+    group_data.InitializeScan(chunk_state, column_ids);
+    while (group_data.Scan(chunk_state, payload_chunk)) {
         sort_chunk.Reset();
         executor.Execute(payload_chunk, sort_chunk);
 
@@ -238,13 +226,12 @@ void PartitionGlobalSinkState::BuildSortState(ColumnDataCollection &group_data,
         if (local_sort.SizeInBytes() > memory_per_thread) {
             local_sort.Sort(global_sort, true);
         }
-        scanner.FinishChunk(chunk_state);
     }
 
     global_sort.AddLocalState(local_sort);
 }
 
-void PartitionGlobalSinkState::BuildSortState(…
+void PartitionGlobalSinkState::BuildSortState(TupleDataCollection &group_data, PartitionGlobalHashGroup &hash_group) {
     BuildSortState(group_data, *hash_group.global_sort);
 
     hash_group.count += group_data.Count();
package/src/duckdb/src/common/sort/sort_state.cpp
CHANGED
@@ -315,7 +315,7 @@ void LocalSortState::ReOrder(SortedData &sd, data_ptr_t sorting_ptr, RowDataColl
     sd.data_blocks.back()->block->SetSwizzling(nullptr);
     // Create a single heap block to store the ordered heap
     idx_t total_byte_offset =
-        std::accumulate(heap.blocks.begin(), heap.blocks.end(), 0,
+        std::accumulate(heap.blocks.begin(), heap.blocks.end(), (idx_t)0,
                         [](idx_t a, const unique_ptr<RowDataBlock> &b) { return a + b->byte_offset; });
     idx_t heap_block_size = MaxValue(total_byte_offset, (idx_t)Storage::BLOCK_SIZE);
     auto ordered_heap_block = make_uniq<RowDataBlock>(*buffer_manager, heap_block_size, 1);
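Note: the (idx_t)0 initial value matters because std::accumulate deduces its accumulator type from that argument; with a plain 0 the running sum is kept in an int, so byte totals past 2^31 wrap (the sorted_block.cpp hunk below gets the same fix). A toy demonstration with illustrative values, not DuckDB code:

```cpp
#include <cstdint>
#include <cstdio>
#include <numeric>
#include <vector>

using idx_t = uint64_t;

int main() {
    // Three 1 GiB "blocks": the true total (3 GiB) does not fit in a 32-bit int.
    std::vector<idx_t> block_sizes(3, 1ULL << 30);

    // Accumulator type is int here: each step converts back to int
    // (implementation-defined once out of range; typically wraps negative).
    int bad = std::accumulate(block_sizes.begin(), block_sizes.end(), 0);

    // Accumulator type is idx_t here, matching the fixed code above.
    idx_t good = std::accumulate(block_sizes.begin(), block_sizes.end(), (idx_t)0);

    std::printf("int accumulator:   %d\n", bad);                        // wrong on typical platforms
    std::printf("idx_t accumulator: %llu\n", (unsigned long long)good); // 3221225472
}
```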
package/src/duckdb/src/common/sort/sorted_block.cpp
CHANGED
@@ -85,7 +85,7 @@ SortedBlock::SortedBlock(BufferManager &buffer_manager, GlobalSortState &state)
 }
 
 idx_t SortedBlock::Count() const {
-    idx_t count = std::accumulate(radix_sorting_data.begin(), radix_sorting_data.end(), 0,
+    idx_t count = std::accumulate(radix_sorting_data.begin(), radix_sorting_data.end(), (idx_t)0,
                                   [](idx_t a, const unique_ptr<RowDataBlock> &b) { return a + b->count; });
     if (!sort_layout.all_constant) {
         D_ASSERT(count == blob_sorting_data->Count());
package/src/duckdb/src/common/types/column/column_data_collection.cpp
CHANGED
@@ -100,6 +100,14 @@ Allocator &ColumnDataCollection::GetAllocator() const {
     return allocator->GetAllocator();
 }
 
+idx_t ColumnDataCollection::SizeInBytes() const {
+    idx_t total_size = 0;
+    for (const auto &segment : segments) {
+        total_size += segment->SizeInBytes();
+    }
+    return total_size;
+}
+
 //===--------------------------------------------------------------------===//
 // ColumnDataRow
 //===--------------------------------------------------------------------===//
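Note: the new SizeInBytes accessors form a simple roll-up: a collection sums its segments, a segment (next hunk) sums its allocator plus its string heap, and the allocators sum their blocks or chunks (see the column_data_allocator.hpp and arena_allocator.cpp hunks below). A simplified sketch of that aggregation pattern using stand-in types, not the real DuckDB classes:

```cpp
#include <cstdint>
#include <cstdio>
#include <memory>
#include <vector>

using idx_t = uint64_t;

// Stand-ins: each level only knows how to sum the level below it.
struct MockAllocator {
    std::vector<idx_t> block_sizes;
    idx_t SizeInBytes() const {
        idx_t total = 0;
        for (auto size : block_sizes) {
            total += size;
        }
        return total;
    }
};

struct MockSegment {
    MockAllocator allocator;
    idx_t heap_bytes = 0; // stand-in for the string heap's SizeInBytes()
    idx_t SizeInBytes() const {
        return allocator.SizeInBytes() + heap_bytes;
    }
};

struct MockCollection {
    std::vector<std::unique_ptr<MockSegment>> segments;
    idx_t SizeInBytes() const {
        idx_t total = 0;
        for (const auto &segment : segments) {
            total += segment->SizeInBytes();
        }
        return total;
    }
};

int main() {
    MockCollection collection;
    auto segment = std::make_unique<MockSegment>();
    segment->allocator.block_sizes = {262144, 262144}; // two blocks
    segment->heap_bytes = 1024;
    collection.segments.push_back(std::move(segment));
    std::printf("%llu\n", (unsigned long long)collection.SizeInBytes()); // 525312
}
```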
package/src/duckdb/src/common/types/column/column_data_collection_segment.cpp
CHANGED
@@ -243,6 +243,11 @@ idx_t ColumnDataCollectionSegment::ChunkCount() const {
     return chunk_data.size();
 }
 
+idx_t ColumnDataCollectionSegment::SizeInBytes() const {
+    D_ASSERT(!allocator->IsShared());
+    return allocator->SizeInBytes() + heap->SizeInBytes();
+}
+
 void ColumnDataCollectionSegment::FetchChunk(idx_t chunk_idx, DataChunk &result) {
     vector<column_t> column_ids;
     column_ids.reserve(types.size());
package/src/duckdb/src/function/table/version/pragma_version.cpp
CHANGED
@@ -1,8 +1,8 @@
 #ifndef DUCKDB_VERSION
-#define DUCKDB_VERSION "0.8.2-dev1724"
+#define DUCKDB_VERSION "0.8.2-dev1791"
 #endif
 #ifndef DUCKDB_SOURCE_ID
-#define DUCKDB_SOURCE_ID "…
+#define DUCKDB_SOURCE_ID "ecae3d0c87"
 #endif
 #include "duckdb/function/table/system_functions.hpp"
 #include "duckdb/main/database.hpp"
package/src/duckdb/src/include/duckdb/common/sort/partition_state.hpp
CHANGED
@@ -42,8 +42,8 @@ public:
     using Orders = vector<BoundOrderByNode>;
     using Types = vector<LogicalType>;
 
-    using GroupingPartition = unique_ptr<…
-    using GroupingAppend = unique_ptr<…
+    using GroupingPartition = unique_ptr<PartitionedTupleData>;
+    using GroupingAppend = unique_ptr<PartitionedTupleDataAppendState>;
 
     static void GenerateOrderings(Orders &partitions, Orders &orders,
                                   const vector<unique_ptr<Expression>> &partition_bys, const Orders &order_bys,
@@ -53,13 +53,14 @@ public:
                              const vector<BoundOrderByNode> &order_bys, const Types &payload_types,
                              const vector<unique_ptr<BaseStatistics>> &partitions_stats, idx_t estimated_cardinality);
 
+    unique_ptr<RadixPartitionedTupleData> CreatePartition(idx_t new_bits) const;
     void SyncPartitioning(const PartitionGlobalSinkState &other);
 
     void UpdateLocalPartition(GroupingPartition &local_partition, GroupingAppend &local_append);
     void CombineLocalPartition(GroupingPartition &local_partition, GroupingAppend &local_append);
 
-    void BuildSortState(…
-    void BuildSortState(…
+    void BuildSortState(TupleDataCollection &group_data, GlobalSortState &global_sort) const;
+    void BuildSortState(TupleDataCollection &group_data, PartitionGlobalHashGroup &global_sort);
 
     ClientContext &context;
     BufferManager &buffer_manager;
@@ -67,9 +68,9 @@ public:
     mutex lock;
 
     // OVER(PARTITION BY...) (hash grouping)
-    unique_ptr<…
+    unique_ptr<RadixPartitionedTupleData> grouping_data;
     //! Payload plus hash column
-    … (1 removed line truncated in this view)
+    TupleDataLayout grouping_types;
     //! The number of radix bits if this partition is being synced with another
     idx_t fixed_bits;
 
@@ -88,6 +89,7 @@ public:
 
     // Threading
     idx_t memory_per_thread;
+    idx_t max_bits;
     atomic<idx_t> count;
 
 private:
@@ -107,8 +109,8 @@ public:
     ExpressionExecutor executor;
     DataChunk group_chunk;
     DataChunk payload_chunk;
-    unique_ptr<…
-    unique_ptr<…
+    unique_ptr<PartitionedTupleData> local_partition;
+    unique_ptr<PartitionedTupleDataAppendState> local_append;
 
     // OVER(...) (sorting)
     size_t sort_cols;
@@ -132,7 +134,7 @@ class PartitionLocalMergeState;
 
 class PartitionGlobalMergeState {
 public:
-    using GroupDataPtr = unique_ptr<…
+    using GroupDataPtr = unique_ptr<TupleDataCollection>;
 
     PartitionGlobalMergeState(PartitionGlobalSinkState &sink, GroupDataPtr group_data, hash_t hash_bin);
 
package/src/duckdb/src/include/duckdb/common/types/column/column_data_allocator.hpp
CHANGED
@@ -43,9 +43,19 @@ public:
     void MakeShared() {
         shared = true;
     }
+    bool IsShared() const {
+        return shared;
+    }
     idx_t BlockCount() const {
         return blocks.size();
     }
+    idx_t SizeInBytes() const {
+        idx_t total_size = 0;
+        for (const auto &block : blocks) {
+            total_size += block.size;
+        }
+        return total_size;
+    }
 
 public:
     void AllocateData(idx_t size, uint32_t &block_id, uint32_t &offset, ChunkManagementState *chunk_state);
package/src/duckdb/src/include/duckdb/common/types/row/partitioned_tuple_data.hpp
CHANGED
@@ -123,7 +123,11 @@ protected:
     void BuildBufferSpace(PartitionedTupleDataAppendState &state);
     //! Create a collection for a specific a partition
     unique_ptr<TupleDataCollection> CreatePartitionCollection(idx_t partition_index) const {
-        … (1 removed line truncated in this view)
+        if (allocators) {
+            return make_uniq<TupleDataCollection>(allocators->allocators[partition_index]);
+        } else {
+            return make_uniq<TupleDataCollection>(buffer_manager, layout);
+        }
     }
 
 protected:
package/src/duckdb/src/optimizer/optimizer.cpp
CHANGED
@@ -81,9 +81,7 @@ unique_ptr<LogicalOperator> Optimizer::Optimize(unique_ptr<LogicalOperator> plan
 
     switch (plan_p->type) {
     case LogicalOperatorType::LOGICAL_TRANSACTION:
-    … (1 removed line truncated in this view)
-    case LogicalOperatorType::LOGICAL_PRAGMA:
-        return plan_p;
+        return plan_p; // skip optimizing simple & often-occurring plans unaffected by rewrites
     default:
         break;
     }
package/src/duckdb/src/parser/parser.cpp
CHANGED
@@ -193,48 +193,53 @@ void Parser::ParseQuery(const string &query) {
         auto query_statements = SplitQueryStringIntoStatements(query);
         auto stmt_loc = 0;
         for (auto const &query_statement : query_statements) {
-            … (11 removed lines truncated in this view)
-                statements.back()->stmt_length = query_statement.size() - 1;
-                statements.back()->stmt_location = stmt_loc;
-                stmt_loc += query_statement.size();
-            } else {
-                // let extensions parse the statement which DuckDB failed to parse
-                bool parsed_single_statement = false;
-                for (auto &ext : *options.extensions) {
-                    D_ASSERT(!parsed_single_statement);
-                    D_ASSERT(ext.parse_function);
-                    auto result = ext.parse_function(ext.parser_info.get(), query_statement);
-                    if (result.type == ParserExtensionResultType::PARSE_SUCCESSFUL) {
-                        auto statement = make_uniq<ExtensionStatement>(ext, std::move(result.parse_data));
-                        statement->stmt_length = query_statement.size() - 1;
-                        statement->stmt_location = stmt_loc;
-                        stmt_loc += query_statement.size();
-                        statements.push_back(std::move(statement));
-                        parsed_single_statement = true;
-                        break;
-                    } else if (result.type == ParserExtensionResultType::DISPLAY_EXTENSION_ERROR) {
-                        throw ParserException(result.error);
-                    } else {
-                        // We move to the next one!
+            string another_parser_error;
+            // Creating a new scope to allow extensions to use PostgresParser, which is not reentrant
+            {
+                PostgresParser another_parser;
+                another_parser.Parse(query_statement);
+                // LCOV_EXCL_START
+                // first see if DuckDB can parse this individual query statement
+                if (another_parser.success) {
+                    if (!another_parser.parse_tree) {
+                        // empty statement
+                        continue;
                     }
+                    transformer.TransformParseTree(another_parser.parse_tree, statements);
+                    // important to set in the case of a mixture of DDB and parser ext statements
+                    statements.back()->stmt_length = query_statement.size() - 1;
+                    statements.back()->stmt_location = stmt_loc;
+                    stmt_loc += query_statement.size();
+                    continue;
+                } else {
+                    another_parser_error = QueryErrorContext::Format(query, another_parser.error_message,
+                                                                     another_parser.error_location - 1);
                 }
-            … (4 removed lines truncated in this view)
+            } // LCOV_EXCL_STOP
+            // LCOV_EXCL_START
+            // let extensions parse the statement which DuckDB failed to parse
+            bool parsed_single_statement = false;
+            for (auto &ext : *options.extensions) {
+                D_ASSERT(!parsed_single_statement);
+                D_ASSERT(ext.parse_function);
+                auto result = ext.parse_function(ext.parser_info.get(), query_statement);
+                if (result.type == ParserExtensionResultType::PARSE_SUCCESSFUL) {
+                    auto statement = make_uniq<ExtensionStatement>(ext, std::move(result.parse_data));
+                    statement->stmt_length = query_statement.size() - 1;
+                    statement->stmt_location = stmt_loc;
+                    stmt_loc += query_statement.size();
+                    statements.push_back(std::move(statement));
+                    parsed_single_statement = true;
+                    break;
+                } else if (result.type == ParserExtensionResultType::DISPLAY_EXTENSION_ERROR) {
+                    throw ParserException(result.error);
+                } else {
+                    // We move to the next one!
                 }
             }
-            … (1 removed line truncated in this view)
+            if (!parsed_single_statement) {
+                throw ParserException(parser_error);
+            } // LCOV_EXCL_STOP
         }
     }
 }
package/src/duckdb/src/storage/arena_allocator.cpp
CHANGED
@@ -151,4 +151,16 @@ bool ArenaAllocator::IsEmpty() const {
     return head == nullptr;
 }
 
+idx_t ArenaAllocator::SizeInBytes() const {
+    idx_t total_size = 0;
+    if (!IsEmpty()) {
+        auto current = head.get();
+        while (current != nullptr) {
+            total_size += current->current_position;
+            current = current->next.get();
+        }
+    }
+    return total_size;
+}
+
 } // namespace duckdb
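Note: the walk sums current_position, i.e. bytes actually written into each chunk, not each chunk's allocated capacity. A toy linked-chunk arena showing the same used-bytes walk; the types here are illustrative only:

```cpp
#include <cstdint>
#include <cstdio>
#include <memory>

using idx_t = uint64_t;

// Toy arena chunk: a capacity and a fill level, chained like ArenaAllocator's chunks.
struct ToyChunk {
    idx_t capacity = 0;
    idx_t current_position = 0; // bytes used so far in this chunk
    std::unique_ptr<ToyChunk> next;
};

// Same shape as the new ArenaAllocator::SizeInBytes: sum used bytes across the chain.
static idx_t UsedBytes(const ToyChunk *head) {
    idx_t total_size = 0;
    for (auto current = head; current != nullptr; current = current->next.get()) {
        total_size += current->current_position;
    }
    return total_size;
}

int main() {
    ToyChunk head;
    head.capacity = 4096;
    head.current_position = 4096; // full chunk
    head.next = std::make_unique<ToyChunk>();
    head.next->capacity = 8192;
    head.next->current_position = 100; // mostly empty
    std::printf("%llu\n", (unsigned long long)UsedBytes(&head)); // 4196, not 12288
}
```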