npm - duckdb - Versions diffs - 0.8.2-dev1724.0 → 0.8.2-dev1791.0 - Mend

duckdb 0.8.2-dev1724.0 → 0.8.2-dev1791.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (22) hide show

package/README.md CHANGED Viewed

@@ -100,6 +100,13 @@ var stmt = con.prepare('select ?::INTEGER as fortytwo', function(err, stmt) {
 });
 ```
+## Supported Node versions
+We actively support only LTS and in-support Node versions; as of July 2023, these are Node 16, Node 18 and Node 20.
+The release schedule for Node.js can be found here: https://github.com/nodejs/release#release-schedule.
+We currently also bundle and test DuckDB for Node 10, 12, 14, 17 and 19. We plan on doing so going forward as long as the tooling supports it.
+As of July 2023, Node 15 has been removed from the supported versions.
 ## Development
 ### First install:

package/package.json CHANGED Viewed

@@ -2,7 +2,7 @@
   "name": "duckdb",
   "main": "./lib/duckdb.js",
   "types": "./lib/duckdb.d.ts",
-  "version": "0.8.2-dev1724.0",
+  "version": "0.8.2-dev1791.0",
   "description": "DuckDB node.js API",
   "gypfile": true,
   "dependencies": {

package/src/duckdb/extension/parquet/include/parquet_reader.hpp CHANGED Viewed

@@ -93,6 +93,7 @@ public:
 	shared_ptr<ParquetFileMetadataCache> metadata;
 	ParquetOptions parquet_options;
 	MultiFileReaderData reader_data;
+	unique_ptr<ColumnReader> root_reader;
 public:
 	void InitializeScan(ParquetReaderScanState &state, vector<idx_t> groups_to_read);

package/src/duckdb/extension/parquet/parquet_extension.cpp CHANGED Viewed

@@ -116,6 +116,11 @@ struct ParquetWriteBindData : public TableFunctionData {
 	vector<string> column_names;
 	duckdb_parquet::format::CompressionCodec::type codec = duckdb_parquet::format::CompressionCodec::SNAPPY;
 	idx_t row_group_size = RowGroup::ROW_GROUP_SIZE;
+	//! If row_group_size_bytes is not set, we default to row_group_size * BYTES_PER_ROW
+	static constexpr const idx_t BYTES_PER_ROW = 1024;
+	idx_t row_group_size_bytes;
 	ChildFieldIDs field_ids;
 };
@@ -741,33 +746,39 @@ static void GetFieldIDs(const Value &field_ids_value, ChildFieldIDs &field_ids,
 unique_ptr<FunctionData> ParquetWriteBind(ClientContext &context, CopyInfo &info, vector<string> &names,
                                           vector<LogicalType> &sql_types) {
 	D_ASSERT(names.size() == sql_types.size());
+	bool row_group_size_bytes_set = false;
 	auto bind_data = make_uniq<ParquetWriteBindData>();
 	for (auto &option : info.options) {
-		auto loption = StringUtil::Lower(option.first);
+		const auto loption = StringUtil::Lower(option.first);
+		if (option.second.size() != 1) {
+			// All parquet write options require exactly one argument
+			throw BinderException("%s requires exactly one argument", StringUtil::Upper(loption));
+		}
 		if (loption == "row_group_size" || loption == "chunk_size") {
 			bind_data->row_group_size = option.second[0].GetValue<uint64_t>();
+		} else if (loption == "row_group_size_bytes") {
+			auto roption = option.second[0];
+			if (roption.GetTypeMutable().id() == LogicalTypeId::VARCHAR) {
+				bind_data->row_group_size_bytes = DBConfig::ParseMemoryLimit(roption.ToString());
+			} else {
+				bind_data->row_group_size_bytes = option.second[0].GetValue<uint64_t>();
+			}
+			row_group_size_bytes_set = true;
 		} else if (loption == "compression" || loption == "codec") {
-			if (!option.second.empty()) {
-				auto roption = StringUtil::Lower(option.second[0].ToString());
-				if (roption == "uncompressed") {
-					bind_data->codec = duckdb_parquet::format::CompressionCodec::UNCOMPRESSED;
-					continue;
-				} else if (roption == "snappy") {
-					bind_data->codec = duckdb_parquet::format::CompressionCodec::SNAPPY;
-					continue;
-				} else if (roption == "gzip") {
-					bind_data->codec = duckdb_parquet::format::CompressionCodec::GZIP;
-					continue;
-				} else if (roption == "zstd") {
-					bind_data->codec = duckdb_parquet::format::CompressionCodec::ZSTD;
-					continue;
-				}
+			const auto roption = StringUtil::Lower(option.second[0].ToString());
+			if (roption == "uncompressed") {
+				bind_data->codec = duckdb_parquet::format::CompressionCodec::UNCOMPRESSED;
+			} else if (roption == "snappy") {
+				bind_data->codec = duckdb_parquet::format::CompressionCodec::SNAPPY;
+			} else if (roption == "gzip") {
+				bind_data->codec = duckdb_parquet::format::CompressionCodec::GZIP;
+			} else if (roption == "zstd") {
+				bind_data->codec = duckdb_parquet::format::CompressionCodec::ZSTD;
+			} else {
+				throw BinderException("Expected %s argument to be either [uncompressed, snappy, gzip or zstd]",
+				                      loption);
 			}
-			throw BinderException("Expected %s argument to be either [uncompressed, snappy, gzip or zstd]", loption);
 		} else if (loption == "field_ids") {
-			if (option.second.size() != 1) {
-				throw BinderException("FIELD_IDS requires exactly one argument");
-			}
 			if (option.second[0].type().id() == LogicalTypeId::VARCHAR &&
 			    StringUtil::Lower(StringValue::Get(option.second[0])) == "auto") {
 				idx_t field_id = 0;
@@ -788,6 +799,9 @@ unique_ptr<FunctionData> ParquetWriteBind(ClientContext &context, CopyInfo &info
 			throw NotImplementedException("Unrecognized option for PARQUET: %s", option.first.c_str());
 		}
 	}
+	if (!row_group_size_bytes_set) {
+		bind_data->row_group_size_bytes = bind_data->row_group_size * ParquetWriteBindData::BYTES_PER_ROW;
+	}
 	bind_data->sql_types = sql_types;
 	bind_data->column_names = names;
 	return std::move(bind_data);
@@ -812,8 +826,10 @@ void ParquetWriteSink(ExecutionContext &context, FunctionData &bind_data_p, Glob
 	// append data to the local (buffered) chunk collection
 	local_state.buffer.Append(local_state.append_state, input);
-	if (local_state.buffer.Count() > bind_data.row_group_size) {
-		// if the chunk collection exceeds a certain size we flush it to the parquet file
+	if (local_state.buffer.Count() > bind_data.row_group_size ||
+	    local_state.buffer.SizeInBytes() > bind_data.row_group_size_bytes) {
+		// if the chunk collection exceeds a certain size (rows/bytes) we flush it to the parquet file
 		local_state.append_state.current_chunk_state.handles.clear();
 		global_state.writer->Flush(local_state.buffer);
 		local_state.buffer.InitializeAppend(local_state.append_state);

package/src/duckdb/extension/parquet/parquet_reader.cpp CHANGED Viewed

@@ -399,8 +399,7 @@ void ParquetReader::InitializeSchema() {
 	if (file_meta_data->schema.size() < 2) {
 		throw FormatException("Need at least one non-root column in the file");
 	}
-	auto root_reader = CreateReader();
+	root_reader = CreateReader();
 	auto &root_type = root_reader->Type();
 	auto &child_types = StructType::GetChildTypes(root_type);
 	D_ASSERT(root_type.id() == LogicalTypeId::STRUCT);
@@ -450,7 +449,6 @@ ParquetReader::ParquetReader(ClientContext &context_p, string file_name_p, Parqu
 			ObjectCache::GetObjectCache(context_p).Put(file_name, metadata);
 		}
 	}
 	InitializeSchema();
 }
@@ -483,7 +481,6 @@ unique_ptr<BaseStatistics> ParquetReader::ReadStatistics(const string &name) {
 	unique_ptr<BaseStatistics> column_stats;
 	auto file_meta_data = GetFileMetadata();
-	auto root_reader = CreateReader();
 	auto column_reader = root_reader->Cast<StructColumnReader>().GetChildReader(file_col_idx);
 	for (idx_t row_group_idx = 0; row_group_idx < file_meta_data->row_groups.size(); row_group_idx++) {

package/src/duckdb/src/common/sort/partition_state.cpp CHANGED Viewed

@@ -87,16 +87,22 @@ PartitionGlobalSinkState::PartitionGlobalSinkState(ClientContext &context,
                                                    const vector<unique_ptr<BaseStatistics>> &partition_stats,
                                                    idx_t estimated_cardinality)
     : context(context), buffer_manager(BufferManager::GetBufferManager(context)), allocator(Allocator::Get(context)),
-      fixed_bits(0), payload_types(payload_types), memory_per_thread(0), count(0) {
+      fixed_bits(0), payload_types(payload_types), memory_per_thread(0), max_bits(1), count(0) {
 	GenerateOrderings(partitions, orders, partition_bys, order_bys, partition_stats);
 	memory_per_thread = PhysicalOperator::GetMaxThreadMemory(context);
 	external = ClientConfig::GetConfig(context).force_external;
+	const auto thread_pages = PreviousPowerOfTwo(memory_per_thread / (4 * idx_t(Storage::BLOCK_ALLOC_SIZE)));
+	while (max_bits < 10 && (thread_pages >> max_bits) > 1) {
+		++max_bits;
+	}
 	if (!orders.empty()) {
-		grouping_types = payload_types;
-		grouping_types.push_back(LogicalType::HASH);
+		auto types = payload_types;
+		types.push_back(LogicalType::HASH);
+		grouping_types.Initialize(types);
 		ResizeGroupingData(estimated_cardinality);
 	}
@@ -108,10 +114,15 @@ void PartitionGlobalSinkState::SyncPartitioning(const PartitionGlobalSinkState &
 	const auto old_bits = grouping_data ? grouping_data->GetRadixBits() : 0;
 	if (fixed_bits != old_bits) {
 		const auto hash_col_idx = payload_types.size();
-		grouping_data = make_uniq<RadixPartitionedColumnData>(context, grouping_types, fixed_bits, hash_col_idx);
+		grouping_data = make_uniq<RadixPartitionedTupleData>(buffer_manager, grouping_types, fixed_bits, hash_col_idx);
 	}
 }
+unique_ptr<RadixPartitionedTupleData> PartitionGlobalSinkState::CreatePartition(idx_t new_bits) const {
+	const auto hash_col_idx = payload_types.size();
+	return make_uniq<RadixPartitionedTupleData>(buffer_manager, grouping_types, new_bits, hash_col_idx);
+}
 void PartitionGlobalSinkState::ResizeGroupingData(idx_t cardinality) {
 	//	Have we started to combine? Then just live with it.
 	if (fixed_bits || (grouping_data && !grouping_data->GetPartitions().empty())) {
@@ -121,47 +132,31 @@ void PartitionGlobalSinkState::ResizeGroupingData(idx_t cardinality) {
 	const idx_t partition_size = STANDARD_ROW_GROUPS_SIZE;
 	const auto bits = grouping_data ? grouping_data->GetRadixBits() : 0;
 	auto new_bits = bits ? bits : 4;
-	while (new_bits < 10 && (cardinality / RadixPartitioning::NumberOfPartitions(new_bits)) > partition_size) {
+	while (new_bits < max_bits && (cardinality / RadixPartitioning::NumberOfPartitions(new_bits)) > partition_size) {
 		++new_bits;
 	}
 	// Repartition the grouping data
 	if (new_bits != bits) {
-		const auto hash_col_idx = payload_types.size();
-		grouping_data = make_uniq<RadixPartitionedColumnData>(context, grouping_types, new_bits, hash_col_idx);
+		grouping_data = CreatePartition(new_bits);
 	}
 }
 void PartitionGlobalSinkState::SyncLocalPartition(GroupingPartition &local_partition, GroupingAppend &local_append) {
 	// We are done if the local_partition is right sized.
-	auto &local_radix = local_partition->Cast<RadixPartitionedColumnData>();
-	if (local_radix.GetRadixBits() == grouping_data->GetRadixBits()) {
+	auto &local_radix = local_partition->Cast<RadixPartitionedTupleData>();
+	const auto new_bits = grouping_data->GetRadixBits();
+	if (local_radix.GetRadixBits() == new_bits) {
 		return;
 	}
 	// If the local partition is now too small, flush it and reallocate
-	auto new_partition = grouping_data->CreateShared();
-	auto new_append = make_uniq<PartitionedColumnDataAppendState>();
-	new_partition->InitializeAppendState(*new_append);
+	auto new_partition = CreatePartition(new_bits);
 	local_partition->FlushAppendState(*local_append);
-	auto &local_groups = local_partition->GetPartitions();
-	for (auto &local_group : local_groups) {
-		ColumnDataScanState scanner;
-		local_group->InitializeScan(scanner);
-		DataChunk scan_chunk;
-		local_group->InitializeScanChunk(scan_chunk);
-		for (scan_chunk.Reset(); local_group->Scan(scanner, scan_chunk); scan_chunk.Reset()) {
-			new_partition->Append(*new_append, scan_chunk);
-		}
-	}
-	// The append state has stale pointers to the old local partition, so nuke it from orbit.
-	new_partition->FlushAppendState(*new_append);
+	local_partition->Repartition(*new_partition);
 	local_partition = std::move(new_partition);
-	local_append = make_uniq<PartitionedColumnDataAppendState>();
+	local_append = make_uniq<PartitionedTupleDataAppendState>();
 	local_partition->InitializeAppendState(*local_append);
 }
@@ -170,8 +165,8 @@ void PartitionGlobalSinkState::UpdateLocalPartition(GroupingPartition &local_par
 	lock_guard<mutex> guard(lock);
 	if (!local_partition) {
-		local_partition = grouping_data->CreateShared();
-		local_append = make_uniq<PartitionedColumnDataAppendState>();
+		local_partition = CreatePartition(grouping_data->GetRadixBits());
+		local_append = make_uniq<PartitionedTupleDataAppendState>();
 		local_partition->InitializeAppendState(*local_append);
 		return;
 	}
@@ -196,7 +191,7 @@ void PartitionGlobalSinkState::CombineLocalPartition(GroupingPartition &local_pa
 	grouping_data->Combine(*local_partition);
 }
-void PartitionGlobalSinkState::BuildSortState(ColumnDataCollection &group_data, GlobalSortState &global_sort) const {
+void PartitionGlobalSinkState::BuildSortState(TupleDataCollection &group_data, GlobalSortState &global_sort) const {
 	//	 Set up the sort expression computation.
 	vector<LogicalType> sort_types;
 	ExpressionExecutor executor(context);
@@ -221,16 +216,9 @@ void PartitionGlobalSinkState::BuildSortState(ColumnDataCollection &group_data,
 	for (column_t i = 0; i < payload_types.size(); ++i) {
 		column_ids.emplace_back(i);
 	}
-	ColumnDataConsumer scanner(group_data, column_ids);
-	ColumnDataConsumerScanState chunk_state;
-	chunk_state.current_chunk_state.properties = ColumnDataScanProperties::ALLOW_ZERO_COPY;
-	scanner.InitializeScan();
-	for (auto chunk_idx = scanner.ChunkCount(); chunk_idx-- > 0;) {
-		if (!scanner.AssignChunk(chunk_state)) {
-			break;
-		}
-		scanner.ScanChunk(chunk_state, payload_chunk);
+	TupleDataScanState chunk_state;
+	group_data.InitializeScan(chunk_state, column_ids);
+	while (group_data.Scan(chunk_state, payload_chunk)) {
 		sort_chunk.Reset();
 		executor.Execute(payload_chunk, sort_chunk);
@@ -238,13 +226,12 @@ void PartitionGlobalSinkState::BuildSortState(ColumnDataCollection &group_data,
 		if (local_sort.SizeInBytes() > memory_per_thread) {
 			local_sort.Sort(global_sort, true);
 		}
-		scanner.FinishChunk(chunk_state);
 	}
 	global_sort.AddLocalState(local_sort);
 }
-void PartitionGlobalSinkState::BuildSortState(ColumnDataCollection &group_data, PartitionGlobalHashGroup &hash_group) {
+void PartitionGlobalSinkState::BuildSortState(TupleDataCollection &group_data, PartitionGlobalHashGroup &hash_group) {
 	BuildSortState(group_data, *hash_group.global_sort);
 	hash_group.count += group_data.Count();

package/src/duckdb/src/common/sort/sort_state.cpp CHANGED Viewed

@@ -315,7 +315,7 @@ void LocalSortState::ReOrder(SortedData &sd, data_ptr_t sorting_ptr, RowDataColl
 		sd.data_blocks.back()->block->SetSwizzling(nullptr);
 		// Create a single heap block to store the ordered heap
 		idx_t total_byte_offset =
-		    std::accumulate(heap.blocks.begin(), heap.blocks.end(), 0,
+		    std::accumulate(heap.blocks.begin(), heap.blocks.end(), (idx_t)0,
 		                    [](idx_t a, const unique_ptr<RowDataBlock> &b) { return a + b->byte_offset; });
 		idx_t heap_block_size = MaxValue(total_byte_offset, (idx_t)Storage::BLOCK_SIZE);
 		auto ordered_heap_block = make_uniq<RowDataBlock>(*buffer_manager, heap_block_size, 1);

package/src/duckdb/src/common/sort/sorted_block.cpp CHANGED Viewed

@@ -85,7 +85,7 @@ SortedBlock::SortedBlock(BufferManager &buffer_manager, GlobalSortState &state)
 }
 idx_t SortedBlock::Count() const {
-	idx_t count = std::accumulate(radix_sorting_data.begin(), radix_sorting_data.end(), 0,
+	idx_t count = std::accumulate(radix_sorting_data.begin(), radix_sorting_data.end(), (idx_t)0,
 	                              [](idx_t a, const unique_ptr<RowDataBlock> &b) { return a + b->count; });
 	if (!sort_layout.all_constant) {
 		D_ASSERT(count == blob_sorting_data->Count());

package/src/duckdb/src/common/types/column/column_data_collection.cpp CHANGED Viewed

@@ -100,6 +100,14 @@ Allocator &ColumnDataCollection::GetAllocator() const {
 	return allocator->GetAllocator();
 }
+idx_t ColumnDataCollection::SizeInBytes() const {
+	idx_t total_size = 0;
+	for (const auto &segment : segments) {
+		total_size += segment->SizeInBytes();
+	}
+	return total_size;
+}
 //===--------------------------------------------------------------------===//
 // ColumnDataRow
 //===--------------------------------------------------------------------===//

package/src/duckdb/src/common/types/column/column_data_collection_segment.cpp CHANGED Viewed

@@ -243,6 +243,11 @@ idx_t ColumnDataCollectionSegment::ChunkCount() const {
 	return chunk_data.size();
 }
+idx_t ColumnDataCollectionSegment::SizeInBytes() const {
+	D_ASSERT(!allocator->IsShared());
+	return allocator->SizeInBytes() + heap->SizeInBytes();
+}
 void ColumnDataCollectionSegment::FetchChunk(idx_t chunk_idx, DataChunk &result) {
 	vector<column_t> column_ids;
 	column_ids.reserve(types.size());

package/src/duckdb/src/common/types/string_heap.cpp CHANGED Viewed

@@ -55,4 +55,8 @@ string_t StringHeap::EmptyString(idx_t len) {
 	return string_t(insert_pos, len);
 }
+idx_t StringHeap::SizeInBytes() const {
+	return allocator.SizeInBytes();
+}
 } // namespace duckdb

package/src/duckdb/src/function/table/version/pragma_version.cpp CHANGED Viewed

@@ -1,8 +1,8 @@
 #ifndef DUCKDB_VERSION
-#define DUCKDB_VERSION "0.8.2-dev1724"
+#define DUCKDB_VERSION "0.8.2-dev1791"
 #endif
 #ifndef DUCKDB_SOURCE_ID
-#define DUCKDB_SOURCE_ID "0e0fd210cd"
+#define DUCKDB_SOURCE_ID "ecae3d0c87"
 #endif
 #include "duckdb/function/table/system_functions.hpp"
 #include "duckdb/main/database.hpp"

package/src/duckdb/src/include/duckdb/common/sort/partition_state.hpp CHANGED Viewed

@@ -42,8 +42,8 @@ public:
 	using Orders = vector<BoundOrderByNode>;
 	using Types = vector<LogicalType>;
-	using GroupingPartition = unique_ptr<PartitionedColumnData>;
-	using GroupingAppend = unique_ptr<PartitionedColumnDataAppendState>;
+	using GroupingPartition = unique_ptr<PartitionedTupleData>;
+	using GroupingAppend = unique_ptr<PartitionedTupleDataAppendState>;
 	static void GenerateOrderings(Orders &partitions, Orders &orders,
 	                              const vector<unique_ptr<Expression>> &partition_bys, const Orders &order_bys,
@@ -53,13 +53,14 @@ public:
 	                         const vector<BoundOrderByNode> &order_bys, const Types &payload_types,
 	                         const vector<unique_ptr<BaseStatistics>> &partitions_stats, idx_t estimated_cardinality);
+	unique_ptr<RadixPartitionedTupleData> CreatePartition(idx_t new_bits) const;
 	void SyncPartitioning(const PartitionGlobalSinkState &other);
 	void UpdateLocalPartition(GroupingPartition &local_partition, GroupingAppend &local_append);
 	void CombineLocalPartition(GroupingPartition &local_partition, GroupingAppend &local_append);
-	void BuildSortState(ColumnDataCollection &group_data, GlobalSortState &global_sort) const;
-	void BuildSortState(ColumnDataCollection &group_data, PartitionGlobalHashGroup &global_sort);
+	void BuildSortState(TupleDataCollection &group_data, GlobalSortState &global_sort) const;
+	void BuildSortState(TupleDataCollection &group_data, PartitionGlobalHashGroup &global_sort);
 	ClientContext &context;
 	BufferManager &buffer_manager;
@@ -67,9 +68,9 @@ public:
 	mutex lock;
 	// OVER(PARTITION BY...) (hash grouping)
-	unique_ptr<RadixPartitionedColumnData> grouping_data;
+	unique_ptr<RadixPartitionedTupleData> grouping_data;
 	//! Payload plus hash column
-	Types grouping_types;
+	TupleDataLayout grouping_types;
 	//! The number of radix bits if this partition is being synced with another
 	idx_t fixed_bits;
@@ -88,6 +89,7 @@ public:
 	// Threading
 	idx_t memory_per_thread;
+	idx_t max_bits;
 	atomic<idx_t> count;
 private:
@@ -107,8 +109,8 @@ public:
 	ExpressionExecutor executor;
 	DataChunk group_chunk;
 	DataChunk payload_chunk;
-	unique_ptr<PartitionedColumnData> local_partition;
-	unique_ptr<PartitionedColumnDataAppendState> local_append;
+	unique_ptr<PartitionedTupleData> local_partition;
+	unique_ptr<PartitionedTupleDataAppendState> local_append;
 	// OVER(...) (sorting)
 	size_t sort_cols;
@@ -132,7 +134,7 @@ class PartitionLocalMergeState;
 class PartitionGlobalMergeState {
 public:
-	using GroupDataPtr = unique_ptr<ColumnDataCollection>;
+	using GroupDataPtr = unique_ptr<TupleDataCollection>;
 	PartitionGlobalMergeState(PartitionGlobalSinkState &sink, GroupDataPtr group_data, hash_t hash_bin);

package/src/duckdb/src/include/duckdb/common/types/column/column_data_allocator.hpp CHANGED Viewed

@@ -43,9 +43,19 @@ public:
 	void MakeShared() {
 		shared = true;
 	}
+	bool IsShared() const {
+		return shared;
+	}
 	idx_t BlockCount() const {
 		return blocks.size();
 	}
+	idx_t SizeInBytes() const {
+		idx_t total_size = 0;
+		for (const auto &block : blocks) {
+			total_size += block.size;
+		}
+		return total_size;
+	}
 public:
 	void AllocateData(idx_t size, uint32_t &block_id, uint32_t &offset, ChunkManagementState *chunk_state);

package/src/duckdb/src/include/duckdb/common/types/column/column_data_collection.hpp CHANGED Viewed

@@ -61,6 +61,9 @@ public:
 		return types.size();
 	}
+	//! The size (in bytes) of this ColumnDataCollection
+	idx_t SizeInBytes() const;
 	//! Get the allocator
 	DUCKDB_API Allocator &GetAllocator() const;

package/src/duckdb/src/include/duckdb/common/types/column/column_data_collection_segment.hpp CHANGED Viewed

@@ -126,6 +126,8 @@ public:
 	}
 	idx_t ChunkCount() const;
+	idx_t SizeInBytes() const;
 	void FetchChunk(idx_t chunk_idx, DataChunk &result);
 	void FetchChunk(idx_t chunk_idx, DataChunk &result, const vector<column_t> &column_ids);

package/src/duckdb/src/include/duckdb/common/types/row/partitioned_tuple_data.hpp CHANGED Viewed

@@ -123,7 +123,11 @@ protected:
 	void BuildBufferSpace(PartitionedTupleDataAppendState &state);
 	//! Create a collection for a specific partition
 	unique_ptr<TupleDataCollection> CreatePartitionCollection(idx_t partition_index) const {
-		return make_uniq<TupleDataCollection>(allocators->allocators[partition_index]);
+		if (allocators) {
+			return make_uniq<TupleDataCollection>(allocators->allocators[partition_index]);
+		} else {
+			return make_uniq<TupleDataCollection>(buffer_manager, layout);
+		}
 	}
 protected:

package/src/duckdb/src/include/duckdb/common/types/string_heap.hpp CHANGED Viewed

@@ -38,6 +38,9 @@ public:
 	//! Allocates space for an empty string of size "len" on the heap
 	DUCKDB_API string_t EmptyString(idx_t len);
+	//! Total size (in bytes) used by the strings allocated on this heap
+	DUCKDB_API idx_t SizeInBytes() const;
 private:
 	ArenaAllocator allocator;
 };

package/src/duckdb/src/include/duckdb/storage/arena_allocator.hpp CHANGED Viewed

@@ -46,6 +46,7 @@ public:
 	DUCKDB_API ArenaChunk *GetTail();
 	DUCKDB_API bool IsEmpty() const;
+	DUCKDB_API idx_t SizeInBytes() const;
 	//! Returns an "Allocator" wrapper for this arena allocator
 	Allocator &GetAllocator() {

package/src/duckdb/src/optimizer/optimizer.cpp CHANGED Viewed

@@ -81,9 +81,7 @@ unique_ptr<LogicalOperator> Optimizer::Optimize(unique_ptr<LogicalOperator> plan
 	switch (plan_p->type) {
 	case LogicalOperatorType::LOGICAL_TRANSACTION:
-	case LogicalOperatorType::LOGICAL_SET:
-	case LogicalOperatorType::LOGICAL_PRAGMA:
-		return plan_p;
+		return plan_p; // skip optimizing simple & often-occurring plans unaffected by rewrites
 	default:
 		break;
 	}

package/src/duckdb/src/parser/parser.cpp CHANGED Viewed

@@ -193,48 +193,53 @@ void Parser::ParseQuery(const string &query) {
 			auto query_statements = SplitQueryStringIntoStatements(query);
 			auto stmt_loc = 0;
 			for (auto const &query_statement : query_statements) {
-				PostgresParser another_parser;
-				another_parser.Parse(query_statement);
-				// LCOV_EXCL_START
-				// first see if DuckDB can parse this individual query statement
-				if (another_parser.success) {
-					if (!another_parser.parse_tree) {
-						// empty statement
-						continue;
-					}
-					transformer.TransformParseTree(another_parser.parse_tree, statements);
-					// important to set in the case of a mixture of DDB and parser ext statements
-					statements.back()->stmt_length = query_statement.size() - 1;
-					statements.back()->stmt_location = stmt_loc;
-					stmt_loc += query_statement.size();
-				} else {
-					// let extensions parse the statement which DuckDB failed to parse
-					bool parsed_single_statement = false;
-					for (auto &ext : *options.extensions) {
-						D_ASSERT(!parsed_single_statement);
-						D_ASSERT(ext.parse_function);
-						auto result = ext.parse_function(ext.parser_info.get(), query_statement);
-						if (result.type == ParserExtensionResultType::PARSE_SUCCESSFUL) {
-							auto statement = make_uniq<ExtensionStatement>(ext, std::move(result.parse_data));
-							statement->stmt_length = query_statement.size() - 1;
-							statement->stmt_location = stmt_loc;
-							stmt_loc += query_statement.size();
-							statements.push_back(std::move(statement));
-							parsed_single_statement = true;
-							break;
-						} else if (result.type == ParserExtensionResultType::DISPLAY_EXTENSION_ERROR) {
-							throw ParserException(result.error);
-						} else {
-							// We move to the next one!
+				string another_parser_error;
+				// Creating a new scope to allow extensions to use PostgresParser, which is not reentrant
+				{
+					PostgresParser another_parser;
+					another_parser.Parse(query_statement);
+					// LCOV_EXCL_START
+					// first see if DuckDB can parse this individual query statement
+					if (another_parser.success) {
+						if (!another_parser.parse_tree) {
+							// empty statement
+							continue;
 						}
+						transformer.TransformParseTree(another_parser.parse_tree, statements);
+						// important to set in the case of a mixture of DDB and parser ext statements
+						statements.back()->stmt_length = query_statement.size() - 1;
+						statements.back()->stmt_location = stmt_loc;
+						stmt_loc += query_statement.size();
+						continue;
+					} else {
+						another_parser_error = QueryErrorContext::Format(query, another_parser.error_message,
+						                                                 another_parser.error_location - 1);
 					}
-					if (!parsed_single_statement) {
-						parser_error = QueryErrorContext::Format(query, another_parser.error_message,
-						                                         another_parser.error_location - 1);
-						throw ParserException(parser_error);
+				} // LCOV_EXCL_STOP
+				// LCOV_EXCL_START
+				// let extensions parse the statement which DuckDB failed to parse
+				bool parsed_single_statement = false;
+				for (auto &ext : *options.extensions) {
+					D_ASSERT(!parsed_single_statement);
+					D_ASSERT(ext.parse_function);
+					auto result = ext.parse_function(ext.parser_info.get(), query_statement);
+					if (result.type == ParserExtensionResultType::PARSE_SUCCESSFUL) {
+						auto statement = make_uniq<ExtensionStatement>(ext, std::move(result.parse_data));
+						statement->stmt_length = query_statement.size() - 1;
+						statement->stmt_location = stmt_loc;
+						stmt_loc += query_statement.size();
+						statements.push_back(std::move(statement));
+						parsed_single_statement = true;
+						break;
+					} else if (result.type == ParserExtensionResultType::DISPLAY_EXTENSION_ERROR) {
+						throw ParserException(result.error);
+					} else {
+						// We move to the next one!
 					}
 				}
-				// LCOV_EXCL_STOP
+				if (!parsed_single_statement) {
+					throw ParserException(parser_error);
+				} // LCOV_EXCL_STOP
 			}
 		}
 	}

package/src/duckdb/src/storage/arena_allocator.cpp CHANGED Viewed

@@ -151,4 +151,16 @@ bool ArenaAllocator::IsEmpty() const {
 	return head == nullptr;
 }
+idx_t ArenaAllocator::SizeInBytes() const {
+	idx_t total_size = 0;
+	if (!IsEmpty()) {
+		auto current = head.get();
+		while (current != nullptr) {
+			total_size += current->current_position;
+			current = current->next.get();
+		}
+	}
+	return total_size;
+}
 } // namespace duckdb