duckdb 0.8.2-dev1764.0 → 0.8.2-dev1791.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -100,6 +100,13 @@ var stmt = con.prepare('select ?::INTEGER as fortytwo', function(err, stmt) {
  });
  ```
 
+ ## Supported Node versions
+ We actively support only LTS and in-support Node versions. As of July 2023, these are Node 16, Node 18 and Node 20.
+ The release schedule for Node.js can be checked here: https://github.com/nodejs/release#release-schedule.
+
+ We currently also bundle and test DuckDB for Node 10, 12, 14, 17 and 19, and we plan to keep doing so as long as the tooling supports it.
+ As of July 2023, Node 15 has been removed from the supported versions.
+
  ## Development
 
  ### First install:
package/package.json CHANGED
@@ -2,7 +2,7 @@
  "name": "duckdb",
  "main": "./lib/duckdb.js",
  "types": "./lib/duckdb.d.ts",
- "version": "0.8.2-dev1764.0",
+ "version": "0.8.2-dev1791.0",
  "description": "DuckDB node.js API",
  "gypfile": true,
  "dependencies": {
@@ -93,6 +93,7 @@ public:
  shared_ptr<ParquetFileMetadataCache> metadata;
  ParquetOptions parquet_options;
  MultiFileReaderData reader_data;
+ unique_ptr<ColumnReader> root_reader;
 
  public:
  void InitializeScan(ParquetReaderScanState &state, vector<idx_t> groups_to_read);
@@ -116,6 +116,11 @@ struct ParquetWriteBindData : public TableFunctionData {
  vector<string> column_names;
  duckdb_parquet::format::CompressionCodec::type codec = duckdb_parquet::format::CompressionCodec::SNAPPY;
  idx_t row_group_size = RowGroup::ROW_GROUP_SIZE;
+
+ //! If row_group_size_bytes is not set, we default to row_group_size * BYTES_PER_ROW
+ static constexpr const idx_t BYTES_PER_ROW = 1024;
+ idx_t row_group_size_bytes;
+
  ChildFieldIDs field_ids;
  };
 
@@ -741,33 +746,39 @@ static void GetFieldIDs(const Value &field_ids_value, ChildFieldIDs &field_ids,
  unique_ptr<FunctionData> ParquetWriteBind(ClientContext &context, CopyInfo &info, vector<string> &names,
  vector<LogicalType> &sql_types) {
  D_ASSERT(names.size() == sql_types.size());
+ bool row_group_size_bytes_set = false;
  auto bind_data = make_uniq<ParquetWriteBindData>();
  for (auto &option : info.options) {
- auto loption = StringUtil::Lower(option.first);
+ const auto loption = StringUtil::Lower(option.first);
+ if (option.second.size() != 1) {
+ // All parquet write options require exactly one argument
+ throw BinderException("%s requires exactly one argument", StringUtil::Upper(loption));
+ }
  if (loption == "row_group_size" || loption == "chunk_size") {
  bind_data->row_group_size = option.second[0].GetValue<uint64_t>();
+ } else if (loption == "row_group_size_bytes") {
+ auto roption = option.second[0];
+ if (roption.GetTypeMutable().id() == LogicalTypeId::VARCHAR) {
+ bind_data->row_group_size_bytes = DBConfig::ParseMemoryLimit(roption.ToString());
+ } else {
+ bind_data->row_group_size_bytes = option.second[0].GetValue<uint64_t>();
+ }
+ row_group_size_bytes_set = true;
  } else if (loption == "compression" || loption == "codec") {
- if (!option.second.empty()) {
- auto roption = StringUtil::Lower(option.second[0].ToString());
- if (roption == "uncompressed") {
- bind_data->codec = duckdb_parquet::format::CompressionCodec::UNCOMPRESSED;
- continue;
- } else if (roption == "snappy") {
- bind_data->codec = duckdb_parquet::format::CompressionCodec::SNAPPY;
- continue;
- } else if (roption == "gzip") {
- bind_data->codec = duckdb_parquet::format::CompressionCodec::GZIP;
- continue;
- } else if (roption == "zstd") {
- bind_data->codec = duckdb_parquet::format::CompressionCodec::ZSTD;
- continue;
- }
+ const auto roption = StringUtil::Lower(option.second[0].ToString());
+ if (roption == "uncompressed") {
+ bind_data->codec = duckdb_parquet::format::CompressionCodec::UNCOMPRESSED;
+ } else if (roption == "snappy") {
+ bind_data->codec = duckdb_parquet::format::CompressionCodec::SNAPPY;
+ } else if (roption == "gzip") {
+ bind_data->codec = duckdb_parquet::format::CompressionCodec::GZIP;
+ } else if (roption == "zstd") {
+ bind_data->codec = duckdb_parquet::format::CompressionCodec::ZSTD;
+ } else {
+ throw BinderException("Expected %s argument to be either [uncompressed, snappy, gzip or zstd]",
+ loption);
  }
- throw BinderException("Expected %s argument to be either [uncompressed, snappy, gzip or zstd]", loption);
  } else if (loption == "field_ids") {
- if (option.second.size() != 1) {
- throw BinderException("FIELD_IDS requires exactly one argument");
- }
  if (option.second[0].type().id() == LogicalTypeId::VARCHAR &&
  StringUtil::Lower(StringValue::Get(option.second[0])) == "auto") {
  idx_t field_id = 0;
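A hedged usage sketch (not taken from the package) of the option parsed above, driven through DuckDB's C++ API; the table, row count and output path are made up, and the `'100MB'` string form goes through the `DBConfig::ParseMemoryLimit` branch added here:

```cpp
#include "duckdb.hpp"

int main() {
	duckdb::DuckDB db(nullptr); // in-memory database
	duckdb::Connection con(db);

	con.Query("CREATE TABLE t AS SELECT range AS i FROM range(1000000)");

	// A row group is flushed once it exceeds 50000 rows *or* roughly 100 MB of
	// buffered data, whichever comes first (see the ParquetWriteSink hunk below).
	auto result = con.Query(
	    "COPY t TO 'out.parquet' "
	    "(FORMAT PARQUET, ROW_GROUP_SIZE 50000, ROW_GROUP_SIZE_BYTES '100MB')");
	if (result->HasError()) {
		// e.g. inspect result->GetError()
	}
	return 0;
}
```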
@@ -788,6 +799,9 @@ unique_ptr<FunctionData> ParquetWriteBind(ClientContext &context, CopyInfo &info
  throw NotImplementedException("Unrecognized option for PARQUET: %s", option.first.c_str());
  }
  }
+ if (!row_group_size_bytes_set) {
+ bind_data->row_group_size_bytes = bind_data->row_group_size * ParquetWriteBindData::BYTES_PER_ROW;
+ }
  bind_data->sql_types = sql_types;
  bind_data->column_names = names;
  return std::move(bind_data);
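For orientation, a minimal sketch (not part of the package) of the arithmetic behind this fallback. It assumes the 122880-row default that `RowGroup::ROW_GROUP_SIZE` carries in the DuckDB sources, which puts the implied byte budget at about 120 MiB per row group:

```cpp
#include <cstdint>

using idx_t = uint64_t;

// Mirrors the default added in this diff: 1024 bytes assumed per row.
static constexpr idx_t BYTES_PER_ROW = 1024;

constexpr idx_t DefaultRowGroupSizeBytes(idx_t row_group_size) {
	return row_group_size * BYTES_PER_ROW;
}

// Assumed default of 122880 rows per row group -> 120 MiB byte budget.
static_assert(DefaultRowGroupSizeBytes(122880) == idx_t(120) * 1024 * 1024,
              "122880 rows * 1024 bytes/row == 120 MiB");

int main() {
	return 0;
}
```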
@@ -812,8 +826,10 @@ void ParquetWriteSink(ExecutionContext &context, FunctionData &bind_data_p, Glob
 
  // append data to the local (buffered) chunk collection
  local_state.buffer.Append(local_state.append_state, input);
- if (local_state.buffer.Count() > bind_data.row_group_size) {
- // if the chunk collection exceeds a certain size we flush it to the parquet file
+
+ if (local_state.buffer.Count() > bind_data.row_group_size ||
+ local_state.buffer.SizeInBytes() > bind_data.row_group_size_bytes) {
+ // if the chunk collection exceeds a certain size (rows/bytes) we flush it to the parquet file
  local_state.append_state.current_chunk_state.handles.clear();
  global_state.writer->Flush(local_state.buffer);
  local_state.buffer.InitializeAppend(local_state.append_state);
@@ -399,8 +399,7 @@ void ParquetReader::InitializeSchema() {
  if (file_meta_data->schema.size() < 2) {
  throw FormatException("Need at least one non-root column in the file");
  }
- auto root_reader = CreateReader();
-
+ root_reader = CreateReader();
  auto &root_type = root_reader->Type();
  auto &child_types = StructType::GetChildTypes(root_type);
  D_ASSERT(root_type.id() == LogicalTypeId::STRUCT);
@@ -450,7 +449,6 @@ ParquetReader::ParquetReader(ClientContext &context_p, string file_name_p, Parqu
  ObjectCache::GetObjectCache(context_p).Put(file_name, metadata);
  }
  }
-
  InitializeSchema();
  }
 
@@ -483,7 +481,6 @@ unique_ptr<BaseStatistics> ParquetReader::ReadStatistics(const string &name) {
 
  unique_ptr<BaseStatistics> column_stats;
  auto file_meta_data = GetFileMetadata();
- auto root_reader = CreateReader();
  auto column_reader = root_reader->Cast<StructColumnReader>().GetChildReader(file_col_idx);
 
  for (idx_t row_group_idx = 0; row_group_idx < file_meta_data->row_groups.size(); row_group_idx++) {
@@ -315,7 +315,7 @@ void LocalSortState::ReOrder(SortedData &sd, data_ptr_t sorting_ptr, RowDataColl
  sd.data_blocks.back()->block->SetSwizzling(nullptr);
  // Create a single heap block to store the ordered heap
  idx_t total_byte_offset =
- std::accumulate(heap.blocks.begin(), heap.blocks.end(), 0,
+ std::accumulate(heap.blocks.begin(), heap.blocks.end(), (idx_t)0,
  [](idx_t a, const unique_ptr<RowDataBlock> &b) { return a + b->byte_offset; });
  idx_t heap_block_size = MaxValue(total_byte_offset, (idx_t)Storage::BLOCK_SIZE);
  auto ordered_heap_block = make_uniq<RowDataBlock>(*buffer_manager, heap_block_size, 1);
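The `(idx_t)0` change matters because `std::accumulate` uses the type of its initial value as the accumulator type; with a plain `0` the sum is carried in an `int`, so large 64-bit byte counts are narrowed at every step. A standalone illustration (not DuckDB code, sizes chosen only to overflow 32 bits):

```cpp
#include <cassert>
#include <cstdint>
#include <numeric>
#include <vector>

using idx_t = uint64_t;

int main() {
	// Two "blocks" whose byte offsets only fit in 64 bits once added together.
	std::vector<idx_t> byte_offsets {3'000'000'000ULL, 3'000'000'000ULL};

	// With a literal 0 the accumulator is deduced as int, so each partial sum is
	// converted back to int and the result is wrong (and implementation-defined
	// before C++20):
	//   auto bad = std::accumulate(byte_offsets.begin(), byte_offsets.end(), 0);

	// Passing an idx_t-typed initial value keeps the whole accumulation in
	// 64 bits, which is exactly what the (idx_t)0 change guarantees.
	idx_t total = std::accumulate(byte_offsets.begin(), byte_offsets.end(), (idx_t)0);
	assert(total == 6'000'000'000ULL);
	return 0;
}
```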
@@ -85,7 +85,7 @@ SortedBlock::SortedBlock(BufferManager &buffer_manager, GlobalSortState &state)
  }
 
  idx_t SortedBlock::Count() const {
- idx_t count = std::accumulate(radix_sorting_data.begin(), radix_sorting_data.end(), 0,
+ idx_t count = std::accumulate(radix_sorting_data.begin(), radix_sorting_data.end(), (idx_t)0,
  [](idx_t a, const unique_ptr<RowDataBlock> &b) { return a + b->count; });
  if (!sort_layout.all_constant) {
  D_ASSERT(count == blob_sorting_data->Count());
@@ -100,6 +100,14 @@ Allocator &ColumnDataCollection::GetAllocator() const {
  return allocator->GetAllocator();
  }
 
+ idx_t ColumnDataCollection::SizeInBytes() const {
+ idx_t total_size = 0;
+ for (const auto &segment : segments) {
+ total_size += segment->SizeInBytes();
+ }
+ return total_size;
+ }
+
  //===--------------------------------------------------------------------===//
  // ColumnDataRow
  //===--------------------------------------------------------------------===//
@@ -243,6 +243,11 @@ idx_t ColumnDataCollectionSegment::ChunkCount() const {
  return chunk_data.size();
  }
 
+ idx_t ColumnDataCollectionSegment::SizeInBytes() const {
+ D_ASSERT(!allocator->IsShared());
+ return allocator->SizeInBytes() + heap->SizeInBytes();
+ }
+
  void ColumnDataCollectionSegment::FetchChunk(idx_t chunk_idx, DataChunk &result) {
  vector<column_t> column_ids;
  column_ids.reserve(types.size());
@@ -55,4 +55,8 @@ string_t StringHeap::EmptyString(idx_t len) {
  return string_t(insert_pos, len);
  }
 
+ idx_t StringHeap::SizeInBytes() const {
+ return allocator.SizeInBytes();
+ }
+
  } // namespace duckdb
@@ -1,8 +1,8 @@
  #ifndef DUCKDB_VERSION
- #define DUCKDB_VERSION "0.8.2-dev1764"
+ #define DUCKDB_VERSION "0.8.2-dev1791"
  #endif
  #ifndef DUCKDB_SOURCE_ID
- #define DUCKDB_SOURCE_ID "07b0b0a2a4"
+ #define DUCKDB_SOURCE_ID "ecae3d0c87"
  #endif
  #include "duckdb/function/table/system_functions.hpp"
  #include "duckdb/main/database.hpp"
@@ -43,9 +43,19 @@ public:
  void MakeShared() {
  shared = true;
  }
+ bool IsShared() const {
+ return shared;
+ }
  idx_t BlockCount() const {
  return blocks.size();
  }
+ idx_t SizeInBytes() const {
+ idx_t total_size = 0;
+ for (const auto &block : blocks) {
+ total_size += block.size;
+ }
+ return total_size;
+ }
 
  public:
  void AllocateData(idx_t size, uint32_t &block_id, uint32_t &offset, ChunkManagementState *chunk_state);
@@ -61,6 +61,9 @@ public:
  return types.size();
  }
 
+ //! The size (in bytes) of this ColumnDataCollection
+ idx_t SizeInBytes() const;
+
  //! Get the allocator
  DUCKDB_API Allocator &GetAllocator() const;
 
@@ -126,6 +126,8 @@ public:
  }
 
  idx_t ChunkCount() const;
+ idx_t SizeInBytes() const;
+
  void FetchChunk(idx_t chunk_idx, DataChunk &result);
  void FetchChunk(idx_t chunk_idx, DataChunk &result, const vector<column_t> &column_ids);
 
@@ -38,6 +38,9 @@ public:
  //! Allocates space for an empty string of size "len" on the heap
  DUCKDB_API string_t EmptyString(idx_t len);
 
+ //! Size of strings
+ DUCKDB_API idx_t SizeInBytes() const;
+
  private:
  ArenaAllocator allocator;
  };
@@ -46,6 +46,7 @@ public:
  DUCKDB_API ArenaChunk *GetTail();
 
  DUCKDB_API bool IsEmpty() const;
+ DUCKDB_API idx_t SizeInBytes() const;
 
  //! Returns an "Allocator" wrapper for this arena allocator
  Allocator &GetAllocator() {
@@ -193,48 +193,53 @@ void Parser::ParseQuery(const string &query) {
  auto query_statements = SplitQueryStringIntoStatements(query);
  auto stmt_loc = 0;
  for (auto const &query_statement : query_statements) {
- PostgresParser another_parser;
- another_parser.Parse(query_statement);
- // LCOV_EXCL_START
- // first see if DuckDB can parse this individual query statement
- if (another_parser.success) {
- if (!another_parser.parse_tree) {
- // empty statement
- continue;
- }
- transformer.TransformParseTree(another_parser.parse_tree, statements);
- // important to set in the case of a mixture of DDB and parser ext statements
- statements.back()->stmt_length = query_statement.size() - 1;
- statements.back()->stmt_location = stmt_loc;
- stmt_loc += query_statement.size();
- } else {
- // let extensions parse the statement which DuckDB failed to parse
- bool parsed_single_statement = false;
- for (auto &ext : *options.extensions) {
- D_ASSERT(!parsed_single_statement);
- D_ASSERT(ext.parse_function);
- auto result = ext.parse_function(ext.parser_info.get(), query_statement);
- if (result.type == ParserExtensionResultType::PARSE_SUCCESSFUL) {
- auto statement = make_uniq<ExtensionStatement>(ext, std::move(result.parse_data));
- statement->stmt_length = query_statement.size() - 1;
- statement->stmt_location = stmt_loc;
- stmt_loc += query_statement.size();
- statements.push_back(std::move(statement));
- parsed_single_statement = true;
- break;
- } else if (result.type == ParserExtensionResultType::DISPLAY_EXTENSION_ERROR) {
- throw ParserException(result.error);
- } else {
- // We move to the next one!
+ string another_parser_error;
+ // Creating a new scope to allow extensions to use PostgresParser, which is not reentrant
+ {
+ PostgresParser another_parser;
+ another_parser.Parse(query_statement);
+ // LCOV_EXCL_START
+ // first see if DuckDB can parse this individual query statement
+ if (another_parser.success) {
+ if (!another_parser.parse_tree) {
+ // empty statement
+ continue;
  }
+ transformer.TransformParseTree(another_parser.parse_tree, statements);
+ // important to set in the case of a mixture of DDB and parser ext statements
+ statements.back()->stmt_length = query_statement.size() - 1;
+ statements.back()->stmt_location = stmt_loc;
+ stmt_loc += query_statement.size();
+ continue;
+ } else {
+ another_parser_error = QueryErrorContext::Format(query, another_parser.error_message,
+ another_parser.error_location - 1);
  }
- if (!parsed_single_statement) {
- parser_error = QueryErrorContext::Format(query, another_parser.error_message,
- another_parser.error_location - 1);
- throw ParserException(parser_error);
+ } // LCOV_EXCL_STOP
+ // LCOV_EXCL_START
+ // let extensions parse the statement which DuckDB failed to parse
+ bool parsed_single_statement = false;
+ for (auto &ext : *options.extensions) {
+ D_ASSERT(!parsed_single_statement);
+ D_ASSERT(ext.parse_function);
+ auto result = ext.parse_function(ext.parser_info.get(), query_statement);
+ if (result.type == ParserExtensionResultType::PARSE_SUCCESSFUL) {
+ auto statement = make_uniq<ExtensionStatement>(ext, std::move(result.parse_data));
+ statement->stmt_length = query_statement.size() - 1;
+ statement->stmt_location = stmt_loc;
+ stmt_loc += query_statement.size();
+ statements.push_back(std::move(statement));
+ parsed_single_statement = true;
+ break;
+ } else if (result.type == ParserExtensionResultType::DISPLAY_EXTENSION_ERROR) {
+ throw ParserException(result.error);
+ } else {
+ // We move to the next one!
  }
  }
- // LCOV_EXCL_STOP
+ if (!parsed_single_statement) {
+ throw ParserException(parser_error);
+ } // LCOV_EXCL_STOP
  }
  }
  }
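The new explicit scope exists because the Postgres-derived parser keeps process-global state, so an extension cannot construct its own `PostgresParser` while the built-in one is still alive. A self-contained toy (not the real classes) of the same pattern: try the built-in parser inside a block, remember only its error, and let it be destroyed before any fallback parser is created.

```cpp
#include <cassert>
#include <string>

// Toy stand-in for a non-reentrant parser: only one live instance is allowed,
// like the global state behind PostgresParser.
struct ToyParser {
	static bool active;
	ToyParser() {
		assert(!active && "parser is not reentrant");
		active = true;
	}
	~ToyParser() {
		active = false;
	}
	bool Parse(const std::string &s) {
		return s == "SELECT 1";
	}
};
bool ToyParser::active = false;

bool ParseWithFallback(const std::string &stmt) {
	std::string builtin_error;
	{
		ToyParser builtin; // first attempt with the built-in parser
		if (builtin.Parse(stmt)) {
			return true;
		}
		builtin_error = "could not parse: " + stmt; // keep only the error
	} // builtin destroyed here, so a fallback may construct its own parser
	ToyParser fallback;                // safe: the previous instance is gone
	return fallback.Parse("SELECT 1"); // pretend the extension rewrites the input
}

int main() {
	assert(ParseWithFallback("SELECT 1"));
	assert(ParseWithFallback("NOT SQL"));
	return 0;
}
```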
@@ -151,4 +151,16 @@ bool ArenaAllocator::IsEmpty() const {
  return head == nullptr;
  }
 
+ idx_t ArenaAllocator::SizeInBytes() const {
+ idx_t total_size = 0;
+ if (!IsEmpty()) {
+ auto current = head.get();
+ while (current != nullptr) {
+ total_size += current->current_position;
+ current = current->next.get();
+ }
+ }
+ return total_size;
+ }
+
  } // namespace duckdb
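Taken together, the `SizeInBytes()` additions above form one chain: the collection sums its segments, each segment adds its block allocator and its string heap, and the heap reports its arena's used bytes. A compact toy model (not DuckDB code) of that delegation, which is ultimately what `ParquetWriteSink` consults when deciding to flush a row group by bytes:

```cpp
#include <cassert>
#include <cstdint>
#include <vector>

using idx_t = uint64_t;

// Arena: bytes actually written into its chunks.
struct ToyArena {
	idx_t used = 0;
	idx_t SizeInBytes() const { return used; }
};

// String heap: just forwards to its arena.
struct ToyStringHeap {
	ToyArena arena;
	idx_t SizeInBytes() const { return arena.SizeInBytes(); }
};

// Block allocator: sums the sizes of the blocks it handed out.
struct ToyAllocator {
	std::vector<idx_t> block_sizes;
	idx_t SizeInBytes() const {
		idx_t total = 0;
		for (auto size : block_sizes) {
			total += size;
		}
		return total;
	}
};

// Segment: fixed-width data lives in the allocator, strings in the heap.
struct ToySegment {
	ToyAllocator allocator;
	ToyStringHeap heap;
	idx_t SizeInBytes() const { return allocator.SizeInBytes() + heap.SizeInBytes(); }
};

// Collection: sums all of its segments.
struct ToyCollection {
	std::vector<ToySegment> segments;
	idx_t SizeInBytes() const {
		idx_t total = 0;
		for (auto &segment : segments) {
			total += segment.SizeInBytes();
		}
		return total;
	}
};

int main() {
	ToySegment segment;
	segment.allocator.block_sizes = {4096, 4096}; // two data blocks
	segment.heap.arena.used = 128;                // string payload

	ToyCollection collection;
	collection.segments.push_back(segment);
	assert(collection.SizeInBytes() == 4096 + 4096 + 128);
	return 0;
}
```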