npm - duckdb - Versions diffs - 0.5.2-dev2181.0 → 0.5.2-dev2196.0 - Mend

duckdb 0.5.2-dev2181.0 → 0.5.2-dev2196.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (4)

package/package.json +1 -1
package/src/duckdb.cpp +62 -37
package/src/duckdb.hpp +14 -9
package/src/parquet-amalgamation.cpp +37741 -37741

package/package.json CHANGED Viewed

@@ -2,7 +2,7 @@
   "name": "duckdb",
   "main": "./lib/duckdb.js",
   "types": "./lib/duckdb.d.ts",
-  "version": "0.5.2-dev2181.0",
+  "version": "0.5.2-dev2196.0",
   "description": "DuckDB node.js API",
   "gypfile": true,
   "dependencies": {

package/src/duckdb.cpp CHANGED Viewed

@@ -81381,17 +81381,21 @@ bool BufferedCSVReader::TryParseCSV(ParserMode parser_mode, DataChunk &insert_ch
 namespace duckdb {
-CSVBuffer::CSVBuffer(idx_t buffer_size_p, CSVFileHandle &file_handle) : first_buffer(true) {
-	buffer = unique_ptr<char[]>(new char[buffer_size_p]);
-	actual_size = file_handle.Read(buffer.get(), buffer_size_p);
+CSVBuffer::CSVBuffer(ClientContext &context, idx_t buffer_size_p, CSVFileHandle &file_handle)
+    : context(context), first_buffer(true) {
+	this->handle = AllocateBuffer(buffer_size_p);
+	auto buffer = Ptr();
+	actual_size = file_handle.Read(buffer, buffer_size_p);
 	if (actual_size >= 3 && buffer[0] == '\xEF' && buffer[1] == '\xBB' && buffer[2] == '\xBF') {
 		start_position += 3;
 	}
 	last_buffer = file_handle.FinishedReading();
 }
-CSVBuffer::CSVBuffer(unique_ptr<char[]> buffer_p, idx_t buffer_size_p, idx_t actual_size_p, bool final_buffer)
-    : buffer(move(buffer_p)), actual_size(actual_size_p), last_buffer(final_buffer) {
+CSVBuffer::CSVBuffer(ClientContext &context, BufferHandle buffer_p, idx_t buffer_size_p, idx_t actual_size_p,
+                     bool final_buffer)
+    : context(context), handle(move(buffer_p)), actual_size(actual_size_p), last_buffer(final_buffer) {
 }
 unique_ptr<CSVBuffer> CSVBuffer::Next(CSVFileHandle &file_handle, idx_t set_buffer_size) {
@@ -81400,14 +81404,18 @@ unique_ptr<CSVBuffer> CSVBuffer::Next(CSVFileHandle &file_handle, idx_t set_buff
 		return nullptr;
 	}
-	auto next_buffer = unique_ptr<char[]>(new char[set_buffer_size]);
-	idx_t next_buffer_actual_size = file_handle.Read(next_buffer.get(), set_buffer_size);
+	auto next_buffer = AllocateBuffer(set_buffer_size);
+	idx_t next_buffer_actual_size = file_handle.Read(next_buffer.Ptr(), set_buffer_size);
-	return make_unique<CSVBuffer>(move(next_buffer), set_buffer_size, next_buffer_actual_size,
+	return make_unique<CSVBuffer>(context, move(next_buffer), set_buffer_size, next_buffer_actual_size,
 	                              file_handle.FinishedReading());
 }
+BufferHandle CSVBuffer::AllocateBuffer(idx_t buffer_size) {
+	auto &buffer_manager = BufferManager::GetBufferManager(context);
+	return buffer_manager.Allocate(MaxValue<idx_t>(Storage::BLOCK_SIZE, buffer_size));
+}
 idx_t CSVBuffer::GetBufferSize() {
 	return actual_size;
 }
@@ -81458,6 +81466,9 @@ static bool ParseBoolean(const Value &value, const string &loption) {
 }
 static string ParseString(const Value &value, const string &loption) {
+	if (value.IsNull()) {
+		return string();
+	}
 	if (value.type().id() == LogicalTypeId::LIST) {
 		auto &children = ListValue::GetChildren(value);
 		if (children.size() != 1) {
@@ -81612,6 +81623,11 @@ void BufferedCSVReaderOptions::SetReadOption(const string &loption, const Value
 		ignore_errors = ParseBoolean(value, loption);
 	} else if (loption == "union_by_name") {
 		union_by_name = ParseBoolean(value, loption);
+	} else if (loption == "buffer_size") {
+		buffer_size = ParseInteger(value, loption);
+		if (buffer_size == 0) {
+			throw InvalidInputException("Buffer Size option must be higher than 0");
+		}
 	} else {
 		throw BinderException("Unrecognized option for CSV reader \"%s\"", loption);
 	}
@@ -81725,34 +81741,38 @@ struct CSVBufferRead {
 	const char &operator[](size_t i) const {
 		if (i < buffer->GetBufferSize()) {
-			return buffer->buffer[i];
+			auto buffer_ptr = buffer->Ptr();
+			return buffer_ptr[i];
 		}
-		return next_buffer->buffer[i - buffer->GetBufferSize()];
+		auto next_ptr = next_buffer->Ptr();
+		return next_ptr[i - buffer->GetBufferSize()];
 	}
 	string_t GetValue(idx_t start_buffer, idx_t position_buffer, idx_t offset) {
 		idx_t length = position_buffer - start_buffer - offset;
 		// 1) It's all in the current buffer
 		if (start_buffer + length <= buffer->GetBufferSize()) {
-			auto buffer_ptr = buffer->buffer.get();
+			auto buffer_ptr = buffer->Ptr();
 			return string_t(buffer_ptr + start_buffer, length);
 		} else if (start_buffer >= buffer->GetBufferSize()) {
 			// 2) It's all in the next buffer
 			D_ASSERT(next_buffer);
 			D_ASSERT(next_buffer->GetBufferSize() >= length + (start_buffer - buffer->GetBufferSize()));
-			auto buffer_ptr = next_buffer->buffer.get();
+			auto buffer_ptr = next_buffer->Ptr();
 			return string_t(buffer_ptr + (start_buffer - buffer->GetBufferSize()), length);
 		} else {
 			// 3) It starts in the current buffer and ends in the next buffer
 			D_ASSERT(next_buffer);
 			auto intersection = unique_ptr<char[]>(new char[length]);
 			idx_t cur_pos = 0;
+			auto buffer_ptr = buffer->Ptr();
 			for (idx_t i = start_buffer; i < buffer->GetBufferSize(); i++) {
-				intersection[cur_pos++] = buffer->buffer[i];
+				intersection[cur_pos++] = buffer_ptr[i];
 			}
 			idx_t nxt_buffer_pos = 0;
+			auto next_buffer_ptr = next_buffer->Ptr();
 			for (; cur_pos < length; cur_pos++) {
-				intersection[cur_pos] = next_buffer->buffer[nxt_buffer_pos++];
+				intersection[cur_pos] = next_buffer_ptr[nxt_buffer_pos++];
 			}
 			intersections.emplace_back(move(intersection));
 			return string_t(intersections.back().get(), length);
@@ -82065,10 +82085,11 @@ normal : {
 	/* state: normal parsing state */
 	// this state parses the remainder of a non-quoted value until we reach a delimiter or newline
 	for (; position_buffer < end_buffer; position_buffer++) {
-		if ((*buffer)[position_buffer] == options.delimiter[0]) {
+		auto c = (*buffer)[position_buffer];
+		if (c == options.delimiter[0]) {
 			// delimiter: end the value and add it to the chunk
 			goto add_value;
-		} else if (StringUtil::CharacterIsNewline((*buffer)[position_buffer])) {
+		} else if (StringUtil::CharacterIsNewline(c)) {
 			// newline: add row
 			D_ASSERT(try_add_line || column == insert_chunk.ColumnCount() - 1);
 			goto add_row;
@@ -82138,10 +82159,11 @@ in_quotes:
 	has_quotes = true;
 	position_buffer++;
 	for (; position_buffer < end_buffer; position_buffer++) {
-		if ((*buffer)[position_buffer] == options.quote[0]) {
+		auto c = (*buffer)[position_buffer];
+		if (c == options.quote[0]) {
 			// quote: move to unquoted state
 			goto unquote;
-		} else if ((*buffer)[position_buffer] == options.escape[0]) {
+		} else if (c == options.escape[0]) {
 			// escape: store the escaped position and move to handle_escape state
 			escape_positions.push_back(position_buffer - start_buffer);
 			goto handle_escape;
@@ -82163,7 +82185,7 @@ in_quotes:
 		goto in_quotes;
 	}
-unquote:
+unquote : {
 	/* state: unquote: this state handles the state directly after we unquote*/
 	//
 	// in this state we expect either another quote (entering the quoted state again, and escaping the quote)
@@ -82173,16 +82195,16 @@ unquote:
 		offset = 1;
 		goto final_state;
 	}
-	if ((*buffer)[position_buffer] == options.quote[0] &&
-	    (options.escape.empty() || options.escape[0] == options.quote[0])) {
+	auto c = (*buffer)[position_buffer];
+	if (c == options.quote[0] && (options.escape.empty() || options.escape[0] == options.quote[0])) {
 		// escaped quote, return to quoted state and store escape position
 		escape_positions.push_back(position_buffer - start_buffer);
 		goto in_quotes;
-	} else if ((*buffer)[position_buffer] == options.delimiter[0]) {
+	} else if (c == options.delimiter[0]) {
 		// delimiter, add value
 		offset = 1;
 		goto add_value;
-	} else if (StringUtil::CharacterIsNewline((*buffer)[position_buffer])) {
+	} else if (StringUtil::CharacterIsNewline(c)) {
 		offset = 1;
 		D_ASSERT(column == insert_chunk.ColumnCount() - 1);
 		goto add_row;
@@ -82197,6 +82219,7 @@ unquote:
 		    options.file_path, GetLineNumberStr(linenr, linenr_estimated).c_str(), options.ToString());
 		return false;
 	}
+}
 handle_escape : {
 	/* state: handle_escape */
 	// escape should be followed by a quote or another escape character
@@ -125104,7 +125127,7 @@ void SubstringDetection(string &str_1, string &str_2, const string &name_str_1,
 	if (str_1.empty() || str_2.empty()) {
 		return;
 	}
-	if ((str_1.find(str_2) != string::npos || str_2.find(str_1) != std::string::npos) && str_1 != "NULL") {
+	if ((str_1.find(str_2) != string::npos || str_2.find(str_1) != std::string::npos)) {
 		throw BinderException("%s must not appear in the %s specification and vice versa", name_str_1, name_str_2);
 	}
 }
@@ -125197,6 +125220,11 @@ static unique_ptr<FunctionData> ReadCSVBind(ClientContext &context, CopyInfo &in
 		options.force_not_null.resize(expected_types.size(), false);
 	}
 	bind_data->FinalizeRead(context);
+	if (!bind_data->single_threaded && options.auto_detect) {
+		options.file_path = bind_data->files[0];
+		auto initial_reader = make_unique<BufferedCSVReader>(context, options);
+		options = initial_reader->options;
+	}
 	return move(bind_data);
 }
@@ -126339,11 +126367,6 @@ static unique_ptr<FunctionData> ReadCSVBind(ClientContext &context, TableFunctio
 			options.include_file_name = BooleanValue::Get(kv.second);
 		} else if (loption == "hive_partitioning") {
 			options.include_parsed_hive_partitions = BooleanValue::Get(kv.second);
-		} else if (loption == "buffer_size") {
-			options.buffer_size = kv.second.GetValue<uint64_t>();
-			if (options.buffer_size == 0) {
-				throw InvalidInputException("Buffer Size option must be higher than 0");
-			}
 		} else {
 			options.SetReadOption(loption, kv.second, names);
 		}
@@ -126362,7 +126385,7 @@ static unique_ptr<FunctionData> ReadCSVBind(ClientContext &context, TableFunctio
 		} else {
 			D_ASSERT(return_types.size() == names.size());
 		}
-		options = result->options;
+		options = initial_reader->options;
 		result->sql_types = initial_reader->sql_types;
 		result->initial_reader = move(initial_reader);
 	} else {
@@ -126460,8 +126483,9 @@ static unique_ptr<FunctionData> ReadCSVAutoBind(ClientContext &context, TableFun
 //===--------------------------------------------------------------------===//
 struct ParallelCSVGlobalState : public GlobalTableFunctionState {
 public:
-	ParallelCSVGlobalState(unique_ptr<CSVFileHandle> file_handle_p, vector<string> &files_path_p,
-	                       idx_t system_threads_p, idx_t buffer_size_p, idx_t rows_to_skip)
+	ParallelCSVGlobalState(ClientContext &context, unique_ptr<CSVFileHandle> file_handle_p,
+	                       vector<string> &files_path_p, idx_t system_threads_p, idx_t buffer_size_p,
+	                       idx_t rows_to_skip)
 	    : file_handle(move(file_handle_p)), system_threads(system_threads_p), buffer_size(buffer_size_p) {
 		for (idx_t i = 0; i < rows_to_skip; i++) {
 			file_handle->ReadLine();
@@ -126475,7 +126499,7 @@ public:
 		} else {
 			bytes_per_local_state = file_size / MaxThreads();
 		}
-		current_buffer = make_shared<CSVBuffer>(buffer_size, *file_handle);
+		current_buffer = make_shared<CSVBuffer>(context, buffer_size, *file_handle);
 		next_buffer = current_buffer->Next(*file_handle, buffer_size);
 	}
 	ParallelCSVGlobalState() {
@@ -126562,7 +126586,7 @@ unique_ptr<CSVBufferRead> ParallelCSVGlobalState::Next(ClientContext &context, R
 		if (file_index < bind_data.files.size()) {
 			bind_data.options.file_path = bind_data.files[file_index++];
 			file_handle = ReadCSV::OpenCSV(bind_data.options, context);
-			next_buffer = make_shared<CSVBuffer>(buffer_size, *file_handle);
+			next_buffer = make_shared<CSVBuffer>(context, buffer_size, *file_handle);
 		}
 	}
 	return result;
@@ -126583,8 +126607,9 @@ static unique_ptr<GlobalTableFunctionState> ParallelCSVInitGlobal(ClientContext
 		file_handle = ReadCSV::OpenCSV(bind_data.options, context);
 	}
 	idx_t rows_to_skip = bind_data.options.skip_rows + (bind_data.options.has_header ? 1 : 0);
-	return make_unique<ParallelCSVGlobalState>(move(file_handle), bind_data.files, context.db->NumberOfThreads(),
-	                                           bind_data.options.buffer_size, rows_to_skip);
+	return make_unique<ParallelCSVGlobalState>(context, move(file_handle), bind_data.files,
+	                                           context.db->NumberOfThreads(), bind_data.options.buffer_size,
+	                                           rows_to_skip);
 }
 //===--------------------------------------------------------------------===//

package/src/duckdb.hpp CHANGED Viewed

@@ -11,8 +11,8 @@ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLI
 #pragma once
 #define DUCKDB_AMALGAMATION 1
 #define DUCKDB_AMALGAMATION_EXTENDED 1
-#define DUCKDB_SOURCE_ID "fc9fe05841"
-#define DUCKDB_VERSION "v0.5.2-dev2181"
+#define DUCKDB_SOURCE_ID "0ac5e8ee35"
+#define DUCKDB_VERSION "v0.5.2-dev2196"
 //===----------------------------------------------------------------------===//
 //                         DuckDB
 //
@@ -6367,9 +6367,6 @@ public:
 	DUCKDB_API static bool CharacterIsNewline(char c) {
 		return c == '\n' || c == '\r';
 	}
-	DUCKDB_API static bool CharacterIsNullTerminator(char c) {
-		return c == '\0';
-	}
 	DUCKDB_API static bool CharacterIsDigit(char c) {
 		return c >= '0' && c <= '9';
 	}
@@ -28261,6 +28258,7 @@ private:
 } // namespace duckdb
 namespace duckdb {
 class CSVBuffer {
@@ -28269,10 +28267,10 @@ public:
 	static constexpr idx_t INITIAL_BUFFER_SIZE_COLOSSAL = 32000000; // 32MB
 	//! Constructor for Initial Buffer
-	CSVBuffer(idx_t buffer_size_p, CSVFileHandle &file_handle);
+	CSVBuffer(ClientContext &context, idx_t buffer_size_p, CSVFileHandle &file_handle);
 	//! Constructor for `Next()` Buffers
-	CSVBuffer(unique_ptr<char[]> buffer_p, idx_t buffer_size_p, idx_t actual_size_p, bool final_buffer);
+	CSVBuffer(ClientContext &context, BufferHandle handle, idx_t buffer_size_p, idx_t actual_size_p, bool final_buffer);
 	//! Creates a new buffer with the next part of the CSV File
 	unique_ptr<CSVBuffer> Next(CSVFileHandle &file_handle, idx_t set_buffer_size);
@@ -28288,10 +28286,17 @@ public:
 	//! If this buffer is the first buffer of the CSV File
 	bool IsCSVFileFirstBuffer();
-	//! The actual buffer
-	unique_ptr<char[]> buffer;
+	BufferHandle AllocateBuffer(idx_t buffer_size);
+	char *Ptr() {
+		return (char *)handle.Ptr();
+	}
 private:
+	ClientContext &context;
+	BufferHandle handle;
 	//! Actual size can be smaller than the buffer size in case we allocate it too optimistically.
 	idx_t actual_size;
 	//! We need to check for Byte Order Mark, to define the start position of this buffer