npm - duckdb - Versions diffs - 0.9.1-dev97.0 → 0.9.2-dev2.0 - Mend

duckdb 0.9.1-dev97.0 → 0.9.2-dev2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (84)

package/src/duckdb/extension/parquet/column_reader.cpp CHANGED Viewed

@@ -243,6 +243,7 @@ void ColumnReader::InitializeRead(idx_t row_group_idx_p, const vector<ColumnChun
 void ColumnReader::PrepareRead(parquet_filter_t &filter) {
 	dict_decoder.reset();
 	defined_decoder.reset();
+	bss_decoder.reset();
 	block.reset();
 	PageHeader page_hdr;
 	page_hdr.read(protocol);
@@ -443,6 +444,13 @@ void ColumnReader::PrepareDataPage(PageHeader &page_hdr) {
 		PrepareDeltaByteArray(*block);
 		break;
 	}
+	case Encoding::BYTE_STREAM_SPLIT: {
+		// Subtract 1 from length as the block is allocated with 1 extra byte,
+		// but the byte stream split encoder needs to know the correct data size.
+		bss_decoder = make_uniq<BssDecoder>(block->ptr, block->len - 1);
+		block->inc(block->len);
+		break;
+	}
 	case Encoding::PLAIN:
 		// nothing to do here, will be read directly below
 		break;
@@ -488,7 +496,7 @@ idx_t ColumnReader::Read(uint64_t num_values, parquet_filter_t &filter, data_ptr
 		idx_t null_count = 0;
-		if ((dict_decoder || dbp_decoder || rle_decoder) && HasDefines()) {
+		if ((dict_decoder || dbp_decoder || rle_decoder || bss_decoder) && HasDefines()) {
 			// we need the null count because the dictionary offsets have no entries for nulls
 			for (idx_t i = 0; i < read_now; i++) {
 				if (define_out[i + result_offset] != max_define) {
@@ -534,6 +542,23 @@ idx_t ColumnReader::Read(uint64_t num_values, parquet_filter_t &filter, data_ptr
 		} else if (byte_array_data) {
 			// DELTA_BYTE_ARRAY or DELTA_LENGTH_BYTE_ARRAY
 			DeltaByteArray(define_out, read_now, filter, result_offset, result);
+		} else if (bss_decoder) {
+			auto read_buf = make_shared<ResizeableBuffer>();
+			switch (schema.type) {
+			case duckdb_parquet::format::Type::FLOAT:
+				read_buf->resize(reader.allocator, sizeof(float) * (read_now - null_count));
+				bss_decoder->GetBatch<float>(read_buf->ptr, read_now - null_count);
+				break;
+			case duckdb_parquet::format::Type::DOUBLE:
+				read_buf->resize(reader.allocator, sizeof(double) * (read_now - null_count));
+				bss_decoder->GetBatch<double>(read_buf->ptr, read_now - null_count);
+				break;
+			default:
+				throw std::runtime_error("BYTE_STREAM_SPLIT encoding is only supported for FLOAT or DOUBLE data");
+			}
+			Plain(read_buf, define_out, read_now, filter, result_offset, result);
 		} else {
 			PlainReference(block, result);
 			Plain(block, define_out, read_now, filter, result_offset, result);

package/src/duckdb/extension/parquet/column_writer.cpp CHANGED Viewed

@@ -796,6 +796,13 @@ struct ParquetTimestampSOperator : public BaseParquetOperator {
 	}
 };
+struct ParquetTimeTZOperator : public BaseParquetOperator {
+	template <class SRC, class TGT>
+	static TGT Operation(SRC input) {
+		return input.time().micros;
+	}
+};
 struct ParquetHugeintOperator {
 	template <class SRC, class TGT>
 	static TGT Operation(SRC input) {
@@ -1975,12 +1982,14 @@ unique_ptr<ColumnWriter> ColumnWriter::CreateWriterRecursive(vector<duckdb_parqu
 		                                                         max_define, can_have_nulls);
 	case LogicalTypeId::BIGINT:
 	case LogicalTypeId::TIME:
-	case LogicalTypeId::TIME_TZ:
 	case LogicalTypeId::TIMESTAMP:
 	case LogicalTypeId::TIMESTAMP_TZ:
 	case LogicalTypeId::TIMESTAMP_MS:
 		return make_uniq<StandardColumnWriter<int64_t, int64_t>>(writer, schema_idx, std::move(schema_path), max_repeat,
 		                                                         max_define, can_have_nulls);
+	case LogicalTypeId::TIME_TZ:
+		return make_uniq<StandardColumnWriter<dtime_tz_t, int64_t, ParquetTimeTZOperator>>(
+		    writer, schema_idx, std::move(schema_path), max_repeat, max_define, can_have_nulls);
 	case LogicalTypeId::HUGEINT:
 		return make_uniq<StandardColumnWriter<hugeint_t, double, ParquetHugeintOperator>>(
 		    writer, schema_idx, std::move(schema_path), max_repeat, max_define, can_have_nulls);

package/src/duckdb/extension/parquet/include/column_reader.hpp CHANGED Viewed

@@ -9,6 +9,7 @@
 #pragma once
 #include "duckdb.hpp"
+#include "parquet_bss_decoder.hpp"
 #include "parquet_dbp_decoder.hpp"
 #include "parquet_rle_bp_decoder.hpp"
 #include "parquet_statistics.hpp"
@@ -161,6 +162,7 @@ private:
 	unique_ptr<RleBpDecoder> repeated_decoder;
 	unique_ptr<DbpDecoder> dbp_decoder;
 	unique_ptr<RleBpDecoder> rle_decoder;
+	unique_ptr<BssDecoder> bss_decoder;
 	// dummies for Skip()
 	parquet_filter_t none_filter;

package/src/duckdb/extension/parquet/include/parquet_bss_decoder.hpp ADDED Viewed

@@ -0,0 +1,49 @@
+//===----------------------------------------------------------------------===//
+//                         DuckDB
+//
+// parquet_bss_decoder.hpp
+//
+//
+//===----------------------------------------------------------------------===//
+#pragma once
+#include "parquet_types.h"
+#include "resizable_buffer.hpp"
+namespace duckdb {
+/// Decoder for the Byte Stream Split encoding
+class BssDecoder {
+public:
+	/// Create a decoder object. buffer/buffer_len is the encoded data.
+	BssDecoder(data_ptr_t buffer, uint32_t buffer_len) : buffer_(buffer, buffer_len), value_offset_(0) {
+	}
+public:
+	template <typename T>
+	void GetBatch(data_ptr_t values_target_ptr, uint32_t batch_size) {
+		if (buffer_.len % sizeof(T) != 0) {
+			std::stringstream error;
+			error << "Data buffer size for the BYTE_STREAM_SPLIT encoding (" << buffer_.len
+			      << ") should be a multiple of the type size (" << sizeof(T) << ")";
+			throw std::runtime_error(error.str());
+		}
+		uint32_t num_buffer_values = buffer_.len / sizeof(T);
+		buffer_.available((value_offset_ + batch_size) * sizeof(T));
+		for (uint32_t byte_offset = 0; byte_offset < sizeof(T); ++byte_offset) {
+			data_ptr_t input_bytes = buffer_.ptr + byte_offset * num_buffer_values + value_offset_;
+			for (uint32_t i = 0; i < batch_size; ++i) {
+				values_target_ptr[byte_offset + i * sizeof(T)] = *(input_bytes + i);
+			}
+		}
+		value_offset_ += batch_size;
+	}
+private:
+	ByteBuffer buffer_;
+	uint32_t value_offset_;
+};
+} // namespace duckdb

package/src/duckdb/extension/parquet/parquet_extension.cpp CHANGED Viewed

@@ -20,6 +20,8 @@
 #include "duckdb/common/enums/file_compression_type.hpp"
 #include "duckdb/common/file_system.hpp"
 #include "duckdb/common/multi_file_reader.hpp"
+#include "duckdb/common/serializer/deserializer.hpp"
+#include "duckdb/common/serializer/serializer.hpp"
 #include "duckdb/common/types/chunk_collection.hpp"
 #include "duckdb/function/copy_function.hpp"
 #include "duckdb/function/table_function.hpp"
@@ -34,8 +36,6 @@
 #include "duckdb/planner/operator/logical_get.hpp"
 #include "duckdb/storage/statistics/base_statistics.hpp"
 #include "duckdb/storage/table/row_group.hpp"
-#include "duckdb/common/serializer/serializer.hpp"
-#include "duckdb/common/serializer/deserializer.hpp"
 #endif
 namespace duckdb {
@@ -983,8 +983,7 @@ idx_t ParquetWriteDesiredBatchSize(ClientContext &context, FunctionData &bind_da
 //===--------------------------------------------------------------------===//
 unique_ptr<TableRef> ParquetScanReplacement(ClientContext &context, const string &table_name,
                                             ReplacementScanData *data) {
-	auto lower_name = StringUtil::Lower(table_name);
-	if (!StringUtil::EndsWith(lower_name, ".parquet") && !StringUtil::Contains(lower_name, ".parquet?")) {
+	if (!ReplacementScan::CanReplace(table_name, {"parquet"})) {
 		return nullptr;
 	}
 	auto table_function = make_uniq<TableFunctionRef>();

package/src/duckdb/extension/parquet/parquet_timestamp.cpp CHANGED Viewed

@@ -66,10 +66,9 @@ dtime_t ParquetIntToTimeNs(const int64_t &raw_time) {
 	return Time::FromTimeNs(raw_time);
 }
-dtime_tz_t ParquetIntToTimeTZ(const int64_t &raw_time) {
-	dtime_tz_t result;
-	result.bits = raw_time;
-	return result;
+dtime_tz_t ParquetIntToTimeTZ(const int64_t &raw_micros) {
+	dtime_t t(raw_micros);
+	return dtime_tz_t(t, 0);
 }
 } // namespace duckdb

package/src/duckdb/src/common/arrow/appender/list_data.cpp CHANGED Viewed

@@ -69,10 +69,10 @@ void ArrowListData::Finalize(ArrowAppendData &append_data, const LogicalType &ty
 	result->buffers[1] = append_data.main_buffer.data();
 	auto &child_type = ListType::GetChildType(type);
-	append_data.child_pointers.resize(1);
+	ArrowAppender::AddChildren(append_data, 1);
 	result->children = append_data.child_pointers.data();
 	result->n_children = 1;
-	append_data.child_pointers[0] = ArrowAppender::FinalizeChild(child_type, *append_data.child_data[0]);
+	append_data.child_arrays[0] = *ArrowAppender::FinalizeChild(child_type, std::move(append_data.child_data[0]));
 }
 } // namespace duckdb

package/src/duckdb/src/common/arrow/appender/map_data.cpp CHANGED Viewed

@@ -52,33 +52,38 @@ void ArrowMapData::Append(ArrowAppendData &append_data, Vector &input, idx_t fro
 void ArrowMapData::Finalize(ArrowAppendData &append_data, const LogicalType &type, ArrowArray *result) {
 	// set up the main map buffer
+	D_ASSERT(result);
 	result->n_buffers = 2;
 	result->buffers[1] = append_data.main_buffer.data();
 	// the main map buffer has a single child: a struct
-	append_data.child_pointers.resize(1);
+	ArrowAppender::AddChildren(append_data, 1);
 	result->children = append_data.child_pointers.data();
 	result->n_children = 1;
-	append_data.child_pointers[0] = ArrowAppender::FinalizeChild(type, *append_data.child_data[0]);
-	// now that struct has two children: the key and the value type
 	auto &struct_data = *append_data.child_data[0];
-	auto &struct_result = append_data.child_pointers[0];
-	struct_data.child_pointers.resize(2);
+	auto struct_result = ArrowAppender::FinalizeChild(type, std::move(append_data.child_data[0]));
+	// Initialize the struct array data
+	const auto struct_child_count = 2;
+	ArrowAppender::AddChildren(struct_data, struct_child_count);
+	struct_result->children = struct_data.child_pointers.data();
 	struct_result->n_buffers = 1;
-	struct_result->n_children = 2;
+	struct_result->n_children = struct_child_count;
 	struct_result->length = struct_data.child_data[0]->row_count;
-	struct_result->children = struct_data.child_pointers.data();
+	append_data.child_arrays[0] = *struct_result;
 	D_ASSERT(struct_data.child_data[0]->row_count == struct_data.child_data[1]->row_count);
 	auto &key_type = MapType::KeyType(type);
 	auto &value_type = MapType::ValueType(type);
-	struct_data.child_pointers[0] = ArrowAppender::FinalizeChild(key_type, *struct_data.child_data[0]);
-	struct_data.child_pointers[1] = ArrowAppender::FinalizeChild(value_type, *struct_data.child_data[1]);
+	auto key_data = ArrowAppender::FinalizeChild(key_type, std::move(struct_data.child_data[0]));
+	struct_data.child_arrays[0] = *key_data;
+	struct_data.child_arrays[1] = *ArrowAppender::FinalizeChild(value_type, std::move(struct_data.child_data[1]));
 	// keys cannot have null values
-	if (struct_data.child_pointers[0]->null_count > 0) {
+	if (key_data->null_count > 0) {
 		throw std::runtime_error("Arrow doesn't accept NULL keys on Maps");
 	}
 }

package/src/duckdb/src/common/arrow/appender/struct_data.cpp CHANGED Viewed

@@ -33,12 +33,12 @@ void ArrowStructData::Finalize(ArrowAppendData &append_data, const LogicalType &
 	result->n_buffers = 1;
 	auto &child_types = StructType::GetChildTypes(type);
-	append_data.child_pointers.resize(child_types.size());
+	ArrowAppender::AddChildren(append_data, child_types.size());
 	result->children = append_data.child_pointers.data();
 	result->n_children = child_types.size();
 	for (idx_t i = 0; i < child_types.size(); i++) {
 		auto &child_type = child_types[i].second;
-		append_data.child_pointers[i] = ArrowAppender::FinalizeChild(child_type, *append_data.child_data[i]);
+		append_data.child_arrays[i] = *ArrowAppender::FinalizeChild(child_type, std::move(append_data.child_data[i]));
 	}
 }

package/src/duckdb/src/common/arrow/appender/union_data.cpp CHANGED Viewed

@@ -58,12 +58,12 @@ void ArrowUnionData::Finalize(ArrowAppendData &append_data, const LogicalType &t
 	result->buffers[1] = append_data.main_buffer.data();
 	auto &child_types = UnionType::CopyMemberTypes(type);
-	append_data.child_pointers.resize(child_types.size());
+	ArrowAppender::AddChildren(append_data, child_types.size());
 	result->children = append_data.child_pointers.data();
 	result->n_children = child_types.size();
 	for (idx_t i = 0; i < child_types.size(); i++) {
 		auto &child_type = child_types[i].second;
-		append_data.child_pointers[i] = ArrowAppender::FinalizeChild(child_type, *append_data.child_data[i]);
+		append_data.child_arrays[i] = *ArrowAppender::FinalizeChild(child_type, std::move(append_data.child_data[i]));
 	}
 }

package/src/duckdb/src/common/arrow/arrow_appender.cpp CHANGED Viewed

@@ -39,18 +39,31 @@ void ArrowAppender::ReleaseArray(ArrowArray *array) {
 	if (!array || !array->release) {
 		return;
 	}
-	array->release = nullptr;
 	auto holder = static_cast<ArrowAppendData *>(array->private_data);
+	for (int64_t i = 0; i < array->n_children; i++) {
+		auto child = array->children[i];
+		if (!child->release) {
+			// Child was moved out of the array
+			continue;
+		}
+		child->release(child);
+		D_ASSERT(!child->release);
+	}
+	if (array->dictionary && array->dictionary->release) {
+		array->dictionary->release(array->dictionary);
+	}
+	array->release = nullptr;
 	delete holder;
 }
 //===--------------------------------------------------------------------===//
 // Finalize Arrow Child
 //===--------------------------------------------------------------------===//
-ArrowArray *ArrowAppender::FinalizeChild(const LogicalType &type, ArrowAppendData &append_data) {
+ArrowArray *ArrowAppender::FinalizeChild(const LogicalType &type, unique_ptr<ArrowAppendData> append_data_p) {
 	auto result = make_uniq<ArrowArray>();
-	result->private_data = nullptr;
+	auto &append_data = *append_data_p;
+	result->private_data = append_data_p.release();
 	result->release = ArrowAppender::ReleaseArray;
 	result->n_children = 0;
 	result->null_count = 0;
@@ -75,7 +88,7 @@ ArrowArray ArrowAppender::Finalize() {
 	auto root_holder = make_uniq<ArrowAppendData>(options);
 	ArrowArray result;
-	root_holder->child_pointers.resize(types.size());
+	AddChildren(*root_holder, types.size());
 	result.children = root_holder->child_pointers.data();
 	result.n_children = types.size();
@@ -88,10 +101,8 @@ ArrowArray ArrowAppender::Finalize() {
 	result.dictionary = nullptr;
 	root_holder->child_data = std::move(root_data);
-	// FIXME: this violates a property of the arrow format, if root owns all the child memory then consumers can't move
-	// child arrays https://arrow.apache.org/docs/format/CDataInterface.html#moving-child-arrays
 	for (idx_t i = 0; i < root_holder->child_data.size(); i++) {
-		root_holder->child_pointers[i] = ArrowAppender::FinalizeChild(types[i], *root_holder->child_data[i]);
+		root_holder->child_arrays[i] = *ArrowAppender::FinalizeChild(types[i], std::move(root_holder->child_data[i]));
 	}
 	// Release ownership to caller
@@ -238,4 +249,12 @@ unique_ptr<ArrowAppendData> ArrowAppender::InitializeChild(const LogicalType &ty
 	return result;
 }
+void ArrowAppender::AddChildren(ArrowAppendData &data, idx_t count) {
+	data.child_pointers.resize(count);
+	data.child_arrays.resize(count);
+	for (idx_t i = 0; i < count; i++) {
+		data.child_pointers[i] = &data.child_arrays[i];
+	}
+}
 } // namespace duckdb

package/src/duckdb/src/common/arrow/arrow_wrapper.cpp CHANGED Viewed

@@ -16,21 +16,21 @@ namespace duckdb {
 ArrowSchemaWrapper::~ArrowSchemaWrapper() {
 	if (arrow_schema.release) {
 		arrow_schema.release(&arrow_schema);
-		arrow_schema.release = nullptr;
+		D_ASSERT(!arrow_schema.release);
 	}
 }
 ArrowArrayWrapper::~ArrowArrayWrapper() {
 	if (arrow_array.release) {
 		arrow_array.release(&arrow_array);
-		arrow_array.release = nullptr;
+		D_ASSERT(!arrow_array.release);
 	}
 }
 ArrowArrayStreamWrapper::~ArrowArrayStreamWrapper() {
 	if (arrow_array_stream.release) {
 		arrow_array_stream.release(&arrow_array_stream);
-		arrow_array_stream.release = nullptr;
+		D_ASSERT(!arrow_array_stream.release);
 	}
 }

package/src/duckdb/src/common/exception.cpp CHANGED Viewed

@@ -1,5 +1,4 @@
 #include "duckdb/common/exception.hpp"
 #include "duckdb/common/string_util.hpp"
 #include "duckdb/common/to_string.hpp"
 #include "duckdb/common/types.hpp"
@@ -82,91 +81,68 @@ string Exception::ConstructMessageRecursive(const string &msg, std::vector<Excep
 	return ExceptionFormatValue::Format(msg, values);
 }
+struct ExceptionEntry {
+	ExceptionType type;
+	char text[48];
+};
+static constexpr ExceptionEntry EXCEPTION_MAP[] = {{ExceptionType::INVALID, "Invalid"},
+                                                   {ExceptionType::OUT_OF_RANGE, "Out of Range"},
+                                                   {ExceptionType::CONVERSION, "Conversion"},
+                                                   {ExceptionType::UNKNOWN_TYPE, "Unknown Type"},
+                                                   {ExceptionType::DECIMAL, "Decimal"},
+                                                   {ExceptionType::MISMATCH_TYPE, "Mismatch Type"},
+                                                   {ExceptionType::DIVIDE_BY_ZERO, "Divide by Zero"},
+                                                   {ExceptionType::OBJECT_SIZE, "Object Size"},
+                                                   {ExceptionType::INVALID_TYPE, "Invalid type"},
+                                                   {ExceptionType::SERIALIZATION, "Serialization"},
+                                                   {ExceptionType::TRANSACTION, "TransactionContext"},
+                                                   {ExceptionType::NOT_IMPLEMENTED, "Not implemented"},
+                                                   {ExceptionType::EXPRESSION, "Expression"},
+                                                   {ExceptionType::CATALOG, "Catalog"},
+                                                   {ExceptionType::PARSER, "Parser"},
+                                                   {ExceptionType::BINDER, "Binder"},
+                                                   {ExceptionType::PLANNER, "Planner"},
+                                                   {ExceptionType::SCHEDULER, "Scheduler"},
+                                                   {ExceptionType::EXECUTOR, "Executor"},
+                                                   {ExceptionType::CONSTRAINT, "Constraint"},
+                                                   {ExceptionType::INDEX, "Index"},
+                                                   {ExceptionType::STAT, "Stat"},
+                                                   {ExceptionType::CONNECTION, "Connection"},
+                                                   {ExceptionType::SYNTAX, "Syntax"},
+                                                   {ExceptionType::SETTINGS, "Settings"},
+                                                   {ExceptionType::OPTIMIZER, "Optimizer"},
+                                                   {ExceptionType::NULL_POINTER, "NullPointer"},
+                                                   {ExceptionType::IO, "IO"},
+                                                   {ExceptionType::INTERRUPT, "INTERRUPT"},
+                                                   {ExceptionType::FATAL, "FATAL"},
+                                                   {ExceptionType::INTERNAL, "INTERNAL"},
+                                                   {ExceptionType::INVALID_INPUT, "Invalid Input"},
+                                                   {ExceptionType::OUT_OF_MEMORY, "Out of Memory"},
+                                                   {ExceptionType::PERMISSION, "Permission"},
+                                                   {ExceptionType::PARAMETER_NOT_RESOLVED, "Parameter Not Resolved"},
+                                                   {ExceptionType::PARAMETER_NOT_ALLOWED, "Parameter Not Allowed"},
+                                                   {ExceptionType::DEPENDENCY, "Dependency"},
+                                                   {ExceptionType::MISSING_EXTENSION, "Missing Extension"},
+                                                   {ExceptionType::HTTP, "HTTP"},
+                                                   {ExceptionType::AUTOLOAD, "Extension Autoloading"}};
 string Exception::ExceptionTypeToString(ExceptionType type) {
-	switch (type) {
-	case ExceptionType::INVALID:
-		return "Invalid";
-	case ExceptionType::OUT_OF_RANGE:
-		return "Out of Range";
-	case ExceptionType::CONVERSION:
-		return "Conversion";
-	case ExceptionType::UNKNOWN_TYPE:
-		return "Unknown Type";
-	case ExceptionType::DECIMAL:
-		return "Decimal";
-	case ExceptionType::MISMATCH_TYPE:
-		return "Mismatch Type";
-	case ExceptionType::DIVIDE_BY_ZERO:
-		return "Divide by Zero";
-	case ExceptionType::OBJECT_SIZE:
-		return "Object Size";
-	case ExceptionType::INVALID_TYPE:
-		return "Invalid type";
-	case ExceptionType::SERIALIZATION:
-		return "Serialization";
-	case ExceptionType::TRANSACTION:
-		return "TransactionContext";
-	case ExceptionType::NOT_IMPLEMENTED:
-		return "Not implemented";
-	case ExceptionType::EXPRESSION:
-		return "Expression";
-	case ExceptionType::CATALOG:
-		return "Catalog";
-	case ExceptionType::PARSER:
-		return "Parser";
-	case ExceptionType::BINDER:
-		return "Binder";
-	case ExceptionType::PLANNER:
-		return "Planner";
-	case ExceptionType::SCHEDULER:
-		return "Scheduler";
-	case ExceptionType::EXECUTOR:
-		return "Executor";
-	case ExceptionType::CONSTRAINT:
-		return "Constraint";
-	case ExceptionType::INDEX:
-		return "Index";
-	case ExceptionType::STAT:
-		return "Stat";
-	case ExceptionType::CONNECTION:
-		return "Connection";
-	case ExceptionType::SYNTAX:
-		return "Syntax";
-	case ExceptionType::SETTINGS:
-		return "Settings";
-	case ExceptionType::OPTIMIZER:
-		return "Optimizer";
-	case ExceptionType::NULL_POINTER:
-		return "NullPointer";
-	case ExceptionType::IO:
-		return "IO";
-	case ExceptionType::INTERRUPT:
-		return "INTERRUPT";
-	case ExceptionType::FATAL:
-		return "FATAL";
-	case ExceptionType::INTERNAL:
-		return "INTERNAL";
-	case ExceptionType::INVALID_INPUT:
-		return "Invalid Input";
-	case ExceptionType::OUT_OF_MEMORY:
-		return "Out of Memory";
-	case ExceptionType::PERMISSION:
-		return "Permission";
-	case ExceptionType::PARAMETER_NOT_RESOLVED:
-		return "Parameter Not Resolved";
-	case ExceptionType::PARAMETER_NOT_ALLOWED:
-		return "Parameter Not Allowed";
-	case ExceptionType::DEPENDENCY:
-		return "Dependency";
-	case ExceptionType::MISSING_EXTENSION:
-		return "Missing Extension";
-	case ExceptionType::HTTP:
-		return "HTTP";
-	case ExceptionType::AUTOLOAD:
-		return "Extension Autoloading";
-	default:
-		return "Unknown";
+	for (auto &e : EXCEPTION_MAP) {
+		if (e.type == type) {
+			return e.text;
+		}
+	}
+	return "Unknown";
+}
+ExceptionType Exception::StringToExceptionType(const string &type) {
+	for (auto &e : EXCEPTION_MAP) {
+		if (e.text == type) {
+			return e.type;
+		}
 	}
+	return ExceptionType::INVALID;
 }
 const HTTPException &Exception::AsHTTPException() const {

package/src/duckdb/src/common/preserved_error.cpp CHANGED Viewed

@@ -18,6 +18,26 @@ PreservedError::PreservedError(const Exception &exception)
 PreservedError::PreservedError(const string &message)
     : initialized(true), type(ExceptionType::INVALID), raw_message(SanitizeErrorMessage(message)),
       exception_instance(nullptr) {
+	// Given a message in the form: 	xxxxx Error: yyyyy
+	// Try to match xxxxxxx with known error so to potentially reconstruct the original error type
+	auto position_semicolon = raw_message.find(':');
+	if (position_semicolon == std::string::npos) {
+		// Semicolon not found, bail out
+		return;
+	}
+	if (position_semicolon + 2 >= raw_message.size()) {
+		// Not enough characters afterward, bail out
+		return;
+	}
+	string err = raw_message.substr(0, position_semicolon);
+	string msg = raw_message.substr(position_semicolon + 2);
+	if (err.size() > 6 && err.substr(err.size() - 6) == " Error" && !msg.empty()) {
+		ExceptionType new_type = Exception::StringToExceptionType(err.substr(0, err.size() - 6));
+		if (new_type != type) {
+			type = new_type;
+			raw_message = msg;
+		}
+	}
 }
 const string &PreservedError::Message() {

package/src/duckdb/src/common/types/data_chunk.cpp CHANGED Viewed

@@ -64,7 +64,7 @@ void DataChunk::InitializeEmpty(vector<LogicalType>::const_iterator begin, vecto
 }
 void DataChunk::Reset() {
-	if (data.empty()) {
+	if (data.empty() || vector_caches.empty()) {
 		return;
 	}
 	if (vector_caches.size() != data.size()) {

package/src/duckdb/src/execution/expression_executor/execute_reference.cpp CHANGED Viewed

@@ -6,7 +6,7 @@ namespace duckdb {
 unique_ptr<ExpressionState> ExpressionExecutor::InitializeState(const BoundReferenceExpression &expr,
                                                                 ExpressionExecutorState &root) {
 	auto result = make_uniq<ExpressionState>(expr, root);
-	result->Finalize();
+	result->Finalize(true);
 	return result;
 }

package/src/duckdb/src/execution/expression_executor_state.cpp CHANGED Viewed

@@ -1,4 +1,5 @@
 #include "duckdb/execution/expression_executor_state.hpp"
 #include "duckdb/execution/expression_executor.hpp"
 #include "duckdb/planner/expression.hpp"
 #include "duckdb/planner/expression/bound_function_expression.hpp"
@@ -10,8 +11,13 @@ void ExpressionState::AddChild(Expression *expr) {
 	child_states.push_back(ExpressionExecutor::InitializeState(*expr, root));
 }
-void ExpressionState::Finalize() {
-	if (!types.empty()) {
+void ExpressionState::Finalize(bool empty) {
+	if (types.empty()) {
+		return;
+	}
+	if (empty) {
+		intermediate_chunk.InitializeEmpty(types);
+	} else {
 		intermediate_chunk.Initialize(GetAllocator(), types);
 	}
 }

package/src/duckdb/src/execution/operator/csv_scanner/buffered_csv_reader.cpp CHANGED Viewed

@@ -192,6 +192,7 @@ void BufferedCSVReader::ParseCSV(ParserMode mode) {
 }
 bool BufferedCSVReader::TryParseCSV(ParserMode parser_mode, DataChunk &insert_chunk, string &error_message) {
+	cached_buffers.clear();
 	mode = parser_mode;
 	// used for parsing algorithm
 	bool finished_chunk = false;
@@ -427,7 +428,6 @@ final_state:
 		Flush(insert_chunk);
 	}
-	end_of_file_reached = true;
 	return true;
 }

package/src/duckdb/src/execution/operator/csv_scanner/csv_state_machine_cache.cpp CHANGED Viewed

@@ -70,6 +70,8 @@ void CSVStateMachineCache::Insert(const CSVStateMachineOptions &state_machine_op
 	// 8) Empty Line State
 	transition_array[CSVState::EMPTY_LINE][static_cast<uint8_t>('\r')] = CSVState::EMPTY_LINE;
 	transition_array[CSVState::EMPTY_LINE][static_cast<uint8_t>('\n')] = CSVState::EMPTY_LINE;
+	transition_array[CSVState::EMPTY_LINE][static_cast<uint8_t>(state_machine_options.delimiter)] = CSVState::DELIMITER;
+	transition_array[CSVState::EMPTY_LINE][static_cast<uint8_t>(state_machine_options.quote)] = CSVState::QUOTED;
 }
 CSVStateMachineCache::CSVStateMachineCache() {