duckdb 0.9.0 → 0.9.1-dev120.0
This diff covers the published contents of two publicly available package versions released to a supported registry. It is provided for informational purposes only and reflects the changes between the versions as they appear in that registry.
- package/package.json +1 -1
- package/src/duckdb/src/common/enum_util.cpp +1 -1
- package/src/duckdb/src/common/serializer/binary_deserializer.cpp +4 -2
- package/src/duckdb/src/common/types/data_chunk.cpp +1 -1
- package/src/duckdb/src/core_functions/scalar/map/map.cpp +66 -32
- package/src/duckdb/src/execution/expression_executor/execute_reference.cpp +1 -1
- package/src/duckdb/src/execution/expression_executor_state.cpp +8 -2
- package/src/duckdb/src/execution/operator/csv_scanner/csv_state_machine_cache.cpp +41 -48
- package/src/duckdb/src/execution/operator/csv_scanner/parallel_csv_reader.cpp +13 -9
- package/src/duckdb/src/execution/operator/csv_scanner/sniffer/csv_sniffer.cpp +22 -24
- package/src/duckdb/src/execution/operator/csv_scanner/sniffer/dialect_detection.cpp +6 -11
- package/src/duckdb/src/execution/operator/csv_scanner/sniffer/header_detection.cpp +8 -3
- package/src/duckdb/src/execution/operator/csv_scanner/sniffer/type_detection.cpp +5 -9
- package/src/duckdb/src/execution/operator/csv_scanner/sniffer/type_refinement.cpp +8 -13
- package/src/duckdb/src/execution/operator/csv_scanner/sniffer/type_replacement.cpp +2 -2
- package/src/duckdb/src/execution/perfect_aggregate_hashtable.cpp +4 -6
- package/src/duckdb/src/execution/radix_partitioned_hashtable.cpp +1 -1
- package/src/duckdb/src/function/table/read_csv.cpp +1 -1
- package/src/duckdb/src/function/table/version/pragma_version.cpp +2 -2
- package/src/duckdb/src/include/duckdb/common/serializer/serialization_traits.hpp +1 -0
- package/src/duckdb/src/include/duckdb/execution/expression_executor_state.hpp +1 -1
- package/src/duckdb/src/include/duckdb/execution/operator/scan/csv/csv_sniffer.hpp +12 -10
- package/src/duckdb/src/include/duckdb/execution/operator/scan/csv/csv_state.hpp +28 -0
- package/src/duckdb/src/include/duckdb/execution/operator/scan/csv/csv_state_machine.hpp +9 -14
- package/src/duckdb/src/include/duckdb/execution/operator/scan/csv/csv_state_machine_cache.hpp +20 -6
- package/src/duckdb/src/include/duckdb/execution/operator/scan/csv/parallel_csv_reader.hpp +1 -1
- package/src/duckdb/src/include/duckdb.h +5 -5
- package/src/duckdb/src/main/extension/extension_helper.cpp +7 -0
- package/src/duckdb/src/optimizer/common_aggregate_optimizer.cpp +2 -2
- package/src/duckdb/src/planner/binder/tableref/plan_joinref.cpp +3 -0
package/package.json
CHANGED

package/src/duckdb/src/common/enum_util.cpp
@@ -68,7 +68,7 @@
 #include "duckdb/execution/index/art/node.hpp"
 #include "duckdb/execution/operator/scan/csv/base_csv_reader.hpp"
 #include "duckdb/execution/operator/scan/csv/csv_reader_options.hpp"
-#include "duckdb/execution/operator/scan/csv/
+#include "duckdb/execution/operator/scan/csv/csv_state.hpp"
 #include "duckdb/execution/operator/scan/csv/quote_rules.hpp"
 #include "duckdb/function/aggregate_state.hpp"
 #include "duckdb/function/function.hpp"
package/src/duckdb/src/common/serializer/binary_deserializer.cpp
@@ -8,7 +8,8 @@ namespace duckdb {
 void BinaryDeserializer::OnPropertyBegin(const field_id_t field_id, const char *) {
     auto field = NextField();
     if (field != field_id) {
-        throw
+        throw SerializationException("Failed to deserialize: field id mismatch, expected: %d, got: %d", field_id,
+                                      field);
     }
 }
 
@@ -34,7 +35,8 @@ void BinaryDeserializer::OnObjectBegin() {
 void BinaryDeserializer::OnObjectEnd() {
     auto next_field = NextField();
     if (next_field != MESSAGE_TERMINATOR_FIELD_ID) {
-        throw
+        throw SerializationException("Failed to deserialize: expected end of object, but found field id: %d",
+                                      next_field);
     }
     nesting_level--;
 }
package/src/duckdb/src/core_functions/scalar/map/map.cpp
@@ -87,11 +87,24 @@ static bool ListEntriesEqual(Vector &keys, Vector &values, idx_t count) {
     return true;
 }
 
+static list_entry_t *GetBiggestList(Vector &key, Vector &value, idx_t &size) {
+    auto key_size = ListVector::GetListSize(key);
+    auto value_size = ListVector::GetListSize(value);
+    if (key_size > value_size) {
+        size = key_size;
+        return ListVector::GetData(key);
+    }
+    size = value_size;
+    return ListVector::GetData(value);
+}
+
 static void MapFunction(DataChunk &args, ExpressionState &state, Vector &result) {
     D_ASSERT(result.GetType().id() == LogicalTypeId::MAP);
 
-    auto
-
+    auto count = args.size();
+
+    auto &map_key_vector = MapVector::GetKeys(result);
+    auto &map_value_vector = MapVector::GetValues(result);
     auto result_data = ListVector::GetData(result);
 
     result.SetVectorType(VectorType::CONSTANT_VECTOR);
@@ -99,52 +112,73 @@ static void MapFunction(DataChunk &args, ExpressionState &state, Vector &result)
         ListVector::SetListSize(result, 0);
         result_data->offset = 0;
         result_data->length = 0;
-        result.Verify(
+        result.Verify(count);
         return;
     }
 
-
-
-
-
+    D_ASSERT(args.ColumnCount() == 2);
+    auto &key_vector = args.data[0];
+    auto &value_vector = args.data[1];
+
+    if (args.AllConstant()) {
+        auto key_data = ListVector::GetData(key_vector);
+        auto value_data = ListVector::GetData(value_vector);
+        auto key_entry = key_data[0];
+        auto value_entry = value_data[0];
+        if (key_entry != value_entry) {
+            throw BinderException("Key and value list sizes don't match");
+        }
+        result_data[0] = key_entry;
+        ListVector::SetListSize(result, ListVector::GetListSize(key_vector));
+        map_key_vector.Reference(ListVector::GetEntry(key_vector));
+        map_value_vector.Reference(ListVector::GetEntry(value_vector));
+        MapVector::MapConversionVerify(result, count);
+        result.Verify(count);
+        return;
     }
 
-
-
-
-
-
-
-
-
-
-
-
+    result.SetVectorType(VectorType::FLAT_VECTOR);
+
+    if (key_vector.GetVectorType() == VectorType::CONSTANT_VECTOR) {
+        D_ASSERT(value_vector.GetVectorType() != VectorType::CONSTANT_VECTOR);
+        Vector expanded_const(ListType::GetChildType(key_vector.GetType()), count);
+        AlignVectorToReference(key_vector, value_vector, count, expanded_const);
+        map_key_vector.Reference(expanded_const);
+
+        value_vector.Flatten(count);
+        map_value_vector.Reference(ListVector::GetEntry(value_vector));
+    } else if (value_vector.GetVectorType() == VectorType::CONSTANT_VECTOR) {
+        D_ASSERT(key_vector.GetVectorType() != VectorType::CONSTANT_VECTOR);
+        Vector expanded_const(ListType::GetChildType(value_vector.GetType()), count);
+        AlignVectorToReference(value_vector, key_vector, count, expanded_const);
+        map_value_vector.Reference(expanded_const);
+
+        key_vector.Flatten(count);
+        map_key_vector.Reference(ListVector::GetEntry(key_vector));
     } else {
-
+        key_vector.Flatten(count);
+        value_vector.Flatten(count);
+
+        if (!ListEntriesEqual(key_vector, value_vector, count)) {
             throw InvalidInputException("Error in MAP creation: key list and value list do not align. i.e. different "
                                         "size or incompatible structure");
         }
+
+        map_value_vector.Reference(ListVector::GetEntry(value_vector));
+        map_key_vector.Reference(ListVector::GetEntry(key_vector));
     }
 
-
+    idx_t list_size;
+    auto src_data = GetBiggestList(key_vector, value_vector, list_size);
+    ListVector::SetListSize(result, list_size);
 
     result_data = ListVector::GetData(result);
-    for (idx_t i = 0; i <
+    for (idx_t i = 0; i < count; i++) {
         result_data[i] = src_data[i];
     }
 
-
-
-    if (!(keys_are_const && !values_are_const)) {
-        key_vector.Reference(ListVector::GetEntry(args.data[0]));
-    }
-    if (!(values_are_const && !keys_are_const)) {
-        value_vector.Reference(ListVector::GetEntry(args.data[1]));
-    }
-
-    MapVector::MapConversionVerify(result, args.size());
-    result.Verify(args.size());
+    MapVector::MapConversionVerify(result, count);
+    result.Verify(count);
 }
 
 static unique_ptr<FunctionData> MapBind(ClientContext &context, ScalarFunction &bound_function,
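For context on the flat-vector branch above: per row, the key list entry and the value list entry must describe the same number of elements. The body of ListEntriesEqual is elided in this diff, so the following is only an illustrative sketch of that invariant, not the actual implementation:

// Illustrative sketch only (assumed helper, not DuckDB's ListEntriesEqual):
// each row's key entry and value entry must have matching lengths,
// e.g. map(['a', 'b'], [1]) has to be rejected.
static bool EntriesAlign(const list_entry_t *keys, const list_entry_t *values, idx_t count) {
    for (idx_t row = 0; row < count; row++) {
        if (keys[row].length != values[row].length) {
            return false;
        }
    }
    return true;
}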
package/src/duckdb/src/execution/expression_executor/execute_reference.cpp
@@ -6,7 +6,7 @@ namespace duckdb {
 unique_ptr<ExpressionState> ExpressionExecutor::InitializeState(const BoundReferenceExpression &expr,
                                                                 ExpressionExecutorState &root) {
     auto result = make_uniq<ExpressionState>(expr, root);
-    result->Finalize();
+    result->Finalize(true);
     return result;
 }
 
package/src/duckdb/src/execution/expression_executor_state.cpp
@@ -1,4 +1,5 @@
 #include "duckdb/execution/expression_executor_state.hpp"
+
 #include "duckdb/execution/expression_executor.hpp"
 #include "duckdb/planner/expression.hpp"
 #include "duckdb/planner/expression/bound_function_expression.hpp"
@@ -10,8 +11,13 @@ void ExpressionState::AddChild(Expression *expr) {
     child_states.push_back(ExpressionExecutor::InitializeState(*expr, root));
 }
 
-void ExpressionState::Finalize() {
-    if (
+void ExpressionState::Finalize(bool empty) {
+    if (types.empty()) {
+        return;
+    }
+    if (empty) {
+        intermediate_chunk.InitializeEmpty(types);
+    } else {
         intermediate_chunk.Initialize(GetAllocator(), types);
     }
 }
package/src/duckdb/src/execution/operator/csv_scanner/csv_state_machine_cache.cpp
@@ -3,8 +3,8 @@
 
 namespace duckdb {
 
-void InitializeTransitionArray(
-    for (uint32_t i = 0; i < NUM_TRANSITIONS; i++) {
+void InitializeTransitionArray(CSVState *transition_array, const CSVState state) {
+    for (uint32_t i = 0; i < StateMachine::NUM_TRANSITIONS; i++) {
         transition_array[i] = state;
     }
 }
@@ -13,72 +13,65 @@ void CSVStateMachineCache::Insert(const CSVStateMachineOptions &state_machine_op
     D_ASSERT(state_machine_cache.find(state_machine_options) == state_machine_cache.end());
     // Initialize transition array with default values to the Standard option
     auto &transition_array = state_machine_cache[state_machine_options];
-    const uint8_t standard_state = static_cast<uint8_t>(CSVState::STANDARD);
-    const uint8_t field_separator_state = static_cast<uint8_t>(CSVState::DELIMITER);
-    const uint8_t record_separator_state = static_cast<uint8_t>(CSVState::RECORD_SEPARATOR);
-    const uint8_t carriage_return_state = static_cast<uint8_t>(CSVState::CARRIAGE_RETURN);
-    const uint8_t quoted_state = static_cast<uint8_t>(CSVState::QUOTED);
-    const uint8_t unquoted_state = static_cast<uint8_t>(CSVState::UNQUOTED);
-    const uint8_t escape_state = static_cast<uint8_t>(CSVState::ESCAPE);
-    const uint8_t empty_line_state = static_cast<uint8_t>(CSVState::EMPTY_LINE);
-    const uint8_t invalid_state = static_cast<uint8_t>(CSVState::INVALID);
 
-    for (uint32_t i = 0; i < NUM_STATES; i++) {
-
-
-
+    for (uint32_t i = 0; i < StateMachine::NUM_STATES; i++) {
+        CSVState cur_state = CSVState(i);
+        switch (cur_state) {
+        case CSVState::QUOTED:
+            InitializeTransitionArray(transition_array[cur_state], CSVState::QUOTED);
             break;
-        case
-        case
-        case
-            InitializeTransitionArray(transition_array[
+        case CSVState::UNQUOTED:
+        case CSVState::INVALID:
+        case CSVState::ESCAPE:
+            InitializeTransitionArray(transition_array[cur_state], CSVState::INVALID);
             break;
         default:
-            InitializeTransitionArray(transition_array[
+            InitializeTransitionArray(transition_array[cur_state], CSVState::STANDARD);
             break;
         }
     }
 
     // Now set values depending on configuration
     // 1) Standard State
-    transition_array[
-    transition_array[
-    transition_array[
-    transition_array[
+    transition_array[CSVState::STANDARD][static_cast<uint8_t>(state_machine_options.delimiter)] = CSVState::DELIMITER;
+    transition_array[CSVState::STANDARD][static_cast<uint8_t>('\n')] = CSVState::RECORD_SEPARATOR;
+    transition_array[CSVState::STANDARD][static_cast<uint8_t>('\r')] = CSVState::CARRIAGE_RETURN;
+    transition_array[CSVState::STANDARD][static_cast<uint8_t>(state_machine_options.quote)] = CSVState::QUOTED;
     // 2) Field Separator State
-    transition_array[
-
-    transition_array[
-    transition_array[
-    transition_array[field_separator_state][static_cast<uint8_t>(state_machine_options.quote)] = quoted_state;
+    transition_array[CSVState::DELIMITER][static_cast<uint8_t>(state_machine_options.delimiter)] = CSVState::DELIMITER;
+    transition_array[CSVState::DELIMITER][static_cast<uint8_t>('\n')] = CSVState::RECORD_SEPARATOR;
+    transition_array[CSVState::DELIMITER][static_cast<uint8_t>('\r')] = CSVState::CARRIAGE_RETURN;
+    transition_array[CSVState::DELIMITER][static_cast<uint8_t>(state_machine_options.quote)] = CSVState::QUOTED;
    // 3) Record Separator State
-    transition_array[
-
-    transition_array[
-    transition_array[
-    transition_array[
+    transition_array[CSVState::RECORD_SEPARATOR][static_cast<uint8_t>(state_machine_options.delimiter)] =
+        CSVState::DELIMITER;
+    transition_array[CSVState::RECORD_SEPARATOR][static_cast<uint8_t>('\n')] = CSVState::EMPTY_LINE;
+    transition_array[CSVState::RECORD_SEPARATOR][static_cast<uint8_t>('\r')] = CSVState::EMPTY_LINE;
+    transition_array[CSVState::RECORD_SEPARATOR][static_cast<uint8_t>(state_machine_options.quote)] = CSVState::QUOTED;
     // 4) Carriage Return State
-    transition_array[
-    transition_array[
-    transition_array[
+    transition_array[CSVState::CARRIAGE_RETURN][static_cast<uint8_t>('\n')] = CSVState::RECORD_SEPARATOR;
+    transition_array[CSVState::CARRIAGE_RETURN][static_cast<uint8_t>('\r')] = CSVState::EMPTY_LINE;
+    transition_array[CSVState::CARRIAGE_RETURN][static_cast<uint8_t>(state_machine_options.escape)] = CSVState::ESCAPE;
     // 5) Quoted State
-    transition_array[
+    transition_array[CSVState::QUOTED][static_cast<uint8_t>(state_machine_options.quote)] = CSVState::UNQUOTED;
     if (state_machine_options.quote != state_machine_options.escape) {
-        transition_array[
+        transition_array[CSVState::QUOTED][static_cast<uint8_t>(state_machine_options.escape)] = CSVState::ESCAPE;
     }
     // 6) Unquoted State
-    transition_array[
-    transition_array[
-    transition_array[
+    transition_array[CSVState::UNQUOTED][static_cast<uint8_t>('\n')] = CSVState::RECORD_SEPARATOR;
+    transition_array[CSVState::UNQUOTED][static_cast<uint8_t>('\r')] = CSVState::CARRIAGE_RETURN;
+    transition_array[CSVState::UNQUOTED][static_cast<uint8_t>(state_machine_options.delimiter)] = CSVState::DELIMITER;
     if (state_machine_options.quote == state_machine_options.escape) {
-        transition_array[
+        transition_array[CSVState::UNQUOTED][static_cast<uint8_t>(state_machine_options.escape)] = CSVState::QUOTED;
     }
     // 7) Escaped State
-    transition_array[
-    transition_array[
+    transition_array[CSVState::ESCAPE][static_cast<uint8_t>(state_machine_options.quote)] = CSVState::QUOTED;
+    transition_array[CSVState::ESCAPE][static_cast<uint8_t>(state_machine_options.escape)] = CSVState::QUOTED;
     // 8) Empty Line State
-    transition_array[
-    transition_array[
+    transition_array[CSVState::EMPTY_LINE][static_cast<uint8_t>('\r')] = CSVState::EMPTY_LINE;
+    transition_array[CSVState::EMPTY_LINE][static_cast<uint8_t>('\n')] = CSVState::EMPTY_LINE;
+    transition_array[CSVState::EMPTY_LINE][static_cast<uint8_t>(state_machine_options.delimiter)] = CSVState::DELIMITER;
+    transition_array[CSVState::EMPTY_LINE][static_cast<uint8_t>(state_machine_options.quote)] = CSVState::QUOTED;
 }
 
 CSVStateMachineCache::CSVStateMachineCache() {
@@ -95,7 +88,7 @@ CSVStateMachineCache::CSVStateMachineCache() {
     }
 }
 
-const
+const StateMachine &CSVStateMachineCache::Get(const CSVStateMachineOptions &state_machine_options) {
     //! Custom State Machine, we need to create it and cache it first
     if (state_machine_cache.find(state_machine_options) == state_machine_cache.end()) {
         Insert(state_machine_options);
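As a rough illustration of what Insert builds, the populated table encodes per-character lookups such as the following; this is a sketch of expected entries for an assumed default dialect (delimiter ',', quote '"', escape '"'), not code from the package:

// Sketch of expected entries, assuming delimiter ',', quote '"', escape '"':
// transition_array[CSVState::STANDARD][static_cast<uint8_t>(',')]  == CSVState::DELIMITER
// transition_array[CSVState::STANDARD][static_cast<uint8_t>('\n')] == CSVState::RECORD_SEPARATOR
// transition_array[CSVState::STANDARD][static_cast<uint8_t>('"')]  == CSVState::QUOTED
// transition_array[CSVState::QUOTED][static_cast<uint8_t>('"')]    == CSVState::UNQUOTED   (quote == escape here)
// characters not explicitly configured keep the machine in STANDARD.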
package/src/duckdb/src/execution/operator/csv_scanner/parallel_csv_reader.cpp
@@ -49,11 +49,12 @@ bool ParallelCSVReader::NewLineDelimiter(bool carry, bool carry_followed_by_nl,
     return (carry && carry_followed_by_nl) || (!carry && first_char);
 }
 
-
+bool ParallelCSVReader::SkipEmptyLines() {
+    const idx_t initial_position_buffer = position_buffer;
     idx_t new_pos_buffer = position_buffer;
     if (parse_chunk.data.size() == 1) {
         // Empty lines are null data.
-        return;
+        return initial_position_buffer != position_buffer;
     }
     for (; new_pos_buffer < end_buffer; new_pos_buffer++) {
         if (StringUtil::CharacterIsNewline((*buffer)[new_pos_buffer])) {
@@ -63,13 +64,14 @@ void ParallelCSVReader::SkipEmptyLines() {
                 position_buffer++;
             }
             if (new_pos_buffer > end_buffer) {
-                return;
+                return initial_position_buffer != position_buffer;
             }
             position_buffer = new_pos_buffer;
         } else if ((*buffer)[new_pos_buffer] != ' ') {
-            return;
+            return initial_position_buffer != position_buffer;
         }
     }
+    return initial_position_buffer != position_buffer;
 }
 
 bool ParallelCSVReader::SetPosition() {
@@ -185,7 +187,6 @@ bool ParallelCSVReader::SetPosition() {
     }
     // Ensure that parse_chunk has no gunk when trying to figure new line
     parse_chunk.Reset();
-
     verification_positions.end_of_last_line = position_buffer;
     finished = false;
     return successfully_read_first_line;
@@ -288,7 +289,7 @@ bool ParallelCSVReader::TryParseSimpleCSV(DataChunk &insert_chunk, string &error
     idx_t column = 0;
     idx_t offset = 0;
     bool has_quotes = false;
-
+    bool last_line_empty = false;
     vector<idx_t> escape_positions;
     if ((start_buffer == buffer->buffer_start || start_buffer == buffer->buffer_end) && !try_add_line) {
         // First time reading this buffer piece
@@ -454,7 +455,10 @@ add_row : {
     if (!BufferRemainder()) {
         goto final_state;
     }
-    SkipEmptyLines()
+    if (SkipEmptyLines() && reached_remainder_state) {
+        last_line_empty = true;
+        goto final_state;
+    }
     if (position_buffer - verification_positions.end_of_last_line > options.buffer_size) {
         error_message = "Line does not fit in one buffer. Increase the buffer size.";
         return false;
@@ -583,8 +587,8 @@ final_state : {
         return true;
     }
     // If this is the last buffer, we have to read the last value
-    if (buffer->buffer->is_last_buffer || !buffer->next_buffer ||
-
+    if (!last_line_empty && (buffer->buffer->is_last_buffer || !buffer->next_buffer ||
+                             (buffer->next_buffer && buffer->next_buffer->is_last_buffer))) {
         if (column > 0 || start_buffer != position_buffer || try_add_line ||
             (insert_chunk.data.size() == 1 && start_buffer != position_buffer)) {
             // remaining values to be added to the chunk
package/src/duckdb/src/execution/operator/csv_scanner/sniffer/csv_sniffer.cpp
@@ -22,30 +22,9 @@ CSVSniffer::CSVSniffer(CSVReaderOptions &options_p, shared_ptr<CSVBufferManager>
     }
 }
 
-
-    // 1. Dialect Detection
-    DetectDialect();
-    if (explicit_set_columns) {
-        if (!candidates.empty()) {
-            options.dialect_options.state_machine_options = candidates[0]->dialect_options.state_machine_options;
-            options.dialect_options.new_line = candidates[0]->dialect_options.new_line;
-        }
-        // We do not need to run type and header detection as these were defined by the user
-        return SnifferResult(detected_types, names);
-    }
-    // 2. Type Detection
-    DetectTypes();
-    // 3. Header Detection
-    DetectHeader();
-    D_ASSERT(best_sql_types_candidates_per_column_idx.size() == names.size());
-    // 4. Type Replacement
-    ReplaceTypes();
-    // 5. Type Refinement
-    RefineTypes();
-    // We are done, construct and return the result.
-
-    // Set the CSV Options in the reference
+void CSVSniffer::SetResultOptions() {
     options.dialect_options = best_candidate->dialect_options;
+    options.dialect_options.new_line = best_candidate->dialect_options.new_line;
     options.has_header = best_candidate->dialect_options.header;
     options.skip_rows_set = options.dialect_options.skip_rows > 0;
     if (options.has_header) {
@@ -53,8 +32,27 @@ SnifferResult CSVSniffer::SniffCSV() {
     } else {
         options.dialect_options.true_start = best_start_without_header;
     }
+}
 
-
+SnifferResult CSVSniffer::SniffCSV() {
+    // 1. Dialect Detection
+    DetectDialect();
+    // 2. Type Detection
+    DetectTypes();
+    // 3. Type Refinement
+    RefineTypes();
+    // 4. Header Detection
+    DetectHeader();
+    if (explicit_set_columns) {
+        SetResultOptions();
+        // We do not need to run type refinement, since the types have been given by the user
+        return SnifferResult({}, {});
+    }
+    // 5. Type Replacement
+    ReplaceTypes();
+    D_ASSERT(best_sql_types_candidates_per_column_idx.size() == names.size());
+    // We are done, Set the CSV Options in the reference. Construct and return the result.
+    SetResultOptions();
     return SnifferResult(detected_types, names);
 }
 
package/src/duckdb/src/execution/operator/csv_scanner/sniffer/dialect_detection.cpp
@@ -5,9 +5,9 @@ namespace duckdb {
 
 struct SniffDialect {
     inline static void Initialize(CSVStateMachine &machine) {
-        machine.state = CSVState::
-        machine.previous_state = CSVState::
-        machine.pre_previous_state = CSVState::
+        machine.state = CSVState::EMPTY_LINE;
+        machine.previous_state = CSVState::EMPTY_LINE;
+        machine.pre_previous_state = CSVState::EMPTY_LINE;
         machine.cur_rows = 0;
         machine.column_count = 1;
     }
@@ -21,17 +21,12 @@ struct SniffDialect {
             sniffed_column_counts.clear();
             return true;
         }
-        machine.
-        machine.previous_state = machine.state;
-
-        machine.state = static_cast<CSVState>(
-            machine.transition_array[static_cast<uint8_t>(machine.state)][static_cast<uint8_t>(current_char)]);
+        machine.Transition(current_char);
 
         bool carriage_return = machine.previous_state == CSVState::CARRIAGE_RETURN;
         machine.column_count += machine.previous_state == CSVState::DELIMITER;
         sniffed_column_counts[machine.cur_rows] = machine.column_count;
-        machine.cur_rows +=
-            machine.previous_state == CSVState::RECORD_SEPARATOR && machine.state != CSVState::EMPTY_LINE;
+        machine.cur_rows += machine.previous_state == CSVState::RECORD_SEPARATOR;
         machine.column_count -= (machine.column_count - 1) * (machine.previous_state == CSVState::RECORD_SEPARATOR);
 
         // It means our carriage return is actually a record separator
@@ -304,7 +299,7 @@ void CSVSniffer::DetectDialect() {
     unordered_map<uint8_t, vector<char>> quote_candidates_map;
     // Candidates for the escape option
     unordered_map<uint8_t, vector<char>> escape_candidates_map;
-    escape_candidates_map[(uint8_t)QuoteRule::QUOTES_RFC] = {'\
+    escape_candidates_map[(uint8_t)QuoteRule::QUOTES_RFC] = {'\"', '\'', '\0'};
     escape_candidates_map[(uint8_t)QuoteRule::QUOTES_OTHER] = {'\\'};
     escape_candidates_map[(uint8_t)QuoteRule::NO_QUOTES] = {'\0'};
     // Number of rows read
package/src/duckdb/src/execution/operator/csv_scanner/sniffer/header_detection.cpp
@@ -97,9 +97,14 @@ void CSVSniffer::DetectHeader() {
     bool first_row_consistent = true;
     // check if header row is all null and/or consistent with detected column data types
     bool first_row_nulls = true;
-    //
-
-
+    // If null-padding is not allowed and there is a mismatch between our header candidate and the number of columns
+    // We can't detect the dialect/type options properly
+    if (!best_candidate->options.null_padding &&
+        best_sql_types_candidates_per_column_idx.size() != best_header_row.size()) {
+        throw InvalidInputException(
+            "Error in file \"%s\": CSV options could not be auto-detected. Consider setting parser options manually.",
+            options.file_path);
+    }
     for (idx_t col = 0; col < best_header_row.size(); col++) {
         auto dummy_val = best_header_row[col];
         if (!dummy_val.IsNull()) {
package/src/duckdb/src/execution/operator/csv_scanner/sniffer/type_detection.cpp
@@ -143,20 +143,17 @@ struct SniffValue {
             machine.rows_read++;
         }
 
-        if ((machine.previous_state == CSVState::RECORD_SEPARATOR
+        if ((machine.previous_state == CSVState::RECORD_SEPARATOR) ||
             (machine.state != CSVState::RECORD_SEPARATOR && machine.previous_state == CSVState::CARRIAGE_RETURN)) {
             sniffed_values[machine.cur_rows].position = machine.line_start_pos;
             sniffed_values[machine.cur_rows].set = true;
             machine.line_start_pos = current_pos;
         }
-
-        machine.
-        machine.state = static_cast<CSVState>(
-            machine.transition_array[static_cast<uint8_t>(machine.state)][static_cast<uint8_t>(current_char)]);
+
+        machine.Transition(current_char);
 
         bool carriage_return = machine.previous_state == CSVState::CARRIAGE_RETURN;
-        if (machine.previous_state == CSVState::DELIMITER ||
-            (machine.previous_state == CSVState::RECORD_SEPARATOR && machine.state != CSVState::EMPTY_LINE) ||
+        if (machine.previous_state == CSVState::DELIMITER || (machine.previous_state == CSVState::RECORD_SEPARATOR) ||
             (machine.state != CSVState::RECORD_SEPARATOR && carriage_return)) {
             // Started a new value
             // Check if it's UTF-8
@@ -175,8 +172,7 @@ struct SniffValue {
             (machine.state == CSVState::QUOTED && machine.previous_state == CSVState::QUOTED)) {
             machine.value += current_char;
         }
-        machine.cur_rows +=
-            machine.previous_state == CSVState::RECORD_SEPARATOR && machine.state != CSVState::EMPTY_LINE;
+        machine.cur_rows += machine.previous_state == CSVState::RECORD_SEPARATOR;
         // It means our carriage return is actually a record separator
         machine.cur_rows += machine.state != CSVState::RECORD_SEPARATOR && carriage_return;
         if (machine.cur_rows >= sniffed_values.size()) {
package/src/duckdb/src/execution/operator/csv_scanner/sniffer/type_refinement.cpp
@@ -3,9 +3,9 @@
 namespace duckdb {
 struct Parse {
     inline static void Initialize(CSVStateMachine &machine) {
-        machine.state = CSVState::
-        machine.previous_state = CSVState::
-        machine.pre_previous_state = CSVState::
+        machine.state = CSVState::EMPTY_LINE;
+        machine.previous_state = CSVState::EMPTY_LINE;
+        machine.pre_previous_state = CSVState::EMPTY_LINE;
 
         machine.cur_rows = 0;
         machine.column_count = 0;
@@ -14,22 +14,18 @@ struct Parse {
 
     inline static bool Process(CSVStateMachine &machine, DataChunk &parse_chunk, char current_char, idx_t current_pos) {
 
-        machine.
-        machine.previous_state = machine.state;
-        machine.state = static_cast<CSVState>(
-            machine.transition_array[static_cast<uint8_t>(machine.state)][static_cast<uint8_t>(current_char)]);
+        machine.Transition(current_char);
 
         bool carriage_return = machine.previous_state == CSVState::CARRIAGE_RETURN;
-        if (machine.previous_state == CSVState::DELIMITER ||
-            (machine.previous_state == CSVState::RECORD_SEPARATOR && machine.state != CSVState::EMPTY_LINE) ||
+        if (machine.previous_state == CSVState::DELIMITER || (machine.previous_state == CSVState::RECORD_SEPARATOR) ||
             (machine.state != CSVState::RECORD_SEPARATOR && carriage_return)) {
             // Started a new value
             // Check if it's UTF-8 (Or not?)
             machine.VerifyUTF8();
             auto &v = parse_chunk.data[machine.column_count++];
             auto parse_data = FlatVector::GetData<string_t>(v);
-            auto &validity_mask = FlatVector::Validity(v);
             if (machine.value.empty()) {
+                auto &validity_mask = FlatVector::Validity(v);
                 validity_mask.SetInvalid(machine.cur_rows);
             } else {
                 parse_data[machine.cur_rows] = StringVector::AddStringOrBlob(v, string_t(machine.value));
@@ -50,12 +46,11 @@ struct Parse {
             (machine.state == CSVState::QUOTED && machine.previous_state == CSVState::QUOTED)) {
             machine.value += current_char;
         }
-        machine.cur_rows +=
-            machine.previous_state == CSVState::RECORD_SEPARATOR && machine.state != CSVState::EMPTY_LINE;
+        machine.cur_rows += machine.previous_state == CSVState::RECORD_SEPARATOR && machine.column_count > 0;
         machine.column_count -= machine.column_count * (machine.previous_state == CSVState::RECORD_SEPARATOR);
 
         // It means our carriage return is actually a record separator
-        machine.cur_rows += machine.state != CSVState::RECORD_SEPARATOR && carriage_return;
+        machine.cur_rows += machine.state != CSVState::RECORD_SEPARATOR && carriage_return && machine.column_count > 0;
         machine.column_count -= machine.column_count * (machine.state != CSVState::RECORD_SEPARATOR && carriage_return);
 
         if (machine.cur_rows >= STANDARD_VECTOR_SIZE) {
package/src/duckdb/src/execution/operator/csv_scanner/sniffer/type_replacement.cpp
@@ -14,7 +14,7 @@ void CSVSniffer::ReplaceTypes() {
     for (idx_t i = 0; i < names.size(); i++) {
         auto it = best_candidate->options.sql_types_per_column.find(names[i]);
         if (it != best_candidate->options.sql_types_per_column.end()) {
-
+            detected_types[i] = best_candidate->options.sql_type_list[it->second];
             found++;
         }
     }
@@ -33,7 +33,7 @@ void CSVSniffer::ReplaceTypes() {
                                           best_candidate->options.sql_type_list.size(), names.size());
     }
     for (idx_t i = 0; i < best_candidate->options.sql_type_list.size(); i++) {
-
+        detected_types[i] = best_candidate->options.sql_type_list[i];
     }
 }
 } // namespace duckdb
package/src/duckdb/src/execution/perfect_aggregate_hashtable.cpp
@@ -298,12 +298,10 @@ void PerfectAggregateHashTable::Destroy() {
     RowOperationsState row_state(*aggregate_allocator);
     data_ptr_t payload_ptr = data;
     for (idx_t i = 0; i < total_groups; i++) {
-
-
-
-
-            count = 0;
-        }
+        data_pointers[count++] = payload_ptr;
+        if (count == STANDARD_VECTOR_SIZE) {
+            RowOperations::DestroyStates(row_state, layout, addresses, count);
+            count = 0;
         }
         payload_ptr += tuple_size;
     }
package/src/duckdb/src/execution/radix_partitioned_hashtable.cpp
@@ -261,7 +261,7 @@ idx_t RadixHTConfig::ExternalRadixBits(const idx_t &maximum_sink_radix_bits_p) {
 idx_t RadixHTConfig::SinkCapacity(ClientContext &context) {
     // Get active and maximum number of threads
     const idx_t active_threads = TaskScheduler::GetScheduler(context).NumberOfThreads();
-    const auto max_threads = DBConfig::
+    const auto max_threads = DBConfig::GetConfig(context).options.maximum_threads;
 
     // Compute cache size per active thread (assuming cache is shared)
     const auto total_shared_cache_size = max_threads * L3_CACHE_SIZE;
package/src/duckdb/src/function/table/read_csv.cpp
@@ -38,7 +38,7 @@ void ReadCSVData::FinalizeRead(ClientContext &context) {
     auto number_of_threads = TaskScheduler::GetScheduler(context).NumberOfThreads();
     //! If we have many csv files, we run single-threaded on each file and parallelize on the number of files
     bool many_csv_files = files.size() > 1 && int64_t(files.size() * 2) >= number_of_threads;
-    if (options.parallel_mode != ParallelMode::PARALLEL && many_csv_files) {
+    if (options.parallel_mode != ParallelMode::PARALLEL && (many_csv_files || number_of_threads == 1)) {
         single_threaded = true;
     }
     if (options.parallel_mode == ParallelMode::SINGLE_THREADED || not_supported_options ||
package/src/duckdb/src/function/table/version/pragma_version.cpp
@@ -1,8 +1,8 @@
 #ifndef DUCKDB_VERSION
-#define DUCKDB_VERSION "
+#define DUCKDB_VERSION "v0.9.1-dev120"
 #endif
 #ifndef DUCKDB_SOURCE_ID
-#define DUCKDB_SOURCE_ID "
+#define DUCKDB_SOURCE_ID "af666ad8ba"
 #endif
 #include "duckdb/function/table/system_functions.hpp"
 #include "duckdb/main/database.hpp"
package/src/duckdb/src/include/duckdb/execution/operator/scan/csv/csv_sniffer.hpp
@@ -34,9 +34,9 @@ public:
     //! CSV Sniffing consists of five steps:
     //! 1. Dialect Detection: Generate the CSV Options (delimiter, quote, escape, etc.)
     //! 2. Type Detection: Figures out the types of the columns (For one chunk)
-    //! 3.
-    //! 4.
-    //! 5. Type
+    //! 3. Type Refinement: Refines the types of the columns for the remaining chunks
+    //! 4. Header Detection: Figures out if the CSV file has a header and produces the names of the columns
+    //! 5. Type Replacement: Replaces the types of the columns if the user specified them
     SnifferResult SniffCSV();
 
 private:
@@ -50,6 +50,8 @@ private:
     CSVReaderOptions &options;
     //! Buffer being used on sniffer
     shared_ptr<CSVBufferManager> buffer_manager;
+    //! Sets the result options
+    void SetResultOptions();
 
     //! ------------------------------------------------------//
     //! ----------------- Dialect Detection ----------------- //
@@ -105,6 +107,13 @@ private:
     idx_t best_start_without_header = 0;
     vector<Value> best_header_row;
 
+    //! ------------------------------------------------------//
+    //! ------------------ Type Refinement ------------------ //
+    //! ------------------------------------------------------//
+    void RefineTypes();
+    bool TryCastVector(Vector &parse_chunk_col, idx_t size, const LogicalType &sql_type);
+    vector<LogicalType> detected_types;
+
     //! ------------------------------------------------------//
     //! ------------------ Header Detection ----------------- //
     //! ------------------------------------------------------//
@@ -117,13 +126,6 @@ private:
     //! ------------------ Type Replacement ----------------- //
     //! ------------------------------------------------------//
     void ReplaceTypes();
-
-    //! ------------------------------------------------------//
-    //! ------------------ Type Refinement ------------------ //
-    //! ------------------------------------------------------//
-    void RefineTypes();
-    bool TryCastVector(Vector &parse_chunk_col, idx_t size, const LogicalType &sql_type);
-    vector<LogicalType> detected_types;
 };
 
 } // namespace duckdb
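The five-step comment above summarizes the sniffer's public flow. A rough usage sketch follows; here `options` is a CSVReaderOptions and `buffer_manager` a shared_ptr<CSVBufferManager> prepared elsewhere, and the exact constructor argument list is truncated in this diff, so the state-machine-cache argument is an assumption:

// Rough sketch only; constructor arguments are partly assumed.
CSVStateMachineCache state_machine_cache;
CSVSniffer sniffer(options, buffer_manager, state_machine_cache);
SnifferResult result = sniffer.SniffCSV(); // runs the five steps listed above
// result carries the detected column types and names
// (returned empty when the columns were explicitly set by the user).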
package/src/duckdb/src/include/duckdb/execution/operator/scan/csv/csv_state.hpp
@@ -0,0 +1,28 @@
+//===----------------------------------------------------------------------===//
+// DuckDB
+//
+// duckdb/execution/operator/scan/csv/csv_state.hpp
+//
+//
+//===----------------------------------------------------------------------===//
+
+#pragma once
+
+#include <cstdint>
+
+namespace duckdb {
+
+//! All States of CSV Parsing
+enum class CSVState : uint8_t {
+	STANDARD = 0,         //! Regular unquoted field state
+	DELIMITER = 1,        //! State after encountering a field separator (e.g., ;)
+	RECORD_SEPARATOR = 2, //! State after encountering a record separator (i.e., \n)
+	CARRIAGE_RETURN = 3,  //! State after encountering a carriage return(i.e., \r)
+	QUOTED = 4,           //! State when inside a quoted field
+	UNQUOTED = 5,         //! State when leaving a quoted field
+	ESCAPE = 6,           //! State when encountering an escape character (e.g., \)
+	EMPTY_LINE = 7,       //! State when encountering an empty line (i.e., \r\r \n\n, \n\r)
+	INVALID = 8           //! Got to an Invalid State, this should error.
+};
+
+} // namespace duckdb
package/src/duckdb/src/include/duckdb/execution/operator/scan/csv/csv_state_machine.hpp
@@ -14,19 +14,6 @@
 
 namespace duckdb {
 
-//! All States of CSV Parsing
-enum class CSVState : uint8_t {
-	STANDARD = 0,         //! Regular unquoted field state
-	DELIMITER = 1,        //! State after encountering a field separator (e.g., ;)
-	RECORD_SEPARATOR = 2, //! State after encountering a record separator (i.e., \n)
-	CARRIAGE_RETURN = 3,  //! State after encountering a carriage return(i.e., \r)
-	QUOTED = 4,           //! State when inside a quoted field
-	UNQUOTED = 5,         //! State when leaving a quoted field
-	ESCAPE = 6,           //! State when encountering an escape character (e.g., \)
-	EMPTY_LINE = 7,       //! State when encountering an empty line (i.e., \r\r \n\n, \n\r)
-	INVALID = 8           //! Got to an Invalid State, this should error.
-};
-
 //! The CSV State Machine comprises a state transition array (STA).
 //! The STA indicates the current state of parsing based on both the current and preceding characters.
 //! This reveals whether we are dealing with a Field, a New Line, a Delimiter, and so forth.
@@ -38,6 +25,14 @@ public:
 	explicit CSVStateMachine(CSVReaderOptions &options_p, const CSVStateMachineOptions &state_machine_options,
 	                         shared_ptr<CSVBufferManager> buffer_manager_p,
 	                         CSVStateMachineCache &csv_state_machine_cache_p);
+
+	//! Transition all states to next state, that depends on the current char
+	inline void Transition(char current_char) {
+		pre_previous_state = previous_state;
+		previous_state = state;
+		state = transition_array[state][static_cast<uint8_t>(current_char)];
+	}
+
 	//! Resets the state machine, so it can be used again
 	void Reset();
 
@@ -52,7 +47,7 @@ public:
 	idx_t start_row = 0;
 	//! The Transition Array is a Finite State Machine
 	//! It holds the transitions of all states, on all 256 possible different characters
-	const
+	const StateMachine &transition_array;
 
 	//! Both these variables are used for new line identifier detection
 	bool single_record_separator = false;
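The Transition helper above is what the sniffing passes (SniffDialect, SniffValue, Parse) now call once per character. A minimal sketch of driving it over a buffer, modeled on those Process functions; `machine` is assumed to be an initialized CSVStateMachine for the current dialect, and `buffer`/`buffer_size` are hypothetical inputs:

// Minimal sketch only.
idx_t rows = 0;
idx_t columns = 1;
for (idx_t pos = 0; pos < buffer_size; pos++) {
    machine.Transition(buffer[pos]);
    // after Transition, previous_state is the state reached by the preceding character
    columns += machine.previous_state == CSVState::DELIMITER;
    rows += machine.previous_state == CSVState::RECORD_SEPARATOR;
}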
package/src/duckdb/src/include/duckdb/execution/operator/scan/csv/csv_state_machine_cache.hpp
CHANGED
@@ -8,14 +8,28 @@
 
 #pragma once
 
-#include "duckdb/execution/operator/scan/csv/
+#include "duckdb/execution/operator/scan/csv/csv_state.hpp"
 #include "duckdb/execution/operator/scan/csv/csv_buffer_manager.hpp"
+#include "duckdb/execution/operator/scan/csv/csv_reader_options.hpp"
 #include "duckdb/execution/operator/scan/csv/quote_rules.hpp"
 
 namespace duckdb {
-
-
-
+
+//! Class to wrap the state machine matrix
+class StateMachine {
+public:
+	static constexpr uint32_t NUM_STATES = 9;
+	static constexpr uint32_t NUM_TRANSITIONS = 256;
+	CSVState state_machine[NUM_STATES][NUM_TRANSITIONS];
+
+	const CSVState *operator[](CSVState state) const {
+		return state_machine[static_cast<uint8_t>(state)];
+	}
+
+	CSVState *operator[](CSVState state) {
+		return state_machine[static_cast<uint8_t>(state)];
+	}
+};
 
 //! Hash function used in out state machine cache, it hashes and combines all options used to generate a state machine
 struct HashCSVStateMachineConfig {
@@ -36,12 +50,12 @@ public:
 	~CSVStateMachineCache() {};
 	//! Gets a state machine from the cache, if it's not from one the default options
 	//! It first caches it, then returns it.
-	const
+	const StateMachine &Get(const CSVStateMachineOptions &state_machine_options);
 
 private:
 	void Insert(const CSVStateMachineOptions &state_machine_options);
 	//! Cache on delimiter|quote|escape
-	unordered_map<CSVStateMachineOptions,
+	unordered_map<CSVStateMachineOptions, StateMachine, HashCSVStateMachineConfig> state_machine_cache;
 	//! Default value for options used to intialize CSV State Machine Cache
 	const vector<char> default_delimiter = {',', '|', ';', '\t'};
 	const vector<vector<char>> default_quote = {{'\"'}, {'\"', '\''}, {'\0'}};
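A short sketch of how a consumer would use the cache and the StateMachine wrapper declared above; the CSVStateMachineOptions constructor arguments are an assumption based on the delimiter/quote/escape options used elsewhere in this diff:

// Sketch only; assumes CSVStateMachineOptions can be built from delimiter, quote and escape.
CSVStateMachineCache cache;
const StateMachine &machine = cache.Get(CSVStateMachineOptions(',', '"', '"'));
CSVState next = machine[CSVState::STANDARD][static_cast<uint8_t>(',')];
// with these options, 'next' is expected to be CSVState::DELIMITER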
package/src/duckdb/src/include/duckdb/execution/operator/scan/csv/parallel_csv_reader.hpp
@@ -148,7 +148,7 @@ private:
 	//! Sets Position depending on the byte_start of this thread
 	bool SetPosition();
 	//! Called when scanning the 1st buffer, skips empty lines
-
+	bool SkipEmptyLines();
 	//! When a buffer finishes reading its piece, it still can try to scan up to the real end of the buffer
 	//! Up to finding a new line. This function sets the buffer_end and marks a boolean variable
 	//! when changing the buffer end the first time.
package/src/duckdb/src/include/duckdb.h
@@ -317,7 +317,7 @@ typedef enum {
 //===--------------------------------------------------------------------===//
 
 /*!
-Creates a new database or opens an existing database file stored at the
+Creates a new database or opens an existing database file stored at the given path.
 If no path is given a new in-memory database is created instead.
 The instantiated database should be closed with 'duckdb_close'
 
@@ -328,7 +328,7 @@ The instantiated database should be closed with 'duckdb_close'
 DUCKDB_API duckdb_state duckdb_open(const char *path, duckdb_database *out_database);
 
 /*!
-Extended version of duckdb_open. Creates a new database or opens an existing database file stored at the
+Extended version of duckdb_open. Creates a new database or opens an existing database file stored at the given path.
 
 * path: Path to the database file on disk, or `nullptr` or `:memory:` to open an in-memory database.
 * out_database: The result database object.
@@ -1009,7 +1009,7 @@ Binds an int64_t value to the prepared statement at the specified index.
 DUCKDB_API duckdb_state duckdb_bind_int64(duckdb_prepared_statement prepared_statement, idx_t param_idx, int64_t val);
 
 /*!
-Binds
+Binds a duckdb_hugeint value to the prepared statement at the specified index.
 */
 DUCKDB_API duckdb_state duckdb_bind_hugeint(duckdb_prepared_statement prepared_statement, idx_t param_idx,
                                             duckdb_hugeint val);
@@ -1040,12 +1040,12 @@ Binds an uint64_t value to the prepared statement at the specified index.
 DUCKDB_API duckdb_state duckdb_bind_uint64(duckdb_prepared_statement prepared_statement, idx_t param_idx, uint64_t val);
 
 /*!
-Binds
+Binds a float value to the prepared statement at the specified index.
 */
 DUCKDB_API duckdb_state duckdb_bind_float(duckdb_prepared_statement prepared_statement, idx_t param_idx, float val);
 
 /*!
-Binds
+Binds a double value to the prepared statement at the specified index.
 */
 DUCKDB_API duckdb_state duckdb_bind_double(duckdb_prepared_statement prepared_statement, idx_t param_idx, double val);
 
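The corrected doc comments above belong to the C API's open and bind functions. A small usage sketch follows (error handling elided; duckdb_connect, duckdb_prepare, duckdb_execute_prepared and the destroy/close calls are part of the same C API but are not shown in this diff):

duckdb_database db;
duckdb_connection con;
duckdb_prepared_statement stmt;
duckdb_result result;

duckdb_open(NULL, &db);                  // NULL or ":memory:" opens an in-memory database
duckdb_connect(db, &con);
duckdb_prepare(con, "SELECT ?::DOUBLE + ?::BIGINT", &stmt);
duckdb_bind_double(stmt, 1, 3.5);        // parameter indexes are 1-based
duckdb_bind_int64(stmt, 2, 7);
duckdb_execute_prepared(stmt, &result);

duckdb_destroy_result(&result);
duckdb_destroy_prepare(&stmt);
duckdb_disconnect(&con);
duckdb_close(&db);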
package/src/duckdb/src/main/extension/extension_helper.cpp
@@ -196,6 +196,9 @@ string ExtensionHelper::AddExtensionInstallHintToErrorMsg(ClientContext &context
 }
 
 bool ExtensionHelper::TryAutoLoadExtension(ClientContext &context, const string &extension_name) noexcept {
+	if (context.db->ExtensionIsLoaded(extension_name)) {
+		return true;
+	}
 	auto &dbconfig = DBConfig::GetConfig(context);
 	try {
 		if (dbconfig.options.autoinstall_known_extensions) {
@@ -211,6 +214,10 @@ bool ExtensionHelper::TryAutoLoadExtension(ClientContext &context, const string
 }
 
 void ExtensionHelper::AutoLoadExtension(ClientContext &context, const string &extension_name) {
+	if (context.db->ExtensionIsLoaded(extension_name)) {
+		// Avoid downloading again
+		return;
+	}
 	auto &dbconfig = DBConfig::GetConfig(context);
 	try {
 #ifndef DUCKDB_WASM
package/src/duckdb/src/optimizer/common_aggregate_optimizer.cpp
@@ -38,8 +38,8 @@ void CommonAggregateOptimizer::ExtractCommonAggregates(LogicalAggregate &aggr) {
 			// aggregate does not exist yet: add it to the map
 			aggregate_remap[*aggr.expressions[i]] = i;
 			if (i != original_index) {
-				// this aggregate is not erased, however an
-				// so we need to remap this
+				// this aggregate is not erased, however an aggregate BEFORE it has been erased
+				// so we need to remap this aggregate
 				ColumnBinding original_binding(aggr.aggregate_index, original_index);
 				ColumnBinding new_binding(aggr.aggregate_index, i);
 				aggregate_map[original_binding] = new_binding;
package/src/duckdb/src/planner/binder/tableref/plan_joinref.cpp
@@ -135,6 +135,9 @@ unique_ptr<LogicalOperator> LogicalComparisonJoin::CreateJoin(ClientContext &con
 	bool need_to_consider_arbitrary_expressions = true;
 	switch (reftype) {
 	case JoinRefType::ASOF: {
+		if (!arbitrary_expressions.empty()) {
+			throw BinderException("Invalid ASOF JOIN condition");
+		}
 		need_to_consider_arbitrary_expressions = false;
 		auto asof_idx = conditions.size();
 		for (size_t c = 0; c < conditions.size(); ++c) {