duckdb 1.1.0 → 1.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/binding.gyp +2 -1
- package/package.json +1 -1
- package/src/duckdb/extension/icu/third_party/icu/stubdata/stubdata.cpp +1 -1
- package/src/duckdb/extension/json/include/json_common.hpp +14 -4
- package/src/duckdb/extension/json/include/json_executors.hpp +11 -3
- package/src/duckdb/extension/json/json_extension.cpp +1 -1
- package/src/duckdb/extension/json/json_functions/json_extract.cpp +11 -3
- package/src/duckdb/extension/json/json_functions/json_value.cpp +4 -3
- package/src/duckdb/extension/json/json_functions.cpp +16 -7
- package/src/duckdb/extension/parquet/column_reader.cpp +3 -0
- package/src/duckdb/extension/parquet/column_writer.cpp +54 -43
- package/src/duckdb/extension/parquet/geo_parquet.cpp +19 -0
- package/src/duckdb/extension/parquet/include/geo_parquet.hpp +10 -6
- package/src/duckdb/extension/parquet/include/templated_column_reader.hpp +3 -3
- package/src/duckdb/extension/parquet/parquet_writer.cpp +2 -1
- package/src/duckdb/src/common/arrow/arrow_converter.cpp +1 -1
- package/src/duckdb/src/common/arrow/arrow_merge_event.cpp +1 -0
- package/src/duckdb/src/common/arrow/arrow_util.cpp +60 -0
- package/src/duckdb/src/common/arrow/arrow_wrapper.cpp +1 -53
- package/src/duckdb/src/common/cgroups.cpp +15 -24
- package/src/duckdb/src/common/constants.cpp +8 -0
- package/src/duckdb/src/common/enum_util.cpp +331 -326
- package/src/duckdb/src/common/http_util.cpp +5 -1
- package/src/duckdb/src/common/operator/cast_operators.cpp +6 -60
- package/src/duckdb/src/common/types/bit.cpp +1 -1
- package/src/duckdb/src/common/types/column/column_data_allocator.cpp +18 -1
- package/src/duckdb/src/common/types/row/tuple_data_allocator.cpp +2 -1
- package/src/duckdb/src/common/types/row/tuple_data_segment.cpp +5 -0
- package/src/duckdb/src/core_functions/aggregate/distributive/arg_min_max.cpp +1 -1
- package/src/duckdb/src/core_functions/aggregate/distributive/minmax.cpp +2 -1
- package/src/duckdb/src/execution/index/art/iterator.cpp +17 -15
- package/src/duckdb/src/execution/index/art/prefix.cpp +9 -34
- package/src/duckdb/src/execution/index/fixed_size_buffer.cpp +4 -3
- package/src/duckdb/src/execution/operator/aggregate/physical_ungrouped_aggregate.cpp +1 -0
- package/src/duckdb/src/execution/operator/csv_scanner/buffer_manager/csv_buffer.cpp +2 -1
- package/src/duckdb/src/execution/operator/csv_scanner/scanner/base_scanner.cpp +2 -2
- package/src/duckdb/src/execution/operator/csv_scanner/scanner/column_count_scanner.cpp +23 -1
- package/src/duckdb/src/execution/operator/csv_scanner/scanner/string_value_scanner.cpp +33 -4
- package/src/duckdb/src/execution/operator/csv_scanner/sniffer/csv_sniffer.cpp +23 -13
- package/src/duckdb/src/execution/operator/csv_scanner/sniffer/dialect_detection.cpp +23 -19
- package/src/duckdb/src/execution/operator/csv_scanner/sniffer/header_detection.cpp +12 -11
- package/src/duckdb/src/execution/operator/csv_scanner/sniffer/type_detection.cpp +20 -14
- package/src/duckdb/src/execution/operator/csv_scanner/state_machine/csv_state_machine_cache.cpp +4 -4
- package/src/duckdb/src/execution/operator/csv_scanner/util/csv_error.cpp +3 -1
- package/src/duckdb/src/execution/operator/join/physical_piecewise_merge_join.cpp +6 -1
- package/src/duckdb/src/function/cast/decimal_cast.cpp +33 -3
- package/src/duckdb/src/function/table/arrow/arrow_duck_schema.cpp +9 -0
- package/src/duckdb/src/function/table/arrow.cpp +34 -22
- package/src/duckdb/src/function/table/sniff_csv.cpp +4 -1
- package/src/duckdb/src/function/table/version/pragma_version.cpp +3 -3
- package/src/duckdb/src/include/duckdb/common/arrow/arrow_util.hpp +31 -0
- package/src/duckdb/src/include/duckdb/common/arrow/arrow_wrapper.hpp +2 -16
- package/src/duckdb/src/include/duckdb/common/operator/cast_operators.hpp +60 -0
- package/src/duckdb/src/include/duckdb/common/types/column/column_data_allocator.hpp +1 -0
- package/src/duckdb/src/include/duckdb/common/types/hugeint.hpp +0 -1
- package/src/duckdb/src/include/duckdb/common/types/row/row_data_collection.hpp +2 -1
- package/src/duckdb/src/include/duckdb/core_functions/aggregate/minmax_n_helpers.hpp +9 -5
- package/src/duckdb/src/include/duckdb/execution/executor.hpp +1 -0
- package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/base_scanner.hpp +5 -2
- package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/column_count_scanner.hpp +5 -1
- package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/csv_sniffer.hpp +5 -5
- package/src/duckdb/src/include/duckdb/execution/operator/helper/physical_result_collector.hpp +1 -0
- package/src/duckdb/src/include/duckdb/function/table/arrow/arrow_duck_schema.hpp +11 -0
- package/src/duckdb/src/include/duckdb/main/config.hpp +2 -2
- package/src/duckdb/src/include/duckdb/main/extension.hpp +1 -0
- package/src/duckdb/src/include/duckdb/main/extension_entries.hpp +14 -5
- package/src/duckdb/src/include/duckdb/main/extension_helper.hpp +1 -1
- package/src/duckdb/src/include/duckdb/main/settings.hpp +4 -2
- package/src/duckdb/src/include/duckdb/parser/keyword_helper.hpp +3 -0
- package/src/duckdb/src/include/duckdb/parser/parser.hpp +1 -1
- package/src/duckdb/src/include/duckdb/parser/simplified_token.hpp +7 -1
- package/src/duckdb/src/include/duckdb/planner/binder.hpp +2 -0
- package/src/duckdb/src/include/duckdb/planner/expression_binder/select_binder.hpp +2 -0
- package/src/duckdb/src/include/duckdb/planner/expression_binder.hpp +3 -1
- package/src/duckdb/src/include/duckdb/storage/block_manager.hpp +3 -1
- package/src/duckdb/src/include/duckdb/storage/buffer/block_handle.hpp +7 -4
- package/src/duckdb/src/include/duckdb/storage/buffer/buffer_handle.hpp +2 -2
- package/src/duckdb/src/include/duckdb/storage/buffer/buffer_pool.hpp +2 -1
- package/src/duckdb/src/include/duckdb/storage/buffer_manager.hpp +4 -4
- package/src/duckdb/src/include/duckdb/storage/standard_buffer_manager.hpp +3 -4
- package/src/duckdb/src/include/duckdb/storage/table/column_data.hpp +1 -1
- package/src/duckdb/src/include/duckdb/storage/table/row_group_collection.hpp +4 -2
- package/src/duckdb/src/include/duckdb/storage/table/standard_column_data.hpp +1 -1
- package/src/duckdb/src/include/duckdb/transaction/duck_transaction.hpp +1 -0
- package/src/duckdb/src/include/duckdb/transaction/local_storage.hpp +1 -0
- package/src/duckdb/src/include/duckdb/transaction/transaction_manager.hpp +1 -1
- package/src/duckdb/src/include/duckdb.h +8 -8
- package/src/duckdb/src/main/appender.cpp +1 -1
- package/src/duckdb/src/main/capi/duckdb_value-c.cpp +3 -3
- package/src/duckdb/src/main/capi/helper-c.cpp +4 -0
- package/src/duckdb/src/main/config.cpp +24 -11
- package/src/duckdb/src/main/database.cpp +6 -5
- package/src/duckdb/src/main/extension/extension_install.cpp +13 -8
- package/src/duckdb/src/main/extension/extension_load.cpp +10 -4
- package/src/duckdb/src/main/extension.cpp +1 -1
- package/src/duckdb/src/optimizer/filter_pushdown.cpp +10 -1
- package/src/duckdb/src/optimizer/join_filter_pushdown_optimizer.cpp +9 -5
- package/src/duckdb/src/optimizer/join_order/cardinality_estimator.cpp +14 -8
- package/src/duckdb/src/optimizer/join_order/query_graph_manager.cpp +2 -0
- package/src/duckdb/src/optimizer/join_order/relation_manager.cpp +15 -0
- package/src/duckdb/src/optimizer/optimizer.cpp +4 -1
- package/src/duckdb/src/optimizer/pushdown/pushdown_cross_product.cpp +1 -11
- package/src/duckdb/src/optimizer/pushdown/pushdown_inner_join.cpp +1 -7
- package/src/duckdb/src/optimizer/pushdown/pushdown_left_join.cpp +1 -1
- package/src/duckdb/src/optimizer/statistics/expression/propagate_cast.cpp +3 -0
- package/src/duckdb/src/optimizer/statistics/operator/propagate_join.cpp +1 -0
- package/src/duckdb/src/parser/keyword_helper.cpp +4 -0
- package/src/duckdb/src/parser/parser.cpp +20 -18
- package/src/duckdb/src/parser/transform/statement/transform_select_node.cpp +8 -3
- package/src/duckdb/src/planner/binder/expression/bind_function_expression.cpp +3 -0
- package/src/duckdb/src/planner/binder/expression/bind_lambda.cpp +7 -1
- package/src/duckdb/src/planner/binder/expression/bind_unnest_expression.cpp +13 -0
- package/src/duckdb/src/planner/binder/statement/bind_copy_database.cpp +7 -11
- package/src/duckdb/src/planner/binder/statement/bind_create.cpp +27 -10
- package/src/duckdb/src/planner/binder/statement/bind_export.cpp +24 -9
- package/src/duckdb/src/planner/binder/tableref/plan_joinref.cpp +1 -3
- package/src/duckdb/src/planner/binder.cpp +5 -6
- package/src/duckdb/src/planner/expression/bound_cast_expression.cpp +1 -0
- package/src/duckdb/src/planner/expression_binder/select_binder.cpp +9 -0
- package/src/duckdb/src/planner/operator/logical_copy_to_file.cpp +2 -2
- package/src/duckdb/src/planner/operator/logical_positional_join.cpp +1 -0
- package/src/duckdb/src/storage/buffer/block_handle.cpp +18 -21
- package/src/duckdb/src/storage/buffer/block_manager.cpp +12 -4
- package/src/duckdb/src/storage/buffer/buffer_handle.cpp +2 -2
- package/src/duckdb/src/storage/buffer/buffer_pool.cpp +12 -2
- package/src/duckdb/src/storage/buffer_manager.cpp +3 -2
- package/src/duckdb/src/storage/compression/rle.cpp +5 -2
- package/src/duckdb/src/storage/compression/string_uncompressed.cpp +2 -1
- package/src/duckdb/src/storage/metadata/metadata_manager.cpp +8 -7
- package/src/duckdb/src/storage/standard_buffer_manager.cpp +19 -20
- package/src/duckdb/src/storage/statistics/column_statistics.cpp +1 -2
- package/src/duckdb/src/storage/table/column_data.cpp +5 -2
- package/src/duckdb/src/storage/table/column_segment.cpp +2 -2
- package/src/duckdb/src/storage/table/row_group_collection.cpp +18 -14
- package/src/duckdb/src/storage/table/standard_column_data.cpp +3 -3
- package/src/duckdb/src/storage/wal_replay.cpp +2 -3
- package/src/duckdb/third_party/libpg_query/include/common/keywords.hpp +1 -0
- package/src/duckdb/third_party/libpg_query/include/nodes/parsenodes.hpp +1 -0
- package/src/duckdb/third_party/libpg_query/include/parser/parser.hpp +1 -2
- package/src/duckdb/third_party/libpg_query/include/pg_simplified_token.hpp +6 -4
- package/src/duckdb/third_party/libpg_query/include/postgres_parser.hpp +1 -1
- package/src/duckdb/third_party/libpg_query/postgres_parser.cpp +1 -1
- package/src/duckdb/third_party/libpg_query/src_backend_parser_gram.cpp +801 -799
- package/src/duckdb/third_party/libpg_query/src_backend_parser_parser.cpp +6 -2
- package/src/duckdb/third_party/libpg_query/src_common_keywords.cpp +0 -1
- package/src/duckdb/ub_src_common_arrow.cpp +2 -0
- package/vendor.py +1 -2
package/src/duckdb/src/execution/operator/csv_scanner/sniffer/dialect_detection.cpp
CHANGED
@@ -46,7 +46,7 @@ string DialectCandidates::Print() {
     }
     search_space << "\n";
     search_space << "Quote/Escape Candidates: ";
-    for (uint8_t i = 0; i < static_cast<uint8_t>(
+    for (uint8_t i = 0; i < static_cast<uint8_t>(quote_rule_candidates.size()); i++) {
        auto quote_candidate = quote_candidates_map[i];
        auto escape_candidate = escape_candidates_map[i];
        for (idx_t j = 0; j < quote_candidate.size(); j++) {
@@ -60,7 +60,7 @@ string DialectCandidates::Print() {
            search_space << ",";
          }
        }
-       if (i <
+       if (i < quote_rule_candidates.size() - 1) {
          search_space << ",";
        }
     }
@@ -111,7 +111,7 @@ DialectCandidates::DialectCandidates(const CSVStateMachineOptions &options) {
        for (auto &quote_rule : default_quote_rule) {
            quote_candidates_map[static_cast<uint8_t>(quote_rule)] = {options.quote.GetValue()};
        }
-       // also add it as
+       // also add it as an escape rule
        if (!IsQuoteDefault(options.quote.GetValue())) {
            escape_candidates_map[static_cast<uint8_t>(QuoteRule::QUOTES_RFC)].emplace_back(options.quote.GetValue());
        }
@@ -124,14 +124,14 @@ DialectCandidates::DialectCandidates(const CSVStateMachineOptions &options) {
     if (options.escape.IsSetByUser()) {
        // user provided escape: use that escape rule
        if (options.escape == '\0') {
-
+           quote_rule_candidates = {QuoteRule::QUOTES_RFC};
        } else {
-
+           quote_rule_candidates = {QuoteRule::QUOTES_OTHER};
        }
-       escape_candidates_map[static_cast<uint8_t>(
+       escape_candidates_map[static_cast<uint8_t>(quote_rule_candidates[0])] = {options.escape.GetValue()};
     } else {
        // no escape provided: try standard/common escapes
-
+       quote_rule_candidates = default_quote_rule;
     }
 }

@@ -146,12 +146,12 @@ void CSVSniffer::GenerateStateMachineSearchSpace(vector<unique_ptr<ColumnCountSc
     }
     CSVIterator first_iterator;
     bool iterator_set = false;
-    for (const auto
-       const auto &quote_candidates = dialect_candidates.quote_candidates_map.at(static_cast<uint8_t>(
+    for (const auto quote_rule : dialect_candidates.quote_rule_candidates) {
+       const auto &quote_candidates = dialect_candidates.quote_candidates_map.at(static_cast<uint8_t>(quote_rule));
        for (const auto &quote : quote_candidates) {
            for (const auto &delimiter : dialect_candidates.delim_candidates) {
                const auto &escape_candidates =
-                   dialect_candidates.escape_candidates_map.at(static_cast<uint8_t>(
+                   dialect_candidates.escape_candidates_map.at(static_cast<uint8_t>(quote_rule));
                for (const auto &escape : escape_candidates) {
                    for (const auto &comment : dialect_candidates.comment_candidates) {
                        D_ASSERT(buffer_manager);
@@ -181,7 +181,7 @@ void CSVSniffer::GenerateStateMachineSearchSpace(vector<unique_ptr<ColumnCountSc

 // Returns true if a comment is acceptable
 bool AreCommentsAcceptable(const ColumnCountResult &result, idx_t num_cols, bool comment_set_by_user) {
-    // For a comment to be acceptable, we want 3/5th's majority of
+    // For a comment to be acceptable, we want 3/5th's the majority of unmatched in the columns
     constexpr double min_majority = 0.6;
     // detected comments, are all lines that started with a comment character.
     double detected_comments = 0;
@@ -226,6 +226,12 @@ void CSVSniffer::AnalyzeDialectCandidate(unique_ptr<ColumnCountScanner> scanner,
     }
     idx_t consistent_rows = 0;
     idx_t num_cols = sniffed_column_counts.result_position == 0 ? 1 : sniffed_column_counts[0].number_of_columns;
+    const bool ignore_errors = options.ignore_errors.GetValue();
+    // If we are ignoring errors and not null_padding , we pick the most frequent number of columns as the right one
+    bool use_most_frequent_columns = ignore_errors && !options.null_padding;
+    if (use_most_frequent_columns) {
+       num_cols = sniffed_column_counts.GetMostFrequentColumnCount();
+    }
     idx_t padding_count = 0;
     idx_t comment_rows = 0;
     idx_t ignored_rows = 0;
@@ -234,7 +240,7 @@ void CSVSniffer::AnalyzeDialectCandidate(unique_ptr<ColumnCountScanner> scanner,
     if (sniffed_column_counts.result_position > rows_read) {
        rows_read = sniffed_column_counts.result_position;
     }
-    if (set_columns.IsCandidateUnacceptable(num_cols, options.null_padding,
+    if (set_columns.IsCandidateUnacceptable(num_cols, options.null_padding, ignore_errors,
                                             sniffed_column_counts[0].last_value_always_empty)) {
        // Not acceptable
        return;
@@ -242,8 +248,7 @@ void CSVSniffer::AnalyzeDialectCandidate(unique_ptr<ColumnCountScanner> scanner,
     idx_t header_idx = 0;
     for (idx_t row = 0; row < sniffed_column_counts.result_position; row++) {
        if (set_columns.IsCandidateUnacceptable(sniffed_column_counts[row].number_of_columns, options.null_padding,
-
-                                               sniffed_column_counts[row].last_value_always_empty)) {
+                                               ignore_errors, sniffed_column_counts[row].last_value_always_empty)) {
            // Not acceptable
            return;
        }
@@ -258,7 +263,7 @@ void CSVSniffer::AnalyzeDialectCandidate(unique_ptr<ColumnCountScanner> scanner,
            consistent_rows++;
        } else if (num_cols < sniffed_column_counts[row].number_of_columns &&
                   (!options.dialect_options.skip_rows.IsSetByUser() || comment_rows > 0) &&
-                  (!set_columns.IsSet() || options.null_padding)) {
+                  (!set_columns.IsSet() || options.null_padding) && (!first_valid || (!use_most_frequent_columns))) {
            // all rows up to this point will need padding
            if (!first_valid) {
                first_valid = true;
@@ -268,15 +273,14 @@ void CSVSniffer::AnalyzeDialectCandidate(unique_ptr<ColumnCountScanner> scanner,
            // we use the maximum amount of num_cols that we find
            num_cols = sniffed_column_counts[row].number_of_columns;
            dirty_notes = row;
-           // sniffed_column_counts.state_machine.dialect_options.rows_until_header = dirty_notes;
            dirty_notes_minus_comments = dirty_notes - comment_rows;
            header_idx = row;
            consistent_rows = 1;
-       } else if (sniffed_column_counts[row].number_of_columns == num_cols ||
-                  (options.ignore_errors.GetValue() && !options.null_padding)) {
+       } else if (sniffed_column_counts[row].number_of_columns == num_cols || (use_most_frequent_columns)) {
            if (!first_valid) {
                first_valid = true;
                sniffed_column_counts.state_machine.dialect_options.rows_until_header = row;
+               dirty_notes = row;
            }
            if (sniffed_column_counts[row].number_of_columns != num_cols) {
                ignored_rows++;
@@ -404,7 +408,7 @@ void CSVSniffer::AnalyzeDialectCandidate(unique_ptr<ColumnCountScanner> scanner,
     }
 }

-bool CSVSniffer::RefineCandidateNextChunk(ColumnCountScanner &candidate) {
+bool CSVSniffer::RefineCandidateNextChunk(ColumnCountScanner &candidate) const {
     auto &sniffed_column_counts = candidate.ParseChunk();
     for (idx_t i = 0; i < sniffed_column_counts.result_position; i++) {
        if (set_columns.IsSet()) {
package/src/duckdb/src/execution/operator/csv_scanner/sniffer/header_detection.cpp
CHANGED
@@ -1,7 +1,6 @@
 #include "duckdb/common/types/cast_helpers.hpp"
 #include "duckdb/execution/operator/csv_scanner/csv_sniffer.hpp"
 #include "duckdb/execution/operator/csv_scanner/csv_reader_options.hpp"
-#include "duckdb/common/types/value.hpp"

 #include "utf8proc.hpp"

@@ -32,8 +31,7 @@ static string TrimWhitespace(const string &col_name) {
     }

     // Find the last character that is not right trimmed
-    idx_t end;
-    end = begin;
+    idx_t end = begin;
     for (auto next = begin; next < col_name.size();) {
        auto bytes = utf8proc_iterate(str + next, NumericCast<utf8proc_ssize_t>(size - next), &codepoint);
        D_ASSERT(bytes > 0);
@@ -90,7 +88,9 @@ static string NormalizeColumnName(const string &col_name) {
     }

     // prepend _ if name starts with a digit or is a reserved keyword
-
+    auto keyword = KeywordHelper::KeywordCategoryType(col_name_cleaned);
+    if (keyword == KeywordCategory::KEYWORD_TYPE_FUNC || keyword == KeywordCategory::KEYWORD_RESERVED ||
+        (col_name_cleaned[0] >= '0' && col_name_cleaned[0] <= '9')) {
        col_name_cleaned = "_" + col_name_cleaned;
     }
     return col_name_cleaned;
@@ -98,10 +98,9 @@ static string NormalizeColumnName(const string &col_name) {

 // If our columns were set by the user, we verify if their names match with the first row
 bool CSVSniffer::DetectHeaderWithSetColumn(ClientContext &context, vector<HeaderValue> &best_header_row,
-                                           SetColumns &set_columns, CSVReaderOptions &options) {
+                                           const SetColumns &set_columns, CSVReaderOptions &options) {
     bool has_header = true;
-
-    bool first_row_consistent = true;
+
     std::ostringstream error;
     // User set the names, we must check if they match the first row
     // We do a +1 to check for situations where the csv file has an extra all null column
@@ -125,6 +124,8 @@ bool CSVSniffer::DetectHeaderWithSetColumn(ClientContext &context, vector<Header
     }

     if (!has_header) {
+       bool all_varchar = true;
+       bool first_row_consistent = true;
        // We verify if the types are consistent
        for (idx_t col = 0; col < set_columns.Size(); col++) {
            // try cast to sql_type of column
@@ -168,7 +169,7 @@ bool EmptyHeader(const string &col_name, bool is_null, bool normalize) {

 vector<string>
 CSVSniffer::DetectHeaderInternal(ClientContext &context, vector<HeaderValue> &best_header_row,
-                                 CSVStateMachine &state_machine, SetColumns &set_columns,
+                                 CSVStateMachine &state_machine, const SetColumns &set_columns,
                                  unordered_map<idx_t, vector<LogicalType>> &best_sql_types_candidates_per_column_idx,
                                  CSVReaderOptions &options, CSVErrorHandler &error_handler) {
     vector<string> detected_names;
@@ -187,9 +188,7 @@ CSVSniffer::DetectHeaderInternal(ClientContext &context, vector<HeaderValue> &be
        return detected_names;
     }
     // information for header detection
-    bool first_row_consistent = true;
     // check if header row is all null and/or consistent with detected column data types
-    bool first_row_nulls = true;
     // If null-padding is not allowed and there is a mismatch between our header candidate and the number of columns
     // We can't detect the dialect/type options properly
     if (!options.null_padding && best_sql_types_candidates_per_column_idx.size() != best_header_row.size()) {
@@ -198,12 +197,14 @@ CSVSniffer::DetectHeaderInternal(ClientContext &context, vector<HeaderValue> &be
                                              state_machine.dialect_options.state_machine_options.delimiter.GetValue());
        error_handler.Error(error);
     }
-    bool all_varchar = true;
     bool has_header;

     if (set_columns.IsSet()) {
        has_header = DetectHeaderWithSetColumn(context, best_header_row, set_columns, options);
     } else {
+       bool first_row_consistent = true;
+       bool all_varchar = true;
+       bool first_row_nulls = true;
        for (idx_t col = 0; col < best_header_row.size(); col++) {
            if (!best_header_row[col].IsNull()) {
                first_row_nulls = false;
package/src/duckdb/src/execution/operator/csv_scanner/sniffer/type_detection.cpp
CHANGED
@@ -103,6 +103,10 @@ bool CSVSniffer::CanYouCastIt(ClientContext &context, const string_t value, cons
     auto value_ptr = value.GetData();
     auto value_size = value.GetSize();
     switch (type.id()) {
+    case LogicalTypeId::BOOLEAN: {
+       bool dummy_value;
+       return TryCastStringBool(value_ptr, value_size, dummy_value, true);
+    }
     case LogicalTypeId::TINYINT: {
        int8_t dummy_value;
        return TrySimpleIntegerCast(value_ptr, value_size, dummy_value, false);
@@ -251,19 +255,20 @@ void CSVSniffer::InitializeDateAndTimeStampDetection(CSVStateMachine &candidate,
     auto user_format = options.dialect_options.date_format.find(sql_type.id());
     if (user_format->second.IsSetByUser()) {
        format_candidate.format.emplace_back(user_format->second.GetValue().format_specifier);
-    }
-
-
-
-
-
-
-
-
-
+    } else {
+       auto entry = format_template_candidates.find(sql_type.id());
+       if (entry != format_template_candidates.end()) {
+           const auto &format_template_list = entry->second;
+           for (const auto &t : format_template_list) {
+               const auto format_string = GenerateDateFormat(separator, t);
+               // don't parse ISO 8601
+               if (format_string.find("%Y-%m-%d") == string::npos) {
+                   format_candidate.format.emplace_back(format_string);
+               }
            }
        }
     }
+    // order by preference
     original_format_candidates = format_candidates;
 }
 // initialise the first candidate
@@ -290,7 +295,8 @@ void CSVSniffer::DetectDateAndTimeStampFormats(CSVStateMachine &candidate, const
     bool had_format_candidates = !save_format_candidates.empty();
     bool initial_format_candidates =
         save_format_candidates.size() == original_format_candidates.at(sql_type.id()).format.size();
-
+    bool is_set_by_user = options.dialect_options.date_format.find(sql_type.id())->second.IsSetByUser();
+    while (!type_format_candidates.empty() && !is_set_by_user) {
        // avoid using exceptions for flow control...
        auto &current_format = candidate.dialect_options.date_format[sql_type.id()].GetValue();
        if (current_format.Parse(dummy_val, result, true)) {
@@ -341,7 +347,7 @@ void CSVSniffer::SniffTypes(DataChunk &data_chunk, CSVStateMachine &state_machin
        // try cast from string to sql_type
        while (col_type_candidates.size() > 1) {
            const auto &sql_type = col_type_candidates.back();
-           // try formatting for date types if the user did not specify one and it starts with numeric
+           // try formatting for date types if the user did not specify one, and it starts with numeric
            // values.
            string separator;
            // If Value is not Null, Has a numeric date format, and the current investigated candidate is
@@ -382,7 +388,7 @@ void CSVSniffer::SniffTypes(DataChunk &data_chunk, CSVStateMachine &state_machin
 }

 // If we have a predefined date/timestamp format we set it
-void CSVSniffer::SetUserDefinedDateTimeFormat(CSVStateMachine &candidate) {
+void CSVSniffer::SetUserDefinedDateTimeFormat(CSVStateMachine &candidate) const {
     const vector<LogicalTypeId> data_time_formats {LogicalTypeId::DATE, LogicalTypeId::TIMESTAMP};
     for (auto &date_time_format : data_time_formats) {
        auto &user_option = options.dialect_options.date_format.at(date_time_format);
@@ -423,7 +429,7 @@ void CSVSniffer::DetectTypes() {
            }
        }
     }
-    if (break_loop) {
+    if (break_loop && !candidate->state_machine->options.ignore_errors.GetValue()) {
        continue;
     }
 }
package/src/duckdb/src/execution/operator/csv_scanner/state_machine/csv_state_machine_cache.cpp
CHANGED
@@ -22,7 +22,7 @@ void CSVStateMachineCache::Insert(const CSVStateMachineOptions &state_machine_op
     auto &transition_array = state_machine_cache[state_machine_options];

     for (uint32_t i = 0; i < StateMachine::NUM_STATES; i++) {
-       CSVState cur_state = CSVState(i);
+       CSVState cur_state = static_cast<CSVState>(i);
        switch (cur_state) {
        case CSVState::QUOTED:
        case CSVState::QUOTED_NEW_LINE:
@@ -234,11 +234,11 @@ CSVStateMachineCache::CSVStateMachineCache() {
     auto default_delimiter = DialectCandidates::GetDefaultDelimiter();
     auto default_comment = DialectCandidates::GetDefaultComment();

-    for (auto
-       const auto &quote_candidates = default_quote[static_cast<uint8_t>(
+    for (auto quote_rule : default_quote_rule) {
+       const auto &quote_candidates = default_quote[static_cast<uint8_t>(quote_rule)];
        for (const auto &quote : quote_candidates) {
            for (const auto &delimiter : default_delimiter) {
-               const auto &escape_candidates = default_escape[static_cast<uint8_t>(
+               const auto &escape_candidates = default_escape[static_cast<uint8_t>(quote_rule)];
                for (const auto &escape : escape_candidates) {
                    for (const auto &comment : default_comment) {
                        Insert({delimiter, quote, escape, comment, NewLineIdentifier::SINGLE_N});
package/src/duckdb/src/execution/operator/csv_scanner/util/csv_error.cpp
CHANGED
@@ -302,7 +302,9 @@ CSVError CSVError::UnterminatedQuotesError(const CSVReaderOptions &options, idx_
     std::ostringstream error;
     error << "Value with unterminated quote found." << '\n';
     std::ostringstream how_to_fix_it;
-    how_to_fix_it << "Possible
+    how_to_fix_it << "Possible fixes:" << '\n';
+    how_to_fix_it << "* Enable ignore errors (ignore_errors=true) to skip this row" << '\n';
+    how_to_fix_it << "* Set quote do empty or to a different value (e.g., quote=\'\')" << '\n';
     return CSVError(error.str(), UNTERMINATED_QUOTES, current_column, csv_row, error_info, row_byte_position,
                     byte_position, options, how_to_fix_it.str(), current_path);
 }
package/src/duckdb/src/execution/operator/join/physical_piecewise_merge_join.cpp
CHANGED
@@ -618,7 +618,12 @@ OperatorResultType PhysicalPiecewiseMergeJoin::ResolveComplexJoin(ExecutionConte

        if (tail_count < result_count) {
            result_count = tail_count;
-
+           if (result_count == 0) {
+               // Need to reset here otherwise we may use the non-flat chunk when constructing LEFT/OUTER
+               chunk.Reset();
+           } else {
+               chunk.Slice(*sel, result_count);
+           }
        }
     }

package/src/duckdb/src/function/cast/decimal_cast.cpp
CHANGED
@@ -114,11 +114,42 @@ struct DecimalScaleDownOperator {
     }
 };

+// This function detects if we can scale a decimal down to another.
+template <class INPUT_TYPE>
+bool CanScaleDownDecimal(INPUT_TYPE input, DecimalScaleInput<INPUT_TYPE> &data) {
+    int64_t divisor = UnsafeNumericCast<int64_t>(NumericHelper::POWERS_OF_TEN[data.source_scale]);
+    auto value = input % divisor;
+    auto rounded_input = input;
+    if (rounded_input < 0) {
+       rounded_input *= -1;
+       value *= -1;
+    }
+    if (value >= divisor / 2) {
+       rounded_input += divisor;
+    }
+    return rounded_input < data.limit && rounded_input > -data.limit;
+}
+
+template <>
+bool CanScaleDownDecimal<hugeint_t>(hugeint_t input, DecimalScaleInput<hugeint_t> &data) {
+    auto divisor = UnsafeNumericCast<hugeint_t>(Hugeint::POWERS_OF_TEN[data.source_scale]);
+    hugeint_t value = input % divisor;
+    hugeint_t rounded_input = input;
+    if (rounded_input < 0) {
+       rounded_input *= -1;
+       value *= -1;
+    }
+    if (value >= divisor / 2) {
+       rounded_input += divisor;
+    }
+    return rounded_input < data.limit && rounded_input > -data.limit;
+}
+
 struct DecimalScaleDownCheckOperator {
     template <class INPUT_TYPE, class RESULT_TYPE>
     static RESULT_TYPE Operation(INPUT_TYPE input, ValidityMask &mask, idx_t idx, void *dataptr) {
-       auto data =
-       if (input
+       auto data = static_cast<DecimalScaleInput<INPUT_TYPE> *>(dataptr);
+       if (!CanScaleDownDecimal(input, *data)) {
            auto error = StringUtil::Format("Casting value \"%s\" to type %s failed: value is out of range!",
                                            Decimal::ToString(input, data->source_width, data->source_scale),
                                            data->result.GetType().ToString());
@@ -145,7 +176,6 @@ bool TemplatedDecimalScaleDown(Vector &source, Vector &result, idx_t count, Cast
        return true;
     } else {
        // type might not fit: check limit
-
        auto limit = UnsafeNumericCast<SOURCE>(POWERS_SOURCE::POWERS_OF_TEN[target_width]);
        DecimalScaleInput<SOURCE> input(result, limit, divide_factor, parameters, source_width, source_scale);
        UnaryExecutor::GenericExecute<SOURCE, DEST, DecimalScaleDownCheckOperator>(source, result, count, &input,
package/src/duckdb/src/function/table/arrow/arrow_duck_schema.cpp
CHANGED
@@ -43,6 +43,15 @@ bool ArrowType::RunEndEncoded() const {
     return run_end_encoded;
 }

+void ArrowType::ThrowIfInvalid() const {
+    if (type.id() == LogicalTypeId::INVALID) {
+       if (not_implemented) {
+           throw NotImplementedException(error_message);
+       }
+       throw InvalidInputException(error_message);
+    }
+}
+
 LogicalType ArrowType::GetDuckType(bool use_dictionary) const {
     if (use_dictionary && dictionary_type) {
        return dictionary_type->GetDuckType();
package/src/duckdb/src/function/table/arrow.cpp
CHANGED
@@ -35,9 +35,11 @@ static unique_ptr<ArrowType> GetArrowExtensionType(const ArrowSchemaMetadata &ex
     // Check for arrow canonical extensions
     if (arrow_extension == "arrow.uuid") {
        if (format != "w:16") {
-
-
-
+           std::ostringstream error;
+           error
+               << "arrow.uuid must be a fixed-size binary of 16 bytes (i.e., \'w:16\'). It is incorrectly defined as:"
+               << format;
+           return make_uniq<ArrowType>(error.str());
        }
        return make_uniq<ArrowType>(LogicalType::UUID);
     } else if (arrow_extension == "arrow.json") {
@@ -49,40 +51,47 @@ static unique_ptr<ArrowType> GetArrowExtensionType(const ArrowSchemaMetadata &ex
        } else if (format == "vu") {
            return make_uniq<ArrowType>(LogicalType::JSON(), make_uniq<ArrowStringInfo>(ArrowVariableSizeType::VIEW));
        } else {
-
-
-
+           std::ostringstream error;
+           error
+               << "arrow.json must be of a varchar format (i.e., \'u\',\'U\' or \'vu\'). It is incorrectly defined as:"
+               << format;
+           return make_uniq<ArrowType>(error.str());
        }
     }
     // Check for DuckDB canonical extensions
     else if (arrow_extension == "duckdb.hugeint") {
        if (format != "w:16") {
-
-
-
+           std::ostringstream error;
+           error << "duckdb.hugeint must be a fixed-size binary of 16 bytes (i.e., \'w:16\'). It is incorrectly "
+                    "defined as:"
+                 << format;
+           return make_uniq<ArrowType>(error.str());
        }
        return make_uniq<ArrowType>(LogicalType::HUGEINT);
-
     } else if (arrow_extension == "duckdb.uhugeint") {
        if (format != "w:16") {
-
-
-
+           std::ostringstream error;
+           error << "duckdb.uhugeint must be a fixed-size binary of 16 bytes (i.e., \'w:16\'). It is incorrectly "
+                    "defined as:"
+                 << format;
+           return make_uniq<ArrowType>(error.str());
        }
        return make_uniq<ArrowType>(LogicalType::UHUGEINT);
     } else if (arrow_extension == "duckdb.time_tz") {
        if (format != "w:8") {
-
-
-
+           std::ostringstream error;
+           error << "duckdb.time_tz must be a fixed-size binary of 8 bytes (i.e., \'w:8\'). It is incorrectly defined "
+                    "as:"
+                 << format;
+           return make_uniq<ArrowType>(error.str());
        }
        return make_uniq<ArrowType>(LogicalType::TIME_TZ,
                                    make_uniq<ArrowDateTimeInfo>(ArrowDateTimeType::MICROSECONDS));
     } else if (arrow_extension == "duckdb.bit") {
        if (format != "z" && format != "Z") {
-
-
-
+           std::ostringstream error;
+           error << "duckdb.bit must be a blob (i.e., \'z\' or \'Z\'). It is incorrectly defined as:" << format;
+           return make_uniq<ArrowType>(error.str());
        } else if (format == "z") {
            auto type_info = make_uniq<ArrowStringInfo>(ArrowVariableSizeType::NORMAL);
            return make_uniq<ArrowType>(LogicalType::BIT, std::move(type_info));
@@ -91,9 +100,10 @@ static unique_ptr<ArrowType> GetArrowExtensionType(const ArrowSchemaMetadata &ex
            return make_uniq<ArrowType>(LogicalType::BIT, std::move(type_info));

     } else {
-
-
-
+       std::ostringstream error;
+       error << "Arrow Type with extension name: " << arrow_extension << " and format: " << format
+             << ", is not currently supported in DuckDB.";
+       return make_uniq<ArrowType>(error.str(), true);
     }
 }
 static unique_ptr<ArrowType> GetArrowLogicalTypeNoDictionary(ArrowSchema &schema) {
@@ -384,10 +394,12 @@ unique_ptr<ArrowArrayStreamWrapper> ProduceArrowScan(const ArrowScanFunctionData
     //! Generate Projection Pushdown Vector
     ArrowStreamParameters parameters;
     D_ASSERT(!column_ids.empty());
+    auto &arrow_types = function.arrow_table.GetColumns();
     for (idx_t idx = 0; idx < column_ids.size(); idx++) {
        auto col_idx = column_ids[idx];
        if (col_idx != COLUMN_IDENTIFIER_ROW_ID) {
            auto &schema = *function.schema_root.arrow_schema.children[col_idx];
+           arrow_types.at(col_idx)->ThrowIfInvalid();
            parameters.projected_columns.projection_map[idx] = schema.name;
            parameters.projected_columns.columns.emplace_back(schema.name);
            parameters.projected_columns.filter_to_col[idx] = col_idx;
package/src/duckdb/src/function/table/sniff_csv.cpp
CHANGED
@@ -96,6 +96,9 @@ string FormatOptions(char opt) {
     if (opt == '\'') {
        return "''";
     }
+    if (opt == '\0') {
+       return "";
+    }
     string result;
     result += opt;
     return result;
@@ -214,7 +217,7 @@ static void CSVSniffFunction(ClientContext &context, TableFunctionInput &data_p,
              << "'" << separator;
     }
     // 11.2. Quote
-    if (!sniffer_options.dialect_options.
+    if (!sniffer_options.dialect_options.state_machine_options.quote.IsSetByUser()) {
        csv_read << "quote="
                 << "'" << FormatOptions(sniffer_options.dialect_options.state_machine_options.quote.GetValue()) << "'"
                 << separator;
package/src/duckdb/src/function/table/version/pragma_version.cpp
CHANGED
@@ -1,5 +1,5 @@
 #ifndef DUCKDB_PATCH_VERSION
-#define DUCKDB_PATCH_VERSION "
+#define DUCKDB_PATCH_VERSION "1"
 #endif
 #ifndef DUCKDB_MINOR_VERSION
 #define DUCKDB_MINOR_VERSION 1
@@ -8,10 +8,10 @@
 #define DUCKDB_MAJOR_VERSION 1
 #endif
 #ifndef DUCKDB_VERSION
-#define DUCKDB_VERSION "v1.1.
+#define DUCKDB_VERSION "v1.1.1"
 #endif
 #ifndef DUCKDB_SOURCE_ID
-#define DUCKDB_SOURCE_ID "
+#define DUCKDB_SOURCE_ID "af39bd0dcf"
 #endif
 #include "duckdb/function/table/system_functions.hpp"
 #include "duckdb/main/database.hpp"
package/src/duckdb/src/include/duckdb/common/arrow/arrow_util.hpp
@@ -0,0 +1,31 @@
+//===----------------------------------------------------------------------===//
+//                         DuckDB
+//
+// duckdb/common/arrow/arrow_util.hpp
+//
+//
+//===----------------------------------------------------------------------===//
+
+#pragma once
+#include "duckdb/common/arrow/arrow.hpp"
+#include "duckdb/main/chunk_scan_state.hpp"
+#include "duckdb/main/client_properties.hpp"
+#include "duckdb/common/helper.hpp"
+#include "duckdb/common/error_data.hpp"
+
+namespace duckdb {
+
+class QueryResult;
+class DataChunk;
+
+class ArrowUtil {
+public:
+    static bool TryFetchChunk(ChunkScanState &scan_state, ClientProperties options, idx_t chunk_size, ArrowArray *out,
+                              idx_t &result_count, ErrorData &error);
+    static idx_t FetchChunk(ChunkScanState &scan_state, ClientProperties options, idx_t chunk_size, ArrowArray *out);
+
+private:
+    static bool TryFetchNext(QueryResult &result, unique_ptr<DataChunk> &out, ErrorData &error);
+};
+
+} // namespace duckdb
package/src/duckdb/src/include/duckdb/common/arrow/arrow_wrapper.hpp
CHANGED
@@ -9,14 +9,9 @@
 #pragma once
 #include "duckdb/common/arrow/arrow.hpp"
 #include "duckdb/common/helper.hpp"
-#include "duckdb/common/error_data.hpp"
-#include "duckdb/main/chunk_scan_state.hpp"
-#include "duckdb/main/client_properties.hpp"

 //! Here we have the internal duckdb classes that interact with Arrow's Internal Header (i.e., duckdb/commons/arrow.hpp)
 namespace duckdb {
-class QueryResult;
-class DataChunk;

 class ArrowSchemaWrapper {
 public:
@@ -49,23 +44,14 @@ public:
 public:
     void GetSchema(ArrowSchemaWrapper &schema);

-    shared_ptr<ArrowArrayWrapper> GetNextChunk();
+    virtual shared_ptr<ArrowArrayWrapper> GetNextChunk();

     const char *GetError();

-    ~ArrowArrayStreamWrapper();
+    virtual ~ArrowArrayStreamWrapper();
     ArrowArrayStreamWrapper() {
        arrow_array_stream.release = nullptr;
     }
 };

-class ArrowUtil {
-public:
-    static bool TryFetchChunk(ChunkScanState &scan_state, ClientProperties options, idx_t chunk_size, ArrowArray *out,
-                              idx_t &result_count, ErrorData &error);
-    static idx_t FetchChunk(ChunkScanState &scan_state, ClientProperties options, idx_t chunk_size, ArrowArray *out);
-
-private:
-    static bool TryFetchNext(QueryResult &result, unique_ptr<DataChunk> &out, ErrorData &error);
-};
 } // namespace duckdb