duckdb 0.8.2-dev4653.0 → 0.8.2-dev4711.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/duckdb/src/common/types/row/tuple_data_collection.cpp +7 -0
- package/src/duckdb/src/execution/operator/aggregate/physical_hash_aggregate.cpp +3 -3
- package/src/duckdb/src/execution/operator/csv_scanner/base_csv_reader.cpp +5 -1
- package/src/duckdb/src/execution/operator/csv_scanner/csv_buffer.cpp +18 -9
- package/src/duckdb/src/execution/operator/csv_scanner/csv_reader_options.cpp +11 -27
- package/src/duckdb/src/execution/operator/csv_scanner/csv_state_machine_cache.cpp +1 -2
- package/src/duckdb/src/execution/operator/csv_scanner/parallel_csv_reader.cpp +4 -0
- package/src/duckdb/src/execution/operator/csv_scanner/sniffer/csv_sniffer.cpp +11 -2
- package/src/duckdb/src/execution/operator/csv_scanner/sniffer/dialect_detection.cpp +8 -8
- package/src/duckdb/src/execution/operator/csv_scanner/sniffer/type_detection.cpp +7 -6
- package/src/duckdb/src/execution/operator/csv_scanner/sniffer/type_refinement.cpp +27 -6
- package/src/duckdb/src/execution/radix_partitioned_hashtable.cpp +49 -41
- package/src/duckdb/src/function/table/read_csv.cpp +12 -9
- package/src/duckdb/src/function/table/version/pragma_version.cpp +2 -2
- package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_collection.hpp +2 -0
- package/src/duckdb/src/include/duckdb/execution/operator/scan/csv/base_csv_reader.hpp +4 -0
- package/src/duckdb/src/include/duckdb/execution/operator/scan/csv/csv_buffer.hpp +1 -1
- package/src/duckdb/src/include/duckdb/execution/operator/scan/csv/csv_line_info.hpp +4 -0
- package/src/duckdb/src/include/duckdb/execution/operator/scan/csv/csv_reader_options.hpp +2 -4
- package/src/duckdb/src/include/duckdb/execution/operator/scan/csv/csv_sniffer.hpp +3 -1
- package/src/duckdb/src/include/duckdb/execution/operator/scan/csv/csv_state_machine_cache.hpp +1 -1
- package/src/duckdb/src/include/duckdb/execution/operator/scan/csv/parallel_csv_reader.hpp +1 -0
- package/src/duckdb/src/include/duckdb/execution/radix_partitioned_hashtable.hpp +1 -2
- package/src/duckdb/src/include/duckdb/main/extension_entries.hpp +3 -0
- package/src/duckdb/src/main/query_result.cpp +16 -10
- package/src/duckdb/src/storage/serialization/serialize_nodes.cpp +42 -44
package/package.json
CHANGED
package/src/duckdb/src/common/types/row/tuple_data_collection.cpp
CHANGED
@@ -433,6 +433,13 @@ bool TupleDataCollection::Scan(TupleDataParallelScanState &gstate, TupleDataLocalScanState &lstate, DataChunk &result) {
     return true;
 }
 
+bool TupleDataCollection::ScanComplete(const TupleDataScanState &state) const {
+    if (Count() == 0) {
+        return true;
+    }
+    return state.segment_index == segments.size() - 1 && state.chunk_index == segments.back().ChunkCount();
+}
+
 void TupleDataCollection::FinalizePinState(TupleDataPinState &pin_state, TupleDataSegment &segment) {
     segment.allocator->ReleaseOrStoreHandles(pin_state, segment);
 }
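The new ScanComplete treats an empty collection as trivially complete; otherwise the scan cursor must sit on the last segment with every chunk of it consumed. A minimal standalone sketch of the same end-of-cursor test (ScanCursor, Segment and Collection are illustrative stand-ins, not DuckDB API):

#include <cstddef>
#include <vector>

// Illustrative stand-ins for TupleDataScanState / TupleDataCollection.
struct ScanCursor {
    std::size_t segment_index = 0; // segment currently being read
    std::size_t chunk_index = 0;   // chunks consumed within that segment
};

struct Segment {
    std::size_t chunk_count;
};

struct Collection {
    std::vector<Segment> segments;

    bool ScanComplete(const ScanCursor &c) const {
        if (segments.empty()) {
            return true; // nothing to scan: trivially complete
        }
        // Complete only when the cursor sits on the last segment and has
        // consumed every chunk of it.
        return c.segment_index == segments.size() - 1 && c.chunk_index == segments.back().chunk_count;
    }
};
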
package/src/duckdb/src/execution/operator/aggregate/physical_hash_aggregate.cpp
CHANGED
@@ -782,13 +782,13 @@ public:
     }
 
     auto &ht_state = op.sink_state->Cast<HashAggregateGlobalSinkState>();
-    idx_t …
+    idx_t partitions = 0;
     for (size_t sidx = 0; sidx < op.groupings.size(); ++sidx) {
         auto &grouping = op.groupings[sidx];
         auto &grouping_gstate = ht_state.grouping_states[sidx];
-
+        partitions += grouping.table_data.NumberOfPartitions(*grouping_gstate.table_state);
     }
-    return MaxValue<idx_t>(1, …
+    return MaxValue<idx_t>(1, partitions);
 }
 };
package/src/duckdb/src/execution/operator/csv_scanner/base_csv_reader.cpp
CHANGED
@@ -263,7 +263,7 @@ bool BaseCSVReader::AddRow(DataChunk &insert_chunk, idx_t &column, string &error…
         return true;
     }
 
-    if (mode == ParserMode::SNIFFING_DATATYPES…
+    if (mode == ParserMode::SNIFFING_DATATYPES) {
         return true;
     }
@@ -480,6 +480,10 @@ bool BaseCSVReader::Flush(DataChunk &insert_chunk, idx_t buffer_idx, bool try_ad…
 
         bool was_already_null = FlatVector::IsNull(parse_vector, row_idx);
         if (!was_already_null && FlatVector::IsNull(result_vector, row_idx)) {
+            Increment(buffer_idx);
+            auto bla = GetLineError(global_row_idx, buffer_idx, false);
+            row_idx += bla;
+            row_idx -= bla;
             row_failed = true;
             failed_cells.emplace_back(row_idx, col_idx, row_line);
         }
package/src/duckdb/src/execution/operator/csv_scanner/csv_buffer.cpp
CHANGED
@@ -8,10 +8,14 @@ CSVBuffer::CSVBuffer(ClientContext &context, idx_t buffer_size_p, CSVFileHandle…
     : context(context), first_buffer(true), file_number(file_number_p), can_seek(file_handle.CanSeek()) {
     AllocateBuffer(buffer_size_p);
     auto buffer = Ptr();
-    …
+    actual_buffer_size = file_handle.Read(buffer, buffer_size_p);
+    while (actual_buffer_size < buffer_size_p && !file_handle.FinishedReading()) {
+        // We keep reading until this block is full
+        actual_buffer_size += file_handle.Read(&buffer[actual_buffer_size], buffer_size_p - actual_buffer_size);
+    }
     global_csv_start = global_csv_current_position;
     // BOM check (https://en.wikipedia.org/wiki/Byte_order_mark)
-    if (…
+    if (actual_buffer_size >= 3 && buffer[0] == '\xEF' && buffer[1] == '\xBB' && buffer[2] == '\xBF') {
         start_position += 3;
     }
     last_buffer = file_handle.FinishedReading();
@@ -22,13 +26,18 @@ CSVBuffer::CSVBuffer(CSVFileHandle &file_handle, ClientContext &context, idx_t b…
     : context(context), global_csv_start(global_csv_current_position), file_number(file_number_p),
       can_seek(file_handle.CanSeek()) {
     AllocateBuffer(buffer_size);
-    …
+    auto buffer = handle.Ptr();
+    actual_buffer_size = file_handle.Read(handle.Ptr(), buffer_size);
+    while (actual_buffer_size < buffer_size && !file_handle.FinishedReading()) {
+        // We keep reading until this block is full
+        actual_buffer_size += file_handle.Read(&buffer[actual_buffer_size], buffer_size - actual_buffer_size);
+    }
     last_buffer = file_handle.FinishedReading();
 }
 
 shared_ptr<CSVBuffer> CSVBuffer::Next(CSVFileHandle &file_handle, idx_t buffer_size, idx_t file_number_p) {
     auto next_csv_buffer =
-        make_shared<CSVBuffer>(file_handle, context, buffer_size, global_csv_start + …
+        make_shared<CSVBuffer>(file_handle, context, buffer_size, global_csv_start + actual_buffer_size, file_number_p);
     if (next_csv_buffer->GetBufferSize() == 0) {
         // We are done reading
         return nullptr;
@@ -43,13 +52,13 @@ void CSVBuffer::AllocateBuffer(idx_t buffer_size) {
 }
 
 idx_t CSVBuffer::GetBufferSize() {
-    return …
+    return actual_buffer_size;
 }
 
 void CSVBuffer::Reload(CSVFileHandle &file_handle) {
-    AllocateBuffer(…
+    AllocateBuffer(actual_buffer_size);
     file_handle.Seek(global_csv_start);
-    file_handle.Read(handle.Ptr(), …
+    file_handle.Read(handle.Ptr(), actual_buffer_size);
 }
 
 unique_ptr<CSVBufferHandle> CSVBuffer::Pin(CSVFileHandle &file_handle) {
@@ -59,8 +68,8 @@ unique_ptr<CSVBufferHandle> CSVBuffer::Pin(CSVFileHandle &file_handle) {
         block = nullptr;
         Reload(file_handle);
     }
-    return make_uniq<CSVBufferHandle>(buffer_manager.Pin(block), …
-                                      start_position, file_number);
+    return make_uniq<CSVBufferHandle>(buffer_manager.Pin(block), actual_buffer_size, first_buffer, last_buffer,
+                                      global_csv_start, start_position, file_number);
 }
 
 void CSVBuffer::Unpin() {
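Both constructors now loop on short reads so a CSV buffer is always filled to capacity unless the file runs out, and the BOM check only fires when at least three bytes were actually read. A minimal sketch of the same fill-then-skip-BOM logic against a generic reader (FillBuffer and the callback signatures are illustrative, not DuckDB API):

#include <cstddef>
#include <cstring>
#include <functional>

// Illustrative fill-then-skip-BOM logic; read_fn stands in for
// CSVFileHandle::Read and may return fewer bytes than requested,
// finished_fn for CSVFileHandle::FinishedReading.
std::size_t FillBuffer(char *buffer, std::size_t buffer_size,
                       const std::function<std::size_t(char *, std::size_t)> &read_fn,
                       const std::function<bool()> &finished_fn, std::size_t &start_position) {
    std::size_t actual_size = read_fn(buffer, buffer_size);
    while (actual_size < buffer_size && !finished_fn()) {
        // Keep reading until the block is full or the file is exhausted.
        actual_size += read_fn(buffer + actual_size, buffer_size - actual_size);
    }
    // UTF-8 byte order mark (EF BB BF) must be skipped, and is only checked
    // when at least three bytes were actually read.
    if (actual_size >= 3 && std::memcmp(buffer, "\xEF\xBB\xBF", 3) == 0) {
        start_position += 3;
    }
    return actual_size;
}
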
package/src/duckdb/src/execution/operator/csv_scanner/csv_reader_options.cpp
CHANGED
@@ -168,38 +168,24 @@ void CSVReaderOptions::SetReadOption(const string &loption, const Value &value,…
     if (loption == "auto_detect") {
         auto_detect = ParseBoolean(value, loption);
     } else if (loption == "sample_size") {
-        int64_t …
-        if (…
+        int64_t sample_size_option = ParseInteger(value, loption);
+        if (sample_size_option < 1 && sample_size_option != -1) {
             throw BinderException("Unsupported parameter for SAMPLE_SIZE: cannot be smaller than 1");
         }
-        if (…
-        …
-        …
-        } else if (sample_size <= STANDARD_VECTOR_SIZE) {
-            sample_chunk_size = sample_size;
-            sample_chunks = 1;
+        if (sample_size_option == -1) {
+            // If -1, we basically read the whole thing
+            sample_size_chunks = NumericLimits<idx_t>().Maximum();
         } else {
-            …
-            …
+            sample_size_chunks = sample_size_option / STANDARD_VECTOR_SIZE;
+            if (sample_size_option % STANDARD_VECTOR_SIZE != 0) {
+                sample_size_chunks++;
+            }
         }
+
     } else if (loption == "skip") {
         SetSkipRows(ParseInteger(value, loption));
     } else if (loption == "max_line_size" || loption == "maximum_line_size") {
         maximum_line_size = ParseInteger(value, loption);
-    } else if (loption == "sample_chunk_size") {
-        sample_chunk_size = ParseInteger(value, loption);
-        if (sample_chunk_size > STANDARD_VECTOR_SIZE) {
-            throw BinderException(
-                "Unsupported parameter for SAMPLE_CHUNK_SIZE: cannot be bigger than STANDARD_VECTOR_SIZE %d",
-                STANDARD_VECTOR_SIZE);
-        } else if (sample_chunk_size < 1) {
-            throw BinderException("Unsupported parameter for SAMPLE_CHUNK_SIZE: cannot be smaller than 1");
-        }
-    } else if (loption == "sample_chunks") {
-        sample_chunks = ParseInteger(value, loption);
-        if (sample_chunks < 1) {
-            throw BinderException("Unsupported parameter for SAMPLE_CHUNKS: cannot be smaller than 1");
-        }
     } else if (loption == "force_not_null") {
         force_not_null = ParseColumnList(value, expected_names, loption);
     } else if (loption == "date_format" || loption == "dateformat") {
@@ -322,7 +308,7 @@ string CSVReaderOptions::ToString() const {
            (has_escape ? "'" : (auto_detect ? "' (auto detected)" : "' (default)")) +
            "\n header=" + std::to_string(dialect_options.header) +
            (has_header ? "" : (auto_detect ? " (auto detected)" : "' (default)")) +
-           "\n sample_size=" + std::to_string(…
+           "\n sample_size=" + std::to_string(sample_size_chunks * STANDARD_VECTOR_SIZE) +
            "\n ignore_errors=" + std::to_string(ignore_errors) + "\n all_varchar=" + std::to_string(all_varchar);
 }
 
@@ -489,8 +475,6 @@ void CSVReaderOptions::ToNamedParameters(named_parameter_map_t &named_params) {
     if (skip_rows_set) {
         named_params["skip"] = Value::BIGINT(GetSkipRows());
     }
-    named_params["sample_chunks"] = Value::BIGINT(sample_chunks);
-    named_params["sample_chunk_size"] = Value::BIGINT(sample_chunk_size);
     named_params["null_padding"] = Value::BOOLEAN(null_padding);
     if (!date_format.at(LogicalType::DATE).format_specifier.empty()) {
         named_params["dateformat"] = Value(date_format.at(LogicalType::DATE).format_specifier);
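sample_size is now translated into a whole number of vector-sized chunks: -1 means "sample everything", anything else is rounded up by ceiling division. A standalone sketch of that conversion (kVectorSize stands in for STANDARD_VECTOR_SIZE):

#include <cstdint>
#include <limits>
#include <stdexcept>

// kVectorSize stands in for STANDARD_VECTOR_SIZE (commonly 2048).
static constexpr int64_t kVectorSize = 2048;

// Mirrors the new sample_size handling: -1 means "sample everything",
// anything else is rounded up to a whole number of chunks.
uint64_t SampleSizeToChunks(int64_t sample_size_rows) {
    if (sample_size_rows < 1 && sample_size_rows != -1) {
        throw std::invalid_argument("sample_size cannot be smaller than 1");
    }
    if (sample_size_rows == -1) {
        return std::numeric_limits<uint64_t>::max();
    }
    uint64_t chunks = static_cast<uint64_t>(sample_size_rows) / kVectorSize;
    if (sample_size_rows % kVectorSize != 0) {
        chunks++; // ceiling division: a partial chunk still costs one chunk
    }
    return chunks;
}
// e.g. with kVectorSize == 2048: SampleSizeToChunks(20480) == 10 and
// SampleSizeToChunks(1) == 1, matching the new default of 20480 rows.
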
package/src/duckdb/src/execution/operator/csv_scanner/csv_state_machine_cache.cpp
CHANGED
@@ -29,8 +29,7 @@ void CSVStateMachineCache::Insert(const CSVStateMachineOptions &state_machine_op…
         InitializeTransitionArray(transition_array[i], quoted_state);
         break;
     case unquoted_state:
-        …
-        break;
+    case invalid_state:
     case escape_state:
         InitializeTransitionArray(transition_array[i], invalid_state);
         break;
package/src/duckdb/src/execution/operator/csv_scanner/parallel_csv_reader.cpp
CHANGED
@@ -647,6 +647,10 @@ idx_t ParallelCSVReader::GetLineError(idx_t line_error, idx_t buffer_idx, bool s…
     }
 }
 
+void ParallelCSVReader::Increment(idx_t buffer_idx) {
+    return buffer->line_info->Increment(file_idx, buffer_idx);
+}
+
 bool ParallelCSVReader::TryParseCSV(ParserMode mode) {
     DataChunk dummy_chunk;
     string error_message;
package/src/duckdb/src/execution/operator/csv_scanner/sniffer/csv_sniffer.cpp
CHANGED
@@ -3,8 +3,9 @@
 namespace duckdb {
 
 CSVSniffer::CSVSniffer(CSVReaderOptions &options_p, shared_ptr<CSVBufferManager> buffer_manager_p,
-                       CSVStateMachineCache &state_machine_cache_p)
-    : state_machine_cache(state_machine_cache_p), options(options_p), buffer_manager(std::move(buffer_manager_p)) {
+                       CSVStateMachineCache &state_machine_cache_p, bool explicit_set_columns_p)
+    : state_machine_cache(state_machine_cache_p), options(options_p), buffer_manager(std::move(buffer_manager_p)),
+      explicit_set_columns(explicit_set_columns_p) {
 
     // Check if any type is BLOB
     for (auto &type : options.sql_type_list) {
@@ -24,6 +25,14 @@ CSVSniffer::CSVSniffer(CSVReaderOptions &options_p, shared_ptr<CSVBufferManager>…
 SnifferResult CSVSniffer::SniffCSV() {
     // 1. Dialect Detection
     DetectDialect();
+    if (explicit_set_columns) {
+        if (!candidates.empty()) {
+            options.dialect_options.state_machine_options = candidates[0]->dialect_options.state_machine_options;
+            options.dialect_options.new_line = candidates[0]->dialect_options.new_line;
+        }
+        // We do not need to run type and header detection as these were defined by the user
+        return SnifferResult(detected_types, names);
+    }
     // 2. Type Detection
     DetectTypes();
     // 3. Header Detection
package/src/duckdb/src/execution/operator/csv_scanner/sniffer/dialect_detection.cpp
CHANGED
@@ -15,7 +15,7 @@ struct SniffDialect {
     inline static bool Process(CSVStateMachine &machine, vector<idx_t> &sniffed_column_counts, char current_char,
                                idx_t current_pos) {
 
-        D_ASSERT(sniffed_column_counts.size() == …
+        D_ASSERT(sniffed_column_counts.size() == STANDARD_VECTOR_SIZE);
 
         if (machine.state == CSVState::INVALID) {
             sniffed_column_counts.clear();
@@ -45,7 +45,7 @@ struct SniffDialect {
         machine.single_record_separator = ((machine.state != CSVState::RECORD_SEPARATOR && carriage_return) ||
                                            (machine.state == CSVState::RECORD_SEPARATOR && !carriage_return)) ||
                                           machine.single_record_separator;
-        if (machine.cur_rows >= …
+        if (machine.cur_rows >= STANDARD_VECTOR_SIZE) {
             // We sniffed enough rows
             return true;
         }
@@ -55,10 +55,10 @@ struct SniffDialect {
         if (machine.state == CSVState::INVALID) {
             return;
         }
-        if (machine.cur_rows < …
+        if (machine.cur_rows < STANDARD_VECTOR_SIZE && machine.state == CSVState::DELIMITER) {
             sniffed_column_counts[machine.cur_rows] = ++machine.column_count;
         }
-        if (machine.cur_rows < …
+        if (machine.cur_rows < STANDARD_VECTOR_SIZE && machine.state != CSVState::EMPTY_LINE) {
             sniffed_column_counts[machine.cur_rows++] = machine.column_count;
         }
         NewLineIdentifier suggested_newline;
@@ -145,7 +145,7 @@ void CSVSniffer::GenerateStateMachineSearchSpace(vector<unique_ptr<CSVStateMachi…
 void CSVSniffer::AnalyzeDialectCandidate(unique_ptr<CSVStateMachine> state_machine, idx_t &rows_read,
                                          idx_t &best_consistent_rows, idx_t &prev_padding_count) {
     // The sniffed_column_counts variable keeps track of the number of columns found for each row
-    vector<idx_t> sniffed_column_counts(…
+    vector<idx_t> sniffed_column_counts(STANDARD_VECTOR_SIZE);
 
     state_machine->csv_buffer_iterator.Process<SniffDialect>(*state_machine, sniffed_column_counts);
     idx_t start_row = options.dialect_options.skip_rows;
@@ -244,7 +244,7 @@ void CSVSniffer::AnalyzeDialectCandidate(unique_ptr<CSVStateMachi…
 }
 
 bool CSVSniffer::RefineCandidateNextChunk(CSVStateMachine &candidate) {
-    vector<idx_t> sniffed_column_counts(…
+    vector<idx_t> sniffed_column_counts(STANDARD_VECTOR_SIZE);
     candidate.csv_buffer_iterator.Process<SniffDialect>(candidate, sniffed_column_counts);
     bool allow_padding = options.null_padding;
 
@@ -268,9 +268,9 @@ void CSVSniffer::RefineCandidates() {
         return;
     }
     for (auto &cur_candidate : candidates) {
-        for (idx_t i = 1; i <= options.…
+        for (idx_t i = 1; i <= options.sample_size_chunks; i++) {
             bool finished_file = cur_candidate->csv_buffer_iterator.Finished();
-            if (finished_file || i == options.…
+            if (finished_file || i == options.sample_size_chunks) {
                 // we finished the file or our chunk sample successfully: stop
                 auto successful_candidate = std::move(cur_candidate);
                 candidates.clear();
package/src/duckdb/src/execution/operator/csv_scanner/sniffer/type_detection.cpp
CHANGED
@@ -283,11 +283,7 @@ void CSVSniffer::DetectTypes() {
         candidate->Reset();
 
         // Parse chunk and read csv with info candidate
-        …
-        if (options.sample_chunk_size == 1) {
-            sample_size++;
-        }
-        vector<TupleSniffing> tuples(sample_size);
+        vector<TupleSniffing> tuples(STANDARD_VECTOR_SIZE);
         candidate->csv_buffer_iterator.Process<SniffValue>(*candidate, tuples);
         // Potentially Skip empty rows (I find this dirty, but it is what the original code does)
         idx_t true_start = 0;
@@ -311,8 +307,10 @@ void CSVSniffer::DetectTypes() {
                 break;
             }
         }
+        if (values_start > 0) {
+            tuples.erase(tuples.begin(), tuples.begin() + values_start);
+        }
 
-        tuples.erase(tuples.begin(), tuples.begin() + values_start);
         idx_t row_idx = 0;
         if (tuples.size() > 1 && (!options.has_header || (options.has_header && options.dialect_options.header))) {
             // This means we have more than one row, hence we can use the first row to detect if we have a header
@@ -327,6 +325,9 @@ void CSVSniffer::DetectTypes() {
         for (; row_idx < tuples.size(); row_idx++) {
             for (idx_t col = 0; col < tuples[row_idx].values.size(); col++) {
                 auto &col_type_candidates = info_sql_types_candidates[col];
+                // col_type_candidates can't be empty since anything in a CSV file should at least be a string
+                // and we validate utf-8 compatibility when creating the type
+                D_ASSERT(!col_type_candidates.empty());
                 auto cur_top_candidate = col_type_candidates.back();
                 auto dummy_val = tuples[row_idx].values[col];
                 // try cast from string to sql_type
package/src/duckdb/src/execution/operator/csv_scanner/sniffer/type_refinement.cpp
CHANGED
@@ -46,7 +46,8 @@ struct Parse {
             validity_mask.SetInvalid(machine.cur_rows);
         }
     }
-    if (machine.state == CSVState::STANDARD…
+    if (machine.state == CSVState::STANDARD ||
+        (machine.state == CSVState::QUOTED && machine.previous_state == CSVState::QUOTED)) {
         machine.value += current_char;
     }
     machine.cur_rows +=…
@@ -57,7 +58,7 @@ struct Parse {
     machine.cur_rows += machine.state != CSVState::RECORD_SEPARATOR && carriage_return;
     machine.column_count -= machine.column_count * (machine.state != CSVState::RECORD_SEPARATOR && carriage_return);
 
-    if (machine.cur_rows >= …
+    if (machine.cur_rows >= STANDARD_VECTOR_SIZE) {
         // We sniffed enough rows
         return true;
     }
@@ -65,11 +66,22 @@ struct Parse {
 }
 
 inline static void Finalize(CSVStateMachine &machine, DataChunk &parse_chunk) {
-    if (machine.cur_rows < …
+    if (machine.cur_rows < STANDARD_VECTOR_SIZE && machine.state != CSVState::EMPTY_LINE) {
         machine.VerifyUTF8();
         auto &v = parse_chunk.data[machine.column_count++];
         auto parse_data = FlatVector::GetData<string_t>(v);
-        …
+        if (machine.value.empty()) {
+            auto &validity_mask = FlatVector::Validity(v);
+            validity_mask.SetInvalid(machine.cur_rows);
+        } else {
+            parse_data[machine.cur_rows] = StringVector::AddStringOrBlob(v, string_t(machine.value));
+        }
+        while (machine.column_count < parse_chunk.ColumnCount()) {
+            auto &v_pad = parse_chunk.data[machine.column_count++];
+            auto &validity_mask = FlatVector::Validity(v_pad);
+            validity_mask.SetInvalid(machine.cur_rows);
+        }
+        machine.cur_rows++;
     }
     parse_chunk.SetCardinality(machine.cur_rows);
 }
@@ -104,8 +116,8 @@ void CSVSniffer::RefineTypes() {
         return;
     }
     DataChunk parse_chunk;
-    parse_chunk.Initialize(BufferAllocator::Get(buffer_manager->context), detected_types, …
-    for (idx_t i = 1; i < best_candidate->options.…
+    parse_chunk.Initialize(BufferAllocator::Get(buffer_manager->context), detected_types, STANDARD_VECTOR_SIZE);
+    for (idx_t i = 1; i < best_candidate->options.sample_size_chunks; i++) {
         bool finished_file = best_candidate->csv_buffer_iterator.Finished();
         if (finished_file) {
             // we finished the file: stop
@@ -124,6 +136,7 @@ void CSVSniffer::RefineTypes() {
         best_candidate->csv_buffer_iterator.Process<Parse>(*best_candidate, parse_chunk);
         for (idx_t col = 0; col < parse_chunk.ColumnCount(); col++) {
             vector<LogicalType> &col_type_candidates = best_sql_types_candidates_per_column_idx[col];
+            bool is_bool_type = col_type_candidates.back() == LogicalType::BOOLEAN;
             while (col_type_candidates.size() > 1) {
                 const auto &sql_type = col_type_candidates.back();
                 // narrow down the date formats
@@ -154,6 +167,14 @@ void CSVSniffer::RefineTypes() {
                 if (TryCastVector(parse_chunk.data[col], parse_chunk.size(), sql_type)) {
                     break;
                 } else {
+                    if (col_type_candidates.back() == LogicalType::BOOLEAN && is_bool_type) {
+                        // If we thought this was a boolean value (i.e., T,F, True, False) and it is not, we
+                        // immediately pop to varchar.
+                        while (col_type_candidates.back() != LogicalType::VARCHAR) {
+                            col_type_candidates.pop_back();
+                        }
+                        break;
+                    }
                     col_type_candidates.pop_back();
                 }
             }
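The refinement loop keeps a per-column stack of candidate types with VARCHAR (most general) at the front and the most specific candidate at the back; the new branch skips the intermediate candidates entirely when a column that looked boolean stops casting. A sketch of that short-circuit (the Type enum and stack layout are illustrative, not DuckDB API):

#include <vector>

// Illustrative per-column candidate stack, mirroring the sniffer's
// best_sql_types_candidates_per_column_idx: VARCHAR at the front,
// most specific candidate at the back.
enum class Type { VARCHAR, DOUBLE, BIGINT, BOOLEAN };

void OnCastFailure(std::vector<Type> &candidates, bool started_as_bool) {
    if (started_as_bool && candidates.back() == Type::BOOLEAN) {
        // A column that looked boolean ("T"/"F"/"True"/"False") just stopped
        // casting; no numeric candidate can rescue it, so fall straight
        // through to VARCHAR instead of trying each type in between.
        while (candidates.back() != Type::VARCHAR) {
            candidates.pop_back();
        }
        return;
    }
    candidates.pop_back(); // otherwise move on to the next, more general type
}
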
package/src/duckdb/src/execution/radix_partitioned_hashtable.cpp
CHANGED
@@ -474,14 +474,9 @@ void RadixPartitionedHashTable::Finalize(ClientContext &, GlobalSinkState &gstat…
 //===--------------------------------------------------------------------===//
 // Source
 //===--------------------------------------------------------------------===//
-idx_t RadixPartitionedHashTable::…
-    const auto count = CountInternal(sink_p);
-    return count == 0 && grouping_set.empty() ? 1 : count;
-}
-
-idx_t RadixPartitionedHashTable::CountInternal(GlobalSinkState &sink_p) const {
+idx_t RadixPartitionedHashTable::NumberOfPartitions(GlobalSinkState &sink_p) const {
     auto &sink = sink_p.Cast<RadixHTGlobalSinkState>();
-    return sink.…
+    return sink.partitions.size();
 }
 
 void RadixPartitionedHashTable::SetMultiScan(GlobalSinkState &sink_p) {
@@ -570,8 +565,7 @@ bool RadixHTGlobalSourceState::AssignTask(RadixHTGlobalSinkState &sink, RadixHTL…
     D_ASSERT(lstate.scan_status != RadixHTScanStatus::IN_PROGRESS);
 
     const auto n_partitions = sink.partitions.size();
-    if (…
-        finished = true;
+    if (finished) {
         return false;
     }
     // We first try to assign a Scan task, then a Finalize task if that didn't work, without using any locks
@@ -595,6 +589,11 @@ bool RadixHTGlobalSourceState::AssignTask(RadixHTGlobalSinkState &sink, RadixHTL…
         return true;
     }
 
+    // We didn't assign a Scan task
+    if (sink.finalize_idx >= n_partitions) {
+        return false; // No finalize tasks left
+    }
+
     // We can just increment the atomic here, much simpler than assigning the scan task
     lstate.task_idx = sink.finalize_idx++;
     if (lstate.task_idx < n_partitions) {
@@ -603,7 +602,7 @@ bool RadixHTGlobalSourceState::AssignTask(RadixHTGlobalSinkState &sink, RadixHTL…
         return true;
     }
 
-    // We didn't manage to assign a …
+    // We didn't manage to assign a Finalize task
     return false;
 }
 
@@ -693,15 +692,18 @@ void RadixHTLocalSourceState::Scan(RadixHTGlobalSinkState &sink, RadixHTGlobalSo…
 
     if (!data_collection.Scan(scan_state, scan_chunk)) {
         scan_status = RadixHTScanStatus::DONE;
-        if (++gstate.scan_done == sink.partitions.size()) {
-            gstate.finished = true;
-        }
         if (sink.scan_pin_properties == TupleDataPinProperties::DESTROY_AFTER_DONE) {
             data_collection.Reset();
         }
         return;
     }
 
+    if (data_collection.ScanComplete(scan_state)) {
+        if (++gstate.scan_done == sink.partitions.size()) {
+            gstate.finished = true;
+        }
+    }
+
     RowOperationsState row_state(aggregate_allocator);
     const auto group_cols = layout.ColumnCount() - 1;
     RowOperations::FinalizeStates(row_state, layout, scan_state.chunk_state.row_locations, scan_chunk, group_cols);
@@ -758,36 +760,38 @@ SourceResultType RadixPartitionedHashTable::GetData(ExecutionContext &context, D…
         return SourceResultType::FINISHED;
     }
 
-    …
-        aggr.function.destructor…
+    if (sink.count_before_combining == 0) {
+        if (grouping_set.empty()) {
+            // Special case hack to sort out aggregating from empty intermediates for aggregations without groups
+            D_ASSERT(chunk.ColumnCount() == null_groups.size() + op.aggregates.size() + op.grouping_functions.size());
+            // For each column in the aggregates, set to initial state
+            chunk.SetCardinality(1);
+            for (auto null_group : null_groups) {
+                chunk.data[null_group].SetVectorType(VectorType::CONSTANT_VECTOR);
+                ConstantVector::SetNull(chunk.data[null_group], true);
+            }
+            ArenaAllocator allocator(BufferAllocator::Get(context.client));
+            for (idx_t i = 0; i < op.aggregates.size(); i++) {
+                D_ASSERT(op.aggregates[i]->GetExpressionClass() == ExpressionClass::BOUND_AGGREGATE);
+                auto &aggr = op.aggregates[i]->Cast<BoundAggregateExpression>();
+                auto aggr_state = make_unsafe_uniq_array<data_t>(aggr.function.state_size());
+                aggr.function.initialize(aggr_state.get());
+
+                AggregateInputData aggr_input_data(aggr.bind_info.get(), allocator);
+                Vector state_vector(Value::POINTER(CastPointerToValue(aggr_state.get())));
+                aggr.function.finalize(state_vector, aggr_input_data, chunk.data[null_groups.size() + i], 1, 0);
+                if (aggr.function.destructor) {
+                    aggr.function.destructor(state_vector, aggr_input_data, 1);
+                }
+            }
+            // Place the grouping values (all the groups of the grouping_set condensed into a single value)
+            // Behind the null groups + aggregates
+            for (idx_t i = 0; i < op.grouping_functions.size(); i++) {
+                chunk.data[null_groups.size() + op.aggregates.size() + i].Reference(grouping_values[i]);
             }
-    }
-    // Place the grouping values (all the groups of the grouping_set condensed into a single value)
-    // Behind the null groups + aggregates
-    for (idx_t i = 0; i < op.grouping_functions.size(); i++) {
-        chunk.data[null_groups.size() + op.aggregates.size() + i].Reference(grouping_values[i]);
         }
         gstate.finished = true;
-        return SourceResultType::…
+        return SourceResultType::FINISHED;
     }
 
     while (!gstate.finished && chunk.size() == 0) {
@@ -796,7 +800,11 @@ SourceResultType RadixPartitionedHashTable::GetData(ExecutionContext &context, D…
         }
     }
 
-    …
+    if (chunk.size() != 0) {
+        return SourceResultType::HAVE_MORE_OUTPUT;
+    } else {
+        return SourceResultType::FINISHED;
+    }
 }
 
 } // namespace duckdb
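The scan-done accounting moves from "Scan returned false" to "the cursor actually reached the end of the collection", so each partition is counted exactly once before the global flag flips. A sketch of the completion pattern (the struct is illustrative; the names mirror gstate.scan_done and gstate.finished):

#include <atomic>
#include <cstddef>

// Illustrative completion accounting: a partition is counted exactly once,
// when its scan cursor reaches the end, and the last one flips the flag.
struct GlobalScanState {
    std::atomic<std::size_t> scan_done {0};
    std::atomic<bool> finished {false};
};

void OnScanCursorAtEnd(GlobalScanState &gstate, std::size_t partition_count) {
    if (++gstate.scan_done == partition_count) {
        gstate.finished = true;
    }
}
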
package/src/duckdb/src/function/table/read_csv.cpp
CHANGED
@@ -107,11 +107,11 @@ static unique_ptr<FunctionData> ReadCSVBind(ClientContext &context, TableFunctio…
     // Initialize Buffer Manager and Sniffer
     auto file_handle = BaseCSVReader::OpenCSV(context, options);
     result->buffer_manager = make_shared<CSVBufferManager>(context, std::move(file_handle), options);
-    CSVSniffer sniffer(options, result->buffer_manager, result->state_machine_cache);
+    CSVSniffer sniffer(options, result->buffer_manager, result->state_machine_cache, explicitly_set_columns);
     auto sniffer_result = sniffer.SniffCSV();
-    return_types = sniffer_result.return_types;
     if (names.empty()) {
         names = sniffer_result.names;
+        return_types = sniffer_result.return_types;
     } else {
         if (explicitly_set_columns) {
             // The user has influenced the names, can't assume they are valid anymore
@@ -195,6 +195,7 @@ public:
     auto file_count = files_path_p.size();
     line_info.current_batches.resize(file_count);
     line_info.lines_read.resize(file_count);
+    line_info.lines_errored.resize(file_count);
     tuple_start.resize(file_count);
     tuple_end.resize(file_count);
     tuple_end_to_batch.resize(file_count);
@@ -509,6 +510,11 @@ bool LineInfo::CanItGetLine(idx_t file_idx, idx_t batch_idx) {
     return false;
 }
 
+void LineInfo::Increment(idx_t file_idx, idx_t batch_idx) {
+    auto parallel_lock = duckdb::make_uniq<lock_guard<mutex>>(main_mutex);
+    lines_errored[file_idx][batch_idx]++;
+}
+
 // Returns the 1-indexed line number
 idx_t LineInfo::GetLine(idx_t batch_idx, idx_t line_error, idx_t file_idx, idx_t cur_start, bool verify,
                         bool stop_at_first) {
@@ -520,12 +526,11 @@ idx_t LineInfo::GetLine(idx_t batch_idx, idx_t line_error, idx_t file_idx, idx_t…
 
     if (!stop_at_first) {
         // Figure out the amount of lines read in the current file
-        …
-        …
-        …
-            break;
+        for (idx_t cur_batch_idx = 0; cur_batch_idx <= batch_idx; cur_batch_idx++) {
+            if (cur_batch_idx < batch_idx) {
+                line_count += lines_errored[file_idx][cur_batch_idx];
             }
-            line_count += lines_read[file_idx][…
+            line_count += lines_read[file_idx][cur_batch_idx];
         }
         return line_count + line_error + 1;
     }
@@ -880,8 +885,6 @@ static void ReadCSVAddNamedParameters(TableFunction &table_function) {
     table_function.named_parameters["header"] = LogicalType::BOOLEAN;
     table_function.named_parameters["auto_detect"] = LogicalType::BOOLEAN;
     table_function.named_parameters["sample_size"] = LogicalType::BIGINT;
-    table_function.named_parameters["sample_chunk_size"] = LogicalType::BIGINT;
-    table_function.named_parameters["sample_chunks"] = LogicalType::BIGINT;
     table_function.named_parameters["all_varchar"] = LogicalType::BOOLEAN;
     table_function.named_parameters["dateformat"] = LogicalType::VARCHAR;
     table_function.named_parameters["timestampformat"] = LogicalType::VARCHAR;
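With lines_errored tracked per batch, the 1-indexed error line is the sum of lines read in all batches up to and including the error's batch, plus the errored lines of the earlier batches, plus the in-batch offset, plus one. A standalone sketch of that computation (ErrorLineNumber is a hypothetical helper mirroring LineInfo::GetLine with stop_at_first == false):

#include <cstddef>
#include <unordered_map>

// Hypothetical helper: batches before the error's batch contribute both
// their read and errored lines; the error's own batch contributes only
// its read lines.
std::size_t ErrorLineNumber(const std::unordered_map<std::size_t, std::size_t> &lines_read,
                            const std::unordered_map<std::size_t, std::size_t> &lines_errored,
                            std::size_t batch_idx, std::size_t line_error) {
    std::size_t line_count = 0;
    for (std::size_t cur = 0; cur <= batch_idx; cur++) {
        if (cur < batch_idx) {
            auto errored = lines_errored.find(cur);
            line_count += errored == lines_errored.end() ? 0 : errored->second;
        }
        auto read = lines_read.find(cur);
        line_count += read == lines_read.end() ? 0 : read->second;
    }
    return line_count + line_error + 1; // +1 because lines are 1-indexed
}
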
package/src/duckdb/src/function/table/version/pragma_version.cpp
CHANGED
@@ -1,8 +1,8 @@
 #ifndef DUCKDB_VERSION
-#define DUCKDB_VERSION "0.8.2-dev4653"
+#define DUCKDB_VERSION "0.8.2-dev4711"
 #endif
 #ifndef DUCKDB_SOURCE_ID
-#define DUCKDB_SOURCE_ID "…
+#define DUCKDB_SOURCE_ID "474a0bd683"
 #endif
 #include "duckdb/function/table/system_functions.hpp"
 #include "duckdb/main/database.hpp"
package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_collection.hpp
CHANGED
@@ -159,6 +159,8 @@ public:
     bool Scan(TupleDataScanState &state, DataChunk &result);
     //! Scans a DataChunk from the TupleDataCollection
     bool Scan(TupleDataParallelScanState &gstate, TupleDataLocalScanState &lstate, DataChunk &result);
+    //! Whether the last scan has been completed on this TupleDataCollection
+    bool ScanComplete(const TupleDataScanState &state) const;
 
     //! Gathers a DataChunk from the TupleDataCollection, given the specific row locations (requires full pin)
     void Gather(Vector &row_locations, const SelectionVector &scan_sel, const idx_t scan_count, DataChunk &result,
package/src/duckdb/src/include/duckdb/execution/operator/scan/csv/csv_buffer.hpp
CHANGED
@@ -89,7 +89,7 @@ public:
 private:
     ClientContext &context;
     //! Actual size can be smaller than the buffer size in case we allocate it too optimistically.
-    idx_t …
+    idx_t actual_buffer_size;
     //! We need to check for Byte Order Mark, to define the start position of this buffer
     //! https://en.wikipedia.org/wiki/Byte_order_mark#UTF-8
     idx_t start_position = 0;
package/src/duckdb/src/include/duckdb/execution/operator/scan/csv/csv_line_info.hpp
CHANGED
@@ -20,10 +20,14 @@ public:
     //! Return the 1-indexed line number
     idx_t GetLine(idx_t batch_idx, idx_t line_error = 0, idx_t file_idx = 0, idx_t cur_start = 0, bool verify = true,
                   bool stop_at_first = true);
+    //! In case an error happened we have to increment the lines read of that batch
+    void Increment(idx_t file_idx, idx_t batch_idx);
     //! Verify if the CSV File was read correctly from [0,batch_idx] batches.
     void Verify(idx_t file_idx, idx_t batch_idx, idx_t cur_first_pos);
     //! Lines read per batch, <file_index, <batch_index, count>>
     vector<unordered_map<idx_t, idx_t>> lines_read;
+    //! Lines errored per batch, <file_index, <batch_index, count>>
+    vector<unordered_map<idx_t, idx_t>> lines_errored;
     //! Set of batches that have been initialized but are not yet finished.
     vector<set<idx_t>> current_batches;
     //! Pointer to CSV Reader Mutex
package/src/duckdb/src/include/duckdb/execution/operator/scan/csv/csv_reader_options.hpp
CHANGED
@@ -126,12 +126,10 @@ struct CSVReaderOptions {
     bool normalize_names = false;
     //! True, if column with that index must skip null check
     vector<bool> force_not_null;
+    //! Number of sample chunks used in auto-detection
+    idx_t sample_size_chunks = 20480 / STANDARD_VECTOR_SIZE;
     //! Consider all columns to be of type varchar
     bool all_varchar = false;
-    //! Size of sample chunk used for dialect and type detection
-    idx_t sample_chunk_size = STANDARD_VECTOR_SIZE;
-    //! Number of sample chunks used for type detection
-    idx_t sample_chunks = 10;
     //! Whether or not to automatically detect dialect and datatypes
     bool auto_detect = false;
     //! The file path of the CSV file to read
package/src/duckdb/src/include/duckdb/execution/operator/scan/csv/csv_sniffer.hpp
CHANGED
@@ -28,7 +28,7 @@ struct SnifferResult {
 class CSVSniffer {
 public:
     explicit CSVSniffer(CSVReaderOptions &options_p, shared_ptr<CSVBufferManager> buffer_manager_p,
-                        CSVStateMachineCache &state_machine_cache);
+                        CSVStateMachineCache &state_machine_cache, bool explicit_set_columns = false);
 
     //! Main method that sniffs the CSV file, returns the types, names and options as a result
     //! CSV Sniffing consists of five steps:
@@ -110,6 +110,8 @@ private:
     //! ------------------------------------------------------//
     void DetectHeader();
     vector<string> names;
+    //! If Column Names and Types have been explicitly set
+    const bool explicit_set_columns;
 
     //! ------------------------------------------------------//
     //! ------------------ Type Replacement ----------------- //
package/src/duckdb/src/include/duckdb/execution/operator/scan/csv/csv_state_machine_cache.hpp
CHANGED
@@ -13,7 +13,7 @@
 #include "duckdb/execution/operator/scan/csv/quote_rules.hpp"
 
 namespace duckdb {
-static constexpr uint32_t NUM_STATES = …
+static constexpr uint32_t NUM_STATES = 9;
 static constexpr uint32_t NUM_TRANSITIONS = 256;
 typedef uint8_t state_machine_t[NUM_STATES][NUM_TRANSITIONS];
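Pinning NUM_STATES at 9 fixes the shape of the 9 x 256 transition table, and parsing then advances one table lookup per input byte. A minimal sketch of driving a table of that shape (RunMachine and the constant names are illustrative, not DuckDB API):

#include <cstddef>
#include <cstdint>

// Illustrative table of the same shape as state_machine_t: 9 states x 256
// input bytes. Each input byte indexes straight into the current state's
// row, so scanning costs one table load per character.
static constexpr std::uint32_t kNumStates = 9;
static constexpr std::uint32_t kNumTransitions = 256;
typedef std::uint8_t StateMachine[kNumStates][kNumTransitions];

std::uint8_t RunMachine(const StateMachine &machine, const char *data, std::size_t len,
                        std::uint8_t state) {
    for (std::size_t i = 0; i < len; i++) {
        state = machine[state][static_cast<std::uint8_t>(data[i])];
    }
    return state;
}
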
package/src/duckdb/src/include/duckdb/execution/radix_partitioned_hashtable.hpp
CHANGED
@@ -51,13 +51,12 @@ public:
                                OperatorSourceInput &input) const;
 
     const TupleDataLayout &GetLayout() const;
-    idx_t …
+    idx_t NumberOfPartitions(GlobalSinkState &sink) const;
     static void SetMultiScan(GlobalSinkState &sink);
 
 private:
     void SetGroupingValues();
     void PopulateGroupChunk(DataChunk &group_chunk, DataChunk &input_chunk) const;
-    idx_t CountInternal(GlobalSinkState &sink) const;
 
     TupleDataLayout layout;
 };
package/src/duckdb/src/include/duckdb/main/extension_entries.hpp
CHANGED
@@ -240,6 +240,7 @@ static constexpr ExtensionEntry EXTENSION_FILE_CONTAINS[] = {{".parquet?", "parq…
 
 static constexpr const char *AUTOLOADABLE_EXTENSIONS[] = {
     // "azure",
+    "arrow",
     "aws",
     "autocomplete",
     "excel",
@@ -249,7 +250,9 @@ static constexpr const char *AUTOLOADABLE_EXTENSIONS[] = {
     // "icu",
     "json",
     "parquet",
+    "postgres_scanner",
     "sqlsmith",
+    "sqlite_scanner",
     "tpcds",
     "tpch",
     "visualizer",
package/src/duckdb/src/main/query_result.cpp
CHANGED
@@ -1,8 +1,9 @@
 #include "duckdb/main/query_result.hpp"
+
+#include "duckdb/common/box_renderer.hpp"
 #include "duckdb/common/printer.hpp"
 #include "duckdb/common/vector.hpp"
 #include "duckdb/main/client_context.hpp"
-#include "duckdb/common/box_renderer.hpp"
 namespace duckdb {
 
 BaseQueryResult::BaseQueryResult(QueryResultType type, StatementType statement_type, StatementProperties properties_p,
@@ -100,9 +101,17 @@ bool QueryResult::Equals(QueryResult &other) { // LCOV_EXCL_START
     }
     // now compare the actual values
     // fetch chunks
+    unique_ptr<DataChunk> lchunk, rchunk;
+    idx_t lindex = 0, rindex = 0;
     while (true) {
-        …
-        …
+        if (!lchunk || lindex == lchunk->size()) {
+            lchunk = Fetch();
+            lindex = 0;
+        }
+        if (!rchunk || rindex == rchunk->size()) {
+            rchunk = other.Fetch();
+            rindex = 0;
+        }
         if (!lchunk && !rchunk) {
             return true;
         }
@@ -112,14 +121,11 @@ bool QueryResult::Equals(QueryResult &other) { // LCOV_EXCL_START
         if (lchunk->size() == 0 && rchunk->size() == 0) {
             return true;
         }
-        if (lchunk->size() != rchunk->size()) {
-            return false;
-        }
         D_ASSERT(lchunk->ColumnCount() == rchunk->ColumnCount());
-        for (…
-            for (idx_t …
-                auto lvalue = lchunk->GetValue(col, …
-                auto rvalue = rchunk->GetValue(col, …
+        for (; lindex < lchunk->size() && rindex < rchunk->size(); lindex++, rindex++) {
+            for (idx_t col = 0; col < rchunk->ColumnCount(); col++) {
+                auto lvalue = lchunk->GetValue(col, lindex);
+                auto rvalue = rchunk->GetValue(col, rindex);
                 if (lvalue.IsNull() && rvalue.IsNull()) {
                     continue;
                 }
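The rewritten Equals walks both results with independent chunk cursors, refetching a side only when its cursor falls off the end of its current chunk, so equal streams that happen to be chunked differently still compare equal. A simplified sketch over plain int chunks (StreamsEqual is illustrative; the fetchers stand in for QueryResult::Fetch and must return nullptr once exhausted):

#include <cstddef>
#include <memory>
#include <vector>

using Chunk = std::vector<int>;

template <class FetchL, class FetchR>
bool StreamsEqual(FetchL fetch_left, FetchR fetch_right) {
    std::unique_ptr<Chunk> lchunk, rchunk;
    std::size_t lindex = 0, rindex = 0;
    while (true) {
        // Refetch a side only when its cursor ran off its current chunk.
        if (!lchunk || lindex == lchunk->size()) {
            lchunk = fetch_left();
            lindex = 0;
        }
        if (!rchunk || rindex == rchunk->size()) {
            rchunk = fetch_right();
            rindex = 0;
        }
        if (!lchunk && !rchunk) {
            return true; // both streams exhausted together
        }
        if (!lchunk || !rchunk) {
            return false; // one stream ran out early
        }
        for (; lindex < lchunk->size() && rindex < rchunk->size(); lindex++, rindex++) {
            if ((*lchunk)[lindex] != (*rchunk)[rindex]) {
                return false;
            }
        }
    }
}
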
package/src/duckdb/src/storage/serialization/serialize_nodes.cpp
CHANGED
@@ -101,28 +101,27 @@ void CSVReaderOptions::Serialize(Serializer &serializer) const {
     serializer.WriteProperty(111, "normalize_names", normalize_names);
     serializer.WriteProperty(112, "force_not_null", force_not_null);
     serializer.WriteProperty(113, "all_varchar", all_varchar);
-    serializer.WriteProperty(114, "…
-    serializer.WriteProperty(115, "…
-    serializer.WriteProperty(116, "…
-    serializer.WriteProperty(117, "…
-    serializer.WriteProperty(118, "…
-    serializer.WriteProperty(119, "…
-    serializer.WriteProperty(120, "…
-    serializer.WriteProperty(121, "…
-    serializer.WriteProperty(122, "…
-    serializer.WriteProperty(123, "…
-    serializer.WriteProperty(124, "…
-    serializer.WriteProperty(125, "…
-    serializer.WriteProperty(126, "…
-    serializer.WriteProperty(127, "dialect_options.state_machine_options.…
-    serializer.WriteProperty(128, "dialect_options.state_machine_options.…
-    serializer.WriteProperty(129, "dialect_options.…
-    serializer.WriteProperty(130, "dialect_options.…
-    serializer.WriteProperty(131, "dialect_options.…
-    serializer.WriteProperty(132, "dialect_options.…
-    serializer.WriteProperty(133, "dialect_options.…
-    serializer.WriteProperty(134, "dialect_options.…
-    serializer.WriteProperty(135, "dialect_options.has_format", dialect_options.has_format);
+    serializer.WriteProperty(114, "sample_size_chunks", sample_size_chunks);
+    serializer.WriteProperty(115, "auto_detect", auto_detect);
+    serializer.WriteProperty(116, "file_path", file_path);
+    serializer.WriteProperty(117, "decimal_separator", decimal_separator);
+    serializer.WriteProperty(118, "null_padding", null_padding);
+    serializer.WriteProperty(119, "buffer_size", buffer_size);
+    serializer.WriteProperty(120, "file_options", file_options);
+    serializer.WriteProperty(121, "force_quote", force_quote);
+    serializer.WriteProperty(122, "rejects_table_name", rejects_table_name);
+    serializer.WriteProperty(123, "rejects_limit", rejects_limit);
+    serializer.WriteProperty(124, "rejects_recovery_columns", rejects_recovery_columns);
+    serializer.WriteProperty(125, "rejects_recovery_column_ids", rejects_recovery_column_ids);
+    serializer.WriteProperty(126, "dialect_options.state_machine_options.delimiter", dialect_options.state_machine_options.delimiter);
+    serializer.WriteProperty(127, "dialect_options.state_machine_options.quote", dialect_options.state_machine_options.quote);
+    serializer.WriteProperty(128, "dialect_options.state_machine_options.escape", dialect_options.state_machine_options.escape);
+    serializer.WriteProperty(129, "dialect_options.header", dialect_options.header);
+    serializer.WriteProperty(130, "dialect_options.num_cols", dialect_options.num_cols);
+    serializer.WriteProperty(131, "dialect_options.new_line", dialect_options.new_line);
+    serializer.WriteProperty(132, "dialect_options.skip_rows", dialect_options.skip_rows);
+    serializer.WriteProperty(133, "dialect_options.date_format", dialect_options.date_format);
+    serializer.WriteProperty(134, "dialect_options.has_format", dialect_options.has_format);
 }
 
 CSVReaderOptions CSVReaderOptions::Deserialize(Deserializer &deserializer) {
@@ -141,28 +140,27 @@ CSVReaderOptions CSVReaderOptions::Deserialize(Deserializer &deserializer) {
     deserializer.ReadProperty(111, "normalize_names", result.normalize_names);
     deserializer.ReadProperty(112, "force_not_null", result.force_not_null);
     deserializer.ReadProperty(113, "all_varchar", result.all_varchar);
-    deserializer.ReadProperty(114, "…
-    deserializer.ReadProperty(115, "…
-    deserializer.ReadProperty(116, "…
-    deserializer.ReadProperty(117, "…
-    deserializer.ReadProperty(118, "…
-    deserializer.ReadProperty(119, "…
-    deserializer.ReadProperty(120, "…
-    deserializer.ReadProperty(121, "…
-    deserializer.ReadProperty(122, "…
-    deserializer.ReadProperty(123, "…
-    deserializer.ReadProperty(124, "…
-    deserializer.ReadProperty(125, "…
-    deserializer.ReadProperty(126, "…
-    deserializer.ReadProperty(127, "dialect_options.state_machine_options.…
-    deserializer.ReadProperty(128, "dialect_options.state_machine_options.…
-    deserializer.ReadProperty(129, "dialect_options.…
-    deserializer.ReadProperty(130, "dialect_options.…
-    deserializer.ReadProperty(131, "dialect_options.…
-    deserializer.ReadProperty(132, "dialect_options.…
-    deserializer.ReadProperty(133, "dialect_options.…
-    deserializer.ReadProperty(134, "dialect_options.…
-    deserializer.ReadProperty(135, "dialect_options.has_format", result.dialect_options.has_format);
+    deserializer.ReadProperty(114, "sample_size_chunks", result.sample_size_chunks);
+    deserializer.ReadProperty(115, "auto_detect", result.auto_detect);
+    deserializer.ReadProperty(116, "file_path", result.file_path);
+    deserializer.ReadProperty(117, "decimal_separator", result.decimal_separator);
+    deserializer.ReadProperty(118, "null_padding", result.null_padding);
+    deserializer.ReadProperty(119, "buffer_size", result.buffer_size);
+    deserializer.ReadProperty(120, "file_options", result.file_options);
+    deserializer.ReadProperty(121, "force_quote", result.force_quote);
+    deserializer.ReadProperty(122, "rejects_table_name", result.rejects_table_name);
+    deserializer.ReadProperty(123, "rejects_limit", result.rejects_limit);
+    deserializer.ReadProperty(124, "rejects_recovery_columns", result.rejects_recovery_columns);
+    deserializer.ReadProperty(125, "rejects_recovery_column_ids", result.rejects_recovery_column_ids);
+    deserializer.ReadProperty(126, "dialect_options.state_machine_options.delimiter", result.dialect_options.state_machine_options.delimiter);
+    deserializer.ReadProperty(127, "dialect_options.state_machine_options.quote", result.dialect_options.state_machine_options.quote);
+    deserializer.ReadProperty(128, "dialect_options.state_machine_options.escape", result.dialect_options.state_machine_options.escape);
+    deserializer.ReadProperty(129, "dialect_options.header", result.dialect_options.header);
+    deserializer.ReadProperty(130, "dialect_options.num_cols", result.dialect_options.num_cols);
+    deserializer.ReadProperty(131, "dialect_options.new_line", result.dialect_options.new_line);
+    deserializer.ReadProperty(132, "dialect_options.skip_rows", result.dialect_options.skip_rows);
+    deserializer.ReadProperty(133, "dialect_options.date_format", result.dialect_options.date_format);
+    deserializer.ReadProperty(134, "dialect_options.has_format", result.dialect_options.has_format);
     return result;
 }