duckdb 0.6.2-dev735.0 → 0.6.2-dev758.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -2,7 +2,7 @@
   "name": "duckdb",
   "main": "./lib/duckdb.js",
   "types": "./lib/duckdb.d.ts",
- "version": "0.6.2-dev735.0",
+ "version": "0.6.2-dev758.0",
   "description": "DuckDB node.js API",
   "gypfile": true,
   "dependencies": {
@@ -160,7 +160,6 @@ void BaseCSVReader::AddValue(string_t str_val, idx_t &column, vector<idx_t> &esc
  } else {
  row_empty = false;
  }
-
  if (!sql_types.empty() && column == sql_types.size() && length == 0) {
  // skip a single trailing delimiter in last column
  return;
@@ -249,7 +248,7 @@ bool BaseCSVReader::AddRow(DataChunk &insert_chunk, idx_t &column, string &error
  return false;
  } else {
  throw InvalidInputException(
- "Error in file \"%s\" on line %s: expected %lld values per row, but got %d. (%s)",
+ "Error in file \"%s\" on line %s: expected %lld values per row, but got %d.\nParser options:\n%s",
  options.file_path, GetLineNumberStr(linenr, linenr_estimated).c_str(), sql_types.size(), column,
  options.ToString());
  }
@@ -309,7 +308,7 @@ void BaseCSVReader::VerifyUTF8(idx_t col_idx, idx_t row_idx, DataChunk &chunk, i
  int64_t error_line = linenr - (chunk.size() - row_idx) + 1 + offset;
  D_ASSERT(error_line >= 0);
  throw InvalidInputException("Error in file \"%s\" at line %llu in column \"%s\": "
- "%s. Parser options: %s",
+ "%s. Parser options:\n%s",
  options.file_path, error_line, col_name,
  ErrorManager::InvalidUnicodeError(s.GetString(), "CSV file"), options.ToString());
  }
@@ -332,29 +331,27 @@ bool BaseCSVReader::Flush(DataChunk &insert_chunk, bool try_add_line) {
  // convert the columns in the parsed chunk to the types of the table
  insert_chunk.SetCardinality(parse_chunk);
  for (idx_t col_idx = 0; col_idx < sql_types.size(); col_idx++) {
- if (sql_types[col_idx].id() == LogicalTypeId::VARCHAR) {
+ auto insert_idx = insert_cols_idx[col_idx];
+ auto &type = sql_types[col_idx];
+ if (type.id() == LogicalTypeId::VARCHAR) {
  // target type is varchar: no need to convert
  // just test that all strings are valid utf-8 strings
  VerifyUTF8(col_idx);
- insert_chunk.data[insert_cols_idx[col_idx]].Reference(parse_chunk.data[col_idx]);
+ insert_chunk.data[insert_idx].Reference(parse_chunk.data[col_idx]);
  } else {
  string error_message;
  bool success;
- if (options.has_format[LogicalTypeId::DATE] && sql_types[col_idx].id() == LogicalTypeId::DATE) {
+ if (options.has_format[LogicalTypeId::DATE] && type.id() == LogicalTypeId::DATE) {
  // use the date format to cast the chunk
- success =
- TryCastDateVector(options, parse_chunk.data[col_idx], insert_chunk.data[insert_cols_idx[col_idx]],
- parse_chunk.size(), error_message);
- } else if (options.has_format[LogicalTypeId::TIMESTAMP] &&
- sql_types[col_idx].id() == LogicalTypeId::TIMESTAMP) {
+ success = TryCastDateVector(options, parse_chunk.data[col_idx], insert_chunk.data[insert_idx],
+ parse_chunk.size(), error_message);
+ } else if (options.has_format[LogicalTypeId::TIMESTAMP] && type.id() == LogicalTypeId::TIMESTAMP) {
  // use the date format to cast the chunk
- success = TryCastTimestampVector(options, parse_chunk.data[col_idx],
- insert_chunk.data[insert_cols_idx[col_idx]], parse_chunk.size(),
- error_message);
+ success = TryCastTimestampVector(options, parse_chunk.data[col_idx], insert_chunk.data[insert_idx],
+ parse_chunk.size(), error_message);
  } else {
  // target type is not varchar: perform a cast
- success = VectorOperations::DefaultTryCast(parse_chunk.data[col_idx],
- insert_chunk.data[insert_cols_idx[col_idx]],
+ success = VectorOperations::DefaultTryCast(parse_chunk.data[col_idx], insert_chunk.data[insert_idx],
  parse_chunk.size(), &error_message);
  }
  if (success) {
@@ -385,13 +382,13 @@ bool BaseCSVReader::Flush(DataChunk &insert_chunk, bool try_add_line) {
  auto error_line = linenr - (parse_chunk.size() - row_idx) + 1;

  if (options.auto_detect) {
- throw InvalidInputException("%s in column %s, at line %llu. Parser "
- "options: %s. Consider either increasing the sample size "
+ throw InvalidInputException("%s in column %s, at line %llu.\n\nParser "
+ "options:\n%s.\n\nConsider either increasing the sample size "
  "(SAMPLE_SIZE=X [X rows] or SAMPLE_SIZE=-1 [all rows]), "
  "or skipping column conversion (ALL_VARCHAR=1)",
  error_message, col_name, error_line, options.ToString());
  } else {
- throw InvalidInputException("%s at line %llu in column %s. Parser options: %s ", error_message,
+ throw InvalidInputException("%s at line %llu in column %s. Parser options:\n%s ", error_message,
  error_line, col_name, options.ToString());
  }
  }
@@ -851,10 +851,13 @@ vector<LogicalType> BufferedCSVReader::SniffCSV(const vector<LogicalType> &reque
  // #######
  // type candidates, ordered by descending specificity (~ from high to low)
  vector<LogicalType> type_candidates = {
- LogicalType::VARCHAR, LogicalType::TIMESTAMP,
- LogicalType::DATE, LogicalType::TIME,
- LogicalType::DOUBLE, /* LogicalType::FLOAT,*/ LogicalType::BIGINT,
- LogicalType::INTEGER, /*LogicalType::SMALLINT, LogicalType::TINYINT,*/ LogicalType::BOOLEAN,
+ LogicalType::VARCHAR,
+ LogicalType::TIMESTAMP,
+ LogicalType::DATE,
+ LogicalType::TIME,
+ LogicalType::DOUBLE,
+ /* LogicalType::FLOAT,*/ LogicalType::BIGINT,
+ /*LogicalType::INTEGER,*/ /*LogicalType::SMALLINT, LogicalType::TINYINT,*/ LogicalType::BOOLEAN,
  LogicalType::SQLNULL};
  // format template candidates, ordered by descending specificity (~ from high to low)
  std::map<LogicalTypeId, vector<const char *>> format_template_candidates = {
@@ -251,13 +251,14 @@ bool BufferedCSVReaderOptions::SetBaseOption(const string &loption, const Value
  }

  std::string BufferedCSVReaderOptions::ToString() const {
- return "DELIMITER='" + delimiter + (has_delimiter ? "'" : (auto_detect ? "' (auto detected)" : "' (default)")) +
- ", QUOTE='" + quote + (has_quote ? "'" : (auto_detect ? "' (auto detected)" : "' (default)")) +
- ", ESCAPE='" + escape + (has_escape ? "'" : (auto_detect ? "' (auto detected)" : "' (default)")) +
- ", HEADER=" + std::to_string(header) +
+ return " file=" + file_path + "\n delimiter='" + delimiter +
+ (has_delimiter ? "'" : (auto_detect ? "' (auto detected)" : "' (default)")) + "\n quote='" + quote +
+ (has_quote ? "'" : (auto_detect ? "' (auto detected)" : "' (default)")) + "\n escape='" + escape +
+ (has_escape ? "'" : (auto_detect ? "' (auto detected)" : "' (default)")) +
+ "\n header=" + std::to_string(header) +
  (has_header ? "" : (auto_detect ? " (auto detected)" : "' (default)")) +
- ", SAMPLE_SIZE=" + std::to_string(sample_chunk_size * sample_chunks) +
- ", IGNORE_ERRORS=" + std::to_string(ignore_errors) + ", ALL_VARCHAR=" + std::to_string(all_varchar);
+ "\n sample_size=" + std::to_string(sample_chunk_size * sample_chunks) +
+ "\n ignore_erros=" + std::to_string(ignore_errors) + "\n all_varchar=" + std::to_string(all_varchar);
  }

  } // namespace duckdb
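Editor's note: with the change above, BufferedCSVReaderOptions::ToString() renders the parser options as a multi-line block instead of one comma-separated line. A rough sketch of the new output (the values are hypothetical, not taken from the diff; field names follow the source, including its "ignore_erros" spelling):

  file=data.csv
  delimiter=',' (auto detected)
  quote='"' (auto detected)
  escape='"' (auto detected)
  header=1 (auto detected)
  sample_size=20480
  ignore_erros=0
  all_varchar=0

This block is what the "Parser options:\n%s" placeholder in the updated error messages expands to.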
@@ -271,6 +271,24 @@ public:
  atomic<idx_t> bytes_read;
  //! Size of current file
  idx_t file_size;
+ //! The index of the next file to read (i.e. current file + 1)
+ idx_t file_index = 1;
+
+ double GetProgress(ReadCSVData &bind_data) const {
+ idx_t total_files = bind_data.files.size();
+
+ // get the progress WITHIN the current file
+ double progress;
+ if (file_size == 0) {
+ progress = 1.0;
+ } else {
+ progress = double(bytes_read) / double(file_size);
+ }
+ // now get the total percentage of files read
+ double percentage = double(file_index) / total_files;
+ percentage += (double(1) / double(total_files)) * progress;
+ return percentage * 100;
+ }

 private:
  //! File Handle for current file
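Editor's note: to illustrate the parallel-reader progress formula added above (hypothetical numbers, not from the diff): with bind_data.files.size() == 4, file_index == 2 and the current file half read, GetProgress() evaluates to

  progress   = bytes_read / file_size = 0.5
  percentage = 2/4 + (1/4) * 0.5      = 0.625
  result     = 0.625 * 100            = 62.5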
@@ -278,8 +296,6 @@

  shared_ptr<CSVBuffer> current_buffer;
  shared_ptr<CSVBuffer> next_buffer;
- //! The index of the next file to read (i.e. current file + 1)
- idx_t file_index = 1;

  //! Mutex to lock when getting next batch of bytes (Parallel Only)
  mutex main_mutex;
@@ -348,6 +364,7 @@ unique_ptr<CSVBufferRead> ParallelCSVGlobalState::Next(ClientContext &context, R
  }
  return result;
  }
+
 static unique_ptr<GlobalTableFunctionState> ParallelCSVInitGlobal(ClientContext &context,
  TableFunctionInitInput &input) {
  auto &bind_data = (ReadCSVData &)*input.bind_data;
@@ -359,7 +376,6 @@ static unique_ptr<GlobalTableFunctionState> ParallelCSVInitGlobal(ClientContext

  bind_data.options.file_path = bind_data.files[0];
  file_handle = ReadCSV::OpenCSV(bind_data.options, context);
-
  idx_t rows_to_skip = bind_data.options.skip_rows + (bind_data.options.has_header ? 1 : 0);
  return make_unique<ParallelCSVGlobalState>(context, move(file_handle), bind_data.files,
  context.db->NumberOfThreads(), bind_data.options.buffer_size,
@@ -379,12 +395,9 @@ public:
  CSVBufferRead previous_buffer;
  };

- unique_ptr<LocalTableFunctionState> ReadCSVInitLocal(ExecutionContext &context, TableFunctionInitInput &input,
- GlobalTableFunctionState *global_state_p) {
+ unique_ptr<LocalTableFunctionState> ParallelReadCSVInitLocal(ExecutionContext &context, TableFunctionInitInput &input,
+ GlobalTableFunctionState *global_state_p) {
  auto &csv_data = (ReadCSVData &)*input.bind_data;
- if (csv_data.single_threaded) {
- return nullptr;
- }
  auto &global_state = (ParallelCSVGlobalState &)*global_state_p;
  auto next_local_buffer = global_state.Next(context.client, csv_data);
  unique_ptr<ParallelCSVReader> csv_reader;
@@ -416,7 +429,6 @@ static void ParallelReadCSVFunction(ClientContext &context, TableFunctionInput &
  if (!next_chunk) {
  break;
  }
- // csv_local_state.previous_buffer = csv_local_state.csv_reader->buffer;
  csv_local_state.csv_reader->SetBufferRead(move(next_chunk));
  }
  csv_local_state.csv_reader->ParseCSV(output);
@@ -434,91 +446,172 @@ static void ParallelReadCSVFunction(ClientContext &context, TableFunctionInput &
  }
  }

- static idx_t CSVReaderGetBatchIndex(ClientContext &context, const FunctionData *bind_data_p,
- LocalTableFunctionState *local_state, GlobalTableFunctionState *global_state) {
- auto &bind_data = (ReadCSVData &)*bind_data_p;
- if (bind_data.single_threaded) {
- return 0;
- }
- auto &data = (ParallelCSVLocalState &)*local_state;
- return data.csv_reader->buffer->batch_index;
- }
-
  //===--------------------------------------------------------------------===//
  // Single-Threaded CSV Reader
  //===--------------------------------------------------------------------===//
  struct SingleThreadedCSVState : public GlobalTableFunctionState {
- //! The CSV reader
- unique_ptr<BufferedCSVReader> csv_reader;
+ explicit SingleThreadedCSVState(idx_t total_files) : total_files(total_files), next_file(0), progress_in_files(0) {
+ }
+
+ mutex csv_lock;
+ unique_ptr<BufferedCSVReader> initial_reader;
+ //! The total number of files to read from
+ idx_t total_files;
  //! The index of the next file to read (i.e. current file + 1)
- idx_t file_index;
- //! Total File Size
- idx_t file_size;
- //! How many bytes were read up to this point
- atomic<idx_t> bytes_read;
+ atomic<idx_t> next_file;
+ //! How far along we are in reading the current set of open files
+ //! This goes from [0...next_file] * 100
+ atomic<idx_t> progress_in_files;
+ //! The set of SQL types
+ vector<LogicalType> sql_types;

  idx_t MaxThreads() const override {
- return 1;
+ return total_files;
+ }
+
+ double GetProgress(ReadCSVData &bind_data) const {
+ D_ASSERT(total_files == bind_data.files.size());
+ D_ASSERT(progress_in_files <= total_files * 100);
+ return (double(progress_in_files) / double(total_files));
+ }
+
+ unique_ptr<BufferedCSVReader> GetCSVReader(ClientContext &context, ReadCSVData &bind_data, idx_t &file_index,
+ idx_t &total_size) {
+ BufferedCSVReaderOptions options;
+ {
+ lock_guard<mutex> l(csv_lock);
+ if (initial_reader) {
+ return move(initial_reader);
+ }
+ if (next_file >= total_files) {
+ return nullptr;
+ }
+ options = bind_data.options;
+ file_index = next_file;
+ next_file++;
+ }
+ // reuse csv_readers was created during binding
+ unique_ptr<BufferedCSVReader> result;
+ if (options.union_by_name) {
+ result = move(bind_data.union_readers[file_index]);
+ } else {
+ options.file_path = bind_data.files[file_index];
+ result = make_unique<BufferedCSVReader>(context, move(options), sql_types);
+ }
+ total_size = result->file_handle->FileSize();
+ return result;
+ }
+ };
+
+ struct SingleThreadedCSVLocalState : public LocalTableFunctionState {
+ public:
+ explicit SingleThreadedCSVLocalState() : bytes_read(0), total_size(0), current_progress(0), file_index(0) {
  }
+
+ //! The CSV reader
+ unique_ptr<BufferedCSVReader> csv_reader;
+ //! The current amount of bytes read by this reader
+ idx_t bytes_read;
+ //! The total amount of bytes in the file
+ idx_t total_size;
+ //! The current progress from 0..100
+ idx_t current_progress;
+ //! The file index of this reader
+ idx_t file_index;
  };

  static unique_ptr<GlobalTableFunctionState> SingleThreadedCSVInit(ClientContext &context,
  TableFunctionInitInput &input) {
  auto &bind_data = (ReadCSVData &)*input.bind_data;
- auto result = make_unique<SingleThreadedCSVState>();
+ auto result = make_unique<SingleThreadedCSVState>(bind_data.files.size());
  if (bind_data.initial_reader) {
- result->csv_reader = move(bind_data.initial_reader);
+ result->initial_reader = move(bind_data.initial_reader);
  } else if (bind_data.files.empty()) {
  // This can happen when a filename based filter pushdown has eliminated all possible files for this scan.
  return move(result);
  } else {
  bind_data.options.file_path = bind_data.files[0];
- result->csv_reader = make_unique<BufferedCSVReader>(context, bind_data.options, bind_data.sql_types);
+ result->initial_reader = make_unique<BufferedCSVReader>(context, bind_data.options, bind_data.sql_types);
+ if (bind_data.options.auto_detect) {
+ bind_data.options = result->initial_reader->options;
+ }
+ }
+ if (!bind_data.options.union_by_name) {
+ // if we are reading multiple files - run auto-detect only on the first file
+ // UNLESS union by name is turned on - in that case we assume that different files have different schemas
+ // as such, we need to re-run the auto detection on each file
+ bind_data.options.auto_detect = false;
+ }
+ result->next_file = 1;
+ if (result->initial_reader) {
+ result->sql_types = result->initial_reader->sql_types;
  }
- result->file_size = result->csv_reader->file_handle->FileSize();
- result->file_index = 1;
+ return move(result);
+ }
+
+ unique_ptr<LocalTableFunctionState> SingleThreadedReadCSVInitLocal(ExecutionContext &context,
+ TableFunctionInitInput &input,
+ GlobalTableFunctionState *global_state_p) {
+ auto &bind_data = (ReadCSVData &)*input.bind_data;
+ auto &data = (SingleThreadedCSVState &)*global_state_p;
+ auto result = make_unique<SingleThreadedCSVLocalState>();
+ result->csv_reader = data.GetCSVReader(context.client, bind_data, result->file_index, result->total_size);
  return move(result);
  }

  static void SingleThreadedCSVFunction(ClientContext &context, TableFunctionInput &data_p, DataChunk &output) {
  auto &bind_data = (ReadCSVData &)*data_p.bind_data;
  auto &data = (SingleThreadedCSVState &)*data_p.global_state;
-
- if (!data.csv_reader) {
+ auto &lstate = (SingleThreadedCSVLocalState &)*data_p.local_state;
+ if (!lstate.csv_reader) {
  // no csv_reader was set, this can happen when a filename-based filter has filtered out all possible files
  return;
  }

  do {
- data.csv_reader->ParseCSV(output);
- data.bytes_read = data.csv_reader->bytes_in_chunk;
- if (output.size() == 0 && data.file_index < bind_data.files.size()) {
- // exhausted this file, but we have more files we can read
- // open the next file and increment the counter
- bind_data.options.file_path = bind_data.files[data.file_index];
- // reuse csv_readers was created during binding
- if (bind_data.options.union_by_name) {
- data.csv_reader = move(bind_data.union_readers[data.file_index]);
- } else {
- data.csv_reader =
- make_unique<BufferedCSVReader>(context, bind_data.options, data.csv_reader->sql_types);
+ lstate.csv_reader->ParseCSV(output);
+ // update the number of bytes read
+ D_ASSERT(lstate.bytes_read <= lstate.csv_reader->bytes_in_chunk);
+ auto bytes_read = MinValue<idx_t>(lstate.total_size, lstate.csv_reader->bytes_in_chunk);
+ auto current_progress = lstate.total_size == 0 ? 100 : 100 * bytes_read / lstate.total_size;
+ if (current_progress > lstate.current_progress) {
+ if (current_progress > 100) {
+ throw InternalException("Progress should never exceed 100");
+ }
+ data.progress_in_files += current_progress - lstate.current_progress;
+ lstate.current_progress = current_progress;
+ }
+ if (output.size() == 0) {
+ // exhausted this file, but we might have more files we can read
+ auto csv_reader = data.GetCSVReader(context, bind_data, lstate.file_index, lstate.total_size);
+ // add any left-over progress for this file to the progress bar
+ if (lstate.current_progress < 100) {
+ data.progress_in_files += 100 - lstate.current_progress;
+ }
+ // reset the current progress
+ lstate.current_progress = 0;
+ lstate.bytes_read = 0;
+ lstate.csv_reader = move(csv_reader);
+ if (!lstate.csv_reader) {
+ // no more files - we are done
+ return;
  }
- data.file_index++;
+ lstate.bytes_read = 0;
  } else {
  break;
  }
  } while (true);

  if (bind_data.options.union_by_name) {
- data.csv_reader->SetNullUnionCols(output);
+ lstate.csv_reader->SetNullUnionCols(output);
  }
  if (bind_data.options.include_file_name) {
  auto &col = output.data[bind_data.filename_col_idx];
- col.SetValue(0, Value(data.csv_reader->options.file_path));
+ col.SetValue(0, Value(lstate.csv_reader->options.file_path));
  col.SetVectorType(VectorType::CONSTANT_VECTOR);
  }
  if (bind_data.options.include_parsed_hive_partitions) {
- auto partitions = HivePartitioning::Parse(data.csv_reader->options.file_path);
+ auto partitions = HivePartitioning::Parse(lstate.csv_reader->options.file_path);

  idx_t i = bind_data.hive_partition_col_idx;

@@ -531,7 +624,7 @@ static void SingleThreadedCSVFunction(ClientContext &context, TableFunctionInput
  for (auto &part : partitions) {
  if (bind_data.options.names[i] != part.first) {
  throw IOException("Hive partition names mismatch, expected '" + bind_data.options.names[i] +
- "' but found '" + part.first + "' for file '" + data.csv_reader->options.file_path +
+ "' but found '" + part.first + "' for file '" + lstate.csv_reader->options.file_path +
  "'");
  }
  auto &col = output.data[i++];
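Editor's note: a quick worked example of the single-threaded progress accounting added above (hypothetical numbers). progress_in_files accumulates up to 100 points per file, and GetProgress() divides by the file count, so with 3 files, two finished and the third 40% read:

  progress_in_files = 100 + 100 + 40 = 240
  GetProgress()     = 240 / 3        = 80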
@@ -553,6 +646,16 @@ static unique_ptr<GlobalTableFunctionState> ReadCSVInitGlobal(ClientContext &con
  }
  }

+ unique_ptr<LocalTableFunctionState> ReadCSVInitLocal(ExecutionContext &context, TableFunctionInitInput &input,
+ GlobalTableFunctionState *global_state_p) {
+ auto &csv_data = (ReadCSVData &)*input.bind_data;
+ if (csv_data.single_threaded) {
+ return SingleThreadedReadCSVInitLocal(context, input, global_state_p);
+ } else {
+ return ParallelReadCSVInitLocal(context, input, global_state_p);
+ }
+ }
+
  static void ReadCSVFunction(ClientContext &context, TableFunctionInput &data_p, DataChunk &output) {
  auto &bind_data = (ReadCSVData &)*data_p.bind_data;
  if (bind_data.single_threaded) {
@@ -562,6 +665,17 @@ static void ReadCSVFunction(ClientContext &context, TableFunctionInput &data_p,
  }
  }

+ static idx_t CSVReaderGetBatchIndex(ClientContext &context, const FunctionData *bind_data_p,
+ LocalTableFunctionState *local_state, GlobalTableFunctionState *global_state) {
+ auto &bind_data = (ReadCSVData &)*bind_data_p;
+ if (bind_data.single_threaded) {
+ auto &data = (SingleThreadedCSVLocalState &)*local_state;
+ return data.file_index;
+ }
+ auto &data = (ParallelCSVLocalState &)*local_state;
+ return data.csv_reader->buffer->batch_index;
+ }
+
  static void ReadCSVAddNamedParameters(TableFunction &table_function) {
  table_function.named_parameters["sep"] = LogicalType::VARCHAR;
  table_function.named_parameters["delim"] = LogicalType::VARCHAR;
@@ -592,21 +706,13 @@ static void ReadCSVAddNamedParameters(TableFunction &table_function) {
 double CSVReaderProgress(ClientContext &context, const FunctionData *bind_data_p,
  const GlobalTableFunctionState *global_state) {
  auto &bind_data = (ReadCSVData &)*bind_data_p;
- idx_t file_size, bytes_read;
  if (bind_data.single_threaded) {
- auto &data = (const SingleThreadedCSVState &)*global_state;
- file_size = data.file_size;
- bytes_read = data.bytes_read;
+ auto &data = (SingleThreadedCSVState &)*global_state;
+ return data.GetProgress(bind_data);
  } else {
  auto &data = (const ParallelCSVGlobalState &)*global_state;
- file_size = data.file_size;
- bytes_read = data.bytes_read;
- }
- if (file_size == 0) {
- return 100;
+ return data.GetProgress(bind_data);
  }
- auto percentage = (bytes_read * 100.0) / file_size;
- return percentage;
  }

  void CSVComplexFilterPushdown(ClientContext &context, LogicalGet &get, FunctionData *bind_data_p,
@@ -1,8 +1,8 @@
  #ifndef DUCKDB_VERSION
- #define DUCKDB_VERSION "0.6.2-dev735"
+ #define DUCKDB_VERSION "0.6.2-dev758"
  #endif
  #ifndef DUCKDB_SOURCE_ID
- #define DUCKDB_SOURCE_ID "b85fb31ebf"
+ #define DUCKDB_SOURCE_ID "cd29769dcd"
  #endif
  #include "duckdb/function/table/system_functions.hpp"
  #include "duckdb/main/database.hpp"
@@ -41,6 +41,11 @@ public:
  }
  return nullptr;
  }
+
+ bool IsEmpty() {
+ return head_count == 0 && get_count == 0 && put_count == 0 && post_count == 0 && total_bytes_received == 0 &&
+ total_bytes_sent == 0;
+ }
  };

  } // namespace duckdb
@@ -377,7 +377,7 @@ void QueryProfiler::QueryTreeToStream(std::ostream &ss) const {
  return;
  }

- if (context.client_data->http_stats) {
+ if (context.client_data->http_stats && !context.client_data->http_stats->IsEmpty()) {
  string read =
  "in: " + StringUtil::BytesToHumanReadableString(context.client_data->http_stats->total_bytes_received);
  string written =