npm - duckdb - Versions diffs - 0.3.5-dev54.0 → 0.3.5-dev75.0 - Mend

duckdb 0.3.5-dev54.0 → 0.3.5-dev75.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (4)

package/package.json +1 -1
package/src/duckdb.cpp +76 -23
package/src/duckdb.hpp +8 -2
package/src/parquet-amalgamation.cpp +31810 -31810

package/package.json CHANGED

@@ -1,7 +1,7 @@
 {
   "name": "duckdb",
   "main": "./lib/duckdb.js",
-  "version": "0.3.5-dev54.0",
+  "version": "0.3.5-dev75.0",
   "description": "DuckDB node.js API",
   "gypfile": true,
   "dependencies": {

package/src/duckdb.cpp CHANGED

@@ -63764,7 +63764,7 @@ std::string BufferedCSVReaderOptions::ToString() const {
 	       ", HEADER=" + std::to_string(header) +
 	       (has_header ? "" : (auto_detect ? " (auto detected)" : "' (default)")) +
 	       ", SAMPLE_SIZE=" + std::to_string(sample_chunk_size * sample_chunks) +
-	       ", ALL_VARCHAR=" + std::to_string(all_varchar);
+	       ", IGNORE_ERRORS=" + std::to_string(ignore_errors) + ", ALL_VARCHAR=" + std::to_string(all_varchar);
 }
 static string GetLineNumberStr(idx_t linenr, bool linenr_estimated) {
@@ -65227,9 +65227,14 @@ void BufferedCSVReader::AddValue(char *str_val, idx_t length, idx_t &column, vec
 		return;
 	}
 	if (column >= sql_types.size()) {
-		throw InvalidInputException("Error on line %s: expected %lld values per row, but got more. (%s)",
-		                            GetLineNumberStr(linenr, linenr_estimated).c_str(), sql_types.size(),
-		                            options.ToString());
+		if (options.ignore_errors) {
+			error_column_overflow = true;
+			return;
+		} else {
+			throw InvalidInputException("Error on line %s: expected %lld values per row, but got more. (%s)",
+			                            GetLineNumberStr(linenr, linenr_estimated).c_str(), sql_types.size(),
+			                            options.ToString());
+		}
 	}
 	// insert the line number into the chunk
@@ -65281,10 +65286,23 @@ bool BufferedCSVReader::AddRow(DataChunk &insert_chunk, idx_t &column) {
 		}
 	}
+	// Error forwarded by 'ignore_errors' - originally encountered in 'AddValue'
+	if (error_column_overflow) {
+		D_ASSERT(options.ignore_errors);
+		error_column_overflow = false;
+		column = 0;
+		return false;
+	}
 	if (column < sql_types.size() && mode != ParserMode::SNIFFING_DIALECT) {
-		throw InvalidInputException("Error on line %s: expected %lld values per row, but got %d. (%s)",
-		                            GetLineNumberStr(linenr, linenr_estimated).c_str(), sql_types.size(), column,
-		                            options.ToString());
+		if (options.ignore_errors) {
+			column = 0;
+			return false;
+		} else {
+			throw InvalidInputException("Error on line %s: expected %lld values per row, but got %d. (%s)",
+			                            GetLineNumberStr(linenr, linenr_estimated).c_str(), sql_types.size(), column,
+			                            options.ToString());
+		}
 	}
 	if (mode == ParserMode::SNIFFING_DIALECT) {
@@ -65318,6 +65336,9 @@ void BufferedCSVReader::Flush(DataChunk &insert_chunk) {
 	if (parse_chunk.size() == 0) {
 		return;
 	}
+	bool conversion_error_ignored = false;
 	// convert the columns in the parsed chunk to the types of the table
 	insert_chunk.SetCardinality(parse_chunk);
 	for (idx_t col_idx = 0; col_idx < sql_types.size(); col_idx++) {
@@ -65359,26 +65380,56 @@ void BufferedCSVReader::Flush(DataChunk &insert_chunk) {
 				success = VectorOperations::TryCast(parse_chunk.data[col_idx], insert_chunk.data[col_idx],
 				                                    parse_chunk.size(), &error_message);
 			}
-			if (!success) {
-				string col_name = to_string(col_idx);
-				if (col_idx < col_names.size()) {
-					col_name = "\"" + col_names[col_idx] + "\"";
-				}
+			if (success) {
+				continue;
+			}
+			if (options.ignore_errors) {
+				conversion_error_ignored = true;
+				continue;
+			}
+			string col_name = to_string(col_idx);
+			if (col_idx < col_names.size()) {
+				col_name = "\"" + col_names[col_idx] + "\"";
+			}
-				if (options.auto_detect) {
-					throw InvalidInputException("%s in column %s, between line %llu and %llu. Parser "
-					                            "options: %s. Consider either increasing the sample size "
-					                            "(SAMPLE_SIZE=X [X rows] or SAMPLE_SIZE=-1 [all rows]), "
-					                            "or skipping column conversion (ALL_VARCHAR=1)",
-					                            error_message, col_name, linenr - parse_chunk.size() + 1, linenr,
-					                            options.ToString());
-				} else {
-					throw InvalidInputException("%s between line %llu and %llu in column %s. Parser options: %s ",
-					                            error_message, linenr - parse_chunk.size(), linenr, col_name,
-					                            options.ToString());
+			if (options.auto_detect) {
+				throw InvalidInputException("%s in column %s, between line %llu and %llu. Parser "
+				                            "options: %s. Consider either increasing the sample size "
+				                            "(SAMPLE_SIZE=X [X rows] or SAMPLE_SIZE=-1 [all rows]), "
+				                            "or skipping column conversion (ALL_VARCHAR=1)",
+				                            error_message, col_name, linenr - parse_chunk.size() + 1, linenr,
+				                            options.ToString());
+			} else {
+				throw InvalidInputException("%s between line %llu and %llu in column %s. Parser options: %s ",
+				                            error_message, linenr - parse_chunk.size(), linenr, col_name,
+				                            options.ToString());
+			}
+		}
+	}
+	if (conversion_error_ignored) {
+		D_ASSERT(options.ignore_errors);
+		SelectionVector succesful_rows;
+		succesful_rows.Initialize(parse_chunk.size());
+		idx_t sel_size = 0;
+		for (idx_t row_idx = 0; row_idx < parse_chunk.size(); row_idx++) {
+			bool failed = false;
+			for (idx_t column_idx = 0; column_idx < sql_types.size(); column_idx++) {
+				auto &inserted_column = insert_chunk.data[column_idx];
+				auto &parsed_column = parse_chunk.data[column_idx];
+				bool was_already_null = FlatVector::IsNull(parsed_column, row_idx);
+				if (!was_already_null && FlatVector::IsNull(inserted_column, row_idx)) {
+					failed = true;
+					break;
 				}
 			}
+			if (!failed) {
+				succesful_rows.set_index(sel_size++, row_idx);
+			}
 		}
+		insert_chunk.Slice(succesful_rows, sel_size);
 	}
 	parse_chunk.Reset();
 }
@@ -99039,6 +99090,8 @@ static bool ParseBaseOption(BufferedCSVReaderOptions &options, string &loption,
 		options.skip_rows = ParseInteger(set);
 	} else if (loption == "max_line_size" || loption == "maximum_line_size") {
 		options.maximum_line_size = ParseInteger(set);
+	} else if (loption == "ignore_errors") {
+		options.ignore_errors = ParseBoolean(set);
 	} else {
 		// unrecognized option in base CSV
 		return false;

package/src/duckdb.hpp CHANGED

@@ -11,8 +11,8 @@ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLI
 #pragma once
 #define DUCKDB_AMALGAMATION 1
 #define DUCKDB_AMALGAMATION_EXTENDED 1
-#define DUCKDB_SOURCE_ID "c8e258878"
-#define DUCKDB_VERSION "v0.3.5-dev54"
+#define DUCKDB_SOURCE_ID "517ff64d6"
+#define DUCKDB_VERSION "v0.3.5-dev75"
 //===----------------------------------------------------------------------===//
 //                         DuckDB
 //
@@ -22103,6 +22103,8 @@ struct BufferedCSVReaderOptions {
 	bool has_header = false;
 	//! Whether or not the file has a header line
 	bool header = false;
+	//! Whether or not we should ignore InvalidInput errors
+	bool ignore_errors = false;
 	//! Whether or not header names shall be normalized
 	bool normalize_names = false;
 	//! How many leading rows to skip
@@ -22255,6 +22257,10 @@ private:
 	                                        const vector<LogicalType> &requested_types,
 	                                        vector<vector<LogicalType>> &best_sql_types_candidates,
 	                                        map<LogicalTypeId, vector<string>> &best_format_candidates);
+private:
+	//! Whether or not the current row's columns have overflown sql_types.size()
+	bool error_column_overflow = false;
 };
 } // namespace duckdb