duckdb 0.3.5-dev54.0 → 0.3.5-dev75.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "name": "duckdb",
3
3
  "main": "./lib/duckdb.js",
4
- "version": "0.3.5-dev54.0",
4
+ "version": "0.3.5-dev75.0",
5
5
  "description": "DuckDB node.js API",
6
6
  "gypfile": true,
7
7
  "dependencies": {
package/src/duckdb.cpp CHANGED
@@ -63764,7 +63764,7 @@ std::string BufferedCSVReaderOptions::ToString() const {
63764
63764
  ", HEADER=" + std::to_string(header) +
63765
63765
  (has_header ? "" : (auto_detect ? " (auto detected)" : "' (default)")) +
63766
63766
  ", SAMPLE_SIZE=" + std::to_string(sample_chunk_size * sample_chunks) +
63767
- ", ALL_VARCHAR=" + std::to_string(all_varchar);
63767
+ ", IGNORE_ERRORS=" + std::to_string(ignore_errors) + ", ALL_VARCHAR=" + std::to_string(all_varchar);
63768
63768
  }
63769
63769
 
63770
63770
  static string GetLineNumberStr(idx_t linenr, bool linenr_estimated) {
@@ -65227,9 +65227,14 @@ void BufferedCSVReader::AddValue(char *str_val, idx_t length, idx_t &column, vec
65227
65227
  return;
65228
65228
  }
65229
65229
  if (column >= sql_types.size()) {
65230
- throw InvalidInputException("Error on line %s: expected %lld values per row, but got more. (%s)",
65231
- GetLineNumberStr(linenr, linenr_estimated).c_str(), sql_types.size(),
65232
- options.ToString());
65230
+ if (options.ignore_errors) {
65231
+ error_column_overflow = true;
65232
+ return;
65233
+ } else {
65234
+ throw InvalidInputException("Error on line %s: expected %lld values per row, but got more. (%s)",
65235
+ GetLineNumberStr(linenr, linenr_estimated).c_str(), sql_types.size(),
65236
+ options.ToString());
65237
+ }
65233
65238
  }
65234
65239
 
65235
65240
  // insert the line number into the chunk
@@ -65281,10 +65286,23 @@ bool BufferedCSVReader::AddRow(DataChunk &insert_chunk, idx_t &column) {
65281
65286
  }
65282
65287
  }
65283
65288
 
65289
+ // Error forwarded by 'ignore_errors' - originally encountered in 'AddValue'
65290
+ if (error_column_overflow) {
65291
+ D_ASSERT(options.ignore_errors);
65292
+ error_column_overflow = false;
65293
+ column = 0;
65294
+ return false;
65295
+ }
65296
+
65284
65297
  if (column < sql_types.size() && mode != ParserMode::SNIFFING_DIALECT) {
65285
- throw InvalidInputException("Error on line %s: expected %lld values per row, but got %d. (%s)",
65286
- GetLineNumberStr(linenr, linenr_estimated).c_str(), sql_types.size(), column,
65287
- options.ToString());
65298
+ if (options.ignore_errors) {
65299
+ column = 0;
65300
+ return false;
65301
+ } else {
65302
+ throw InvalidInputException("Error on line %s: expected %lld values per row, but got %d. (%s)",
65303
+ GetLineNumberStr(linenr, linenr_estimated).c_str(), sql_types.size(), column,
65304
+ options.ToString());
65305
+ }
65288
65306
  }
65289
65307
 
65290
65308
  if (mode == ParserMode::SNIFFING_DIALECT) {
@@ -65318,6 +65336,9 @@ void BufferedCSVReader::Flush(DataChunk &insert_chunk) {
65318
65336
  if (parse_chunk.size() == 0) {
65319
65337
  return;
65320
65338
  }
65339
+
65340
+ bool conversion_error_ignored = false;
65341
+
65321
65342
  // convert the columns in the parsed chunk to the types of the table
65322
65343
  insert_chunk.SetCardinality(parse_chunk);
65323
65344
  for (idx_t col_idx = 0; col_idx < sql_types.size(); col_idx++) {
@@ -65359,26 +65380,56 @@ void BufferedCSVReader::Flush(DataChunk &insert_chunk) {
65359
65380
  success = VectorOperations::TryCast(parse_chunk.data[col_idx], insert_chunk.data[col_idx],
65360
65381
  parse_chunk.size(), &error_message);
65361
65382
  }
65362
- if (!success) {
65363
- string col_name = to_string(col_idx);
65364
- if (col_idx < col_names.size()) {
65365
- col_name = "\"" + col_names[col_idx] + "\"";
65366
- }
65383
+ if (success) {
65384
+ continue;
65385
+ }
65386
+ if (options.ignore_errors) {
65387
+ conversion_error_ignored = true;
65388
+ continue;
65389
+ }
65390
+ string col_name = to_string(col_idx);
65391
+ if (col_idx < col_names.size()) {
65392
+ col_name = "\"" + col_names[col_idx] + "\"";
65393
+ }
65367
65394
 
65368
- if (options.auto_detect) {
65369
- throw InvalidInputException("%s in column %s, between line %llu and %llu. Parser "
65370
- "options: %s. Consider either increasing the sample size "
65371
- "(SAMPLE_SIZE=X [X rows] or SAMPLE_SIZE=-1 [all rows]), "
65372
- "or skipping column conversion (ALL_VARCHAR=1)",
65373
- error_message, col_name, linenr - parse_chunk.size() + 1, linenr,
65374
- options.ToString());
65375
- } else {
65376
- throw InvalidInputException("%s between line %llu and %llu in column %s. Parser options: %s ",
65377
- error_message, linenr - parse_chunk.size(), linenr, col_name,
65378
- options.ToString());
65395
+ if (options.auto_detect) {
65396
+ throw InvalidInputException("%s in column %s, between line %llu and %llu. Parser "
65397
+ "options: %s. Consider either increasing the sample size "
65398
+ "(SAMPLE_SIZE=X [X rows] or SAMPLE_SIZE=-1 [all rows]), "
65399
+ "or skipping column conversion (ALL_VARCHAR=1)",
65400
+ error_message, col_name, linenr - parse_chunk.size() + 1, linenr,
65401
+ options.ToString());
65402
+ } else {
65403
+ throw InvalidInputException("%s between line %llu and %llu in column %s. Parser options: %s ",
65404
+ error_message, linenr - parse_chunk.size(), linenr, col_name,
65405
+ options.ToString());
65406
+ }
65407
+ }
65408
+ }
65409
+ if (conversion_error_ignored) {
65410
+ D_ASSERT(options.ignore_errors);
65411
+ SelectionVector succesful_rows;
65412
+ succesful_rows.Initialize(parse_chunk.size());
65413
+ idx_t sel_size = 0;
65414
+
65415
+ for (idx_t row_idx = 0; row_idx < parse_chunk.size(); row_idx++) {
65416
+ bool failed = false;
65417
+ for (idx_t column_idx = 0; column_idx < sql_types.size(); column_idx++) {
65418
+
65419
+ auto &inserted_column = insert_chunk.data[column_idx];
65420
+ auto &parsed_column = parse_chunk.data[column_idx];
65421
+
65422
+ bool was_already_null = FlatVector::IsNull(parsed_column, row_idx);
65423
+ if (!was_already_null && FlatVector::IsNull(inserted_column, row_idx)) {
65424
+ failed = true;
65425
+ break;
65379
65426
  }
65380
65427
  }
65428
+ if (!failed) {
65429
+ succesful_rows.set_index(sel_size++, row_idx);
65430
+ }
65381
65431
  }
65432
+ insert_chunk.Slice(succesful_rows, sel_size);
65382
65433
  }
65383
65434
  parse_chunk.Reset();
65384
65435
  }
@@ -99039,6 +99090,8 @@ static bool ParseBaseOption(BufferedCSVReaderOptions &options, string &loption,
99039
99090
  options.skip_rows = ParseInteger(set);
99040
99091
  } else if (loption == "max_line_size" || loption == "maximum_line_size") {
99041
99092
  options.maximum_line_size = ParseInteger(set);
99093
+ } else if (loption == "ignore_errors") {
99094
+ options.ignore_errors = ParseBoolean(set);
99042
99095
  } else {
99043
99096
  // unrecognized option in base CSV
99044
99097
  return false;
package/src/duckdb.hpp CHANGED
@@ -11,8 +11,8 @@ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLI
11
11
  #pragma once
12
12
  #define DUCKDB_AMALGAMATION 1
13
13
  #define DUCKDB_AMALGAMATION_EXTENDED 1
14
- #define DUCKDB_SOURCE_ID "c8e258878"
15
- #define DUCKDB_VERSION "v0.3.5-dev54"
14
+ #define DUCKDB_SOURCE_ID "517ff64d6"
15
+ #define DUCKDB_VERSION "v0.3.5-dev75"
16
16
  //===----------------------------------------------------------------------===//
17
17
  // DuckDB
18
18
  //
@@ -22103,6 +22103,8 @@ struct BufferedCSVReaderOptions {
22103
22103
  bool has_header = false;
22104
22104
  //! Whether or not the file has a header line
22105
22105
  bool header = false;
22106
+ //! Whether or not we should ignore InvalidInput errors
22107
+ bool ignore_errors = false;
22106
22108
  //! Whether or not header names shall be normalized
22107
22109
  bool normalize_names = false;
22108
22110
  //! How many leading rows to skip
@@ -22255,6 +22257,10 @@ private:
22255
22257
  const vector<LogicalType> &requested_types,
22256
22258
  vector<vector<LogicalType>> &best_sql_types_candidates,
22257
22259
  map<LogicalTypeId, vector<string>> &best_format_candidates);
22260
+
22261
+ private:
22262
+ //! Whether or not the current row's columns have overflowed sql_types.size()
22263
+ bool error_column_overflow = false;
22258
22264
  };
22259
22265
 
22260
22266
  } // namespace duckdb