duckdb 0.3.5-dev46.0 → 0.3.5-dev77.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "name": "duckdb",
3
3
  "main": "./lib/duckdb.js",
4
- "version": "0.3.5-dev46.0",
4
+ "version": "0.3.5-dev77.0",
5
5
  "description": "DuckDB node.js API",
6
6
  "gypfile": true,
7
7
  "dependencies": {
package/src/duckdb.cpp CHANGED
@@ -63764,7 +63764,7 @@ std::string BufferedCSVReaderOptions::ToString() const {
63764
63764
  ", HEADER=" + std::to_string(header) +
63765
63765
  (has_header ? "" : (auto_detect ? " (auto detected)" : "' (default)")) +
63766
63766
  ", SAMPLE_SIZE=" + std::to_string(sample_chunk_size * sample_chunks) +
63767
- ", ALL_VARCHAR=" + std::to_string(all_varchar);
63767
+ ", IGNORE_ERRORS=" + std::to_string(ignore_errors) + ", ALL_VARCHAR=" + std::to_string(all_varchar);
63768
63768
  }
63769
63769
 
63770
63770
  static string GetLineNumberStr(idx_t linenr, bool linenr_estimated) {
@@ -65227,9 +65227,14 @@ void BufferedCSVReader::AddValue(char *str_val, idx_t length, idx_t &column, vec
65227
65227
  return;
65228
65228
  }
65229
65229
  if (column >= sql_types.size()) {
65230
- throw InvalidInputException("Error on line %s: expected %lld values per row, but got more. (%s)",
65231
- GetLineNumberStr(linenr, linenr_estimated).c_str(), sql_types.size(),
65232
- options.ToString());
65230
+ if (options.ignore_errors) {
65231
+ error_column_overflow = true;
65232
+ return;
65233
+ } else {
65234
+ throw InvalidInputException("Error on line %s: expected %lld values per row, but got more. (%s)",
65235
+ GetLineNumberStr(linenr, linenr_estimated).c_str(), sql_types.size(),
65236
+ options.ToString());
65237
+ }
65233
65238
  }
65234
65239
 
65235
65240
  // insert the line number into the chunk
@@ -65281,10 +65286,23 @@ bool BufferedCSVReader::AddRow(DataChunk &insert_chunk, idx_t &column) {
65281
65286
  }
65282
65287
  }
65283
65288
 
65289
+ // Error forwarded by 'ignore_errors' - originally encountered in 'AddValue'
65290
+ if (error_column_overflow) {
65291
+ D_ASSERT(options.ignore_errors);
65292
+ error_column_overflow = false;
65293
+ column = 0;
65294
+ return false;
65295
+ }
65296
+
65284
65297
  if (column < sql_types.size() && mode != ParserMode::SNIFFING_DIALECT) {
65285
- throw InvalidInputException("Error on line %s: expected %lld values per row, but got %d. (%s)",
65286
- GetLineNumberStr(linenr, linenr_estimated).c_str(), sql_types.size(), column,
65287
- options.ToString());
65298
+ if (options.ignore_errors) {
65299
+ column = 0;
65300
+ return false;
65301
+ } else {
65302
+ throw InvalidInputException("Error on line %s: expected %lld values per row, but got %d. (%s)",
65303
+ GetLineNumberStr(linenr, linenr_estimated).c_str(), sql_types.size(), column,
65304
+ options.ToString());
65305
+ }
65288
65306
  }
65289
65307
 
65290
65308
  if (mode == ParserMode::SNIFFING_DIALECT) {
@@ -65318,6 +65336,9 @@ void BufferedCSVReader::Flush(DataChunk &insert_chunk) {
65318
65336
  if (parse_chunk.size() == 0) {
65319
65337
  return;
65320
65338
  }
65339
+
65340
+ bool conversion_error_ignored = false;
65341
+
65321
65342
  // convert the columns in the parsed chunk to the types of the table
65322
65343
  insert_chunk.SetCardinality(parse_chunk);
65323
65344
  for (idx_t col_idx = 0; col_idx < sql_types.size(); col_idx++) {
@@ -65359,26 +65380,56 @@ void BufferedCSVReader::Flush(DataChunk &insert_chunk) {
65359
65380
  success = VectorOperations::TryCast(parse_chunk.data[col_idx], insert_chunk.data[col_idx],
65360
65381
  parse_chunk.size(), &error_message);
65361
65382
  }
65362
- if (!success) {
65363
- string col_name = to_string(col_idx);
65364
- if (col_idx < col_names.size()) {
65365
- col_name = "\"" + col_names[col_idx] + "\"";
65366
- }
65383
+ if (success) {
65384
+ continue;
65385
+ }
65386
+ if (options.ignore_errors) {
65387
+ conversion_error_ignored = true;
65388
+ continue;
65389
+ }
65390
+ string col_name = to_string(col_idx);
65391
+ if (col_idx < col_names.size()) {
65392
+ col_name = "\"" + col_names[col_idx] + "\"";
65393
+ }
65367
65394
 
65368
- if (options.auto_detect) {
65369
- throw InvalidInputException("%s in column %s, between line %llu and %llu. Parser "
65370
- "options: %s. Consider either increasing the sample size "
65371
- "(SAMPLE_SIZE=X [X rows] or SAMPLE_SIZE=-1 [all rows]), "
65372
- "or skipping column conversion (ALL_VARCHAR=1)",
65373
- error_message, col_name, linenr - parse_chunk.size() + 1, linenr,
65374
- options.ToString());
65375
- } else {
65376
- throw InvalidInputException("%s between line %llu and %llu in column %s. Parser options: %s ",
65377
- error_message, linenr - parse_chunk.size(), linenr, col_name,
65378
- options.ToString());
65395
+ if (options.auto_detect) {
65396
+ throw InvalidInputException("%s in column %s, between line %llu and %llu. Parser "
65397
+ "options: %s. Consider either increasing the sample size "
65398
+ "(SAMPLE_SIZE=X [X rows] or SAMPLE_SIZE=-1 [all rows]), "
65399
+ "or skipping column conversion (ALL_VARCHAR=1)",
65400
+ error_message, col_name, linenr - parse_chunk.size() + 1, linenr,
65401
+ options.ToString());
65402
+ } else {
65403
+ throw InvalidInputException("%s between line %llu and %llu in column %s. Parser options: %s ",
65404
+ error_message, linenr - parse_chunk.size(), linenr, col_name,
65405
+ options.ToString());
65406
+ }
65407
+ }
65408
+ }
65409
+ if (conversion_error_ignored) {
65410
+ D_ASSERT(options.ignore_errors);
65411
+ SelectionVector succesful_rows;
65412
+ succesful_rows.Initialize(parse_chunk.size());
65413
+ idx_t sel_size = 0;
65414
+
65415
+ for (idx_t row_idx = 0; row_idx < parse_chunk.size(); row_idx++) {
65416
+ bool failed = false;
65417
+ for (idx_t column_idx = 0; column_idx < sql_types.size(); column_idx++) {
65418
+
65419
+ auto &inserted_column = insert_chunk.data[column_idx];
65420
+ auto &parsed_column = parse_chunk.data[column_idx];
65421
+
65422
+ bool was_already_null = FlatVector::IsNull(parsed_column, row_idx);
65423
+ if (!was_already_null && FlatVector::IsNull(inserted_column, row_idx)) {
65424
+ failed = true;
65425
+ break;
65379
65426
  }
65380
65427
  }
65428
+ if (!failed) {
65429
+ succesful_rows.set_index(sel_size++, row_idx);
65430
+ }
65381
65431
  }
65432
+ insert_chunk.Slice(succesful_rows, sel_size);
65382
65433
  }
65383
65434
  parse_chunk.Reset();
65384
65435
  }
@@ -99039,6 +99090,8 @@ static bool ParseBaseOption(BufferedCSVReaderOptions &options, string &loption,
99039
99090
  options.skip_rows = ParseInteger(set);
99040
99091
  } else if (loption == "max_line_size" || loption == "maximum_line_size") {
99041
99092
  options.maximum_line_size = ParseInteger(set);
99093
+ } else if (loption == "ignore_errors") {
99094
+ options.ignore_errors = ParseBoolean(set);
99042
99095
  } else {
99043
99096
  // unrecognized option in base CSV
99044
99097
  return false;
@@ -122242,6 +122295,18 @@ string Relation::RenderWhitespace(idx_t depth) {
122242
122295
  return string(depth * 2, ' ');
122243
122296
  }
122244
122297
 
122298
+ vector<shared_ptr<ExternalDependency>> Relation::GetAllDependencies() {
122299
+ vector<shared_ptr<ExternalDependency>> all_dependencies;
122300
+ Relation *cur = this;
122301
+ while (cur) {
122302
+ if (cur->extra_dependencies) {
122303
+ all_dependencies.push_back(cur->extra_dependencies);
122304
+ }
122305
+ cur = ChildRelation();
122306
+ }
122307
+ return all_dependencies;
122308
+ }
122309
+
122245
122310
  } // namespace duckdb
122246
122311
 
122247
122312
 
package/src/duckdb.hpp CHANGED
@@ -11,8 +11,8 @@ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLI
11
11
  #pragma once
12
12
  #define DUCKDB_AMALGAMATION 1
13
13
  #define DUCKDB_AMALGAMATION_EXTENDED 1
14
- #define DUCKDB_SOURCE_ID "0b96d12d4"
15
- #define DUCKDB_VERSION "v0.3.5-dev46"
14
+ #define DUCKDB_SOURCE_ID "0e5906961"
15
+ #define DUCKDB_VERSION "v0.3.5-dev77"
16
16
  //===----------------------------------------------------------------------===//
17
17
  // DuckDB
18
18
  //
@@ -17214,6 +17214,27 @@ public:
17214
17214
 
17215
17215
  } // namespace duckdb
17216
17216
 
17217
+ //===----------------------------------------------------------------------===//
17218
+ // DuckDB
17219
+ //
17220
+ // duckdb/main/external_dependencies.hpp
17221
+ //
17222
+ //
17223
+ //===----------------------------------------------------------------------===//
17224
+
17225
+
17226
+
17227
+ namespace duckdb {
17228
+
//! Tag identifying what kind of external object an ExternalDependency wraps
enum ExternalDependenciesType { PYTHON_DEPENDENCY };

//! Base class for dependencies on objects that live outside of DuckDB.
//! Instances are held through shared_ptr<ExternalDependency>, so the destructor
//! is virtual to make destruction through a base pointer well-defined.
class ExternalDependency {
public:
	//! Creates a dependency tagged with the given kind
	explicit ExternalDependency(ExternalDependenciesType type_p) : type(type_p) {
	}
	virtual ~ExternalDependency() = default;

	//! The kind of external object, as passed to the constructor
	ExternalDependenciesType type;
};
17236
+
17237
+ } // namespace duckdb
17217
17238
 
17218
17239
  namespace duckdb {
17219
17240
  class Appender;
@@ -17255,6 +17276,8 @@ public:
17255
17276
  TransactionContext transaction;
17256
17277
  //! Whether or not the query is interrupted
17257
17278
  atomic<bool> interrupted;
17279
+ //! External objects (e.g., Python objects) that views depend on
17280
+ unordered_map<string, vector<shared_ptr<ExternalDependency>>> external_dependencies;
17258
17281
 
17259
17282
  unique_ptr<SchemaCatalogEntry> temporary_objects;
17260
17283
  unordered_map<string, shared_ptr<PreparedStatementData>> prepared_statements;
@@ -17472,6 +17495,7 @@ private:
17472
17495
  } // namespace duckdb
17473
17496
 
17474
17497
 
17498
+
17475
17499
  #include <memory>
17476
17500
 
17477
17501
  namespace duckdb {
@@ -17483,8 +17507,6 @@ class LogicalOperator;
17483
17507
  class QueryNode;
17484
17508
  class TableRef;
17485
17509
 
17486
- class ExtraDependencies {};
17487
-
17488
17510
  class Relation : public std::enable_shared_from_this<Relation> {
17489
17511
  public:
17490
17512
  DUCKDB_API Relation(const std::shared_ptr<ClientContext> &context, RelationType type)
@@ -17499,7 +17521,7 @@ public:
17499
17521
 
17500
17522
  RelationType type;
17501
17523
 
17502
- unique_ptr<ExtraDependencies> extra_dependencies;
17524
+ shared_ptr<ExternalDependency> extra_dependencies;
17503
17525
 
17504
17526
  public:
17505
17527
  DUCKDB_API virtual const vector<ColumnDefinition> &Columns() = 0;
@@ -17599,6 +17621,7 @@ public:
17599
17621
  DUCKDB_API virtual Relation *ChildRelation() {
17600
17622
  return nullptr;
17601
17623
  }
17624
+ DUCKDB_API vector<shared_ptr<ExternalDependency>> GetAllDependencies();
17602
17625
 
17603
17626
  protected:
17604
17627
  DUCKDB_API string RenderWhitespace(idx_t depth);
@@ -22080,6 +22103,8 @@ struct BufferedCSVReaderOptions {
22080
22103
  bool has_header = false;
22081
22104
  //! Whether or not the file has a header line
22082
22105
  bool header = false;
22106
+ //! Whether or not we should ignore InvalidInput errors
22107
+ bool ignore_errors = false;
22083
22108
  //! Whether or not header names shall be normalized
22084
22109
  bool normalize_names = false;
22085
22110
  //! How many leading rows to skip
@@ -22232,6 +22257,10 @@ private:
22232
22257
  const vector<LogicalType> &requested_types,
22233
22258
  vector<vector<LogicalType>> &best_sql_types_candidates,
22234
22259
  map<LogicalTypeId, vector<string>> &best_format_candidates);
22260
+
22261
+ private:
22262
+ //! Whether or not the current row contains more columns than sql_types.size()
22263
+ bool error_column_overflow = false;
22235
22264
  };
22236
22265
 
22237
22266
  } // namespace duckdb