duckdb 0.3.5-dev54.0 → 0.3.5-dev75.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/duckdb.cpp +76 -23
- package/src/duckdb.hpp +8 -2
- package/src/parquet-amalgamation.cpp +31810 -31810
package/package.json
CHANGED
package/src/duckdb.cpp
CHANGED
|
@@ -63764,7 +63764,7 @@ std::string BufferedCSVReaderOptions::ToString() const {
|
|
|
63764
63764
|
", HEADER=" + std::to_string(header) +
|
|
63765
63765
|
(has_header ? "" : (auto_detect ? " (auto detected)" : "' (default)")) +
|
|
63766
63766
|
", SAMPLE_SIZE=" + std::to_string(sample_chunk_size * sample_chunks) +
|
|
63767
|
-
", ALL_VARCHAR=" + std::to_string(all_varchar);
|
|
63767
|
+
", IGNORE_ERRORS=" + std::to_string(ignore_errors) + ", ALL_VARCHAR=" + std::to_string(all_varchar);
|
|
63768
63768
|
}
|
|
63769
63769
|
|
|
63770
63770
|
static string GetLineNumberStr(idx_t linenr, bool linenr_estimated) {
|
|
@@ -65227,9 +65227,14 @@ void BufferedCSVReader::AddValue(char *str_val, idx_t length, idx_t &column, vec
|
|
|
65227
65227
|
return;
|
|
65228
65228
|
}
|
|
65229
65229
|
if (column >= sql_types.size()) {
|
|
65230
|
-
|
|
65231
|
-
|
|
65232
|
-
|
|
65230
|
+
if (options.ignore_errors) {
|
|
65231
|
+
error_column_overflow = true;
|
|
65232
|
+
return;
|
|
65233
|
+
} else {
|
|
65234
|
+
throw InvalidInputException("Error on line %s: expected %lld values per row, but got more. (%s)",
|
|
65235
|
+
GetLineNumberStr(linenr, linenr_estimated).c_str(), sql_types.size(),
|
|
65236
|
+
options.ToString());
|
|
65237
|
+
}
|
|
65233
65238
|
}
|
|
65234
65239
|
|
|
65235
65240
|
// insert the line number into the chunk
|
|
@@ -65281,10 +65286,23 @@ bool BufferedCSVReader::AddRow(DataChunk &insert_chunk, idx_t &column) {
|
|
|
65281
65286
|
}
|
|
65282
65287
|
}
|
|
65283
65288
|
|
|
65289
|
+
// Error forwarded by 'ignore_errors' - originally encountered in 'AddValue'
|
|
65290
|
+
if (error_column_overflow) {
|
|
65291
|
+
D_ASSERT(options.ignore_errors);
|
|
65292
|
+
error_column_overflow = false;
|
|
65293
|
+
column = 0;
|
|
65294
|
+
return false;
|
|
65295
|
+
}
|
|
65296
|
+
|
|
65284
65297
|
if (column < sql_types.size() && mode != ParserMode::SNIFFING_DIALECT) {
|
|
65285
|
-
|
|
65286
|
-
|
|
65287
|
-
|
|
65298
|
+
if (options.ignore_errors) {
|
|
65299
|
+
column = 0;
|
|
65300
|
+
return false;
|
|
65301
|
+
} else {
|
|
65302
|
+
throw InvalidInputException("Error on line %s: expected %lld values per row, but got %d. (%s)",
|
|
65303
|
+
GetLineNumberStr(linenr, linenr_estimated).c_str(), sql_types.size(), column,
|
|
65304
|
+
options.ToString());
|
|
65305
|
+
}
|
|
65288
65306
|
}
|
|
65289
65307
|
|
|
65290
65308
|
if (mode == ParserMode::SNIFFING_DIALECT) {
|
|
@@ -65318,6 +65336,9 @@ void BufferedCSVReader::Flush(DataChunk &insert_chunk) {
|
|
|
65318
65336
|
if (parse_chunk.size() == 0) {
|
|
65319
65337
|
return;
|
|
65320
65338
|
}
|
|
65339
|
+
|
|
65340
|
+
bool conversion_error_ignored = false;
|
|
65341
|
+
|
|
65321
65342
|
// convert the columns in the parsed chunk to the types of the table
|
|
65322
65343
|
insert_chunk.SetCardinality(parse_chunk);
|
|
65323
65344
|
for (idx_t col_idx = 0; col_idx < sql_types.size(); col_idx++) {
|
|
@@ -65359,26 +65380,56 @@ void BufferedCSVReader::Flush(DataChunk &insert_chunk) {
|
|
|
65359
65380
|
success = VectorOperations::TryCast(parse_chunk.data[col_idx], insert_chunk.data[col_idx],
|
|
65360
65381
|
parse_chunk.size(), &error_message);
|
|
65361
65382
|
}
|
|
65362
|
-
if (
|
|
65363
|
-
|
|
65364
|
-
|
|
65365
|
-
|
|
65366
|
-
|
|
65383
|
+
if (success) {
|
|
65384
|
+
continue;
|
|
65385
|
+
}
|
|
65386
|
+
if (options.ignore_errors) {
|
|
65387
|
+
conversion_error_ignored = true;
|
|
65388
|
+
continue;
|
|
65389
|
+
}
|
|
65390
|
+
string col_name = to_string(col_idx);
|
|
65391
|
+
if (col_idx < col_names.size()) {
|
|
65392
|
+
col_name = "\"" + col_names[col_idx] + "\"";
|
|
65393
|
+
}
|
|
65367
65394
|
|
|
65368
|
-
|
|
65369
|
-
|
|
65370
|
-
|
|
65371
|
-
|
|
65372
|
-
|
|
65373
|
-
|
|
65374
|
-
|
|
65375
|
-
|
|
65376
|
-
|
|
65377
|
-
|
|
65378
|
-
|
|
65395
|
+
if (options.auto_detect) {
|
|
65396
|
+
throw InvalidInputException("%s in column %s, between line %llu and %llu. Parser "
|
|
65397
|
+
"options: %s. Consider either increasing the sample size "
|
|
65398
|
+
"(SAMPLE_SIZE=X [X rows] or SAMPLE_SIZE=-1 [all rows]), "
|
|
65399
|
+
"or skipping column conversion (ALL_VARCHAR=1)",
|
|
65400
|
+
error_message, col_name, linenr - parse_chunk.size() + 1, linenr,
|
|
65401
|
+
options.ToString());
|
|
65402
|
+
} else {
|
|
65403
|
+
throw InvalidInputException("%s between line %llu and %llu in column %s. Parser options: %s ",
|
|
65404
|
+
error_message, linenr - parse_chunk.size(), linenr, col_name,
|
|
65405
|
+
options.ToString());
|
|
65406
|
+
}
|
|
65407
|
+
}
|
|
65408
|
+
}
|
|
65409
|
+
if (conversion_error_ignored) {
|
|
65410
|
+
D_ASSERT(options.ignore_errors);
|
|
65411
|
+
SelectionVector succesful_rows;
|
|
65412
|
+
succesful_rows.Initialize(parse_chunk.size());
|
|
65413
|
+
idx_t sel_size = 0;
|
|
65414
|
+
|
|
65415
|
+
for (idx_t row_idx = 0; row_idx < parse_chunk.size(); row_idx++) {
|
|
65416
|
+
bool failed = false;
|
|
65417
|
+
for (idx_t column_idx = 0; column_idx < sql_types.size(); column_idx++) {
|
|
65418
|
+
|
|
65419
|
+
auto &inserted_column = insert_chunk.data[column_idx];
|
|
65420
|
+
auto &parsed_column = parse_chunk.data[column_idx];
|
|
65421
|
+
|
|
65422
|
+
bool was_already_null = FlatVector::IsNull(parsed_column, row_idx);
|
|
65423
|
+
if (!was_already_null && FlatVector::IsNull(inserted_column, row_idx)) {
|
|
65424
|
+
failed = true;
|
|
65425
|
+
break;
|
|
65379
65426
|
}
|
|
65380
65427
|
}
|
|
65428
|
+
if (!failed) {
|
|
65429
|
+
succesful_rows.set_index(sel_size++, row_idx);
|
|
65430
|
+
}
|
|
65381
65431
|
}
|
|
65432
|
+
insert_chunk.Slice(succesful_rows, sel_size);
|
|
65382
65433
|
}
|
|
65383
65434
|
parse_chunk.Reset();
|
|
65384
65435
|
}
|
|
@@ -99039,6 +99090,8 @@ static bool ParseBaseOption(BufferedCSVReaderOptions &options, string &loption,
|
|
|
99039
99090
|
options.skip_rows = ParseInteger(set);
|
|
99040
99091
|
} else if (loption == "max_line_size" || loption == "maximum_line_size") {
|
|
99041
99092
|
options.maximum_line_size = ParseInteger(set);
|
|
99093
|
+
} else if (loption == "ignore_errors") {
|
|
99094
|
+
options.ignore_errors = ParseBoolean(set);
|
|
99042
99095
|
} else {
|
|
99043
99096
|
// unrecognized option in base CSV
|
|
99044
99097
|
return false;
|
package/src/duckdb.hpp
CHANGED
|
@@ -11,8 +11,8 @@ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLI
|
|
|
11
11
|
#pragma once
|
|
12
12
|
#define DUCKDB_AMALGAMATION 1
|
|
13
13
|
#define DUCKDB_AMALGAMATION_EXTENDED 1
|
|
14
|
-
#define DUCKDB_SOURCE_ID "
|
|
15
|
-
#define DUCKDB_VERSION "v0.3.5-dev54"
|
|
14
|
+
#define DUCKDB_SOURCE_ID "517ff64d6"
|
|
15
|
+
#define DUCKDB_VERSION "v0.3.5-dev75"
|
|
16
16
|
//===----------------------------------------------------------------------===//
|
|
17
17
|
// DuckDB
|
|
18
18
|
//
|
|
@@ -22103,6 +22103,8 @@ struct BufferedCSVReaderOptions {
|
|
|
22103
22103
|
bool has_header = false;
|
|
22104
22104
|
//! Whether or not the file has a header line
|
|
22105
22105
|
bool header = false;
|
|
22106
|
+
//! Whether or not we should ignore InvalidInput errors
|
|
22107
|
+
bool ignore_errors = false;
|
|
22106
22108
|
//! Whether or not header names shall be normalized
|
|
22107
22109
|
bool normalize_names = false;
|
|
22108
22110
|
//! How many leading rows to skip
|
|
@@ -22255,6 +22257,10 @@ private:
|
|
|
22255
22257
|
const vector<LogicalType> &requested_types,
|
|
22256
22258
|
vector<vector<LogicalType>> &best_sql_types_candidates,
|
|
22257
22259
|
map<LogicalTypeId, vector<string>> &best_format_candidates);
|
|
22260
|
+
|
|
22261
|
+
private:
|
|
22262
|
+
//! Whether or not the current row's columns have overflown sql_types.size()
|
|
22263
|
+
bool error_column_overflow = false;
|
|
22258
22264
|
};
|
|
22259
22265
|
|
|
22260
22266
|
} // namespace duckdb
|