duckdb 0.3.5-dev46.0 → 0.3.5-dev77.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/duckdb.cpp +88 -23
- package/src/duckdb.hpp +34 -5
- package/src/parquet-amalgamation.cpp +36313 -36313
package/package.json
CHANGED
package/src/duckdb.cpp
CHANGED
|
@@ -63764,7 +63764,7 @@ std::string BufferedCSVReaderOptions::ToString() const {
|
|
|
63764
63764
|
", HEADER=" + std::to_string(header) +
|
|
63765
63765
|
(has_header ? "" : (auto_detect ? " (auto detected)" : "' (default)")) +
|
|
63766
63766
|
", SAMPLE_SIZE=" + std::to_string(sample_chunk_size * sample_chunks) +
|
|
63767
|
-
", ALL_VARCHAR=" + std::to_string(all_varchar);
|
|
63767
|
+
", IGNORE_ERRORS=" + std::to_string(ignore_errors) + ", ALL_VARCHAR=" + std::to_string(all_varchar);
|
|
63768
63768
|
}
|
|
63769
63769
|
|
|
63770
63770
|
static string GetLineNumberStr(idx_t linenr, bool linenr_estimated) {
|
|
@@ -65227,9 +65227,14 @@ void BufferedCSVReader::AddValue(char *str_val, idx_t length, idx_t &column, vec
|
|
|
65227
65227
|
return;
|
|
65228
65228
|
}
|
|
65229
65229
|
if (column >= sql_types.size()) {
|
|
65230
|
-
|
|
65231
|
-
|
|
65232
|
-
|
|
65230
|
+
if (options.ignore_errors) {
|
|
65231
|
+
error_column_overflow = true;
|
|
65232
|
+
return;
|
|
65233
|
+
} else {
|
|
65234
|
+
throw InvalidInputException("Error on line %s: expected %lld values per row, but got more. (%s)",
|
|
65235
|
+
GetLineNumberStr(linenr, linenr_estimated).c_str(), sql_types.size(),
|
|
65236
|
+
options.ToString());
|
|
65237
|
+
}
|
|
65233
65238
|
}
|
|
65234
65239
|
|
|
65235
65240
|
// insert the line number into the chunk
|
|
@@ -65281,10 +65286,23 @@ bool BufferedCSVReader::AddRow(DataChunk &insert_chunk, idx_t &column) {
|
|
|
65281
65286
|
}
|
|
65282
65287
|
}
|
|
65283
65288
|
|
|
65289
|
+
// Error forwarded by 'ignore_errors' - originally encountered in 'AddValue'
|
|
65290
|
+
if (error_column_overflow) {
|
|
65291
|
+
D_ASSERT(options.ignore_errors);
|
|
65292
|
+
error_column_overflow = false;
|
|
65293
|
+
column = 0;
|
|
65294
|
+
return false;
|
|
65295
|
+
}
|
|
65296
|
+
|
|
65284
65297
|
if (column < sql_types.size() && mode != ParserMode::SNIFFING_DIALECT) {
|
|
65285
|
-
|
|
65286
|
-
|
|
65287
|
-
|
|
65298
|
+
if (options.ignore_errors) {
|
|
65299
|
+
column = 0;
|
|
65300
|
+
return false;
|
|
65301
|
+
} else {
|
|
65302
|
+
throw InvalidInputException("Error on line %s: expected %lld values per row, but got %d. (%s)",
|
|
65303
|
+
GetLineNumberStr(linenr, linenr_estimated).c_str(), sql_types.size(), column,
|
|
65304
|
+
options.ToString());
|
|
65305
|
+
}
|
|
65288
65306
|
}
|
|
65289
65307
|
|
|
65290
65308
|
if (mode == ParserMode::SNIFFING_DIALECT) {
|
|
@@ -65318,6 +65336,9 @@ void BufferedCSVReader::Flush(DataChunk &insert_chunk) {
|
|
|
65318
65336
|
if (parse_chunk.size() == 0) {
|
|
65319
65337
|
return;
|
|
65320
65338
|
}
|
|
65339
|
+
|
|
65340
|
+
bool conversion_error_ignored = false;
|
|
65341
|
+
|
|
65321
65342
|
// convert the columns in the parsed chunk to the types of the table
|
|
65322
65343
|
insert_chunk.SetCardinality(parse_chunk);
|
|
65323
65344
|
for (idx_t col_idx = 0; col_idx < sql_types.size(); col_idx++) {
|
|
@@ -65359,26 +65380,56 @@ void BufferedCSVReader::Flush(DataChunk &insert_chunk) {
|
|
|
65359
65380
|
success = VectorOperations::TryCast(parse_chunk.data[col_idx], insert_chunk.data[col_idx],
|
|
65360
65381
|
parse_chunk.size(), &error_message);
|
|
65361
65382
|
}
|
|
65362
|
-
if (
|
|
65363
|
-
|
|
65364
|
-
|
|
65365
|
-
|
|
65366
|
-
|
|
65383
|
+
if (success) {
|
|
65384
|
+
continue;
|
|
65385
|
+
}
|
|
65386
|
+
if (options.ignore_errors) {
|
|
65387
|
+
conversion_error_ignored = true;
|
|
65388
|
+
continue;
|
|
65389
|
+
}
|
|
65390
|
+
string col_name = to_string(col_idx);
|
|
65391
|
+
if (col_idx < col_names.size()) {
|
|
65392
|
+
col_name = "\"" + col_names[col_idx] + "\"";
|
|
65393
|
+
}
|
|
65367
65394
|
|
|
65368
|
-
|
|
65369
|
-
|
|
65370
|
-
|
|
65371
|
-
|
|
65372
|
-
|
|
65373
|
-
|
|
65374
|
-
|
|
65375
|
-
|
|
65376
|
-
|
|
65377
|
-
|
|
65378
|
-
|
|
65395
|
+
if (options.auto_detect) {
|
|
65396
|
+
throw InvalidInputException("%s in column %s, between line %llu and %llu. Parser "
|
|
65397
|
+
"options: %s. Consider either increasing the sample size "
|
|
65398
|
+
"(SAMPLE_SIZE=X [X rows] or SAMPLE_SIZE=-1 [all rows]), "
|
|
65399
|
+
"or skipping column conversion (ALL_VARCHAR=1)",
|
|
65400
|
+
error_message, col_name, linenr - parse_chunk.size() + 1, linenr,
|
|
65401
|
+
options.ToString());
|
|
65402
|
+
} else {
|
|
65403
|
+
throw InvalidInputException("%s between line %llu and %llu in column %s. Parser options: %s ",
|
|
65404
|
+
error_message, linenr - parse_chunk.size(), linenr, col_name,
|
|
65405
|
+
options.ToString());
|
|
65406
|
+
}
|
|
65407
|
+
}
|
|
65408
|
+
}
|
|
65409
|
+
if (conversion_error_ignored) {
|
|
65410
|
+
D_ASSERT(options.ignore_errors);
|
|
65411
|
+
SelectionVector succesful_rows;
|
|
65412
|
+
succesful_rows.Initialize(parse_chunk.size());
|
|
65413
|
+
idx_t sel_size = 0;
|
|
65414
|
+
|
|
65415
|
+
for (idx_t row_idx = 0; row_idx < parse_chunk.size(); row_idx++) {
|
|
65416
|
+
bool failed = false;
|
|
65417
|
+
for (idx_t column_idx = 0; column_idx < sql_types.size(); column_idx++) {
|
|
65418
|
+
|
|
65419
|
+
auto &inserted_column = insert_chunk.data[column_idx];
|
|
65420
|
+
auto &parsed_column = parse_chunk.data[column_idx];
|
|
65421
|
+
|
|
65422
|
+
bool was_already_null = FlatVector::IsNull(parsed_column, row_idx);
|
|
65423
|
+
if (!was_already_null && FlatVector::IsNull(inserted_column, row_idx)) {
|
|
65424
|
+
failed = true;
|
|
65425
|
+
break;
|
|
65379
65426
|
}
|
|
65380
65427
|
}
|
|
65428
|
+
if (!failed) {
|
|
65429
|
+
succesful_rows.set_index(sel_size++, row_idx);
|
|
65430
|
+
}
|
|
65381
65431
|
}
|
|
65432
|
+
insert_chunk.Slice(succesful_rows, sel_size);
|
|
65382
65433
|
}
|
|
65383
65434
|
parse_chunk.Reset();
|
|
65384
65435
|
}
|
|
@@ -99039,6 +99090,8 @@ static bool ParseBaseOption(BufferedCSVReaderOptions &options, string &loption,
|
|
|
99039
99090
|
options.skip_rows = ParseInteger(set);
|
|
99040
99091
|
} else if (loption == "max_line_size" || loption == "maximum_line_size") {
|
|
99041
99092
|
options.maximum_line_size = ParseInteger(set);
|
|
99093
|
+
} else if (loption == "ignore_errors") {
|
|
99094
|
+
options.ignore_errors = ParseBoolean(set);
|
|
99042
99095
|
} else {
|
|
99043
99096
|
// unrecognized option in base CSV
|
|
99044
99097
|
return false;
|
|
@@ -122242,6 +122295,18 @@ string Relation::RenderWhitespace(idx_t depth) {
|
|
|
122242
122295
|
return string(depth * 2, ' ');
|
|
122243
122296
|
}
|
|
122244
122297
|
|
|
122298
|
+
//! Gathers the external dependencies attached to this relation and to every
//! relation reached by repeatedly following ChildRelation().
//!
//! \return The non-null `extra_dependencies` handles, ordered from this
//!         relation downwards through the chain of child relations. Empty if
//!         no relation in the chain has a dependency attached.
vector<shared_ptr<ExternalDependency>> Relation::GetAllDependencies() {
	vector<shared_ptr<ExternalDependency>> all_dependencies;
	// Step from the relation that was just visited. Calling ChildRelation() on
	// `this` instead would yield the same child on every iteration and the
	// walk would never reach the end of the chain.
	for (Relation *cur = this; cur; cur = cur->ChildRelation()) {
		if (cur->extra_dependencies) {
			all_dependencies.push_back(cur->extra_dependencies);
		}
	}
	return all_dependencies;
}
|
|
122309
|
+
|
|
122245
122310
|
} // namespace duckdb
|
|
122246
122311
|
|
|
122247
122312
|
|
package/src/duckdb.hpp
CHANGED
|
@@ -11,8 +11,8 @@ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLI
|
|
|
11
11
|
#pragma once
|
|
12
12
|
#define DUCKDB_AMALGAMATION 1
|
|
13
13
|
#define DUCKDB_AMALGAMATION_EXTENDED 1
|
|
14
|
-
#define DUCKDB_SOURCE_ID "
|
|
15
|
-
#define DUCKDB_VERSION "v0.3.5-
|
|
14
|
+
#define DUCKDB_SOURCE_ID "0e5906961"
|
|
15
|
+
#define DUCKDB_VERSION "v0.3.5-dev77"
|
|
16
16
|
//===----------------------------------------------------------------------===//
|
|
17
17
|
// DuckDB
|
|
18
18
|
//
|
|
@@ -17214,6 +17214,27 @@ public:
|
|
|
17214
17214
|
|
|
17215
17215
|
} // namespace duckdb
|
|
17216
17216
|
|
|
17217
|
+
//===----------------------------------------------------------------------===//
|
|
17218
|
+
// DuckDB
|
|
17219
|
+
//
|
|
17220
|
+
// duckdb/main/external_dependencies.hpp
|
|
17221
|
+
//
|
|
17222
|
+
//
|
|
17223
|
+
//===----------------------------------------------------------------------===//
|
|
17224
|
+
|
|
17225
|
+
|
|
17226
|
+
|
|
17227
|
+
namespace duckdb {

//! Tag describing which kind of external object an ExternalDependency refers to
enum ExternalDependenciesType { PYTHON_DEPENDENCY };

//! Polymorphic base class for handles to external objects.
//! Stores only the type tag; the virtual destructor allows derived handles to
//! be destroyed through a pointer to this base.
class ExternalDependency {
public:
	//! Creates a dependency tagged with the given type
	explicit ExternalDependency(ExternalDependenciesType type_p) : type(type_p) {
	}
	virtual ~ExternalDependency() = default;

public:
	//! The kind of external object this dependency represents
	ExternalDependenciesType type;
};

} // namespace duckdb
|
|
17217
17238
|
|
|
17218
17239
|
namespace duckdb {
|
|
17219
17240
|
class Appender;
|
|
@@ -17255,6 +17276,8 @@ public:
|
|
|
17255
17276
|
TransactionContext transaction;
|
|
17256
17277
|
//! Whether or not the query is interrupted
|
|
17257
17278
|
atomic<bool> interrupted;
|
|
17279
|
+
//! External Objects (e.g., Python objects) that views depend on
|
|
17280
|
+
unordered_map<string, vector<shared_ptr<ExternalDependency>>> external_dependencies;
|
|
17258
17281
|
|
|
17259
17282
|
unique_ptr<SchemaCatalogEntry> temporary_objects;
|
|
17260
17283
|
unordered_map<string, shared_ptr<PreparedStatementData>> prepared_statements;
|
|
@@ -17472,6 +17495,7 @@ private:
|
|
|
17472
17495
|
} // namespace duckdb
|
|
17473
17496
|
|
|
17474
17497
|
|
|
17498
|
+
|
|
17475
17499
|
#include <memory>
|
|
17476
17500
|
|
|
17477
17501
|
namespace duckdb {
|
|
@@ -17483,8 +17507,6 @@ class LogicalOperator;
|
|
|
17483
17507
|
class QueryNode;
|
|
17484
17508
|
class TableRef;
|
|
17485
17509
|
|
|
17486
|
-
class ExtraDependencies {};
|
|
17487
|
-
|
|
17488
17510
|
class Relation : public std::enable_shared_from_this<Relation> {
|
|
17489
17511
|
public:
|
|
17490
17512
|
DUCKDB_API Relation(const std::shared_ptr<ClientContext> &context, RelationType type)
|
|
@@ -17499,7 +17521,7 @@ public:
|
|
|
17499
17521
|
|
|
17500
17522
|
RelationType type;
|
|
17501
17523
|
|
|
17502
|
-
|
|
17524
|
+
shared_ptr<ExternalDependency> extra_dependencies;
|
|
17503
17525
|
|
|
17504
17526
|
public:
|
|
17505
17527
|
DUCKDB_API virtual const vector<ColumnDefinition> &Columns() = 0;
|
|
@@ -17599,6 +17621,7 @@ public:
|
|
|
17599
17621
|
DUCKDB_API virtual Relation *ChildRelation() {
|
|
17600
17622
|
return nullptr;
|
|
17601
17623
|
}
|
|
17624
|
+
DUCKDB_API vector<shared_ptr<ExternalDependency>> GetAllDependencies();
|
|
17602
17625
|
|
|
17603
17626
|
protected:
|
|
17604
17627
|
DUCKDB_API string RenderWhitespace(idx_t depth);
|
|
@@ -22080,6 +22103,8 @@ struct BufferedCSVReaderOptions {
|
|
|
22080
22103
|
bool has_header = false;
|
|
22081
22104
|
//! Whether or not the file has a header line
|
|
22082
22105
|
bool header = false;
|
|
22106
|
+
//! Whether or not we should ignore InvalidInput errors
|
|
22107
|
+
bool ignore_errors = false;
|
|
22083
22108
|
//! Whether or not header names shall be normalized
|
|
22084
22109
|
bool normalize_names = false;
|
|
22085
22110
|
//! How many leading rows to skip
|
|
@@ -22232,6 +22257,10 @@ private:
|
|
|
22232
22257
|
const vector<LogicalType> &requested_types,
|
|
22233
22258
|
vector<vector<LogicalType>> &best_sql_types_candidates,
|
|
22234
22259
|
map<LogicalTypeId, vector<string>> &best_format_candidates);
|
|
22260
|
+
|
|
22261
|
+
private:
|
|
22262
|
+
//! Whether or not the current row's columns have overflowed sql_types.size()
|
|
22263
|
+
bool error_column_overflow = false;
|
|
22235
22264
|
};
|
|
22236
22265
|
|
|
22237
22266
|
} // namespace duckdb
|