duckdb 0.5.2-dev1275.0 → 0.5.2-dev1292.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/duckdb.cpp +203 -83
- package/src/duckdb.hpp +9 -5
- package/src/parquet-amalgamation.cpp +29577 -29577
package/package.json
CHANGED
package/src/duckdb.cpp
CHANGED
|
@@ -623,7 +623,7 @@ public:
|
|
|
623
623
|
//===----------------------------------------------------------------------===//
|
|
624
624
|
// DuckDB
|
|
625
625
|
//
|
|
626
|
-
// extension_functions.hpp
|
|
626
|
+
// duckdb/main/extension_functions.hpp
|
|
627
627
|
//
|
|
628
628
|
//
|
|
629
629
|
//===----------------------------------------------------------------------===//
|
|
@@ -20261,7 +20261,7 @@ public:
|
|
|
20261
20261
|
//! Read exactly nr_bytes from the specified location in the file. Fails if nr_bytes could not be read. This is
|
|
20262
20262
|
//! equivalent to calling SetFilePointer(location) followed by calling Read().
|
|
20263
20263
|
void Read(FileHandle &handle, void *buffer, int64_t nr_bytes, idx_t location) override;
|
|
20264
|
-
//! Write exactly nr_bytes to the specified location in the file. Fails if nr_bytes could not be
|
|
20264
|
+
//! Write exactly nr_bytes to the specified location in the file. Fails if nr_bytes could not be written. This is
|
|
20265
20265
|
//! equivalent to calling SetFilePointer(location) followed by calling Write().
|
|
20266
20266
|
void Write(FileHandle &handle, void *buffer, int64_t nr_bytes, idx_t location) override;
|
|
20267
20267
|
//! Read nr_bytes from the specified file into the buffer, moving the file pointer forward by nr_bytes. Returns the
|
|
@@ -47492,6 +47492,66 @@ private:
|
|
|
47492
47492
|
|
|
47493
47493
|
} // namespace duckdb
|
|
47494
47494
|
|
|
47495
|
+
//===----------------------------------------------------------------------===//
|
|
47496
|
+
// DuckDB
|
|
47497
|
+
//
|
|
47498
|
+
// duckdb/main/error_manager.hpp
|
|
47499
|
+
//
|
|
47500
|
+
//
|
|
47501
|
+
//===----------------------------------------------------------------------===//
|
|
47502
|
+
|
|
47503
|
+
|
|
47504
|
+
|
|
47505
|
+
|
|
47506
|
+
|
|
47507
|
+
|
|
47508
|
+
|
|
47509
|
+
namespace duckdb {
|
|
47510
|
+
class ClientContext;
|
|
47511
|
+
class DatabaseInstance;
|
|
47512
|
+
|
|
47513
|
+
enum class ErrorType : uint16_t {
|
|
47514
|
+
// error message types
|
|
47515
|
+
UNSIGNED_EXTENSION = 0,
|
|
47516
|
+
|
|
47517
|
+
// this should always be the last value
|
|
47518
|
+
ERROR_COUNT,
|
|
47519
|
+
INVALID = 65535,
|
|
47520
|
+
};
|
|
47521
|
+
|
|
47522
|
+
//! The error manager class is responsible for formatting error messages
|
|
47523
|
+
//! It allows for error messages to be overridden by extensions and clients
|
|
47524
|
+
class ErrorManager {
|
|
47525
|
+
public:
|
|
47526
|
+
template <typename... Args>
|
|
47527
|
+
string FormatException(ErrorType error_type, Args... params) {
|
|
47528
|
+
vector<ExceptionFormatValue> values;
|
|
47529
|
+
return FormatExceptionRecursive(error_type, values, params...);
|
|
47530
|
+
}
|
|
47531
|
+
|
|
47532
|
+
DUCKDB_API string FormatExceptionRecursive(ErrorType error_type, vector<ExceptionFormatValue> &values);
|
|
47533
|
+
|
|
47534
|
+
template <class T, typename... Args>
|
|
47535
|
+
string FormatExceptionRecursive(ErrorType error_type, vector<ExceptionFormatValue> &values, T param,
|
|
47536
|
+
Args... params) {
|
|
47537
|
+
values.push_back(ExceptionFormatValue::CreateFormatValue<T>(param));
|
|
47538
|
+
return FormatExceptionRecursive(error_type, values, params...);
|
|
47539
|
+
}
|
|
47540
|
+
|
|
47541
|
+
DUCKDB_API static string InvalidUnicodeError(const string &input, const string &context);
|
|
47542
|
+
|
|
47543
|
+
//! Adds a custom error for a specific error type
|
|
47544
|
+
void AddCustomError(ErrorType type, string new_error);
|
|
47545
|
+
|
|
47546
|
+
DUCKDB_API ErrorManager &Get(ClientContext &context);
|
|
47547
|
+
DUCKDB_API ErrorManager &Get(DatabaseInstance &context);
|
|
47548
|
+
|
|
47549
|
+
private:
|
|
47550
|
+
map<ErrorType, string> custom_errors;
|
|
47551
|
+
};
|
|
47552
|
+
|
|
47553
|
+
} // namespace duckdb
|
|
47554
|
+
|
|
47495
47555
|
|
|
47496
47556
|
#include <utility>
|
|
47497
47557
|
#include <cmath>
|
|
@@ -47528,7 +47588,7 @@ Value::Value(string_t val) : Value(string(val.GetDataUnsafe(), val.GetSize())) {
|
|
|
47528
47588
|
|
|
47529
47589
|
Value::Value(string val) : type_(LogicalType::VARCHAR), is_null(false), str_value(move(val)) {
|
|
47530
47590
|
if (!Value::StringIsValid(str_value.c_str(), str_value.size())) {
|
|
47531
|
-
throw Exception("
|
|
47591
|
+
throw Exception(ErrorManager::InvalidUnicodeError(str_value, "value construction"));
|
|
47532
47592
|
}
|
|
47533
47593
|
}
|
|
47534
47594
|
|
|
@@ -76473,6 +76533,7 @@ string PhysicalTopN::ParamsToString() const {
|
|
|
76473
76533
|
|
|
76474
76534
|
|
|
76475
76535
|
|
|
76536
|
+
|
|
76476
76537
|
#include <algorithm>
|
|
76477
76538
|
#include <cctype>
|
|
76478
76539
|
#include <cstring>
|
|
@@ -77506,8 +77567,10 @@ void BufferedCSVReader::DetectCandidateTypes(const vector<LogicalType> &type_can
|
|
|
77506
77567
|
// try cast from string to sql_type
|
|
77507
77568
|
Value dummy_val;
|
|
77508
77569
|
if (is_header_row) {
|
|
77570
|
+
VerifyUTF8(col, 0, header_row, -int64_t(parse_chunk.size()));
|
|
77509
77571
|
dummy_val = header_row.GetValue(col, 0);
|
|
77510
77572
|
} else {
|
|
77573
|
+
VerifyUTF8(col, row, parse_chunk);
|
|
77511
77574
|
dummy_val = parse_chunk.GetValue(col, row);
|
|
77512
77575
|
}
|
|
77513
77576
|
// try formatting for date types if the user did not specify one and it starts with numeric values.
|
|
@@ -78361,9 +78424,9 @@ void BufferedCSVReader::AddValue(char *str_val, idx_t length, idx_t &column, vec
|
|
|
78361
78424
|
error_column_overflow = true;
|
|
78362
78425
|
return;
|
|
78363
78426
|
} else {
|
|
78364
|
-
throw InvalidInputException(
|
|
78365
|
-
|
|
78366
|
-
|
|
78427
|
+
throw InvalidInputException(
|
|
78428
|
+
"Error in file \"%s\", on line %s: expected %lld values per row, but got more. (%s)", options.file_path,
|
|
78429
|
+
GetLineNumberStr(linenr, linenr_estimated).c_str(), sql_types.size(), options.ToString());
|
|
78367
78430
|
}
|
|
78368
78431
|
}
|
|
78369
78432
|
|
|
@@ -78433,9 +78496,9 @@ bool BufferedCSVReader::AddRow(DataChunk &insert_chunk, idx_t &column) {
|
|
|
78433
78496
|
column = 0;
|
|
78434
78497
|
return false;
|
|
78435
78498
|
} else {
|
|
78436
|
-
throw InvalidInputException(
|
|
78437
|
-
|
|
78438
|
-
|
|
78499
|
+
throw InvalidInputException(
|
|
78500
|
+
"Error in file \"%s\" on line %s: expected %lld values per row, but got %d. (%s)", options.file_path,
|
|
78501
|
+
GetLineNumberStr(linenr, linenr_estimated).c_str(), sql_types.size(), column, options.ToString());
|
|
78439
78502
|
}
|
|
78440
78503
|
}
|
|
78441
78504
|
|
|
@@ -78473,6 +78536,38 @@ void BufferedCSVReader::SetNullUnionCols(DataChunk &insert_chunk) {
|
|
|
78473
78536
|
}
|
|
78474
78537
|
}
|
|
78475
78538
|
|
|
78539
|
+
void BufferedCSVReader::VerifyUTF8(idx_t col_idx, idx_t row_idx, DataChunk &chunk, int64_t offset) {
|
|
78540
|
+
D_ASSERT(col_idx < chunk.data.size());
|
|
78541
|
+
D_ASSERT(row_idx < chunk.size());
|
|
78542
|
+
auto &v = chunk.data[col_idx];
|
|
78543
|
+
if (FlatVector::IsNull(v, row_idx)) {
|
|
78544
|
+
return;
|
|
78545
|
+
}
|
|
78546
|
+
|
|
78547
|
+
auto parse_data = FlatVector::GetData<string_t>(chunk.data[col_idx]);
|
|
78548
|
+
auto s = parse_data[row_idx];
|
|
78549
|
+
auto utf_type = Utf8Proc::Analyze(s.GetDataUnsafe(), s.GetSize());
|
|
78550
|
+
if (utf_type == UnicodeType::INVALID) {
|
|
78551
|
+
string col_name = to_string(col_idx);
|
|
78552
|
+
if (col_idx < col_names.size()) {
|
|
78553
|
+
col_name = "\"" + col_names[col_idx] + "\"";
|
|
78554
|
+
}
|
|
78555
|
+
int64_t error_line = linenr - (chunk.size() - row_idx) + 1 + offset;
|
|
78556
|
+
D_ASSERT(error_line >= 0);
|
|
78557
|
+
throw InvalidInputException("Error in file \"%s\" at line %llu in column \"%s\": "
|
|
78558
|
+
"%s. Parser options: %s",
|
|
78559
|
+
options.file_path, error_line, col_name,
|
|
78560
|
+
ErrorManager::InvalidUnicodeError(s.GetString(), "CSV file"), options.ToString());
|
|
78561
|
+
}
|
|
78562
|
+
}
|
|
78563
|
+
|
|
78564
|
+
void BufferedCSVReader::VerifyUTF8(idx_t col_idx) {
|
|
78565
|
+
D_ASSERT(col_idx < parse_chunk.data.size());
|
|
78566
|
+
for (idx_t i = 0; i < parse_chunk.size(); i++) {
|
|
78567
|
+
VerifyUTF8(col_idx, i, parse_chunk);
|
|
78568
|
+
}
|
|
78569
|
+
}
|
|
78570
|
+
|
|
78476
78571
|
void BufferedCSVReader::Flush(DataChunk &insert_chunk) {
|
|
78477
78572
|
if (parse_chunk.size() == 0) {
|
|
78478
78573
|
return;
|
|
@@ -78486,23 +78581,7 @@ void BufferedCSVReader::Flush(DataChunk &insert_chunk) {
|
|
|
78486
78581
|
if (sql_types[col_idx].id() == LogicalTypeId::VARCHAR) {
|
|
78487
78582
|
// target type is varchar: no need to convert
|
|
78488
78583
|
// just test that all strings are valid utf-8 strings
|
|
78489
|
-
|
|
78490
|
-
for (idx_t i = 0; i < parse_chunk.size(); i++) {
|
|
78491
|
-
if (!FlatVector::IsNull(parse_chunk.data[col_idx], i)) {
|
|
78492
|
-
auto s = parse_data[i];
|
|
78493
|
-
auto utf_type = Utf8Proc::Analyze(s.GetDataUnsafe(), s.GetSize());
|
|
78494
|
-
if (utf_type == UnicodeType::INVALID) {
|
|
78495
|
-
string col_name = to_string(col_idx);
|
|
78496
|
-
if (col_idx < col_names.size()) {
|
|
78497
|
-
col_name = "\"" + col_names[col_idx] + "\"";
|
|
78498
|
-
}
|
|
78499
|
-
throw InvalidInputException("Error in file \"%s\" between line %llu and %llu in column \"%s\": "
|
|
78500
|
-
"file is not valid UTF8. Parser options: %s",
|
|
78501
|
-
options.file_path, linenr - parse_chunk.size(), linenr, col_name,
|
|
78502
|
-
options.ToString());
|
|
78503
|
-
}
|
|
78504
|
-
}
|
|
78505
|
-
}
|
|
78584
|
+
VerifyUTF8(col_idx);
|
|
78506
78585
|
insert_chunk.data[insert_cols_idx[col_idx]].Reference(parse_chunk.data[col_idx]);
|
|
78507
78586
|
} else {
|
|
78508
78587
|
string error_message;
|
|
@@ -78536,17 +78615,27 @@ void BufferedCSVReader::Flush(DataChunk &insert_chunk) {
|
|
|
78536
78615
|
col_name = "\"" + col_names[col_idx] + "\"";
|
|
78537
78616
|
}
|
|
78538
78617
|
|
|
78618
|
+
// figure out the exact line number
|
|
78619
|
+
idx_t row_idx;
|
|
78620
|
+
for (row_idx = 0; row_idx < parse_chunk.size(); row_idx++) {
|
|
78621
|
+
auto &inserted_column = insert_chunk.data[col_idx];
|
|
78622
|
+
auto &parsed_column = parse_chunk.data[col_idx];
|
|
78623
|
+
|
|
78624
|
+
if (FlatVector::IsNull(inserted_column, row_idx) && !FlatVector::IsNull(parsed_column, row_idx)) {
|
|
78625
|
+
break;
|
|
78626
|
+
}
|
|
78627
|
+
}
|
|
78628
|
+
auto error_line = linenr - (parse_chunk.size() - row_idx) + 1;
|
|
78629
|
+
|
|
78539
78630
|
if (options.auto_detect) {
|
|
78540
|
-
throw InvalidInputException("%s in column %s,
|
|
78631
|
+
throw InvalidInputException("%s in column %s, at line %llu. Parser "
|
|
78541
78632
|
"options: %s. Consider either increasing the sample size "
|
|
78542
78633
|
"(SAMPLE_SIZE=X [X rows] or SAMPLE_SIZE=-1 [all rows]), "
|
|
78543
78634
|
"or skipping column conversion (ALL_VARCHAR=1)",
|
|
78544
|
-
error_message, col_name,
|
|
78545
|
-
options.ToString());
|
|
78635
|
+
error_message, col_name, error_line, options.ToString());
|
|
78546
78636
|
} else {
|
|
78547
|
-
throw InvalidInputException("%s
|
|
78548
|
-
|
|
78549
|
-
options.ToString());
|
|
78637
|
+
throw InvalidInputException("%s at line %llu in column %s. Parser options: %s ", error_message,
|
|
78638
|
+
error_line, col_name, options.ToString());
|
|
78550
78639
|
}
|
|
78551
78640
|
}
|
|
78552
78641
|
}
|
|
@@ -132105,14 +132194,6 @@ struct DebugForceNoCrossProduct {
|
|
|
132105
132194
|
static Value GetSetting(ClientContext &context);
|
|
132106
132195
|
};
|
|
132107
132196
|
|
|
132108
|
-
struct DebugManyFreeListBlocks {
|
|
132109
|
-
static constexpr const char *Name = "debug_many_free_list_blocks";
|
|
132110
|
-
static constexpr const char *Description = "DEBUG SETTING: add additional blocks to the free list";
|
|
132111
|
-
static constexpr const LogicalTypeId InputType = LogicalTypeId::BOOLEAN;
|
|
132112
|
-
static void SetGlobal(DatabaseInstance *db, DBConfig &config, const Value ¶meter);
|
|
132113
|
-
static Value GetSetting(ClientContext &context);
|
|
132114
|
-
};
|
|
132115
|
-
|
|
132116
132197
|
struct DebugWindowMode {
|
|
132117
132198
|
static constexpr const char *Name = "debug_window_mode";
|
|
132118
132199
|
static constexpr const char *Description = "DEBUG SETTING: switch window mode to use";
|
|
@@ -132396,7 +132477,6 @@ static ConfigurationOption internal_options[] = {DUCKDB_GLOBAL(AccessModeSetting
|
|
|
132396
132477
|
DUCKDB_GLOBAL(DebugCheckpointAbort),
|
|
132397
132478
|
DUCKDB_LOCAL(DebugForceExternal),
|
|
132398
132479
|
DUCKDB_LOCAL(DebugForceNoCrossProduct),
|
|
132399
|
-
DUCKDB_GLOBAL(DebugManyFreeListBlocks),
|
|
132400
132480
|
DUCKDB_GLOBAL(DebugWindowMode),
|
|
132401
132481
|
DUCKDB_GLOBAL_LOCAL(DefaultCollationSetting),
|
|
132402
132482
|
DUCKDB_GLOBAL(DefaultOrderSetting),
|
|
@@ -133130,6 +133210,7 @@ public:
|
|
|
133130
133210
|
|
|
133131
133211
|
|
|
133132
133212
|
|
|
133213
|
+
|
|
133133
133214
|
#ifndef DUCKDB_NO_THREADS
|
|
133134
133215
|
|
|
133135
133216
|
#endif
|
|
@@ -133140,6 +133221,7 @@ DBConfig::DBConfig() {
|
|
|
133140
133221
|
compression_functions = make_unique<CompressionFunctionSet>();
|
|
133141
133222
|
replacement_opens.push_back(ExtensionPrefixReplacementOpen());
|
|
133142
133223
|
cast_functions = make_unique<CastFunctionSet>();
|
|
133224
|
+
error_manager = make_unique<ErrorManager>();
|
|
133143
133225
|
}
|
|
133144
133226
|
|
|
133145
133227
|
DBConfig::DBConfig(std::unordered_map<string, string> &config_dict, bool read_only) {
|
|
@@ -133357,17 +133439,15 @@ Allocator &Allocator::Get(DatabaseInstance &db) {
|
|
|
133357
133439
|
}
|
|
133358
133440
|
|
|
133359
133441
|
void DatabaseInstance::Configure(DBConfig &new_config) {
|
|
133360
|
-
config.options
|
|
133361
|
-
config.options.access_mode
|
|
133362
|
-
|
|
133363
|
-
config.options.access_mode = new_config.options.access_mode;
|
|
133442
|
+
config.options = new_config.options;
|
|
133443
|
+
if (config.options.access_mode == AccessMode::UNDEFINED) {
|
|
133444
|
+
config.options.access_mode = AccessMode::READ_WRITE;
|
|
133364
133445
|
}
|
|
133365
133446
|
if (new_config.file_system) {
|
|
133366
133447
|
config.file_system = move(new_config.file_system);
|
|
133367
133448
|
} else {
|
|
133368
133449
|
config.file_system = make_unique<VirtualFileSystem>();
|
|
133369
133450
|
}
|
|
133370
|
-
config.options.maximum_memory = new_config.options.maximum_memory;
|
|
133371
133451
|
if (config.options.maximum_memory == (idx_t)-1) {
|
|
133372
133452
|
auto memory = FileSystem::GetAvailableMemory();
|
|
133373
133453
|
if (memory != DConstants::INVALID_INDEX) {
|
|
@@ -133380,29 +133460,15 @@ void DatabaseInstance::Configure(DBConfig &new_config) {
|
|
|
133380
133460
|
#else
|
|
133381
133461
|
config.options.maximum_threads = 1;
|
|
133382
133462
|
#endif
|
|
133383
|
-
} else {
|
|
133384
|
-
config.options.maximum_threads = new_config.options.maximum_threads;
|
|
133385
133463
|
}
|
|
133386
|
-
config.options.external_threads = new_config.options.external_threads;
|
|
133387
|
-
config.options.load_extensions = new_config.options.load_extensions;
|
|
133388
|
-
config.options.force_compression = new_config.options.force_compression;
|
|
133389
133464
|
config.allocator = move(new_config.allocator);
|
|
133390
133465
|
if (!config.allocator) {
|
|
133391
133466
|
config.allocator = make_unique<Allocator>();
|
|
133392
133467
|
}
|
|
133393
|
-
config.options.checkpoint_wal_size = new_config.options.checkpoint_wal_size;
|
|
133394
|
-
config.options.use_direct_io = new_config.options.use_direct_io;
|
|
133395
|
-
config.options.temporary_directory = new_config.options.temporary_directory;
|
|
133396
|
-
config.options.collation = new_config.options.collation;
|
|
133397
|
-
config.options.default_order_type = new_config.options.default_order_type;
|
|
133398
|
-
config.options.default_null_order = new_config.options.default_null_order;
|
|
133399
|
-
config.options.enable_external_access = new_config.options.enable_external_access;
|
|
133400
|
-
config.options.allow_unsigned_extensions = new_config.options.allow_unsigned_extensions;
|
|
133401
133468
|
config.replacement_scans = move(new_config.replacement_scans);
|
|
133402
|
-
config.replacement_opens = move(new_config.replacement_opens);
|
|
133403
|
-
config.options.initialize_default_database = new_config.options.initialize_default_database;
|
|
133404
|
-
config.options.disabled_optimizers = move(new_config.options.disabled_optimizers);
|
|
133469
|
+
config.replacement_opens = move(new_config.replacement_opens);
|
|
133405
133470
|
config.parser_extensions = move(new_config.parser_extensions);
|
|
133471
|
+
config.error_manager = move(new_config.error_manager);
|
|
133406
133472
|
}
|
|
133407
133473
|
|
|
133408
133474
|
DBConfig &DBConfig::GetConfig(ClientContext &context) {
|
|
@@ -133558,6 +133624,76 @@ shared_ptr<DuckDB> DBInstanceCache::CreateInstance(const string &database, DBCon
|
|
|
133558
133624
|
|
|
133559
133625
|
|
|
133560
133626
|
|
|
133627
|
+
namespace duckdb {
|
|
133628
|
+
|
|
133629
|
+
struct DefaultError {
|
|
133630
|
+
ErrorType type;
|
|
133631
|
+
const char *error;
|
|
133632
|
+
};
|
|
133633
|
+
|
|
133634
|
+
static DefaultError internal_errors[] = {
|
|
133635
|
+
{ErrorType::UNSIGNED_EXTENSION,
|
|
133636
|
+
"Extension \"%s\" could not be loaded because its signature is either missing or invalid and unsigned extensions "
|
|
133637
|
+
"are disabled by configuration (allow_unsigned_extensions)"},
|
|
133638
|
+
{ErrorType::INVALID, nullptr}};
|
|
133639
|
+
|
|
133640
|
+
string ErrorManager::FormatExceptionRecursive(ErrorType error_type, vector<ExceptionFormatValue> &values) {
|
|
133641
|
+
if (error_type >= ErrorType::ERROR_COUNT) {
|
|
133642
|
+
throw InternalException("Invalid error type passed to ErrorManager::FormatError");
|
|
133643
|
+
}
|
|
133644
|
+
auto entry = custom_errors.find(error_type);
|
|
133645
|
+
string error;
|
|
133646
|
+
if (entry == custom_errors.end()) {
|
|
133647
|
+
// error was not overwritten
|
|
133648
|
+
error = internal_errors[int(error_type)].error;
|
|
133649
|
+
} else {
|
|
133650
|
+
// error was overwritten
|
|
133651
|
+
error = entry->second;
|
|
133652
|
+
}
|
|
133653
|
+
return ExceptionFormatValue::Format(error, values);
|
|
133654
|
+
}
|
|
133655
|
+
|
|
133656
|
+
string ErrorManager::InvalidUnicodeError(const string &input, const string &context) {
|
|
133657
|
+
UnicodeInvalidReason reason;
|
|
133658
|
+
size_t pos;
|
|
133659
|
+
auto unicode = Utf8Proc::Analyze((const char *)input.c_str(), input.size(), &reason, &pos);
|
|
133660
|
+
if (unicode != UnicodeType::INVALID) {
|
|
133661
|
+
return "Invalid unicode error thrown but no invalid unicode detected in " + context;
|
|
133662
|
+
}
|
|
133663
|
+
string base_message;
|
|
133664
|
+
switch (reason) {
|
|
133665
|
+
case UnicodeInvalidReason::NULL_BYTE:
|
|
133666
|
+
base_message = "Null-byte (\\0)";
|
|
133667
|
+
break;
|
|
133668
|
+
case UnicodeInvalidReason::BYTE_MISMATCH:
|
|
133669
|
+
base_message = "Invalid unicode (byte sequence mismatch)";
|
|
133670
|
+
break;
|
|
133671
|
+
case UnicodeInvalidReason::INVALID_UNICODE:
|
|
133672
|
+
base_message = "Invalid unicode";
|
|
133673
|
+
break;
|
|
133674
|
+
default:
|
|
133675
|
+
break;
|
|
133676
|
+
}
|
|
133677
|
+
return base_message + " detected in " + context;
|
|
133678
|
+
}
|
|
133679
|
+
|
|
133680
|
+
void ErrorManager::AddCustomError(ErrorType type, string new_error) {
|
|
133681
|
+
custom_errors.insert(make_pair(type, move(new_error)));
|
|
133682
|
+
}
|
|
133683
|
+
|
|
133684
|
+
ErrorManager &ErrorManager::Get(ClientContext &context) {
|
|
133685
|
+
return *DBConfig::GetConfig(context).error_manager;
|
|
133686
|
+
}
|
|
133687
|
+
|
|
133688
|
+
ErrorManager &ErrorManager::Get(DatabaseInstance &context) {
|
|
133689
|
+
return *DBConfig::GetConfig(context).error_manager;
|
|
133690
|
+
}
|
|
133691
|
+
|
|
133692
|
+
} // namespace duckdb
|
|
133693
|
+
|
|
133694
|
+
|
|
133695
|
+
|
|
133696
|
+
|
|
133561
133697
|
|
|
133562
133698
|
|
|
133563
133699
|
|
|
@@ -142416,6 +142552,7 @@ inline std::string GetDLError(void) {
|
|
|
142416
142552
|
|
|
142417
142553
|
|
|
142418
142554
|
|
|
142555
|
+
|
|
142419
142556
|
// LICENSE_CHANGE_BEGIN
|
|
142420
142557
|
// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #12
|
|
142421
142558
|
// See the end of this file for a list
|
|
@@ -142513,10 +142650,7 @@ ExtensionInitResult ExtensionHelper::InitialLoad(DBConfig &config, FileOpener *o
|
|
|
142513
142650
|
}
|
|
142514
142651
|
}
|
|
142515
142652
|
if (!any_valid && !config.options.allow_unsigned_extensions) {
|
|
142516
|
-
throw IOException(
|
|
142517
|
-
"Extension \"%s\" could not be loaded because its signature is either missing or "
|
|
142518
|
-
"invalid and unsigned extensions are disabled by configuration (allow_unsigned_extensions)",
|
|
142519
|
-
filename);
|
|
142653
|
+
throw IOException(config.error_manager->FormatException(ErrorType::UNSIGNED_EXTENSION, filename));
|
|
142520
142654
|
}
|
|
142521
142655
|
}
|
|
142522
142656
|
auto lib_hdl = dlopen(filename.c_str(), RTLD_NOW | RTLD_LOCAL);
|
|
@@ -146358,18 +146492,6 @@ Value DebugForceNoCrossProduct::GetSetting(ClientContext &context) {
|
|
|
146358
146492
|
return Value::BOOLEAN(ClientConfig::GetConfig(context).force_no_cross_product);
|
|
146359
146493
|
}
|
|
146360
146494
|
|
|
146361
|
-
//===--------------------------------------------------------------------===//
|
|
146362
|
-
// Debug Many Free List blocks
|
|
146363
|
-
//===--------------------------------------------------------------------===//
|
|
146364
|
-
void DebugManyFreeListBlocks::SetGlobal(DatabaseInstance *db, DBConfig &config, const Value &input) {
|
|
146365
|
-
config.options.debug_many_free_list_blocks = input.GetValue<bool>();
|
|
146366
|
-
}
|
|
146367
|
-
|
|
146368
|
-
Value DebugManyFreeListBlocks::GetSetting(ClientContext &context) {
|
|
146369
|
-
auto &config = DBConfig::GetConfig(context);
|
|
146370
|
-
return Value::BOOLEAN(config.options.debug_many_free_list_blocks);
|
|
146371
|
-
}
|
|
146372
|
-
|
|
146373
146495
|
//===--------------------------------------------------------------------===//
|
|
146374
146496
|
// Debug Window Mode
|
|
146375
146497
|
//===--------------------------------------------------------------------===//
|
|
@@ -207987,10 +208109,6 @@ vector<block_id_t> SingleFileBlockManager::GetFreeListBlocks() {
|
|
|
207987
208109
|
// a bit from the max block size
|
|
207988
208110
|
auto space_in_block = Storage::BLOCK_SIZE - 4 * sizeof(block_id_t);
|
|
207989
208111
|
auto total_blocks = (total_size + space_in_block - 1) / space_in_block;
|
|
207990
|
-
auto &config = DBConfig::GetConfig(db);
|
|
207991
|
-
if (config.options.debug_many_free_list_blocks) {
|
|
207992
|
-
total_blocks++;
|
|
207993
|
-
}
|
|
207994
208112
|
D_ASSERT(total_size > 0);
|
|
207995
208113
|
D_ASSERT(total_blocks > 0);
|
|
207996
208114
|
|
|
@@ -208749,6 +208867,7 @@ void SegmentStatistics::Reset() {
|
|
|
208749
208867
|
|
|
208750
208868
|
|
|
208751
208869
|
|
|
208870
|
+
|
|
208752
208871
|
namespace duckdb {
|
|
208753
208872
|
|
|
208754
208873
|
StringStatistics::StringStatistics(LogicalType type_p, StatisticsType stats_type)
|
|
@@ -208837,7 +208956,8 @@ void StringStatistics::Update(const string_t &value) {
|
|
|
208837
208956
|
if (unicode == UnicodeType::UNICODE) {
|
|
208838
208957
|
has_unicode = true;
|
|
208839
208958
|
} else if (unicode == UnicodeType::INVALID) {
|
|
208840
|
-
throw InternalException(
|
|
208959
|
+
throw InternalException(
|
|
208960
|
+
ErrorManager::InvalidUnicodeError(string((char *)data, size), "segment statistics update"));
|
|
208841
208961
|
}
|
|
208842
208962
|
}
|
|
208843
208963
|
}
|
package/src/duckdb.hpp
CHANGED
|
@@ -11,8 +11,8 @@ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLI
|
|
|
11
11
|
#pragma once
|
|
12
12
|
#define DUCKDB_AMALGAMATION 1
|
|
13
13
|
#define DUCKDB_AMALGAMATION_EXTENDED 1
|
|
14
|
-
#define DUCKDB_SOURCE_ID "
|
|
15
|
-
#define DUCKDB_VERSION "v0.5.2-
|
|
14
|
+
#define DUCKDB_SOURCE_ID "d4790e5b65"
|
|
15
|
+
#define DUCKDB_VERSION "v0.5.2-dev1292"
|
|
16
16
|
//===----------------------------------------------------------------------===//
|
|
17
17
|
// DuckDB
|
|
18
18
|
//
|
|
@@ -2149,7 +2149,7 @@ public:
|
|
|
2149
2149
|
//! Read exactly nr_bytes from the specified location in the file. Fails if nr_bytes could not be read. This is
|
|
2150
2150
|
//! equivalent to calling SetFilePointer(location) followed by calling Read().
|
|
2151
2151
|
DUCKDB_API virtual void Read(FileHandle &handle, void *buffer, int64_t nr_bytes, idx_t location);
|
|
2152
|
-
//! Write exactly nr_bytes to the specified location in the file. Fails if nr_bytes could not be
|
|
2152
|
+
//! Write exactly nr_bytes to the specified location in the file. Fails if nr_bytes could not be written. This is
|
|
2153
2153
|
//! equivalent to calling SetFilePointer(location) followed by calling Write().
|
|
2154
2154
|
DUCKDB_API virtual void Write(FileHandle &handle, void *buffer, int64_t nr_bytes, idx_t location);
|
|
2155
2155
|
//! Read nr_bytes from the specified file into the buffer, moving the file pointer forward by nr_bytes. Returns the
|
|
@@ -18644,6 +18644,7 @@ public:
|
|
|
18644
18644
|
namespace duckdb {
|
|
18645
18645
|
class CastFunctionSet;
|
|
18646
18646
|
class ClientContext;
|
|
18647
|
+
class ErrorManager;
|
|
18647
18648
|
class CompressionFunction;
|
|
18648
18649
|
class TableFunctionRef;
|
|
18649
18650
|
|
|
@@ -18728,8 +18729,6 @@ struct DBConfigOptions {
|
|
|
18728
18729
|
set<OptimizerType> disabled_optimizers;
|
|
18729
18730
|
//! Force a specific compression method to be used when checkpointing (if available)
|
|
18730
18731
|
CompressionType force_compression = CompressionType::COMPRESSION_AUTO;
|
|
18731
|
-
//! Debug flag that adds additional (unnecessary) free_list blocks to the storage
|
|
18732
|
-
bool debug_many_free_list_blocks = false;
|
|
18733
18732
|
//! Debug setting for window aggregation mode: (window, combine, separate)
|
|
18734
18733
|
WindowAggregationMode window_mode = WindowAggregationMode::WINDOW;
|
|
18735
18734
|
//! Whether or not preserving insertion order should be preserved
|
|
@@ -18772,6 +18771,8 @@ public:
|
|
|
18772
18771
|
vector<ParserExtension> parser_extensions;
|
|
18773
18772
|
//! Extensions made to the optimizer
|
|
18774
18773
|
vector<OptimizerExtension> optimizer_extensions;
|
|
18774
|
+
//! Error manager
|
|
18775
|
+
unique_ptr<ErrorManager> error_manager;
|
|
18775
18776
|
|
|
18776
18777
|
DUCKDB_API void AddExtensionOption(string name, string description, LogicalType parameter,
|
|
18777
18778
|
set_option_callback_t function = nullptr);
|
|
@@ -27846,6 +27847,9 @@ private:
|
|
|
27846
27847
|
vector<vector<LogicalType>> &best_sql_types_candidates,
|
|
27847
27848
|
map<LogicalTypeId, vector<string>> &best_format_candidates);
|
|
27848
27849
|
|
|
27850
|
+
void VerifyUTF8(idx_t col_idx);
|
|
27851
|
+
void VerifyUTF8(idx_t col_idx, idx_t row_idx, DataChunk &chunk, int64_t offset = 0);
|
|
27852
|
+
|
|
27849
27853
|
private:
|
|
27850
27854
|
//! Whether or not the current row's columns have overflown sql_types.size()
|
|
27851
27855
|
bool error_column_overflow = false;
|