duckdb 0.5.2-dev1275.0 → 0.5.2-dev1292.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -2,7 +2,7 @@
2
2
  "name": "duckdb",
3
3
  "main": "./lib/duckdb.js",
4
4
  "types": "./lib/duckdb.d.ts",
5
- "version": "0.5.2-dev1275.0",
5
+ "version": "0.5.2-dev1292.0",
6
6
  "description": "DuckDB node.js API",
7
7
  "gypfile": true,
8
8
  "dependencies": {
package/src/duckdb.cpp CHANGED
@@ -623,7 +623,7 @@ public:
623
623
  //===----------------------------------------------------------------------===//
624
624
  // DuckDB
625
625
  //
626
- // extension_functions.hpp
626
+ // duckdb/main/extension_functions.hpp
627
627
  //
628
628
  //
629
629
  //===----------------------------------------------------------------------===//
@@ -20261,7 +20261,7 @@ public:
20261
20261
  //! Read exactly nr_bytes from the specified location in the file. Fails if nr_bytes could not be read. This is
20262
20262
  //! equivalent to calling SetFilePointer(location) followed by calling Read().
20263
20263
  void Read(FileHandle &handle, void *buffer, int64_t nr_bytes, idx_t location) override;
20264
- //! Write exactly nr_bytes to the specified location in the file. Fails if nr_bytes could not be read. This is
20264
+ //! Write exactly nr_bytes to the specified location in the file. Fails if nr_bytes could not be written. This is
20265
20265
  //! equivalent to calling SetFilePointer(location) followed by calling Write().
20266
20266
  void Write(FileHandle &handle, void *buffer, int64_t nr_bytes, idx_t location) override;
20267
20267
  //! Read nr_bytes from the specified file into the buffer, moving the file pointer forward by nr_bytes. Returns the
@@ -47492,6 +47492,66 @@ private:
47492
47492
 
47493
47493
  } // namespace duckdb
47494
47494
 
47495
+ //===----------------------------------------------------------------------===//
47496
+ // DuckDB
47497
+ //
47498
+ // duckdb/main/error_manager.hpp
47499
+ //
47500
+ //
47501
+ //===----------------------------------------------------------------------===//
47502
+
47503
+
47504
+
47505
+
47506
+
47507
+
47508
+
47509
+ namespace duckdb {
47510
+ class ClientContext;
47511
+ class DatabaseInstance;
47512
+
47513
+ enum class ErrorType : uint16_t {
47514
+ // error message types
47515
+ UNSIGNED_EXTENSION = 0,
47516
+
47517
+ // this should always be the last value
47518
+ ERROR_COUNT,
47519
+ INVALID = 65535,
47520
+ };
47521
+
47522
+ //! The error manager class is responsible for formatting error messages
47523
+ //! It allows for error messages to be overridden by extensions and clients
47524
+ class ErrorManager {
47525
+ public:
47526
+ template <typename... Args>
47527
+ string FormatException(ErrorType error_type, Args... params) {
47528
+ vector<ExceptionFormatValue> values;
47529
+ return FormatExceptionRecursive(error_type, values, params...);
47530
+ }
47531
+
47532
+ DUCKDB_API string FormatExceptionRecursive(ErrorType error_type, vector<ExceptionFormatValue> &values);
47533
+
47534
+ template <class T, typename... Args>
47535
+ string FormatExceptionRecursive(ErrorType error_type, vector<ExceptionFormatValue> &values, T param,
47536
+ Args... params) {
47537
+ values.push_back(ExceptionFormatValue::CreateFormatValue<T>(param));
47538
+ return FormatExceptionRecursive(error_type, values, params...);
47539
+ }
47540
+
47541
+ DUCKDB_API static string InvalidUnicodeError(const string &input, const string &context);
47542
+
47543
+ //! Adds a custom error for a specific error type
47544
+ void AddCustomError(ErrorType type, string new_error);
47545
+
47546
+ DUCKDB_API ErrorManager &Get(ClientContext &context);
47547
+ DUCKDB_API ErrorManager &Get(DatabaseInstance &context);
47548
+
47549
+ private:
47550
+ map<ErrorType, string> custom_errors;
47551
+ };
47552
+
47553
+ } // namespace duckdb
47554
+
47495
47555
 
47496
47556
  #include <utility>
47497
47557
  #include <cmath>
@@ -47528,7 +47588,7 @@ Value::Value(string_t val) : Value(string(val.GetDataUnsafe(), val.GetSize())) {
47528
47588
 
47529
47589
  Value::Value(string val) : type_(LogicalType::VARCHAR), is_null(false), str_value(move(val)) {
47530
47590
  if (!Value::StringIsValid(str_value.c_str(), str_value.size())) {
47531
- throw Exception("String value is not valid UTF8");
47591
+ throw Exception(ErrorManager::InvalidUnicodeError(str_value, "value construction"));
47532
47592
  }
47533
47593
  }
47534
47594
 
@@ -76473,6 +76533,7 @@ string PhysicalTopN::ParamsToString() const {
76473
76533
 
76474
76534
 
76475
76535
 
76536
+
76476
76537
  #include <algorithm>
76477
76538
  #include <cctype>
76478
76539
  #include <cstring>
@@ -77506,8 +77567,10 @@ void BufferedCSVReader::DetectCandidateTypes(const vector<LogicalType> &type_can
77506
77567
  // try cast from string to sql_type
77507
77568
  Value dummy_val;
77508
77569
  if (is_header_row) {
77570
+ VerifyUTF8(col, 0, header_row, -int64_t(parse_chunk.size()));
77509
77571
  dummy_val = header_row.GetValue(col, 0);
77510
77572
  } else {
77573
+ VerifyUTF8(col, row, parse_chunk);
77511
77574
  dummy_val = parse_chunk.GetValue(col, row);
77512
77575
  }
77513
77576
  // try formatting for date types if the user did not specify one and it starts with numeric values.
@@ -78361,9 +78424,9 @@ void BufferedCSVReader::AddValue(char *str_val, idx_t length, idx_t &column, vec
78361
78424
  error_column_overflow = true;
78362
78425
  return;
78363
78426
  } else {
78364
- throw InvalidInputException("Error on line %s: expected %lld values per row, but got more. (%s)",
78365
- GetLineNumberStr(linenr, linenr_estimated).c_str(), sql_types.size(),
78366
- options.ToString());
78427
+ throw InvalidInputException(
78428
+ "Error in file \"%s\", on line %s: expected %lld values per row, but got more. (%s)", options.file_path,
78429
+ GetLineNumberStr(linenr, linenr_estimated).c_str(), sql_types.size(), options.ToString());
78367
78430
  }
78368
78431
  }
78369
78432
 
@@ -78433,9 +78496,9 @@ bool BufferedCSVReader::AddRow(DataChunk &insert_chunk, idx_t &column) {
78433
78496
  column = 0;
78434
78497
  return false;
78435
78498
  } else {
78436
- throw InvalidInputException("Error on line %s: expected %lld values per row, but got %d. (%s)",
78437
- GetLineNumberStr(linenr, linenr_estimated).c_str(), sql_types.size(), column,
78438
- options.ToString());
78499
+ throw InvalidInputException(
78500
+ "Error in file \"%s\" on line %s: expected %lld values per row, but got %d. (%s)", options.file_path,
78501
+ GetLineNumberStr(linenr, linenr_estimated).c_str(), sql_types.size(), column, options.ToString());
78439
78502
  }
78440
78503
  }
78441
78504
 
@@ -78473,6 +78536,38 @@ void BufferedCSVReader::SetNullUnionCols(DataChunk &insert_chunk) {
78473
78536
  }
78474
78537
  }
78475
78538
 
78539
+ void BufferedCSVReader::VerifyUTF8(idx_t col_idx, idx_t row_idx, DataChunk &chunk, int64_t offset) {
78540
+ D_ASSERT(col_idx < chunk.data.size());
78541
+ D_ASSERT(row_idx < chunk.size());
78542
+ auto &v = chunk.data[col_idx];
78543
+ if (FlatVector::IsNull(v, row_idx)) {
78544
+ return;
78545
+ }
78546
+
78547
+ auto parse_data = FlatVector::GetData<string_t>(chunk.data[col_idx]);
78548
+ auto s = parse_data[row_idx];
78549
+ auto utf_type = Utf8Proc::Analyze(s.GetDataUnsafe(), s.GetSize());
78550
+ if (utf_type == UnicodeType::INVALID) {
78551
+ string col_name = to_string(col_idx);
78552
+ if (col_idx < col_names.size()) {
78553
+ col_name = "\"" + col_names[col_idx] + "\"";
78554
+ }
78555
+ int64_t error_line = linenr - (chunk.size() - row_idx) + 1 + offset;
78556
+ D_ASSERT(error_line >= 0);
78557
+ throw InvalidInputException("Error in file \"%s\" at line %llu in column \"%s\": "
78558
+ "%s. Parser options: %s",
78559
+ options.file_path, error_line, col_name,
78560
+ ErrorManager::InvalidUnicodeError(s.GetString(), "CSV file"), options.ToString());
78561
+ }
78562
+ }
78563
+
78564
+ void BufferedCSVReader::VerifyUTF8(idx_t col_idx) {
78565
+ D_ASSERT(col_idx < parse_chunk.data.size());
78566
+ for (idx_t i = 0; i < parse_chunk.size(); i++) {
78567
+ VerifyUTF8(col_idx, i, parse_chunk);
78568
+ }
78569
+ }
78570
+
78476
78571
  void BufferedCSVReader::Flush(DataChunk &insert_chunk) {
78477
78572
  if (parse_chunk.size() == 0) {
78478
78573
  return;
@@ -78486,23 +78581,7 @@ void BufferedCSVReader::Flush(DataChunk &insert_chunk) {
78486
78581
  if (sql_types[col_idx].id() == LogicalTypeId::VARCHAR) {
78487
78582
  // target type is varchar: no need to convert
78488
78583
  // just test that all strings are valid utf-8 strings
78489
- auto parse_data = FlatVector::GetData<string_t>(parse_chunk.data[col_idx]);
78490
- for (idx_t i = 0; i < parse_chunk.size(); i++) {
78491
- if (!FlatVector::IsNull(parse_chunk.data[col_idx], i)) {
78492
- auto s = parse_data[i];
78493
- auto utf_type = Utf8Proc::Analyze(s.GetDataUnsafe(), s.GetSize());
78494
- if (utf_type == UnicodeType::INVALID) {
78495
- string col_name = to_string(col_idx);
78496
- if (col_idx < col_names.size()) {
78497
- col_name = "\"" + col_names[col_idx] + "\"";
78498
- }
78499
- throw InvalidInputException("Error in file \"%s\" between line %llu and %llu in column \"%s\": "
78500
- "file is not valid UTF8. Parser options: %s",
78501
- options.file_path, linenr - parse_chunk.size(), linenr, col_name,
78502
- options.ToString());
78503
- }
78504
- }
78505
- }
78584
+ VerifyUTF8(col_idx);
78506
78585
  insert_chunk.data[insert_cols_idx[col_idx]].Reference(parse_chunk.data[col_idx]);
78507
78586
  } else {
78508
78587
  string error_message;
@@ -78536,17 +78615,27 @@ void BufferedCSVReader::Flush(DataChunk &insert_chunk) {
78536
78615
  col_name = "\"" + col_names[col_idx] + "\"";
78537
78616
  }
78538
78617
 
78618
+ // figure out the exact line number
78619
+ idx_t row_idx;
78620
+ for (row_idx = 0; row_idx < parse_chunk.size(); row_idx++) {
78621
+ auto &inserted_column = insert_chunk.data[col_idx];
78622
+ auto &parsed_column = parse_chunk.data[col_idx];
78623
+
78624
+ if (FlatVector::IsNull(inserted_column, row_idx) && !FlatVector::IsNull(parsed_column, row_idx)) {
78625
+ break;
78626
+ }
78627
+ }
78628
+ auto error_line = linenr - (parse_chunk.size() - row_idx) + 1;
78629
+
78539
78630
  if (options.auto_detect) {
78540
- throw InvalidInputException("%s in column %s, between line %llu and %llu. Parser "
78631
+ throw InvalidInputException("%s in column %s, at line %llu. Parser "
78541
78632
  "options: %s. Consider either increasing the sample size "
78542
78633
  "(SAMPLE_SIZE=X [X rows] or SAMPLE_SIZE=-1 [all rows]), "
78543
78634
  "or skipping column conversion (ALL_VARCHAR=1)",
78544
- error_message, col_name, linenr - parse_chunk.size() + 1, linenr,
78545
- options.ToString());
78635
+ error_message, col_name, error_line, options.ToString());
78546
78636
  } else {
78547
- throw InvalidInputException("%s between line %llu and %llu in column %s. Parser options: %s ",
78548
- error_message, linenr - parse_chunk.size(), linenr, col_name,
78549
- options.ToString());
78637
+ throw InvalidInputException("%s at line %llu in column %s. Parser options: %s ", error_message,
78638
+ error_line, col_name, options.ToString());
78550
78639
  }
78551
78640
  }
78552
78641
  }
@@ -132105,14 +132194,6 @@ struct DebugForceNoCrossProduct {
132105
132194
  static Value GetSetting(ClientContext &context);
132106
132195
  };
132107
132196
 
132108
- struct DebugManyFreeListBlocks {
132109
- static constexpr const char *Name = "debug_many_free_list_blocks";
132110
- static constexpr const char *Description = "DEBUG SETTING: add additional blocks to the free list";
132111
- static constexpr const LogicalTypeId InputType = LogicalTypeId::BOOLEAN;
132112
- static void SetGlobal(DatabaseInstance *db, DBConfig &config, const Value &parameter);
132113
- static Value GetSetting(ClientContext &context);
132114
- };
132115
-
132116
132197
  struct DebugWindowMode {
132117
132198
  static constexpr const char *Name = "debug_window_mode";
132118
132199
  static constexpr const char *Description = "DEBUG SETTING: switch window mode to use";
@@ -132396,7 +132477,6 @@ static ConfigurationOption internal_options[] = {DUCKDB_GLOBAL(AccessModeSetting
132396
132477
  DUCKDB_GLOBAL(DebugCheckpointAbort),
132397
132478
  DUCKDB_LOCAL(DebugForceExternal),
132398
132479
  DUCKDB_LOCAL(DebugForceNoCrossProduct),
132399
- DUCKDB_GLOBAL(DebugManyFreeListBlocks),
132400
132480
  DUCKDB_GLOBAL(DebugWindowMode),
132401
132481
  DUCKDB_GLOBAL_LOCAL(DefaultCollationSetting),
132402
132482
  DUCKDB_GLOBAL(DefaultOrderSetting),
@@ -133130,6 +133210,7 @@ public:
133130
133210
 
133131
133211
 
133132
133212
 
133213
+
133133
133214
  #ifndef DUCKDB_NO_THREADS
133134
133215
 
133135
133216
  #endif
@@ -133140,6 +133221,7 @@ DBConfig::DBConfig() {
133140
133221
  compression_functions = make_unique<CompressionFunctionSet>();
133141
133222
  replacement_opens.push_back(ExtensionPrefixReplacementOpen());
133142
133223
  cast_functions = make_unique<CastFunctionSet>();
133224
+ error_manager = make_unique<ErrorManager>();
133143
133225
  }
133144
133226
 
133145
133227
  DBConfig::DBConfig(std::unordered_map<string, string> &config_dict, bool read_only) {
@@ -133357,17 +133439,15 @@ Allocator &Allocator::Get(DatabaseInstance &db) {
133357
133439
  }
133358
133440
 
133359
133441
  void DatabaseInstance::Configure(DBConfig &new_config) {
133360
- config.options.database_path = new_config.options.database_path;
133361
- config.options.access_mode = AccessMode::READ_WRITE;
133362
- if (new_config.options.access_mode != AccessMode::UNDEFINED) {
133363
- config.options.access_mode = new_config.options.access_mode;
133442
+ config.options = new_config.options;
133443
+ if (config.options.access_mode == AccessMode::UNDEFINED) {
133444
+ config.options.access_mode = AccessMode::READ_WRITE;
133364
133445
  }
133365
133446
  if (new_config.file_system) {
133366
133447
  config.file_system = move(new_config.file_system);
133367
133448
  } else {
133368
133449
  config.file_system = make_unique<VirtualFileSystem>();
133369
133450
  }
133370
- config.options.maximum_memory = new_config.options.maximum_memory;
133371
133451
  if (config.options.maximum_memory == (idx_t)-1) {
133372
133452
  auto memory = FileSystem::GetAvailableMemory();
133373
133453
  if (memory != DConstants::INVALID_INDEX) {
@@ -133380,29 +133460,15 @@ void DatabaseInstance::Configure(DBConfig &new_config) {
133380
133460
  #else
133381
133461
  config.options.maximum_threads = 1;
133382
133462
  #endif
133383
- } else {
133384
- config.options.maximum_threads = new_config.options.maximum_threads;
133385
133463
  }
133386
- config.options.external_threads = new_config.options.external_threads;
133387
- config.options.load_extensions = new_config.options.load_extensions;
133388
- config.options.force_compression = new_config.options.force_compression;
133389
133464
  config.allocator = move(new_config.allocator);
133390
133465
  if (!config.allocator) {
133391
133466
  config.allocator = make_unique<Allocator>();
133392
133467
  }
133393
- config.options.checkpoint_wal_size = new_config.options.checkpoint_wal_size;
133394
- config.options.use_direct_io = new_config.options.use_direct_io;
133395
- config.options.temporary_directory = new_config.options.temporary_directory;
133396
- config.options.collation = new_config.options.collation;
133397
- config.options.default_order_type = new_config.options.default_order_type;
133398
- config.options.default_null_order = new_config.options.default_null_order;
133399
- config.options.enable_external_access = new_config.options.enable_external_access;
133400
- config.options.allow_unsigned_extensions = new_config.options.allow_unsigned_extensions;
133401
133468
  config.replacement_scans = move(new_config.replacement_scans);
133402
- config.replacement_opens = move(new_config.replacement_opens); // TODO is this okay?
133403
- config.options.initialize_default_database = new_config.options.initialize_default_database;
133404
- config.options.disabled_optimizers = move(new_config.options.disabled_optimizers);
133469
+ config.replacement_opens = move(new_config.replacement_opens);
133405
133470
  config.parser_extensions = move(new_config.parser_extensions);
133471
+ config.error_manager = move(new_config.error_manager);
133406
133472
  }
133407
133473
 
133408
133474
  DBConfig &DBConfig::GetConfig(ClientContext &context) {
@@ -133558,6 +133624,76 @@ shared_ptr<DuckDB> DBInstanceCache::CreateInstance(const string &database, DBCon
133558
133624
 
133559
133625
 
133560
133626
 
133627
+ namespace duckdb {
133628
+
133629
+ struct DefaultError {
133630
+ ErrorType type;
133631
+ const char *error;
133632
+ };
133633
+
133634
+ static DefaultError internal_errors[] = {
133635
+ {ErrorType::UNSIGNED_EXTENSION,
133636
+ "Extension \"%s\" could not be loaded because its signature is either missing or invalid and unsigned extensions "
133637
+ "are disabled by configuration (allow_unsigned_extensions)"},
133638
+ {ErrorType::INVALID, nullptr}};
133639
+
133640
+ string ErrorManager::FormatExceptionRecursive(ErrorType error_type, vector<ExceptionFormatValue> &values) {
133641
+ if (error_type >= ErrorType::ERROR_COUNT) {
133642
+ throw InternalException("Invalid error type passed to ErrorManager::FormatError");
133643
+ }
133644
+ auto entry = custom_errors.find(error_type);
133645
+ string error;
133646
+ if (entry == custom_errors.end()) {
133647
+ // error was not overwritten
133648
+ error = internal_errors[int(error_type)].error;
133649
+ } else {
133650
+ // error was overwritten
133651
+ error = entry->second;
133652
+ }
133653
+ return ExceptionFormatValue::Format(error, values);
133654
+ }
133655
+
133656
+ string ErrorManager::InvalidUnicodeError(const string &input, const string &context) {
133657
+ UnicodeInvalidReason reason;
133658
+ size_t pos;
133659
+ auto unicode = Utf8Proc::Analyze((const char *)input.c_str(), input.size(), &reason, &pos);
133660
+ if (unicode != UnicodeType::INVALID) {
133661
+ return "Invalid unicode error thrown but no invalid unicode detected in " + context;
133662
+ }
133663
+ string base_message;
133664
+ switch (reason) {
133665
+ case UnicodeInvalidReason::NULL_BYTE:
133666
+ base_message = "Null-byte (\\0)";
133667
+ break;
133668
+ case UnicodeInvalidReason::BYTE_MISMATCH:
133669
+ base_message = "Invalid unicode (byte sequence mismatch)";
133670
+ break;
133671
+ case UnicodeInvalidReason::INVALID_UNICODE:
133672
+ base_message = "Invalid unicode";
133673
+ break;
133674
+ default:
133675
+ break;
133676
+ }
133677
+ return base_message + " detected in " + context;
133678
+ }
133679
+
133680
+ void ErrorManager::AddCustomError(ErrorType type, string new_error) {
133681
+ custom_errors.insert(make_pair(type, move(new_error)));
133682
+ }
133683
+
133684
+ ErrorManager &ErrorManager::Get(ClientContext &context) {
133685
+ return *DBConfig::GetConfig(context).error_manager;
133686
+ }
133687
+
133688
+ ErrorManager &ErrorManager::Get(DatabaseInstance &context) {
133689
+ return *DBConfig::GetConfig(context).error_manager;
133690
+ }
133691
+
133692
+ } // namespace duckdb
133693
+
133694
+
133695
+
133696
+
133561
133697
 
133562
133698
 
133563
133699
 
@@ -142416,6 +142552,7 @@ inline std::string GetDLError(void) {
142416
142552
 
142417
142553
 
142418
142554
 
142555
+
142419
142556
  // LICENSE_CHANGE_BEGIN
142420
142557
  // The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #12
142421
142558
  // See the end of this file for a list
@@ -142513,10 +142650,7 @@ ExtensionInitResult ExtensionHelper::InitialLoad(DBConfig &config, FileOpener *o
142513
142650
  }
142514
142651
  }
142515
142652
  if (!any_valid && !config.options.allow_unsigned_extensions) {
142516
- throw IOException(
142517
- "Extension \"%s\" could not be loaded because its signature is either missing or "
142518
- "invalid and unsigned extensions are disabled by configuration (allow_unsigned_extensions)",
142519
- filename);
142653
+ throw IOException(config.error_manager->FormatException(ErrorType::UNSIGNED_EXTENSION, filename));
142520
142654
  }
142521
142655
  }
142522
142656
  auto lib_hdl = dlopen(filename.c_str(), RTLD_NOW | RTLD_LOCAL);
@@ -146358,18 +146492,6 @@ Value DebugForceNoCrossProduct::GetSetting(ClientContext &context) {
146358
146492
  return Value::BOOLEAN(ClientConfig::GetConfig(context).force_no_cross_product);
146359
146493
  }
146360
146494
 
146361
- //===--------------------------------------------------------------------===//
146362
- // Debug Many Free List blocks
146363
- //===--------------------------------------------------------------------===//
146364
- void DebugManyFreeListBlocks::SetGlobal(DatabaseInstance *db, DBConfig &config, const Value &input) {
146365
- config.options.debug_many_free_list_blocks = input.GetValue<bool>();
146366
- }
146367
-
146368
- Value DebugManyFreeListBlocks::GetSetting(ClientContext &context) {
146369
- auto &config = DBConfig::GetConfig(context);
146370
- return Value::BOOLEAN(config.options.debug_many_free_list_blocks);
146371
- }
146372
-
146373
146495
  //===--------------------------------------------------------------------===//
146374
146496
  // Debug Window Mode
146375
146497
  //===--------------------------------------------------------------------===//
@@ -207987,10 +208109,6 @@ vector<block_id_t> SingleFileBlockManager::GetFreeListBlocks() {
207987
208109
  // a bit from the max block size
207988
208110
  auto space_in_block = Storage::BLOCK_SIZE - 4 * sizeof(block_id_t);
207989
208111
  auto total_blocks = (total_size + space_in_block - 1) / space_in_block;
207990
- auto &config = DBConfig::GetConfig(db);
207991
- if (config.options.debug_many_free_list_blocks) {
207992
- total_blocks++;
207993
- }
207994
208112
  D_ASSERT(total_size > 0);
207995
208113
  D_ASSERT(total_blocks > 0);
207996
208114
 
@@ -208749,6 +208867,7 @@ void SegmentStatistics::Reset() {
208749
208867
 
208750
208868
 
208751
208869
 
208870
+
208752
208871
  namespace duckdb {
208753
208872
 
208754
208873
  StringStatistics::StringStatistics(LogicalType type_p, StatisticsType stats_type)
@@ -208837,7 +208956,8 @@ void StringStatistics::Update(const string_t &value) {
208837
208956
  if (unicode == UnicodeType::UNICODE) {
208838
208957
  has_unicode = true;
208839
208958
  } else if (unicode == UnicodeType::INVALID) {
208840
- throw InternalException("Invalid unicode detected in segment statistics update!");
208959
+ throw InternalException(
208960
+ ErrorManager::InvalidUnicodeError(string((char *)data, size), "segment statistics update"));
208841
208961
  }
208842
208962
  }
208843
208963
  }
package/src/duckdb.hpp CHANGED
@@ -11,8 +11,8 @@ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLI
11
11
  #pragma once
12
12
  #define DUCKDB_AMALGAMATION 1
13
13
  #define DUCKDB_AMALGAMATION_EXTENDED 1
14
- #define DUCKDB_SOURCE_ID "469752987"
15
- #define DUCKDB_VERSION "v0.5.2-dev1275"
14
+ #define DUCKDB_SOURCE_ID "d4790e5b65"
15
+ #define DUCKDB_VERSION "v0.5.2-dev1292"
16
16
  //===----------------------------------------------------------------------===//
17
17
  // DuckDB
18
18
  //
@@ -2149,7 +2149,7 @@ public:
2149
2149
  //! Read exactly nr_bytes from the specified location in the file. Fails if nr_bytes could not be read. This is
2150
2150
  //! equivalent to calling SetFilePointer(location) followed by calling Read().
2151
2151
  DUCKDB_API virtual void Read(FileHandle &handle, void *buffer, int64_t nr_bytes, idx_t location);
2152
- //! Write exactly nr_bytes to the specified location in the file. Fails if nr_bytes could not be read. This is
2152
+ //! Write exactly nr_bytes to the specified location in the file. Fails if nr_bytes could not be written. This is
2153
2153
  //! equivalent to calling SetFilePointer(location) followed by calling Write().
2154
2154
  DUCKDB_API virtual void Write(FileHandle &handle, void *buffer, int64_t nr_bytes, idx_t location);
2155
2155
  //! Read nr_bytes from the specified file into the buffer, moving the file pointer forward by nr_bytes. Returns the
@@ -18644,6 +18644,7 @@ public:
18644
18644
  namespace duckdb {
18645
18645
  class CastFunctionSet;
18646
18646
  class ClientContext;
18647
+ class ErrorManager;
18647
18648
  class CompressionFunction;
18648
18649
  class TableFunctionRef;
18649
18650
 
@@ -18728,8 +18729,6 @@ struct DBConfigOptions {
18728
18729
  set<OptimizerType> disabled_optimizers;
18729
18730
  //! Force a specific compression method to be used when checkpointing (if available)
18730
18731
  CompressionType force_compression = CompressionType::COMPRESSION_AUTO;
18731
- //! Debug flag that adds additional (unnecessary) free_list blocks to the storage
18732
- bool debug_many_free_list_blocks = false;
18733
18732
  //! Debug setting for window aggregation mode: (window, combine, separate)
18734
18733
  WindowAggregationMode window_mode = WindowAggregationMode::WINDOW;
18735
18734
  //! Whether or not preserving insertion order should be preserved
@@ -18772,6 +18771,8 @@ public:
18772
18771
  vector<ParserExtension> parser_extensions;
18773
18772
  //! Extensions made to the optimizer
18774
18773
  vector<OptimizerExtension> optimizer_extensions;
18774
+ //! Error manager
18775
+ unique_ptr<ErrorManager> error_manager;
18775
18776
 
18776
18777
  DUCKDB_API void AddExtensionOption(string name, string description, LogicalType parameter,
18777
18778
  set_option_callback_t function = nullptr);
@@ -27846,6 +27847,9 @@ private:
27846
27847
  vector<vector<LogicalType>> &best_sql_types_candidates,
27847
27848
  map<LogicalTypeId, vector<string>> &best_format_candidates);
27848
27849
 
27850
+ void VerifyUTF8(idx_t col_idx);
27851
+ void VerifyUTF8(idx_t col_idx, idx_t row_idx, DataChunk &chunk, int64_t offset = 0);
27852
+
27849
27853
  private:
27850
27854
  //! Whether or not the current row's columns have overflown sql_types.size()
27851
27855
  bool error_column_overflow = false;