duckdb 1.2.1-dev6.0 → 1.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (117)
  1. package/package.json +1 -1
  2. package/src/duckdb/extension/core_functions/aggregate/distributive/string_agg.cpp +14 -22
  3. package/src/duckdb/extension/core_functions/aggregate/nested/list.cpp +0 -1
  4. package/src/duckdb/extension/core_functions/lambda_functions.cpp +0 -11
  5. package/src/duckdb/extension/core_functions/scalar/list/list_aggregates.cpp +18 -6
  6. package/src/duckdb/extension/icu/icu-datefunc.cpp +9 -2
  7. package/src/duckdb/extension/icu/icu-strptime.cpp +7 -11
  8. package/src/duckdb/extension/icu/include/icu-datefunc.hpp +3 -1
  9. package/src/duckdb/extension/json/buffered_json_reader.cpp +18 -31
  10. package/src/duckdb/extension/json/json_extension.cpp +8 -3
  11. package/src/duckdb/extension/parquet/column_reader.cpp +4 -6
  12. package/src/duckdb/extension/parquet/column_writer.cpp +33 -12
  13. package/src/duckdb/extension/parquet/include/column_reader.hpp +0 -2
  14. package/src/duckdb/extension/parquet/include/parquet_bss_encoder.hpp +0 -1
  15. package/src/duckdb/extension/parquet/include/parquet_dlba_encoder.hpp +1 -2
  16. package/src/duckdb/src/catalog/catalog.cpp +12 -0
  17. package/src/duckdb/src/catalog/catalog_entry/duck_table_entry.cpp +1 -1
  18. package/src/duckdb/src/catalog/catalog_entry_retriever.cpp +1 -1
  19. package/src/duckdb/src/catalog/catalog_search_path.cpp +8 -8
  20. package/src/duckdb/src/common/bind_helpers.cpp +3 -0
  21. package/src/duckdb/src/common/compressed_file_system.cpp +2 -0
  22. package/src/duckdb/src/common/hive_partitioning.cpp +1 -1
  23. package/src/duckdb/src/common/multi_file_reader.cpp +3 -3
  24. package/src/duckdb/src/execution/aggregate_hashtable.cpp +1 -1
  25. package/src/duckdb/src/execution/index/art/art.cpp +19 -6
  26. package/src/duckdb/src/execution/index/art/iterator.cpp +7 -3
  27. package/src/duckdb/src/execution/operator/aggregate/physical_window.cpp +11 -4
  28. package/src/duckdb/src/execution/operator/csv_scanner/buffer_manager/csv_buffer.cpp +2 -2
  29. package/src/duckdb/src/execution/operator/csv_scanner/encode/csv_encoder.cpp +5 -1
  30. package/src/duckdb/src/execution/operator/csv_scanner/scanner/base_scanner.cpp +3 -2
  31. package/src/duckdb/src/execution/operator/csv_scanner/scanner/csv_schema.cpp +2 -2
  32. package/src/duckdb/src/execution/operator/csv_scanner/scanner/scanner_boundary.cpp +1 -1
  33. package/src/duckdb/src/execution/operator/csv_scanner/scanner/string_value_scanner.cpp +20 -12
  34. package/src/duckdb/src/execution/operator/csv_scanner/sniffer/dialect_detection.cpp +19 -22
  35. package/src/duckdb/src/execution/operator/csv_scanner/sniffer/type_refinement.cpp +1 -1
  36. package/src/duckdb/src/execution/operator/csv_scanner/util/csv_error.cpp +1 -0
  37. package/src/duckdb/src/execution/operator/csv_scanner/util/csv_reader_options.cpp +16 -0
  38. package/src/duckdb/src/execution/operator/helper/physical_reservoir_sample.cpp +1 -0
  39. package/src/duckdb/src/execution/operator/helper/physical_streaming_sample.cpp +16 -7
  40. package/src/duckdb/src/execution/operator/persistent/physical_batch_insert.cpp +3 -1
  41. package/src/duckdb/src/execution/operator/scan/physical_table_scan.cpp +11 -1
  42. package/src/duckdb/src/execution/operator/schema/physical_create_art_index.cpp +5 -7
  43. package/src/duckdb/src/execution/physical_plan/plan_create_index.cpp +11 -0
  44. package/src/duckdb/src/execution/physical_plan/plan_sample.cpp +1 -3
  45. package/src/duckdb/src/execution/radix_partitioned_hashtable.cpp +14 -5
  46. package/src/duckdb/src/execution/sample/reservoir_sample.cpp +24 -12
  47. package/src/duckdb/src/function/scalar/generic/getvariable.cpp +3 -3
  48. package/src/duckdb/src/function/table/version/pragma_version.cpp +3 -3
  49. package/src/duckdb/src/function/window/window_aggregate_states.cpp +3 -0
  50. package/src/duckdb/src/function/window/window_boundaries_state.cpp +108 -48
  51. package/src/duckdb/src/function/window/window_constant_aggregator.cpp +5 -5
  52. package/src/duckdb/src/function/window/window_distinct_aggregator.cpp +6 -0
  53. package/src/duckdb/src/include/duckdb/catalog/catalog_entry_retriever.hpp +1 -1
  54. package/src/duckdb/src/include/duckdb/catalog/catalog_search_path.hpp +10 -9
  55. package/src/duckdb/src/include/duckdb/common/adbc/adbc-init.hpp +1 -1
  56. package/src/duckdb/src/include/duckdb/common/multi_file_reader.hpp +2 -2
  57. package/src/duckdb/src/include/duckdb/execution/index/art/iterator.hpp +2 -0
  58. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/base_scanner.hpp +1 -1
  59. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/csv_buffer.hpp +5 -4
  60. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/csv_option.hpp +1 -1
  61. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/csv_schema.hpp +2 -2
  62. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/encode/csv_encoder.hpp +1 -1
  63. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/sniffer/csv_sniffer.hpp +1 -1
  64. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/string_value_scanner.hpp +2 -2
  65. package/src/duckdb/src/include/duckdb/execution/operator/helper/physical_streaming_sample.hpp +3 -7
  66. package/src/duckdb/src/include/duckdb/execution/reservoir_sample.hpp +2 -1
  67. package/src/duckdb/src/include/duckdb/function/lambda_functions.hpp +11 -3
  68. package/src/duckdb/src/include/duckdb/function/window/window_boundaries_state.hpp +4 -0
  69. package/src/duckdb/src/include/duckdb/main/client_context_state.hpp +4 -0
  70. package/src/duckdb/src/include/duckdb/main/extension_entries.hpp +25 -7
  71. package/src/duckdb/src/include/duckdb/main/pending_query_result.hpp +2 -0
  72. package/src/duckdb/src/include/duckdb/main/query_profiler.hpp +7 -0
  73. package/src/duckdb/src/include/duckdb/optimizer/filter_combiner.hpp +2 -2
  74. package/src/duckdb/src/include/duckdb/optimizer/late_materialization.hpp +2 -1
  75. package/src/duckdb/src/include/duckdb/optimizer/optimizer_extension.hpp +11 -5
  76. package/src/duckdb/src/include/duckdb/parallel/executor_task.hpp +4 -1
  77. package/src/duckdb/src/include/duckdb/parallel/pipeline.hpp +0 -1
  78. package/src/duckdb/src/include/duckdb/parallel/task_executor.hpp +3 -0
  79. package/src/duckdb/src/include/duckdb/parallel/task_notifier.hpp +27 -0
  80. package/src/duckdb/src/include/duckdb/parallel/task_scheduler.hpp +4 -0
  81. package/src/duckdb/src/include/duckdb/planner/expression/bound_subquery_expression.hpp +1 -1
  82. package/src/duckdb/src/include/duckdb/planner/tableref/bound_cteref.hpp +1 -0
  83. package/src/duckdb/src/include/duckdb/storage/checkpoint/table_data_writer.hpp +3 -1
  84. package/src/duckdb/src/include/duckdb/storage/checkpoint_manager.hpp +7 -1
  85. package/src/duckdb/src/include/duckdb/storage/storage_manager.hpp +3 -2
  86. package/src/duckdb/src/include/duckdb.h +495 -480
  87. package/src/duckdb/src/main/attached_database.cpp +1 -1
  88. package/src/duckdb/src/main/capi/duckdb-c.cpp +5 -1
  89. package/src/duckdb/src/main/capi/helper-c.cpp +8 -0
  90. package/src/duckdb/src/main/config.cpp +7 -1
  91. package/src/duckdb/src/main/database.cpp +8 -8
  92. package/src/duckdb/src/main/extension/extension_helper.cpp +3 -1
  93. package/src/duckdb/src/main/extension/extension_load.cpp +12 -12
  94. package/src/duckdb/src/optimizer/column_lifetime_analyzer.cpp +1 -0
  95. package/src/duckdb/src/optimizer/join_order/query_graph_manager.cpp +2 -2
  96. package/src/duckdb/src/optimizer/late_materialization.cpp +26 -5
  97. package/src/duckdb/src/optimizer/optimizer.cpp +12 -1
  98. package/src/duckdb/src/parallel/executor_task.cpp +10 -6
  99. package/src/duckdb/src/parallel/task_executor.cpp +4 -1
  100. package/src/duckdb/src/parallel/task_notifier.cpp +23 -0
  101. package/src/duckdb/src/parallel/task_scheduler.cpp +33 -0
  102. package/src/duckdb/src/parser/transform/expression/transform_subquery.cpp +4 -1
  103. package/src/duckdb/src/planner/binder/expression/bind_subquery_expression.cpp +1 -1
  104. package/src/duckdb/src/planner/binder/query_node/plan_subquery.cpp +4 -2
  105. package/src/duckdb/src/planner/binder/statement/bind_create.cpp +7 -2
  106. package/src/duckdb/src/planner/binder/statement/bind_create_table.cpp +6 -5
  107. package/src/duckdb/src/storage/checkpoint/table_data_writer.cpp +4 -2
  108. package/src/duckdb/src/storage/checkpoint_manager.cpp +4 -3
  109. package/src/duckdb/src/storage/compression/string_uncompressed.cpp +21 -10
  110. package/src/duckdb/src/storage/storage_info.cpp +2 -0
  111. package/src/duckdb/src/storage/storage_manager.cpp +2 -2
  112. package/src/duckdb/src/storage/table/row_group.cpp +5 -6
  113. package/src/duckdb/src/storage/table/scan_state.cpp +6 -0
  114. package/src/duckdb/src/transaction/duck_transaction.cpp +11 -3
  115. package/src/duckdb/src/transaction/duck_transaction_manager.cpp +2 -2
  116. package/src/duckdb/third_party/concurrentqueue/concurrentqueue.h +17 -0
  117. package/src/duckdb/ub_src_parallel.cpp +2 -0
package/src/duckdb/src/main/attached_database.cpp
@@ -247,7 +247,7 @@ void AttachedDatabase::Close() {
  }
  CheckpointOptions options;
  options.wal_action = CheckpointWALAction::DELETE_WAL;
- storage->CreateCheckpoint(options);
+ storage->CreateCheckpoint(nullptr, options);
  }
  } catch (...) { // NOLINT
  }
package/src/duckdb/src/main/capi/duckdb-c.cpp
@@ -35,7 +35,11 @@ duckdb_state duckdb_open_internal(DBInstanceCacheWrapper *cache, const char *pat
  }

  if (cache) {
- wrapper->database = cache->instance_cache->GetOrCreateInstance(path, *db_config, true);
+ duckdb::string path_str;
+ if (path) {
+ path_str = path;
+ }
+ wrapper->database = cache->instance_cache->GetOrCreateInstance(path_str, *db_config, true);
  } else {
  wrapper->database = duckdb::make_shared_ptr<DuckDB>(path, db_config);
  }
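The change above guards the instance-cache path against a NULL database path, so a null pointer is no longer converted into a std::string. A minimal usage sketch of the public C API, where a NULL path opens an in-memory database; error handling is kept deliberately small:

    #include "duckdb.h"
    #include <cstdio>

    int main() {
        duckdb_database db;
        duckdb_connection con;
        // NULL path: open an in-memory database
        if (duckdb_open(NULL, &db) == DuckDBError) {
            fprintf(stderr, "failed to open database\n");
            return 1;
        }
        duckdb_connect(db, &con);
        // ... run queries ...
        duckdb_disconnect(&con);
        duckdb_close(&db);
        return 0;
    }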
package/src/duckdb/src/main/capi/helper-c.cpp
@@ -78,6 +78,10 @@ LogicalTypeId ConvertCTypeToCPP(duckdb_type c_type) {
  return LogicalTypeId::VARINT;
  case DUCKDB_TYPE_SQLNULL:
  return LogicalTypeId::SQLNULL;
+ case DUCKDB_TYPE_STRING_LITERAL:
+ return LogicalTypeId::STRING_LITERAL;
+ case DUCKDB_TYPE_INTEGER_LITERAL:
+ return LogicalTypeId::INTEGER_LITERAL;
  default: // LCOV_EXCL_START
  D_ASSERT(0);
  return LogicalTypeId::INVALID;
@@ -160,6 +164,10 @@ duckdb_type ConvertCPPTypeToC(const LogicalType &sql_type) {
  return DUCKDB_TYPE_ANY;
  case LogicalTypeId::SQLNULL:
  return DUCKDB_TYPE_SQLNULL;
+ case LogicalTypeId::STRING_LITERAL:
+ return DUCKDB_TYPE_STRING_LITERAL;
+ case LogicalTypeId::INTEGER_LITERAL:
+ return DUCKDB_TYPE_INTEGER_LITERAL;
  default: // LCOV_EXCL_START
  D_ASSERT(0);
  return DUCKDB_TYPE_INVALID;
package/src/duckdb/src/main/config.cpp
@@ -380,7 +380,13 @@ void DBConfig::AddExtensionOption(const string &name, string description, Logica
  const Value &default_value, set_option_callback_t function) {
  extension_parameters.insert(
  make_pair(name, ExtensionOption(std::move(description), std::move(parameter), function, default_value)));
- if (!default_value.IsNull()) {
+ // copy over unrecognized options, if they match the new extension option
+ auto iter = options.unrecognized_options.find(name);
+ if (iter != options.unrecognized_options.end()) {
+ options.set_variables[name] = iter->second;
+ options.unrecognized_options.erase(iter);
+ }
+ if (!default_value.IsNull() && options.set_variables.find(name) == options.set_variables.end()) {
  // Default value is set, insert it into the 'set_variables' list
  options.set_variables[name] = default_value;
  }
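With this change, a value the user SET before the extension was loaded (which until then sits in unrecognized_options) is adopted when the extension registers the option, instead of being overwritten by the option's default. A hedged sketch of the registering side, using the AddExtensionOption overload visible in the hunk; the extension and option names are hypothetical:

    #include "duckdb.hpp"

    using namespace duckdb;

    // Hypothetical extension load hook registering a custom setting.
    static void LoadInternal(DatabaseInstance &instance) {
        auto &config = DBConfig::GetConfig(instance);
        // If `SET my_ext_timeout = 42` ran before the extension loaded, that value
        // is now promoted from unrecognized_options into set_variables here.
        config.AddExtensionOption("my_ext_timeout", "Timeout used by my_ext (hypothetical)",
                                  LogicalType::INTEGER, Value::INTEGER(10), nullptr);
    }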
package/src/duckdb/src/main/database.cpp
@@ -225,10 +225,11 @@ static void ThrowExtensionSetUnrecognizedOptions(const case_insensitive_map_t<Va
  }

  void DatabaseInstance::LoadExtensionSettings() {
- auto &unrecognized_options = config.options.unrecognized_options;
+ // copy the map, to protect against modifications during
+ auto unrecognized_options_copy = config.options.unrecognized_options;

  if (config.options.autoload_known_extensions) {
- if (unrecognized_options.empty()) {
+ if (unrecognized_options_copy.empty()) {
  // Nothing to do
  return;
  }
@@ -237,7 +238,7 @@ void DatabaseInstance::LoadExtensionSettings() {
  con.BeginTransaction();

  vector<string> extension_options;
- for (auto &option : unrecognized_options) {
+ for (auto &option : unrecognized_options_copy) {
  auto &name = option.first;
  auto &value = option.second;

@@ -254,18 +255,17 @@ void DatabaseInstance::LoadExtensionSettings() {
  if (it == config.extension_parameters.end()) {
  throw InternalException("Extension %s did not provide the '%s' config setting", extension_name, name);
  }
+ // if the extension provided the option, it should no longer be unrecognized.
+ D_ASSERT(config.options.unrecognized_options.find(name) == config.options.unrecognized_options.end());
  auto &context = *con.context;
  PhysicalSet::SetExtensionVariable(context, it->second, name, SetScope::GLOBAL, value);
  extension_options.push_back(name);
  }

- for (auto &option : extension_options) {
- unrecognized_options.erase(option);
- }
  con.Commit();
  }
- if (!unrecognized_options.empty()) {
- ThrowExtensionSetUnrecognizedOptions(unrecognized_options);
+ if (!config.options.unrecognized_options.empty()) {
+ ThrowExtensionSetUnrecognizedOptions(config.options.unrecognized_options);
  }
  }
package/src/duckdb/src/main/extension/extension_helper.cpp
@@ -121,6 +121,7 @@ static const DefaultExtension internal_extensions[] = {
  {"vss", "Adds indexing support to accelerate Vector Similarity Search", false},
  {"delta", "Adds support for Delta Lake", false},
  {"fts", "Adds support for Full-Text Search Indexes", false},
+ {"ui", "Adds local UI for DuckDB", false},
  {nullptr, nullptr, false}};

  idx_t ExtensionHelper::DefaultExtensionCount() {
@@ -139,7 +140,8 @@ DefaultExtension ExtensionHelper::GetDefaultExtension(idx_t index) {
  // Allow Auto-Install Extensions
  //===--------------------------------------------------------------------===//
  static const char *const auto_install[] = {"motherduck", "postgres_scanner", "mysql_scanner", "sqlite_scanner",
- "delta", "iceberg", "uc_catalog", nullptr};
+ "delta", "iceberg", "uc_catalog", "ui",
+ nullptr};

  // TODO: unify with new autoload mechanism
  bool ExtensionHelper::AllowAutoInstall(const string &extension) {
package/src/duckdb/src/main/extension/extension_load.cpp
@@ -71,15 +71,11 @@ struct ExtensionAccess {
  static void SetError(duckdb_extension_info info, const char *error) {
  auto &load_state = DuckDBExtensionLoadState::Get(info);

- if (error) {
- load_state.has_error = true;
- load_state.error_data = ErrorData(error);
- } else {
- load_state.has_error = true;
- load_state.error_data = ErrorData(
- ExceptionType::UNKNOWN_TYPE,
- "Extension has indicated an error occured during initialization, but did not set an error message.");
- }
+ load_state.has_error = true;
+ load_state.error_data =
+ error ? ErrorData(error)
+ : ErrorData(ExceptionType::UNKNOWN_TYPE, "Extension has indicated an error occured during "
+ "initialization, but did not set an error message.");
  }

  //! Called by the extension get a pointer to the database that is loading it
@@ -92,9 +88,11 @@ struct ExtensionAccess {
  load_state.database_data->database = make_shared_ptr<DuckDB>(load_state.db);
  return reinterpret_cast<duckdb_database *>(load_state.database_data.get());
  } catch (std::exception &ex) {
+ load_state.has_error = true;
  load_state.error_data = ErrorData(ex);
  return nullptr;
  } catch (...) {
+ load_state.has_error = true;
  load_state.error_data =
  ErrorData(ExceptionType::UNKNOWN_TYPE, "Unknown error in GetDatabase when trying to load extension!");
  return nullptr;
@@ -125,8 +123,9 @@ struct ExtensionAccess {
  load_state.has_error = true;
  load_state.error_data =
  ErrorData(ExceptionType::UNKNOWN_TYPE,
- StringUtil::Format("Unknown ABI Type '%s' found when loading extension '%s'",
- load_state.init_result.abi_type, load_state.init_result.filename));
+ StringUtil::Format("Unknown ABI Type of value '%d' found when loading extension '%s'",
+ static_cast<uint8_t>(load_state.init_result.abi_type),
+ load_state.init_result.filename));
  return nullptr;
  }

@@ -590,7 +589,8 @@ void ExtensionHelper::LoadExternalExtension(DatabaseInstance &db, FileSystem &fs
  return;
  }

- throw IOException("Unknown ABI type '%s' for extension '%s'", extension_init_result.abi_type, extension);
+ throw IOException("Unknown ABI type of value '%s' for extension '%s'",
+ static_cast<uint8_t>(extension_init_result.abi_type), extension);
  #endif
  }
package/src/duckdb/src/optimizer/column_lifetime_analyzer.cpp
@@ -108,6 +108,7 @@ void ColumnLifetimeAnalyzer::VisitOperator(LogicalOperator &op) {
  //! When RETURNING is used, a PROJECTION is the top level operator for INSERTS, UPDATES, and DELETES
  //! We still need to project all values from these operators so the projection
  //! on top of them can select from only the table values being inserted.
+ case LogicalOperatorType::LOGICAL_GET:
  case LogicalOperatorType::LOGICAL_UNION:
  case LogicalOperatorType::LOGICAL_EXCEPT:
  case LogicalOperatorType::LOGICAL_INTERSECT:
package/src/duckdb/src/optimizer/join_order/query_graph_manager.cpp
@@ -265,7 +265,6 @@ GenerateJoinRelation QueryGraphManager::GenerateJoins(vector<unique_ptr<LogicalO
  break;
  }
  }
-
  auto join = make_uniq<LogicalComparisonJoin>(chosen_filter->join_type);
  // Here we optimize build side probe side. Our build side is the right side
  // So the right plans should have lower cardinalities.
@@ -288,8 +287,9 @@ GenerateJoinRelation QueryGraphManager::GenerateJoins(vector<unique_ptr<LogicalO
  bool invert = !JoinRelationSet::IsSubset(*left.set, *f->left_set);
  // If the left and right set are inverted AND it is a semi or anti join
  // swap left and right children back.
+
  if (invert && (f->join_type == JoinType::SEMI || f->join_type == JoinType::ANTI)) {
- std::swap(left, right);
+ std::swap(join->children[0], join->children[1]);
  invert = false;
  }
package/src/duckdb/src/optimizer/late_materialization.cpp
@@ -13,6 +13,7 @@
  #include "duckdb/planner/expression_iterator.hpp"
  #include "duckdb/catalog/catalog_entry/table_catalog_entry.hpp"
  #include "duckdb/main/client_config.hpp"
+ #include "duckdb/main/config.hpp"

  namespace duckdb {

@@ -357,9 +358,22 @@ bool LateMaterialization::TryLateMaterialization(unique_ptr<LogicalOperator> &op
  return true;
  }

- bool LateMaterialization::OptimizeLargeLimit(LogicalOperator &child) {
- // we only support large limits if the only
- reference<LogicalOperator> current_op = child;
+ bool LateMaterialization::OptimizeLargeLimit(LogicalLimit &limit, idx_t limit_val, bool has_offset) {
+ auto &config = DBConfig::GetConfig(optimizer.context);
+ if (!has_offset && !config.options.preserve_insertion_order) {
+ // we avoid optimizing large limits if preserve insertion order is false
+ // since the limit is executed in parallel anyway
+ return false;
+ }
+ // we only perform this optimization until a certain amount of maximum values to reduce memory constraints
+ // since we still materialize the set of row-ids in the hash table this optimization can increase memory pressure
+ // FIXME: make this configurable as well
+ static constexpr const idx_t LIMIT_MAX_VAL = 1000000;
+ if (limit_val > LIMIT_MAX_VAL) {
+ return false;
+ }
+ // we only support large limits if they are directly below the source
+ reference<LogicalOperator> current_op = *limit.children[0];
  while (current_op.get().type != LogicalOperatorType::LOGICAL_GET) {
  if (current_op.get().type != LogicalOperatorType::LOGICAL_PROJECTION) {
  return false;
@@ -376,11 +390,18 @@ unique_ptr<LogicalOperator> LateMaterialization::Optimize(unique_ptr<LogicalOper
  if (limit.limit_val.Type() != LimitNodeType::CONSTANT_VALUE) {
  break;
  }
- if (limit.limit_val.GetConstantValue() > max_row_count) {
+ auto limit_val = limit.limit_val.GetConstantValue();
+ bool has_offset = limit.offset_val.Type() != LimitNodeType::UNSET;
+ if (limit_val > max_row_count) {
  // for large limits - we may still want to do this optimization if the limit is consecutive
  // this is the case if there are only projections/get below the limit
  // if the row-ids are not consecutive doing the join can worsen performance
- if (!OptimizeLargeLimit(*limit.children[0])) {
+ if (!OptimizeLargeLimit(limit, limit_val, has_offset)) {
+ break;
+ }
+ } else {
+ // optimizing small limits really only makes sense if we have an offset
+ if (!has_offset) {
  break;
  }
  }
package/src/duckdb/src/optimizer/optimizer.cpp
@@ -272,12 +272,23 @@ unique_ptr<LogicalOperator> Optimizer::Optimize(unique_ptr<LogicalOperator> plan

  this->plan = std::move(plan_p);

+ for (auto &pre_optimizer_extension : DBConfig::GetConfig(context).optimizer_extensions) {
+ RunOptimizer(OptimizerType::EXTENSION, [&]() {
+ OptimizerExtensionInput input {GetContext(), *this, pre_optimizer_extension.optimizer_info.get()};
+ if (pre_optimizer_extension.pre_optimize_function) {
+ pre_optimizer_extension.pre_optimize_function(input, plan);
+ }
+ });
+ }
+
  RunBuiltInOptimizers();

  for (auto &optimizer_extension : DBConfig::GetConfig(context).optimizer_extensions) {
  RunOptimizer(OptimizerType::EXTENSION, [&]() {
  OptimizerExtensionInput input {GetContext(), *this, optimizer_extension.optimizer_info.get()};
- optimizer_extension.optimize_function(input, plan);
+ if (optimizer_extension.optimize_function) {
+ optimizer_extension.optimize_function(input, plan);
+ }
  });
  }
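The hunk above adds an optional pre-optimization hook: an optimizer extension may now supply a pre_optimize_function that runs before the built-in optimizers, and optimize_function itself has become optional. A hedged registration sketch; the callback signatures and field names come from this diff, while the registration helper and all other names are illustrative assumptions:

    #include "duckdb.hpp"
    #include "duckdb/optimizer/optimizer_extension.hpp"

    using namespace duckdb;

    // Runs before RunBuiltInOptimizers(), per the loop added above.
    static void MyPreOptimize(OptimizerExtensionInput &input, unique_ptr<LogicalOperator> &plan) {
        // inspect or rewrite the unoptimized plan here
    }

    // Runs after the built-in optimizers, as before.
    static void MyOptimize(OptimizerExtensionInput &input, unique_ptr<LogicalOperator> &plan) {
        // rewrite the optimized plan here
    }

    static void RegisterMyOptimizer(DatabaseInstance &db) {
        OptimizerExtension ext;
        ext.pre_optimize_function = MyPreOptimize; // either hook may now be left unset
        ext.optimize_function = MyOptimize;
        DBConfig::GetConfig(db).optimizer_extensions.push_back(std::move(ext));
    }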
package/src/duckdb/src/parallel/executor_task.cpp
@@ -1,4 +1,5 @@
- #include "duckdb/parallel/task.hpp"
+ #include "duckdb/parallel/executor_task.hpp"
+ #include "duckdb/parallel/task_notifier.hpp"
  #include "duckdb/execution/executor.hpp"
  #include "duckdb/main/client_context.hpp"
  #include "duckdb/parallel/thread_context.hpp"
@@ -6,13 +7,13 @@
  namespace duckdb {

  ExecutorTask::ExecutorTask(Executor &executor_p, shared_ptr<Event> event_p)
- : executor(executor_p), event(std::move(event_p)) {
+ : executor(executor_p), event(std::move(event_p)), context(executor_p.context) {
  executor.RegisterTask();
  }

- ExecutorTask::ExecutorTask(ClientContext &context, shared_ptr<Event> event_p, const PhysicalOperator &op_p)
- : executor(Executor::Get(context)), event(std::move(event_p)), op(&op_p) {
- thread_context = make_uniq<ThreadContext>(context);
+ ExecutorTask::ExecutorTask(ClientContext &context_p, shared_ptr<Event> event_p, const PhysicalOperator &op_p)
+ : executor(Executor::Get(context_p)), event(std::move(event_p)), op(&op_p), context(context_p) {
+ thread_context = make_uniq<ThreadContext>(context_p);
  executor.RegisterTask();
  }

@@ -38,6 +39,7 @@ TaskExecutionResult ExecutorTask::Execute(TaskExecutionMode mode) {
  if (thread_context) {
  TaskExecutionResult result;
  do {
+ TaskNotifier task_notifier {context};
  thread_context->profiler.StartOperator(op);
  // to allow continuous profiling, always execute in small steps
  result = ExecuteTask(TaskExecutionMode::PROCESS_PARTIAL);
@@ -46,7 +48,9 @@ TaskExecutionResult ExecutorTask::Execute(TaskExecutionMode mode) {
  } while (mode == TaskExecutionMode::PROCESS_ALL && result == TaskExecutionResult::TASK_NOT_FINISHED);
  return result;
  } else {
- return ExecuteTask(mode);
+ TaskNotifier task_notifier {context};
+ auto result = ExecuteTask(mode);
+ return result;
  }
  } catch (std::exception &ex) {
  executor.PushError(ErrorData(ex));
package/src/duckdb/src/parallel/task_executor.cpp
@@ -1,4 +1,5 @@
  #include "duckdb/parallel/task_executor.hpp"
+ #include "duckdb/parallel/task_notifier.hpp"
  #include "duckdb/parallel/task_scheduler.hpp"

  namespace duckdb {
@@ -7,7 +8,8 @@ TaskExecutor::TaskExecutor(TaskScheduler &scheduler)
  : scheduler(scheduler), token(scheduler.CreateProducer()), completed_tasks(0), total_tasks(0) {
  }

- TaskExecutor::TaskExecutor(ClientContext &context) : TaskExecutor(TaskScheduler::GetScheduler(context)) {
+ TaskExecutor::TaskExecutor(ClientContext &context_p) : TaskExecutor(TaskScheduler::GetScheduler(context_p)) {
+ context = context_p;
  }

  TaskExecutor::~TaskExecutor() {
@@ -69,6 +71,7 @@ TaskExecutionResult BaseExecutorTask::Execute(TaskExecutionMode mode) {
  return TaskExecutionResult::TASK_FINISHED;
  }
  try {
+ TaskNotifier task_notifier {executor.context};
  ExecuteTask();
  executor.FinishTask();
  return TaskExecutionResult::TASK_FINISHED;
package/src/duckdb/src/parallel/task_notifier.cpp (new file)
@@ -0,0 +1,23 @@
+ #include "duckdb/parallel/task_notifier.hpp"
+ #include "duckdb/main/client_context.hpp"
+ #include "duckdb/main/client_context_state.hpp"
+
+ namespace duckdb {
+
+ TaskNotifier::TaskNotifier(optional_ptr<ClientContext> context_p) : context(context_p) {
+ if (context) {
+ for (auto &state : context->registered_state->States()) {
+ state->OnTaskStart(*context);
+ }
+ }
+ }
+
+ TaskNotifier::~TaskNotifier() {
+ if (context) {
+ for (auto &state : context->registered_state->States()) {
+ state->OnTaskStop(*context);
+ }
+ }
+ }
+
+ } // namespace duckdb
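TaskNotifier is a small RAII helper: on construction it calls OnTaskStart on every ClientContextState registered on the context, and on destruction it calls OnTaskStop, so the hooks bracket each task executed in Execute() above. A hedged sketch of a state using these hooks, assuming they are virtual no-ops on ClientContextState (consistent with the +4 lines in client_context_state.hpp in the file list); the class name and counters are hypothetical:

    #include "duckdb.hpp"
    #include "duckdb/main/client_context_state.hpp"
    #include <atomic>

    using namespace duckdb;

    // Counts tasks executed on behalf of a connection; the values could be
    // surfaced for profiling or progress reporting.
    class TaskCounterState : public ClientContextState {
    public:
        void OnTaskStart(ClientContext &context) override {
            started++;
        }
        void OnTaskStop(ClientContext &context) override {
            stopped++;
        }

        std::atomic<idx_t> started {0};
        std::atomic<idx_t> stopped {0};
    };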
package/src/duckdb/src/parallel/task_scheduler.cpp
@@ -284,6 +284,39 @@ int32_t TaskScheduler::NumberOfThreads() {
  return current_thread_count.load();
  }

+ idx_t TaskScheduler::GetNumberOfTasks() const {
+ #ifndef DUCKDB_NO_THREADS
+ return queue->q.size_approx();
+ #else
+ idx_t task_count = 0;
+ for (auto &producer : queue->q) {
+ task_count += producer.second.size();
+ }
+ return task_count;
+ #endif
+ }
+
+ idx_t TaskScheduler::GetProducerCount() const {
+ #ifndef DUCKDB_NO_THREADS
+ return queue->q.size_producers_approx();
+ #else
+ return queue->q.size();
+ #endif
+ }
+
+ idx_t TaskScheduler::GetTaskCountForProducer(ProducerToken &token) const {
+ #ifndef DUCKDB_NO_THREADS
+ lock_guard<mutex> producer_lock(token.producer_lock);
+ return queue->q.size_producer_approx(token.token->queue_token);
+ #else
+ const auto it = queue->q.find(std::ref(*token.token));
+ if (it == queue->q.end()) {
+ return 0;
+ }
+ return it->second.size();
+ #endif
+ }
+
  void TaskScheduler::SetThreads(idx_t total_threads, idx_t external_threads) {
  if (total_threads == 0) {
  throw SyntaxException("Number of threads must be positive!");
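These new accessors expose approximate queue sizes (the concurrentqueue.h change in the file list presumably adds the per-producer size helpers used here). A minimal, hedged sketch of reading them from code that already has a ClientContext; GetScheduler is pre-existing API and the printing is incidental:

    #include "duckdb.hpp"
    #include "duckdb/parallel/task_scheduler.hpp"
    #include <iostream>

    using namespace duckdb;

    static void PrintSchedulerLoad(ClientContext &context) {
        auto &scheduler = TaskScheduler::GetScheduler(context);
        // both counts are approximate snapshots, not exact values
        std::cout << "queued tasks: " << scheduler.GetNumberOfTasks()
                  << ", producers: " << scheduler.GetProducerCount() << std::endl;
    }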
package/src/duckdb/src/parser/transform/expression/transform_subquery.cpp
@@ -107,6 +107,7 @@ unique_ptr<ParsedExpression> Transformer::TransformSubquery(duckdb_libpgquery::P
  }
  }
  // transform constants (e.g. ORDER BY 1) into positional references (ORDER BY #1)
+ idx_t array_idx = 0;
  if (aggr->order_bys) {
  for (auto &order : aggr->order_bys->orders) {
  if (order.expression->GetExpressionType() == ExpressionType::VALUE_CONSTANT) {
@@ -120,8 +121,10 @@ unique_ptr<ParsedExpression> Transformer::TransformSubquery(duckdb_libpgquery::P
  }
  } else if (sub_select) {
  // if we have a SELECT we can push the ORDER BY clause into the SELECT list and reference it
+ auto alias = "__array_internal_idx_" + to_string(++array_idx);
+ order.expression->alias = alias;
  sub_select->select_list.push_back(std::move(order.expression));
- order.expression = make_uniq<PositionalReferenceExpression>(sub_select->select_list.size() - 1);
+ order.expression = make_uniq<ColumnRefExpression>(alias);
  } else {
  // otherwise we remove order qualifications
  RemoveOrderQualificationRecursive(order.expression);
package/src/duckdb/src/planner/binder/expression/bind_subquery_expression.cpp
@@ -153,7 +153,7 @@ BindResult ExpressionBinder::BindExpression(SubqueryExpression &expr, idx_t dept
  }
  child = BoundCastExpression::AddCastToType(context, std::move(child), compare_type);
  result->child_types.push_back(subquery_type);
- result->child_target = compare_type;
+ result->child_targets.push_back(compare_type);
  result->children.push_back(std::move(child));
  }
  }
package/src/duckdb/src/planner/binder/query_node/plan_subquery.cpp
@@ -168,7 +168,8 @@ static unique_ptr<Expression> PlanUncorrelatedSubquery(Binder &binder, BoundSubq
  cond.left = std::move(expr.children[child_idx]);
  auto &child_type = expr.child_types[child_idx];
  cond.right = BoundCastExpression::AddDefaultCastToType(
- make_uniq<BoundColumnRefExpression>(child_type, plan_columns[child_idx]), expr.child_target);
+ make_uniq<BoundColumnRefExpression>(child_type, plan_columns[child_idx]),
+ expr.child_targets[child_idx]);
  cond.comparison = expr.comparison_type;
  join->conditions.push_back(std::move(cond));
  }
@@ -371,7 +372,8 @@ static unique_ptr<Expression> PlanCorrelatedSubquery(Binder &binder, BoundSubque
  compare_cond.left = std::move(expr.children[child_idx]);
  auto &child_type = expr.child_types[child_idx];
  compare_cond.right = BoundCastExpression::AddDefaultCastToType(
- make_uniq<BoundColumnRefExpression>(child_type, plan_columns[child_idx]), expr.child_target);
+ make_uniq<BoundColumnRefExpression>(child_type, plan_columns[child_idx]),
+ expr.child_targets[child_idx]);
  compare_cond.comparison = expr.comparison_type;
  delim_join->conditions.push_back(std::move(compare_cond));
  }
package/src/duckdb/src/planner/binder/statement/bind_create.cpp
@@ -46,6 +46,7 @@
  namespace duckdb {

  void Binder::BindSchemaOrCatalog(ClientContext &context, string &catalog, string &schema) {
+ CatalogEntryRetriever retriever(context);
  if (catalog.empty() && !schema.empty()) {
  // schema is specified - but catalog is not
  // try searching for the catalog instead
@@ -60,8 +61,12 @@ void Binder::BindSchemaOrCatalog(ClientContext &context, string &catalog, string
  catalog_names.push_back(DatabaseManager::GetDefaultDatabase(context));
  }
  for (auto &catalog_name : catalog_names) {
- auto &catalog = Catalog::GetCatalog(context, catalog_name);
- if (catalog.CheckAmbiguousCatalogOrSchema(context, schema)) {
+ auto catalog = Catalog::GetCatalogEntry(retriever, catalog_name);
+ if (!catalog) {
+ continue;
+ }
+ if (catalog->CheckAmbiguousCatalogOrSchema(context, schema)) {
+
  throw BinderException(
  "Ambiguous reference to catalog or schema \"%s\" - use a fully qualified path like \"%s.%s\"",
  schema, catalog_name, schema);
package/src/duckdb/src/planner/binder/statement/bind_create_table.cpp
@@ -271,13 +271,14 @@ void Binder::BindDefaultValues(const ColumnList &columns, vector<unique_ptr<Expr
  schema_name = DEFAULT_SCHEMA;
  }

- // FIXME: We might want to save the existing search path of the binder
  vector<CatalogSearchEntry> defaults_search_path;
  defaults_search_path.emplace_back(catalog_name, schema_name);
  if (schema_name != DEFAULT_SCHEMA) {
  defaults_search_path.emplace_back(catalog_name, DEFAULT_SCHEMA);
  }
- entry_retriever.SetSearchPath(std::move(defaults_search_path));
+
+ auto default_binder = Binder::CreateBinder(context, *this);
+ default_binder->entry_retriever.SetSearchPath(std::move(defaults_search_path));

  for (auto &column : columns.Physical()) {
  unique_ptr<Expression> bound_default;
@@ -288,9 +289,9 @@ void Binder::BindDefaultValues(const ColumnList &columns, vector<unique_ptr<Expr
  if (default_copy->HasParameter()) {
  throw BinderException("DEFAULT values cannot contain parameters");
  }
- ConstantBinder default_binder(*this, context, "DEFAULT value");
- default_binder.target_type = column.Type();
- bound_default = default_binder.Bind(default_copy);
+ ConstantBinder default_value_binder(*default_binder, context, "DEFAULT value");
+ default_value_binder.target_type = column.Type();
+ bound_default = default_value_binder.Bind(default_copy);
  } else {
  // no default value specified: push a default value of constant null
  bound_default = make_uniq<BoundConstantExpression>(Value(column.Type()));
package/src/duckdb/src/storage/checkpoint/table_data_writer.cpp
@@ -10,7 +10,8 @@

  namespace duckdb {

- TableDataWriter::TableDataWriter(TableCatalogEntry &table_p) : table(table_p.Cast<DuckTableEntry>()) {
+ TableDataWriter::TableDataWriter(TableCatalogEntry &table_p, optional_ptr<ClientContext> client_context_p)
+ : table(table_p.Cast<DuckTableEntry>()), client_context(client_context_p) {
  D_ASSERT(table_p.IsDuckTable());
  }

@@ -40,7 +41,8 @@ DatabaseInstance &TableDataWriter::GetDatabase() {

  SingleFileTableDataWriter::SingleFileTableDataWriter(SingleFileCheckpointWriter &checkpoint_manager,
  TableCatalogEntry &table, MetadataWriter &table_data_writer)
- : TableDataWriter(table), checkpoint_manager(checkpoint_manager), table_data_writer(table_data_writer) {
+ : TableDataWriter(table, checkpoint_manager.GetClientContext()), checkpoint_manager(checkpoint_manager),
+ table_data_writer(table_data_writer) {
  }

  unique_ptr<RowGroupWriter> SingleFileTableDataWriter::GetRowGroupWriter(RowGroup &row_group) {
package/src/duckdb/src/storage/checkpoint_manager.cpp
@@ -36,10 +36,11 @@ namespace duckdb {

  void ReorderTableEntries(catalog_entry_vector_t &tables);

- SingleFileCheckpointWriter::SingleFileCheckpointWriter(AttachedDatabase &db, BlockManager &block_manager,
+ SingleFileCheckpointWriter::SingleFileCheckpointWriter(optional_ptr<ClientContext> client_context_p,
+ AttachedDatabase &db, BlockManager &block_manager,
  CheckpointType checkpoint_type)
- : CheckpointWriter(db), partial_block_manager(block_manager, PartialBlockType::FULL_CHECKPOINT),
- checkpoint_type(checkpoint_type) {
+ : CheckpointWriter(db), client_context(client_context_p),
+ partial_block_manager(block_manager, PartialBlockType::FULL_CHECKPOINT), checkpoint_type(checkpoint_type) {
  }

  BlockManager &SingleFileCheckpointWriter::GetBlockManager() {
package/src/duckdb/src/storage/compression/string_uncompressed.cpp
@@ -385,12 +385,19 @@ string_t UncompressedStringStorage::ReadOverflowString(ColumnSegment &segment, V
  uint32_t remaining = length;
  offset += sizeof(uint32_t);

- // allocate a buffer to store the string
- auto alloc_size = MaxValue<idx_t>(block_manager.GetBlockSize(), length);
- // allocate a buffer to store the compressed string
- // TODO: profile this to check if we need to reuse buffer
- auto target_handle = buffer_manager.Allocate(MemoryTag::OVERFLOW_STRINGS, alloc_size);
- auto target_ptr = target_handle.Ptr();
+ BufferHandle target_handle;
+ string_t overflow_string;
+ data_ptr_t target_ptr;
+ bool allocate_block = length >= block_manager.GetBlockSize();
+ if (allocate_block) {
+ // overflow string is bigger than a block - allocate a temporary buffer for it
+ target_handle = buffer_manager.Allocate(MemoryTag::OVERFLOW_STRINGS, length);
+ target_ptr = target_handle.Ptr();
+ } else {
+ // overflow string is smaller than a block - add it to the vector directly
+ overflow_string = StringVector::EmptyString(result, length);
+ target_ptr = data_ptr_cast(overflow_string.GetDataWriteable());
+ }

  // now append the string to the single buffer
  while (remaining > 0) {
@@ -408,10 +415,14 @@ string_t UncompressedStringStorage::ReadOverflowString(ColumnSegment &segment, V
  offset = 0;
  }
  }
-
- auto final_buffer = target_handle.Ptr();
- StringVector::AddHandle(result, std::move(target_handle));
- return ReadString(final_buffer, 0, length);
+ if (allocate_block) {
+ auto final_buffer = target_handle.Ptr();
+ StringVector::AddHandle(result, std::move(target_handle));
+ return ReadString(final_buffer, 0, length);
+ } else {
+ overflow_string.Finalize();
+ return overflow_string;
+ }
  }

  // read the overflow string from memory
package/src/duckdb/src/storage/storage_info.cpp
@@ -77,6 +77,7 @@ static const StorageVersionInfo storage_version_info[] = {
  {"v1.1.2", 64},
  {"v1.1.3", 64},
  {"v1.2.0", 65},
+ {"v1.2.1", 65},
  {nullptr, 0}
  };
  // END OF STORAGE VERSION INFO
@@ -96,6 +97,7 @@ static const SerializationVersionInfo serialization_version_info[] = {
  {"v1.1.2", 3},
  {"v1.1.3", 3},
  {"v1.2.0", 4},
+ {"v1.2.1", 4},
  {"latest", 4},
  {nullptr, 0}
  };
package/src/duckdb/src/storage/storage_manager.cpp
@@ -355,7 +355,7 @@ bool SingleFileStorageManager::IsCheckpointClean(MetaBlockPointer checkpoint_id)
  return block_manager->IsRootBlock(checkpoint_id);
  }

- void SingleFileStorageManager::CreateCheckpoint(CheckpointOptions options) {
+ void SingleFileStorageManager::CreateCheckpoint(optional_ptr<ClientContext> client_context, CheckpointOptions options) {
  if (InMemory() || read_only || !load_complete) {
  return;
  }
@@ -366,7 +366,7 @@ void SingleFileStorageManager::CreateCheckpoint(CheckpointOptions options) {
  if (GetWALSize() > 0 || config.options.force_checkpoint || options.action == CheckpointAction::ALWAYS_CHECKPOINT) {
  // we only need to checkpoint if there is anything in the WAL
  try {
- SingleFileCheckpointWriter checkpointer(db, *block_manager, options.type);
+ SingleFileCheckpointWriter checkpointer(client_context, db, *block_manager, options.type);
  checkpointer.CreateCheckpoint();
  } catch (std::exception &ex) {
  ErrorData error(ex);