duckdb 0.7.2-dev1803.0 → 0.7.2-dev1867.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (142)
  1. package/package.json +1 -1
  2. package/src/duckdb/src/catalog/catalog.cpp +27 -27
  3. package/src/duckdb/src/catalog/catalog_entry/duck_schema_entry.cpp +6 -6
  4. package/src/duckdb/src/catalog/catalog_set.cpp +27 -25
  5. package/src/duckdb/src/catalog/default/default_functions.cpp +4 -4
  6. package/src/duckdb/src/catalog/default/default_types.cpp +4 -4
  7. package/src/duckdb/src/catalog/default/default_views.cpp +4 -4
  8. package/src/duckdb/src/catalog/dependency_list.cpp +7 -6
  9. package/src/duckdb/src/catalog/dependency_manager.cpp +44 -38
  10. package/src/duckdb/src/common/serializer/buffered_file_reader.cpp +11 -6
  11. package/src/duckdb/src/common/types/batched_data_collection.cpp +2 -1
  12. package/src/duckdb/src/common/types/column_data_allocator.cpp +1 -0
  13. package/src/duckdb/src/common/types/vector.cpp +2 -2
  14. package/src/duckdb/src/common/vector_operations/vector_copy.cpp +14 -11
  15. package/src/duckdb/src/execution/operator/aggregate/distinct_aggregate_data.cpp +1 -1
  16. package/src/duckdb/src/execution/operator/aggregate/physical_window.cpp +51 -50
  17. package/src/duckdb/src/execution/operator/persistent/physical_batch_insert.cpp +14 -13
  18. package/src/duckdb/src/execution/operator/persistent/physical_insert.cpp +20 -20
  19. package/src/duckdb/src/execution/operator/schema/physical_create_table.cpp +2 -2
  20. package/src/duckdb/src/execution/physical_plan/plan_create_index.cpp +1 -1
  21. package/src/duckdb/src/execution/physical_plan/plan_create_table.cpp +3 -3
  22. package/src/duckdb/src/execution/physical_plan/plan_delete.cpp +1 -1
  23. package/src/duckdb/src/execution/physical_plan/plan_insert.cpp +1 -1
  24. package/src/duckdb/src/execution/physical_plan/plan_update.cpp +1 -1
  25. package/src/duckdb/src/function/aggregate/sorted_aggregate_function.cpp +3 -3
  26. package/src/duckdb/src/function/cast/cast_function_set.cpp +2 -1
  27. package/src/duckdb/src/function/scalar/sequence/nextval.cpp +29 -29
  28. package/src/duckdb/src/function/scalar/string/damerau_levenshtein.cpp +106 -0
  29. package/src/duckdb/src/function/scalar/string/regexp.cpp +145 -28
  30. package/src/duckdb/src/function/scalar/string_functions.cpp +1 -0
  31. package/src/duckdb/src/function/table/checkpoint.cpp +4 -4
  32. package/src/duckdb/src/function/table/system/duckdb_columns.cpp +24 -24
  33. package/src/duckdb/src/function/table/system/duckdb_constraints.cpp +7 -6
  34. package/src/duckdb/src/function/table/system/duckdb_databases.cpp +1 -1
  35. package/src/duckdb/src/function/table/system/duckdb_dependencies.cpp +11 -11
  36. package/src/duckdb/src/function/table/system/pragma_database_size.cpp +1 -1
  37. package/src/duckdb/src/function/table/system/pragma_table_info.cpp +17 -18
  38. package/src/duckdb/src/function/table/table_scan.cpp +8 -11
  39. package/src/duckdb/src/function/table/version/pragma_version.cpp +2 -2
  40. package/src/duckdb/src/include/duckdb/catalog/catalog.hpp +9 -9
  41. package/src/duckdb/src/include/duckdb/catalog/catalog_entry_map.hpp +38 -0
  42. package/src/duckdb/src/include/duckdb/catalog/catalog_transaction.hpp +4 -3
  43. package/src/duckdb/src/include/duckdb/catalog/default/default_functions.hpp +2 -2
  44. package/src/duckdb/src/include/duckdb/catalog/default/default_types.hpp +2 -2
  45. package/src/duckdb/src/include/duckdb/catalog/default/default_views.hpp +2 -2
  46. package/src/duckdb/src/include/duckdb/catalog/dependency.hpp +4 -5
  47. package/src/duckdb/src/include/duckdb/catalog/dependency_list.hpp +4 -5
  48. package/src/duckdb/src/include/duckdb/catalog/dependency_manager.hpp +10 -9
  49. package/src/duckdb/src/include/duckdb/common/allocator.hpp +2 -1
  50. package/src/duckdb/src/include/duckdb/common/field_writer.hpp +1 -1
  51. package/src/duckdb/src/include/duckdb/common/helper.hpp +9 -0
  52. package/src/duckdb/src/include/duckdb/common/optional_ptr.hpp +29 -6
  53. package/src/duckdb/src/include/duckdb/common/serializer/buffered_file_reader.hpp +6 -5
  54. package/src/duckdb/src/include/duckdb/common/serializer.hpp +1 -1
  55. package/src/duckdb/src/include/duckdb/common/types/row_data_collection.hpp +1 -0
  56. package/src/duckdb/src/include/duckdb/execution/operator/persistent/physical_batch_insert.hpp +2 -2
  57. package/src/duckdb/src/include/duckdb/execution/operator/persistent/physical_insert.hpp +5 -5
  58. package/src/duckdb/src/include/duckdb/execution/operator/schema/physical_create_table.hpp +2 -2
  59. package/src/duckdb/src/include/duckdb/function/cast/default_casts.hpp +3 -2
  60. package/src/duckdb/src/include/duckdb/function/scalar/string_functions.hpp +4 -0
  61. package/src/duckdb/src/include/duckdb/main/database_manager.hpp +4 -3
  62. package/src/duckdb/src/include/duckdb/main/query_result.hpp +3 -2
  63. package/src/duckdb/src/include/duckdb/optimizer/filter_combiner.hpp +7 -7
  64. package/src/duckdb/src/include/duckdb/optimizer/matcher/expression_matcher.hpp +11 -11
  65. package/src/duckdb/src/include/duckdb/optimizer/matcher/set_matcher.hpp +8 -8
  66. package/src/duckdb/src/include/duckdb/optimizer/rule/arithmetic_simplification.hpp +1 -1
  67. package/src/duckdb/src/include/duckdb/optimizer/rule/case_simplification.hpp +1 -1
  68. package/src/duckdb/src/include/duckdb/optimizer/rule/comparison_simplification.hpp +1 -1
  69. package/src/duckdb/src/include/duckdb/optimizer/rule/conjunction_simplification.hpp +2 -2
  70. package/src/duckdb/src/include/duckdb/optimizer/rule/constant_folding.hpp +1 -1
  71. package/src/duckdb/src/include/duckdb/optimizer/rule/date_part_simplification.hpp +1 -1
  72. package/src/duckdb/src/include/duckdb/optimizer/rule/distributivity.hpp +1 -1
  73. package/src/duckdb/src/include/duckdb/optimizer/rule/empty_needle_removal.hpp +1 -1
  74. package/src/duckdb/src/include/duckdb/optimizer/rule/enum_comparison.hpp +1 -1
  75. package/src/duckdb/src/include/duckdb/optimizer/rule/equal_or_null_simplification.hpp +1 -1
  76. package/src/duckdb/src/include/duckdb/optimizer/rule/in_clause_simplification.hpp +1 -1
  77. package/src/duckdb/src/include/duckdb/optimizer/rule/like_optimizations.hpp +1 -1
  78. package/src/duckdb/src/include/duckdb/optimizer/rule/move_constants.hpp +1 -1
  79. package/src/duckdb/src/include/duckdb/optimizer/rule/ordered_aggregate_optimizer.hpp +1 -1
  80. package/src/duckdb/src/include/duckdb/optimizer/rule/regex_optimizations.hpp +1 -1
  81. package/src/duckdb/src/include/duckdb/optimizer/rule.hpp +2 -2
  82. package/src/duckdb/src/include/duckdb/parser/base_expression.hpp +1 -1
  83. package/src/duckdb/src/include/duckdb/parser/expression_map.hpp +19 -6
  84. package/src/duckdb/src/include/duckdb/parser/expression_util.hpp +1 -1
  85. package/src/duckdb/src/include/duckdb/planner/expression.hpp +5 -2
  86. package/src/duckdb/src/include/duckdb/planner/expression_binder/base_select_binder.hpp +1 -1
  87. package/src/duckdb/src/include/duckdb/planner/expression_binder/order_binder.hpp +3 -3
  88. package/src/duckdb/src/include/duckdb/storage/buffer/block_handle.hpp +10 -2
  89. package/src/duckdb/src/include/duckdb/storage/buffer/buffer_pool.hpp +1 -0
  90. package/src/duckdb/src/include/duckdb/storage/buffer_manager.hpp +49 -126
  91. package/src/duckdb/src/include/duckdb/storage/meta_block_reader.hpp +5 -5
  92. package/src/duckdb/src/include/duckdb/storage/standard_buffer_manager.hpp +159 -0
  93. package/src/duckdb/src/include/duckdb/storage/table/column_segment.hpp +1 -0
  94. package/src/duckdb/src/include/duckdb/transaction/meta_transaction.hpp +6 -5
  95. package/src/duckdb/src/main/client_context.cpp +1 -1
  96. package/src/duckdb/src/main/database.cpp +2 -1
  97. package/src/duckdb/src/main/database_manager.cpp +4 -4
  98. package/src/duckdb/src/optimizer/common_aggregate_optimizer.cpp +2 -2
  99. package/src/duckdb/src/optimizer/cse_optimizer.cpp +4 -4
  100. package/src/duckdb/src/optimizer/deliminator.cpp +13 -11
  101. package/src/duckdb/src/optimizer/expression_rewriter.cpp +2 -2
  102. package/src/duckdb/src/optimizer/filter_combiner.cpp +67 -65
  103. package/src/duckdb/src/optimizer/join_order/cardinality_estimator.cpp +1 -0
  104. package/src/duckdb/src/optimizer/join_order/join_order_optimizer.cpp +26 -25
  105. package/src/duckdb/src/optimizer/matcher/expression_matcher.cpp +23 -21
  106. package/src/duckdb/src/optimizer/rule/arithmetic_simplification.cpp +3 -3
  107. package/src/duckdb/src/optimizer/rule/case_simplification.cpp +2 -2
  108. package/src/duckdb/src/optimizer/rule/comparison_simplification.cpp +6 -7
  109. package/src/duckdb/src/optimizer/rule/conjunction_simplification.cpp +9 -8
  110. package/src/duckdb/src/optimizer/rule/constant_folding.cpp +7 -7
  111. package/src/duckdb/src/optimizer/rule/date_part_simplification.cpp +3 -3
  112. package/src/duckdb/src/optimizer/rule/distributivity.cpp +5 -5
  113. package/src/duckdb/src/optimizer/rule/empty_needle_removal.cpp +6 -6
  114. package/src/duckdb/src/optimizer/rule/enum_comparison.cpp +4 -4
  115. package/src/duckdb/src/optimizer/rule/equal_or_null_simplification.cpp +23 -26
  116. package/src/duckdb/src/optimizer/rule/in_clause_simplification_rule.cpp +2 -3
  117. package/src/duckdb/src/optimizer/rule/like_optimizations.cpp +3 -3
  118. package/src/duckdb/src/optimizer/rule/move_constants.cpp +6 -6
  119. package/src/duckdb/src/optimizer/rule/ordered_aggregate_optimizer.cpp +2 -2
  120. package/src/duckdb/src/optimizer/rule/regex_optimizations.cpp +3 -3
  121. package/src/duckdb/src/parser/expression_util.cpp +6 -6
  122. package/src/duckdb/src/parser/transform/helpers/transform_groupby.cpp +3 -3
  123. package/src/duckdb/src/planner/binder/expression/bind_aggregate_expression.cpp +2 -2
  124. package/src/duckdb/src/planner/binder/query_node/bind_select_node.cpp +3 -3
  125. package/src/duckdb/src/planner/binder/query_node/bind_setop_node.cpp +5 -5
  126. package/src/duckdb/src/planner/binder/statement/bind_create_table.cpp +2 -2
  127. package/src/duckdb/src/planner/expression_binder/base_select_binder.cpp +4 -4
  128. package/src/duckdb/src/planner/expression_binder/order_binder.cpp +3 -3
  129. package/src/duckdb/src/storage/buffer/block_handle.cpp +3 -2
  130. package/src/duckdb/src/storage/buffer/block_manager.cpp +3 -1
  131. package/src/duckdb/src/storage/buffer/buffer_handle.cpp +1 -0
  132. package/src/duckdb/src/storage/buffer/buffer_pool_reservation.cpp +3 -0
  133. package/src/duckdb/src/storage/buffer_manager.cpp +35 -726
  134. package/src/duckdb/src/storage/checkpoint_manager.cpp +2 -2
  135. package/src/duckdb/src/storage/meta_block_reader.cpp +6 -5
  136. package/src/duckdb/src/storage/standard_buffer_manager.cpp +801 -0
  137. package/src/duckdb/src/storage/wal_replay.cpp +2 -2
  138. package/src/duckdb/src/transaction/meta_transaction.cpp +13 -13
  139. package/src/duckdb/src/transaction/transaction.cpp +1 -1
  140. package/src/duckdb/src/transaction/transaction_context.cpp +1 -1
  141. package/src/duckdb/ub_src_function_scalar_string.cpp +2 -0
  142. package/src/duckdb/ub_src_storage.cpp +2 -0
@@ -19,10 +19,10 @@ PhysicalBatchInsert::PhysicalBatchInsert(vector<LogicalType> types, TableCatalog
19
19
  bound_defaults(std::move(bound_defaults)) {
20
20
  }
21
21
 
22
- PhysicalBatchInsert::PhysicalBatchInsert(LogicalOperator &op, SchemaCatalogEntry *schema,
22
+ PhysicalBatchInsert::PhysicalBatchInsert(LogicalOperator &op, SchemaCatalogEntry &schema,
23
23
  unique_ptr<BoundCreateTableInfo> info_p, idx_t estimated_cardinality)
24
24
  : PhysicalOperator(PhysicalOperatorType::BATCH_CREATE_TABLE_AS, op.types, estimated_cardinality),
25
- insert_table(nullptr), schema(schema), info(std::move(info_p)) {
25
+ insert_table(nullptr), schema(&schema), info(std::move(info_p)) {
26
26
  PhysicalInsert::GetInsertInfo(*info, insert_types, bound_defaults);
27
27
  }
28
28
 
@@ -100,7 +100,7 @@ public:
100
100
  }
101
101
 
102
102
  mutex lock;
103
- DuckTableEntry *table;
103
+ optional_ptr<DuckTableEntry> table;
104
104
  idx_t insert_count;
105
105
  map<idx_t, unique_ptr<RowGroupCollection>> collections;
106
106
 
@@ -156,7 +156,8 @@ public:
156
156
  }
157
157
 
158
158
  void AddCollection(ClientContext &context, idx_t batch_index, unique_ptr<RowGroupCollection> current_collection,
159
- OptimisticDataWriter *writer = nullptr, bool *written_to_disk = nullptr) {
159
+ optional_ptr<OptimisticDataWriter> writer = nullptr,
160
+ optional_ptr<bool> written_to_disk = nullptr) {
160
161
  vector<unique_ptr<RowGroupCollection>> merge_collections;
161
162
  idx_t merge_count;
162
163
  {
@@ -240,7 +241,7 @@ public:
240
241
  idx_t current_index;
241
242
  TableAppendState current_append_state;
242
243
  unique_ptr<RowGroupCollection> current_collection;
243
- OptimisticDataWriter *writer;
244
+ optional_ptr<OptimisticDataWriter> writer;
244
245
  bool written_to_disk;
245
246
 
246
247
  void FlushToDisk() {
@@ -253,9 +254,9 @@ public:
253
254
  writer->FlushToDisk(*current_collection, true);
254
255
  }
255
256
 
256
- void CreateNewCollection(DuckTableEntry *table, const vector<LogicalType> &insert_types) {
257
- auto &table_info = table->GetStorage().info;
258
- auto &block_manager = TableIOManager::Get(table->GetStorage()).GetBlockManagerForRowData();
257
+ void CreateNewCollection(DuckTableEntry &table, const vector<LogicalType> &insert_types) {
258
+ auto &table_info = table.GetStorage().info;
259
+ auto &block_manager = TableIOManager::Get(table.GetStorage()).GetBlockManagerForRowData();
259
260
  current_collection = make_uniq<RowGroupCollection>(table_info, block_manager, insert_types, MAX_ROW_ID);
260
261
  current_collection->InitializeEmpty();
261
262
  current_collection->InitializeAppend(current_append_state);
@@ -269,8 +270,8 @@ unique_ptr<GlobalSinkState> PhysicalBatchInsert::GetGlobalSinkState(ClientContex
269
270
  // CREATE TABLE AS
270
271
  D_ASSERT(!insert_table);
271
272
  auto &catalog = *schema->catalog;
272
- result->table =
273
- (DuckTableEntry *)catalog.CreateTable(catalog.GetCatalogTransaction(context), schema, info.get());
273
+ result->table = (DuckTableEntry *)catalog.CreateTable(catalog.GetCatalogTransaction(context),
274
+ *schema.get_mutable(), info.get());
274
275
  } else {
275
276
  D_ASSERT(insert_table);
276
277
  D_ASSERT(insert_table->IsDuckTable());
@@ -289,12 +290,12 @@ SinkResultType PhysicalBatchInsert::Sink(ExecutionContext &context, GlobalSinkSt
289
290
  auto &lstate = lstate_p.Cast<BatchInsertLocalState>();
290
291
 
291
292
  auto table = gstate.table;
292
- PhysicalInsert::ResolveDefaults(table, chunk, column_index_map, lstate.default_executor, lstate.insert_chunk);
293
+ PhysicalInsert::ResolveDefaults(*table, chunk, column_index_map, lstate.default_executor, lstate.insert_chunk);
293
294
 
294
295
  if (!lstate.current_collection) {
295
296
  lock_guard<mutex> l(gstate.lock);
296
297
  // no collection yet: create a new one
297
- lstate.CreateNewCollection(table, insert_types);
298
+ lstate.CreateNewCollection(*table, insert_types);
298
299
  lstate.writer = gstate.table->GetStorage().CreateOptimisticWriter(context.client);
299
300
  } else if (lstate.current_index != lstate.batch_index) {
300
301
  // batch index has changed: move the old collection to the global state and create a new collection
@@ -303,7 +304,7 @@ SinkResultType PhysicalBatchInsert::Sink(ExecutionContext &context, GlobalSinkSt
303
304
  lstate.FlushToDisk();
304
305
  gstate.AddCollection(context.client, lstate.current_index, std::move(lstate.current_collection), lstate.writer,
305
306
  &lstate.written_to_disk);
306
- lstate.CreateNewCollection(table, insert_types);
307
+ lstate.CreateNewCollection(*table, insert_types);
307
308
  }
308
309
  lstate.current_index = lstate.batch_index;
309
310
 
@@ -53,10 +53,10 @@ PhysicalInsert::PhysicalInsert(vector<LogicalType> types_p, TableCatalogEntry *t
53
53
  }
54
54
  }
55
55
 
56
- PhysicalInsert::PhysicalInsert(LogicalOperator &op, SchemaCatalogEntry *schema, unique_ptr<BoundCreateTableInfo> info_p,
56
+ PhysicalInsert::PhysicalInsert(LogicalOperator &op, SchemaCatalogEntry &schema, unique_ptr<BoundCreateTableInfo> info_p,
57
57
  idx_t estimated_cardinality, bool parallel)
58
58
  : PhysicalOperator(PhysicalOperatorType::CREATE_TABLE_AS, op.types, estimated_cardinality), insert_table(nullptr),
59
- return_chunk(false), schema(schema), info(std::move(info_p)), parallel(parallel),
59
+ return_chunk(false), schema(&schema), info(std::move(info_p)), parallel(parallel),
60
60
  action_type(OnConflictAction::THROW) {
61
61
  GetInsertInfo(*info, insert_types, bound_defaults);
62
62
  }
@@ -80,7 +80,7 @@ public:
80
80
  }
81
81
 
82
82
  mutex lock;
83
- DuckTableEntry *table;
83
+ optional_ptr<DuckTableEntry> table;
84
84
  idx_t insert_count;
85
85
  bool initialized;
86
86
  LocalAppendState append_state;
@@ -99,7 +99,7 @@ public:
99
99
  ExpressionExecutor default_executor;
100
100
  TableAppendState local_append_state;
101
101
  unique_ptr<RowGroupCollection> local_collection;
102
- OptimisticDataWriter *writer;
102
+ optional_ptr<OptimisticDataWriter> writer;
103
103
  // Rows that have been updated by a DO UPDATE conflict
104
104
  unordered_set<row_t> updated_rows;
105
105
  };
@@ -110,8 +110,8 @@ unique_ptr<GlobalSinkState> PhysicalInsert::GetGlobalSinkState(ClientContext &co
110
110
  // CREATE TABLE AS
111
111
  D_ASSERT(!insert_table);
112
112
  auto &catalog = *schema->catalog;
113
- result->table =
114
- (DuckTableEntry *)catalog.CreateTable(catalog.GetCatalogTransaction(context), schema, info.get());
113
+ result->table = (DuckTableEntry *)catalog.CreateTable(catalog.GetCatalogTransaction(context),
114
+ *schema.get_mutable(), info.get());
115
115
  } else {
116
116
  D_ASSERT(insert_table);
117
117
  D_ASSERT(insert_table->IsDuckTable());
@@ -124,7 +124,7 @@ unique_ptr<LocalSinkState> PhysicalInsert::GetLocalSinkState(ExecutionContext &c
124
124
  return make_uniq<InsertLocalState>(context.client, insert_types, bound_defaults);
125
125
  }
126
126
 
127
- void PhysicalInsert::ResolveDefaults(TableCatalogEntry *table, DataChunk &chunk,
127
+ void PhysicalInsert::ResolveDefaults(const TableCatalogEntry &table, DataChunk &chunk,
128
128
  const physical_index_vector_t<idx_t> &column_index_map,
129
129
  ExpressionExecutor &default_executor, DataChunk &result) {
130
130
  chunk.Flatten();
@@ -135,7 +135,7 @@ void PhysicalInsert::ResolveDefaults(TableCatalogEntry *table, DataChunk &chunk,
135
135
 
136
136
  if (!column_index_map.empty()) {
137
137
  // columns specified by the user, use column_index_map
138
- for (auto &col : table->GetColumns().Physical()) {
138
+ for (auto &col : table.GetColumns().Physical()) {
139
139
  auto storage_idx = col.StorageOid();
140
140
  auto mapped_index = column_index_map[col.Physical()];
141
141
  if (mapped_index == DConstants::INVALID_INDEX) {
@@ -216,7 +216,7 @@ void PhysicalInsert::CombineExistingAndInsertTuples(DataChunk &result, DataChunk
216
216
  result.SetCardinality(input_chunk.size());
217
217
  }
218
218
 
219
- void PhysicalInsert::PerformOnConflictAction(ExecutionContext &context, DataChunk &chunk, TableCatalogEntry *table,
219
+ void PhysicalInsert::PerformOnConflictAction(ExecutionContext &context, DataChunk &chunk, TableCatalogEntry &table,
220
220
  Vector &row_ids) const {
221
221
  if (action_type == OnConflictAction::NOTHING) {
222
222
  return;
@@ -255,9 +255,9 @@ void PhysicalInsert::PerformOnConflictAction(ExecutionContext &context, DataChun
255
255
  executor.Execute(chunk, update_chunk);
256
256
  update_chunk.SetCardinality(chunk);
257
257
 
258
- auto &data_table = table->GetStorage();
258
+ auto &data_table = table.GetStorage();
259
259
  // Perform the update, using the results of the SET expressions
260
- data_table.Update(*table, context.client, row_ids, set_columns, update_chunk);
260
+ data_table.Update(table, context.client, row_ids, set_columns, update_chunk);
261
261
  }
262
262
 
263
263
  // TODO: should we use a hash table to keep track of this instead?
@@ -274,11 +274,11 @@ void PhysicalInsert::RegisterUpdatedRows(InsertLocalState &lstate, const Vector
274
274
  }
275
275
  }
276
276
 
277
- void PhysicalInsert::OnConflictHandling(TableCatalogEntry *table, ExecutionContext &context,
277
+ void PhysicalInsert::OnConflictHandling(TableCatalogEntry &table, ExecutionContext &context,
278
278
  InsertLocalState &lstate) const {
279
- auto &data_table = table->GetStorage();
279
+ auto &data_table = table.GetStorage();
280
280
  if (action_type == OnConflictAction::THROW) {
281
- data_table.VerifyAppendConstraints(*table, context.client, lstate.insert_chunk, nullptr);
281
+ data_table.VerifyAppendConstraints(table, context.client, lstate.insert_chunk, nullptr);
282
282
  return;
283
283
  }
284
284
  // Check whether any conflicts arise, and if they all meet the conflict_target + condition
@@ -287,7 +287,7 @@ void PhysicalInsert::OnConflictHandling(TableCatalogEntry *table, ExecutionConte
287
287
  // We either want to do nothing, or perform an update when conflicts arise
288
288
  ConflictInfo conflict_info(conflict_target);
289
289
  ConflictManager conflict_manager(VerifyExistenceType::APPEND, lstate.insert_chunk.size(), &conflict_info);
290
- data_table.VerifyAppendConstraints(*table, context.client, lstate.insert_chunk, &conflict_manager);
290
+ data_table.VerifyAppendConstraints(table, context.client, lstate.insert_chunk, &conflict_manager);
291
291
  conflict_manager.Finalize();
292
292
  if (conflict_manager.ConflictCount() == 0) {
293
293
  // No conflicts found
@@ -312,7 +312,7 @@ void PhysicalInsert::OnConflictHandling(TableCatalogEntry *table, ExecutionConte
312
312
  // then we scan the existing table for the conflicting tuples, using the rowids
313
313
  scan_chunk.Initialize(context.client, types_to_fetch);
314
314
  auto fetch_state = make_uniq<ColumnFetchState>();
315
- auto &transaction = DuckTransaction::Get(context.client, *table->catalog);
315
+ auto &transaction = DuckTransaction::Get(context.client, *table.catalog);
316
316
  data_table.Fetch(transaction, scan_chunk, columns_to_fetch, row_ids, conflicts.Count(), *fetch_state);
317
317
  }
318
318
 
@@ -335,7 +335,7 @@ void PhysicalInsert::OnConflictHandling(TableCatalogEntry *table, ExecutionConte
335
335
  }
336
336
  combined_chunk.Slice(sel.Selection(), sel.Count());
337
337
  row_ids.Slice(sel.Selection(), sel.Count());
338
- data_table.VerifyAppendConstraints(*table, context.client, combined_chunk, nullptr);
338
+ data_table.VerifyAppendConstraints(table, context.client, combined_chunk, nullptr);
339
339
  throw InternalException("The previous operation was expected to throw but didn't");
340
340
  }
341
341
  }
@@ -359,7 +359,7 @@ SinkResultType PhysicalInsert::Sink(ExecutionContext &context, GlobalSinkState &
359
359
 
360
360
  auto table = gstate.table;
361
361
  auto &storage = table->GetStorage();
362
- PhysicalInsert::ResolveDefaults(table, chunk, column_index_map, lstate.default_executor, lstate.insert_chunk);
362
+ PhysicalInsert::ResolveDefaults(*table, chunk, column_index_map, lstate.default_executor, lstate.insert_chunk);
363
363
 
364
364
  if (!parallel) {
365
365
  if (!gstate.initialized) {
@@ -367,7 +367,7 @@ SinkResultType PhysicalInsert::Sink(ExecutionContext &context, GlobalSinkState &
367
367
  gstate.initialized = true;
368
368
  }
369
369
 
370
- OnConflictHandling(table, context, lstate);
370
+ OnConflictHandling(*table, context, lstate);
371
371
  storage.LocalAppend(gstate.append_state, *table, context.client, lstate.insert_chunk, true);
372
372
 
373
373
  if (return_chunk) {
@@ -387,7 +387,7 @@ SinkResultType PhysicalInsert::Sink(ExecutionContext &context, GlobalSinkState &
387
387
  lstate.local_collection->InitializeAppend(lstate.local_append_state);
388
388
  lstate.writer = gstate.table->GetStorage().CreateOptimisticWriter(context.client);
389
389
  }
390
- OnConflictHandling(table, context, lstate);
390
+ OnConflictHandling(*table, context, lstate);
391
391
  auto new_row_group = lstate.local_collection->Append(lstate.insert_chunk, lstate.local_append_state);
392
392
  if (new_row_group) {
393
393
  lstate.writer->CheckFlushToDisk(*lstate.local_collection);
@@ -7,7 +7,7 @@
7
7
 
8
8
  namespace duckdb {
9
9
 
10
- PhysicalCreateTable::PhysicalCreateTable(LogicalOperator &op, SchemaCatalogEntry *schema,
10
+ PhysicalCreateTable::PhysicalCreateTable(LogicalOperator &op, SchemaCatalogEntry &schema,
11
11
  unique_ptr<BoundCreateTableInfo> info, idx_t estimated_cardinality)
12
12
  : PhysicalOperator(PhysicalOperatorType::CREATE_TABLE, op.types, estimated_cardinality), schema(schema),
13
13
  info(std::move(info)) {
@@ -34,7 +34,7 @@ void PhysicalCreateTable::GetData(ExecutionContext &context, DataChunk &chunk, G
34
34
  if (state.finished) {
35
35
  return;
36
36
  }
37
- auto &catalog = *schema->catalog;
37
+ auto &catalog = *schema.catalog;
38
38
  catalog.CreateTable(catalog.GetCatalogTransaction(context.client), schema, info.get());
39
39
  state.finished = true;
40
40
  }
@@ -43,7 +43,7 @@ unique_ptr<PhysicalOperator> PhysicalPlanGenerator::CreatePlan(LogicalCreateInde
43
43
  make_uniq<PhysicalTableScan>(op.info->scan_types, op.function, std::move(op.bind_data), op.info->column_ids,
44
44
  op.info->names, std::move(table_filters), op.estimated_cardinality);
45
45
 
46
- dependencies.AddDependency(&op.table);
46
+ dependencies.AddDependency(op.table);
47
47
  op.info->column_ids.pop_back();
48
48
 
49
49
  D_ASSERT(op.info->scan_types.size() - 1 <= op.info->names.size());
@@ -21,10 +21,10 @@ unique_ptr<PhysicalOperator> DuckCatalog::PlanCreateTableAs(ClientContext &conte
21
21
  auto num_threads = TaskScheduler::GetScheduler(context).NumberOfThreads();
22
22
  unique_ptr<PhysicalOperator> create;
23
23
  if (!parallel_streaming_insert && use_batch_index) {
24
- create = make_uniq<PhysicalBatchInsert>(op, op.schema, std::move(op.info), op.estimated_cardinality);
24
+ create = make_uniq<PhysicalBatchInsert>(op, *op.schema, std::move(op.info), op.estimated_cardinality);
25
25
 
26
26
  } else {
27
- create = make_uniq<PhysicalInsert>(op, op.schema, std::move(op.info), op.estimated_cardinality,
27
+ create = make_uniq<PhysicalInsert>(op, *op.schema, std::move(op.info), op.estimated_cardinality,
28
28
  parallel_streaming_insert && num_threads > 1);
29
29
  }
30
30
 
@@ -42,7 +42,7 @@ unique_ptr<PhysicalOperator> PhysicalPlanGenerator::CreatePlan(LogicalCreateTabl
42
42
  auto plan = CreatePlan(*op.children[0]);
43
43
  return op.schema->catalog->PlanCreateTableAs(context, op, std::move(plan));
44
44
  } else {
45
- return make_uniq<PhysicalCreateTable>(op, op.schema, std::move(op.info), op.estimated_cardinality);
45
+ return make_uniq<PhysicalCreateTable>(op, *op.schema, std::move(op.info), op.estimated_cardinality);
46
46
  }
47
47
  }
48
48
 
@@ -25,7 +25,7 @@ unique_ptr<PhysicalOperator> PhysicalPlanGenerator::CreatePlan(LogicalDelete &op
25
25
 
26
26
  auto plan = CreatePlan(*op.children[0]);
27
27
 
28
- dependencies.AddDependency(op.table);
28
+ dependencies.AddDependency(*op.table);
29
29
  return op.table->catalog->PlanDelete(context, op, std::move(plan));
30
30
  }
31
31
 
@@ -106,7 +106,7 @@ unique_ptr<PhysicalOperator> PhysicalPlanGenerator::CreatePlan(LogicalInsert &op
106
106
  D_ASSERT(op.children.size() == 1);
107
107
  plan = CreatePlan(*op.children[0]);
108
108
  }
109
- dependencies.AddDependency(op.table);
109
+ dependencies.AddDependency(*op.table);
110
110
  return op.table->catalog->PlanInsert(context, op, std::move(plan));
111
111
  }
112
112
 
@@ -22,7 +22,7 @@ unique_ptr<PhysicalOperator> PhysicalPlanGenerator::CreatePlan(LogicalUpdate &op
22
22
 
23
23
  auto plan = CreatePlan(*op.children[0]);
24
24
 
25
- dependencies.AddDependency(op.table);
25
+ dependencies.AddDependency(*op.table);
26
26
  return op.table->catalog->PlanUpdate(context, op, std::move(plan));
27
27
  }
28
28
 
@@ -429,15 +429,15 @@ void FunctionBinder::BindSortedAggregate(ClientContext &context, BoundAggregateE
429
429
  // similarly, we only need to ORDER BY each aggregate once
430
430
  expression_set_t seen_expressions;
431
431
  for (auto &target : groups) {
432
- seen_expressions.insert(target.get());
432
+ seen_expressions.insert(*target);
433
433
  }
434
434
  vector<BoundOrderByNode> new_order_nodes;
435
435
  for (auto &order_node : expr.order_bys->orders) {
436
- if (seen_expressions.find(order_node.expression.get()) != seen_expressions.end()) {
436
+ if (seen_expressions.find(*order_node.expression) != seen_expressions.end()) {
437
437
  // we do not need to order by this node
438
438
  continue;
439
439
  }
440
- seen_expressions.insert(order_node.expression.get());
440
+ seen_expressions.insert(*order_node.expression);
441
441
  new_order_nodes.push_back(std::move(order_node));
442
442
  }
443
443
  if (new_order_nodes.empty()) {
@@ -6,7 +6,8 @@
6
6
 
7
7
  namespace duckdb {
8
8
 
9
- BindCastInput::BindCastInput(CastFunctionSet &function_set, BindCastInfo *info, optional_ptr<ClientContext> context)
9
+ BindCastInput::BindCastInput(CastFunctionSet &function_set, optional_ptr<BindCastInfo> info,
10
+ optional_ptr<ClientContext> context)
10
11
  : function_set(function_set), info(info), context(context) {
11
12
  }
12
13
 
@@ -15,11 +15,11 @@
15
15
  namespace duckdb {
16
16
 
17
17
  struct NextvalBindData : public FunctionData {
18
- explicit NextvalBindData(SequenceCatalogEntry *sequence) : sequence(sequence) {
18
+ explicit NextvalBindData(optional_ptr<SequenceCatalogEntry> sequence) : sequence(sequence) {
19
19
  }
20
20
 
21
21
  //! The sequence to use for the nextval computation; only if the sequence is a constant
22
- SequenceCatalogEntry *sequence;
22
+ optional_ptr<SequenceCatalogEntry> sequence;
23
23
 
24
24
  unique_ptr<FunctionData> Copy() const override {
25
25
  return make_uniq<NextvalBindData>(sequence);
@@ -32,45 +32,45 @@ struct NextvalBindData : public FunctionData {
32
32
  };
33
33
 
34
34
  struct CurrentSequenceValueOperator {
35
- static int64_t Operation(DuckTransaction &transaction, SequenceCatalogEntry *seq) {
36
- lock_guard<mutex> seqlock(seq->lock);
35
+ static int64_t Operation(DuckTransaction &transaction, SequenceCatalogEntry &seq) {
36
+ lock_guard<mutex> seqlock(seq.lock);
37
37
  int64_t result;
38
- if (seq->usage_count == 0u) {
38
+ if (seq.usage_count == 0u) {
39
39
  throw SequenceException("currval: sequence is not yet defined in this session");
40
40
  }
41
- result = seq->last_value;
41
+ result = seq.last_value;
42
42
  return result;
43
43
  }
44
44
  };
45
45
 
46
46
  struct NextSequenceValueOperator {
47
- static int64_t Operation(DuckTransaction &transaction, SequenceCatalogEntry *seq) {
48
- lock_guard<mutex> seqlock(seq->lock);
47
+ static int64_t Operation(DuckTransaction &transaction, SequenceCatalogEntry &seq) {
48
+ lock_guard<mutex> seqlock(seq.lock);
49
49
  int64_t result;
50
- result = seq->counter;
51
- bool overflow = !TryAddOperator::Operation(seq->counter, seq->increment, seq->counter);
52
- if (seq->cycle) {
50
+ result = seq.counter;
51
+ bool overflow = !TryAddOperator::Operation(seq.counter, seq.increment, seq.counter);
52
+ if (seq.cycle) {
53
53
  if (overflow) {
54
- seq->counter = seq->increment < 0 ? seq->max_value : seq->min_value;
55
- } else if (seq->counter < seq->min_value) {
56
- seq->counter = seq->max_value;
57
- } else if (seq->counter > seq->max_value) {
58
- seq->counter = seq->min_value;
54
+ seq.counter = seq.increment < 0 ? seq.max_value : seq.min_value;
55
+ } else if (seq.counter < seq.min_value) {
56
+ seq.counter = seq.max_value;
57
+ } else if (seq.counter > seq.max_value) {
58
+ seq.counter = seq.min_value;
59
59
  }
60
60
  } else {
61
- if (result < seq->min_value || (overflow && seq->increment < 0)) {
62
- throw SequenceException("nextval: reached minimum value of sequence \"%s\" (%lld)", seq->name,
63
- seq->min_value);
61
+ if (result < seq.min_value || (overflow && seq.increment < 0)) {
62
+ throw SequenceException("nextval: reached minimum value of sequence \"%s\" (%lld)", seq.name,
63
+ seq.min_value);
64
64
  }
65
- if (result > seq->max_value || overflow) {
66
- throw SequenceException("nextval: reached maximum value of sequence \"%s\" (%lld)", seq->name,
67
- seq->max_value);
65
+ if (result > seq.max_value || overflow) {
66
+ throw SequenceException("nextval: reached maximum value of sequence \"%s\" (%lld)", seq.name,
67
+ seq.max_value);
68
68
  }
69
69
  }
70
- seq->last_value = result;
71
- seq->usage_count++;
72
- if (!seq->temporary) {
73
- transaction.sequence_usage[seq] = SequenceValue(seq->usage_count, seq->counter);
70
+ seq.last_value = result;
71
+ seq.usage_count++;
72
+ if (!seq.temporary) {
73
+ transaction.sequence_usage[&seq] = SequenceValue(seq.usage_count, seq.counter);
74
74
  }
75
75
  return result;
76
76
  }
@@ -98,7 +98,7 @@ static void NextValFunction(DataChunk &args, ExpressionState &state, Vector &res
98
98
  auto result_data = FlatVector::GetData<int64_t>(result);
99
99
  for (idx_t i = 0; i < args.size(); i++) {
100
100
  // get the next value from the sequence
101
- result_data[i] = OP::Operation(transaction, info.sequence);
101
+ result_data[i] = OP::Operation(transaction, *info.sequence);
102
102
  }
103
103
  } else {
104
104
  // sequence to use comes from the input
@@ -107,7 +107,7 @@ static void NextValFunction(DataChunk &args, ExpressionState &state, Vector &res
107
107
  auto sequence = BindSequence(context, value.GetString());
108
108
  // finally get the next value from the sequence
109
109
  auto &transaction = DuckTransaction::Get(context, *sequence->catalog);
110
- return OP::Operation(transaction, sequence);
110
+ return OP::Operation(transaction, *sequence);
111
111
  });
112
112
  }
113
113
  }
@@ -129,7 +129,7 @@ static unique_ptr<FunctionData> NextValBind(ClientContext &context, ScalarFuncti
129
129
  static void NextValDependency(BoundFunctionExpression &expr, DependencyList &dependencies) { // adds the sequence held in the bind data (if any) to the dependency list
130
130
  auto &info = expr.bind_info->Cast<NextvalBindData>();
131
131
  if (info.sequence) { // nothing to add when the bind data holds no sequence
132
- dependencies.AddDependency(info.sequence);
132
+ dependencies.AddDependency(*info.sequence);
133
133
  }
134
134
  }
135
135
 
@@ -0,0 +1,106 @@
1
+ #include "duckdb/function/scalar/string_functions.hpp"
2
+ #include "duckdb/common/map.hpp"
3
+ #include "duckdb/common/vector.hpp"
4
+
5
+ namespace duckdb {
6
+
7
+ // Returns the Damerau-Levenshtein distance between source and target (substitution, insertion, deletion and transposition each cost 1), using the Lowrance-Wagner (LW) algorithm: https://doi.org/10.1145%2F321879.321880
8
+ // This can't be calculated as a trivial modification of the Levenshtein algorithm,
9
+ // as a transposition may need to know about matching characters earlier in the strings
10
+ static idx_t DamerauLevenshteinDistance(const string_t &source, const string_t &target) {
11
+ // costs associated with each type of edit, to aid readability
12
+ constexpr uint8_t COST_SUBSTITUTION = 1;
13
+ constexpr uint8_t COST_INSERTION = 1;
14
+ constexpr uint8_t COST_DELETION = 1;
15
+ constexpr uint8_t COST_TRANSPOSITION = 1;
16
+ const auto source_len = source.GetSize();
17
+ const auto target_len = target.GetSize();
18
+
19
+ // If one string is empty, the distance equals the length of the other string
20
+ // either through target_len insertions
21
+ // or source_len deletions
22
+ if (source_len == 0) {
23
+ return target_len * COST_INSERTION;
24
+ } else if (target_len == 0) {
25
+ return source_len * COST_DELETION;
26
+ }
27
+
28
+ const auto source_str = source.GetDataUnsafe(); // indexed per char below; NOTE(review): presumably raw bytes, so multi-byte characters would count per byte - confirm
29
+ const auto target_str = target.GetDataUnsafe();
30
+
31
+ // sentinel larger than any distance that can be returned (delete all of source, insert all of target, plus one):
32
+ const auto inf = source_len * COST_DELETION + target_len * COST_INSERTION + 1;
33
+ // minimum edit distance from prefix of source string to prefix of target string
34
+ // same object as H in LW paper (with indices offset by 1); row 0 and column 0 keep the sentinel value
35
+ vector<vector<idx_t>> distance(source_len + 2, vector<idx_t>(target_len + 2, inf));
36
+ // keeps track of the largest source string index matching each character (stored as index + 1, see the update after the inner loop)
37
+ // same as DA in LW paper
38
+ map<char, idx_t> largest_source_chr_matching;
39
+
40
+ // initialise row/column corresponding to zero-length strings
41
+ // partial string -> empty requires a deletion for each character
42
+ for (idx_t source_idx = 0; source_idx <= source_len; source_idx++) {
43
+ distance[source_idx + 1][1] = source_idx * COST_DELETION;
44
+ }
45
+ // and empty -> partial string means simply inserting characters
46
+ for (idx_t target_idx = 1; target_idx <= target_len; target_idx++) {
47
+ distance[1][target_idx + 1] = target_idx * COST_INSERTION;
48
+ }
49
+ // loop through string indices - these are offset by 2 from distance indices
50
+ for (idx_t source_idx = 0; source_idx < source_len; source_idx++) {
51
+ // keeps track of the largest target string index matching the current source character (stored as index + 1)
52
+ // same as DB in LW paper
53
+ idx_t largest_target_chr_matching;
54
+ largest_target_chr_matching = 0;
55
+ for (idx_t target_idx = 0; target_idx < target_len; target_idx++) {
56
+ // correspond to i1 and j1 in LW paper respectively
57
+ idx_t largest_source_chr_matching_target;
58
+ idx_t largest_target_chr_matching_source;
59
+ // cost associated with a diagonal shift in the distance matrix
60
+ // corresponds to d in LW paper
61
+ uint8_t cost_diagonal_shift;
62
+ largest_source_chr_matching_target = largest_source_chr_matching[target_str[target_idx]]; // NOTE(review): presumably yields 0 for a character not seen yet (std::map operator[] default) - confirm
63
+ largest_target_chr_matching_source = largest_target_chr_matching; // read before the possible update below
64
+ // if characters match, diagonal move costs nothing and we update our largest target index
65
+ // otherwise move is substitution and costs as such
66
+ if (source_str[source_idx] == target_str[target_idx]) {
67
+ cost_diagonal_shift = 0;
68
+ largest_target_chr_matching = target_idx + 1;
69
+ } else {
70
+ cost_diagonal_shift = COST_SUBSTITUTION;
71
+ }
72
+ distance[source_idx + 2][target_idx + 2] = MinValue(
73
+ distance[source_idx + 1][target_idx + 1] + cost_diagonal_shift, // diagonal: match or substitution
74
+ MinValue(distance[source_idx + 2][target_idx + 1] + COST_INSERTION, // insertion
75
+ MinValue(distance[source_idx + 1][target_idx + 2] + COST_DELETION, // deletion
76
+ distance[largest_source_chr_matching_target][largest_target_chr_matching_source] + // transposition candidate, starting from cell (i1, j1)
77
+ (source_idx - largest_source_chr_matching_target) * COST_DELETION +
78
+ COST_TRANSPOSITION +
79
+ (target_idx - largest_target_chr_matching_source) * COST_INSERTION)));
80
+ }
81
+ largest_source_chr_matching[source_str[source_idx]] = source_idx + 1;
82
+ }
83
+ return distance[source_len + 1][target_len + 1]; // cell for the full source against the full target
84
+ }
85
+
86
+ static int64_t DamerauLevenshteinScalarFunction(Vector &result, const string_t source, const string_t target) { // thin wrapper; the result vector parameter is not used in the body
87
+ return (int64_t)DamerauLevenshteinDistance(source, target); // cast the idx_t distance to the int64_t return type
88
+ }
89
+
90
+ static void DamerauLevenshteinFunction(DataChunk &args, ExpressionState &state, Vector &result) { // callback registered for damerau_levenshtein below; state is not used in the body
91
+ auto &source_vec = args.data[0]; // first argument: source strings
92
+ auto &target_vec = args.data[1]; // second argument: target strings
93
+
94
+ BinaryExecutor::Execute<string_t, string_t, int64_t>( // NOTE(review): presumably invokes the lambda once per row for args.size() rows and writes into result - confirm against BinaryExecutor
95
+ source_vec, target_vec, result, args.size(),
96
+ [&](string_t source, string_t target) { return DamerauLevenshteinScalarFunction(result, source, target); });
97
+ }
98
+
99
+ void DamerauLevenshteinFun::RegisterFunction(BuiltinFunctions &set) { // adds the damerau_levenshtein function set to the given builtin functions
100
+ ScalarFunctionSet damerau_levenshtein("damerau_levenshtein");
101
+ damerau_levenshtein.AddFunction(ScalarFunction("damerau_levenshtein", {LogicalType::VARCHAR, LogicalType::VARCHAR}, // single overload: (VARCHAR, VARCHAR) -> BIGINT
102
+ LogicalType::BIGINT, DamerauLevenshteinFunction));
103
+ set.AddFunction(damerau_levenshtein);
104
+ }
105
+
106
+ } // namespace duckdb