duckdb 0.7.2-dev1803.0 → 0.7.2-dev1867.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/duckdb/src/catalog/catalog.cpp +27 -27
- package/src/duckdb/src/catalog/catalog_entry/duck_schema_entry.cpp +6 -6
- package/src/duckdb/src/catalog/catalog_set.cpp +27 -25
- package/src/duckdb/src/catalog/default/default_functions.cpp +4 -4
- package/src/duckdb/src/catalog/default/default_types.cpp +4 -4
- package/src/duckdb/src/catalog/default/default_views.cpp +4 -4
- package/src/duckdb/src/catalog/dependency_list.cpp +7 -6
- package/src/duckdb/src/catalog/dependency_manager.cpp +44 -38
- package/src/duckdb/src/common/serializer/buffered_file_reader.cpp +11 -6
- package/src/duckdb/src/common/types/batched_data_collection.cpp +2 -1
- package/src/duckdb/src/common/types/column_data_allocator.cpp +1 -0
- package/src/duckdb/src/common/types/vector.cpp +2 -2
- package/src/duckdb/src/common/vector_operations/vector_copy.cpp +14 -11
- package/src/duckdb/src/execution/operator/aggregate/distinct_aggregate_data.cpp +1 -1
- package/src/duckdb/src/execution/operator/aggregate/physical_window.cpp +51 -50
- package/src/duckdb/src/execution/operator/persistent/physical_batch_insert.cpp +14 -13
- package/src/duckdb/src/execution/operator/persistent/physical_insert.cpp +20 -20
- package/src/duckdb/src/execution/operator/schema/physical_create_table.cpp +2 -2
- package/src/duckdb/src/execution/physical_plan/plan_create_index.cpp +1 -1
- package/src/duckdb/src/execution/physical_plan/plan_create_table.cpp +3 -3
- package/src/duckdb/src/execution/physical_plan/plan_delete.cpp +1 -1
- package/src/duckdb/src/execution/physical_plan/plan_insert.cpp +1 -1
- package/src/duckdb/src/execution/physical_plan/plan_update.cpp +1 -1
- package/src/duckdb/src/function/aggregate/sorted_aggregate_function.cpp +3 -3
- package/src/duckdb/src/function/cast/cast_function_set.cpp +2 -1
- package/src/duckdb/src/function/scalar/sequence/nextval.cpp +29 -29
- package/src/duckdb/src/function/scalar/string/damerau_levenshtein.cpp +106 -0
- package/src/duckdb/src/function/scalar/string/regexp.cpp +145 -28
- package/src/duckdb/src/function/scalar/string_functions.cpp +1 -0
- package/src/duckdb/src/function/table/checkpoint.cpp +4 -4
- package/src/duckdb/src/function/table/system/duckdb_columns.cpp +24 -24
- package/src/duckdb/src/function/table/system/duckdb_constraints.cpp +7 -6
- package/src/duckdb/src/function/table/system/duckdb_databases.cpp +1 -1
- package/src/duckdb/src/function/table/system/duckdb_dependencies.cpp +11 -11
- package/src/duckdb/src/function/table/system/pragma_database_size.cpp +1 -1
- package/src/duckdb/src/function/table/system/pragma_table_info.cpp +17 -18
- package/src/duckdb/src/function/table/table_scan.cpp +8 -11
- package/src/duckdb/src/function/table/version/pragma_version.cpp +2 -2
- package/src/duckdb/src/include/duckdb/catalog/catalog.hpp +9 -9
- package/src/duckdb/src/include/duckdb/catalog/catalog_entry_map.hpp +38 -0
- package/src/duckdb/src/include/duckdb/catalog/catalog_transaction.hpp +4 -3
- package/src/duckdb/src/include/duckdb/catalog/default/default_functions.hpp +2 -2
- package/src/duckdb/src/include/duckdb/catalog/default/default_types.hpp +2 -2
- package/src/duckdb/src/include/duckdb/catalog/default/default_views.hpp +2 -2
- package/src/duckdb/src/include/duckdb/catalog/dependency.hpp +4 -5
- package/src/duckdb/src/include/duckdb/catalog/dependency_list.hpp +4 -5
- package/src/duckdb/src/include/duckdb/catalog/dependency_manager.hpp +10 -9
- package/src/duckdb/src/include/duckdb/common/allocator.hpp +2 -1
- package/src/duckdb/src/include/duckdb/common/field_writer.hpp +1 -1
- package/src/duckdb/src/include/duckdb/common/helper.hpp +9 -0
- package/src/duckdb/src/include/duckdb/common/optional_ptr.hpp +29 -6
- package/src/duckdb/src/include/duckdb/common/serializer/buffered_file_reader.hpp +6 -5
- package/src/duckdb/src/include/duckdb/common/serializer.hpp +1 -1
- package/src/duckdb/src/include/duckdb/common/types/row_data_collection.hpp +1 -0
- package/src/duckdb/src/include/duckdb/execution/operator/persistent/physical_batch_insert.hpp +2 -2
- package/src/duckdb/src/include/duckdb/execution/operator/persistent/physical_insert.hpp +5 -5
- package/src/duckdb/src/include/duckdb/execution/operator/schema/physical_create_table.hpp +2 -2
- package/src/duckdb/src/include/duckdb/function/cast/default_casts.hpp +3 -2
- package/src/duckdb/src/include/duckdb/function/scalar/string_functions.hpp +4 -0
- package/src/duckdb/src/include/duckdb/main/database_manager.hpp +4 -3
- package/src/duckdb/src/include/duckdb/main/query_result.hpp +3 -2
- package/src/duckdb/src/include/duckdb/optimizer/filter_combiner.hpp +7 -7
- package/src/duckdb/src/include/duckdb/optimizer/matcher/expression_matcher.hpp +11 -11
- package/src/duckdb/src/include/duckdb/optimizer/matcher/set_matcher.hpp +8 -8
- package/src/duckdb/src/include/duckdb/optimizer/rule/arithmetic_simplification.hpp +1 -1
- package/src/duckdb/src/include/duckdb/optimizer/rule/case_simplification.hpp +1 -1
- package/src/duckdb/src/include/duckdb/optimizer/rule/comparison_simplification.hpp +1 -1
- package/src/duckdb/src/include/duckdb/optimizer/rule/conjunction_simplification.hpp +2 -2
- package/src/duckdb/src/include/duckdb/optimizer/rule/constant_folding.hpp +1 -1
- package/src/duckdb/src/include/duckdb/optimizer/rule/date_part_simplification.hpp +1 -1
- package/src/duckdb/src/include/duckdb/optimizer/rule/distributivity.hpp +1 -1
- package/src/duckdb/src/include/duckdb/optimizer/rule/empty_needle_removal.hpp +1 -1
- package/src/duckdb/src/include/duckdb/optimizer/rule/enum_comparison.hpp +1 -1
- package/src/duckdb/src/include/duckdb/optimizer/rule/equal_or_null_simplification.hpp +1 -1
- package/src/duckdb/src/include/duckdb/optimizer/rule/in_clause_simplification.hpp +1 -1
- package/src/duckdb/src/include/duckdb/optimizer/rule/like_optimizations.hpp +1 -1
- package/src/duckdb/src/include/duckdb/optimizer/rule/move_constants.hpp +1 -1
- package/src/duckdb/src/include/duckdb/optimizer/rule/ordered_aggregate_optimizer.hpp +1 -1
- package/src/duckdb/src/include/duckdb/optimizer/rule/regex_optimizations.hpp +1 -1
- package/src/duckdb/src/include/duckdb/optimizer/rule.hpp +2 -2
- package/src/duckdb/src/include/duckdb/parser/base_expression.hpp +1 -1
- package/src/duckdb/src/include/duckdb/parser/expression_map.hpp +19 -6
- package/src/duckdb/src/include/duckdb/parser/expression_util.hpp +1 -1
- package/src/duckdb/src/include/duckdb/planner/expression.hpp +5 -2
- package/src/duckdb/src/include/duckdb/planner/expression_binder/base_select_binder.hpp +1 -1
- package/src/duckdb/src/include/duckdb/planner/expression_binder/order_binder.hpp +3 -3
- package/src/duckdb/src/include/duckdb/storage/buffer/block_handle.hpp +10 -2
- package/src/duckdb/src/include/duckdb/storage/buffer/buffer_pool.hpp +1 -0
- package/src/duckdb/src/include/duckdb/storage/buffer_manager.hpp +49 -126
- package/src/duckdb/src/include/duckdb/storage/meta_block_reader.hpp +5 -5
- package/src/duckdb/src/include/duckdb/storage/standard_buffer_manager.hpp +159 -0
- package/src/duckdb/src/include/duckdb/storage/table/column_segment.hpp +1 -0
- package/src/duckdb/src/include/duckdb/transaction/meta_transaction.hpp +6 -5
- package/src/duckdb/src/main/client_context.cpp +1 -1
- package/src/duckdb/src/main/database.cpp +2 -1
- package/src/duckdb/src/main/database_manager.cpp +4 -4
- package/src/duckdb/src/optimizer/common_aggregate_optimizer.cpp +2 -2
- package/src/duckdb/src/optimizer/cse_optimizer.cpp +4 -4
- package/src/duckdb/src/optimizer/deliminator.cpp +13 -11
- package/src/duckdb/src/optimizer/expression_rewriter.cpp +2 -2
- package/src/duckdb/src/optimizer/filter_combiner.cpp +67 -65
- package/src/duckdb/src/optimizer/join_order/cardinality_estimator.cpp +1 -0
- package/src/duckdb/src/optimizer/join_order/join_order_optimizer.cpp +26 -25
- package/src/duckdb/src/optimizer/matcher/expression_matcher.cpp +23 -21
- package/src/duckdb/src/optimizer/rule/arithmetic_simplification.cpp +3 -3
- package/src/duckdb/src/optimizer/rule/case_simplification.cpp +2 -2
- package/src/duckdb/src/optimizer/rule/comparison_simplification.cpp +6 -7
- package/src/duckdb/src/optimizer/rule/conjunction_simplification.cpp +9 -8
- package/src/duckdb/src/optimizer/rule/constant_folding.cpp +7 -7
- package/src/duckdb/src/optimizer/rule/date_part_simplification.cpp +3 -3
- package/src/duckdb/src/optimizer/rule/distributivity.cpp +5 -5
- package/src/duckdb/src/optimizer/rule/empty_needle_removal.cpp +6 -6
- package/src/duckdb/src/optimizer/rule/enum_comparison.cpp +4 -4
- package/src/duckdb/src/optimizer/rule/equal_or_null_simplification.cpp +23 -26
- package/src/duckdb/src/optimizer/rule/in_clause_simplification_rule.cpp +2 -3
- package/src/duckdb/src/optimizer/rule/like_optimizations.cpp +3 -3
- package/src/duckdb/src/optimizer/rule/move_constants.cpp +6 -6
- package/src/duckdb/src/optimizer/rule/ordered_aggregate_optimizer.cpp +2 -2
- package/src/duckdb/src/optimizer/rule/regex_optimizations.cpp +3 -3
- package/src/duckdb/src/parser/expression_util.cpp +6 -6
- package/src/duckdb/src/parser/transform/helpers/transform_groupby.cpp +3 -3
- package/src/duckdb/src/planner/binder/expression/bind_aggregate_expression.cpp +2 -2
- package/src/duckdb/src/planner/binder/query_node/bind_select_node.cpp +3 -3
- package/src/duckdb/src/planner/binder/query_node/bind_setop_node.cpp +5 -5
- package/src/duckdb/src/planner/binder/statement/bind_create_table.cpp +2 -2
- package/src/duckdb/src/planner/expression_binder/base_select_binder.cpp +4 -4
- package/src/duckdb/src/planner/expression_binder/order_binder.cpp +3 -3
- package/src/duckdb/src/storage/buffer/block_handle.cpp +3 -2
- package/src/duckdb/src/storage/buffer/block_manager.cpp +3 -1
- package/src/duckdb/src/storage/buffer/buffer_handle.cpp +1 -0
- package/src/duckdb/src/storage/buffer/buffer_pool_reservation.cpp +3 -0
- package/src/duckdb/src/storage/buffer_manager.cpp +35 -726
- package/src/duckdb/src/storage/checkpoint_manager.cpp +2 -2
- package/src/duckdb/src/storage/meta_block_reader.cpp +6 -5
- package/src/duckdb/src/storage/standard_buffer_manager.cpp +801 -0
- package/src/duckdb/src/storage/wal_replay.cpp +2 -2
- package/src/duckdb/src/transaction/meta_transaction.cpp +13 -13
- package/src/duckdb/src/transaction/transaction.cpp +1 -1
- package/src/duckdb/src/transaction/transaction_context.cpp +1 -1
- package/src/duckdb/ub_src_function_scalar_string.cpp +2 -0
- package/src/duckdb/ub_src_storage.cpp +2 -0

package/src/duckdb/src/execution/operator/persistent/physical_batch_insert.cpp
@@ -19,10 +19,10 @@ PhysicalBatchInsert::PhysicalBatchInsert(vector<LogicalType> types, TableCatalog
       bound_defaults(std::move(bound_defaults)) {
 }
 
-PhysicalBatchInsert::PhysicalBatchInsert(LogicalOperator &op, SchemaCatalogEntry *schema,
+PhysicalBatchInsert::PhysicalBatchInsert(LogicalOperator &op, SchemaCatalogEntry &schema,
     unique_ptr<BoundCreateTableInfo> info_p, idx_t estimated_cardinality)
     : PhysicalOperator(PhysicalOperatorType::BATCH_CREATE_TABLE_AS, op.types, estimated_cardinality),
-      insert_table(nullptr), schema(schema), info(std::move(info_p)) {
+      insert_table(nullptr), schema(&schema), info(std::move(info_p)) {
     PhysicalInsert::GetInsertInfo(*info, insert_types, bound_defaults);
 }
 
@@ -100,7 +100,7 @@ public:
     }
 
     mutex lock;
-    DuckTableEntry *table;
+    optional_ptr<DuckTableEntry> table;
     idx_t insert_count;
     map<idx_t, unique_ptr<RowGroupCollection>> collections;
 
@@ -156,7 +156,8 @@ public:
    }
 
     void AddCollection(ClientContext &context, idx_t batch_index, unique_ptr<RowGroupCollection> current_collection,
-        OptimisticDataWriter *writer = nullptr, bool *written_to_disk = nullptr) {
+        optional_ptr<OptimisticDataWriter> writer = nullptr,
+        optional_ptr<bool> written_to_disk = nullptr) {
         vector<unique_ptr<RowGroupCollection>> merge_collections;
         idx_t merge_count;
         {
@@ -240,7 +241,7 @@ public:
     idx_t current_index;
     TableAppendState current_append_state;
     unique_ptr<RowGroupCollection> current_collection;
-    OptimisticDataWriter *writer;
+    optional_ptr<OptimisticDataWriter> writer;
     bool written_to_disk;
 
     void FlushToDisk() {
@@ -253,9 +254,9 @@
         writer->FlushToDisk(*current_collection, true);
     }
 
-    void CreateNewCollection(DuckTableEntry *table, const vector<LogicalType> &insert_types) {
-        auto &table_info = table->GetStorage().info;
-        auto &block_manager = TableIOManager::Get(table->GetStorage()).GetBlockManagerForRowData();
+    void CreateNewCollection(DuckTableEntry &table, const vector<LogicalType> &insert_types) {
+        auto &table_info = table.GetStorage().info;
+        auto &block_manager = TableIOManager::Get(table.GetStorage()).GetBlockManagerForRowData();
         current_collection = make_uniq<RowGroupCollection>(table_info, block_manager, insert_types, MAX_ROW_ID);
         current_collection->InitializeEmpty();
         current_collection->InitializeAppend(current_append_state);
@@ -269,8 +270,8 @@ unique_ptr<GlobalSinkState> PhysicalBatchInsert::GetGlobalSinkState(ClientContex
         // CREATE TABLE AS
         D_ASSERT(!insert_table);
         auto &catalog = *schema->catalog;
-        result->table =
-            (DuckTableEntry *)catalog.CreateTable(catalog.GetCatalogTransaction(context), schema, info.get());
+        result->table = (DuckTableEntry *)catalog.CreateTable(catalog.GetCatalogTransaction(context),
+            *schema.get_mutable(), info.get());
     } else {
         D_ASSERT(insert_table);
         D_ASSERT(insert_table->IsDuckTable());
@@ -289,12 +290,12 @@ SinkResultType PhysicalBatchInsert::Sink(ExecutionContext &context, GlobalSinkSt
     auto &lstate = lstate_p.Cast<BatchInsertLocalState>();
 
     auto table = gstate.table;
-    PhysicalInsert::ResolveDefaults(table, chunk, column_index_map, lstate.default_executor, lstate.insert_chunk);
+    PhysicalInsert::ResolveDefaults(*table, chunk, column_index_map, lstate.default_executor, lstate.insert_chunk);
 
     if (!lstate.current_collection) {
         lock_guard<mutex> l(gstate.lock);
         // no collection yet: create a new one
-        lstate.CreateNewCollection(table, insert_types);
+        lstate.CreateNewCollection(*table, insert_types);
         lstate.writer = gstate.table->GetStorage().CreateOptimisticWriter(context.client);
     } else if (lstate.current_index != lstate.batch_index) {
         // batch index has changed: move the old collection to the global state and create a new collection
@@ -303,7 +304,7 @@ SinkResultType PhysicalBatchInsert::Sink(ExecutionContext &context, GlobalSinkSt
         lstate.FlushToDisk();
         gstate.AddCollection(context.client, lstate.current_index, std::move(lstate.current_collection), lstate.writer,
                              &lstate.written_to_disk);
-        lstate.CreateNewCollection(table, insert_types);
+        lstate.CreateNewCollection(*table, insert_types);
     }
     lstate.current_index = lstate.batch_index;
 

package/src/duckdb/src/execution/operator/persistent/physical_insert.cpp
@@ -53,10 +53,10 @@ PhysicalInsert::PhysicalInsert(vector<LogicalType> types_p, TableCatalogEntry *t
     }
 }
 
-PhysicalInsert::PhysicalInsert(LogicalOperator &op, SchemaCatalogEntry *schema, unique_ptr<BoundCreateTableInfo> info_p,
+PhysicalInsert::PhysicalInsert(LogicalOperator &op, SchemaCatalogEntry &schema, unique_ptr<BoundCreateTableInfo> info_p,
     idx_t estimated_cardinality, bool parallel)
     : PhysicalOperator(PhysicalOperatorType::CREATE_TABLE_AS, op.types, estimated_cardinality), insert_table(nullptr),
-      return_chunk(false), schema(schema), info(std::move(info_p)), parallel(parallel),
+      return_chunk(false), schema(&schema), info(std::move(info_p)), parallel(parallel),
       action_type(OnConflictAction::THROW) {
     GetInsertInfo(*info, insert_types, bound_defaults);
 }
@@ -80,7 +80,7 @@ public:
     }
 
     mutex lock;
-    DuckTableEntry *table;
+    optional_ptr<DuckTableEntry> table;
     idx_t insert_count;
     bool initialized;
     LocalAppendState append_state;
@@ -99,7 +99,7 @@ public:
     ExpressionExecutor default_executor;
     TableAppendState local_append_state;
    unique_ptr<RowGroupCollection> local_collection;
-    OptimisticDataWriter *writer;
+    optional_ptr<OptimisticDataWriter> writer;
     // Rows that have been updated by a DO UPDATE conflict
     unordered_set<row_t> updated_rows;
 };
@@ -110,8 +110,8 @@ unique_ptr<GlobalSinkState> PhysicalInsert::GetGlobalSinkState(ClientContext &co
         // CREATE TABLE AS
         D_ASSERT(!insert_table);
         auto &catalog = *schema->catalog;
-        result->table =
-            (DuckTableEntry *)catalog.CreateTable(catalog.GetCatalogTransaction(context), schema, info.get());
+        result->table = (DuckTableEntry *)catalog.CreateTable(catalog.GetCatalogTransaction(context),
+            *schema.get_mutable(), info.get());
     } else {
         D_ASSERT(insert_table);
         D_ASSERT(insert_table->IsDuckTable());
@@ -124,7 +124,7 @@ unique_ptr<LocalSinkState> PhysicalInsert::GetLocalSinkState(ExecutionContext &c
     return make_uniq<InsertLocalState>(context.client, insert_types, bound_defaults);
 }
 
-void PhysicalInsert::ResolveDefaults(TableCatalogEntry *table, DataChunk &chunk,
+void PhysicalInsert::ResolveDefaults(const TableCatalogEntry &table, DataChunk &chunk,
     const physical_index_vector_t<idx_t> &column_index_map,
     ExpressionExecutor &default_executor, DataChunk &result) {
     chunk.Flatten();
@@ -135,7 +135,7 @@ void PhysicalInsert::ResolveDefaults(TableCatalogEntry *table, DataChunk &chunk,
 
     if (!column_index_map.empty()) {
         // columns specified by the user, use column_index_map
-        for (auto &col : table->GetColumns().Physical()) {
+        for (auto &col : table.GetColumns().Physical()) {
             auto storage_idx = col.StorageOid();
             auto mapped_index = column_index_map[col.Physical()];
             if (mapped_index == DConstants::INVALID_INDEX) {
@@ -216,7 +216,7 @@ void PhysicalInsert::CombineExistingAndInsertTuples(DataChunk &result, DataChunk
     result.SetCardinality(input_chunk.size());
 }
 
-void PhysicalInsert::PerformOnConflictAction(ExecutionContext &context, DataChunk &chunk, TableCatalogEntry *table,
+void PhysicalInsert::PerformOnConflictAction(ExecutionContext &context, DataChunk &chunk, TableCatalogEntry &table,
     Vector &row_ids) const {
     if (action_type == OnConflictAction::NOTHING) {
         return;
@@ -255,9 +255,9 @@ void PhysicalInsert::PerformOnConflictAction(ExecutionContext &context, DataChun
     executor.Execute(chunk, update_chunk);
     update_chunk.SetCardinality(chunk);
 
-    auto &data_table = table->GetStorage();
+    auto &data_table = table.GetStorage();
     // Perform the update, using the results of the SET expressions
-    data_table.Update(*table, context.client, row_ids, set_columns, update_chunk);
+    data_table.Update(table, context.client, row_ids, set_columns, update_chunk);
 }
 
 // TODO: should we use a hash table to keep track of this instead?
@@ -274,11 +274,11 @@ void PhysicalInsert::RegisterUpdatedRows(InsertLocalState &lstate, const Vector
     }
 }
 
-void PhysicalInsert::OnConflictHandling(TableCatalogEntry *table, ExecutionContext &context,
+void PhysicalInsert::OnConflictHandling(TableCatalogEntry &table, ExecutionContext &context,
     InsertLocalState &lstate) const {
-    auto &data_table = table->GetStorage();
+    auto &data_table = table.GetStorage();
     if (action_type == OnConflictAction::THROW) {
-        data_table.VerifyAppendConstraints(*table, context.client, lstate.insert_chunk, nullptr);
+        data_table.VerifyAppendConstraints(table, context.client, lstate.insert_chunk, nullptr);
         return;
     }
     // Check whether any conflicts arise, and if they all meet the conflict_target + condition
@@ -287,7 +287,7 @@ void PhysicalInsert::OnConflictHandling(TableCatalogEntry *table, ExecutionConte
     // We either want to do nothing, or perform an update when conflicts arise
     ConflictInfo conflict_info(conflict_target);
     ConflictManager conflict_manager(VerifyExistenceType::APPEND, lstate.insert_chunk.size(), &conflict_info);
-    data_table.VerifyAppendConstraints(*table, context.client, lstate.insert_chunk, &conflict_manager);
+    data_table.VerifyAppendConstraints(table, context.client, lstate.insert_chunk, &conflict_manager);
     conflict_manager.Finalize();
     if (conflict_manager.ConflictCount() == 0) {
         // No conflicts found
@@ -312,7 +312,7 @@ void PhysicalInsert::OnConflictHandling(TableCatalogEntry *table, ExecutionConte
     // then we scan the existing table for the conflicting tuples, using the rowids
     scan_chunk.Initialize(context.client, types_to_fetch);
     auto fetch_state = make_uniq<ColumnFetchState>();
-    auto &transaction = DuckTransaction::Get(context.client, *table->catalog);
+    auto &transaction = DuckTransaction::Get(context.client, *table.catalog);
     data_table.Fetch(transaction, scan_chunk, columns_to_fetch, row_ids, conflicts.Count(), *fetch_state);
 }
 
@@ -335,7 +335,7 @@ void PhysicalInsert::OnConflictHandling(TableCatalogEntry *table, ExecutionConte
     }
     combined_chunk.Slice(sel.Selection(), sel.Count());
     row_ids.Slice(sel.Selection(), sel.Count());
-    data_table.VerifyAppendConstraints(*table, context.client, combined_chunk, nullptr);
+    data_table.VerifyAppendConstraints(table, context.client, combined_chunk, nullptr);
     throw InternalException("The previous operation was expected to throw but didn't");
     }
 }
@@ -359,7 +359,7 @@ SinkResultType PhysicalInsert::Sink(ExecutionContext &context, GlobalSinkState &
 
     auto table = gstate.table;
     auto &storage = table->GetStorage();
-    PhysicalInsert::ResolveDefaults(table, chunk, column_index_map, lstate.default_executor, lstate.insert_chunk);
+    PhysicalInsert::ResolveDefaults(*table, chunk, column_index_map, lstate.default_executor, lstate.insert_chunk);
 
     if (!parallel) {
         if (!gstate.initialized) {
@@ -367,7 +367,7 @@
            gstate.initialized = true;
        }
 
-        OnConflictHandling(table, context, lstate);
+        OnConflictHandling(*table, context, lstate);
         storage.LocalAppend(gstate.append_state, *table, context.client, lstate.insert_chunk, true);
 
         if (return_chunk) {
@@ -387,7 +387,7 @@
            lstate.local_collection->InitializeAppend(lstate.local_append_state);
            lstate.writer = gstate.table->GetStorage().CreateOptimisticWriter(context.client);
        }
-        OnConflictHandling(table, context, lstate);
+        OnConflictHandling(*table, context, lstate);
         auto new_row_group = lstate.local_collection->Append(lstate.insert_chunk, lstate.local_append_state);
         if (new_row_group) {
            lstate.writer->CheckFlushToDisk(*lstate.local_collection);

package/src/duckdb/src/execution/operator/schema/physical_create_table.cpp
@@ -7,7 +7,7 @@
 
 namespace duckdb {
 
-PhysicalCreateTable::PhysicalCreateTable(LogicalOperator &op, SchemaCatalogEntry *schema,
+PhysicalCreateTable::PhysicalCreateTable(LogicalOperator &op, SchemaCatalogEntry &schema,
     unique_ptr<BoundCreateTableInfo> info, idx_t estimated_cardinality)
     : PhysicalOperator(PhysicalOperatorType::CREATE_TABLE, op.types, estimated_cardinality), schema(schema),
       info(std::move(info)) {
@@ -34,7 +34,7 @@ void PhysicalCreateTable::GetData(ExecutionContext &context, DataChunk &chunk, G
     if (state.finished) {
         return;
     }
-    auto &catalog = *schema->catalog;
+    auto &catalog = *schema.catalog;
     catalog.CreateTable(catalog.GetCatalogTransaction(context.client), schema, info.get());
     state.finished = true;
 }

package/src/duckdb/src/execution/physical_plan/plan_create_index.cpp
@@ -43,7 +43,7 @@ unique_ptr<PhysicalOperator> PhysicalPlanGenerator::CreatePlan(LogicalCreateInde
         make_uniq<PhysicalTableScan>(op.info->scan_types, op.function, std::move(op.bind_data), op.info->column_ids,
                                      op.info->names, std::move(table_filters), op.estimated_cardinality);
 
-    dependencies.AddDependency(&op.table);
+    dependencies.AddDependency(op.table);
     op.info->column_ids.pop_back();
 
     D_ASSERT(op.info->scan_types.size() - 1 <= op.info->names.size());

package/src/duckdb/src/execution/physical_plan/plan_create_table.cpp
@@ -21,10 +21,10 @@ unique_ptr<PhysicalOperator> DuckCatalog::PlanCreateTableAs(ClientContext &conte
     auto num_threads = TaskScheduler::GetScheduler(context).NumberOfThreads();
     unique_ptr<PhysicalOperator> create;
     if (!parallel_streaming_insert && use_batch_index) {
-        create = make_uniq<PhysicalBatchInsert>(op, op.schema, std::move(op.info), op.estimated_cardinality);
+        create = make_uniq<PhysicalBatchInsert>(op, *op.schema, std::move(op.info), op.estimated_cardinality);
 
     } else {
-        create = make_uniq<PhysicalInsert>(op, op.schema, std::move(op.info), op.estimated_cardinality,
+        create = make_uniq<PhysicalInsert>(op, *op.schema, std::move(op.info), op.estimated_cardinality,
                                            parallel_streaming_insert && num_threads > 1);
     }
 
@@ -42,7 +42,7 @@ unique_ptr<PhysicalOperator> PhysicalPlanGenerator::CreatePlan(LogicalCreateTabl
         auto plan = CreatePlan(*op.children[0]);
         return op.schema->catalog->PlanCreateTableAs(context, op, std::move(plan));
     } else {
-        return make_uniq<PhysicalCreateTable>(op, op.schema, std::move(op.info), op.estimated_cardinality);
+        return make_uniq<PhysicalCreateTable>(op, *op.schema, std::move(op.info), op.estimated_cardinality);
     }
 }
 

package/src/duckdb/src/execution/physical_plan/plan_delete.cpp
@@ -25,7 +25,7 @@ unique_ptr<PhysicalOperator> PhysicalPlanGenerator::CreatePlan(LogicalDelete &op
 
     auto plan = CreatePlan(*op.children[0]);
 
-    dependencies.AddDependency(op.table);
+    dependencies.AddDependency(*op.table);
     return op.table->catalog->PlanDelete(context, op, std::move(plan));
 }
 

package/src/duckdb/src/execution/physical_plan/plan_insert.cpp
@@ -106,7 +106,7 @@ unique_ptr<PhysicalOperator> PhysicalPlanGenerator::CreatePlan(LogicalInsert &op
         D_ASSERT(op.children.size() == 1);
         plan = CreatePlan(*op.children[0]);
     }
-    dependencies.AddDependency(op.table);
+    dependencies.AddDependency(*op.table);
     return op.table->catalog->PlanInsert(context, op, std::move(plan));
 }
 

package/src/duckdb/src/execution/physical_plan/plan_update.cpp
@@ -22,7 +22,7 @@ unique_ptr<PhysicalOperator> PhysicalPlanGenerator::CreatePlan(LogicalUpdate &op
 
     auto plan = CreatePlan(*op.children[0]);
 
-    dependencies.AddDependency(op.table);
+    dependencies.AddDependency(*op.table);
     return op.table->catalog->PlanUpdate(context, op, std::move(plan));
 }
 

package/src/duckdb/src/function/aggregate/sorted_aggregate_function.cpp
@@ -429,15 +429,15 @@ void FunctionBinder::BindSortedAggregate(ClientContext &context, BoundAggregateE
     // similarly, we only need to ORDER BY each aggregate once
     expression_set_t seen_expressions;
     for (auto &target : groups) {
-        seen_expressions.insert(target.get());
+        seen_expressions.insert(*target);
     }
     vector<BoundOrderByNode> new_order_nodes;
     for (auto &order_node : expr.order_bys->orders) {
-        if (seen_expressions.find(order_node.expression.get()) != seen_expressions.end()) {
+        if (seen_expressions.find(*order_node.expression) != seen_expressions.end()) {
             // we do not need to order by this node
             continue;
         }
-        seen_expressions.insert(order_node.expression.get());
+        seen_expressions.insert(*order_node.expression);
         new_order_nodes.push_back(std::move(order_node));
     }
     if (new_order_nodes.empty()) {

package/src/duckdb/src/function/cast/cast_function_set.cpp
@@ -6,7 +6,8 @@
 
 namespace duckdb {
 
-BindCastInput::BindCastInput(CastFunctionSet &function_set, BindCastInfo *info, ClientContext *context)
+BindCastInput::BindCastInput(CastFunctionSet &function_set, optional_ptr<BindCastInfo> info,
+                             optional_ptr<ClientContext> context)
     : function_set(function_set), info(info), context(context) {
 }
 

package/src/duckdb/src/function/scalar/sequence/nextval.cpp
@@ -15,11 +15,11 @@
 namespace duckdb {
 
 struct NextvalBindData : public FunctionData {
-    explicit NextvalBindData(SequenceCatalogEntry *sequence) : sequence(sequence) {
+    explicit NextvalBindData(optional_ptr<SequenceCatalogEntry> sequence) : sequence(sequence) {
     }
 
     //! The sequence to use for the nextval computation; only if the sequence is a constant
-    SequenceCatalogEntry *sequence;
+    optional_ptr<SequenceCatalogEntry> sequence;
 
     unique_ptr<FunctionData> Copy() const override {
         return make_uniq<NextvalBindData>(sequence);
@@ -32,45 +32,45 @@ struct NextvalBindData : public FunctionData {
 };
 
 struct CurrentSequenceValueOperator {
-    static int64_t Operation(DuckTransaction &transaction, SequenceCatalogEntry *seq) {
-        lock_guard<mutex> seqlock(seq->lock);
+    static int64_t Operation(DuckTransaction &transaction, SequenceCatalogEntry &seq) {
+        lock_guard<mutex> seqlock(seq.lock);
         int64_t result;
-        if (seq->usage_count == 0u) {
+        if (seq.usage_count == 0u) {
             throw SequenceException("currval: sequence is not yet defined in this session");
         }
-        result = seq->last_value;
+        result = seq.last_value;
         return result;
     }
 };
 
 struct NextSequenceValueOperator {
-    static int64_t Operation(DuckTransaction &transaction, SequenceCatalogEntry *seq) {
-        lock_guard<mutex> seqlock(seq->lock);
+    static int64_t Operation(DuckTransaction &transaction, SequenceCatalogEntry &seq) {
+        lock_guard<mutex> seqlock(seq.lock);
         int64_t result;
-        result = seq->counter;
-        bool overflow = !TryAddOperator::Operation(seq->counter, seq->increment, seq->counter);
-        if (seq->cycle) {
+        result = seq.counter;
+        bool overflow = !TryAddOperator::Operation(seq.counter, seq.increment, seq.counter);
+        if (seq.cycle) {
             if (overflow) {
-                seq->counter = seq->increment < 0 ? seq->max_value : seq->min_value;
-            } else if (seq->counter < seq->min_value) {
-                seq->counter = seq->max_value;
-            } else if (seq->counter > seq->max_value) {
-                seq->counter = seq->min_value;
+                seq.counter = seq.increment < 0 ? seq.max_value : seq.min_value;
+            } else if (seq.counter < seq.min_value) {
+                seq.counter = seq.max_value;
+            } else if (seq.counter > seq.max_value) {
+                seq.counter = seq.min_value;
             }
         } else {
-            if (result < seq->min_value || (overflow && seq->increment < 0)) {
-                throw SequenceException("nextval: reached minimum value of sequence \"%s\" (%lld)", seq->name,
-                                        seq->min_value);
+            if (result < seq.min_value || (overflow && seq.increment < 0)) {
+                throw SequenceException("nextval: reached minimum value of sequence \"%s\" (%lld)", seq.name,
+                                        seq.min_value);
             }
-            if (result > seq->max_value || overflow) {
-                throw SequenceException("nextval: reached maximum value of sequence \"%s\" (%lld)", seq->name,
-                                        seq->max_value);
+            if (result > seq.max_value || overflow) {
+                throw SequenceException("nextval: reached maximum value of sequence \"%s\" (%lld)", seq.name,
+                                        seq.max_value);
             }
         }
-        seq->last_value = result;
-        seq->usage_count++;
-        if (!seq->temporary) {
-            transaction.sequence_usage[seq] = SequenceValue(seq->usage_count, seq->counter);
+        seq.last_value = result;
+        seq.usage_count++;
+        if (!seq.temporary) {
+            transaction.sequence_usage[&seq] = SequenceValue(seq.usage_count, seq.counter);
         }
         return result;
     }
@@ -98,7 +98,7 @@ static void NextValFunction(DataChunk &args, ExpressionState &state, Vector &res
         auto result_data = FlatVector::GetData<int64_t>(result);
         for (idx_t i = 0; i < args.size(); i++) {
             // get the next value from the sequence
-            result_data[i] = OP::Operation(transaction, info.sequence);
+            result_data[i] = OP::Operation(transaction, *info.sequence);
         }
     } else {
         // sequence to use comes from the input
@@ -107,7 +107,7 @@ static void NextValFunction(DataChunk &args, ExpressionState &state, Vector &res
             auto sequence = BindSequence(context, value.GetString());
             // finally get the next value from the sequence
             auto &transaction = DuckTransaction::Get(context, *sequence->catalog);
-            return OP::Operation(transaction, sequence);
+            return OP::Operation(transaction, *sequence);
         });
     }
 }
@@ -129,7 +129,7 @@ static unique_ptr<FunctionData> NextValBind(ClientContext &context, ScalarFuncti
 static void NextValDependency(BoundFunctionExpression &expr, DependencyList &dependencies) {
     auto &info = expr.bind_info->Cast<NextvalBindData>();
     if (info.sequence) {
-        dependencies.AddDependency(info.sequence);
+        dependencies.AddDependency(*info.sequence);
     }
 }
 

package/src/duckdb/src/function/scalar/string/damerau_levenshtein.cpp (new file)
@@ -0,0 +1,106 @@
+#include "duckdb/function/scalar/string_functions.hpp"
+#include "duckdb/common/map.hpp"
+#include "duckdb/common/vector.hpp"
+
+namespace duckdb {
+
+// Using Lowrance-Wagner (LW) algorithm: https://doi.org/10.1145%2F321879.321880
+// Can't calculate as trivial modification to levenshtein algorithm
+// as we need to potentially know about earlier in the string
+static idx_t DamerauLevenshteinDistance(const string_t &source, const string_t &target) {
+    // costs associated with each type of edit, to aid readability
+    constexpr uint8_t COST_SUBSTITUTION = 1;
+    constexpr uint8_t COST_INSERTION = 1;
+    constexpr uint8_t COST_DELETION = 1;
+    constexpr uint8_t COST_TRANSPOSITION = 1;
+    const auto source_len = source.GetSize();
+    const auto target_len = target.GetSize();
+
+    // If one string is empty, the distance equals the length of the other string
+    // either through target_len insertions
+    // or source_len deletions
+    if (source_len == 0) {
+        return target_len * COST_INSERTION;
+    } else if (target_len == 0) {
+        return source_len * COST_DELETION;
+    }
+
+    const auto source_str = source.GetDataUnsafe();
+    const auto target_str = target.GetDataUnsafe();
+
+    // larger than the largest possible value:
+    const auto inf = source_len * COST_DELETION + target_len * COST_INSERTION + 1;
+    // minimum edit distance from prefix of source string to prefix of target string
+    // same object as H in LW paper (with indices offset by 1)
+    vector<vector<idx_t>> distance(source_len + 2, vector<idx_t>(target_len + 2, inf));
+    // keeps track of the largest string indices of source string matching each character
+    // same as DA in LW paper
+    map<char, idx_t> largest_source_chr_matching;
+
+    // initialise row/column corresponding to zero-length strings
+    // partial string -> empty requires a deletion for each character
+    for (idx_t source_idx = 0; source_idx <= source_len; source_idx++) {
+        distance[source_idx + 1][1] = source_idx * COST_DELETION;
+    }
+    // and empty -> partial string means simply inserting characters
+    for (idx_t target_idx = 1; target_idx <= target_len; target_idx++) {
+        distance[1][target_idx + 1] = target_idx * COST_INSERTION;
+    }
+    // loop through string indices - these are offset by 2 from distance indices
+    for (idx_t source_idx = 0; source_idx < source_len; source_idx++) {
+        // keeps track of the largest string indices of target string matching current source character
+        // same as DB in LW paper
+        idx_t largest_target_chr_matching;
+        largest_target_chr_matching = 0;
+        for (idx_t target_idx = 0; target_idx < target_len; target_idx++) {
+            // correspond to i1 and j1 in LW paper respectively
+            idx_t largest_source_chr_matching_target;
+            idx_t largest_target_chr_matching_source;
+            // cost associated to diagnanl shift in distance matrix
+            // corresponds to d in LW paper
+            uint8_t cost_diagonal_shift;
+            largest_source_chr_matching_target = largest_source_chr_matching[target_str[target_idx]];
+            largest_target_chr_matching_source = largest_target_chr_matching;
+            // if characters match, diagonal move costs nothing and we update our largest target index
+            // otherwise move is substitution and costs as such
+            if (source_str[source_idx] == target_str[target_idx]) {
+                cost_diagonal_shift = 0;
+                largest_target_chr_matching = target_idx + 1;
+            } else {
+                cost_diagonal_shift = COST_SUBSTITUTION;
+            }
+            distance[source_idx + 2][target_idx + 2] = MinValue(
+                distance[source_idx + 1][target_idx + 1] + cost_diagonal_shift,
+                MinValue(distance[source_idx + 2][target_idx + 1] + COST_INSERTION,
+                         MinValue(distance[source_idx + 1][target_idx + 2] + COST_DELETION,
+                                  distance[largest_source_chr_matching_target][largest_target_chr_matching_source] +
+                                      (source_idx - largest_source_chr_matching_target) * COST_DELETION +
+                                      COST_TRANSPOSITION +
+                                      (target_idx - largest_target_chr_matching_source) * COST_INSERTION)));
+        }
+        largest_source_chr_matching[source_str[source_idx]] = source_idx + 1;
+    }
+    return distance[source_len + 1][target_len + 1];
+}
+
+static int64_t DamerauLevenshteinScalarFunction(Vector &result, const string_t source, const string_t target) {
+    return (int64_t)DamerauLevenshteinDistance(source, target);
+}
+
+static void DamerauLevenshteinFunction(DataChunk &args, ExpressionState &state, Vector &result) {
+    auto &source_vec = args.data[0];
+    auto &target_vec = args.data[1];
+
+    BinaryExecutor::Execute<string_t, string_t, int64_t>(
+        source_vec, target_vec, result, args.size(),
+        [&](string_t source, string_t target) { return DamerauLevenshteinScalarFunction(result, source, target); });
+}
+
+void DamerauLevenshteinFun::RegisterFunction(BuiltinFunctions &set) {
+    ScalarFunctionSet damerau_levenshtein("damerau_levenshtein");
+    damerau_levenshtein.AddFunction(ScalarFunction("damerau_levenshtein", {LogicalType::VARCHAR, LogicalType::VARCHAR},
+                                                   LogicalType::BIGINT, DamerauLevenshteinFunction));
+    set.AddFunction(damerau_levenshtein);
+}
+
+} // namespace duckdb