duckdb 0.7.2-dev3353.0 → 0.7.2-dev3441.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (62):
  1. package/package.json +1 -1
  2. package/src/duckdb/extension/json/buffered_json_reader.cpp +2 -3
  3. package/src/duckdb/extension/json/include/json_functions.hpp +5 -1
  4. package/src/duckdb/extension/json/include/json_scan.hpp +1 -0
  5. package/src/duckdb/extension/json/include/json_transform.hpp +2 -2
  6. package/src/duckdb/extension/json/json-extension.cpp +7 -3
  7. package/src/duckdb/extension/json/json_functions/copy_json.cpp +16 -5
  8. package/src/duckdb/extension/json/json_functions/json_create.cpp +220 -93
  9. package/src/duckdb/extension/json/json_functions/json_merge_patch.cpp +2 -2
  10. package/src/duckdb/extension/json/json_functions/json_transform.cpp +283 -117
  11. package/src/duckdb/extension/json/json_functions/read_json.cpp +8 -6
  12. package/src/duckdb/extension/json/json_functions.cpp +17 -15
  13. package/src/duckdb/extension/json/json_scan.cpp +8 -4
  14. package/src/duckdb/extension/parquet/column_reader.cpp +6 -2
  15. package/src/duckdb/extension/parquet/include/parquet_reader.hpp +1 -2
  16. package/src/duckdb/extension/parquet/include/parquet_writer.hpp +2 -2
  17. package/src/duckdb/extension/parquet/include/string_column_reader.hpp +1 -0
  18. package/src/duckdb/extension/parquet/include/thrift_tools.hpp +3 -5
  19. package/src/duckdb/extension/parquet/parquet-extension.cpp +2 -4
  20. package/src/duckdb/extension/parquet/parquet_reader.cpp +11 -22
  21. package/src/duckdb/extension/parquet/parquet_statistics.cpp +5 -0
  22. package/src/duckdb/extension/parquet/parquet_writer.cpp +4 -4
  23. package/src/duckdb/src/catalog/catalog_entry/duck_table_entry.cpp +2 -2
  24. package/src/duckdb/src/catalog/catalog_set.cpp +1 -1
  25. package/src/duckdb/src/common/file_system.cpp +13 -20
  26. package/src/duckdb/src/common/serializer/buffered_file_writer.cpp +2 -2
  27. package/src/duckdb/src/core_functions/scalar/list/list_lambdas.cpp +10 -7
  28. package/src/duckdb/src/execution/expression_executor/execute_between.cpp +3 -0
  29. package/src/duckdb/src/execution/index/art/art.cpp +3 -1
  30. package/src/duckdb/src/execution/operator/join/physical_index_join.cpp +2 -1
  31. package/src/duckdb/src/execution/operator/persistent/base_csv_reader.cpp +2 -2
  32. package/src/duckdb/src/execution/operator/persistent/parallel_csv_reader.cpp +1 -1
  33. package/src/duckdb/src/execution/operator/persistent/physical_copy_to_file.cpp +1 -2
  34. package/src/duckdb/src/execution/operator/persistent/physical_export.cpp +4 -5
  35. package/src/duckdb/src/execution/physical_plan/plan_copy_to_file.cpp +1 -1
  36. package/src/duckdb/src/function/cast/cast_function_set.cpp +89 -25
  37. package/src/duckdb/src/function/pragma/pragma_queries.cpp +20 -15
  38. package/src/duckdb/src/function/table/copy_csv.cpp +4 -5
  39. package/src/duckdb/src/function/table/read_csv.cpp +6 -5
  40. package/src/duckdb/src/function/table/version/pragma_version.cpp +2 -2
  41. package/src/duckdb/src/include/duckdb/common/file_opener.hpp +0 -1
  42. package/src/duckdb/src/include/duckdb/common/file_system.hpp +7 -6
  43. package/src/duckdb/src/include/duckdb/common/opener_file_system.hpp +118 -0
  44. package/src/duckdb/src/include/duckdb/common/serializer/buffered_file_writer.hpp +1 -2
  45. package/src/duckdb/src/include/duckdb/common/types/type_map.hpp +19 -1
  46. package/src/duckdb/src/include/duckdb/execution/operator/persistent/base_csv_reader.hpp +3 -2
  47. package/src/duckdb/src/include/duckdb/execution/operator/persistent/csv_line_info.hpp +1 -0
  48. package/src/duckdb/src/include/duckdb/main/client_data.hpp +4 -0
  49. package/src/duckdb/src/include/duckdb/main/extension_helper.hpp +5 -5
  50. package/src/duckdb/src/include/duckdb/planner/binder.hpp +3 -2
  51. package/src/duckdb/src/include/duckdb/storage/table_storage_info.hpp +2 -0
  52. package/src/duckdb/src/main/client_context.cpp +1 -4
  53. package/src/duckdb/src/main/client_data.cpp +19 -0
  54. package/src/duckdb/src/main/database.cpp +4 -1
  55. package/src/duckdb/src/main/extension/extension_install.cpp +5 -6
  56. package/src/duckdb/src/main/extension/extension_load.cpp +11 -16
  57. package/src/duckdb/src/main/settings/settings.cpp +2 -3
  58. package/src/duckdb/src/optimizer/join_order/cardinality_estimator.cpp +1 -1
  59. package/src/duckdb/src/optimizer/join_order/join_order_optimizer.cpp +25 -1
  60. package/src/duckdb/src/planner/binder/statement/bind_insert.cpp +32 -35
  61. package/src/duckdb/src/storage/table/row_group_collection.cpp +41 -25
  62. package/src/duckdb/third_party/libpg_query/src_backend_parser_gram.cpp +7998 -7955
@@ -230,7 +230,10 @@ void DatabaseInstance::Initialize(const char *database_path, DBConfig *user_conf
230
230
 
231
231
  if (!config.options.database_type.empty()) {
232
232
  // if we are opening an extension database - load the extension
233
- ExtensionHelper::LoadExternalExtension(*this, nullptr, config.options.database_type);
233
+ if (!config.file_system) {
234
+ throw InternalException("No file system!?");
235
+ }
236
+ ExtensionHelper::LoadExternalExtension(*this, *config.file_system, config.options.database_type);
234
237
  }
235
238
 
236
239
  if (!config.options.unrecognized_options.empty()) {
@@ -38,7 +38,7 @@ const vector<string> ExtensionHelper::PathComponents() {
38
38
  return vector<string> {".duckdb", "extensions", GetVersionDirectoryName(), DuckDB::Platform()};
39
39
  }
40
40
 
41
- string ExtensionHelper::ExtensionDirectory(DBConfig &config, FileSystem &fs, FileOpener *opener) {
41
+ string ExtensionHelper::ExtensionDirectory(DBConfig &config, FileSystem &fs) {
42
42
  #ifdef WASM_LOADABLE_EXTENSIONS
43
43
  static_assertion(0, "ExtensionDirectory functionality is not supported in duckdb-wasm");
44
44
  #endif
@@ -49,7 +49,7 @@ string ExtensionHelper::ExtensionDirectory(DBConfig &config, FileSystem &fs, Fil
49
49
  // convert random separators to platform-canonic
50
50
  extension_directory = fs.ConvertSeparators(extension_directory);
51
51
  // expand ~ in extension directory
52
- extension_directory = fs.ExpandPath(extension_directory, opener);
52
+ extension_directory = fs.ExpandPath(extension_directory);
53
53
  if (!fs.DirectoryExists(extension_directory)) {
54
54
  auto sep = fs.PathSeparator();
55
55
  auto splits = StringUtil::Split(extension_directory, sep);
@@ -66,7 +66,7 @@ string ExtensionHelper::ExtensionDirectory(DBConfig &config, FileSystem &fs, Fil
66
66
  }
67
67
  }
68
68
  } else { // otherwise default to home
69
- string home_directory = fs.GetHomeDirectory(opener);
69
+ string home_directory = fs.GetHomeDirectory();
70
70
  // exception if the home directory does not exist, don't create whatever we think is home
71
71
  if (!fs.DirectoryExists(home_directory)) {
72
72
  throw IOException("Can't find the home directory at '%s'\nSpecify a home directory using the SET "
@@ -90,8 +90,7 @@ string ExtensionHelper::ExtensionDirectory(DBConfig &config, FileSystem &fs, Fil
90
90
  string ExtensionHelper::ExtensionDirectory(ClientContext &context) {
91
91
  auto &config = DBConfig::GetConfig(context);
92
92
  auto &fs = FileSystem::GetFileSystem(context);
93
- auto opener = FileSystem::GetFileOpener(context);
94
- return ExtensionDirectory(config, fs, opener);
93
+ return ExtensionDirectory(config, fs);
95
94
  }
96
95
 
97
96
  bool ExtensionHelper::CreateSuggestions(const string &extension_name, string &message) {
@@ -118,7 +117,7 @@ void ExtensionHelper::InstallExtension(DBConfig &config, FileSystem &fs, const s
118
117
  // Install is currently a no-op
119
118
  return;
120
119
  #endif
121
- string local_path = ExtensionDirectory(config, fs, nullptr);
120
+ string local_path = ExtensionDirectory(config, fs);
122
121
  InstallExtensionInternal(config, nullptr, fs, local_path, extension, force_install);
123
122
  }
124
123
 
@@ -44,24 +44,22 @@ static void ComputeSHA256FileSegment(FileHandle *handle, const idx_t start, cons
44
44
  ComputeSHA256String(file_content, res);
45
45
  }
46
46
 
47
- bool ExtensionHelper::TryInitialLoad(DBConfig &config, FileOpener *opener, const string &extension,
47
+ bool ExtensionHelper::TryInitialLoad(DBConfig &config, FileSystem &fs, const string &extension,
48
48
  ExtensionInitResult &result, string &error) {
49
49
  if (!config.options.enable_external_access) {
50
50
  throw PermissionException("Loading external extensions is disabled through configuration");
51
51
  }
52
- VirtualFileSystem fallback_file_system; // config may not contain one yet
53
- auto &fs = config.file_system ? *config.file_system : fallback_file_system;
54
52
  auto filename = fs.ConvertSeparators(extension);
55
53
 
56
54
  // shorthand case
57
55
  if (!ExtensionHelper::IsFullPath(extension)) {
58
- string local_path = !config.options.extension_directory.empty() ? config.options.extension_directory
59
- : fs.GetHomeDirectory(opener);
56
+ string local_path =
57
+ !config.options.extension_directory.empty() ? config.options.extension_directory : fs.GetHomeDirectory();
60
58
 
61
59
  // convert random separators to platform-canonic
62
60
  local_path = fs.ConvertSeparators(local_path);
63
61
  // expand ~ in extension directory
64
- local_path = fs.ExpandPath(local_path, opener);
62
+ local_path = fs.ExpandPath(local_path);
65
63
  auto path_components = PathComponents();
66
64
  for (auto &path_ele : path_components) {
67
65
  local_path = fs.JoinPath(local_path, path_ele);
@@ -201,20 +199,17 @@ bool ExtensionHelper::TryInitialLoad(DBConfig &config, FileOpener *opener, const
201
199
  return true;
202
200
  }
203
201
 
204
- ExtensionInitResult ExtensionHelper::InitialLoad(DBConfig &config, FileOpener *opener, const string &extension) {
202
+ ExtensionInitResult ExtensionHelper::InitialLoad(DBConfig &config, FileSystem &fs, const string &extension) {
205
203
  string error;
206
204
  ExtensionInitResult result;
207
- if (!TryInitialLoad(config, opener, extension, result, error)) {
205
+ if (!TryInitialLoad(config, fs, extension, result, error)) {
208
206
  if (!ExtensionHelper::AllowAutoInstall(extension)) {
209
207
  throw IOException(error);
210
208
  }
211
209
  // the extension load failed - try installing the extension
212
- if (!config.file_system) {
213
- throw InternalException("Attempting to install an extension without a file system");
214
- }
215
- ExtensionHelper::InstallExtension(config, *config.file_system, extension, false);
210
+ ExtensionHelper::InstallExtension(config, fs, extension, false);
216
211
  // try loading again
217
- if (!TryInitialLoad(config, nullptr, extension, result, error)) {
212
+ if (!TryInitialLoad(config, fs, extension, result, error)) {
218
213
  throw IOException(error);
219
214
  }
220
215
  }
@@ -242,12 +237,12 @@ string ExtensionHelper::GetExtensionName(const string &original_name) {
242
237
  return ExtensionHelper::ApplyExtensionAlias(splits.front());
243
238
  }
244
239
 
245
- void ExtensionHelper::LoadExternalExtension(DatabaseInstance &db, FileOpener *opener, const string &extension) {
240
+ void ExtensionHelper::LoadExternalExtension(DatabaseInstance &db, FileSystem &fs, const string &extension) {
246
241
  if (db.ExtensionIsLoaded(extension)) {
247
242
  return;
248
243
  }
249
244
 
250
- auto res = InitialLoad(DBConfig::GetConfig(db), opener, extension);
245
+ auto res = InitialLoad(DBConfig::GetConfig(db), fs, extension);
251
246
  auto init_fun_name = res.basename + "_init";
252
247
 
253
248
  ext_init_fun_t init_fun;
@@ -264,7 +259,7 @@ void ExtensionHelper::LoadExternalExtension(DatabaseInstance &db, FileOpener *op
264
259
  }
265
260
 
266
261
  void ExtensionHelper::LoadExternalExtension(ClientContext &context, const string &extension) {
267
- LoadExternalExtension(DatabaseInstance::GetDatabase(context), FileSystem::GetFileOpener(context), extension);
262
+ LoadExternalExtension(DatabaseInstance::GetDatabase(context), FileSystem::GetFileSystem(context), extension);
268
263
  }
269
264
 
270
265
  string ExtensionHelper::ExtractExtensionPrefixFromPath(const string &path) {
@@ -735,9 +735,8 @@ void LogQueryPathSetting::SetLocal(ClientContext &context, const Value &input) {
735
735
  // empty path: clean up query writer
736
736
  client_data.log_query_writer = nullptr;
737
737
  } else {
738
- client_data.log_query_writer =
739
- make_uniq<BufferedFileWriter>(FileSystem::GetFileSystem(context), path,
740
- BufferedFileWriter::DEFAULT_OPEN_FLAGS, client_data.file_opener.get());
738
+ client_data.log_query_writer = make_uniq<BufferedFileWriter>(FileSystem::GetFileSystem(context), path,
739
+ BufferedFileWriter::DEFAULT_OPEN_FLAGS);
741
740
  }
742
741
  }
743
742
 
@@ -574,7 +574,7 @@ void CardinalityEstimator::EstimateBaseTableCardinality(JoinNode &node, LogicalO
574
574
  D_ASSERT(node.set.count == 1);
575
575
  auto relation_id = node.set.relations[0];
576
576
 
577
- double lowest_card_found = NumericLimits<double>::Maximum();
577
+ double lowest_card_found = node.GetBaseTableCardinality();
578
578
  for (auto &column : relation_attributes[relation_id].columns) {
579
579
  auto card_after_filters = node.GetBaseTableCardinality();
580
580
  ColumnBinding key = ColumnBinding(relation_id, column);
@@ -147,6 +147,18 @@ bool JoinOrderOptimizer::ExtractJoinRelations(LogicalOperator &input_op,
147
147
  }
148
148
  }
149
149
  }
150
+ if (op->type == LogicalOperatorType::LOGICAL_ANY_JOIN && non_reorderable_operation) {
151
+ auto &join = op->Cast<LogicalAnyJoin>();
152
+ if (join.join_type == JoinType::LEFT && join.right_projection_map.empty()) {
153
+ auto lhs_cardinality = join.children[0]->EstimateCardinality(context);
154
+ auto rhs_cardinality = join.children[1]->EstimateCardinality(context);
155
+ if (rhs_cardinality > lhs_cardinality * 2) {
156
+ join.join_type = JoinType::RIGHT;
157
+ std::swap(join.children[0], join.children[1]);
158
+ }
159
+ }
160
+ }
161
+
150
162
  if (non_reorderable_operation) {
151
163
  // we encountered a non-reordable operation (setop or non-inner join)
152
164
  // we do not reorder non-inner joins yet, however we do want to expand the potential join graph around them
@@ -648,7 +660,19 @@ void JoinOrderOptimizer::SolveJoinOrderApproximately() {
648
660
  // we have to add a cross product; we add it between the two smallest relations
649
661
  optional_ptr<JoinNode> smallest_plans[2];
650
662
  idx_t smallest_index[2];
651
- for (idx_t i = 0; i < join_relations.size(); i++) {
663
+ D_ASSERT(join_relations.size() >= 2);
664
+
665
+ // first just add the first two join relations. It doesn't matter the cost as the JOO
666
+ // will swap them on estimated cardinality anyway.
667
+ for (idx_t i = 0; i < 2; i++) {
668
+ auto current_plan = plans[&join_relations[i].get()].get();
669
+ smallest_plans[i] = current_plan;
670
+ smallest_index[i] = i;
671
+ }
672
+
673
+ // if there are any other join relations that don't have connections
674
+ // add them if they have lower estimated cardinality.
675
+ for (idx_t i = 2; i < join_relations.size(); i++) {
652
676
  // get the plan for this relation
653
677
  auto current_plan = plans[&join_relations[i].get()].get();
654
678
  // check if the cardinality is smaller than the smallest two found so far
@@ -25,6 +25,7 @@
25
25
  #include "duckdb/planner/tableref/bound_basetableref.hpp"
26
26
  #include "duckdb/planner/tableref/bound_dummytableref.hpp"
27
27
  #include "duckdb/parser/parsed_expression_iterator.hpp"
28
+ #include "duckdb/storage/table_storage_info.hpp"
28
29
 
29
30
  namespace duckdb {
30
31
 
@@ -78,10 +79,10 @@ void ReplaceColumnBindings(Expression &expr, idx_t source, idx_t dest) {
78
79
  expr, [&](unique_ptr<Expression> &child) { ReplaceColumnBindings(*child, source, dest); });
79
80
  }
80
81
 
81
- void Binder::BindDoUpdateSetExpressions(const string &table_alias, LogicalInsert *insert, UpdateSetInfo &set_info,
82
- TableCatalogEntry &table) {
83
- D_ASSERT(insert->children.size() == 1);
84
- D_ASSERT(insert->children[0]->type == LogicalOperatorType::LOGICAL_PROJECTION);
82
+ void Binder::BindDoUpdateSetExpressions(const string &table_alias, LogicalInsert &insert, UpdateSetInfo &set_info,
83
+ TableCatalogEntry &table, TableStorageInfo &storage_info) {
84
+ D_ASSERT(insert.children.size() == 1);
85
+ D_ASSERT(insert.children[0]->type == LogicalOperatorType::LOGICAL_PROJECTION);
85
86
 
86
87
  vector<column_t> logical_column_ids;
87
88
  vector<string> column_names;
@@ -97,13 +98,13 @@ void Binder::BindDoUpdateSetExpressions(const string &table_alias, LogicalInsert
97
98
  if (column.Generated()) {
98
99
  throw BinderException("Cant update column \"%s\" because it is a generated column!", column.Name());
99
100
  }
100
- if (std::find(insert->set_columns.begin(), insert->set_columns.end(), column.Physical()) !=
101
- insert->set_columns.end()) {
101
+ if (std::find(insert.set_columns.begin(), insert.set_columns.end(), column.Physical()) !=
102
+ insert.set_columns.end()) {
102
103
  throw BinderException("Multiple assignments to same column \"%s\"", colname);
103
104
  }
104
- insert->set_columns.push_back(column.Physical());
105
+ insert.set_columns.push_back(column.Physical());
105
106
  logical_column_ids.push_back(column.Oid());
106
- insert->set_types.push_back(column.Type());
107
+ insert.set_types.push_back(column.Type());
107
108
  column_names.push_back(colname);
108
109
  if (expr->type == ExpressionType::VALUE_DEFAULT) {
109
110
  expr = ExpandDefaultExpression(column);
@@ -120,14 +121,13 @@ void Binder::BindDoUpdateSetExpressions(const string &table_alias, LogicalInsert
120
121
  throw BinderException("Expression in the DO UPDATE SET clause can not be a subquery");
121
122
  }
122
123
 
123
- insert->expressions.push_back(std::move(bound_expr));
124
+ insert.expressions.push_back(std::move(bound_expr));
124
125
  }
125
126
 
126
127
  // Figure out which columns are indexed on
127
128
  unordered_set<column_t> indexed_columns;
128
- auto &indexes = table.GetStorage().info->indexes.Indexes();
129
- for (auto &index : indexes) {
130
- for (auto &column_id : index->column_id_set) {
129
+ for (auto &index : storage_info.index_info) {
130
+ for (auto &column_id : index.column_set) {
131
131
  indexed_columns.insert(column_id);
132
132
  }
133
133
  }
@@ -142,16 +142,16 @@ void Binder::BindDoUpdateSetExpressions(const string &table_alias, LogicalInsert
142
142
  }
143
143
  }
144
144
 
145
- unique_ptr<UpdateSetInfo> CreateSetInfoForReplace(TableCatalogEntry &table, InsertStatement &insert) {
145
+ unique_ptr<UpdateSetInfo> CreateSetInfoForReplace(TableCatalogEntry &table, InsertStatement &insert,
146
+ TableStorageInfo &storage_info) {
146
147
  auto set_info = make_uniq<UpdateSetInfo>();
147
148
 
148
149
  auto &columns = set_info->columns;
149
150
  // Figure out which columns are indexed on
150
151
 
151
152
  unordered_set<column_t> indexed_columns;
152
- auto &indexes = table.GetStorage().info->indexes.Indexes();
153
- for (auto &index : indexes) {
154
- for (auto &column_id : index->column_id_set) {
153
+ for (auto &index : storage_info.index_info) {
154
+ for (auto &column_id : index.column_set) {
155
155
  indexed_columns.insert(column_id);
156
156
  }
157
157
  }
@@ -190,9 +190,6 @@ void Binder::BindOnConflictClause(LogicalInsert &insert, TableCatalogEntry &tabl
190
190
  insert.action_type = OnConflictAction::THROW;
191
191
  return;
192
192
  }
193
- if (!table.IsDuckTable()) {
194
- throw BinderException("ON CONFLICT clause is not yet supported for non-DuckDB tables");
195
- }
196
193
  D_ASSERT(stmt.table_ref->type == TableReferenceType::BASE_TABLE);
197
194
 
198
195
  // visit the table reference
@@ -208,6 +205,9 @@ void Binder::BindOnConflictClause(LogicalInsert &insert, TableCatalogEntry &tabl
208
205
  D_ASSERT(on_conflict.action_type != OnConflictAction::THROW);
209
206
  insert.action_type = on_conflict.action_type;
210
207
 
208
+ // obtain the table storage info
209
+ auto storage_info = table.GetStorageInfo(context);
210
+
211
211
  auto &columns = table.GetColumns();
212
212
  if (!on_conflict.indexed_columns.empty()) {
213
213
  // Bind the ON CONFLICT (<columns>)
@@ -232,18 +232,17 @@ void Binder::BindOnConflictClause(LogicalInsert &insert, TableCatalogEntry &tabl
232
232
  insert.on_conflict_filter.insert(col.Oid());
233
233
  }
234
234
  }
235
- auto &indexes = table.GetStorage().info->indexes;
236
235
  bool index_references_columns = false;
237
- indexes.Scan([&](Index &index) {
238
- if (!index.IsUnique()) {
239
- return false;
236
+ for (auto &index : storage_info.index_info) {
237
+ if (!index.is_unique) {
238
+ continue;
240
239
  }
241
- bool index_matches = insert.on_conflict_filter == index.column_id_set;
240
+ bool index_matches = insert.on_conflict_filter == index.column_set;
242
241
  if (index_matches) {
243
242
  index_references_columns = true;
243
+ break;
244
244
  }
245
- return index_matches;
246
- });
245
+ }
247
246
  if (!index_references_columns) {
248
247
  // Same as before, this is essentially a no-op, turning this into a DO THROW instead
249
248
  // But since this makes no logical sense, it's probably better to throw an error
@@ -254,21 +253,19 @@ void Binder::BindOnConflictClause(LogicalInsert &insert, TableCatalogEntry &tabl
254
253
  // When omitting the conflict target, the ON CONFLICT applies to every UNIQUE/PRIMARY KEY on the table
255
254
 
256
255
  // We check if there are any constraints on the table, if there aren't we throw an error.
257
- auto &indexes = table.GetStorage().info->indexes;
258
256
  idx_t found_matching_indexes = 0;
259
- indexes.Scan([&](Index &index) {
260
- if (!index.IsUnique()) {
261
- return false;
257
+ for (auto &index : storage_info.index_info) {
258
+ if (!index.is_unique) {
259
+ continue;
262
260
  }
263
261
  // does this work with multi-column indexes?
264
- auto &indexed_columns = index.column_id_set;
262
+ auto &indexed_columns = index.column_set;
265
263
  for (auto &column : table.GetColumns().Physical()) {
266
264
  if (indexed_columns.count(column.Physical().index)) {
267
265
  found_matching_indexes++;
268
266
  }
269
267
  }
270
- return false;
271
- });
268
+ }
272
269
  if (!found_matching_indexes) {
273
270
  throw BinderException(
274
271
  "There are no UNIQUE/PRIMARY KEY Indexes that refer to this table, ON CONFLICT is a no-op");
@@ -338,7 +335,7 @@ void Binder::BindOnConflictClause(LogicalInsert &insert, TableCatalogEntry &tabl
338
335
 
339
336
  if (insert.action_type == OnConflictAction::REPLACE) {
340
337
  D_ASSERT(on_conflict.set_info == nullptr);
341
- on_conflict.set_info = CreateSetInfoForReplace(table, stmt);
338
+ on_conflict.set_info = CreateSetInfoForReplace(table, stmt, storage_info);
342
339
  insert.action_type = OnConflictAction::UPDATE;
343
340
  }
344
341
  if (on_conflict.set_info && on_conflict.set_info->columns.empty()) {
@@ -374,7 +371,7 @@ void Binder::BindOnConflictClause(LogicalInsert &insert, TableCatalogEntry &tabl
374
371
  insert.do_update_condition = std::move(condition);
375
372
  }
376
373
 
377
- BindDoUpdateSetExpressions(table_alias, &insert, set_info, table);
374
+ BindDoUpdateSetExpressions(table_alias, insert, set_info, table, storage_info);
378
375
 
379
376
  // Get the column_ids we need to fetch later on from the conflicting tuples
380
377
  // of the original table, to execute the expressions
@@ -526,43 +526,59 @@ void RowGroupCollection::Update(TransactionData transaction, row_t *ids, const v
526
526
  void RowGroupCollection::RemoveFromIndexes(TableIndexList &indexes, Vector &row_identifiers, idx_t count) {
527
527
  auto row_ids = FlatVector::GetData<row_t>(row_identifiers);
528
528
 
529
- // figure out which row_group to fetch from
530
- auto row_group = row_groups->GetSegment(row_ids[0]);
531
- auto row_group_vector_idx = (row_ids[0] - row_group->start) / STANDARD_VECTOR_SIZE;
532
- auto base_row_id = row_group_vector_idx * STANDARD_VECTOR_SIZE + row_group->start;
533
-
534
- // create a selection vector from the row_ids
535
- SelectionVector sel(STANDARD_VECTOR_SIZE);
536
- for (idx_t i = 0; i < count; i++) {
537
- auto row_in_vector = row_ids[i] - base_row_id;
538
- D_ASSERT(row_in_vector < STANDARD_VECTOR_SIZE);
539
- sel.set_index(i, row_in_vector);
540
- }
541
-
542
- // now fetch the columns from that row_group
543
- TableScanState state;
544
- state.table_state.max_row = row_start + total_rows;
545
-
529
+ // initialize the fetch state
546
530
  // FIXME: we do not need to fetch all columns, only the columns required by the indices!
531
+ TableScanState state;
547
532
  vector<column_t> column_ids;
548
533
  column_ids.reserve(types.size());
549
534
  for (idx_t i = 0; i < types.size(); i++) {
550
535
  column_ids.push_back(i);
551
536
  }
552
537
  state.Initialize(std::move(column_ids));
538
+ state.table_state.max_row = row_start + total_rows;
553
539
 
540
+ // initialize the fetch chunk
554
541
  DataChunk result;
555
542
  result.Initialize(GetAllocator(), types);
556
543
 
557
- state.table_state.Initialize(GetTypes());
558
- row_group->InitializeScanWithOffset(state.table_state, row_group_vector_idx);
559
- row_group->ScanCommitted(state.table_state, result, TableScanType::TABLE_SCAN_COMMITTED_ROWS);
560
- result.Slice(sel, count);
544
+ SelectionVector sel(STANDARD_VECTOR_SIZE);
545
+ // now iterate over the row ids
546
+ for (idx_t r = 0; r < count;) {
547
+ result.Reset();
548
+ // figure out which row_group to fetch from
549
+ auto row_id = row_ids[r];
550
+ auto row_group = row_groups->GetSegment(row_id);
551
+ auto row_group_vector_idx = (row_id - row_group->start) / STANDARD_VECTOR_SIZE;
552
+ auto base_row_id = row_group_vector_idx * STANDARD_VECTOR_SIZE + row_group->start;
553
+
554
+ // fetch the current vector
555
+ state.table_state.Initialize(GetTypes());
556
+ row_group->InitializeScanWithOffset(state.table_state, row_group_vector_idx);
557
+ row_group->ScanCommitted(state.table_state, result, TableScanType::TABLE_SCAN_COMMITTED_ROWS);
558
+ result.Verify();
559
+
560
+ // check for any remaining row ids if they also fall into this vector
561
+ // we try to fetch handle as many rows as possible at the same time
562
+ idx_t sel_count = 0;
563
+ for (; r < count; r++) {
564
+ idx_t current_row = idx_t(row_ids[r]);
565
+ if (current_row < base_row_id || current_row >= base_row_id + result.size()) {
566
+ // this row-id does not fall into the current chunk - break
567
+ break;
568
+ }
569
+ auto row_in_vector = current_row - base_row_id;
570
+ D_ASSERT(row_in_vector < result.size());
571
+ sel.set_index(sel_count++, row_in_vector);
572
+ }
573
+ D_ASSERT(sel_count > 0);
574
+ // slice the vector with all rows that are present in this vector and erase from the index
575
+ result.Slice(sel, sel_count);
561
576
 
562
- indexes.Scan([&](Index &index) {
563
- index.Delete(result, row_identifiers);
564
- return false;
565
- });
577
+ indexes.Scan([&](Index &index) {
578
+ index.Delete(result, row_identifiers);
579
+ return false;
580
+ });
581
+ }
566
582
  }
567
583
 
568
584
  void RowGroupCollection::UpdateColumn(TransactionData transaction, Vector &row_ids, const vector<column_t> &column_path,