duckdb 0.7.2-dev3402.0 → 0.7.2-dev3515.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (117)
  1. package/package.json +2 -2
  2. package/src/duckdb/extension/json/json_functions/json_create.cpp +1 -1
  3. package/src/duckdb/src/catalog/catalog_entry/duck_table_entry.cpp +2 -2
  4. package/src/duckdb/src/catalog/catalog_set.cpp +1 -1
  5. package/src/duckdb/src/catalog/default/default_functions.cpp +1 -0
  6. package/src/duckdb/src/common/arrow/arrow_converter.cpp +4 -4
  7. package/src/duckdb/src/common/compressed_file_system.cpp +2 -2
  8. package/src/duckdb/src/common/file_system.cpp +2 -2
  9. package/src/duckdb/src/common/row_operations/row_gather.cpp +2 -2
  10. package/src/duckdb/src/common/serializer/binary_deserializer.cpp +1 -1
  11. package/src/duckdb/src/common/serializer/buffered_file_reader.cpp +1 -1
  12. package/src/duckdb/src/common/serializer/buffered_file_writer.cpp +1 -1
  13. package/src/duckdb/src/common/serializer/buffered_serializer.cpp +4 -3
  14. package/src/duckdb/src/common/serializer.cpp +1 -1
  15. package/src/duckdb/src/common/sort/radix_sort.cpp +5 -5
  16. package/src/duckdb/src/common/string_util.cpp +2 -2
  17. package/src/duckdb/src/common/types/bit.cpp +2 -2
  18. package/src/duckdb/src/common/types/blob.cpp +2 -2
  19. package/src/duckdb/src/common/types/data_chunk.cpp +2 -2
  20. package/src/duckdb/src/common/types/date.cpp +1 -1
  21. package/src/duckdb/src/common/types/decimal.cpp +2 -2
  22. package/src/duckdb/src/common/types/row/tuple_data_scatter_gather.cpp +14 -2
  23. package/src/duckdb/src/common/types/selection_vector.cpp +1 -1
  24. package/src/duckdb/src/common/types/time.cpp +1 -1
  25. package/src/duckdb/src/common/types/vector.cpp +7 -7
  26. package/src/duckdb/src/common/virtual_file_system.cpp +4 -0
  27. package/src/duckdb/src/common/windows_util.cpp +2 -2
  28. package/src/duckdb/src/core_functions/aggregate/distributive/string_agg.cpp +6 -3
  29. package/src/duckdb/src/core_functions/scalar/list/list_aggregates.cpp +2 -5
  30. package/src/duckdb/src/core_functions/scalar/list/list_lambdas.cpp +10 -7
  31. package/src/duckdb/src/core_functions/scalar/string/printf.cpp +1 -1
  32. package/src/duckdb/src/execution/aggregate_hashtable.cpp +1 -1
  33. package/src/duckdb/src/execution/expression_executor/execute_between.cpp +3 -0
  34. package/src/duckdb/src/execution/join_hashtable.cpp +3 -3
  35. package/src/duckdb/src/execution/operator/aggregate/physical_ungrouped_aggregate.cpp +2 -2
  36. package/src/duckdb/src/execution/operator/join/outer_join_marker.cpp +1 -1
  37. package/src/duckdb/src/execution/operator/join/perfect_hash_join_executor.cpp +1 -1
  38. package/src/duckdb/src/execution/operator/join/physical_index_join.cpp +2 -0
  39. package/src/duckdb/src/execution/operator/join/physical_range_join.cpp +1 -1
  40. package/src/duckdb/src/execution/operator/persistent/base_csv_reader.cpp +2 -7
  41. package/src/duckdb/src/execution/operator/persistent/buffered_csv_reader.cpp +4 -41
  42. package/src/duckdb/src/execution/operator/persistent/csv_file_handle.cpp +158 -0
  43. package/src/duckdb/src/execution/operator/projection/physical_pivot.cpp +1 -1
  44. package/src/duckdb/src/execution/perfect_aggregate_hashtable.cpp +2 -2
  45. package/src/duckdb/src/execution/radix_partitioned_hashtable.cpp +3 -4
  46. package/src/duckdb/src/execution/window_segment_tree.cpp +1 -1
  47. package/src/duckdb/src/function/pragma/pragma_queries.cpp +1 -1
  48. package/src/duckdb/src/function/scalar/strftime_format.cpp +1 -1
  49. package/src/duckdb/src/function/scalar/string/concat.cpp +1 -1
  50. package/src/duckdb/src/function/scalar/string/like.cpp +2 -2
  51. package/src/duckdb/src/function/scalar/system/aggregate_export.cpp +5 -5
  52. package/src/duckdb/src/function/table/copy_csv.cpp +1 -1
  53. package/src/duckdb/src/function/table/read_csv.cpp +43 -35
  54. package/src/duckdb/src/function/table/version/pragma_version.cpp +2 -2
  55. package/src/duckdb/src/include/duckdb/common/allocator.hpp +3 -0
  56. package/src/duckdb/src/include/duckdb/common/compressed_file_system.hpp +3 -3
  57. package/src/duckdb/src/include/duckdb/common/constants.hpp +0 -5
  58. package/src/duckdb/src/include/duckdb/common/helper.hpp +22 -9
  59. package/src/duckdb/src/include/duckdb/common/memory_safety.hpp +15 -0
  60. package/src/duckdb/src/include/duckdb/common/optional_ptr.hpp +1 -0
  61. package/src/duckdb/src/include/duckdb/common/serializer/buffered_file_reader.hpp +1 -1
  62. package/src/duckdb/src/include/duckdb/common/serializer/buffered_file_writer.hpp +1 -1
  63. package/src/duckdb/src/include/duckdb/common/serializer/buffered_serializer.hpp +3 -2
  64. package/src/duckdb/src/include/duckdb/common/serializer/serialization_traits.hpp +2 -3
  65. package/src/duckdb/src/include/duckdb/common/sort/duckdb_pdqsort.hpp +11 -6
  66. package/src/duckdb/src/include/duckdb/common/types/data_chunk.hpp +2 -1
  67. package/src/duckdb/src/include/duckdb/common/types/selection_vector.hpp +1 -1
  68. package/src/duckdb/src/include/duckdb/common/types/validity_mask.hpp +3 -3
  69. package/src/duckdb/src/include/duckdb/common/types/vector_buffer.hpp +4 -4
  70. package/src/duckdb/src/include/duckdb/common/unique_ptr.hpp +53 -22
  71. package/src/duckdb/src/include/duckdb/common/vector.hpp +5 -2
  72. package/src/duckdb/src/include/duckdb/execution/aggregate_hashtable.hpp +1 -1
  73. package/src/duckdb/src/include/duckdb/execution/join_hashtable.hpp +4 -4
  74. package/src/duckdb/src/include/duckdb/execution/operator/join/outer_join_marker.hpp +1 -1
  75. package/src/duckdb/src/include/duckdb/execution/operator/join/perfect_hash_join_executor.hpp +1 -1
  76. package/src/duckdb/src/include/duckdb/execution/operator/join/physical_range_join.hpp +1 -1
  77. package/src/duckdb/src/include/duckdb/execution/operator/persistent/base_csv_reader.hpp +0 -2
  78. package/src/duckdb/src/include/duckdb/execution/operator/persistent/buffered_csv_reader.hpp +2 -2
  79. package/src/duckdb/src/include/duckdb/execution/operator/persistent/csv_file_handle.hpp +27 -127
  80. package/src/duckdb/src/include/duckdb/execution/operator/persistent/parallel_csv_reader.hpp +2 -2
  81. package/src/duckdb/src/include/duckdb/execution/perfect_aggregate_hashtable.hpp +2 -2
  82. package/src/duckdb/src/include/duckdb/execution/window_segment_tree.hpp +1 -1
  83. package/src/duckdb/src/include/duckdb/function/table/read_csv.hpp +2 -4
  84. package/src/duckdb/src/include/duckdb/optimizer/join_order/join_relation.hpp +3 -3
  85. package/src/duckdb/src/include/duckdb/parser/statement/insert_statement.hpp +5 -0
  86. package/src/duckdb/src/include/duckdb/planner/binder.hpp +3 -2
  87. package/src/duckdb/src/include/duckdb/storage/arena_allocator.hpp +2 -2
  88. package/src/duckdb/src/include/duckdb/storage/buffer/buffer_handle.hpp +9 -2
  89. package/src/duckdb/src/include/duckdb/storage/statistics/base_statistics.hpp +1 -1
  90. package/src/duckdb/src/include/duckdb/storage/table/append_state.hpp +1 -1
  91. package/src/duckdb/src/include/duckdb/storage/table/scan_state.hpp +1 -1
  92. package/src/duckdb/src/include/duckdb/storage/table/update_segment.hpp +2 -2
  93. package/src/duckdb/src/include/duckdb/storage/table_storage_info.hpp +2 -0
  94. package/src/duckdb/src/main/client_context.cpp +1 -0
  95. package/src/duckdb/src/optimizer/join_order/cardinality_estimator.cpp +1 -1
  96. package/src/duckdb/src/optimizer/join_order/join_order_optimizer.cpp +25 -1
  97. package/src/duckdb/src/optimizer/join_order/join_relation_set.cpp +5 -5
  98. package/src/duckdb/src/optimizer/unnest_rewriter.cpp +14 -6
  99. package/src/duckdb/src/parser/statement/insert_statement.cpp +4 -1
  100. package/src/duckdb/src/parser/transform/statement/transform_insert.cpp +10 -0
  101. package/src/duckdb/src/planner/binder/statement/bind_insert.cpp +64 -42
  102. package/src/duckdb/src/storage/arena_allocator.cpp +1 -1
  103. package/src/duckdb/src/storage/buffer/buffer_handle.cpp +2 -11
  104. package/src/duckdb/src/storage/checkpoint/write_overflow_strings_to_disk.cpp +1 -1
  105. package/src/duckdb/src/storage/compression/string_uncompressed.cpp +2 -2
  106. package/src/duckdb/src/storage/statistics/list_stats.cpp +1 -1
  107. package/src/duckdb/src/storage/statistics/struct_stats.cpp +1 -1
  108. package/src/duckdb/src/storage/table/row_group.cpp +2 -2
  109. package/src/duckdb/src/storage/table/row_group_collection.cpp +41 -25
  110. package/src/duckdb/src/storage/table/update_segment.cpp +7 -6
  111. package/src/duckdb/third_party/fsst/libfsst.cpp +1 -2
  112. package/src/duckdb/third_party/libpg_query/include/nodes/nodes.hpp +9 -0
  113. package/src/duckdb/third_party/libpg_query/include/nodes/parsenodes.hpp +13 -12
  114. package/src/duckdb/third_party/libpg_query/include/parser/gram.hpp +2 -1
  115. package/src/duckdb/third_party/libpg_query/src_backend_parser_gram.cpp +13050 -12885
  116. package/src/duckdb/ub_src_execution_operator_persistent.cpp +2 -0
  117. package/src/statement.cpp +15 -13
@@ -195,18 +195,24 @@ void UnnestRewriter::UpdateRHSBindings(unique_ptr<LogicalOperator> *plan_ptr, un
195
195
  updater.replace_bindings.push_back(replace_binding);
196
196
  }
197
197
 
198
- // temporarily remove the BOUND_UNNEST and the child of the LOGICAL_UNNEST from the plan
198
+ // temporarily remove the BOUND_UNNESTs and the child of the LOGICAL_UNNEST from the plan
199
199
  D_ASSERT(curr_op->get()->type == LogicalOperatorType::LOGICAL_UNNEST);
200
200
  auto &unnest = curr_op->get()->Cast<LogicalUnnest>();
201
- auto temp_bound_unnest = std::move(unnest.expressions[0]);
201
+ vector<unique_ptr<Expression>> temp_bound_unnests;
202
+ for (auto &temp_bound_unnest : unnest.expressions) {
203
+ temp_bound_unnests.push_back(std::move(temp_bound_unnest));
204
+ }
205
+ D_ASSERT(unnest.children.size() == 1);
202
206
  auto temp_unnest_child = std::move(unnest.children[0]);
203
207
  unnest.expressions.clear();
204
208
  unnest.children.clear();
205
209
  // update the bindings of the plan
206
210
  updater.VisitOperator(*plan_ptr->get());
207
211
  updater.replace_bindings.clear();
208
- // add the child again
209
- unnest.expressions.push_back(std::move(temp_bound_unnest));
212
+ // add the children again
213
+ for (auto &temp_bound_unnest : temp_bound_unnests) {
214
+ unnest.expressions.push_back(std::move(temp_bound_unnest));
215
+ }
210
216
  unnest.children.push_back(std::move(temp_unnest_child));
211
217
 
212
218
  // add the LHS expressions to each LOGICAL_PROJECTION
@@ -256,6 +262,7 @@ void UnnestRewriter::UpdateBoundUnnestBindings(UnnestRewriterPlanUpdater &update
256
262
  D_ASSERT(curr_op->get()->type == LogicalOperatorType::LOGICAL_UNNEST);
257
263
  auto &unnest = curr_op->get()->Cast<LogicalUnnest>();
258
264
 
265
+ D_ASSERT(unnest.children.size() == 1);
259
266
  auto unnest_child_cols = unnest.children[0]->GetColumnBindings();
260
267
  for (idx_t delim_col_idx = 0; delim_col_idx < delim_columns.size(); delim_col_idx++) {
261
268
  for (idx_t child_col_idx = 0; child_col_idx < unnest_child_cols.size(); child_col_idx++) {
@@ -268,8 +275,9 @@ void UnnestRewriter::UpdateBoundUnnestBindings(UnnestRewriterPlanUpdater &update
268
275
  }
269
276
 
270
277
  // update bindings
271
- D_ASSERT(unnest.expressions.size() == 1);
272
- updater.VisitExpression(&unnest.expressions[0]);
278
+ for (auto &unnest_expr : unnest.expressions) {
279
+ updater.VisitExpression(&unnest_expr);
280
+ }
273
281
  updater.replace_bindings.clear();
274
282
  }
275
283
 
@@ -30,7 +30,7 @@ InsertStatement::InsertStatement(const InsertStatement &other)
30
30
  : SQLStatement(other), select_statement(unique_ptr_cast<SQLStatement, SelectStatement>(
31
31
  other.select_statement ? other.select_statement->Copy() : nullptr)),
32
32
  columns(other.columns), table(other.table), schema(other.schema), catalog(other.catalog),
33
- default_values(other.default_values) {
33
+ default_values(other.default_values), column_order(other.column_order) {
34
34
  cte_map = other.cte_map.Copy();
35
35
  for (auto &expr : other.returning_list) {
36
36
  returning_list.emplace_back(expr->Copy());
@@ -81,6 +81,9 @@ string InsertStatement::ToString() const {
81
81
  if (table_ref && !table_ref->alias.empty()) {
82
82
  result += StringUtil::Format(" AS %s", KeywordHelper::WriteOptionallyQuoted(table_ref->alias));
83
83
  }
84
+ if (column_order == InsertColumnOrder::INSERT_BY_NAME) {
85
+ result += " BY NAME";
86
+ }
84
87
  if (!columns.empty()) {
85
88
  result += " (";
86
89
  for (idx_t i = 0; i < columns.size(); i++) {
@@ -67,6 +67,16 @@ unique_ptr<InsertStatement> Transformer::TransformInsert(duckdb_libpgquery::PGNo
67
67
  result->on_conflict_info = DummyOnConflictClause(stmt->onConflictAlias, result->schema);
68
68
  result->table_ref = TransformRangeVar(stmt->relation);
69
69
  }
70
+ switch (stmt->insert_column_order) {
71
+ case duckdb_libpgquery::PG_INSERT_BY_POSITION:
72
+ result->column_order = InsertColumnOrder::INSERT_BY_POSITION;
73
+ break;
74
+ case duckdb_libpgquery::PG_INSERT_BY_NAME:
75
+ result->column_order = InsertColumnOrder::INSERT_BY_NAME;
76
+ break;
77
+ default:
78
+ throw InternalException("Unrecognized insert column order in TransformInsert");
79
+ }
70
80
  result->catalog = qname.catalog;
71
81
  return result;
72
82
  }
@@ -25,6 +25,7 @@
25
25
  #include "duckdb/planner/tableref/bound_basetableref.hpp"
26
26
  #include "duckdb/planner/tableref/bound_dummytableref.hpp"
27
27
  #include "duckdb/parser/parsed_expression_iterator.hpp"
28
+ #include "duckdb/storage/table_storage_info.hpp"
28
29
 
29
30
  namespace duckdb {
30
31
 
@@ -78,10 +79,10 @@ void ReplaceColumnBindings(Expression &expr, idx_t source, idx_t dest) {
78
79
  expr, [&](unique_ptr<Expression> &child) { ReplaceColumnBindings(*child, source, dest); });
79
80
  }
80
81
 
81
- void Binder::BindDoUpdateSetExpressions(const string &table_alias, LogicalInsert *insert, UpdateSetInfo &set_info,
82
- TableCatalogEntry &table) {
83
- D_ASSERT(insert->children.size() == 1);
84
- D_ASSERT(insert->children[0]->type == LogicalOperatorType::LOGICAL_PROJECTION);
82
+ void Binder::BindDoUpdateSetExpressions(const string &table_alias, LogicalInsert &insert, UpdateSetInfo &set_info,
83
+ TableCatalogEntry &table, TableStorageInfo &storage_info) {
84
+ D_ASSERT(insert.children.size() == 1);
85
+ D_ASSERT(insert.children[0]->type == LogicalOperatorType::LOGICAL_PROJECTION);
85
86
 
86
87
  vector<column_t> logical_column_ids;
87
88
  vector<string> column_names;
@@ -97,13 +98,13 @@ void Binder::BindDoUpdateSetExpressions(const string &table_alias, LogicalInsert
97
98
  if (column.Generated()) {
98
99
  throw BinderException("Cant update column \"%s\" because it is a generated column!", column.Name());
99
100
  }
100
- if (std::find(insert->set_columns.begin(), insert->set_columns.end(), column.Physical()) !=
101
- insert->set_columns.end()) {
101
+ if (std::find(insert.set_columns.begin(), insert.set_columns.end(), column.Physical()) !=
102
+ insert.set_columns.end()) {
102
103
  throw BinderException("Multiple assignments to same column \"%s\"", colname);
103
104
  }
104
- insert->set_columns.push_back(column.Physical());
105
+ insert.set_columns.push_back(column.Physical());
105
106
  logical_column_ids.push_back(column.Oid());
106
- insert->set_types.push_back(column.Type());
107
+ insert.set_types.push_back(column.Type());
107
108
  column_names.push_back(colname);
108
109
  if (expr->type == ExpressionType::VALUE_DEFAULT) {
109
110
  expr = ExpandDefaultExpression(column);
@@ -120,14 +121,13 @@ void Binder::BindDoUpdateSetExpressions(const string &table_alias, LogicalInsert
120
121
  throw BinderException("Expression in the DO UPDATE SET clause can not be a subquery");
121
122
  }
122
123
 
123
- insert->expressions.push_back(std::move(bound_expr));
124
+ insert.expressions.push_back(std::move(bound_expr));
124
125
  }
125
126
 
126
127
  // Figure out which columns are indexed on
127
128
  unordered_set<column_t> indexed_columns;
128
- auto &indexes = table.GetStorage().info->indexes.Indexes();
129
- for (auto &index : indexes) {
130
- for (auto &column_id : index->column_id_set) {
129
+ for (auto &index : storage_info.index_info) {
130
+ for (auto &column_id : index.column_set) {
131
131
  indexed_columns.insert(column_id);
132
132
  }
133
133
  }
@@ -142,16 +142,16 @@ void Binder::BindDoUpdateSetExpressions(const string &table_alias, LogicalInsert
142
142
  }
143
143
  }
144
144
 
145
- unique_ptr<UpdateSetInfo> CreateSetInfoForReplace(TableCatalogEntry &table, InsertStatement &insert) {
145
+ unique_ptr<UpdateSetInfo> CreateSetInfoForReplace(TableCatalogEntry &table, InsertStatement &insert,
146
+ TableStorageInfo &storage_info) {
146
147
  auto set_info = make_uniq<UpdateSetInfo>();
147
148
 
148
149
  auto &columns = set_info->columns;
149
150
  // Figure out which columns are indexed on
150
151
 
151
152
  unordered_set<column_t> indexed_columns;
152
- auto &indexes = table.GetStorage().info->indexes.Indexes();
153
- for (auto &index : indexes) {
154
- for (auto &column_id : index->column_id_set) {
153
+ for (auto &index : storage_info.index_info) {
154
+ for (auto &column_id : index.column_set) {
155
155
  indexed_columns.insert(column_id);
156
156
  }
157
157
  }
@@ -190,9 +190,6 @@ void Binder::BindOnConflictClause(LogicalInsert &insert, TableCatalogEntry &tabl
190
190
  insert.action_type = OnConflictAction::THROW;
191
191
  return;
192
192
  }
193
- if (!table.IsDuckTable()) {
194
- throw BinderException("ON CONFLICT clause is not yet supported for non-DuckDB tables");
195
- }
196
193
  D_ASSERT(stmt.table_ref->type == TableReferenceType::BASE_TABLE);
197
194
 
198
195
  // visit the table reference
@@ -208,6 +205,9 @@ void Binder::BindOnConflictClause(LogicalInsert &insert, TableCatalogEntry &tabl
208
205
  D_ASSERT(on_conflict.action_type != OnConflictAction::THROW);
209
206
  insert.action_type = on_conflict.action_type;
210
207
 
208
+ // obtain the table storage info
209
+ auto storage_info = table.GetStorageInfo(context);
210
+
211
211
  auto &columns = table.GetColumns();
212
212
  if (!on_conflict.indexed_columns.empty()) {
213
213
  // Bind the ON CONFLICT (<columns>)
@@ -232,18 +232,17 @@ void Binder::BindOnConflictClause(LogicalInsert &insert, TableCatalogEntry &tabl
232
232
  insert.on_conflict_filter.insert(col.Oid());
233
233
  }
234
234
  }
235
- auto &indexes = table.GetStorage().info->indexes;
236
235
  bool index_references_columns = false;
237
- indexes.Scan([&](Index &index) {
238
- if (!index.IsUnique()) {
239
- return false;
236
+ for (auto &index : storage_info.index_info) {
237
+ if (!index.is_unique) {
238
+ continue;
240
239
  }
241
- bool index_matches = insert.on_conflict_filter == index.column_id_set;
240
+ bool index_matches = insert.on_conflict_filter == index.column_set;
242
241
  if (index_matches) {
243
242
  index_references_columns = true;
243
+ break;
244
244
  }
245
- return index_matches;
246
- });
245
+ }
247
246
  if (!index_references_columns) {
248
247
  // Same as before, this is essentially a no-op, turning this into a DO THROW instead
249
248
  // But since this makes no logical sense, it's probably better to throw an error
@@ -254,21 +253,19 @@ void Binder::BindOnConflictClause(LogicalInsert &insert, TableCatalogEntry &tabl
254
253
  // When omitting the conflict target, the ON CONFLICT applies to every UNIQUE/PRIMARY KEY on the table
255
254
 
256
255
  // We check if there are any constraints on the table, if there aren't we throw an error.
257
- auto &indexes = table.GetStorage().info->indexes;
258
256
  idx_t found_matching_indexes = 0;
259
- indexes.Scan([&](Index &index) {
260
- if (!index.IsUnique()) {
261
- return false;
257
+ for (auto &index : storage_info.index_info) {
258
+ if (!index.is_unique) {
259
+ continue;
262
260
  }
263
261
  // does this work with multi-column indexes?
264
- auto &indexed_columns = index.column_id_set;
262
+ auto &indexed_columns = index.column_set;
265
263
  for (auto &column : table.GetColumns().Physical()) {
266
264
  if (indexed_columns.count(column.Physical().index)) {
267
265
  found_matching_indexes++;
268
266
  }
269
267
  }
270
- return false;
271
- });
268
+ }
272
269
  if (!found_matching_indexes) {
273
270
  throw BinderException(
274
271
  "There are no UNIQUE/PRIMARY KEY Indexes that refer to this table, ON CONFLICT is a no-op");
@@ -338,7 +335,7 @@ void Binder::BindOnConflictClause(LogicalInsert &insert, TableCatalogEntry &tabl
338
335
 
339
336
  if (insert.action_type == OnConflictAction::REPLACE) {
340
337
  D_ASSERT(on_conflict.set_info == nullptr);
341
- on_conflict.set_info = CreateSetInfoForReplace(table, stmt);
338
+ on_conflict.set_info = CreateSetInfoForReplace(table, stmt, storage_info);
342
339
  insert.action_type = OnConflictAction::UPDATE;
343
340
  }
344
341
  if (on_conflict.set_info && on_conflict.set_info->columns.empty()) {
@@ -374,7 +371,7 @@ void Binder::BindOnConflictClause(LogicalInsert &insert, TableCatalogEntry &tabl
374
371
  insert.do_update_condition = std::move(condition);
375
372
  }
376
373
 
377
- BindDoUpdateSetExpressions(table_alias, &insert, set_info, table);
374
+ BindDoUpdateSetExpressions(table_alias, insert, set_info, table, storage_info);
378
375
 
379
376
  // Get the column_ids we need to fetch later on from the conflicting tuples
380
377
  // of the original table, to execute the expressions
@@ -409,6 +406,26 @@ BoundStatement Binder::Bind(InsertStatement &stmt) {
409
406
  // Add CTEs as bindable
410
407
  AddCTEMap(stmt.cte_map);
411
408
 
409
+ auto values_list = stmt.GetValuesList();
410
+
411
+ // bind the root select node (if any)
412
+ BoundStatement root_select;
413
+ if (stmt.column_order == InsertColumnOrder::INSERT_BY_NAME) {
414
+ if (values_list) {
415
+ throw BinderException("INSERT BY NAME can only be used when inserting from a SELECT statement");
416
+ }
417
+ if (!stmt.columns.empty()) {
418
+ throw BinderException("INSERT BY NAME cannot be combined with an explicit column list");
419
+ }
420
+ D_ASSERT(stmt.select_statement);
421
+ // INSERT BY NAME - generate the columns from the names of the SELECT statement
422
+ auto select_binder = Binder::CreateBinder(context, this);
423
+ root_select = select_binder->Bind(*stmt.select_statement);
424
+ MoveCorrelatedExpressions(*select_binder);
425
+
426
+ stmt.columns = root_select.names;
427
+ }
428
+
412
429
  vector<LogicalIndex> named_column_map;
413
430
  if (!stmt.columns.empty() || stmt.default_values) {
414
431
  // insertion statement specifies column list
@@ -416,6 +433,10 @@ BoundStatement Binder::Bind(InsertStatement &stmt) {
416
433
  // create a mapping of (list index) -> (column index)
417
434
  case_insensitive_map_t<idx_t> column_name_map;
418
435
  for (idx_t i = 0; i < stmt.columns.size(); i++) {
436
+ auto entry = column_name_map.insert(make_pair(stmt.columns[i], i));
437
+ if (!entry.second) {
438
+ throw BinderException("Duplicate column name \"%s\" in INSERT", stmt.columns[i]);
439
+ }
419
440
  column_name_map[stmt.columns[i]] = i;
420
441
  auto column_index = table.GetColumnIndex(stmt.columns[i]);
421
442
  if (column_index.index == COLUMN_IDENTIFIER_ROW_ID) {
@@ -439,8 +460,8 @@ BoundStatement Binder::Bind(InsertStatement &stmt) {
439
460
  }
440
461
  }
441
462
  } else {
442
- // No columns specified, assume insertion into all columns
443
- // Intentionally don't populate 'column_index_map' as an indication of this
463
+ // insert by position and no columns specified - insertion into all columns of the table
464
+ // intentionally don't populate 'column_index_map' as an indication of this
444
465
  for (auto &col : table.GetColumns().Physical()) {
445
466
  named_column_map.push_back(col.Logical());
446
467
  insert->expected_types.push_back(col.Type());
@@ -457,7 +478,6 @@ BoundStatement Binder::Bind(InsertStatement &stmt) {
457
478
  idx_t expected_columns = stmt.columns.empty() ? table.GetColumns().PhysicalColumnCount() : stmt.columns.size();
458
479
 
459
480
  // special case: check if we are inserting from a VALUES statement
460
- auto values_list = stmt.GetValuesList();
461
481
  if (values_list) {
462
482
  auto &expr_list = values_list->Cast<ExpressionListRef>();
463
483
  expr_list.expected_types.resize(expected_columns);
@@ -490,10 +510,12 @@ BoundStatement Binder::Bind(InsertStatement &stmt) {
490
510
  // parse select statement and add to logical plan
491
511
  unique_ptr<LogicalOperator> root;
492
512
  if (stmt.select_statement) {
493
- auto select_binder = Binder::CreateBinder(context, this);
494
- auto root_select = select_binder->Bind(*stmt.select_statement);
495
- MoveCorrelatedExpressions(*select_binder);
496
-
513
+ if (stmt.column_order == InsertColumnOrder::INSERT_BY_POSITION) {
514
+ auto select_binder = Binder::CreateBinder(context, this);
515
+ root_select = select_binder->Bind(*stmt.select_statement);
516
+ MoveCorrelatedExpressions(*select_binder);
517
+ }
518
+ // inserting from a select - check if the column count matches
497
519
  CheckInsertColumnCountMismatch(expected_columns, root_select.types.size(), !stmt.columns.empty(),
498
520
  table.name.c_str());
499
521
 
@@ -64,7 +64,7 @@ data_ptr_t ArenaAllocator::Allocate(idx_t len) {
64
64
  do {
65
65
  current_capacity *= 2;
66
66
  } while (current_capacity < len);
67
- auto new_chunk = make_uniq<ArenaChunk>(allocator, current_capacity);
67
+ auto new_chunk = make_unsafe_uniq<ArenaChunk>(allocator, current_capacity);
68
68
  if (head) {
69
69
  head->prev = new_chunk.get();
70
70
  new_chunk->next = std::move(head);
@@ -7,7 +7,8 @@ namespace duckdb {
7
7
  BufferHandle::BufferHandle() : handle(nullptr), node(nullptr) {
8
8
  }
9
9
 
10
- BufferHandle::BufferHandle(shared_ptr<BlockHandle> handle, FileBuffer *node) : handle(std::move(handle)), node(node) {
10
+ BufferHandle::BufferHandle(shared_ptr<BlockHandle> handle_p, FileBuffer *node_p)
11
+ : handle(std::move(handle_p)), node(node_p) {
11
12
  }
12
13
 
13
14
  BufferHandle::BufferHandle(BufferHandle &&other) noexcept {
@@ -29,16 +30,6 @@ bool BufferHandle::IsValid() const {
29
30
  return node != nullptr;
30
31
  }
31
32
 
32
- data_ptr_t BufferHandle::Ptr() const {
33
- D_ASSERT(IsValid());
34
- return node->buffer;
35
- }
36
-
37
- data_ptr_t BufferHandle::Ptr() {
38
- D_ASSERT(IsValid());
39
- return node->buffer;
40
- }
41
-
42
33
  void BufferHandle::Destroy() {
43
34
  if (!handle || !IsValid()) {
44
35
  return;
@@ -32,7 +32,7 @@ void WriteOverflowStringsToDisk::WriteString(string_t string, block_id_t &result
32
32
  MiniZStream s;
33
33
  size_t compressed_size = 0;
34
34
  compressed_size = s.MaxCompressedLength(uncompressed_size);
35
- auto compressed_buf = unique_ptr<data_t[]>(new data_t[compressed_size]);
35
+ auto compressed_buf = make_unsafe_array<data_t>(compressed_size);
36
36
  s.Compress((const char *)string.GetData(), uncompressed_size, (char *)compressed_buf.get(), &compressed_size);
37
37
  string_t compressed_string((const char *)compressed_buf.get(), compressed_size);
38
38
 
@@ -292,13 +292,13 @@ string_t UncompressedStringStorage::ReadOverflowString(ColumnSegment &segment, V
292
292
  offset += 2 * sizeof(uint32_t);
293
293
 
294
294
  data_ptr_t decompression_ptr;
295
- unique_ptr<data_t[]> decompression_buffer;
295
+ unsafe_array_ptr<data_t> decompression_buffer;
296
296
 
297
297
  // If string is in single block we decompress straight from it, else we copy first
298
298
  if (remaining <= Storage::BLOCK_SIZE - sizeof(block_id_t) - offset) {
299
299
  decompression_ptr = handle.Ptr() + offset;
300
300
  } else {
301
- decompression_buffer = unique_ptr<data_t[]>(new data_t[compressed_size]);
301
+ decompression_buffer = make_unsafe_array<data_t>(compressed_size);
302
302
  auto target_ptr = decompression_buffer.get();
303
303
 
304
304
  // now append the string to the single buffer
@@ -7,7 +7,7 @@
7
7
  namespace duckdb {
8
8
 
9
9
  void ListStats::Construct(BaseStatistics &stats) {
10
- stats.child_stats = unique_ptr<BaseStatistics[]>(new BaseStatistics[1]);
10
+ stats.child_stats = unsafe_array_ptr<BaseStatistics>(new BaseStatistics[1]);
11
11
  BaseStatistics::Construct(stats.child_stats[0], ListType::GetChildType(stats.GetType()));
12
12
  }
13
13
 
@@ -7,7 +7,7 @@ namespace duckdb {
7
7
 
8
8
  void StructStats::Construct(BaseStatistics &stats) {
9
9
  auto &child_types = StructType::GetChildTypes(stats.GetType());
10
- stats.child_stats = unique_ptr<BaseStatistics[]>(new BaseStatistics[child_types.size()]);
10
+ stats.child_stats = unsafe_array_ptr<BaseStatistics>(new BaseStatistics[child_types.size()]);
11
11
  for (idx_t i = 0; i < child_types.size(); i++) {
12
12
  BaseStatistics::Construct(stats.child_stats[i], child_types[i].second);
13
13
  }
@@ -155,7 +155,7 @@ void ColumnScanState::Initialize(const LogicalType &type) {
155
155
 
156
156
  void CollectionScanState::Initialize(const vector<LogicalType> &types) {
157
157
  auto &column_ids = GetColumnIds();
158
- column_scans = unique_ptr<ColumnScanState[]>(new ColumnScanState[column_ids.size()]);
158
+ column_scans = make_unsafe_array<ColumnScanState>(column_ids.size());
159
159
  for (idx_t i = 0; i < column_ids.size(); i++) {
160
160
  if (column_ids[i] == COLUMN_IDENTIFIER_ROW_ID) {
161
161
  continue;
@@ -695,7 +695,7 @@ void RowGroup::InitializeAppend(RowGroupAppendState &append_state) {
695
695
  append_state.row_group = this;
696
696
  append_state.offset_in_row_group = this->count;
697
697
  // for each column, initialize the append state
698
- append_state.states = unique_ptr<ColumnAppendState[]>(new ColumnAppendState[GetColumnCount()]);
698
+ append_state.states = make_unsafe_array<ColumnAppendState>(GetColumnCount());
699
699
  for (idx_t i = 0; i < GetColumnCount(); i++) {
700
700
  auto &col_data = GetColumn(i);
701
701
  col_data.InitializeAppend(append_state.states[i]);
@@ -526,43 +526,59 @@ void RowGroupCollection::Update(TransactionData transaction, row_t *ids, const v
526
526
  void RowGroupCollection::RemoveFromIndexes(TableIndexList &indexes, Vector &row_identifiers, idx_t count) {
527
527
  auto row_ids = FlatVector::GetData<row_t>(row_identifiers);
528
528
 
529
- // figure out which row_group to fetch from
530
- auto row_group = row_groups->GetSegment(row_ids[0]);
531
- auto row_group_vector_idx = (row_ids[0] - row_group->start) / STANDARD_VECTOR_SIZE;
532
- auto base_row_id = row_group_vector_idx * STANDARD_VECTOR_SIZE + row_group->start;
533
-
534
- // create a selection vector from the row_ids
535
- SelectionVector sel(STANDARD_VECTOR_SIZE);
536
- for (idx_t i = 0; i < count; i++) {
537
- auto row_in_vector = row_ids[i] - base_row_id;
538
- D_ASSERT(row_in_vector < STANDARD_VECTOR_SIZE);
539
- sel.set_index(i, row_in_vector);
540
- }
541
-
542
- // now fetch the columns from that row_group
543
- TableScanState state;
544
- state.table_state.max_row = row_start + total_rows;
545
-
529
+ // initialize the fetch state
546
530
  // FIXME: we do not need to fetch all columns, only the columns required by the indices!
531
+ TableScanState state;
547
532
  vector<column_t> column_ids;
548
533
  column_ids.reserve(types.size());
549
534
  for (idx_t i = 0; i < types.size(); i++) {
550
535
  column_ids.push_back(i);
551
536
  }
552
537
  state.Initialize(std::move(column_ids));
538
+ state.table_state.max_row = row_start + total_rows;
553
539
 
540
+ // initialize the fetch chunk
554
541
  DataChunk result;
555
542
  result.Initialize(GetAllocator(), types);
556
543
 
557
- state.table_state.Initialize(GetTypes());
558
- row_group->InitializeScanWithOffset(state.table_state, row_group_vector_idx);
559
- row_group->ScanCommitted(state.table_state, result, TableScanType::TABLE_SCAN_COMMITTED_ROWS);
560
- result.Slice(sel, count);
544
+ SelectionVector sel(STANDARD_VECTOR_SIZE);
545
+ // now iterate over the row ids
546
+ for (idx_t r = 0; r < count;) {
547
+ result.Reset();
548
+ // figure out which row_group to fetch from
549
+ auto row_id = row_ids[r];
550
+ auto row_group = row_groups->GetSegment(row_id);
551
+ auto row_group_vector_idx = (row_id - row_group->start) / STANDARD_VECTOR_SIZE;
552
+ auto base_row_id = row_group_vector_idx * STANDARD_VECTOR_SIZE + row_group->start;
553
+
554
+ // fetch the current vector
555
+ state.table_state.Initialize(GetTypes());
556
+ row_group->InitializeScanWithOffset(state.table_state, row_group_vector_idx);
557
+ row_group->ScanCommitted(state.table_state, result, TableScanType::TABLE_SCAN_COMMITTED_ROWS);
558
+ result.Verify();
559
+
560
+ // check for any remaining row ids if they also fall into this vector
561
+ // we try to handle as many rows as possible at the same time
562
+ idx_t sel_count = 0;
563
+ for (; r < count; r++) {
564
+ idx_t current_row = idx_t(row_ids[r]);
565
+ if (current_row < base_row_id || current_row >= base_row_id + result.size()) {
566
+ // this row-id does not fall into the current chunk - break
567
+ break;
568
+ }
569
+ auto row_in_vector = current_row - base_row_id;
570
+ D_ASSERT(row_in_vector < result.size());
571
+ sel.set_index(sel_count++, row_in_vector);
572
+ }
573
+ D_ASSERT(sel_count > 0);
574
+ // slice the vector with all rows that are present in this vector and erase from the index
575
+ result.Slice(sel, sel_count);
561
576
 
562
- indexes.Scan([&](Index &index) {
563
- index.Delete(result, row_identifiers);
564
- return false;
565
- });
577
+ indexes.Scan([&](Index &index) {
578
+ index.Delete(result, row_identifiers);
579
+ return false;
580
+ });
581
+ }
566
582
  }
567
583
 
568
584
  void RowGroupCollection::UpdateColumn(TransactionData transaction, Vector &row_ids, const vector<column_t> &column_path,
@@ -1042,8 +1042,9 @@ static idx_t SortSelectionVector(SelectionVector &sel, idx_t count, row_t *ids)
1042
1042
  return pos;
1043
1043
  }
1044
1044
 
1045
- UpdateInfo *CreateEmptyUpdateInfo(TransactionData transaction, idx_t type_size, idx_t count, unique_ptr<char[]> &data) {
1046
- data = unique_ptr<char[]>(new char[sizeof(UpdateInfo) + (sizeof(sel_t) + type_size) * STANDARD_VECTOR_SIZE]);
1045
+ UpdateInfo *CreateEmptyUpdateInfo(TransactionData transaction, idx_t type_size, idx_t count,
1046
+ unsafe_array_ptr<char> &data) {
1047
+ data = make_unsafe_array<char>(sizeof(UpdateInfo) + (sizeof(sel_t) + type_size) * STANDARD_VECTOR_SIZE);
1047
1048
  auto update_info = (UpdateInfo *)data.get();
1048
1049
  update_info->max = STANDARD_VECTOR_SIZE;
1049
1050
  update_info->tuples = (sel_t *)(((data_ptr_t)update_info) + sizeof(UpdateInfo));
@@ -1109,7 +1110,7 @@ void UpdateSegment::Update(TransactionData transaction, idx_t column_index, Vect
1109
1110
  }
1110
1111
  node = node->next;
1111
1112
  }
1112
- unique_ptr<char[]> update_info_data;
1113
+ unsafe_array_ptr<char> update_info_data;
1113
1114
  if (!node) {
1114
1115
  // no updates made yet by this transaction: initialize the update info to empty
1115
1116
  if (transaction.transaction) {
@@ -1144,8 +1145,8 @@ void UpdateSegment::Update(TransactionData transaction, idx_t column_index, Vect
1144
1145
  auto result = make_uniq<UpdateNodeData>();
1145
1146
 
1146
1147
  result->info = make_uniq<UpdateInfo>();
1147
- result->tuples = unique_ptr<sel_t[]>(new sel_t[STANDARD_VECTOR_SIZE]);
1148
- result->tuple_data = unique_ptr<data_t[]>(new data_t[STANDARD_VECTOR_SIZE * type_size]);
1148
+ result->tuples = make_unsafe_array<sel_t>(STANDARD_VECTOR_SIZE);
1149
+ result->tuple_data = make_unsafe_array<data_t>(STANDARD_VECTOR_SIZE * type_size);
1149
1150
  result->info->tuples = result->tuples.get();
1150
1151
  result->info->tuple_data = result->tuple_data.get();
1151
1152
  result->info->version_number = TRANSACTION_ID_START - 1;
@@ -1153,7 +1154,7 @@ void UpdateSegment::Update(TransactionData transaction, idx_t column_index, Vect
1153
1154
  InitializeUpdateInfo(*result->info, ids, sel, count, vector_index, vector_offset);
1154
1155
 
1155
1156
  // now create the transaction level update info in the undo log
1156
- unique_ptr<char[]> update_info_data;
1157
+ unsafe_array_ptr<char> update_info_data;
1157
1158
  UpdateInfo *transaction_node;
1158
1159
  if (transaction.transaction) {
1159
1160
  transaction_node = transaction.transaction->CreateUpdateInfo(type_size, count);
@@ -381,8 +381,7 @@ static inline size_t compressBulk(SymbolTable &symbolTable, size_t nlines, size_
381
381
  size_t curLine, suffixLim = symbolTable.suffixLim;
382
382
  u8 byteLim = symbolTable.nSymbols + symbolTable.zeroTerminated - symbolTable.lenHisto[0];
383
383
 
384
- u8 buf[512+7]; /* +7 sentinel is to avoid 8-byte unaligned-loads going beyond 511 out-of-bounds */
385
- memset(buf+511, 0, 8); /* and initialize the sentinal bytes */
384
+ u8 buf[512+7] = {}; /* +7 sentinel is to avoid 8-byte unaligned-loads going beyond 511 out-of-bounds */
386
385
 
387
386
  // three variants are possible. dead code falls away since the bool arguments are constants
388
387
  auto compressVariant = [&](bool noSuffixOpt, bool avoidBranch) {
@@ -814,4 +814,13 @@ typedef enum PGOnConflictActionAlias {
814
814
  PG_ONCONFLICT_ALIAS_IGNORE /* INSERT OR IGNORE */
815
815
  } PGOnConflictActionAlias;
816
816
 
817
+ /*
818
+ * PGInsertColumnOrder
819
+ * "INSERT BY [POSITION|NAME]"
820
+ */
821
+ typedef enum PGInsertColumnOrder {
822
+ PG_INSERT_BY_POSITION, /* INSERT BY POSITION (default behavior) */
823
+ PG_INSERT_BY_NAME, /* INSERT BY NAME */
824
+ } PGInsertColumnOrder;
825
+
817
826
  }
@@ -1045,11 +1045,11 @@ typedef struct PGInferClause {
1045
1045
  */
1046
1046
  typedef struct PGOnConflictClause {
1047
1047
  PGNodeTag type;
1048
- PGOnConflictAction action; /* DO NOTHING or UPDATE? */
1049
- PGInferClause *infer; /* Optional index inference clause */
1050
- PGList *targetList; /* the target list (of PGResTarget) */
1051
- PGNode *whereClause; /* qualifications */
1052
- int location; /* token location, or -1 if unknown */
1048
+ PGOnConflictAction action; /* DO NOTHING or UPDATE? */
1049
+ PGInferClause *infer; /* Optional index inference clause */
1050
+ PGList *targetList; /* the target list (of PGResTarget) */
1051
+ PGNode *whereClause; /* qualifications */
1052
+ int location; /* token location, or -1 if unknown */
1053
1053
  } PGOnConflictClause;
1054
1054
 
1055
1055
  /*
@@ -1125,14 +1125,15 @@ typedef struct PGRawStmt {
1125
1125
  */
1126
1126
  typedef struct PGInsertStmt {
1127
1127
  PGNodeTag type;
1128
- PGRangeVar *relation; /* relation to insert into */
1129
- PGList *cols; /* optional: names of the target columns */
1130
- PGNode *selectStmt; /* the source SELECT/VALUES, or NULL */
1128
+ PGRangeVar *relation; /* relation to insert into */
1129
+ PGList *cols; /* optional: names of the target columns */
1130
+ PGNode *selectStmt; /* the source SELECT/VALUES, or NULL */
1131
1131
  PGOnConflictActionAlias onConflictAlias; /* the (optional) shorthand provided for the onConflictClause */
1132
- PGOnConflictClause *onConflictClause; /* ON CONFLICT clause */
1133
- PGList *returningList; /* list of expressions to return */
1134
- PGWithClause *withClause; /* WITH clause */
1135
- PGOverridingKind override; /* OVERRIDING clause */
1132
+ PGOnConflictClause *onConflictClause; /* ON CONFLICT clause */
1133
+ PGList *returningList; /* list of expressions to return */
1134
+ PGWithClause *withClause; /* WITH clause */
1135
+ PGOverridingKind override; /* OVERRIDING clause */
1136
+ PGInsertColumnOrder insert_column_order; /* INSERT BY NAME or INSERT BY POSITION */
1136
1137
  } PGInsertStmt;
1137
1138
 
1138
1139
  /* ----------------------
@@ -1065,9 +1065,10 @@ typedef union YYSTYPE
1065
1065
  PGLockWaitPolicy lockwaitpolicy;
1066
1066
  PGSubLinkType subquerytype;
1067
1067
  PGViewCheckOption viewcheckoption;
1068
+ PGInsertColumnOrder bynameorposition;
1068
1069
  }
1069
1070
  /* Line 1529 of yacc.c. */
1070
- #line 1071 "third_party/libpg_query/grammar/grammar_out.hpp"
1071
+ #line 1072 "third_party/libpg_query/grammar/grammar_out.hpp"
1071
1072
  YYSTYPE;
1072
1073
  # define yystype YYSTYPE /* obsolescent; will be withdrawn */
1073
1074
  # define YYSTYPE_IS_DECLARED 1