duckdb 0.7.2-dev2552.0 → 0.7.2-dev2675.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (66) hide show
  1. package/binding.gyp +7 -7
  2. package/package.json +2 -2
  3. package/src/duckdb/extension/parquet/parquet_statistics.cpp +3 -0
  4. package/src/duckdb/src/catalog/catalog_entry/duck_table_entry.cpp +2 -2
  5. package/src/duckdb/src/common/radix_partitioning.cpp +1 -1
  6. package/src/duckdb/src/execution/index/art/art.cpp +286 -269
  7. package/src/duckdb/src/execution/index/art/art_key.cpp +22 -32
  8. package/src/duckdb/src/execution/index/art/fixed_size_allocator.cpp +224 -0
  9. package/src/duckdb/src/execution/index/art/iterator.cpp +142 -123
  10. package/src/duckdb/src/execution/index/art/leaf.cpp +319 -170
  11. package/src/duckdb/src/execution/index/art/leaf_segment.cpp +42 -0
  12. package/src/duckdb/src/execution/index/art/node.cpp +444 -379
  13. package/src/duckdb/src/execution/index/art/node16.cpp +178 -114
  14. package/src/duckdb/src/execution/index/art/node256.cpp +117 -79
  15. package/src/duckdb/src/execution/index/art/node4.cpp +169 -114
  16. package/src/duckdb/src/execution/index/art/node48.cpp +175 -105
  17. package/src/duckdb/src/execution/index/art/prefix.cpp +405 -127
  18. package/src/duckdb/src/execution/index/art/prefix_segment.cpp +42 -0
  19. package/src/duckdb/src/execution/index/art/swizzleable_pointer.cpp +10 -85
  20. package/src/duckdb/src/execution/operator/join/physical_index_join.cpp +2 -1
  21. package/src/duckdb/src/execution/operator/persistent/base_csv_reader.cpp +2 -2
  22. package/src/duckdb/src/execution/operator/persistent/csv_reader_options.cpp +2 -0
  23. package/src/duckdb/src/execution/operator/schema/physical_create_index.cpp +11 -12
  24. package/src/duckdb/src/function/table/read_csv.cpp +5 -1
  25. package/src/duckdb/src/function/table/version/pragma_version.cpp +2 -2
  26. package/src/duckdb/src/include/duckdb/common/queue.hpp +1 -1
  27. package/src/duckdb/src/include/duckdb/execution/index/art/art.hpp +53 -45
  28. package/src/duckdb/src/include/duckdb/execution/index/art/art_key.hpp +29 -24
  29. package/src/duckdb/src/include/duckdb/execution/index/art/fixed_size_allocator.hpp +114 -0
  30. package/src/duckdb/src/include/duckdb/execution/index/art/iterator.hpp +26 -20
  31. package/src/duckdb/src/include/duckdb/execution/index/art/leaf.hpp +63 -39
  32. package/src/duckdb/src/include/duckdb/execution/index/art/leaf_segment.hpp +36 -0
  33. package/src/duckdb/src/include/duckdb/execution/index/art/node.hpp +98 -116
  34. package/src/duckdb/src/include/duckdb/execution/index/art/node16.hpp +48 -36
  35. package/src/duckdb/src/include/duckdb/execution/index/art/node256.hpp +52 -35
  36. package/src/duckdb/src/include/duckdb/execution/index/art/node4.hpp +46 -36
  37. package/src/duckdb/src/include/duckdb/execution/index/art/node48.hpp +57 -35
  38. package/src/duckdb/src/include/duckdb/execution/index/art/prefix.hpp +57 -50
  39. package/src/duckdb/src/include/duckdb/execution/index/art/prefix_segment.hpp +40 -0
  40. package/src/duckdb/src/include/duckdb/execution/index/art/swizzleable_pointer.hpp +38 -31
  41. package/src/duckdb/src/include/duckdb/execution/operator/persistent/csv_file_handle.hpp +2 -1
  42. package/src/duckdb/src/include/duckdb/execution/operator/persistent/csv_reader_options.hpp +2 -0
  43. package/src/duckdb/src/include/duckdb/parser/statement/insert_statement.hpp +4 -1
  44. package/src/duckdb/src/include/duckdb/parser/transformer.hpp +2 -1
  45. package/src/duckdb/src/include/duckdb/storage/buffer_manager.hpp +0 -5
  46. package/src/duckdb/src/include/duckdb/storage/index.hpp +13 -28
  47. package/src/duckdb/src/include/duckdb/storage/standard_buffer_manager.hpp +0 -2
  48. package/src/duckdb/src/include/duckdb/transaction/cleanup_state.hpp +5 -0
  49. package/src/duckdb/src/include/duckdb.h +26 -0
  50. package/src/duckdb/src/main/capi/helper-c.cpp +7 -0
  51. package/src/duckdb/src/parser/statement/insert_statement.cpp +15 -6
  52. package/src/duckdb/src/parser/transform/constraint/transform_constraint.cpp +1 -1
  53. package/src/duckdb/src/parser/transform/expression/transform_function.cpp +18 -5
  54. package/src/duckdb/src/parser/transform/statement/transform_insert.cpp +5 -7
  55. package/src/duckdb/src/planner/binder/statement/bind_create.cpp +20 -7
  56. package/src/duckdb/src/planner/binder/statement/bind_insert.cpp +14 -9
  57. package/src/duckdb/src/storage/checkpoint_manager.cpp +11 -9
  58. package/src/duckdb/src/storage/data_table.cpp +6 -3
  59. package/src/duckdb/src/storage/index.cpp +18 -6
  60. package/src/duckdb/src/storage/local_storage.cpp +8 -2
  61. package/src/duckdb/src/storage/standard_buffer_manager.cpp +0 -9
  62. package/src/duckdb/src/storage/wal_replay.cpp +1 -1
  63. package/src/duckdb/src/transaction/cleanup_state.cpp +6 -0
  64. package/src/duckdb/src/transaction/undo_buffer.cpp +8 -0
  65. package/src/duckdb/ub_extension_icu_third_party_icu_i18n.cpp +4 -4
  66. package/src/duckdb/ub_src_execution_index_art.cpp +7 -1
@@ -32,8 +32,7 @@ struct IndexScanState;
32
32
  class Index {
33
33
  public:
34
34
  Index(AttachedDatabase &db, IndexType type, TableIOManager &table_io_manager, const vector<column_t> &column_ids,
35
- const vector<unique_ptr<Expression>> &unbound_expressions, IndexConstraintType constraint_type,
36
- bool track_memory);
35
+ const vector<unique_ptr<Expression>> &unbound_expressions, IndexConstraintType constraint_type);
37
36
  virtual ~Index() = default;
38
37
 
39
38
  //! The type of the index
@@ -57,25 +56,21 @@ public:
57
56
  AttachedDatabase &db;
58
57
  //! Buffer manager of the database instance
59
58
  BufferManager &buffer_manager;
60
- //! The size of the index in memory
61
- //! This does not track the size of the index meta information, but only allocated nodes and leaves
62
- idx_t memory_size;
63
- //! Flag determining if this index's size is tracked by the buffer manager
64
- bool track_memory;
65
59
 
66
60
  public:
67
61
  //! Initialize a single predicate scan on the index with the given expression and column IDs
68
62
  virtual unique_ptr<IndexScanState> InitializeScanSinglePredicate(const Transaction &transaction, const Value &value,
69
- ExpressionType expressionType) = 0;
63
+ const ExpressionType expression_type) = 0;
70
64
  //! Initialize a two predicate scan on the index with the given expression and column IDs
71
- virtual unique_ptr<IndexScanState> InitializeScanTwoPredicates(Transaction &transaction, const Value &low_value,
72
- ExpressionType low_expression_type,
65
+ virtual unique_ptr<IndexScanState> InitializeScanTwoPredicates(const Transaction &transaction,
66
+ const Value &low_value,
67
+ const ExpressionType low_expression_type,
73
68
  const Value &high_value,
74
- ExpressionType high_expression_type) = 0;
69
+ const ExpressionType high_expression_type) = 0;
75
70
  //! Performs a lookup on the index, fetching up to max_count result IDs. Returns true if all row IDs were fetched,
76
71
  //! and false otherwise
77
- virtual bool Scan(Transaction &transaction, DataTable &table, IndexScanState &state, idx_t max_count,
78
- vector<row_t> &result_ids) = 0;
72
+ virtual bool Scan(const Transaction &transaction, const DataTable &table, IndexScanState &state,
73
+ const idx_t max_count, vector<row_t> &result_ids) = 0;
79
74
 
80
75
  //! Obtain a lock on the index
81
76
  virtual void InitializeLock(IndexLock &state);
@@ -104,23 +99,13 @@ public:
104
99
  //! Obtains a lock and calls MergeIndexes while holding that lock
105
100
  bool MergeIndexes(Index &other_index);
106
101
 
102
+ //! Traverses an ART and vacuums the qualifying nodes. The lock obtained from InitializeLock must be held
103
+ virtual void Vacuum(IndexLock &state) = 0;
104
+ //! Obtains a lock and calls Vacuum while holding that lock
105
+ void Vacuum();
106
+
107
107
  //! Returns the string representation of an index
108
108
  virtual string ToString() = 0;
109
- //! Verifies that the in-memory size value of the index matches its actual size
110
- virtual void Verify() = 0;
111
- //! Increases the memory size by the difference between the old size and the current size
112
- //! and performs verifications
113
- virtual void IncreaseAndVerifyMemorySize(idx_t old_memory_size) = 0;
114
-
115
- //! Increases the in-memory size value
116
- inline void IncreaseMemorySize(idx_t size) {
117
- memory_size += size;
118
- };
119
- //! Decreases the in-memory size value
120
- inline void DecreaseMemorySize(idx_t size) {
121
- D_ASSERT(memory_size >= size);
122
- memory_size -= size;
123
- };
124
109
 
125
110
  //! Returns true if the index is affected by updates on the specified column IDs, and false otherwise
126
111
  bool IndexIsUpdated(const vector<PhysicalIndex> &column_ids) const;
@@ -45,8 +45,6 @@ public:
45
45
  shared_ptr<BlockHandle> RegisterSmallMemory(idx_t block_size) final override;
46
46
 
47
47
  idx_t GetUsedMemory() const final override;
48
- void IncreaseUsedMemory(idx_t amount, bool unsafe = false) final override;
49
- void DecreaseUsedMemory(idx_t amount) final override;
50
48
  idx_t GetMaxMemory() const final override;
51
49
 
52
50
  //! Allocate an in-memory buffer with a single pin.
@@ -10,8 +10,10 @@
10
10
 
11
11
  #include "duckdb/transaction/undo_buffer.hpp"
12
12
  #include "duckdb/common/types/data_chunk.hpp"
13
+ #include "duckdb/common/unordered_map.hpp"
13
14
 
14
15
  namespace duckdb {
16
+
15
17
  class DataTable;
16
18
 
17
19
  struct DeleteInfo;
@@ -22,6 +24,9 @@ public:
22
24
  CleanupState();
23
25
  ~CleanupState();
24
26
 
27
+ // all tables with indexes that possibly need a vacuum (after e.g. a delete)
28
+ unordered_map<string, optional_ptr<DataTable>> indexed_tables;
29
+
25
30
  public:
26
31
  void CleanupEntry(UndoFlags type, data_ptr_t data);
27
32
 
@@ -190,6 +190,23 @@ typedef struct {
190
190
  idx_t size;
191
191
  } duckdb_string;
192
192
 
193
+ /*
194
+ The internal data representation of a VARCHAR/BLOB column
195
+ */
196
+ typedef struct {
197
+ union {
198
+ struct {
199
+ uint32_t length;
200
+ char prefix[4];
201
+ char *ptr;
202
+ } pointer;
203
+ struct {
204
+ uint32_t length;
205
+ char inlined[12];
206
+ } inlined;
207
+ } value;
208
+ } duckdb_string_t;
209
+
193
210
  typedef struct {
194
211
  void *data;
195
212
  idx_t size;
@@ -298,6 +315,7 @@ typedef enum {
298
315
  /*!
299
316
  Creates a new database or opens an existing database file stored at the given path.
300
317
  If no path is given a new in-memory database is created instead.
318
+ The instantiated database should be closed with 'duckdb_close'
301
319
 
302
320
  * path: Path to the database file on disk, or `nullptr` or `:memory:` to open an in-memory database.
303
321
  * out_database: The result database object.
@@ -331,6 +349,7 @@ DUCKDB_API void duckdb_close(duckdb_database *database);
331
349
  /*!
332
350
  Opens a connection to a database. Connections are required to query the database, and store transactional state
333
351
  associated with the connection.
352
+ The instantiated connection should be closed using 'duckdb_disconnect'
334
353
 
335
354
  * database: The database file to connect to.
336
355
  * out_connection: The result connection object.
@@ -751,6 +770,13 @@ This is the amount of tuples that will fit into a data chunk created by `duckdb_
751
770
  */
752
771
  DUCKDB_API idx_t duckdb_vector_size();
753
772
 
773
+ /*!
774
+ Whether or not the duckdb_string_t value is inlined.
775
+ This means that the data of the string does not have a separate allocation.
776
+
777
+ */
778
+ DUCKDB_API bool duckdb_string_is_inlined(duckdb_string_t string);
779
+
754
780
  //===--------------------------------------------------------------------===//
755
781
  // Date/Time/Timestamp Helpers
756
782
  //===--------------------------------------------------------------------===//
@@ -186,3 +186,10 @@ void duckdb_free(void *ptr) {
186
186
  idx_t duckdb_vector_size() {
187
187
  return STANDARD_VECTOR_SIZE;
188
188
  }
189
+
190
+ bool duckdb_string_is_inlined(duckdb_string_t string_p) {
191
+ static_assert(sizeof(duckdb_string_t) == sizeof(duckdb::string_t),
192
+ "duckdb_string_t should have the same memory layout as duckdb::string_t");
193
+ auto &string = *(duckdb::string_t *)(&string_p);
194
+ return string.IsInlined();
195
+ }
@@ -27,9 +27,10 @@ InsertStatement::InsertStatement()
27
27
  }
28
28
 
29
29
  InsertStatement::InsertStatement(const InsertStatement &other)
30
- : SQLStatement(other),
31
- select_statement(unique_ptr_cast<SQLStatement, SelectStatement>(other.select_statement->Copy())),
32
- columns(other.columns), table(other.table), schema(other.schema), catalog(other.catalog) {
30
+ : SQLStatement(other), select_statement(unique_ptr_cast<SQLStatement, SelectStatement>(
31
+ other.select_statement ? other.select_statement->Copy() : nullptr)),
32
+ columns(other.columns), table(other.table), schema(other.schema), catalog(other.catalog),
33
+ default_values(other.default_values) {
33
34
  cte_map = other.cte_map.Copy();
34
35
  for (auto &expr : other.returning_list) {
35
36
  returning_list.emplace_back(expr->Copy());
@@ -93,10 +94,15 @@ string InsertStatement::ToString() const {
93
94
  result += " ";
94
95
  auto values_list = GetValuesList();
95
96
  if (values_list) {
97
+ D_ASSERT(!default_values);
96
98
  values_list->alias = string();
97
99
  result += values_list->ToString();
98
- } else {
100
+ } else if (select_statement) {
101
+ D_ASSERT(!default_values);
99
102
  result += select_statement->ToString();
103
+ } else {
104
+ D_ASSERT(default_values);
105
+ result += "DEFAULT VALUES";
100
106
  }
101
107
  if (!or_replace_shorthand_set && on_conflict_info) {
102
108
  auto &conflict_info = *on_conflict_info;
@@ -155,7 +161,10 @@ unique_ptr<SQLStatement> InsertStatement::Copy() const {
155
161
  return unique_ptr<InsertStatement>(new InsertStatement(*this));
156
162
  }
157
163
 
158
- ExpressionListRef *InsertStatement::GetValuesList() const {
164
+ optional_ptr<ExpressionListRef> InsertStatement::GetValuesList() const {
165
+ if (!select_statement) {
166
+ return nullptr;
167
+ }
159
168
  if (select_statement->node->type != QueryNodeType::SELECT_NODE) {
160
169
  return nullptr;
161
170
  }
@@ -178,7 +187,7 @@ ExpressionListRef *InsertStatement::GetValuesList() const {
178
187
  if (!node.from_table || node.from_table->type != TableReferenceType::EXPRESSION_LIST) {
179
188
  return nullptr;
180
189
  }
181
- return (ExpressionListRef *)node.from_table.get();
190
+ return &node.from_table->Cast<ExpressionListRef>();
182
191
  }
183
192
 
184
193
  } // namespace duckdb
@@ -108,7 +108,7 @@ unique_ptr<Constraint> Transformer::TransformConstraint(duckdb_libpgquery::PGLis
108
108
  pk_columns.emplace_back(reinterpret_cast<duckdb_libpgquery::PGValue *>(kc->data.ptr_value)->val.str);
109
109
  }
110
110
  }
111
- if (pk_columns.size() != fk_columns.size()) {
111
+ if (!pk_columns.empty() && pk_columns.size() != fk_columns.size()) {
112
112
  throw ParserException("The number of referencing and referenced columns for foreign keys must be the same");
113
113
  }
114
114
  return make_uniq<ForeignKeyConstraint>(pk_columns, fk_columns, std::move(fk_info));
@@ -13,15 +13,24 @@
13
13
 
14
14
  namespace duckdb {
15
15
 
16
- void Transformer::TransformWindowDef(duckdb_libpgquery::PGWindowDef *window_spec, WindowExpression *expr) {
16
+ void Transformer::TransformWindowDef(duckdb_libpgquery::PGWindowDef *window_spec, WindowExpression *expr,
17
+ const char *window_name) {
17
18
  D_ASSERT(window_spec);
18
19
  D_ASSERT(expr);
19
20
 
20
21
  // next: partitioning/ordering expressions
21
22
  if (window_spec->partitionClause) {
23
+ if (window_name && !expr->partitions.empty()) {
24
+ throw ParserException("Cannot override PARTITION BY clause of window \"%s\"", window_name);
25
+ }
22
26
  TransformExpressionList(*window_spec->partitionClause, expr->partitions);
23
27
  }
24
- TransformOrderBy(window_spec->orderClause, expr->orders);
28
+ if (window_spec->orderClause) {
29
+ if (window_name && !expr->orders.empty()) {
30
+ throw ParserException("Cannot override ORDER BY clause of window \"%s\"", window_name);
31
+ }
32
+ TransformOrderBy(window_spec->orderClause, expr->orders);
33
+ }
25
34
  }
26
35
 
27
36
  void Transformer::TransformWindowFrame(duckdb_libpgquery::PGWindowDef *window_spec, WindowExpression *expr) {
@@ -198,6 +207,7 @@ unique_ptr<ParsedExpression> Transformer::TransformFuncCall(duckdb_libpgquery::P
198
207
  D_ASSERT(window_spec);
199
208
  }
200
209
  auto window_ref = window_spec;
210
+ auto window_name = window_ref->refname;
201
211
  if (window_ref->refname) {
202
212
  auto it = window_clauses.find(StringUtil::Lower(string(window_spec->refname)));
203
213
  if (it == window_clauses.end()) {
@@ -208,6 +218,9 @@ unique_ptr<ParsedExpression> Transformer::TransformFuncCall(duckdb_libpgquery::P
208
218
  }
209
219
  in_window_definition = true;
210
220
  TransformWindowDef(window_ref, expr.get());
221
+ if (window_ref != window_spec) {
222
+ TransformWindowDef(window_spec, expr.get(), window_name);
223
+ }
211
224
  TransformWindowFrame(window_spec, expr.get());
212
225
  in_window_definition = false;
213
226
  expr->query_location = root->location;
@@ -299,9 +312,9 @@ unique_ptr<ParsedExpression> Transformer::TransformFuncCall(duckdb_libpgquery::P
299
312
  std::move(filter_expr), std::move(order_bys),
300
313
  root->agg_distinct, false, root->export_state);
301
314
  lowercase_name = "list_sort";
302
- order_bys.reset();
303
- filter_expr.reset();
304
- children.clear();
315
+ order_bys.reset(); // NOLINT
316
+ filter_expr.reset(); // NOLINT
317
+ children.clear(); // NOLINT
305
318
  children.emplace_back(std::move(unordered));
306
319
  children.emplace_back(std::move(sense));
307
320
  children.emplace_back(std::move(nulls));
@@ -26,12 +26,6 @@ unique_ptr<InsertStatement> Transformer::TransformInsert(duckdb_libpgquery::PGNo
26
26
  auto stmt = reinterpret_cast<duckdb_libpgquery::PGInsertStmt *>(node);
27
27
  D_ASSERT(stmt);
28
28
 
29
- if (!stmt->selectStmt) {
30
- // TODO: This should be easy to add, we already support DEFAULT in the values list,
31
- // this could probably just be transformed into VALUES (DEFAULT, DEFAULT, DEFAULT, ..) in the Binder
32
- throw ParserException("DEFAULT VALUES clause is not supported!");
33
- }
34
-
35
29
  auto result = make_uniq<InsertStatement>();
36
30
  if (stmt->withClause) {
37
31
  TransformCTE(reinterpret_cast<duckdb_libpgquery::PGWithClause *>(stmt->withClause), result->cte_map);
@@ -49,7 +43,11 @@ unique_ptr<InsertStatement> Transformer::TransformInsert(duckdb_libpgquery::PGNo
49
43
  if (stmt->returningList) {
50
44
  Transformer::TransformExpressionList(*(stmt->returningList), result->returning_list);
51
45
  }
52
- result->select_statement = TransformSelect(stmt->selectStmt, false);
46
+ if (stmt->selectStmt) {
47
+ result->select_statement = TransformSelect(stmt->selectStmt, false);
48
+ } else {
49
+ result->default_values = true;
50
+ }
53
51
 
54
52
  auto qname = TransformQualifiedName(stmt->relation);
55
53
  result->table = qname.name;
@@ -292,17 +292,30 @@ static void FindMatchingPrimaryKeyColumns(const ColumnList &columns, const vecto
292
292
  } else {
293
293
  pk_names = unique.columns;
294
294
  }
295
- if (pk_names.size() != fk.fk_columns.size()) {
296
- // the number of referencing and referenced columns for foreign keys must be the same
297
- continue;
298
- }
299
295
  if (find_primary_key) {
300
296
  // found matching primary key
297
+ if (pk_names.size() != fk.fk_columns.size()) {
298
+ auto pk_name_str = StringUtil::Join(pk_names, ",");
299
+ auto fk_name_str = StringUtil::Join(fk.fk_columns, ",");
300
+ throw BinderException(
301
+ "Failed to create foreign key: number of referencing (%s) and referenced columns (%s) differ",
302
+ fk_name_str, pk_name_str);
303
+ }
301
304
  fk.pk_columns = pk_names;
302
305
  return;
303
306
  }
304
- if (fk.pk_columns != pk_names) {
305
- // Name mismatch
307
+ if (pk_names.size() != fk.fk_columns.size()) {
308
+ // the number of referencing and referenced columns for foreign keys must be the same
309
+ continue;
310
+ }
311
+ bool equals = true;
312
+ for (idx_t i = 0; i < fk.pk_columns.size(); i++) {
313
+ if (!StringUtil::CIEquals(fk.pk_columns[i], pk_names[i])) {
314
+ equals = false;
315
+ break;
316
+ }
317
+ }
318
+ if (!equals) {
306
319
  continue;
307
320
  }
308
321
  // found match
@@ -540,7 +553,7 @@ BoundStatement Binder::Bind(CreateStatement &stmt) {
540
553
  D_ASSERT(fk.info.pk_keys.empty());
541
554
  D_ASSERT(fk.info.fk_keys.empty());
542
555
  FindForeignKeyIndexes(create_info.columns, fk.fk_columns, fk.info.fk_keys);
543
- if (create_info.table == fk.info.table) {
556
+ if (StringUtil::CIEquals(create_info.table, fk.info.table)) {
544
557
  // self-referential foreign key constraint
545
558
  fk.info.type = ForeignKeyType::FK_TYPE_SELF_REFERENCE_TABLE;
546
559
  FindMatchingPrimaryKeyColumns(create_info.columns, create_info.constraints, fk);
@@ -9,6 +9,7 @@
9
9
  #include "duckdb/planner/operator/logical_get.hpp"
10
10
  #include "duckdb/common/string_util.hpp"
11
11
  #include "duckdb/function/table/table_scan.hpp"
12
+ #include "duckdb/planner/operator/logical_dummy_scan.hpp"
12
13
  #include "duckdb/planner/operator/logical_projection.hpp"
13
14
  #include "duckdb/planner/expression_iterator.hpp"
14
15
  #include "duckdb/planner/expression_binder/returning_binder.hpp"
@@ -409,7 +410,7 @@ BoundStatement Binder::Bind(InsertStatement &stmt) {
409
410
  AddCTEMap(stmt.cte_map);
410
411
 
411
412
  vector<LogicalIndex> named_column_map;
412
- if (!stmt.columns.empty()) {
413
+ if (!stmt.columns.empty() || stmt.default_values) {
413
414
  // insertion statement specifies column list
414
415
 
415
416
  // create a mapping of (list index) -> (column index)
@@ -448,11 +449,10 @@ BoundStatement Binder::Bind(InsertStatement &stmt) {
448
449
 
449
450
  // bind the default values
450
451
  BindDefaultValues(table.GetColumns(), insert->bound_defaults);
451
- if (!stmt.select_statement) {
452
+ if (!stmt.select_statement && !stmt.default_values) {
452
453
  result.plan = std::move(insert);
453
454
  return result;
454
455
  }
455
-
456
456
  // Exclude the generated columns from this amount
457
457
  idx_t expected_columns = stmt.columns.empty() ? table.GetColumns().PhysicalColumnCount() : stmt.columns.size();
458
458
 
@@ -488,14 +488,19 @@ BoundStatement Binder::Bind(InsertStatement &stmt) {
488
488
  }
489
489
 
490
490
  // parse select statement and add to logical plan
491
- auto select_binder = Binder::CreateBinder(context, this);
492
- auto root_select = select_binder->Bind(*stmt.select_statement);
493
- MoveCorrelatedExpressions(*select_binder);
491
+ unique_ptr<LogicalOperator> root;
492
+ if (stmt.select_statement) {
493
+ auto select_binder = Binder::CreateBinder(context, this);
494
+ auto root_select = select_binder->Bind(*stmt.select_statement);
495
+ MoveCorrelatedExpressions(*select_binder);
494
496
 
495
- CheckInsertColumnCountMismatch(expected_columns, root_select.types.size(), !stmt.columns.empty(),
496
- table.name.c_str());
497
+ CheckInsertColumnCountMismatch(expected_columns, root_select.types.size(), !stmt.columns.empty(),
498
+ table.name.c_str());
497
499
 
498
- auto root = CastLogicalOperatorToTypes(root_select.types, insert->expected_types, std::move(root_select.plan));
500
+ root = CastLogicalOperatorToTypes(root_select.types, insert->expected_types, std::move(root_select.plan));
501
+ } else {
502
+ root = make_uniq<LogicalDummyScan>(GenerateTableIndex());
503
+ }
499
504
  insert->AddChild(std::move(root));
500
505
 
501
506
  BindOnConflictClause(*insert, table, stmt);
@@ -338,27 +338,29 @@ void CheckpointWriter::WriteIndex(IndexCatalogEntry &index_catalog) {
338
338
 
339
339
  void CheckpointReader::ReadIndex(ClientContext &context, MetaBlockReader &reader) {
340
340
 
341
- // Deserialize the index meta data
341
+ // deserialize the index metadata
342
342
  auto info = IndexCatalogEntry::Deserialize(reader, context);
343
343
 
344
- // Create index in the catalog
344
+ // create the index in the catalog
345
345
  auto &schema_catalog = catalog.GetSchema(context, info->schema);
346
346
  auto &table_catalog = catalog.GetEntry(context, CatalogType::TABLE_ENTRY, info->schema, info->table->table_name)
347
347
  .Cast<DuckTableEntry>();
348
348
  auto &index_catalog = schema_catalog.CreateIndex(context, *info, table_catalog)->Cast<DuckIndexEntry>();
349
349
  index_catalog.info = table_catalog.GetStorage().info;
350
- // Here we just gotta read the root node
350
+
351
+ // we deserialize the index lazily, i.e., we do not need to load any node information
352
+ // except the root block id and offset
351
353
  auto root_block_id = reader.Read<block_id_t>();
352
354
  auto root_offset = reader.Read<uint32_t>();
353
355
 
354
- // create an adaptive radix tree around the expressions
356
+ // obtain the expressions of the ART from the index metadata
355
357
  vector<unique_ptr<Expression>> unbound_expressions;
356
358
  vector<unique_ptr<ParsedExpression>> parsed_expressions;
357
-
358
359
  for (auto &p_exp : info->parsed_expressions) {
359
360
  parsed_expressions.push_back(p_exp->Copy());
360
361
  }
361
362
 
363
+ // bind the parsed expressions
362
364
  auto binder = Binder::CreateBinder(context);
363
365
  auto &table_ref = info->table->Cast<TableRef>();
364
366
  auto bound_table = binder->Bind(table_ref);
@@ -370,8 +372,7 @@ void CheckpointReader::ReadIndex(ClientContext &context, MetaBlockReader &reader
370
372
  }
371
373
 
372
374
  if (parsed_expressions.empty()) {
373
- // If no parsed_expressions are present, this means this is a PK/FK index, so we create the necessary bound
374
- // column refs
375
+ // this is a PK/FK index: we create the necessary bound column ref expressions
375
376
  unbound_expressions.reserve(info->column_ids.size());
376
377
  for (idx_t key_nr = 0; key_nr < info->column_ids.size(); key_nr++) {
377
378
  auto &col = table_catalog.GetColumn(LogicalIndex(info->column_ids[key_nr]));
@@ -380,17 +381,18 @@ void CheckpointReader::ReadIndex(ClientContext &context, MetaBlockReader &reader
380
381
  }
381
382
  }
382
383
 
384
+ // create the index and add it to the storage
383
385
  switch (info->index_type) {
384
386
  case IndexType::ART: {
385
387
  auto &storage = table_catalog.GetStorage();
386
388
  auto art = make_uniq<ART>(info->column_ids, TableIOManager::Get(storage), std::move(unbound_expressions),
387
- info->constraint_type, storage.db, true, root_block_id, root_offset);
389
+ info->constraint_type, storage.db, root_block_id, root_offset);
388
390
  index_catalog.index = art.get();
389
391
  storage.info->indexes.AddIndex(std::move(art));
390
392
  break;
391
393
  }
392
394
  default:
393
- throw InternalException("Can't read this index type");
395
+ throw InternalException("Unknown index type for ReadIndex");
394
396
  }
395
397
  }
396
398
 
@@ -1162,9 +1162,7 @@ void DataTable::WALAddIndex(ClientContext &context, unique_ptr<Index> index,
1162
1162
 
1163
1163
  auto &allocator = Allocator::Get(db);
1164
1164
 
1165
- DataChunk result;
1166
- result.Initialize(allocator, index->logical_types);
1167
-
1165
+ // intermediate holds scanned chunks of the underlying data to create the index
1168
1166
  DataChunk intermediate;
1169
1167
  vector<LogicalType> intermediate_types;
1170
1168
  auto column_ids = index->column_ids;
@@ -1176,6 +1174,10 @@ void DataTable::WALAddIndex(ClientContext &context, unique_ptr<Index> index,
1176
1174
  intermediate_types.emplace_back(LogicalType::ROW_TYPE);
1177
1175
  intermediate.Initialize(allocator, intermediate_types);
1178
1176
 
1177
+ // holds the result of executing the index expression on the intermediate chunks
1178
+ DataChunk result;
1179
+ result.Initialize(allocator, index->logical_types);
1180
+
1179
1181
  // initialize an index scan
1180
1182
  CreateIndexScanState state;
1181
1183
  InitializeWALCreateIndexScan(state, column_ids);
@@ -1209,6 +1211,7 @@ void DataTable::WALAddIndex(ClientContext &context, unique_ptr<Index> index,
1209
1211
  }
1210
1212
  }
1211
1213
  }
1214
+
1212
1215
  info->indexes.AddIndex(std::move(index));
1213
1216
  }
1214
1217
 
@@ -10,10 +10,10 @@ namespace duckdb {
10
10
 
11
11
  Index::Index(AttachedDatabase &db, IndexType type, TableIOManager &table_io_manager,
12
12
  const vector<column_t> &column_ids_p, const vector<unique_ptr<Expression>> &unbound_expressions,
13
- IndexConstraintType constraint_type_p, bool track_memory)
13
+ IndexConstraintType constraint_type_p)
14
14
 
15
15
  : type(type), table_io_manager(table_io_manager), column_ids(column_ids_p), constraint_type(constraint_type_p),
16
- db(db), buffer_manager(BufferManager::GetBufferManager(db)), memory_size(0), track_memory(track_memory) {
16
+ db(db), buffer_manager(BufferManager::GetBufferManager(db)) {
17
17
 
18
18
  for (auto &expr : unbound_expressions) {
19
19
  types.push_back(expr->return_type.InternalType());
@@ -49,19 +49,31 @@ void Index::Delete(DataChunk &entries, Vector &row_identifiers) {
49
49
  }
50
50
 
51
51
  bool Index::MergeIndexes(Index &other_index) {
52
+
52
53
  IndexLock state;
53
54
  InitializeLock(state);
54
55
 
55
56
  switch (this->type) {
56
- case IndexType::ART: {
57
- auto &art = Cast<ART>();
58
- return art.MergeIndexes(state, other_index);
59
- }
57
+ case IndexType::ART:
58
+ return Cast<ART>().MergeIndexes(state, other_index);
60
59
  default:
61
60
  throw InternalException("Unimplemented index type for merge");
62
61
  }
63
62
  }
64
63
 
64
+ void Index::Vacuum() {
65
+
66
+ IndexLock state;
67
+ InitializeLock(state);
68
+
69
+ switch (this->type) {
70
+ case IndexType::ART:
71
+ return Cast<ART>().Vacuum(state);
72
+ default:
73
+ throw InternalException("Unimplemented index type for vacuum");
74
+ }
75
+ }
76
+
65
77
  void Index::ExecuteExpressions(DataChunk &input, DataChunk &result) {
66
78
  executor.Execute(input, result);
67
79
  }
@@ -118,6 +118,7 @@ LocalTableStorage::LocalTableStorage(DataTable &table)
118
118
  row_groups = make_shared<RowGroupCollection>(table.info, TableIOManager::Get(table).GetBlockManagerForRowData(),
119
119
  types, MAX_ROW_ID, 0);
120
120
  row_groups->InitializeEmpty();
121
+
121
122
  table.info->indexes.Scan([&](Index &index) {
122
123
  D_ASSERT(index.type == IndexType::ART);
123
124
  auto &art = index.Cast<ART>();
@@ -129,7 +130,7 @@ LocalTableStorage::LocalTableStorage(DataTable &table)
129
130
  unbound_expressions.push_back(expr->Copy());
130
131
  }
131
132
  indexes.AddIndex(make_uniq<ART>(art.column_ids, art.table_io_manager, std::move(unbound_expressions),
132
- art.constraint_type, art.db, true));
133
+ art.constraint_type, art.db));
133
134
  }
134
135
  return false;
135
136
  });
@@ -520,6 +521,12 @@ void LocalStorage::Flush(DataTable &table, LocalTableStorage &storage) {
520
521
  storage.AppendToIndexes(transaction, append_state, append_count, true);
521
522
  }
522
523
  transaction.PushAppend(table, append_state.row_start, append_count);
524
+
525
+ // possibly vacuum any excess index data
526
+ table.info->indexes.Scan([&](Index &index) {
527
+ index.Vacuum();
528
+ return false;
529
+ });
523
530
  }
524
531
 
525
532
  void LocalStorage::Commit(LocalStorage::CommitState &commit_state, DuckTransaction &transaction) {
@@ -531,7 +538,6 @@ void LocalStorage::Commit(LocalStorage::CommitState &commit_state, DuckTransacti
531
538
  auto table = entry.first;
532
539
  auto storage = entry.second.get();
533
540
  Flush(table, *storage);
534
-
535
541
  entry.second.reset();
536
542
  }
537
543
  }
@@ -229,15 +229,6 @@ void StandardBufferManager::Unpin(shared_ptr<BlockHandle> &handle) {
229
229
  }
230
230
  }
231
231
 
232
- // POTENTIALLY PROBLEMATIC
233
- void StandardBufferManager::IncreaseUsedMemory(idx_t size, bool unsafe) {
234
- ReserveMemory(size);
235
- }
236
-
237
- void StandardBufferManager::DecreaseUsedMemory(idx_t size) {
238
- FreeReservedMemory(size);
239
- }
240
-
241
232
  void StandardBufferManager::SetLimit(idx_t limit) {
242
233
  buffer_pool.SetLimit(limit, InMemoryWarning());
243
234
  }
@@ -417,7 +417,7 @@ void ReplayState::ReplayCreateIndex() {
417
417
  switch (info->index_type) {
418
418
  case IndexType::ART: {
419
419
  index = make_uniq<ART>(info->column_ids, TableIOManager::Get(data_table), expressions, info->constraint_type,
420
- data_table.db, true);
420
+ data_table.db);
421
421
  break;
422
422
  }
423
423
  default:
@@ -52,15 +52,21 @@ void CleanupState::CleanupDelete(DeleteInfo &info) {
52
52
  auto version_table = info.table;
53
53
  D_ASSERT(version_table->info->cardinality >= info.count);
54
54
  version_table->info->cardinality -= info.count;
55
+
55
56
  if (version_table->info->indexes.Empty()) {
56
57
  // this table has no indexes: no cleanup to be done
57
58
  return;
58
59
  }
60
+
59
61
  if (current_table != version_table) {
60
62
  // table for this entry differs from previous table: flush and switch to the new table
61
63
  Flush();
62
64
  current_table = version_table;
63
65
  }
66
+
67
+ // possibly vacuum any indexes in this table later
68
+ indexed_tables[current_table->info->table] = current_table;
69
+
64
70
  count = 0;
65
71
  for (idx_t i = 0; i < info.count; i++) {
66
72
  row_numbers[count++] = info.vinfo->start + info.rows[i];
@@ -126,6 +126,14 @@ void UndoBuffer::Cleanup() {
126
126
  CleanupState state;
127
127
  UndoBuffer::IteratorState iterator_state;
128
128
  IterateEntries(iterator_state, [&](UndoFlags type, data_ptr_t data) { state.CleanupEntry(type, data); });
129
+
130
+ // possibly vacuum indexes
131
+ for (const auto &table : state.indexed_tables) {
132
+ table.second->info->indexes.Scan([&](Index &index) {
133
+ index.Vacuum();
134
+ return false;
135
+ });
136
+ }
129
137
  }
130
138
 
131
139
  void UndoBuffer::Commit(UndoBuffer::IteratorState &iterator_state, optional_ptr<WriteAheadLog> log,