duckdb 0.7.2-dev2552.0 → 0.7.2-dev2699.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (71):
  1. package/binding.gyp +7 -7
  2. package/package.json +2 -2
  3. package/src/duckdb/extension/parquet/parquet_statistics.cpp +3 -0
  4. package/src/duckdb/src/catalog/catalog_entry/duck_table_entry.cpp +2 -2
  5. package/src/duckdb/src/common/adbc/adbc.cpp +5 -2
  6. package/src/duckdb/src/common/radix_partitioning.cpp +1 -1
  7. package/src/duckdb/src/execution/index/art/art.cpp +286 -269
  8. package/src/duckdb/src/execution/index/art/art_key.cpp +22 -32
  9. package/src/duckdb/src/execution/index/art/fixed_size_allocator.cpp +224 -0
  10. package/src/duckdb/src/execution/index/art/iterator.cpp +142 -123
  11. package/src/duckdb/src/execution/index/art/leaf.cpp +319 -170
  12. package/src/duckdb/src/execution/index/art/leaf_segment.cpp +42 -0
  13. package/src/duckdb/src/execution/index/art/node.cpp +444 -379
  14. package/src/duckdb/src/execution/index/art/node16.cpp +178 -114
  15. package/src/duckdb/src/execution/index/art/node256.cpp +117 -79
  16. package/src/duckdb/src/execution/index/art/node4.cpp +169 -114
  17. package/src/duckdb/src/execution/index/art/node48.cpp +175 -105
  18. package/src/duckdb/src/execution/index/art/prefix.cpp +405 -127
  19. package/src/duckdb/src/execution/index/art/prefix_segment.cpp +42 -0
  20. package/src/duckdb/src/execution/index/art/swizzleable_pointer.cpp +10 -85
  21. package/src/duckdb/src/execution/operator/join/physical_index_join.cpp +2 -1
  22. package/src/duckdb/src/execution/operator/persistent/base_csv_reader.cpp +2 -2
  23. package/src/duckdb/src/execution/operator/persistent/csv_reader_options.cpp +2 -0
  24. package/src/duckdb/src/execution/operator/persistent/parallel_csv_reader.cpp +4 -0
  25. package/src/duckdb/src/execution/operator/schema/physical_create_index.cpp +11 -12
  26. package/src/duckdb/src/function/table/read_csv.cpp +5 -1
  27. package/src/duckdb/src/function/table/version/pragma_version.cpp +2 -2
  28. package/src/duckdb/src/include/duckdb/common/queue.hpp +1 -1
  29. package/src/duckdb/src/include/duckdb/execution/index/art/art.hpp +53 -45
  30. package/src/duckdb/src/include/duckdb/execution/index/art/art_key.hpp +29 -24
  31. package/src/duckdb/src/include/duckdb/execution/index/art/fixed_size_allocator.hpp +114 -0
  32. package/src/duckdb/src/include/duckdb/execution/index/art/iterator.hpp +26 -20
  33. package/src/duckdb/src/include/duckdb/execution/index/art/leaf.hpp +63 -39
  34. package/src/duckdb/src/include/duckdb/execution/index/art/leaf_segment.hpp +36 -0
  35. package/src/duckdb/src/include/duckdb/execution/index/art/node.hpp +98 -116
  36. package/src/duckdb/src/include/duckdb/execution/index/art/node16.hpp +48 -36
  37. package/src/duckdb/src/include/duckdb/execution/index/art/node256.hpp +52 -35
  38. package/src/duckdb/src/include/duckdb/execution/index/art/node4.hpp +46 -36
  39. package/src/duckdb/src/include/duckdb/execution/index/art/node48.hpp +57 -35
  40. package/src/duckdb/src/include/duckdb/execution/index/art/prefix.hpp +57 -50
  41. package/src/duckdb/src/include/duckdb/execution/index/art/prefix_segment.hpp +40 -0
  42. package/src/duckdb/src/include/duckdb/execution/index/art/swizzleable_pointer.hpp +38 -31
  43. package/src/duckdb/src/include/duckdb/execution/operator/persistent/csv_file_handle.hpp +2 -1
  44. package/src/duckdb/src/include/duckdb/execution/operator/persistent/csv_reader_options.hpp +2 -0
  45. package/src/duckdb/src/include/duckdb/main/query_result.hpp +1 -1
  46. package/src/duckdb/src/include/duckdb/parser/statement/insert_statement.hpp +4 -1
  47. package/src/duckdb/src/include/duckdb/parser/transformer.hpp +2 -1
  48. package/src/duckdb/src/include/duckdb/storage/buffer_manager.hpp +0 -5
  49. package/src/duckdb/src/include/duckdb/storage/index.hpp +13 -28
  50. package/src/duckdb/src/include/duckdb/storage/standard_buffer_manager.hpp +0 -2
  51. package/src/duckdb/src/include/duckdb/transaction/cleanup_state.hpp +5 -0
  52. package/src/duckdb/src/include/duckdb.h +26 -0
  53. package/src/duckdb/src/main/capi/helper-c.cpp +7 -0
  54. package/src/duckdb/src/main/client_context.cpp +1 -1
  55. package/src/duckdb/src/main/query_result.cpp +1 -1
  56. package/src/duckdb/src/parser/statement/insert_statement.cpp +15 -6
  57. package/src/duckdb/src/parser/transform/constraint/transform_constraint.cpp +1 -1
  58. package/src/duckdb/src/parser/transform/expression/transform_function.cpp +18 -5
  59. package/src/duckdb/src/parser/transform/statement/transform_insert.cpp +5 -7
  60. package/src/duckdb/src/planner/binder/statement/bind_create.cpp +20 -7
  61. package/src/duckdb/src/planner/binder/statement/bind_insert.cpp +14 -9
  62. package/src/duckdb/src/storage/checkpoint_manager.cpp +11 -9
  63. package/src/duckdb/src/storage/data_table.cpp +6 -3
  64. package/src/duckdb/src/storage/index.cpp +18 -6
  65. package/src/duckdb/src/storage/local_storage.cpp +8 -2
  66. package/src/duckdb/src/storage/standard_buffer_manager.cpp +0 -9
  67. package/src/duckdb/src/storage/wal_replay.cpp +1 -1
  68. package/src/duckdb/src/transaction/cleanup_state.cpp +6 -0
  69. package/src/duckdb/src/transaction/undo_buffer.cpp +8 -0
  70. package/src/duckdb/ub_extension_icu_third_party_icu_i18n.cpp +4 -4
  71. package/src/duckdb/ub_src_execution_index_art.cpp +7 -1
@@ -338,27 +338,29 @@ void CheckpointWriter::WriteIndex(IndexCatalogEntry &index_catalog) {
338
338
 
339
339
  void CheckpointReader::ReadIndex(ClientContext &context, MetaBlockReader &reader) {
340
340
 
341
- // Deserialize the index meta data
341
+ // deserialize the index metadata
342
342
  auto info = IndexCatalogEntry::Deserialize(reader, context);
343
343
 
344
- // Create index in the catalog
344
+ // create the index in the catalog
345
345
  auto &schema_catalog = catalog.GetSchema(context, info->schema);
346
346
  auto &table_catalog = catalog.GetEntry(context, CatalogType::TABLE_ENTRY, info->schema, info->table->table_name)
347
347
  .Cast<DuckTableEntry>();
348
348
  auto &index_catalog = schema_catalog.CreateIndex(context, *info, table_catalog)->Cast<DuckIndexEntry>();
349
349
  index_catalog.info = table_catalog.GetStorage().info;
350
- // Here we just gotta read the root node
350
+
351
+ // we deserialize the index lazily, i.e., we do not need to load any node information
352
+ // except the root block id and offset
351
353
  auto root_block_id = reader.Read<block_id_t>();
352
354
  auto root_offset = reader.Read<uint32_t>();
353
355
 
354
- // create an adaptive radix tree around the expressions
356
+ // obtain the expressions of the ART from the index metadata
355
357
  vector<unique_ptr<Expression>> unbound_expressions;
356
358
  vector<unique_ptr<ParsedExpression>> parsed_expressions;
357
-
358
359
  for (auto &p_exp : info->parsed_expressions) {
359
360
  parsed_expressions.push_back(p_exp->Copy());
360
361
  }
361
362
 
363
+ // bind the parsed expressions
362
364
  auto binder = Binder::CreateBinder(context);
363
365
  auto &table_ref = info->table->Cast<TableRef>();
364
366
  auto bound_table = binder->Bind(table_ref);
@@ -370,8 +372,7 @@ void CheckpointReader::ReadIndex(ClientContext &context, MetaBlockReader &reader
370
372
  }
371
373
 
372
374
  if (parsed_expressions.empty()) {
373
- // If no parsed_expressions are present, this means this is a PK/FK index, so we create the necessary bound
374
- // column refs
375
+ // this is a PK/FK index: we create the necessary bound column ref expressions
375
376
  unbound_expressions.reserve(info->column_ids.size());
376
377
  for (idx_t key_nr = 0; key_nr < info->column_ids.size(); key_nr++) {
377
378
  auto &col = table_catalog.GetColumn(LogicalIndex(info->column_ids[key_nr]));
@@ -380,17 +381,18 @@ void CheckpointReader::ReadIndex(ClientContext &context, MetaBlockReader &reader
380
381
  }
381
382
  }
382
383
 
384
+ // create the index and add it to the storage
383
385
  switch (info->index_type) {
384
386
  case IndexType::ART: {
385
387
  auto &storage = table_catalog.GetStorage();
386
388
  auto art = make_uniq<ART>(info->column_ids, TableIOManager::Get(storage), std::move(unbound_expressions),
387
- info->constraint_type, storage.db, true, root_block_id, root_offset);
389
+ info->constraint_type, storage.db, root_block_id, root_offset);
388
390
  index_catalog.index = art.get();
389
391
  storage.info->indexes.AddIndex(std::move(art));
390
392
  break;
391
393
  }
392
394
  default:
393
- throw InternalException("Can't read this index type");
395
+ throw InternalException("Unknown index type for ReadIndex");
394
396
  }
395
397
  }
396
398
 
@@ -1162,9 +1162,7 @@ void DataTable::WALAddIndex(ClientContext &context, unique_ptr<Index> index,
1162
1162
 
1163
1163
  auto &allocator = Allocator::Get(db);
1164
1164
 
1165
- DataChunk result;
1166
- result.Initialize(allocator, index->logical_types);
1167
-
1165
+ // intermediate holds scanned chunks of the underlying data to create the index
1168
1166
  DataChunk intermediate;
1169
1167
  vector<LogicalType> intermediate_types;
1170
1168
  auto column_ids = index->column_ids;
@@ -1176,6 +1174,10 @@ void DataTable::WALAddIndex(ClientContext &context, unique_ptr<Index> index,
1176
1174
  intermediate_types.emplace_back(LogicalType::ROW_TYPE);
1177
1175
  intermediate.Initialize(allocator, intermediate_types);
1178
1176
 
1177
+ // holds the result of executing the index expression on the intermediate chunks
1178
+ DataChunk result;
1179
+ result.Initialize(allocator, index->logical_types);
1180
+
1179
1181
  // initialize an index scan
1180
1182
  CreateIndexScanState state;
1181
1183
  InitializeWALCreateIndexScan(state, column_ids);
@@ -1209,6 +1211,7 @@ void DataTable::WALAddIndex(ClientContext &context, unique_ptr<Index> index,
1209
1211
  }
1210
1212
  }
1211
1213
  }
1214
+
1212
1215
  info->indexes.AddIndex(std::move(index));
1213
1216
  }
1214
1217
 
@@ -10,10 +10,10 @@ namespace duckdb {
10
10
 
11
11
  Index::Index(AttachedDatabase &db, IndexType type, TableIOManager &table_io_manager,
12
12
  const vector<column_t> &column_ids_p, const vector<unique_ptr<Expression>> &unbound_expressions,
13
- IndexConstraintType constraint_type_p, bool track_memory)
13
+ IndexConstraintType constraint_type_p)
14
14
 
15
15
  : type(type), table_io_manager(table_io_manager), column_ids(column_ids_p), constraint_type(constraint_type_p),
16
- db(db), buffer_manager(BufferManager::GetBufferManager(db)), memory_size(0), track_memory(track_memory) {
16
+ db(db), buffer_manager(BufferManager::GetBufferManager(db)) {
17
17
 
18
18
  for (auto &expr : unbound_expressions) {
19
19
  types.push_back(expr->return_type.InternalType());
@@ -49,19 +49,31 @@ void Index::Delete(DataChunk &entries, Vector &row_identifiers) {
49
49
  }
50
50
 
51
51
  bool Index::MergeIndexes(Index &other_index) {
52
+
52
53
  IndexLock state;
53
54
  InitializeLock(state);
54
55
 
55
56
  switch (this->type) {
56
- case IndexType::ART: {
57
- auto &art = Cast<ART>();
58
- return art.MergeIndexes(state, other_index);
59
- }
57
+ case IndexType::ART:
58
+ return Cast<ART>().MergeIndexes(state, other_index);
60
59
  default:
61
60
  throw InternalException("Unimplemented index type for merge");
62
61
  }
63
62
  }
64
63
 
64
+ void Index::Vacuum() {
65
+
66
+ IndexLock state;
67
+ InitializeLock(state);
68
+
69
+ switch (this->type) {
70
+ case IndexType::ART:
71
+ return Cast<ART>().Vacuum(state);
72
+ default:
73
+ throw InternalException("Unimplemented index type for vacuum");
74
+ }
75
+ }
76
+
65
77
  void Index::ExecuteExpressions(DataChunk &input, DataChunk &result) {
66
78
  executor.Execute(input, result);
67
79
  }
@@ -118,6 +118,7 @@ LocalTableStorage::LocalTableStorage(DataTable &table)
118
118
  row_groups = make_shared<RowGroupCollection>(table.info, TableIOManager::Get(table).GetBlockManagerForRowData(),
119
119
  types, MAX_ROW_ID, 0);
120
120
  row_groups->InitializeEmpty();
121
+
121
122
  table.info->indexes.Scan([&](Index &index) {
122
123
  D_ASSERT(index.type == IndexType::ART);
123
124
  auto &art = index.Cast<ART>();
@@ -129,7 +130,7 @@ LocalTableStorage::LocalTableStorage(DataTable &table)
129
130
  unbound_expressions.push_back(expr->Copy());
130
131
  }
131
132
  indexes.AddIndex(make_uniq<ART>(art.column_ids, art.table_io_manager, std::move(unbound_expressions),
132
- art.constraint_type, art.db, true));
133
+ art.constraint_type, art.db));
133
134
  }
134
135
  return false;
135
136
  });
@@ -520,6 +521,12 @@ void LocalStorage::Flush(DataTable &table, LocalTableStorage &storage) {
520
521
  storage.AppendToIndexes(transaction, append_state, append_count, true);
521
522
  }
522
523
  transaction.PushAppend(table, append_state.row_start, append_count);
524
+
525
+ // possibly vacuum any excess index data
526
+ table.info->indexes.Scan([&](Index &index) {
527
+ index.Vacuum();
528
+ return false;
529
+ });
523
530
  }
524
531
 
525
532
  void LocalStorage::Commit(LocalStorage::CommitState &commit_state, DuckTransaction &transaction) {
@@ -531,7 +538,6 @@ void LocalStorage::Commit(LocalStorage::CommitState &commit_state, DuckTransacti
531
538
  auto table = entry.first;
532
539
  auto storage = entry.second.get();
533
540
  Flush(table, *storage);
534
-
535
541
  entry.second.reset();
536
542
  }
537
543
  }
@@ -229,15 +229,6 @@ void StandardBufferManager::Unpin(shared_ptr<BlockHandle> &handle) {
229
229
  }
230
230
  }
231
231
 
232
- // POTENTIALLY PROBLEMATIC
233
- void StandardBufferManager::IncreaseUsedMemory(idx_t size, bool unsafe) {
234
- ReserveMemory(size);
235
- }
236
-
237
- void StandardBufferManager::DecreaseUsedMemory(idx_t size) {
238
- FreeReservedMemory(size);
239
- }
240
-
241
232
  void StandardBufferManager::SetLimit(idx_t limit) {
242
233
  buffer_pool.SetLimit(limit, InMemoryWarning());
243
234
  }
@@ -417,7 +417,7 @@ void ReplayState::ReplayCreateIndex() {
417
417
  switch (info->index_type) {
418
418
  case IndexType::ART: {
419
419
  index = make_uniq<ART>(info->column_ids, TableIOManager::Get(data_table), expressions, info->constraint_type,
420
- data_table.db, true);
420
+ data_table.db);
421
421
  break;
422
422
  }
423
423
  default:
@@ -52,15 +52,21 @@ void CleanupState::CleanupDelete(DeleteInfo &info) {
52
52
  auto version_table = info.table;
53
53
  D_ASSERT(version_table->info->cardinality >= info.count);
54
54
  version_table->info->cardinality -= info.count;
55
+
55
56
  if (version_table->info->indexes.Empty()) {
56
57
  // this table has no indexes: no cleanup to be done
57
58
  return;
58
59
  }
60
+
59
61
  if (current_table != version_table) {
60
62
  // table for this entry differs from previous table: flush and switch to the new table
61
63
  Flush();
62
64
  current_table = version_table;
63
65
  }
66
+
67
+ // possibly vacuum any indexes in this table later
68
+ indexed_tables[current_table->info->table] = current_table;
69
+
64
70
  count = 0;
65
71
  for (idx_t i = 0; i < info.count; i++) {
66
72
  row_numbers[count++] = info.vinfo->start + info.rows[i];
@@ -126,6 +126,14 @@ void UndoBuffer::Cleanup() {
126
126
  CleanupState state;
127
127
  UndoBuffer::IteratorState iterator_state;
128
128
  IterateEntries(iterator_state, [&](UndoFlags type, data_ptr_t data) { state.CleanupEntry(type, data); });
129
+
130
+ // possibly vacuum indexes
131
+ for (const auto &table : state.indexed_tables) {
132
+ table.second->info->indexes.Scan([&](Index &index) {
133
+ index.Vacuum();
134
+ return false;
135
+ });
136
+ }
129
137
  }
130
138
 
131
139
  void UndoBuffer::Commit(UndoBuffer::IteratorState &iterator_state, optional_ptr<WriteAheadLog> log,
@@ -352,13 +352,13 @@
352
352
 
353
353
  #include "extension/icu/third_party/icu/i18n/double-conversion-bignum.cpp"
354
354
 
355
+ #include "extension/icu/third_party/icu/i18n/double-conversion-bignum-dtoa.cpp"
356
+
355
357
  #include "extension/icu/third_party/icu/i18n/double-conversion-cached-powers.cpp"
356
358
 
357
- #include "extension/icu/third_party/icu/i18n/double-conversion-double-to-string.cpp"
359
+ #include "extension/icu/third_party/icu/i18n/double-conversion-fast-dtoa.cpp"
358
360
 
359
361
  #include "extension/icu/third_party/icu/i18n/double-conversion-string-to-double.cpp"
360
362
 
361
- #include "extension/icu/third_party/icu/i18n/double-conversion-fast-dtoa.cpp"
362
-
363
- #include "extension/icu/third_party/icu/i18n/double-conversion-bignum-dtoa.cpp"
363
+ #include "extension/icu/third_party/icu/i18n/double-conversion-double-to-string.cpp"
364
364
 
@@ -1,10 +1,14 @@
1
1
  #include "src/execution/index/art/art_key.cpp"
2
2
 
3
+ #include "src/execution/index/art/node.cpp"
4
+
5
+ #include "src/execution/index/art/fixed_size_allocator.cpp"
6
+
3
7
  #include "src/execution/index/art/iterator.cpp"
4
8
 
5
9
  #include "src/execution/index/art/leaf.cpp"
6
10
 
7
- #include "src/execution/index/art/node.cpp"
11
+ #include "src/execution/index/art/leaf_segment.cpp"
8
12
 
9
13
  #include "src/execution/index/art/node4.cpp"
10
14
 
@@ -18,5 +22,7 @@
18
22
 
19
23
  #include "src/execution/index/art/prefix.cpp"
20
24
 
25
+ #include "src/execution/index/art/prefix_segment.cpp"
26
+
21
27
  #include "src/execution/index/art/art.cpp"
22
28