duckdb 0.8.2-dev4025.0 → 0.8.2-dev4142.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (71)
  1. package/binding.gyp +1 -0
  2. package/package.json +1 -1
  3. package/src/duckdb/extension/json/buffered_json_reader.cpp +76 -74
  4. package/src/duckdb/extension/json/include/buffered_json_reader.hpp +35 -32
  5. package/src/duckdb/extension/json/include/json_scan.hpp +9 -6
  6. package/src/duckdb/extension/json/json_scan.cpp +124 -121
  7. package/src/duckdb/src/catalog/catalog.cpp +20 -0
  8. package/src/duckdb/src/catalog/catalog_entry/duck_index_entry.cpp +5 -0
  9. package/src/duckdb/src/common/arrow/arrow_converter.cpp +3 -0
  10. package/src/duckdb/src/common/radix_partitioning.cpp +1 -1
  11. package/src/duckdb/src/common/sort/partition_state.cpp +5 -1
  12. package/src/duckdb/src/core_functions/aggregate/holistic/mode.cpp +1 -1
  13. package/src/duckdb/src/core_functions/function_list.cpp +7 -0
  14. package/src/duckdb/src/core_functions/scalar/list/list_cosine_similarity.cpp +78 -0
  15. package/src/duckdb/src/core_functions/scalar/list/list_distance.cpp +72 -0
  16. package/src/duckdb/src/core_functions/scalar/list/list_inner_product.cpp +70 -0
  17. package/src/duckdb/src/execution/index/art/art.cpp +111 -92
  18. package/src/duckdb/src/execution/index/art/iterator.cpp +21 -27
  19. package/src/duckdb/src/execution/index/art/leaf.cpp +72 -153
  20. package/src/duckdb/src/execution/index/art/node.cpp +109 -203
  21. package/src/duckdb/src/execution/index/art/node16.cpp +32 -64
  22. package/src/duckdb/src/execution/index/art/node256.cpp +38 -53
  23. package/src/duckdb/src/execution/index/art/node4.cpp +31 -62
  24. package/src/duckdb/src/execution/index/art/node48.cpp +43 -65
  25. package/src/duckdb/src/execution/index/art/prefix.cpp +70 -141
  26. package/src/duckdb/src/execution/index/fixed_size_allocator.cpp +345 -0
  27. package/src/duckdb/src/execution/index/fixed_size_buffer.cpp +74 -0
  28. package/src/duckdb/src/execution/operator/aggregate/physical_window.cpp +1 -1
  29. package/src/duckdb/src/execution/operator/schema/physical_create_art_index.cpp +1 -1
  30. package/src/duckdb/src/function/table/system/duckdb_columns.cpp +3 -1
  31. package/src/duckdb/src/function/table/version/pragma_version.cpp +2 -2
  32. package/src/duckdb/src/include/duckdb/catalog/catalog.hpp +1 -0
  33. package/src/duckdb/src/include/duckdb/catalog/catalog_entry/duck_index_entry.hpp +2 -0
  34. package/src/duckdb/src/include/duckdb/common/optional_idx.hpp +1 -1
  35. package/src/duckdb/src/include/duckdb/core_functions/scalar/list_functions.hpp +51 -0
  36. package/src/duckdb/src/include/duckdb/execution/index/art/art.hpp +17 -7
  37. package/src/duckdb/src/include/duckdb/execution/index/art/iterator.hpp +5 -5
  38. package/src/duckdb/src/include/duckdb/execution/index/art/leaf.hpp +10 -16
  39. package/src/duckdb/src/include/duckdb/execution/index/art/node.hpp +38 -116
  40. package/src/duckdb/src/include/duckdb/execution/index/art/node16.hpp +17 -18
  41. package/src/duckdb/src/include/duckdb/execution/index/art/node256.hpp +17 -23
  42. package/src/duckdb/src/include/duckdb/execution/index/art/node4.hpp +17 -18
  43. package/src/duckdb/src/include/duckdb/execution/index/art/node48.hpp +17 -24
  44. package/src/duckdb/src/include/duckdb/execution/index/art/prefix.hpp +16 -22
  45. package/src/duckdb/src/include/duckdb/execution/index/fixed_size_allocator.hpp +126 -0
  46. package/src/duckdb/src/include/duckdb/execution/index/fixed_size_buffer.hpp +79 -0
  47. package/src/duckdb/src/include/duckdb/execution/index/index_pointer.hpp +96 -0
  48. package/src/duckdb/src/include/duckdb/main/extension_entries.hpp +1 -0
  49. package/src/duckdb/src/include/duckdb/main/extension_helper.hpp +1 -0
  50. package/src/duckdb/src/include/duckdb/parallel/task_scheduler.hpp +1 -1
  51. package/src/duckdb/src/include/duckdb/storage/block.hpp +1 -1
  52. package/src/duckdb/src/include/duckdb/storage/index.hpp +10 -8
  53. package/src/duckdb/src/include/duckdb/storage/metadata/metadata_writer.hpp +3 -0
  54. package/src/duckdb/src/main/extension/extension_helper.cpp +17 -0
  55. package/src/duckdb/src/main/extension/extension_install.cpp +5 -3
  56. package/src/duckdb/src/main/extension/extension_load.cpp +3 -3
  57. package/src/duckdb/src/planner/binder/expression/bind_function_expression.cpp +14 -5
  58. package/src/duckdb/src/storage/checkpoint/table_data_writer.cpp +2 -3
  59. package/src/duckdb/src/storage/checkpoint_manager.cpp +16 -21
  60. package/src/duckdb/src/storage/data_table.cpp +3 -3
  61. package/src/duckdb/src/storage/index.cpp +7 -1
  62. package/src/duckdb/src/storage/metadata/metadata_manager.cpp +21 -21
  63. package/src/duckdb/src/storage/standard_buffer_manager.cpp +0 -8
  64. package/src/duckdb/src/storage/storage_info.cpp +1 -1
  65. package/src/duckdb/src/storage/table_index_list.cpp +1 -1
  66. package/src/duckdb/src/transaction/commit_state.cpp +5 -1
  67. package/src/duckdb/ub_src_core_functions_scalar_list.cpp +6 -0
  68. package/src/duckdb/ub_src_execution_index.cpp +4 -0
  69. package/src/duckdb/ub_src_execution_index_art.cpp +0 -2
  70. package/src/duckdb/src/execution/index/art/fixed_size_allocator.cpp +0 -238
  71. package/src/duckdb/src/include/duckdb/execution/index/art/fixed_size_allocator.hpp +0 -115
@@ -262,7 +262,8 @@ void CheckpointWriter::WriteSchema(SchemaCatalogEntry &schema) {
262
262
  void CheckpointReader::ReadSchema(ClientContext &context, MetadataReader &reader) {
263
263
  // read the schema and create it in the catalog
264
264
  auto info = CatalogEntry::Deserialize(reader);
265
- // we set create conflict to ignore to ignore the failure of recreating the main schema
265
+
266
+ // we set create conflict to IGNORE_ON_CONFLICT, so that we can ignore a failure when recreating the main schema
266
267
  info->on_conflict = OnCreateConflict::IGNORE_ON_CONFLICT;
267
268
  catalog.CreateSchema(context, info->Cast<CreateSchemaInfo>());
268
269
 
@@ -336,14 +337,12 @@ void CheckpointReader::ReadSequence(ClientContext &context, MetadataReader &read
336
337
  // Indexes
337
338
  //===--------------------------------------------------------------------===//
338
339
  void CheckpointWriter::WriteIndex(IndexCatalogEntry &index_catalog) {
339
- // The index data should already have been written as part of WriteTableData.
340
- // Here, we need only serialize the pointer to that data.
341
- auto root_offset = index_catalog.index->GetSerializedDataPointer();
340
+ // we write the index data in WriteTableData
341
+ // here, we only write the root pointer
342
+ const auto root_block_pointer = index_catalog.index->GetRootBlockPointer();
342
343
  auto &metadata_writer = GetMetadataWriter();
343
344
  index_catalog.Serialize(metadata_writer);
344
- // Serialize the Block id and offset of root node
345
- metadata_writer.Write(root_offset.block_id);
346
- metadata_writer.Write(root_offset.offset);
345
+ metadata_writer.Write(root_block_pointer);
347
346
  }
348
347
 
349
348
  void CheckpointReader::ReadIndex(ClientContext &context, MetadataReader &reader) {
@@ -358,10 +357,8 @@ void CheckpointReader::ReadIndex(ClientContext &context, MetadataReader &reader)
358
357
  auto &index_catalog = schema_catalog.CreateIndex(context, index_info, table_catalog)->Cast<DuckIndexEntry>();
359
358
  index_catalog.info = table_catalog.GetStorage().info;
360
359
 
361
- // we deserialize the index lazily, i.e., we do not need to load any node information
362
- // except the root block id and offset
363
- auto root_block_id = reader.Read<block_id_t>();
364
- auto root_offset = reader.Read<uint32_t>();
360
+ // we deserialize the index lazily, i.e., we only load the root block pointer
361
+ const auto index_block_pointer = reader.Read<BlockPointer>();
365
362
 
366
363
  // obtain the expressions of the ART from the index metadata
367
364
  vector<unique_ptr<Expression>> unbound_expressions;
@@ -401,9 +398,8 @@ void CheckpointReader::ReadIndex(ClientContext &context, MetadataReader &reader)
401
398
  switch (index_info.index_type) {
402
399
  case IndexType::ART: {
403
400
  auto &storage = table_catalog.GetStorage();
404
- auto art =
405
- make_uniq<ART>(index_info.column_ids, TableIOManager::Get(storage), std::move(unbound_expressions),
406
- index_info.constraint_type, storage.db, nullptr, BlockPointer(root_block_id, root_offset));
401
+ auto art = make_uniq<ART>(index_info.column_ids, TableIOManager::Get(storage), std::move(unbound_expressions),
402
+ index_info.constraint_type, storage.db, nullptr, index_block_pointer);
407
403
  index_catalog.index = art.get();
408
404
  storage.info->indexes.AddIndex(std::move(art));
409
405
  break;
@@ -450,7 +446,7 @@ void CheckpointReader::ReadTableMacro(ClientContext &context, MetadataReader &re
450
446
  // Table Metadata
451
447
  //===--------------------------------------------------------------------===//
452
448
  void CheckpointWriter::WriteTable(TableCatalogEntry &table) {
453
- // write the table meta data
449
+ // write the table metadata
454
450
  table.Serialize(GetMetadataWriter());
455
451
  // now we need to write the table data.
456
452
  if (auto writer = GetTableDataWriter(table)) {
@@ -483,12 +479,11 @@ void CheckpointReader::ReadTableData(ClientContext &context, MetadataReader &rea
483
479
  data_reader.ReadTableData();
484
480
  bound_info.data->total_rows = reader.Read<idx_t>();
485
481
 
486
- // Get any indexes block info
487
- idx_t num_indexes = reader.Read<idx_t>();
488
- for (idx_t i = 0; i < num_indexes; i++) {
489
- auto idx_block_id = reader.Read<block_id_t>();
490
- auto idx_offset = reader.Read<uint32_t>();
491
- bound_info.indexes.emplace_back(idx_block_id, idx_offset);
482
+ // get the root block pointers of each index
483
+ idx_t index_count = reader.Read<idx_t>();
484
+ for (idx_t i = 0; i < index_count; i++) {
485
+ const auto index_pointer = reader.Read<BlockPointer>();
486
+ bound_info.indexes.emplace_back(index_pointer);
492
487
  }
493
488
  }
494
489
 
@@ -429,13 +429,13 @@ void DataTable::VerifyForeignKeyConstraint(const BoundForeignKeyConstraint &bfk,
429
429
  data_table.info->indexes.VerifyForeignKey(*dst_keys_ptr, dst_chunk, regular_conflicts);
430
430
  regular_conflicts.Finalize();
431
431
  auto &regular_matches = regular_conflicts.Conflicts();
432
- // check whether or not the chunk can be inserted or deleted into the referenced table' transaction local storage
433
- auto &local_storage = LocalStorage::Get(context, db);
434
432
 
433
+ // check if we can insert the chunk into the reference table's local storage
434
+ auto &local_storage = LocalStorage::Get(context, db);
435
435
  bool error = IsForeignKeyConstraintError(is_append, count, regular_matches);
436
436
  bool transaction_error = false;
437
-
438
437
  bool transaction_check = local_storage.Find(data_table);
438
+
439
439
  if (transaction_check) {
440
440
  auto &transact_index = local_storage.GetIndexes(data_table);
441
441
  transact_index.VerifyForeignKey(*dst_keys_ptr, dst_chunk, transaction_conflicts);
@@ -12,7 +12,7 @@ Index::Index(AttachedDatabase &db, IndexType type, TableIOManager &table_io_mana
12
12
  IndexConstraintType constraint_type_p)
13
13
 
14
14
  : type(type), table_io_manager(table_io_manager), column_ids(column_ids_p), constraint_type(constraint_type_p),
15
- db(db), buffer_manager(BufferManager::GetBufferManager(db)) {
15
+ db(db) {
16
16
 
17
17
  for (auto &expr : unbound_expressions) {
18
18
  types.push_back(expr->return_type.InternalType());
@@ -39,6 +39,12 @@ PreservedError Index::Append(DataChunk &entries, Vector &row_identifiers) {
39
39
  return Append(state, entries, row_identifiers);
40
40
  }
41
41
 
42
+ void Index::CommitDrop() {
43
+ IndexLock index_lock;
44
+ InitializeLock(index_lock);
45
+ CommitDrop(index_lock);
46
+ }
47
+
42
48
  void Index::Delete(DataChunk &entries, Vector &row_identifiers) {
43
49
  IndexLock state;
44
50
  InitializeLock(state);
@@ -13,7 +13,7 @@ MetadataManager::~MetadataManager() {
13
13
 
14
14
  MetadataHandle MetadataManager::AllocateHandle() {
15
15
  // check if there is any free space left in an existing block
16
- // if not allocate a new bloc
16
+ // if not allocate a new block
17
17
  block_id_t free_block = INVALID_BLOCK;
18
18
  for (auto &kv : blocks) {
19
19
  auto &block = kv.second;
@@ -230,27 +230,27 @@ void MetadataBlock::FreeBlocksFromInteger(idx_t free_list) {
230
230
  }
231
231
 
232
232
  void MetadataManager::MarkBlocksAsModified() {
233
- if (!modified_blocks.empty()) {
234
- // for any blocks that were modified in the last checkpoint - set them to free blocks currently
235
- for (auto &kv : modified_blocks) {
236
- auto block_id = kv.first;
237
- idx_t modified_list = kv.second;
238
- auto entry = blocks.find(block_id);
239
- D_ASSERT(entry != blocks.end());
240
- auto &block = entry->second;
241
- idx_t current_free_blocks = block.FreeBlocksToInteger();
242
- // merge the current set of free blocks with the modified blocks
243
- idx_t new_free_blocks = current_free_blocks | modified_list;
244
- // if (new_free_blocks == NumericLimits<idx_t>::Maximum()) {
245
- // // if new free_blocks is all blocks - mark entire block as modified
246
- // blocks.erase(entry);
247
- // block_manager.MarkBlockAsModified(block_id);
248
- // } else {
249
- // set the new set of free blocks
250
- block.FreeBlocksFromInteger(new_free_blocks);
251
- // }
252
- }
233
+
234
+ // for any blocks that were modified in the last checkpoint - set them to free blocks currently
235
+ for (auto &kv : modified_blocks) {
236
+ auto block_id = kv.first;
237
+ idx_t modified_list = kv.second;
238
+ auto entry = blocks.find(block_id);
239
+ D_ASSERT(entry != blocks.end());
240
+ auto &block = entry->second;
241
+ idx_t current_free_blocks = block.FreeBlocksToInteger();
242
+ // merge the current set of free blocks with the modified blocks
243
+ idx_t new_free_blocks = current_free_blocks | modified_list;
244
+ // if (new_free_blocks == NumericLimits<idx_t>::Maximum()) {
245
+ // // if new free_blocks is all blocks - mark entire block as modified
246
+ // blocks.erase(entry);
247
+ // block_manager.MarkBlockAsModified(block_id);
248
+ // } else {
249
+ // set the new set of free blocks
250
+ block.FreeBlocksFromInteger(new_free_blocks);
251
+ // }
253
252
  }
253
+
254
254
  modified_blocks.clear();
255
255
  for (auto &kv : blocks) {
256
256
  auto &block = kv.second;
@@ -77,14 +77,6 @@ idx_t StandardBufferManager::GetMaxMemory() const {
77
77
  return buffer_pool.GetMaxMemory();
78
78
  }
79
79
 
80
- // POTENTIALLY PROBLEMATIC
81
- // void StandardBufferManager::IncreaseUsedMemory(idx_t size, bool unsafe) {
82
- // if (!unsafe && buffer_pool.GetUsedMemory() + size > buffer_pool.GetMaxMemory()) {
83
- // throw OutOfMemoryException("Failed to allocate data of size %lld%s", size, InMemoryWarning());
84
- // }
85
- // buffer_pool.IncreaseUsedMemory(size);
86
- //}
87
-
88
80
  template <typename... ARGS>
89
81
  TempBufferPoolReservation StandardBufferManager::EvictBlocksOrThrow(idx_t memory_delta, unique_ptr<FileBuffer> *buffer,
90
82
  ARGS... args) {
@@ -2,7 +2,7 @@
2
2
 
3
3
  namespace duckdb {
4
4
 
5
- const uint64_t VERSION_NUMBER = 57;
5
+ const uint64_t VERSION_NUMBER = 58;
6
6
 
7
7
  struct StorageVersionInfo {
8
8
  const char *version_name;
@@ -54,7 +54,7 @@ void TableIndexList::VerifyForeignKey(const vector<PhysicalIndex> &fk_keys, Data
54
54
  ? ForeignKeyType::FK_TYPE_PRIMARY_KEY_TABLE
55
55
  : ForeignKeyType::FK_TYPE_FOREIGN_KEY_TABLE;
56
56
 
57
- // check whether or not the chunk can be inserted or deleted into the referenced table' storage
57
+ // check whether the chunk can be inserted or deleted into the referenced table storage
58
58
  auto index = FindForeignKeyIndex(fk_keys, fk_type);
59
59
  if (!index) {
60
60
  throw InternalException("Internal Foreign Key error: could not find index to verify...");
@@ -1,5 +1,6 @@
1
1
  #include "duckdb/transaction/commit_state.hpp"
2
2
 
3
+ #include "duckdb/catalog/catalog_entry/duck_index_entry.hpp"
3
4
  #include "duckdb/catalog/catalog_entry/duck_table_entry.hpp"
4
5
  #include "duckdb/catalog/catalog_entry/type_catalog_entry.hpp"
5
6
  #include "duckdb/catalog/catalog_set.hpp"
@@ -124,9 +125,12 @@ void CommitState::WriteCatalogEntry(CatalogEntry &entry, data_ptr_t dataptr) {
124
125
  case CatalogType::TYPE_ENTRY:
125
126
  log->WriteDropType(entry.Cast<TypeCatalogEntry>());
126
127
  break;
127
- case CatalogType::INDEX_ENTRY:
128
+ case CatalogType::INDEX_ENTRY: {
129
+ auto &index_entry = entry.Cast<DuckIndexEntry>();
130
+ index_entry.CommitDrop();
128
131
  log->WriteDropIndex(entry.Cast<IndexCatalogEntry>());
129
132
  break;
133
+ }
130
134
  case CatalogType::PREPARED_STATEMENT:
131
135
  case CatalogType::SCALAR_FUNCTION_ENTRY:
132
136
  // do nothing, indexes/prepared statements/functions aren't persisted to disk
@@ -10,5 +10,11 @@
10
10
 
11
11
  #include "src/core_functions/scalar/list/list_sort.cpp"
12
12
 
13
+ #include "src/core_functions/scalar/list/list_distance.cpp"
14
+
15
+ #include "src/core_functions/scalar/list/list_cosine_similarity.cpp"
16
+
17
+ #include "src/core_functions/scalar/list/list_inner_product.cpp"
18
+
13
19
  #include "src/core_functions/scalar/list/range.cpp"
14
20
 
@@ -0,0 +1,4 @@
1
+ #include "src/execution/index/fixed_size_allocator.cpp"
2
+
3
+ #include "src/execution/index/fixed_size_buffer.cpp"
4
+
@@ -2,8 +2,6 @@
2
2
 
3
3
  #include "src/execution/index/art/node.cpp"
4
4
 
5
- #include "src/execution/index/art/fixed_size_allocator.cpp"
6
-
7
5
  #include "src/execution/index/art/iterator.cpp"
8
6
 
9
7
  #include "src/execution/index/art/leaf.cpp"
@@ -1,238 +0,0 @@
1
- #include "duckdb/execution/index/art/fixed_size_allocator.hpp"
2
-
3
- namespace duckdb {
4
-
5
- constexpr idx_t FixedSizeAllocator::BASE[];
6
- constexpr uint8_t FixedSizeAllocator::SHIFT[];
7
-
8
- FixedSizeAllocator::FixedSizeAllocator(const idx_t allocation_size, Allocator &allocator)
9
- : allocation_size(allocation_size), total_allocations(0), allocator(allocator) {
10
-
11
- // calculate how many allocations fit into one buffer
12
-
13
- idx_t bits_per_value = sizeof(validity_t) * 8;
14
- idx_t curr_alloc_size = 0;
15
-
16
- bitmask_count = 0;
17
- allocations_per_buffer = 0;
18
-
19
- while (curr_alloc_size < BUFFER_ALLOC_SIZE) {
20
- if (!bitmask_count || (bitmask_count * bits_per_value) % allocations_per_buffer == 0) {
21
- bitmask_count++;
22
- curr_alloc_size += sizeof(validity_t);
23
- }
24
-
25
- auto remaining_alloc_size = BUFFER_ALLOC_SIZE - curr_alloc_size;
26
- auto remaining_allocations = MinValue(remaining_alloc_size / allocation_size, bits_per_value);
27
-
28
- if (remaining_allocations == 0) {
29
- break;
30
- }
31
-
32
- allocations_per_buffer += remaining_allocations;
33
- curr_alloc_size += remaining_allocations * allocation_size;
34
- }
35
-
36
- allocation_offset = bitmask_count * sizeof(validity_t);
37
- }
38
-
39
- FixedSizeAllocator::~FixedSizeAllocator() {
40
- for (auto &buffer : buffers) {
41
- allocator.FreeData(buffer.ptr, BUFFER_ALLOC_SIZE);
42
- }
43
- }
44
-
45
- Node FixedSizeAllocator::New() {
46
-
47
- // no more free pointers
48
- if (buffers_with_free_space.empty()) {
49
-
50
- // add a new buffer
51
- idx_t buffer_id = buffers.size();
52
- D_ASSERT(buffer_id <= (uint32_t)DConstants::INVALID_INDEX);
53
- auto buffer = allocator.AllocateData(BUFFER_ALLOC_SIZE);
54
- buffers.emplace_back(buffer, 0);
55
- buffers_with_free_space.insert(buffer_id);
56
-
57
- // set the bitmask
58
- ValidityMask mask(reinterpret_cast<validity_t *>(buffer));
59
- mask.SetAllValid(allocations_per_buffer);
60
- }
61
-
62
- // return a pointer
63
- D_ASSERT(!buffers_with_free_space.empty());
64
- auto buffer_id = (uint32_t)*buffers_with_free_space.begin();
65
-
66
- auto bitmask_ptr = reinterpret_cast<validity_t *>(buffers[buffer_id].ptr);
67
- ValidityMask mask(bitmask_ptr);
68
- auto offset = GetOffset(mask, buffers[buffer_id].allocation_count);
69
-
70
- buffers[buffer_id].allocation_count++;
71
- total_allocations++;
72
- if (buffers[buffer_id].allocation_count == allocations_per_buffer) {
73
- buffers_with_free_space.erase(buffer_id);
74
- }
75
-
76
- return Node(buffer_id, offset);
77
- }
78
-
79
- void FixedSizeAllocator::Free(const Node ptr) {
80
- auto bitmask_ptr = reinterpret_cast<validity_t *>(buffers[ptr.GetBufferId()].ptr);
81
- ValidityMask mask(bitmask_ptr);
82
- D_ASSERT(!mask.RowIsValid(ptr.GetOffset()));
83
- mask.SetValid(ptr.GetOffset());
84
- buffers_with_free_space.insert(ptr.GetBufferId());
85
-
86
- D_ASSERT(total_allocations > 0);
87
- D_ASSERT(buffers[ptr.GetBufferId()].allocation_count > 0);
88
- buffers[ptr.GetBufferId()].allocation_count--;
89
- total_allocations--;
90
- }
91
-
92
- void FixedSizeAllocator::Reset() {
93
-
94
- for (auto &buffer : buffers) {
95
- allocator.FreeData(buffer.ptr, BUFFER_ALLOC_SIZE);
96
- }
97
- buffers.clear();
98
- buffers_with_free_space.clear();
99
- total_allocations = 0;
100
- }
101
-
102
- void FixedSizeAllocator::Merge(FixedSizeAllocator &other) {
103
-
104
- D_ASSERT(allocation_size == other.allocation_size);
105
-
106
- // remember the buffer count and merge the buffers
107
- idx_t buffer_count = buffers.size();
108
- for (auto &buffer : other.buffers) {
109
- buffers.push_back(buffer);
110
- }
111
- other.buffers.clear();
112
-
113
- // merge the buffers with free spaces
114
- for (auto &buffer_id : other.buffers_with_free_space) {
115
- buffers_with_free_space.insert(buffer_id + buffer_count);
116
- }
117
- other.buffers_with_free_space.clear();
118
-
119
- // add the total allocations
120
- total_allocations += other.total_allocations;
121
- }
122
-
123
- bool FixedSizeAllocator::InitializeVacuum() {
124
-
125
- if (total_allocations == 0) {
126
- Reset();
127
- return false;
128
- }
129
-
130
- auto total_available_allocations = allocations_per_buffer * buffers.size();
131
- D_ASSERT(total_available_allocations >= total_allocations);
132
- auto total_free_positions = total_available_allocations - total_allocations;
133
-
134
- // vacuum_count buffers can be freed
135
- auto vacuum_count = total_free_positions / allocations_per_buffer;
136
-
137
- // calculate the vacuum threshold adaptively
138
- D_ASSERT(vacuum_count < buffers.size());
139
- idx_t memory_usage = GetMemoryUsage();
140
- idx_t excess_memory_usage = vacuum_count * BUFFER_ALLOC_SIZE;
141
- auto excess_percentage = (double)excess_memory_usage / (double)memory_usage;
142
- auto threshold = (double)VACUUM_THRESHOLD / 100.0;
143
- if (excess_percentage < threshold) {
144
- return false;
145
- }
146
-
147
- min_vacuum_buffer_id = buffers.size() - vacuum_count;
148
-
149
- // remove all invalid buffers from the available buffer list to ensure that we do not reuse them
150
- auto it = buffers_with_free_space.begin();
151
- while (it != buffers_with_free_space.end()) {
152
- if (*it >= min_vacuum_buffer_id) {
153
- it = buffers_with_free_space.erase(it);
154
- } else {
155
- it++;
156
- }
157
- }
158
-
159
- return true;
160
- }
161
-
162
- void FixedSizeAllocator::FinalizeVacuum() {
163
-
164
- // free all (now unused) buffers
165
- while (min_vacuum_buffer_id < buffers.size()) {
166
- allocator.FreeData(buffers.back().ptr, BUFFER_ALLOC_SIZE);
167
- buffers.pop_back();
168
- }
169
- }
170
-
171
- Node FixedSizeAllocator::VacuumPointer(const Node ptr) {
172
-
173
- // we do not need to adjust the bitmask of the old buffer, because we will free the entire
174
- // buffer after the vacuum operation
175
-
176
- auto new_ptr = New();
177
-
178
- // new increases the allocation count
179
- total_allocations--;
180
-
181
- memcpy(Get(new_ptr), Get(ptr), allocation_size);
182
- return new_ptr;
183
- }
184
-
185
- void FixedSizeAllocator::Verify() const {
186
- #ifdef DEBUG
187
- auto total_available_allocations = allocations_per_buffer * buffers.size();
188
- D_ASSERT(total_available_allocations >= total_allocations);
189
- D_ASSERT(buffers.size() >= buffers_with_free_space.size());
190
- #endif
191
- }
192
-
193
- uint32_t FixedSizeAllocator::GetOffset(ValidityMask &mask, const idx_t allocation_count) {
194
-
195
- auto data = mask.GetData();
196
-
197
- // fills up a buffer sequentially before searching for free bits
198
- if (mask.RowIsValid(allocation_count)) {
199
- mask.SetInvalid(allocation_count);
200
- return allocation_count;
201
- }
202
-
203
- // get an entry with free bits
204
- for (idx_t entry_idx = 0; entry_idx < bitmask_count; entry_idx++) {
205
- if (data[entry_idx] != 0) {
206
-
207
- // find the position of the free bit
208
- auto entry = data[entry_idx];
209
- idx_t first_valid_bit = 0;
210
-
211
- // this loop finds the position of the rightmost set bit in entry and stores it
212
- // in first_valid_bit
213
- for (idx_t i = 0; i < 6; i++) {
214
- // set the left half of the bits of this level to zero and test if the entry is still not zero
215
- if (entry & BASE[i]) {
216
- // first valid bit is in the rightmost s[i] bits
217
- // permanently set the left half of the bits to zero
218
- entry &= BASE[i];
219
- } else {
220
- // first valid bit is in the leftmost s[i] bits
221
- // shift by s[i] for the next iteration and add s[i] to the position of the rightmost set bit
222
- entry >>= SHIFT[i];
223
- first_valid_bit += SHIFT[i];
224
- }
225
- }
226
- D_ASSERT(entry);
227
-
228
- auto prev_bits = entry_idx * sizeof(validity_t) * 8;
229
- D_ASSERT(mask.RowIsValid(prev_bits + first_valid_bit));
230
- mask.SetInvalid(prev_bits + first_valid_bit);
231
- return (prev_bits + first_valid_bit);
232
- }
233
- }
234
-
235
- throw InternalException("Invalid bitmask of FixedSizeAllocator");
236
- }
237
-
238
- } // namespace duckdb
@@ -1,115 +0,0 @@
1
- //===----------------------------------------------------------------------===//
2
- // DuckDB
3
- //
4
- // duckdb/execution/index/art/fixed_size_allocator.hpp
5
- //
6
- //
7
- //===----------------------------------------------------------------------===//
8
-
9
- #pragma once
10
-
11
- #include "duckdb/common/types/validity_mask.hpp"
12
- #include "duckdb/common/unordered_set.hpp"
13
- #include "duckdb/storage/buffer_manager.hpp"
14
- #include "duckdb/execution/index/art/node.hpp"
15
-
16
- namespace duckdb {
17
-
18
- // structs
19
- struct BufferEntry {
20
- BufferEntry(const data_ptr_t &ptr, const idx_t &allocation_count) : ptr(ptr), allocation_count(allocation_count) {
21
- }
22
- data_ptr_t ptr;
23
- idx_t allocation_count;
24
- };
25
-
26
- //! The FixedSizeAllocator provides pointers to fixed-size sections of pre-allocated memory buffers.
27
- //! The pointers are Node pointers, and the leftmost byte (serialize flag and type) must always be zero.
28
- class FixedSizeAllocator {
29
- public:
30
- //! Fixed size of the buffers
31
- static constexpr idx_t BUFFER_ALLOC_SIZE = Storage::BLOCK_ALLOC_SIZE;
32
- //! We can vacuum 10% or more of the total memory usage of the allocator
33
- static constexpr uint8_t VACUUM_THRESHOLD = 10;
34
-
35
- //! Constants for fast offset calculations in the bitmask
36
- static constexpr idx_t BASE[] = {0x00000000FFFFFFFF, 0x0000FFFF, 0x00FF, 0x0F, 0x3, 0x1};
37
- static constexpr uint8_t SHIFT[] = {32, 16, 8, 4, 2, 1};
38
-
39
- public:
40
- explicit FixedSizeAllocator(const idx_t allocation_size, Allocator &allocator);
41
- ~FixedSizeAllocator();
42
-
43
- //! Allocation size of one element in a buffer
44
- idx_t allocation_size;
45
- //! Total number of allocations
46
- idx_t total_allocations;
47
- //! Number of validity_t values in the bitmask
48
- idx_t bitmask_count;
49
- //! First starting byte of the payload
50
- idx_t allocation_offset;
51
- //! Number of possible allocations per buffer
52
- idx_t allocations_per_buffer;
53
-
54
- //! Buffers containing the data
55
- vector<BufferEntry> buffers;
56
- //! Buffers with free space
57
- unordered_set<idx_t> buffers_with_free_space;
58
-
59
- //! Minimum buffer ID of buffers that can be vacuumed
60
- idx_t min_vacuum_buffer_id;
61
-
62
- //! Buffer manager of the database instance
63
- Allocator &allocator;
64
-
65
- public:
66
- //! Get a new Node pointer to data, might cause a new buffer allocation
67
- Node New();
68
- //! Free the data of the Node pointer
69
- void Free(const Node ptr);
70
- //! Get the data of the Node pointer
71
- template <class T>
72
- inline T *Get(const Node ptr) const {
73
- return (T *)Get(ptr);
74
- }
75
-
76
- //! Resets the allocator, e.g., becomes necessary during DELETE FROM table
77
- void Reset();
78
-
79
- //! Returns the allocated memory size in bytes
80
- inline idx_t GetMemoryUsage() const {
81
- return buffers.size() * BUFFER_ALLOC_SIZE;
82
- }
83
-
84
- //! Merge another FixedSizeAllocator into this allocator. Both must have the same allocation size
85
- void Merge(FixedSizeAllocator &other);
86
-
87
- //! Initialize a vacuum operation, and return true, if the allocator needs a vacuum
88
- bool InitializeVacuum();
89
- //! Finalize a vacuum operation by freeing all buffers exceeding the min_vacuum_buffer_id
90
- void FinalizeVacuum();
91
- //! Returns true, if a Node pointer qualifies for a vacuum operation, and false otherwise
92
- inline bool NeedsVacuum(const Node ptr) const {
93
- if (ptr.GetBufferId() >= min_vacuum_buffer_id) {
94
- return true;
95
- }
96
- return false;
97
- }
98
- //! Vacuums a Node pointer
99
- Node VacuumPointer(const Node ptr);
100
-
101
- //! Verify that the allocation counts match the existing positions on the buffers
102
- void Verify() const;
103
-
104
- private:
105
- //! Returns the data_ptr_t of a Node pointer
106
- inline data_ptr_t Get(const Node ptr) const {
107
- D_ASSERT(ptr.GetBufferId() < buffers.size());
108
- D_ASSERT(ptr.GetOffset() < allocations_per_buffer);
109
- return buffers[ptr.GetBufferId()].ptr + ptr.GetOffset() * allocation_size + allocation_offset;
110
- }
111
- //! Returns the first free offset in a bitmask
112
- uint32_t GetOffset(ValidityMask &mask, const idx_t allocation_count);
113
- };
114
-
115
- } // namespace duckdb