duckdb 0.8.2-dev4025.0 → 0.8.2-dev4142.0
This diff compares the contents of two publicly available package versions as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the package versions as they appear in their respective public registries.
- package/binding.gyp +1 -0
- package/package.json +1 -1
- package/src/duckdb/extension/json/buffered_json_reader.cpp +76 -74
- package/src/duckdb/extension/json/include/buffered_json_reader.hpp +35 -32
- package/src/duckdb/extension/json/include/json_scan.hpp +9 -6
- package/src/duckdb/extension/json/json_scan.cpp +124 -121
- package/src/duckdb/src/catalog/catalog.cpp +20 -0
- package/src/duckdb/src/catalog/catalog_entry/duck_index_entry.cpp +5 -0
- package/src/duckdb/src/common/arrow/arrow_converter.cpp +3 -0
- package/src/duckdb/src/common/radix_partitioning.cpp +1 -1
- package/src/duckdb/src/common/sort/partition_state.cpp +5 -1
- package/src/duckdb/src/core_functions/aggregate/holistic/mode.cpp +1 -1
- package/src/duckdb/src/core_functions/function_list.cpp +7 -0
- package/src/duckdb/src/core_functions/scalar/list/list_cosine_similarity.cpp +78 -0
- package/src/duckdb/src/core_functions/scalar/list/list_distance.cpp +72 -0
- package/src/duckdb/src/core_functions/scalar/list/list_inner_product.cpp +70 -0
- package/src/duckdb/src/execution/index/art/art.cpp +111 -92
- package/src/duckdb/src/execution/index/art/iterator.cpp +21 -27
- package/src/duckdb/src/execution/index/art/leaf.cpp +72 -153
- package/src/duckdb/src/execution/index/art/node.cpp +109 -203
- package/src/duckdb/src/execution/index/art/node16.cpp +32 -64
- package/src/duckdb/src/execution/index/art/node256.cpp +38 -53
- package/src/duckdb/src/execution/index/art/node4.cpp +31 -62
- package/src/duckdb/src/execution/index/art/node48.cpp +43 -65
- package/src/duckdb/src/execution/index/art/prefix.cpp +70 -141
- package/src/duckdb/src/execution/index/fixed_size_allocator.cpp +345 -0
- package/src/duckdb/src/execution/index/fixed_size_buffer.cpp +74 -0
- package/src/duckdb/src/execution/operator/aggregate/physical_window.cpp +1 -1
- package/src/duckdb/src/execution/operator/schema/physical_create_art_index.cpp +1 -1
- package/src/duckdb/src/function/table/system/duckdb_columns.cpp +3 -1
- package/src/duckdb/src/function/table/version/pragma_version.cpp +2 -2
- package/src/duckdb/src/include/duckdb/catalog/catalog.hpp +1 -0
- package/src/duckdb/src/include/duckdb/catalog/catalog_entry/duck_index_entry.hpp +2 -0
- package/src/duckdb/src/include/duckdb/common/optional_idx.hpp +1 -1
- package/src/duckdb/src/include/duckdb/core_functions/scalar/list_functions.hpp +51 -0
- package/src/duckdb/src/include/duckdb/execution/index/art/art.hpp +17 -7
- package/src/duckdb/src/include/duckdb/execution/index/art/iterator.hpp +5 -5
- package/src/duckdb/src/include/duckdb/execution/index/art/leaf.hpp +10 -16
- package/src/duckdb/src/include/duckdb/execution/index/art/node.hpp +38 -116
- package/src/duckdb/src/include/duckdb/execution/index/art/node16.hpp +17 -18
- package/src/duckdb/src/include/duckdb/execution/index/art/node256.hpp +17 -23
- package/src/duckdb/src/include/duckdb/execution/index/art/node4.hpp +17 -18
- package/src/duckdb/src/include/duckdb/execution/index/art/node48.hpp +17 -24
- package/src/duckdb/src/include/duckdb/execution/index/art/prefix.hpp +16 -22
- package/src/duckdb/src/include/duckdb/execution/index/fixed_size_allocator.hpp +126 -0
- package/src/duckdb/src/include/duckdb/execution/index/fixed_size_buffer.hpp +79 -0
- package/src/duckdb/src/include/duckdb/execution/index/index_pointer.hpp +96 -0
- package/src/duckdb/src/include/duckdb/main/extension_entries.hpp +1 -0
- package/src/duckdb/src/include/duckdb/main/extension_helper.hpp +1 -0
- package/src/duckdb/src/include/duckdb/parallel/task_scheduler.hpp +1 -1
- package/src/duckdb/src/include/duckdb/storage/block.hpp +1 -1
- package/src/duckdb/src/include/duckdb/storage/index.hpp +10 -8
- package/src/duckdb/src/include/duckdb/storage/metadata/metadata_writer.hpp +3 -0
- package/src/duckdb/src/main/extension/extension_helper.cpp +17 -0
- package/src/duckdb/src/main/extension/extension_install.cpp +5 -3
- package/src/duckdb/src/main/extension/extension_load.cpp +3 -3
- package/src/duckdb/src/planner/binder/expression/bind_function_expression.cpp +14 -5
- package/src/duckdb/src/storage/checkpoint/table_data_writer.cpp +2 -3
- package/src/duckdb/src/storage/checkpoint_manager.cpp +16 -21
- package/src/duckdb/src/storage/data_table.cpp +3 -3
- package/src/duckdb/src/storage/index.cpp +7 -1
- package/src/duckdb/src/storage/metadata/metadata_manager.cpp +21 -21
- package/src/duckdb/src/storage/standard_buffer_manager.cpp +0 -8
- package/src/duckdb/src/storage/storage_info.cpp +1 -1
- package/src/duckdb/src/storage/table_index_list.cpp +1 -1
- package/src/duckdb/src/transaction/commit_state.cpp +5 -1
- package/src/duckdb/ub_src_core_functions_scalar_list.cpp +6 -0
- package/src/duckdb/ub_src_execution_index.cpp +4 -0
- package/src/duckdb/ub_src_execution_index_art.cpp +0 -2
- package/src/duckdb/src/execution/index/art/fixed_size_allocator.cpp +0 -238
- package/src/duckdb/src/include/duckdb/execution/index/art/fixed_size_allocator.hpp +0 -115
package/src/duckdb/src/storage/checkpoint_manager.cpp
@@ -262,7 +262,8 @@ void CheckpointWriter::WriteSchema(SchemaCatalogEntry &schema) {
 void CheckpointReader::ReadSchema(ClientContext &context, MetadataReader &reader) {
 	// read the schema and create it in the catalog
 	auto info = CatalogEntry::Deserialize(reader);
-
+
+	// we set create conflict to IGNORE_ON_CONFLICT, so that we can ignore a failure when recreating the main schema
 	info->on_conflict = OnCreateConflict::IGNORE_ON_CONFLICT;
 	catalog.CreateSchema(context, info->Cast<CreateSchemaInfo>());
 
@@ -336,14 +337,12 @@ void CheckpointReader::ReadSequence(ClientContext &context, MetadataReader &read
 // Indexes
 //===--------------------------------------------------------------------===//
 void CheckpointWriter::WriteIndex(IndexCatalogEntry &index_catalog) {
-	//
-	//
-	auto
+	// we write the index data in WriteTableData
+	// here, we only write the root pointer
+	const auto root_block_pointer = index_catalog.index->GetRootBlockPointer();
 	auto &metadata_writer = GetMetadataWriter();
 	index_catalog.Serialize(metadata_writer);
-
-	metadata_writer.Write(root_offset.block_id);
-	metadata_writer.Write(root_offset.offset);
+	metadata_writer.Write(root_block_pointer);
 }
 
 void CheckpointReader::ReadIndex(ClientContext &context, MetadataReader &reader) {
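Note: the hunk above collapses two separate writes (block id, then offset) into a single `Write(root_block_pointer)`. A minimal sketch of why this helps, assuming `BlockPointer` is a trivially copyable pair of block id and offset and that the metadata writer/reader round-trip such structs; the `WriteRootPointer` helper below is hypothetical, not duckdb source:

#include <cstdint>

using block_id_t = int64_t;

struct BlockPointer {
	block_id_t block_id; // which metadata block holds the index root
	uint32_t offset;     // offset of the root within that block
};

template <class Writer>
void WriteRootPointer(Writer &writer, const BlockPointer &root) {
	// one write instead of two: the reader can no longer desynchronize
	// by reading the fields in the wrong order or with the wrong widths
	writer.Write(root);
}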
@@ -358,10 +357,8 @@ void CheckpointReader::ReadIndex(ClientContext &context, MetadataReader &reader)
 	auto &index_catalog = schema_catalog.CreateIndex(context, index_info, table_catalog)->Cast<DuckIndexEntry>();
 	index_catalog.info = table_catalog.GetStorage().info;
 
-	// we deserialize the index lazily, i.e., we
-
-	auto root_block_id = reader.Read<block_id_t>();
-	auto root_offset = reader.Read<uint32_t>();
+	// we deserialize the index lazily, i.e., we only load the root block pointer
+	const auto index_block_pointer = reader.Read<BlockPointer>();
 
 	// obtain the expressions of the ART from the index metadata
 	vector<unique_ptr<Expression>> unbound_expressions;
@@ -401,9 +398,8 @@ void CheckpointReader::ReadIndex(ClientContext &context, MetadataReader &reader)
 	switch (index_info.index_type) {
 	case IndexType::ART: {
 		auto &storage = table_catalog.GetStorage();
-		auto art =
-
-		    index_info.constraint_type, storage.db, nullptr, BlockPointer(root_block_id, root_offset));
+		auto art = make_uniq<ART>(index_info.column_ids, TableIOManager::Get(storage), std::move(unbound_expressions),
+		                          index_info.constraint_type, storage.db, nullptr, index_block_pointer);
 		index_catalog.index = art.get();
 		storage.info->indexes.AddIndex(std::move(art));
 		break;
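Note: the ART is now reconstructed from only its root `BlockPointer`; per the comment in the previous hunk, the tree itself is deserialized lazily. A sketch of that lazy-loading pattern under that assumption; the names (`LazyIndex`, `EnsureLoaded`) are illustrative, not the actual ART members:

#include <cstdint>

struct BlockPointer {
	int64_t block_id;
	uint32_t offset;
};

class LazyIndex {
public:
	explicit LazyIndex(BlockPointer root_pointer) : root_pointer(root_pointer) {
	}
	// called on first lookup/insert; a no-op once the tree is in memory
	void EnsureLoaded() {
		if (loaded) {
			return;
		}
		// ... read the serialized tree starting at root_pointer ...
		loaded = true;
	}

private:
	BlockPointer root_pointer; // all that checkpoint loading has to produce
	bool loaded = false;
};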
@@ -450,7 +446,7 @@ void CheckpointReader::ReadTableMacro(ClientContext &context, MetadataReader &re
 // Table Metadata
 //===--------------------------------------------------------------------===//
 void CheckpointWriter::WriteTable(TableCatalogEntry &table) {
-	// write the table
+	// write the table metadata
 	table.Serialize(GetMetadataWriter());
 	// now we need to write the table data.
 	if (auto writer = GetTableDataWriter(table)) {
@@ -483,12 +479,11 @@ void CheckpointReader::ReadTableData(ClientContext &context, MetadataReader &rea
 	data_reader.ReadTableData();
 	bound_info.data->total_rows = reader.Read<idx_t>();
 
-	//
-	idx_t
-	for (idx_t i = 0; i <
-	auto
-
-		bound_info.indexes.emplace_back(idx_block_id, idx_offset);
+	// get the root block pointers of each index
+	idx_t index_count = reader.Read<idx_t>();
+	for (idx_t i = 0; i < index_count; i++) {
+		const auto index_pointer = reader.Read<BlockPointer>();
+		bound_info.indexes.emplace_back(index_pointer);
 	}
 }
 
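Note: the read loop above expects an `idx_t` count followed by one `BlockPointer` per index. A sketch of the matching write side under that assumption; the actual writer change lives in table_data_writer.cpp (+2 -3 in the file list), and `WriteIndexPointers` below is a hypothetical helper:

#include <cstdint>
#include <vector>

using idx_t = uint64_t;

struct BlockPointer {
	int64_t block_id;
	uint32_t offset;
};

template <class Writer>
void WriteIndexPointers(Writer &writer, const std::vector<BlockPointer> &index_pointers) {
	const idx_t index_count = index_pointers.size(); // read back with Read<idx_t>()
	writer.Write(index_count);
	for (const auto &pointer : index_pointers) {
		writer.Write(pointer); // read back with Read<BlockPointer>()
	}
}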
package/src/duckdb/src/storage/data_table.cpp
@@ -429,13 +429,13 @@ void DataTable::VerifyForeignKeyConstraint(const BoundForeignKeyConstraint &bfk,
 	data_table.info->indexes.VerifyForeignKey(*dst_keys_ptr, dst_chunk, regular_conflicts);
 	regular_conflicts.Finalize();
 	auto &regular_matches = regular_conflicts.Conflicts();
-	// check whether or not the chunk can be inserted or deleted into the referenced table' transaction local storage
-	auto &local_storage = LocalStorage::Get(context, db);
 
+	// check if we can insert the chunk into the reference table's local storage
+	auto &local_storage = LocalStorage::Get(context, db);
 	bool error = IsForeignKeyConstraintError(is_append, count, regular_matches);
 	bool transaction_error = false;
-
 	bool transaction_check = local_storage.Find(data_table);
+
 	if (transaction_check) {
 		auto &transact_index = local_storage.GetIndexes(data_table);
 		transact_index.VerifyForeignKey(*dst_keys_ptr, dst_chunk, transaction_conflicts);
package/src/duckdb/src/storage/index.cpp
@@ -12,7 +12,7 @@ Index::Index(AttachedDatabase &db, IndexType type, TableIOManager &table_io_mana
              IndexConstraintType constraint_type_p)
 
     : type(type), table_io_manager(table_io_manager), column_ids(column_ids_p), constraint_type(constraint_type_p),
-      db(db)
+      db(db) {
 
 	for (auto &expr : unbound_expressions) {
 		types.push_back(expr->return_type.InternalType());
@@ -39,6 +39,12 @@ PreservedError Index::Append(DataChunk &entries, Vector &row_identifiers) {
 	return Append(state, entries, row_identifiers);
 }
 
+void Index::CommitDrop() {
+	IndexLock index_lock;
+	InitializeLock(index_lock);
+	CommitDrop(index_lock);
+}
+
 void Index::Delete(DataChunk &entries, Vector &row_identifiers) {
 	IndexLock state;
 	InitializeLock(state);
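Note: the new `CommitDrop` follows the same idiom as the `Delete` context lines above it: a public entry point acquires the index lock, then delegates to an overload that assumes the lock is held. A standalone sketch of that idiom with stand-in types; the real `IndexLock` and `InitializeLock` live in duckdb's index headers:

#include <mutex>

struct IndexLock {
	std::unique_lock<std::mutex> guard;
};

class ToyIndex {
public:
	// public API: take the lock, then delegate to the locked overload
	void CommitDrop() {
		IndexLock index_lock;
		InitializeLock(index_lock);
		CommitDrop(index_lock);
	}

private:
	void InitializeLock(IndexLock &index_lock) {
		index_lock.guard = std::unique_lock<std::mutex>(lock);
	}
	// precondition: index_lock holds `lock`; implementations free the
	// index's in-memory buffers and on-disk blocks here
	void CommitDrop(IndexLock &index_lock) {
	}
	std::mutex lock;
};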
package/src/duckdb/src/storage/metadata/metadata_manager.cpp
@@ -13,7 +13,7 @@ MetadataManager::~MetadataManager() {
 
 MetadataHandle MetadataManager::AllocateHandle() {
 	// check if there is any free space left in an existing block
-	// if not allocate a new
+	// if not allocate a new block
 	block_id_t free_block = INVALID_BLOCK;
 	for (auto &kv : blocks) {
 		auto &block = kv.second;
@@ -230,27 +230,27 @@ void MetadataBlock::FreeBlocksFromInteger(idx_t free_list) {
 }
 
 void MetadataManager::MarkBlocksAsModified() {
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-	}
+
+	// for any blocks that were modified in the last checkpoint - set them to free blocks currently
+	for (auto &kv : modified_blocks) {
+		auto block_id = kv.first;
+		idx_t modified_list = kv.second;
+		auto entry = blocks.find(block_id);
+		D_ASSERT(entry != blocks.end());
+		auto &block = entry->second;
+		idx_t current_free_blocks = block.FreeBlocksToInteger();
+		// merge the current set of free blocks with the modified blocks
+		idx_t new_free_blocks = current_free_blocks | modified_list;
+		//	if (new_free_blocks == NumericLimits<idx_t>::Maximum()) {
+		//		// if new free_blocks is all blocks - mark entire block as modified
+		//		blocks.erase(entry);
+		//		block_manager.MarkBlockAsModified(block_id);
+		//	} else {
+		// set the new set of free blocks
+		block.FreeBlocksFromInteger(new_free_blocks);
+		//	}
 	}
+
 	modified_blocks.clear();
 	for (auto &kv : blocks) {
 		auto &block = kv.second;
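Note: a tiny worked example of the bitwise merge above, with illustrative 4-bit values (each set bit marks one free slot within a metadata block):

#include <cstdint>

using idx_t = uint64_t;

idx_t current_free_blocks = 0b0011; // slots 0 and 1 were already free
idx_t modified_list = 0b0100;       // slot 2 was freed since the last checkpoint
idx_t new_free_blocks = current_free_blocks | modified_list; // 0b0111: slots 0-2 now free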
package/src/duckdb/src/storage/standard_buffer_manager.cpp
@@ -77,14 +77,6 @@ idx_t StandardBufferManager::GetMaxMemory() const {
 	return buffer_pool.GetMaxMemory();
 }
 
-// POTENTIALLY PROBLEMATIC
-// void StandardBufferManager::IncreaseUsedMemory(idx_t size, bool unsafe) {
-//	if (!unsafe && buffer_pool.GetUsedMemory() + size > buffer_pool.GetMaxMemory()) {
-//		throw OutOfMemoryException("Failed to allocate data of size %lld%s", size, InMemoryWarning());
-//	}
-//	buffer_pool.IncreaseUsedMemory(size);
-//}
-
 template <typename... ARGS>
 TempBufferPoolReservation StandardBufferManager::EvictBlocksOrThrow(idx_t memory_delta, unique_ptr<FileBuffer> *buffer,
                                                                     ARGS... args) {
package/src/duckdb/src/storage/table_index_list.cpp
@@ -54,7 +54,7 @@ void TableIndexList::VerifyForeignKey(const vector<PhysicalIndex> &fk_keys, Data
 	                   ? ForeignKeyType::FK_TYPE_PRIMARY_KEY_TABLE
 	                   : ForeignKeyType::FK_TYPE_FOREIGN_KEY_TABLE;
 
-	// check whether
+	// check whether the chunk can be inserted or deleted into the referenced table storage
 	auto index = FindForeignKeyIndex(fk_keys, fk_type);
 	if (!index) {
 		throw InternalException("Internal Foreign Key error: could not find index to verify...");
package/src/duckdb/src/transaction/commit_state.cpp
@@ -1,5 +1,6 @@
 #include "duckdb/transaction/commit_state.hpp"
 
+#include "duckdb/catalog/catalog_entry/duck_index_entry.hpp"
 #include "duckdb/catalog/catalog_entry/duck_table_entry.hpp"
 #include "duckdb/catalog/catalog_entry/type_catalog_entry.hpp"
 #include "duckdb/catalog/catalog_set.hpp"
@@ -124,9 +125,12 @@ void CommitState::WriteCatalogEntry(CatalogEntry &entry, data_ptr_t dataptr) {
 	case CatalogType::TYPE_ENTRY:
 		log->WriteDropType(entry.Cast<TypeCatalogEntry>());
 		break;
-	case CatalogType::INDEX_ENTRY:
+	case CatalogType::INDEX_ENTRY: {
+		auto &index_entry = entry.Cast<DuckIndexEntry>();
+		index_entry.CommitDrop();
 		log->WriteDropIndex(entry.Cast<IndexCatalogEntry>());
 		break;
+	}
 	case CatalogType::PREPARED_STATEMENT:
 	case CatalogType::SCALAR_FUNCTION_ENTRY:
 		// do nothing, indexes/prepared statements/functions aren't persisted to disk
package/src/duckdb/ub_src_core_functions_scalar_list.cpp
@@ -10,5 +10,11 @@
 
 #include "src/core_functions/scalar/list/list_sort.cpp"
 
+#include "src/core_functions/scalar/list/list_distance.cpp"
+
+#include "src/core_functions/scalar/list/list_cosine_similarity.cpp"
+
+#include "src/core_functions/scalar/list/list_inner_product.cpp"
+
 #include "src/core_functions/scalar/list/range.cpp"
 
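Note: the three new translation units above register the new `list_distance`, `list_cosine_similarity`, and `list_inner_product` scalar functions (see `function_list.cpp` and `list_functions.hpp` in the file list). A standalone sketch of the math behind `list_cosine_similarity`, not the duckdb kernel itself:

#include <cmath>
#include <cstddef>
#include <stdexcept>
#include <vector>

double CosineSimilarity(const std::vector<double> &lhs, const std::vector<double> &rhs) {
	if (lhs.size() != rhs.size()) {
		throw std::invalid_argument("lists must be of equal length");
	}
	double dot = 0, lhs_sq = 0, rhs_sq = 0;
	for (std::size_t i = 0; i < lhs.size(); i++) {
		dot += lhs[i] * rhs[i]; // list_inner_product is just this sum
		lhs_sq += lhs[i] * lhs[i];
		rhs_sq += rhs[i] * rhs[i];
	}
	// the dot product normalized by the two vector lengths
	return dot / (std::sqrt(lhs_sq) * std::sqrt(rhs_sq));
}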
package/src/duckdb/src/execution/index/art/fixed_size_allocator.cpp (deleted; replaced by src/execution/index/fixed_size_allocator.cpp)
@@ -1,238 +0,0 @@
-#include "duckdb/execution/index/art/fixed_size_allocator.hpp"
-
-namespace duckdb {
-
-constexpr idx_t FixedSizeAllocator::BASE[];
-constexpr uint8_t FixedSizeAllocator::SHIFT[];
-
-FixedSizeAllocator::FixedSizeAllocator(const idx_t allocation_size, Allocator &allocator)
-    : allocation_size(allocation_size), total_allocations(0), allocator(allocator) {
-
-	// calculate how many allocations fit into one buffer
-
-	idx_t bits_per_value = sizeof(validity_t) * 8;
-	idx_t curr_alloc_size = 0;
-
-	bitmask_count = 0;
-	allocations_per_buffer = 0;
-
-	while (curr_alloc_size < BUFFER_ALLOC_SIZE) {
-		if (!bitmask_count || (bitmask_count * bits_per_value) % allocations_per_buffer == 0) {
-			bitmask_count++;
-			curr_alloc_size += sizeof(validity_t);
-		}
-
-		auto remaining_alloc_size = BUFFER_ALLOC_SIZE - curr_alloc_size;
-		auto remaining_allocations = MinValue(remaining_alloc_size / allocation_size, bits_per_value);
-
-		if (remaining_allocations == 0) {
-			break;
-		}
-
-		allocations_per_buffer += remaining_allocations;
-		curr_alloc_size += remaining_allocations * allocation_size;
-	}
-
-	allocation_offset = bitmask_count * sizeof(validity_t);
-}
-
-FixedSizeAllocator::~FixedSizeAllocator() {
-	for (auto &buffer : buffers) {
-		allocator.FreeData(buffer.ptr, BUFFER_ALLOC_SIZE);
-	}
-}
-
-Node FixedSizeAllocator::New() {
-
-	// no more free pointers
-	if (buffers_with_free_space.empty()) {
-
-		// add a new buffer
-		idx_t buffer_id = buffers.size();
-		D_ASSERT(buffer_id <= (uint32_t)DConstants::INVALID_INDEX);
-		auto buffer = allocator.AllocateData(BUFFER_ALLOC_SIZE);
-		buffers.emplace_back(buffer, 0);
-		buffers_with_free_space.insert(buffer_id);
-
-		// set the bitmask
-		ValidityMask mask(reinterpret_cast<validity_t *>(buffer));
-		mask.SetAllValid(allocations_per_buffer);
-	}
-
-	// return a pointer
-	D_ASSERT(!buffers_with_free_space.empty());
-	auto buffer_id = (uint32_t)*buffers_with_free_space.begin();
-
-	auto bitmask_ptr = reinterpret_cast<validity_t *>(buffers[buffer_id].ptr);
-	ValidityMask mask(bitmask_ptr);
-	auto offset = GetOffset(mask, buffers[buffer_id].allocation_count);
-
-	buffers[buffer_id].allocation_count++;
-	total_allocations++;
-	if (buffers[buffer_id].allocation_count == allocations_per_buffer) {
-		buffers_with_free_space.erase(buffer_id);
-	}
-
-	return Node(buffer_id, offset);
-}
-
-void FixedSizeAllocator::Free(const Node ptr) {
-	auto bitmask_ptr = reinterpret_cast<validity_t *>(buffers[ptr.GetBufferId()].ptr);
-	ValidityMask mask(bitmask_ptr);
-	D_ASSERT(!mask.RowIsValid(ptr.GetOffset()));
-	mask.SetValid(ptr.GetOffset());
-	buffers_with_free_space.insert(ptr.GetBufferId());
-
-	D_ASSERT(total_allocations > 0);
-	D_ASSERT(buffers[ptr.GetBufferId()].allocation_count > 0);
-	buffers[ptr.GetBufferId()].allocation_count--;
-	total_allocations--;
-}
-
-void FixedSizeAllocator::Reset() {
-
-	for (auto &buffer : buffers) {
-		allocator.FreeData(buffer.ptr, BUFFER_ALLOC_SIZE);
-	}
-	buffers.clear();
-	buffers_with_free_space.clear();
-	total_allocations = 0;
-}
-
-void FixedSizeAllocator::Merge(FixedSizeAllocator &other) {
-
-	D_ASSERT(allocation_size == other.allocation_size);
-
-	// remember the buffer count and merge the buffers
-	idx_t buffer_count = buffers.size();
-	for (auto &buffer : other.buffers) {
-		buffers.push_back(buffer);
-	}
-	other.buffers.clear();
-
-	// merge the buffers with free spaces
-	for (auto &buffer_id : other.buffers_with_free_space) {
-		buffers_with_free_space.insert(buffer_id + buffer_count);
-	}
-	other.buffers_with_free_space.clear();
-
-	// add the total allocations
-	total_allocations += other.total_allocations;
-}
-
-bool FixedSizeAllocator::InitializeVacuum() {
-
-	if (total_allocations == 0) {
-		Reset();
-		return false;
-	}
-
-	auto total_available_allocations = allocations_per_buffer * buffers.size();
-	D_ASSERT(total_available_allocations >= total_allocations);
-	auto total_free_positions = total_available_allocations - total_allocations;
-
-	// vacuum_count buffers can be freed
-	auto vacuum_count = total_free_positions / allocations_per_buffer;
-
-	// calculate the vacuum threshold adaptively
-	D_ASSERT(vacuum_count < buffers.size());
-	idx_t memory_usage = GetMemoryUsage();
-	idx_t excess_memory_usage = vacuum_count * BUFFER_ALLOC_SIZE;
-	auto excess_percentage = (double)excess_memory_usage / (double)memory_usage;
-	auto threshold = (double)VACUUM_THRESHOLD / 100.0;
-	if (excess_percentage < threshold) {
-		return false;
-	}
-
-	min_vacuum_buffer_id = buffers.size() - vacuum_count;
-
-	// remove all invalid buffers from the available buffer list to ensure that we do not reuse them
-	auto it = buffers_with_free_space.begin();
-	while (it != buffers_with_free_space.end()) {
-		if (*it >= min_vacuum_buffer_id) {
-			it = buffers_with_free_space.erase(it);
-		} else {
-			it++;
-		}
-	}
-
-	return true;
-}
-
-void FixedSizeAllocator::FinalizeVacuum() {
-
-	// free all (now unused) buffers
-	while (min_vacuum_buffer_id < buffers.size()) {
-		allocator.FreeData(buffers.back().ptr, BUFFER_ALLOC_SIZE);
-		buffers.pop_back();
-	}
-}
-
-Node FixedSizeAllocator::VacuumPointer(const Node ptr) {
-
-	// we do not need to adjust the bitmask of the old buffer, because we will free the entire
-	// buffer after the vacuum operation
-
-	auto new_ptr = New();
-
-	// new increases the allocation count
-	total_allocations--;
-
-	memcpy(Get(new_ptr), Get(ptr), allocation_size);
-	return new_ptr;
-}
-
-void FixedSizeAllocator::Verify() const {
-#ifdef DEBUG
-	auto total_available_allocations = allocations_per_buffer * buffers.size();
-	D_ASSERT(total_available_allocations >= total_allocations);
-	D_ASSERT(buffers.size() >= buffers_with_free_space.size());
-#endif
-}
-
-uint32_t FixedSizeAllocator::GetOffset(ValidityMask &mask, const idx_t allocation_count) {
-
-	auto data = mask.GetData();
-
-	// fills up a buffer sequentially before searching for free bits
-	if (mask.RowIsValid(allocation_count)) {
-		mask.SetInvalid(allocation_count);
-		return allocation_count;
-	}
-
-	// get an entry with free bits
-	for (idx_t entry_idx = 0; entry_idx < bitmask_count; entry_idx++) {
-		if (data[entry_idx] != 0) {
-
-			// find the position of the free bit
-			auto entry = data[entry_idx];
-			idx_t first_valid_bit = 0;
-
-			// this loop finds the position of the rightmost set bit in entry and stores it
-			// in first_valid_bit
-			for (idx_t i = 0; i < 6; i++) {
-				// set the left half of the bits of this level to zero and test if the entry is still not zero
-				if (entry & BASE[i]) {
-					// first valid bit is in the rightmost s[i] bits
-					// permanently set the left half of the bits to zero
-					entry &= BASE[i];
-				} else {
-					// first valid bit is in the leftmost s[i] bits
-					// shift by s[i] for the next iteration and add s[i] to the position of the rightmost set bit
-					entry >>= SHIFT[i];
-					first_valid_bit += SHIFT[i];
-				}
-			}
-			D_ASSERT(entry);
-
-			auto prev_bits = entry_idx * sizeof(validity_t) * 8;
-			D_ASSERT(mask.RowIsValid(prev_bits + first_valid_bit));
-			mask.SetInvalid(prev_bits + first_valid_bit);
-			return (prev_bits + first_valid_bit);
-		}
-	}
-
-	throw InternalException("Invalid bitmask of FixedSizeAllocator");
-}
-
-} // namespace duckdb
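Note: the BASE/SHIFT loop in the deleted `GetOffset` above is a six-step binary search for the lowest set bit of a 64-bit word (in the allocator's bitmask, a set bit marks a free slot). A standalone sketch of that technique, equivalent to `__builtin_ctzll` for non-zero input:

#include <cstdint>

uint32_t LowestSetBit(uint64_t entry) {
	static constexpr uint64_t BASE[] = {0x00000000FFFFFFFF, 0x0000FFFF, 0x00FF, 0x0F, 0x3, 0x1};
	static constexpr uint8_t SHIFT[] = {32, 16, 8, 4, 2, 1};
	uint32_t position = 0;
	for (int i = 0; i < 6; i++) {
		if (entry & BASE[i]) {
			// the lowest set bit is in the low half; mask the high half away
			entry &= BASE[i];
		} else {
			// the lowest set bit is in the high half; shift it down and record the distance
			entry >>= SHIFT[i];
			position += SHIFT[i];
		}
	}
	return position; // e.g. LowestSetBit(0b101000) == 3
}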
package/src/duckdb/src/include/duckdb/execution/index/art/fixed_size_allocator.hpp (deleted; replaced by duckdb/execution/index/fixed_size_allocator.hpp)
@@ -1,115 +0,0 @@
-//===----------------------------------------------------------------------===//
-//                         DuckDB
-//
-// duckdb/execution/index/art/fixed_size_allocator.hpp
-//
-//
-//===----------------------------------------------------------------------===//
-
-#pragma once
-
-#include "duckdb/common/types/validity_mask.hpp"
-#include "duckdb/common/unordered_set.hpp"
-#include "duckdb/storage/buffer_manager.hpp"
-#include "duckdb/execution/index/art/node.hpp"
-
-namespace duckdb {
-
-// structs
-struct BufferEntry {
-	BufferEntry(const data_ptr_t &ptr, const idx_t &allocation_count) : ptr(ptr), allocation_count(allocation_count) {
-	}
-	data_ptr_t ptr;
-	idx_t allocation_count;
-};
-
-//! The FixedSizeAllocator provides pointers to fixed-size sections of pre-allocated memory buffers.
-//! The pointers are Node pointers, and the leftmost byte (serialize flag and type) must always be zero.
-class FixedSizeAllocator {
-public:
-	//! Fixed size of the buffers
-	static constexpr idx_t BUFFER_ALLOC_SIZE = Storage::BLOCK_ALLOC_SIZE;
-	//! We can vacuum 10% or more of the total memory usage of the allocator
-	static constexpr uint8_t VACUUM_THRESHOLD = 10;
-
-	//! Constants for fast offset calculations in the bitmask
-	static constexpr idx_t BASE[] = {0x00000000FFFFFFFF, 0x0000FFFF, 0x00FF, 0x0F, 0x3, 0x1};
-	static constexpr uint8_t SHIFT[] = {32, 16, 8, 4, 2, 1};
-
-public:
-	explicit FixedSizeAllocator(const idx_t allocation_size, Allocator &allocator);
-	~FixedSizeAllocator();
-
-	//! Allocation size of one element in a buffer
-	idx_t allocation_size;
-	//! Total number of allocations
-	idx_t total_allocations;
-	//! Number of validity_t values in the bitmask
-	idx_t bitmask_count;
-	//! First starting byte of the payload
-	idx_t allocation_offset;
-	//! Number of possible allocations per buffer
-	idx_t allocations_per_buffer;
-
-	//! Buffers containing the data
-	vector<BufferEntry> buffers;
-	//! Buffers with free space
-	unordered_set<idx_t> buffers_with_free_space;
-
-	//! Minimum buffer ID of buffers that can be vacuumed
-	idx_t min_vacuum_buffer_id;
-
-	//! Buffer manager of the database instance
-	Allocator &allocator;
-
-public:
-	//! Get a new Node pointer to data, might cause a new buffer allocation
-	Node New();
-	//! Free the data of the Node pointer
-	void Free(const Node ptr);
-	//! Get the data of the Node pointer
-	template <class T>
-	inline T *Get(const Node ptr) const {
-		return (T *)Get(ptr);
-	}
-
-	//! Resets the allocator, e.g., becomes necessary during DELETE FROM table
-	void Reset();
-
-	//! Returns the allocated memory size in bytes
-	inline idx_t GetMemoryUsage() const {
-		return buffers.size() * BUFFER_ALLOC_SIZE;
-	}
-
-	//! Merge another FixedSizeAllocator into this allocator. Both must have the same allocation size
-	void Merge(FixedSizeAllocator &other);
-
-	//! Initialize a vacuum operation, and return true, if the allocator needs a vacuum
-	bool InitializeVacuum();
-	//! Finalize a vacuum operation by freeing all buffers exceeding the min_vacuum_buffer_id
-	void FinalizeVacuum();
-	//! Returns true, if a Node pointer qualifies for a vacuum operation, and false otherwise
-	inline bool NeedsVacuum(const Node ptr) const {
-		if (ptr.GetBufferId() >= min_vacuum_buffer_id) {
-			return true;
-		}
-		return false;
-	}
-	//! Vacuums a Node pointer
-	Node VacuumPointer(const Node ptr);
-
-	//! Verify that the allocation counts match the existing positions on the buffers
-	void Verify() const;
-
-private:
-	//! Returns the data_ptr_t of a Node pointer
-	inline data_ptr_t Get(const Node ptr) const {
-		D_ASSERT(ptr.GetBufferId() < buffers.size());
-		D_ASSERT(ptr.GetOffset() < allocations_per_buffer);
-		return buffers[ptr.GetBufferId()].ptr + ptr.GetOffset() * allocation_size + allocation_offset;
-	}
-	//! Returns the first free offset in a bitmask
-	uint32_t GetOffset(ValidityMask &mask, const idx_t allocation_count);
-};
-
-} // namespace duckdb