duckdb 0.8.2-dev4025.0 → 0.8.2-dev4126.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/binding.gyp +1 -0
- package/package.json +1 -1
- package/src/duckdb/extension/json/buffered_json_reader.cpp +76 -74
- package/src/duckdb/extension/json/include/buffered_json_reader.hpp +35 -32
- package/src/duckdb/extension/json/include/json_scan.hpp +9 -6
- package/src/duckdb/extension/json/json_scan.cpp +124 -121
- package/src/duckdb/src/catalog/catalog_entry/duck_index_entry.cpp +5 -0
- package/src/duckdb/src/common/radix_partitioning.cpp +1 -1
- package/src/duckdb/src/common/sort/partition_state.cpp +5 -1
- package/src/duckdb/src/core_functions/aggregate/holistic/mode.cpp +1 -1
- package/src/duckdb/src/core_functions/function_list.cpp +7 -0
- package/src/duckdb/src/core_functions/scalar/list/list_cosine_similarity.cpp +78 -0
- package/src/duckdb/src/core_functions/scalar/list/list_distance.cpp +72 -0
- package/src/duckdb/src/core_functions/scalar/list/list_inner_product.cpp +70 -0
- package/src/duckdb/src/execution/index/art/art.cpp +111 -92
- package/src/duckdb/src/execution/index/art/iterator.cpp +21 -27
- package/src/duckdb/src/execution/index/art/leaf.cpp +72 -153
- package/src/duckdb/src/execution/index/art/node.cpp +109 -203
- package/src/duckdb/src/execution/index/art/node16.cpp +32 -64
- package/src/duckdb/src/execution/index/art/node256.cpp +38 -53
- package/src/duckdb/src/execution/index/art/node4.cpp +31 -62
- package/src/duckdb/src/execution/index/art/node48.cpp +43 -65
- package/src/duckdb/src/execution/index/art/prefix.cpp +70 -141
- package/src/duckdb/src/execution/index/fixed_size_allocator.cpp +345 -0
- package/src/duckdb/src/execution/index/fixed_size_buffer.cpp +74 -0
- package/src/duckdb/src/execution/operator/aggregate/physical_window.cpp +1 -1
- package/src/duckdb/src/execution/operator/schema/physical_create_art_index.cpp +1 -1
- package/src/duckdb/src/function/table/system/duckdb_columns.cpp +3 -1
- package/src/duckdb/src/function/table/version/pragma_version.cpp +2 -2
- package/src/duckdb/src/include/duckdb/catalog/catalog_entry/duck_index_entry.hpp +2 -0
- package/src/duckdb/src/include/duckdb/common/optional_idx.hpp +1 -1
- package/src/duckdb/src/include/duckdb/core_functions/scalar/list_functions.hpp +51 -0
- package/src/duckdb/src/include/duckdb/execution/index/art/art.hpp +17 -7
- package/src/duckdb/src/include/duckdb/execution/index/art/iterator.hpp +5 -5
- package/src/duckdb/src/include/duckdb/execution/index/art/leaf.hpp +10 -16
- package/src/duckdb/src/include/duckdb/execution/index/art/node.hpp +38 -116
- package/src/duckdb/src/include/duckdb/execution/index/art/node16.hpp +17 -18
- package/src/duckdb/src/include/duckdb/execution/index/art/node256.hpp +17 -23
- package/src/duckdb/src/include/duckdb/execution/index/art/node4.hpp +17 -18
- package/src/duckdb/src/include/duckdb/execution/index/art/node48.hpp +17 -24
- package/src/duckdb/src/include/duckdb/execution/index/art/prefix.hpp +16 -22
- package/src/duckdb/src/include/duckdb/execution/index/fixed_size_allocator.hpp +126 -0
- package/src/duckdb/src/include/duckdb/execution/index/fixed_size_buffer.hpp +79 -0
- package/src/duckdb/src/include/duckdb/execution/index/index_pointer.hpp +96 -0
- package/src/duckdb/src/include/duckdb/parallel/task_scheduler.hpp +1 -1
- package/src/duckdb/src/include/duckdb/storage/block.hpp +1 -1
- package/src/duckdb/src/include/duckdb/storage/index.hpp +10 -8
- package/src/duckdb/src/include/duckdb/storage/metadata/metadata_writer.hpp +3 -0
- package/src/duckdb/src/planner/binder/expression/bind_function_expression.cpp +14 -5
- package/src/duckdb/src/storage/checkpoint/table_data_writer.cpp +2 -3
- package/src/duckdb/src/storage/checkpoint_manager.cpp +16 -21
- package/src/duckdb/src/storage/data_table.cpp +3 -3
- package/src/duckdb/src/storage/index.cpp +7 -1
- package/src/duckdb/src/storage/metadata/metadata_manager.cpp +21 -21
- package/src/duckdb/src/storage/standard_buffer_manager.cpp +0 -8
- package/src/duckdb/src/storage/storage_info.cpp +1 -1
- package/src/duckdb/src/storage/table_index_list.cpp +1 -1
- package/src/duckdb/src/transaction/commit_state.cpp +5 -1
- package/src/duckdb/ub_src_core_functions_scalar_list.cpp +6 -0
- package/src/duckdb/ub_src_execution_index.cpp +4 -0
- package/src/duckdb/ub_src_execution_index_art.cpp +0 -2
- package/src/duckdb/src/execution/index/art/fixed_size_allocator.cpp +0 -238
- package/src/duckdb/src/include/duckdb/execution/index/art/fixed_size_allocator.hpp +0 -115
@@ -0,0 +1,72 @@
|
|
1
|
+
#include "duckdb/core_functions/scalar/list_functions.hpp"
|
2
|
+
#include <cmath>
|
3
|
+
|
4
|
+
namespace duckdb {
|
5
|
+
|
6
|
+
template <class NUMERIC_TYPE>
|
7
|
+
static void ListDistance(DataChunk &args, ExpressionState &, Vector &result) {
|
8
|
+
D_ASSERT(args.ColumnCount() == 2);
|
9
|
+
|
10
|
+
auto count = args.size();
|
11
|
+
auto &left = args.data[0];
|
12
|
+
auto &right = args.data[1];
|
13
|
+
auto left_count = ListVector::GetListSize(left);
|
14
|
+
auto right_count = ListVector::GetListSize(right);
|
15
|
+
|
16
|
+
auto &left_child = ListVector::GetEntry(left);
|
17
|
+
auto &right_child = ListVector::GetEntry(right);
|
18
|
+
|
19
|
+
D_ASSERT(left_child.GetVectorType() == VectorType::FLAT_VECTOR);
|
20
|
+
D_ASSERT(right_child.GetVectorType() == VectorType::FLAT_VECTOR);
|
21
|
+
|
22
|
+
if (!FlatVector::Validity(left_child).CheckAllValid(left_count)) {
|
23
|
+
throw InvalidInputException("list_distance: left argument can not contain NULL values");
|
24
|
+
}
|
25
|
+
|
26
|
+
if (!FlatVector::Validity(right_child).CheckAllValid(right_count)) {
|
27
|
+
throw InvalidInputException("list_distance: right argument can not contain NULL values");
|
28
|
+
}
|
29
|
+
|
30
|
+
auto left_data = FlatVector::GetData<NUMERIC_TYPE>(left_child);
|
31
|
+
auto right_data = FlatVector::GetData<NUMERIC_TYPE>(right_child);
|
32
|
+
|
33
|
+
BinaryExecutor::Execute<list_entry_t, list_entry_t, NUMERIC_TYPE>(
|
34
|
+
left, right, result, count, [&](list_entry_t left, list_entry_t right) {
|
35
|
+
if (left.length != right.length) {
|
36
|
+
throw InvalidInputException(StringUtil::Format(
|
37
|
+
"list_distance: list dimensions must be equal, got left length %d and right length %d", left.length,
|
38
|
+
right.length));
|
39
|
+
}
|
40
|
+
|
41
|
+
auto dimensions = left.length;
|
42
|
+
|
43
|
+
NUMERIC_TYPE distance = 0;
|
44
|
+
|
45
|
+
auto l_ptr = left_data + left.offset;
|
46
|
+
auto r_ptr = right_data + right.offset;
|
47
|
+
|
48
|
+
for (idx_t i = 0; i < dimensions; i++) {
|
49
|
+
auto x = *l_ptr++;
|
50
|
+
auto y = *r_ptr++;
|
51
|
+
auto diff = x - y;
|
52
|
+
distance += diff * diff;
|
53
|
+
}
|
54
|
+
|
55
|
+
return std::sqrt(distance);
|
56
|
+
});
|
57
|
+
|
58
|
+
if (args.AllConstant()) {
|
59
|
+
result.SetVectorType(VectorType::CONSTANT_VECTOR);
|
60
|
+
}
|
61
|
+
}
|
62
|
+
|
63
|
+
ScalarFunctionSet ListDistanceFun::GetFunctions() {
|
64
|
+
ScalarFunctionSet set("list_distance");
|
65
|
+
set.AddFunction(ScalarFunction({LogicalType::LIST(LogicalType::FLOAT), LogicalType::LIST(LogicalType::FLOAT)},
|
66
|
+
LogicalType::FLOAT, ListDistance<float>));
|
67
|
+
set.AddFunction(ScalarFunction({LogicalType::LIST(LogicalType::DOUBLE), LogicalType::LIST(LogicalType::DOUBLE)},
|
68
|
+
LogicalType::DOUBLE, ListDistance<double>));
|
69
|
+
return set;
|
70
|
+
}
|
71
|
+
|
72
|
+
} // namespace duckdb
|
@@ -0,0 +1,70 @@
|
|
1
|
+
#include "duckdb/core_functions/scalar/list_functions.hpp"
|
2
|
+
|
3
|
+
namespace duckdb {
|
4
|
+
|
5
|
+
template <class NUMERIC_TYPE>
|
6
|
+
static void ListInnerProduct(DataChunk &args, ExpressionState &, Vector &result) {
|
7
|
+
D_ASSERT(args.ColumnCount() == 2);
|
8
|
+
|
9
|
+
auto count = args.size();
|
10
|
+
auto &left = args.data[0];
|
11
|
+
auto &right = args.data[1];
|
12
|
+
auto left_count = ListVector::GetListSize(left);
|
13
|
+
auto right_count = ListVector::GetListSize(right);
|
14
|
+
|
15
|
+
auto &left_child = ListVector::GetEntry(left);
|
16
|
+
auto &right_child = ListVector::GetEntry(right);
|
17
|
+
|
18
|
+
D_ASSERT(left_child.GetVectorType() == VectorType::FLAT_VECTOR);
|
19
|
+
D_ASSERT(right_child.GetVectorType() == VectorType::FLAT_VECTOR);
|
20
|
+
|
21
|
+
if (!FlatVector::Validity(left_child).CheckAllValid(left_count)) {
|
22
|
+
throw InvalidInputException("list_inner_product: left argument can not contain NULL values");
|
23
|
+
}
|
24
|
+
|
25
|
+
if (!FlatVector::Validity(right_child).CheckAllValid(right_count)) {
|
26
|
+
throw InvalidInputException("list_inner_product: right argument can not contain NULL values");
|
27
|
+
}
|
28
|
+
|
29
|
+
auto left_data = FlatVector::GetData<NUMERIC_TYPE>(left_child);
|
30
|
+
auto right_data = FlatVector::GetData<NUMERIC_TYPE>(right_child);
|
31
|
+
|
32
|
+
BinaryExecutor::Execute<list_entry_t, list_entry_t, NUMERIC_TYPE>(
|
33
|
+
left, right, result, count, [&](list_entry_t left, list_entry_t right) {
|
34
|
+
if (left.length != right.length) {
|
35
|
+
throw InvalidInputException(StringUtil::Format(
|
36
|
+
"list_inner_product: list dimensions must be equal, got left length %d and right length %d",
|
37
|
+
left.length, right.length));
|
38
|
+
}
|
39
|
+
|
40
|
+
auto dimensions = left.length;
|
41
|
+
|
42
|
+
NUMERIC_TYPE distance = 0;
|
43
|
+
|
44
|
+
auto l_ptr = left_data + left.offset;
|
45
|
+
auto r_ptr = right_data + right.offset;
|
46
|
+
|
47
|
+
for (idx_t i = 0; i < dimensions; i++) {
|
48
|
+
auto x = *l_ptr++;
|
49
|
+
auto y = *r_ptr++;
|
50
|
+
distance += x * y;
|
51
|
+
}
|
52
|
+
|
53
|
+
return distance;
|
54
|
+
});
|
55
|
+
|
56
|
+
if (args.AllConstant()) {
|
57
|
+
result.SetVectorType(VectorType::CONSTANT_VECTOR);
|
58
|
+
}
|
59
|
+
}
|
60
|
+
|
61
|
+
ScalarFunctionSet ListInnerProductFun::GetFunctions() {
|
62
|
+
ScalarFunctionSet set("list_inner_product");
|
63
|
+
set.AddFunction(ScalarFunction({LogicalType::LIST(LogicalType::FLOAT), LogicalType::LIST(LogicalType::FLOAT)},
|
64
|
+
LogicalType::FLOAT, ListInnerProduct<float>));
|
65
|
+
set.AddFunction(ScalarFunction({LogicalType::LIST(LogicalType::DOUBLE), LogicalType::LIST(LogicalType::DOUBLE)},
|
66
|
+
LogicalType::DOUBLE, ListInnerProduct<double>));
|
67
|
+
return set;
|
68
|
+
}
|
69
|
+
|
70
|
+
} // namespace duckdb
|
@@ -14,6 +14,8 @@
|
|
14
14
|
#include "duckdb/execution/index/art/iterator.hpp"
|
15
15
|
#include "duckdb/common/types/conflict_manager.hpp"
|
16
16
|
#include "duckdb/storage/table/scan_state.hpp"
|
17
|
+
#include "duckdb/storage/metadata/metadata_reader.hpp"
|
18
|
+
#include "duckdb/storage/table_io_manager.hpp"
|
17
19
|
|
18
20
|
#include <algorithm>
|
19
21
|
|
@@ -33,7 +35,8 @@ struct ARTIndexScanState : public IndexScanState {
|
|
33
35
|
|
34
36
|
ART::ART(const vector<column_t> &column_ids, TableIOManager &table_io_manager,
|
35
37
|
const vector<unique_ptr<Expression>> &unbound_expressions, const IndexConstraintType constraint_type,
|
36
|
-
AttachedDatabase &db, const shared_ptr<
|
38
|
+
AttachedDatabase &db, const shared_ptr<array<unique_ptr<FixedSizeAllocator>, ALLOCATOR_COUNT>> &allocators_ptr,
|
39
|
+
const BlockPointer &pointer)
|
37
40
|
: Index(db, IndexType::ART, table_io_manager, column_ids, unbound_expressions, constraint_type),
|
38
41
|
allocators(allocators_ptr), owns_data(false) {
|
39
42
|
if (!Radix::IsLittleEndian()) {
|
@@ -43,22 +46,20 @@ ART::ART(const vector<column_t> &column_ids, TableIOManager &table_io_manager,
|
|
43
46
|
// initialize all allocators
|
44
47
|
if (!allocators) {
|
45
48
|
owns_data = true;
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
49
|
+
auto &block_manager = table_io_manager.GetIndexBlockManager();
|
50
|
+
|
51
|
+
array<unique_ptr<FixedSizeAllocator>, ALLOCATOR_COUNT> allocator_array = {
|
52
|
+
make_uniq<FixedSizeAllocator>(sizeof(Prefix), block_manager),
|
53
|
+
make_uniq<FixedSizeAllocator>(sizeof(Leaf), block_manager),
|
54
|
+
make_uniq<FixedSizeAllocator>(sizeof(Node4), block_manager),
|
55
|
+
make_uniq<FixedSizeAllocator>(sizeof(Node16), block_manager),
|
56
|
+
make_uniq<FixedSizeAllocator>(sizeof(Node48), block_manager),
|
57
|
+
make_uniq<FixedSizeAllocator>(sizeof(Node256), block_manager)};
|
58
|
+
allocators = make_shared<array<unique_ptr<FixedSizeAllocator>, ALLOCATOR_COUNT>>(std::move(allocator_array));
|
59
|
+
}
|
60
|
+
|
58
61
|
if (pointer.IsValid()) {
|
59
|
-
|
60
|
-
tree->SetPtr(pointer.block_id, pointer.offset);
|
61
|
-
tree->Deserialize(*this);
|
62
|
+
Deserialize(pointer);
|
62
63
|
}
|
63
64
|
|
64
65
|
// validate the types of the key columns
|
@@ -84,10 +85,6 @@ ART::ART(const vector<column_t> &column_ids, TableIOManager &table_io_manager,
|
|
84
85
|
}
|
85
86
|
}
|
86
87
|
|
87
|
-
ART::~ART() {
|
88
|
-
tree->Reset();
|
89
|
-
}
|
90
|
-
|
91
88
|
//===--------------------------------------------------------------------===//
|
92
89
|
// Initialize Predicate Scans
|
93
90
|
//===--------------------------------------------------------------------===//
|
@@ -351,7 +348,7 @@ bool ART::ConstructFromSorted(idx_t count, vector<ARTKey> &keys, Vector &row_ide
|
|
351
348
|
|
352
349
|
auto key_section = KeySection(0, count - 1, 0, 0);
|
353
350
|
auto has_constraint = IsUnique();
|
354
|
-
if (!Construct(*this, keys, row_ids,
|
351
|
+
if (!Construct(*this, keys, row_ids, tree, key_section, has_constraint)) {
|
355
352
|
return false;
|
356
353
|
}
|
357
354
|
|
@@ -359,9 +356,8 @@ bool ART::ConstructFromSorted(idx_t count, vector<ARTKey> &keys, Vector &row_ide
|
|
359
356
|
D_ASSERT(!VerifyAndToStringInternal(true).empty());
|
360
357
|
for (idx_t i = 0; i < count; i++) {
|
361
358
|
D_ASSERT(!keys[i].Empty());
|
362
|
-
auto leaf = Lookup(
|
363
|
-
D_ASSERT(leaf
|
364
|
-
D_ASSERT(Leaf::ContainsRowId(*this, leaf, row_ids[i]));
|
359
|
+
auto leaf = Lookup(tree, keys[i], 0);
|
360
|
+
D_ASSERT(Leaf::ContainsRowId(*this, *leaf, row_ids[i]));
|
365
361
|
}
|
366
362
|
#endif
|
367
363
|
|
@@ -393,7 +389,7 @@ PreservedError ART::Insert(IndexLock &lock, DataChunk &input, Vector &row_ids) {
|
|
393
389
|
}
|
394
390
|
|
395
391
|
row_t row_id = row_identifiers[i];
|
396
|
-
if (!Insert(
|
392
|
+
if (!Insert(tree, keys[i], 0, row_id)) {
|
397
393
|
// failed to insert because of constraint violation
|
398
394
|
failed_index = i;
|
399
395
|
break;
|
@@ -407,7 +403,7 @@ PreservedError ART::Insert(IndexLock &lock, DataChunk &input, Vector &row_ids) {
|
|
407
403
|
continue;
|
408
404
|
}
|
409
405
|
row_t row_id = row_identifiers[i];
|
410
|
-
Erase(
|
406
|
+
Erase(tree, keys[i], 0, row_id);
|
411
407
|
}
|
412
408
|
}
|
413
409
|
|
@@ -422,9 +418,8 @@ PreservedError ART::Insert(IndexLock &lock, DataChunk &input, Vector &row_ids) {
|
|
422
418
|
continue;
|
423
419
|
}
|
424
420
|
|
425
|
-
auto leaf = Lookup(
|
426
|
-
D_ASSERT(leaf
|
427
|
-
D_ASSERT(Leaf::ContainsRowId(*this, leaf, row_identifiers[i]));
|
421
|
+
auto leaf = Lookup(tree, keys[i], 0);
|
422
|
+
D_ASSERT(Leaf::ContainsRowId(*this, *leaf, row_identifiers[i]));
|
428
423
|
}
|
429
424
|
#endif
|
430
425
|
|
@@ -465,7 +460,7 @@ bool ART::InsertToLeaf(Node &leaf, const row_t &row_id) {
|
|
465
460
|
bool ART::Insert(Node &node, const ARTKey &key, idx_t depth, const row_t &row_id) {
|
466
461
|
|
467
462
|
// node is currently empty, create a leaf here with the key
|
468
|
-
if (!node.
|
463
|
+
if (!node.HasMetadata()) {
|
469
464
|
D_ASSERT(depth <= key.len);
|
470
465
|
reference<Node> ref_node(node);
|
471
466
|
Prefix::New(*this, ref_node, key, depth, key.len - depth);
|
@@ -482,7 +477,7 @@ bool ART::Insert(Node &node, const ARTKey &key, idx_t depth, const row_t &row_id
|
|
482
477
|
|
483
478
|
if (node_type != NType::PREFIX) {
|
484
479
|
D_ASSERT(depth < key.len);
|
485
|
-
auto child = node.
|
480
|
+
auto child = node.GetChildMutable(*this, key[depth]);
|
486
481
|
|
487
482
|
// recurse, if a child exists at key[depth]
|
488
483
|
if (child) {
|
@@ -504,7 +499,7 @@ bool ART::Insert(Node &node, const ARTKey &key, idx_t depth, const row_t &row_id
|
|
504
499
|
|
505
500
|
// this is a prefix node, traverse
|
506
501
|
reference<Node> next_node(node);
|
507
|
-
auto mismatch_position = Prefix::
|
502
|
+
auto mismatch_position = Prefix::TraverseMutable(*this, next_node, key, depth);
|
508
503
|
|
509
504
|
// prefix matches key
|
510
505
|
if (next_node.get().GetType() != NType::PREFIX) {
|
@@ -533,9 +528,16 @@ bool ART::Insert(Node &node, const ARTKey &key, idx_t depth, const row_t &row_id
|
|
533
528
|
}
|
534
529
|
|
535
530
|
//===--------------------------------------------------------------------===//
|
536
|
-
// Delete
|
531
|
+
// Drop and Delete
|
537
532
|
//===--------------------------------------------------------------------===//
|
538
533
|
|
534
|
+
void ART::CommitDrop(IndexLock &index_lock) {
|
535
|
+
for (auto &allocator : *allocators) {
|
536
|
+
allocator->Reset();
|
537
|
+
}
|
538
|
+
tree.Clear();
|
539
|
+
}
|
540
|
+
|
539
541
|
void ART::Delete(IndexLock &state, DataChunk &input, Vector &row_ids) {
|
540
542
|
|
541
543
|
DataChunk expression;
|
@@ -557,7 +559,7 @@ void ART::Delete(IndexLock &state, DataChunk &input, Vector &row_ids) {
|
|
557
559
|
if (keys[i].Empty()) {
|
558
560
|
continue;
|
559
561
|
}
|
560
|
-
Erase(
|
562
|
+
Erase(tree, keys[i], 0, row_identifiers[i]);
|
561
563
|
}
|
562
564
|
|
563
565
|
#ifdef DEBUG
|
@@ -567,9 +569,9 @@ void ART::Delete(IndexLock &state, DataChunk &input, Vector &row_ids) {
|
|
567
569
|
continue;
|
568
570
|
}
|
569
571
|
|
570
|
-
auto leaf = Lookup(
|
571
|
-
if (leaf
|
572
|
-
D_ASSERT(!Leaf::ContainsRowId(*this, leaf, row_identifiers[i]));
|
572
|
+
auto leaf = Lookup(tree, keys[i], 0);
|
573
|
+
if (leaf) {
|
574
|
+
D_ASSERT(!Leaf::ContainsRowId(*this, *leaf, row_identifiers[i]));
|
573
575
|
}
|
574
576
|
}
|
575
577
|
#endif
|
@@ -577,14 +579,14 @@ void ART::Delete(IndexLock &state, DataChunk &input, Vector &row_ids) {
|
|
577
579
|
|
578
580
|
void ART::Erase(Node &node, const ARTKey &key, idx_t depth, const row_t &row_id) {
|
579
581
|
|
580
|
-
if (!node.
|
582
|
+
if (!node.HasMetadata()) {
|
581
583
|
return;
|
582
584
|
}
|
583
585
|
|
584
586
|
// handle prefix
|
585
587
|
reference<Node> next_node(node);
|
586
588
|
if (next_node.get().GetType() == NType::PREFIX) {
|
587
|
-
Prefix::
|
589
|
+
Prefix::TraverseMutable(*this, next_node, key, depth);
|
588
590
|
if (next_node.get().GetType() == NType::PREFIX) {
|
589
591
|
return;
|
590
592
|
}
|
@@ -599,14 +601,14 @@ void ART::Erase(Node &node, const ARTKey &key, idx_t depth, const row_t &row_id)
|
|
599
601
|
}
|
600
602
|
|
601
603
|
D_ASSERT(depth < key.len);
|
602
|
-
auto child = next_node.get().
|
604
|
+
auto child = next_node.get().GetChildMutable(*this, key[depth]);
|
603
605
|
if (child) {
|
604
|
-
D_ASSERT(child->
|
606
|
+
D_ASSERT(child->HasMetadata());
|
605
607
|
|
606
608
|
auto temp_depth = depth + 1;
|
607
609
|
reference<Node> child_node(*child);
|
608
610
|
if (child_node.get().GetType() == NType::PREFIX) {
|
609
|
-
Prefix::
|
611
|
+
Prefix::TraverseMutable(*this, child_node, key, temp_depth);
|
610
612
|
if (child_node.get().GetType() == NType::PREFIX) {
|
611
613
|
return;
|
612
614
|
}
|
@@ -666,24 +668,24 @@ static ARTKey CreateKey(ArenaAllocator &allocator, PhysicalType type, Value &val
|
|
666
668
|
|
667
669
|
bool ART::SearchEqual(ARTKey &key, idx_t max_count, vector<row_t> &result_ids) {
|
668
670
|
|
669
|
-
auto leaf = Lookup(
|
670
|
-
if (!leaf
|
671
|
+
auto leaf = Lookup(tree, key, 0);
|
672
|
+
if (!leaf) {
|
671
673
|
return true;
|
672
674
|
}
|
673
|
-
return Leaf::GetRowIds(*this, leaf, result_ids, max_count);
|
675
|
+
return Leaf::GetRowIds(*this, *leaf, result_ids, max_count);
|
674
676
|
}
|
675
677
|
|
676
678
|
void ART::SearchEqualJoinNoFetch(ARTKey &key, idx_t &result_size) {
|
677
679
|
|
678
680
|
// we need to look for a leaf
|
679
|
-
auto leaf_node = Lookup(
|
680
|
-
if (!leaf_node
|
681
|
+
auto leaf_node = Lookup(tree, key, 0);
|
682
|
+
if (!leaf_node) {
|
681
683
|
result_size = 0;
|
682
684
|
return;
|
683
685
|
}
|
684
686
|
|
685
687
|
// we only perform index joins on PK/FK columns
|
686
|
-
D_ASSERT(leaf_node
|
688
|
+
D_ASSERT(leaf_node->GetType() == NType::LEAF_INLINED);
|
687
689
|
result_size = 1;
|
688
690
|
return;
|
689
691
|
}
|
@@ -692,37 +694,38 @@ void ART::SearchEqualJoinNoFetch(ARTKey &key, idx_t &result_size) {
|
|
692
694
|
// Lookup
|
693
695
|
//===--------------------------------------------------------------------===//
|
694
696
|
|
695
|
-
Node ART::Lookup(Node node, const ARTKey &key, idx_t depth) {
|
697
|
+
optional_ptr<const Node> ART::Lookup(const Node &node, const ARTKey &key, idx_t depth) {
|
696
698
|
|
697
|
-
|
699
|
+
reference<const Node> node_ref(node);
|
700
|
+
while (node_ref.get().HasMetadata()) {
|
698
701
|
|
699
702
|
// traverse prefix, if exists
|
700
|
-
reference<Node> next_node(
|
703
|
+
reference<const Node> next_node(node_ref.get());
|
701
704
|
if (next_node.get().GetType() == NType::PREFIX) {
|
702
705
|
Prefix::Traverse(*this, next_node, key, depth);
|
703
706
|
if (next_node.get().GetType() == NType::PREFIX) {
|
704
|
-
return
|
707
|
+
return nullptr;
|
705
708
|
}
|
706
709
|
}
|
707
710
|
|
708
711
|
if (next_node.get().GetType() == NType::LEAF || next_node.get().GetType() == NType::LEAF_INLINED) {
|
709
|
-
return next_node.get();
|
712
|
+
return &next_node.get();
|
710
713
|
}
|
711
714
|
|
712
715
|
D_ASSERT(depth < key.len);
|
713
716
|
auto child = next_node.get().GetChild(*this, key[depth]);
|
714
717
|
if (!child) {
|
715
718
|
// prefix matches key, but no child at byte, ART/subtree does not contain key
|
716
|
-
return
|
719
|
+
return nullptr;
|
717
720
|
}
|
718
721
|
|
719
722
|
// lookup in child node
|
720
|
-
|
721
|
-
D_ASSERT(
|
723
|
+
node_ref = *child;
|
724
|
+
D_ASSERT(node_ref.get().HasMetadata());
|
722
725
|
depth++;
|
723
726
|
}
|
724
727
|
|
725
|
-
return
|
728
|
+
return nullptr;
|
726
729
|
}
|
727
730
|
|
728
731
|
//===--------------------------------------------------------------------===//
|
@@ -731,7 +734,7 @@ Node ART::Lookup(Node node, const ARTKey &key, idx_t depth) {
|
|
731
734
|
|
732
735
|
bool ART::SearchGreater(ARTIndexScanState &state, ARTKey &key, bool equal, idx_t max_count, vector<row_t> &result_ids) {
|
733
736
|
|
734
|
-
if (!tree
|
737
|
+
if (!tree.HasMetadata()) {
|
735
738
|
return true;
|
736
739
|
}
|
737
740
|
Iterator &it = state.iterator;
|
@@ -739,7 +742,7 @@ bool ART::SearchGreater(ARTIndexScanState &state, ARTKey &key, bool equal, idx_t
|
|
739
742
|
// find the lowest value that satisfies the predicate
|
740
743
|
if (!it.art) {
|
741
744
|
it.art = this;
|
742
|
-
if (!it.LowerBound(
|
745
|
+
if (!it.LowerBound(tree, key, equal, 0)) {
|
743
746
|
// early-out, if the maximum value in the ART is lower than the lower bound
|
744
747
|
return true;
|
745
748
|
}
|
@@ -754,7 +757,7 @@ bool ART::SearchGreater(ARTIndexScanState &state, ARTKey &key, bool equal, idx_t
|
|
754
757
|
bool ART::SearchLess(ARTIndexScanState &state, ARTKey &upper_bound, bool equal, idx_t max_count,
|
755
758
|
vector<row_t> &result_ids) {
|
756
759
|
|
757
|
-
if (!tree
|
760
|
+
if (!tree.HasMetadata()) {
|
758
761
|
return true;
|
759
762
|
}
|
760
763
|
Iterator &it = state.iterator;
|
@@ -762,7 +765,7 @@ bool ART::SearchLess(ARTIndexScanState &state, ARTKey &upper_bound, bool equal,
|
|
762
765
|
if (!it.art) {
|
763
766
|
it.art = this;
|
764
767
|
// find the minimum value in the ART: we start scanning from this value
|
765
|
-
it.FindMinimum(
|
768
|
+
it.FindMinimum(tree);
|
766
769
|
// early-out, if the minimum value is higher than the upper bound
|
767
770
|
if (it.current_key > upper_bound) {
|
768
771
|
return true;
|
@@ -785,7 +788,7 @@ bool ART::SearchCloseRange(ARTIndexScanState &state, ARTKey &lower_bound, ARTKey
|
|
785
788
|
// find the first node that satisfies the left predicate
|
786
789
|
if (!it.art) {
|
787
790
|
it.art = this;
|
788
|
-
if (!it.LowerBound(
|
791
|
+
if (!it.LowerBound(tree, lower_bound, left_equal, 0)) {
|
789
792
|
// early-out, if the maximum value in the ART is lower than the lower bound
|
790
793
|
return true;
|
791
794
|
}
|
@@ -940,8 +943,8 @@ void ART::CheckConstraintsForChunk(DataChunk &input, ConflictManager &conflict_m
|
|
940
943
|
continue;
|
941
944
|
}
|
942
945
|
|
943
|
-
auto leaf = Lookup(
|
944
|
-
if (!leaf
|
946
|
+
auto leaf = Lookup(tree, keys[i], 0);
|
947
|
+
if (!leaf) {
|
945
948
|
if (conflict_manager.AddMiss(i)) {
|
946
949
|
found_conflict = i;
|
947
950
|
}
|
@@ -950,8 +953,8 @@ void ART::CheckConstraintsForChunk(DataChunk &input, ConflictManager &conflict_m
|
|
950
953
|
|
951
954
|
// when we find a node, we need to update the 'matches' and 'row_ids'
|
952
955
|
// NOTE: leaves can have more than one row_id, but for UNIQUE/PRIMARY KEY they will only have one
|
953
|
-
D_ASSERT(leaf
|
954
|
-
if (conflict_manager.AddHit(i, leaf
|
956
|
+
D_ASSERT(leaf->GetType() == NType::LEAF_INLINED);
|
957
|
+
if (conflict_manager.AddHit(i, leaf->GetRowId())) {
|
955
958
|
found_conflict = i;
|
956
959
|
}
|
957
960
|
}
|
@@ -973,14 +976,38 @@ void ART::CheckConstraintsForChunk(DataChunk &input, ConflictManager &conflict_m
|
|
973
976
|
|
974
977
|
BlockPointer ART::Serialize(MetadataWriter &writer) {
|
975
978
|
|
979
|
+
D_ASSERT(owns_data);
|
980
|
+
|
981
|
+
// early-out, if all allocators are empty
|
982
|
+
if (!tree.HasMetadata()) {
|
983
|
+
root_block_pointer = BlockPointer();
|
984
|
+
return root_block_pointer;
|
985
|
+
}
|
986
|
+
|
976
987
|
lock_guard<mutex> l(lock);
|
977
|
-
|
978
|
-
|
979
|
-
|
980
|
-
serialized_data_pointer = BlockPointer();
|
988
|
+
vector<BlockPointer> allocator_pointers;
|
989
|
+
for (auto &allocator : *allocators) {
|
990
|
+
allocator_pointers.push_back(allocator->Serialize(writer));
|
981
991
|
}
|
982
992
|
|
983
|
-
|
993
|
+
root_block_pointer = writer.GetBlockPointer();
|
994
|
+
writer.Write(tree);
|
995
|
+
for (auto &allocator_pointer : allocator_pointers) {
|
996
|
+
writer.Write(allocator_pointer);
|
997
|
+
}
|
998
|
+
|
999
|
+
return root_block_pointer;
|
1000
|
+
}
|
1001
|
+
|
1002
|
+
void ART::Deserialize(const BlockPointer &pointer) {
|
1003
|
+
|
1004
|
+
D_ASSERT(pointer.IsValid());
|
1005
|
+
MetadataReader reader(table_io_manager.GetMetadataManager(), pointer);
|
1006
|
+
tree = reader.Read<Node>();
|
1007
|
+
|
1008
|
+
for (idx_t i = 0; i < ALLOCATOR_COUNT; i++) {
|
1009
|
+
(*allocators)[i]->Deserialize(reader.Read<BlockPointer>());
|
1010
|
+
}
|
984
1011
|
}
|
985
1012
|
|
986
1013
|
//===--------------------------------------------------------------------===//
|
@@ -991,7 +1018,7 @@ void ART::InitializeVacuum(ARTFlags &flags) {
|
|
991
1018
|
|
992
1019
|
flags.vacuum_flags.reserve(allocators->size());
|
993
1020
|
for (auto &allocator : *allocators) {
|
994
|
-
flags.vacuum_flags.push_back(allocator
|
1021
|
+
flags.vacuum_flags.push_back(allocator->InitializeVacuum());
|
995
1022
|
}
|
996
1023
|
}
|
997
1024
|
|
@@ -999,7 +1026,7 @@ void ART::FinalizeVacuum(const ARTFlags &flags) {
|
|
999
1026
|
|
1000
1027
|
for (idx_t i = 0; i < allocators->size(); i++) {
|
1001
1028
|
if (flags.vacuum_flags[i]) {
|
1002
|
-
(*allocators)[i]
|
1029
|
+
(*allocators)[i]->FinalizeVacuum();
|
1003
1030
|
}
|
1004
1031
|
}
|
1005
1032
|
}
|
@@ -1008,9 +1035,9 @@ void ART::Vacuum(IndexLock &state) {
|
|
1008
1035
|
|
1009
1036
|
D_ASSERT(owns_data);
|
1010
1037
|
|
1011
|
-
if (!tree
|
1038
|
+
if (!tree.HasMetadata()) {
|
1012
1039
|
for (auto &allocator : *allocators) {
|
1013
|
-
allocator
|
1040
|
+
allocator->Reset();
|
1014
1041
|
}
|
1015
1042
|
return;
|
1016
1043
|
}
|
@@ -1032,14 +1059,10 @@ void ART::Vacuum(IndexLock &state) {
|
|
1032
1059
|
}
|
1033
1060
|
|
1034
1061
|
// traverse the allocated memory of the tree to perform a vacuum
|
1035
|
-
tree
|
1062
|
+
tree.Vacuum(*this, flags);
|
1036
1063
|
|
1037
1064
|
// finalize the vacuum operation
|
1038
1065
|
FinalizeVacuum(flags);
|
1039
|
-
|
1040
|
-
for (auto &allocator : *allocators) {
|
1041
|
-
allocator.Verify();
|
1042
|
-
}
|
1043
1066
|
}
|
1044
1067
|
|
1045
1068
|
//===--------------------------------------------------------------------===//
|
@@ -1052,39 +1075,35 @@ void ART::InitializeMerge(ARTFlags &flags) {
|
|
1052
1075
|
|
1053
1076
|
flags.merge_buffer_counts.reserve(allocators->size());
|
1054
1077
|
for (auto &allocator : *allocators) {
|
1055
|
-
flags.merge_buffer_counts.emplace_back(allocator
|
1078
|
+
flags.merge_buffer_counts.emplace_back(allocator->GetUpperBoundBufferId());
|
1056
1079
|
}
|
1057
1080
|
}
|
1058
1081
|
|
1059
1082
|
bool ART::MergeIndexes(IndexLock &state, Index &other_index) {
|
1060
1083
|
|
1061
1084
|
auto &other_art = other_index.Cast<ART>();
|
1062
|
-
if (!other_art.tree
|
1085
|
+
if (!other_art.tree.HasMetadata()) {
|
1063
1086
|
return true;
|
1064
1087
|
}
|
1065
1088
|
|
1066
1089
|
if (other_art.owns_data) {
|
1067
|
-
if (tree
|
1090
|
+
if (tree.HasMetadata()) {
|
1068
1091
|
// fully deserialize other_index, and traverse it to increment its buffer IDs
|
1069
1092
|
ARTFlags flags;
|
1070
1093
|
InitializeMerge(flags);
|
1071
|
-
other_art.tree
|
1094
|
+
other_art.tree.InitializeMerge(other_art, flags);
|
1072
1095
|
}
|
1073
1096
|
|
1074
1097
|
// merge the node storage
|
1075
1098
|
for (idx_t i = 0; i < allocators->size(); i++) {
|
1076
|
-
(*allocators)[i]
|
1099
|
+
(*allocators)[i]->Merge(*(*other_art.allocators)[i]);
|
1077
1100
|
}
|
1078
1101
|
}
|
1079
1102
|
|
1080
1103
|
// merge the ARTs
|
1081
|
-
if (!tree
|
1104
|
+
if (!tree.Merge(*this, other_art.tree)) {
|
1082
1105
|
return false;
|
1083
1106
|
}
|
1084
|
-
|
1085
|
-
for (auto &allocator : *allocators) {
|
1086
|
-
allocator.Verify();
|
1087
|
-
}
|
1088
1107
|
return true;
|
1089
1108
|
}
|
1090
1109
|
|
@@ -1100,8 +1119,8 @@ string ART::VerifyAndToString(IndexLock &state, const bool only_verify) {
|
|
1100
1119
|
}
|
1101
1120
|
|
1102
1121
|
string ART::VerifyAndToStringInternal(const bool only_verify) {
|
1103
|
-
if (tree
|
1104
|
-
return "ART: " + tree
|
1122
|
+
if (tree.HasMetadata()) {
|
1123
|
+
return "ART: " + tree.VerifyAndToString(*this, only_verify);
|
1105
1124
|
}
|
1106
1125
|
return "[empty]";
|
1107
1126
|
}
|