duckdb 0.8.2-dev4025.0 → 0.8.2-dev4126.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (63)
  1. package/binding.gyp +1 -0
  2. package/package.json +1 -1
  3. package/src/duckdb/extension/json/buffered_json_reader.cpp +76 -74
  4. package/src/duckdb/extension/json/include/buffered_json_reader.hpp +35 -32
  5. package/src/duckdb/extension/json/include/json_scan.hpp +9 -6
  6. package/src/duckdb/extension/json/json_scan.cpp +124 -121
  7. package/src/duckdb/src/catalog/catalog_entry/duck_index_entry.cpp +5 -0
  8. package/src/duckdb/src/common/radix_partitioning.cpp +1 -1
  9. package/src/duckdb/src/common/sort/partition_state.cpp +5 -1
  10. package/src/duckdb/src/core_functions/aggregate/holistic/mode.cpp +1 -1
  11. package/src/duckdb/src/core_functions/function_list.cpp +7 -0
  12. package/src/duckdb/src/core_functions/scalar/list/list_cosine_similarity.cpp +78 -0
  13. package/src/duckdb/src/core_functions/scalar/list/list_distance.cpp +72 -0
  14. package/src/duckdb/src/core_functions/scalar/list/list_inner_product.cpp +70 -0
  15. package/src/duckdb/src/execution/index/art/art.cpp +111 -92
  16. package/src/duckdb/src/execution/index/art/iterator.cpp +21 -27
  17. package/src/duckdb/src/execution/index/art/leaf.cpp +72 -153
  18. package/src/duckdb/src/execution/index/art/node.cpp +109 -203
  19. package/src/duckdb/src/execution/index/art/node16.cpp +32 -64
  20. package/src/duckdb/src/execution/index/art/node256.cpp +38 -53
  21. package/src/duckdb/src/execution/index/art/node4.cpp +31 -62
  22. package/src/duckdb/src/execution/index/art/node48.cpp +43 -65
  23. package/src/duckdb/src/execution/index/art/prefix.cpp +70 -141
  24. package/src/duckdb/src/execution/index/fixed_size_allocator.cpp +345 -0
  25. package/src/duckdb/src/execution/index/fixed_size_buffer.cpp +74 -0
  26. package/src/duckdb/src/execution/operator/aggregate/physical_window.cpp +1 -1
  27. package/src/duckdb/src/execution/operator/schema/physical_create_art_index.cpp +1 -1
  28. package/src/duckdb/src/function/table/system/duckdb_columns.cpp +3 -1
  29. package/src/duckdb/src/function/table/version/pragma_version.cpp +2 -2
  30. package/src/duckdb/src/include/duckdb/catalog/catalog_entry/duck_index_entry.hpp +2 -0
  31. package/src/duckdb/src/include/duckdb/common/optional_idx.hpp +1 -1
  32. package/src/duckdb/src/include/duckdb/core_functions/scalar/list_functions.hpp +51 -0
  33. package/src/duckdb/src/include/duckdb/execution/index/art/art.hpp +17 -7
  34. package/src/duckdb/src/include/duckdb/execution/index/art/iterator.hpp +5 -5
  35. package/src/duckdb/src/include/duckdb/execution/index/art/leaf.hpp +10 -16
  36. package/src/duckdb/src/include/duckdb/execution/index/art/node.hpp +38 -116
  37. package/src/duckdb/src/include/duckdb/execution/index/art/node16.hpp +17 -18
  38. package/src/duckdb/src/include/duckdb/execution/index/art/node256.hpp +17 -23
  39. package/src/duckdb/src/include/duckdb/execution/index/art/node4.hpp +17 -18
  40. package/src/duckdb/src/include/duckdb/execution/index/art/node48.hpp +17 -24
  41. package/src/duckdb/src/include/duckdb/execution/index/art/prefix.hpp +16 -22
  42. package/src/duckdb/src/include/duckdb/execution/index/fixed_size_allocator.hpp +126 -0
  43. package/src/duckdb/src/include/duckdb/execution/index/fixed_size_buffer.hpp +79 -0
  44. package/src/duckdb/src/include/duckdb/execution/index/index_pointer.hpp +96 -0
  45. package/src/duckdb/src/include/duckdb/parallel/task_scheduler.hpp +1 -1
  46. package/src/duckdb/src/include/duckdb/storage/block.hpp +1 -1
  47. package/src/duckdb/src/include/duckdb/storage/index.hpp +10 -8
  48. package/src/duckdb/src/include/duckdb/storage/metadata/metadata_writer.hpp +3 -0
  49. package/src/duckdb/src/planner/binder/expression/bind_function_expression.cpp +14 -5
  50. package/src/duckdb/src/storage/checkpoint/table_data_writer.cpp +2 -3
  51. package/src/duckdb/src/storage/checkpoint_manager.cpp +16 -21
  52. package/src/duckdb/src/storage/data_table.cpp +3 -3
  53. package/src/duckdb/src/storage/index.cpp +7 -1
  54. package/src/duckdb/src/storage/metadata/metadata_manager.cpp +21 -21
  55. package/src/duckdb/src/storage/standard_buffer_manager.cpp +0 -8
  56. package/src/duckdb/src/storage/storage_info.cpp +1 -1
  57. package/src/duckdb/src/storage/table_index_list.cpp +1 -1
  58. package/src/duckdb/src/transaction/commit_state.cpp +5 -1
  59. package/src/duckdb/ub_src_core_functions_scalar_list.cpp +6 -0
  60. package/src/duckdb/ub_src_execution_index.cpp +4 -0
  61. package/src/duckdb/ub_src_execution_index_art.cpp +0 -2
  62. package/src/duckdb/src/execution/index/art/fixed_size_allocator.cpp +0 -238
  63. package/src/duckdb/src/include/duckdb/execution/index/art/fixed_size_allocator.hpp +0 -115
@@ -0,0 +1,72 @@
1
+ #include "duckdb/core_functions/scalar/list_functions.hpp"
2
+ #include <cmath>
3
+
4
+ namespace duckdb {
5
+
6
+ template <class NUMERIC_TYPE>
7
+ static void ListDistance(DataChunk &args, ExpressionState &, Vector &result) {
8
+ D_ASSERT(args.ColumnCount() == 2);
9
+
10
+ auto count = args.size();
11
+ auto &left = args.data[0];
12
+ auto &right = args.data[1];
13
+ auto left_count = ListVector::GetListSize(left);
14
+ auto right_count = ListVector::GetListSize(right);
15
+
16
+ auto &left_child = ListVector::GetEntry(left);
17
+ auto &right_child = ListVector::GetEntry(right);
18
+
19
+ D_ASSERT(left_child.GetVectorType() == VectorType::FLAT_VECTOR);
20
+ D_ASSERT(right_child.GetVectorType() == VectorType::FLAT_VECTOR);
21
+
22
+ if (!FlatVector::Validity(left_child).CheckAllValid(left_count)) {
23
+ throw InvalidInputException("list_distance: left argument can not contain NULL values");
24
+ }
25
+
26
+ if (!FlatVector::Validity(right_child).CheckAllValid(right_count)) {
27
+ throw InvalidInputException("list_distance: right argument can not contain NULL values");
28
+ }
29
+
30
+ auto left_data = FlatVector::GetData<NUMERIC_TYPE>(left_child);
31
+ auto right_data = FlatVector::GetData<NUMERIC_TYPE>(right_child);
32
+
33
+ BinaryExecutor::Execute<list_entry_t, list_entry_t, NUMERIC_TYPE>(
34
+ left, right, result, count, [&](list_entry_t left, list_entry_t right) {
35
+ if (left.length != right.length) {
36
+ throw InvalidInputException(StringUtil::Format(
37
+ "list_distance: list dimensions must be equal, got left length %d and right length %d", left.length,
38
+ right.length));
39
+ }
40
+
41
+ auto dimensions = left.length;
42
+
43
+ NUMERIC_TYPE distance = 0;
44
+
45
+ auto l_ptr = left_data + left.offset;
46
+ auto r_ptr = right_data + right.offset;
47
+
48
+ for (idx_t i = 0; i < dimensions; i++) {
49
+ auto x = *l_ptr++;
50
+ auto y = *r_ptr++;
51
+ auto diff = x - y;
52
+ distance += diff * diff;
53
+ }
54
+
55
+ return std::sqrt(distance);
56
+ });
57
+
58
+ if (args.AllConstant()) {
59
+ result.SetVectorType(VectorType::CONSTANT_VECTOR);
60
+ }
61
+ }
62
+
63
+ ScalarFunctionSet ListDistanceFun::GetFunctions() {
64
+ ScalarFunctionSet set("list_distance");
65
+ set.AddFunction(ScalarFunction({LogicalType::LIST(LogicalType::FLOAT), LogicalType::LIST(LogicalType::FLOAT)},
66
+ LogicalType::FLOAT, ListDistance<float>));
67
+ set.AddFunction(ScalarFunction({LogicalType::LIST(LogicalType::DOUBLE), LogicalType::LIST(LogicalType::DOUBLE)},
68
+ LogicalType::DOUBLE, ListDistance<double>));
69
+ return set;
70
+ }
71
+
72
+ } // namespace duckdb
@@ -0,0 +1,70 @@
1
+ #include "duckdb/core_functions/scalar/list_functions.hpp"
2
+
3
+ namespace duckdb {
4
+
5
+ template <class NUMERIC_TYPE>
6
+ static void ListInnerProduct(DataChunk &args, ExpressionState &, Vector &result) {
7
+ D_ASSERT(args.ColumnCount() == 2);
8
+
9
+ auto count = args.size();
10
+ auto &left = args.data[0];
11
+ auto &right = args.data[1];
12
+ auto left_count = ListVector::GetListSize(left);
13
+ auto right_count = ListVector::GetListSize(right);
14
+
15
+ auto &left_child = ListVector::GetEntry(left);
16
+ auto &right_child = ListVector::GetEntry(right);
17
+
18
+ D_ASSERT(left_child.GetVectorType() == VectorType::FLAT_VECTOR);
19
+ D_ASSERT(right_child.GetVectorType() == VectorType::FLAT_VECTOR);
20
+
21
+ if (!FlatVector::Validity(left_child).CheckAllValid(left_count)) {
22
+ throw InvalidInputException("list_inner_product: left argument can not contain NULL values");
23
+ }
24
+
25
+ if (!FlatVector::Validity(right_child).CheckAllValid(right_count)) {
26
+ throw InvalidInputException("list_inner_product: right argument can not contain NULL values");
27
+ }
28
+
29
+ auto left_data = FlatVector::GetData<NUMERIC_TYPE>(left_child);
30
+ auto right_data = FlatVector::GetData<NUMERIC_TYPE>(right_child);
31
+
32
+ BinaryExecutor::Execute<list_entry_t, list_entry_t, NUMERIC_TYPE>(
33
+ left, right, result, count, [&](list_entry_t left, list_entry_t right) {
34
+ if (left.length != right.length) {
35
+ throw InvalidInputException(StringUtil::Format(
36
+ "list_inner_product: list dimensions must be equal, got left length %d and right length %d",
37
+ left.length, right.length));
38
+ }
39
+
40
+ auto dimensions = left.length;
41
+
42
+ NUMERIC_TYPE distance = 0;
43
+
44
+ auto l_ptr = left_data + left.offset;
45
+ auto r_ptr = right_data + right.offset;
46
+
47
+ for (idx_t i = 0; i < dimensions; i++) {
48
+ auto x = *l_ptr++;
49
+ auto y = *r_ptr++;
50
+ distance += x * y;
51
+ }
52
+
53
+ return distance;
54
+ });
55
+
56
+ if (args.AllConstant()) {
57
+ result.SetVectorType(VectorType::CONSTANT_VECTOR);
58
+ }
59
+ }
60
+
61
+ ScalarFunctionSet ListInnerProductFun::GetFunctions() {
62
+ ScalarFunctionSet set("list_inner_product");
63
+ set.AddFunction(ScalarFunction({LogicalType::LIST(LogicalType::FLOAT), LogicalType::LIST(LogicalType::FLOAT)},
64
+ LogicalType::FLOAT, ListInnerProduct<float>));
65
+ set.AddFunction(ScalarFunction({LogicalType::LIST(LogicalType::DOUBLE), LogicalType::LIST(LogicalType::DOUBLE)},
66
+ LogicalType::DOUBLE, ListInnerProduct<double>));
67
+ return set;
68
+ }
69
+
70
+ } // namespace duckdb
@@ -14,6 +14,8 @@
14
14
  #include "duckdb/execution/index/art/iterator.hpp"
15
15
  #include "duckdb/common/types/conflict_manager.hpp"
16
16
  #include "duckdb/storage/table/scan_state.hpp"
17
+ #include "duckdb/storage/metadata/metadata_reader.hpp"
18
+ #include "duckdb/storage/table_io_manager.hpp"
17
19
 
18
20
  #include <algorithm>
19
21
 
@@ -33,7 +35,8 @@ struct ARTIndexScanState : public IndexScanState {
33
35
 
34
36
  ART::ART(const vector<column_t> &column_ids, TableIOManager &table_io_manager,
35
37
  const vector<unique_ptr<Expression>> &unbound_expressions, const IndexConstraintType constraint_type,
36
- AttachedDatabase &db, const shared_ptr<vector<FixedSizeAllocator>> &allocators_ptr, BlockPointer pointer)
38
+ AttachedDatabase &db, const shared_ptr<array<unique_ptr<FixedSizeAllocator>, ALLOCATOR_COUNT>> &allocators_ptr,
39
+ const BlockPointer &pointer)
37
40
  : Index(db, IndexType::ART, table_io_manager, column_ids, unbound_expressions, constraint_type),
38
41
  allocators(allocators_ptr), owns_data(false) {
39
42
  if (!Radix::IsLittleEndian()) {
@@ -43,22 +46,20 @@ ART::ART(const vector<column_t> &column_ids, TableIOManager &table_io_manager,
43
46
  // initialize all allocators
44
47
  if (!allocators) {
45
48
  owns_data = true;
46
- allocators = make_shared<vector<FixedSizeAllocator>>();
47
- allocators->emplace_back(FixedSizeAllocator(sizeof(Prefix), buffer_manager.GetBufferAllocator()));
48
- allocators->emplace_back(FixedSizeAllocator(sizeof(Leaf), buffer_manager.GetBufferAllocator()));
49
- allocators->emplace_back(FixedSizeAllocator(sizeof(Node4), buffer_manager.GetBufferAllocator()));
50
- allocators->emplace_back(FixedSizeAllocator(sizeof(Node16), buffer_manager.GetBufferAllocator()));
51
- allocators->emplace_back(FixedSizeAllocator(sizeof(Node48), buffer_manager.GetBufferAllocator()));
52
- allocators->emplace_back(FixedSizeAllocator(sizeof(Node256), buffer_manager.GetBufferAllocator()));
53
- }
54
-
55
- // set the root node of the tree
56
- tree = make_uniq<Node>();
57
- serialized_data_pointer = pointer;
49
+ auto &block_manager = table_io_manager.GetIndexBlockManager();
50
+
51
+ array<unique_ptr<FixedSizeAllocator>, ALLOCATOR_COUNT> allocator_array = {
52
+ make_uniq<FixedSizeAllocator>(sizeof(Prefix), block_manager),
53
+ make_uniq<FixedSizeAllocator>(sizeof(Leaf), block_manager),
54
+ make_uniq<FixedSizeAllocator>(sizeof(Node4), block_manager),
55
+ make_uniq<FixedSizeAllocator>(sizeof(Node16), block_manager),
56
+ make_uniq<FixedSizeAllocator>(sizeof(Node48), block_manager),
57
+ make_uniq<FixedSizeAllocator>(sizeof(Node256), block_manager)};
58
+ allocators = make_shared<array<unique_ptr<FixedSizeAllocator>, ALLOCATOR_COUNT>>(std::move(allocator_array));
59
+ }
60
+
58
61
  if (pointer.IsValid()) {
59
- tree->SetSerialized();
60
- tree->SetPtr(pointer.block_id, pointer.offset);
61
- tree->Deserialize(*this);
62
+ Deserialize(pointer);
62
63
  }
63
64
 
64
65
  // validate the types of the key columns
@@ -84,10 +85,6 @@ ART::ART(const vector<column_t> &column_ids, TableIOManager &table_io_manager,
84
85
  }
85
86
  }
86
87
 
87
- ART::~ART() {
88
- tree->Reset();
89
- }
90
-
91
88
  //===--------------------------------------------------------------------===//
92
89
  // Initialize Predicate Scans
93
90
  //===--------------------------------------------------------------------===//
@@ -351,7 +348,7 @@ bool ART::ConstructFromSorted(idx_t count, vector<ARTKey> &keys, Vector &row_ide
351
348
 
352
349
  auto key_section = KeySection(0, count - 1, 0, 0);
353
350
  auto has_constraint = IsUnique();
354
- if (!Construct(*this, keys, row_ids, *this->tree, key_section, has_constraint)) {
351
+ if (!Construct(*this, keys, row_ids, tree, key_section, has_constraint)) {
355
352
  return false;
356
353
  }
357
354
 
@@ -359,9 +356,8 @@ bool ART::ConstructFromSorted(idx_t count, vector<ARTKey> &keys, Vector &row_ide
359
356
  D_ASSERT(!VerifyAndToStringInternal(true).empty());
360
357
  for (idx_t i = 0; i < count; i++) {
361
358
  D_ASSERT(!keys[i].Empty());
362
- auto leaf = Lookup(*tree, keys[i], 0);
363
- D_ASSERT(leaf.IsSet());
364
- D_ASSERT(Leaf::ContainsRowId(*this, leaf, row_ids[i]));
359
+ auto leaf = Lookup(tree, keys[i], 0);
360
+ D_ASSERT(Leaf::ContainsRowId(*this, *leaf, row_ids[i]));
365
361
  }
366
362
  #endif
367
363
 
@@ -393,7 +389,7 @@ PreservedError ART::Insert(IndexLock &lock, DataChunk &input, Vector &row_ids) {
393
389
  }
394
390
 
395
391
  row_t row_id = row_identifiers[i];
396
- if (!Insert(*tree, keys[i], 0, row_id)) {
392
+ if (!Insert(tree, keys[i], 0, row_id)) {
397
393
  // failed to insert because of constraint violation
398
394
  failed_index = i;
399
395
  break;
@@ -407,7 +403,7 @@ PreservedError ART::Insert(IndexLock &lock, DataChunk &input, Vector &row_ids) {
407
403
  continue;
408
404
  }
409
405
  row_t row_id = row_identifiers[i];
410
- Erase(*tree, keys[i], 0, row_id);
406
+ Erase(tree, keys[i], 0, row_id);
411
407
  }
412
408
  }
413
409
 
@@ -422,9 +418,8 @@ PreservedError ART::Insert(IndexLock &lock, DataChunk &input, Vector &row_ids) {
422
418
  continue;
423
419
  }
424
420
 
425
- auto leaf = Lookup(*tree, keys[i], 0);
426
- D_ASSERT(leaf.IsSet());
427
- D_ASSERT(Leaf::ContainsRowId(*this, leaf, row_identifiers[i]));
421
+ auto leaf = Lookup(tree, keys[i], 0);
422
+ D_ASSERT(Leaf::ContainsRowId(*this, *leaf, row_identifiers[i]));
428
423
  }
429
424
  #endif
430
425
 
@@ -465,7 +460,7 @@ bool ART::InsertToLeaf(Node &leaf, const row_t &row_id) {
465
460
  bool ART::Insert(Node &node, const ARTKey &key, idx_t depth, const row_t &row_id) {
466
461
 
467
462
  // node is currently empty, create a leaf here with the key
468
- if (!node.IsSet()) {
463
+ if (!node.HasMetadata()) {
469
464
  D_ASSERT(depth <= key.len);
470
465
  reference<Node> ref_node(node);
471
466
  Prefix::New(*this, ref_node, key, depth, key.len - depth);
@@ -482,7 +477,7 @@ bool ART::Insert(Node &node, const ARTKey &key, idx_t depth, const row_t &row_id
482
477
 
483
478
  if (node_type != NType::PREFIX) {
484
479
  D_ASSERT(depth < key.len);
485
- auto child = node.GetChild(*this, key[depth]);
480
+ auto child = node.GetChildMutable(*this, key[depth]);
486
481
 
487
482
  // recurse, if a child exists at key[depth]
488
483
  if (child) {
@@ -504,7 +499,7 @@ bool ART::Insert(Node &node, const ARTKey &key, idx_t depth, const row_t &row_id
504
499
 
505
500
  // this is a prefix node, traverse
506
501
  reference<Node> next_node(node);
507
- auto mismatch_position = Prefix::Traverse(*this, next_node, key, depth);
502
+ auto mismatch_position = Prefix::TraverseMutable(*this, next_node, key, depth);
508
503
 
509
504
  // prefix matches key
510
505
  if (next_node.get().GetType() != NType::PREFIX) {
@@ -533,9 +528,16 @@ bool ART::Insert(Node &node, const ARTKey &key, idx_t depth, const row_t &row_id
533
528
  }
534
529
 
535
530
  //===--------------------------------------------------------------------===//
536
- // Delete
531
+ // Drop and Delete
537
532
  //===--------------------------------------------------------------------===//
538
533
 
534
+ void ART::CommitDrop(IndexLock &index_lock) {
535
+ for (auto &allocator : *allocators) {
536
+ allocator->Reset();
537
+ }
538
+ tree.Clear();
539
+ }
540
+
539
541
  void ART::Delete(IndexLock &state, DataChunk &input, Vector &row_ids) {
540
542
 
541
543
  DataChunk expression;
@@ -557,7 +559,7 @@ void ART::Delete(IndexLock &state, DataChunk &input, Vector &row_ids) {
557
559
  if (keys[i].Empty()) {
558
560
  continue;
559
561
  }
560
- Erase(*tree, keys[i], 0, row_identifiers[i]);
562
+ Erase(tree, keys[i], 0, row_identifiers[i]);
561
563
  }
562
564
 
563
565
  #ifdef DEBUG
@@ -567,9 +569,9 @@ void ART::Delete(IndexLock &state, DataChunk &input, Vector &row_ids) {
567
569
  continue;
568
570
  }
569
571
 
570
- auto leaf = Lookup(*tree, keys[i], 0);
571
- if (leaf.IsSet()) {
572
- D_ASSERT(!Leaf::ContainsRowId(*this, leaf, row_identifiers[i]));
572
+ auto leaf = Lookup(tree, keys[i], 0);
573
+ if (leaf) {
574
+ D_ASSERT(!Leaf::ContainsRowId(*this, *leaf, row_identifiers[i]));
573
575
  }
574
576
  }
575
577
  #endif
@@ -577,14 +579,14 @@ void ART::Delete(IndexLock &state, DataChunk &input, Vector &row_ids) {
577
579
 
578
580
  void ART::Erase(Node &node, const ARTKey &key, idx_t depth, const row_t &row_id) {
579
581
 
580
- if (!node.IsSet()) {
582
+ if (!node.HasMetadata()) {
581
583
  return;
582
584
  }
583
585
 
584
586
  // handle prefix
585
587
  reference<Node> next_node(node);
586
588
  if (next_node.get().GetType() == NType::PREFIX) {
587
- Prefix::Traverse(*this, next_node, key, depth);
589
+ Prefix::TraverseMutable(*this, next_node, key, depth);
588
590
  if (next_node.get().GetType() == NType::PREFIX) {
589
591
  return;
590
592
  }
@@ -599,14 +601,14 @@ void ART::Erase(Node &node, const ARTKey &key, idx_t depth, const row_t &row_id)
599
601
  }
600
602
 
601
603
  D_ASSERT(depth < key.len);
602
- auto child = next_node.get().GetChild(*this, key[depth]);
604
+ auto child = next_node.get().GetChildMutable(*this, key[depth]);
603
605
  if (child) {
604
- D_ASSERT(child->IsSet());
606
+ D_ASSERT(child->HasMetadata());
605
607
 
606
608
  auto temp_depth = depth + 1;
607
609
  reference<Node> child_node(*child);
608
610
  if (child_node.get().GetType() == NType::PREFIX) {
609
- Prefix::Traverse(*this, child_node, key, temp_depth);
611
+ Prefix::TraverseMutable(*this, child_node, key, temp_depth);
610
612
  if (child_node.get().GetType() == NType::PREFIX) {
611
613
  return;
612
614
  }
@@ -666,24 +668,24 @@ static ARTKey CreateKey(ArenaAllocator &allocator, PhysicalType type, Value &val
666
668
 
667
669
  bool ART::SearchEqual(ARTKey &key, idx_t max_count, vector<row_t> &result_ids) {
668
670
 
669
- auto leaf = Lookup(*tree, key, 0);
670
- if (!leaf.IsSet()) {
671
+ auto leaf = Lookup(tree, key, 0);
672
+ if (!leaf) {
671
673
  return true;
672
674
  }
673
- return Leaf::GetRowIds(*this, leaf, result_ids, max_count);
675
+ return Leaf::GetRowIds(*this, *leaf, result_ids, max_count);
674
676
  }
675
677
 
676
678
  void ART::SearchEqualJoinNoFetch(ARTKey &key, idx_t &result_size) {
677
679
 
678
680
  // we need to look for a leaf
679
- auto leaf_node = Lookup(*tree, key, 0);
680
- if (!leaf_node.IsSet()) {
681
+ auto leaf_node = Lookup(tree, key, 0);
682
+ if (!leaf_node) {
681
683
  result_size = 0;
682
684
  return;
683
685
  }
684
686
 
685
687
  // we only perform index joins on PK/FK columns
686
- D_ASSERT(leaf_node.GetType() == NType::LEAF_INLINED);
688
+ D_ASSERT(leaf_node->GetType() == NType::LEAF_INLINED);
687
689
  result_size = 1;
688
690
  return;
689
691
  }
@@ -692,37 +694,38 @@ void ART::SearchEqualJoinNoFetch(ARTKey &key, idx_t &result_size) {
692
694
  // Lookup
693
695
  //===--------------------------------------------------------------------===//
694
696
 
695
- Node ART::Lookup(Node node, const ARTKey &key, idx_t depth) {
697
+ optional_ptr<const Node> ART::Lookup(const Node &node, const ARTKey &key, idx_t depth) {
696
698
 
697
- while (node.IsSet()) {
699
+ reference<const Node> node_ref(node);
700
+ while (node_ref.get().HasMetadata()) {
698
701
 
699
702
  // traverse prefix, if exists
700
- reference<Node> next_node(node);
703
+ reference<const Node> next_node(node_ref.get());
701
704
  if (next_node.get().GetType() == NType::PREFIX) {
702
705
  Prefix::Traverse(*this, next_node, key, depth);
703
706
  if (next_node.get().GetType() == NType::PREFIX) {
704
- return Node();
707
+ return nullptr;
705
708
  }
706
709
  }
707
710
 
708
711
  if (next_node.get().GetType() == NType::LEAF || next_node.get().GetType() == NType::LEAF_INLINED) {
709
- return next_node.get();
712
+ return &next_node.get();
710
713
  }
711
714
 
712
715
  D_ASSERT(depth < key.len);
713
716
  auto child = next_node.get().GetChild(*this, key[depth]);
714
717
  if (!child) {
715
718
  // prefix matches key, but no child at byte, ART/subtree does not contain key
716
- return Node();
719
+ return nullptr;
717
720
  }
718
721
 
719
722
  // lookup in child node
720
- node = *child;
721
- D_ASSERT(node.IsSet());
723
+ node_ref = *child;
724
+ D_ASSERT(node_ref.get().HasMetadata());
722
725
  depth++;
723
726
  }
724
727
 
725
- return Node();
728
+ return nullptr;
726
729
  }
727
730
 
728
731
  //===--------------------------------------------------------------------===//
@@ -731,7 +734,7 @@ Node ART::Lookup(Node node, const ARTKey &key, idx_t depth) {
731
734
 
732
735
  bool ART::SearchGreater(ARTIndexScanState &state, ARTKey &key, bool equal, idx_t max_count, vector<row_t> &result_ids) {
733
736
 
734
- if (!tree->IsSet()) {
737
+ if (!tree.HasMetadata()) {
735
738
  return true;
736
739
  }
737
740
  Iterator &it = state.iterator;
@@ -739,7 +742,7 @@ bool ART::SearchGreater(ARTIndexScanState &state, ARTKey &key, bool equal, idx_t
739
742
  // find the lowest value that satisfies the predicate
740
743
  if (!it.art) {
741
744
  it.art = this;
742
- if (!it.LowerBound(*tree, key, equal, 0)) {
745
+ if (!it.LowerBound(tree, key, equal, 0)) {
743
746
  // early-out, if the maximum value in the ART is lower than the lower bound
744
747
  return true;
745
748
  }
@@ -754,7 +757,7 @@ bool ART::SearchGreater(ARTIndexScanState &state, ARTKey &key, bool equal, idx_t
754
757
  bool ART::SearchLess(ARTIndexScanState &state, ARTKey &upper_bound, bool equal, idx_t max_count,
755
758
  vector<row_t> &result_ids) {
756
759
 
757
- if (!tree->IsSet()) {
760
+ if (!tree.HasMetadata()) {
758
761
  return true;
759
762
  }
760
763
  Iterator &it = state.iterator;
@@ -762,7 +765,7 @@ bool ART::SearchLess(ARTIndexScanState &state, ARTKey &upper_bound, bool equal,
762
765
  if (!it.art) {
763
766
  it.art = this;
764
767
  // find the minimum value in the ART: we start scanning from this value
765
- it.FindMinimum(*tree);
768
+ it.FindMinimum(tree);
766
769
  // early-out, if the minimum value is higher than the upper bound
767
770
  if (it.current_key > upper_bound) {
768
771
  return true;
@@ -785,7 +788,7 @@ bool ART::SearchCloseRange(ARTIndexScanState &state, ARTKey &lower_bound, ARTKey
785
788
  // find the first node that satisfies the left predicate
786
789
  if (!it.art) {
787
790
  it.art = this;
788
- if (!it.LowerBound(*tree, lower_bound, left_equal, 0)) {
791
+ if (!it.LowerBound(tree, lower_bound, left_equal, 0)) {
789
792
  // early-out, if the maximum value in the ART is lower than the lower bound
790
793
  return true;
791
794
  }
@@ -940,8 +943,8 @@ void ART::CheckConstraintsForChunk(DataChunk &input, ConflictManager &conflict_m
940
943
  continue;
941
944
  }
942
945
 
943
- auto leaf = Lookup(*tree, keys[i], 0);
944
- if (!leaf.IsSet()) {
946
+ auto leaf = Lookup(tree, keys[i], 0);
947
+ if (!leaf) {
945
948
  if (conflict_manager.AddMiss(i)) {
946
949
  found_conflict = i;
947
950
  }
@@ -950,8 +953,8 @@ void ART::CheckConstraintsForChunk(DataChunk &input, ConflictManager &conflict_m
950
953
 
951
954
  // when we find a node, we need to update the 'matches' and 'row_ids'
952
955
  // NOTE: leaves can have more than one row_id, but for UNIQUE/PRIMARY KEY they will only have one
953
- D_ASSERT(leaf.GetType() == NType::LEAF_INLINED);
954
- if (conflict_manager.AddHit(i, leaf.GetRowId())) {
956
+ D_ASSERT(leaf->GetType() == NType::LEAF_INLINED);
957
+ if (conflict_manager.AddHit(i, leaf->GetRowId())) {
955
958
  found_conflict = i;
956
959
  }
957
960
  }
@@ -973,14 +976,38 @@ void ART::CheckConstraintsForChunk(DataChunk &input, ConflictManager &conflict_m
973
976
 
974
977
  BlockPointer ART::Serialize(MetadataWriter &writer) {
975
978
 
979
+ D_ASSERT(owns_data);
980
+
981
+ // early-out, if all allocators are empty
982
+ if (!tree.HasMetadata()) {
983
+ root_block_pointer = BlockPointer();
984
+ return root_block_pointer;
985
+ }
986
+
976
987
  lock_guard<mutex> l(lock);
977
- if (tree->IsSet()) {
978
- serialized_data_pointer = tree->Serialize(*this, writer);
979
- } else {
980
- serialized_data_pointer = BlockPointer();
988
+ vector<BlockPointer> allocator_pointers;
989
+ for (auto &allocator : *allocators) {
990
+ allocator_pointers.push_back(allocator->Serialize(writer));
981
991
  }
982
992
 
983
- return serialized_data_pointer;
993
+ root_block_pointer = writer.GetBlockPointer();
994
+ writer.Write(tree);
995
+ for (auto &allocator_pointer : allocator_pointers) {
996
+ writer.Write(allocator_pointer);
997
+ }
998
+
999
+ return root_block_pointer;
1000
+ }
1001
+
1002
+ void ART::Deserialize(const BlockPointer &pointer) {
1003
+
1004
+ D_ASSERT(pointer.IsValid());
1005
+ MetadataReader reader(table_io_manager.GetMetadataManager(), pointer);
1006
+ tree = reader.Read<Node>();
1007
+
1008
+ for (idx_t i = 0; i < ALLOCATOR_COUNT; i++) {
1009
+ (*allocators)[i]->Deserialize(reader.Read<BlockPointer>());
1010
+ }
984
1011
  }
985
1012
 
986
1013
  //===--------------------------------------------------------------------===//
@@ -991,7 +1018,7 @@ void ART::InitializeVacuum(ARTFlags &flags) {
991
1018
 
992
1019
  flags.vacuum_flags.reserve(allocators->size());
993
1020
  for (auto &allocator : *allocators) {
994
- flags.vacuum_flags.push_back(allocator.InitializeVacuum());
1021
+ flags.vacuum_flags.push_back(allocator->InitializeVacuum());
995
1022
  }
996
1023
  }
997
1024
 
@@ -999,7 +1026,7 @@ void ART::FinalizeVacuum(const ARTFlags &flags) {
999
1026
 
1000
1027
  for (idx_t i = 0; i < allocators->size(); i++) {
1001
1028
  if (flags.vacuum_flags[i]) {
1002
- (*allocators)[i].FinalizeVacuum();
1029
+ (*allocators)[i]->FinalizeVacuum();
1003
1030
  }
1004
1031
  }
1005
1032
  }
@@ -1008,9 +1035,9 @@ void ART::Vacuum(IndexLock &state) {
1008
1035
 
1009
1036
  D_ASSERT(owns_data);
1010
1037
 
1011
- if (!tree->IsSet()) {
1038
+ if (!tree.HasMetadata()) {
1012
1039
  for (auto &allocator : *allocators) {
1013
- allocator.Reset();
1040
+ allocator->Reset();
1014
1041
  }
1015
1042
  return;
1016
1043
  }
@@ -1032,14 +1059,10 @@ void ART::Vacuum(IndexLock &state) {
1032
1059
  }
1033
1060
 
1034
1061
  // traverse the allocated memory of the tree to perform a vacuum
1035
- tree->Vacuum(*this, flags);
1062
+ tree.Vacuum(*this, flags);
1036
1063
 
1037
1064
  // finalize the vacuum operation
1038
1065
  FinalizeVacuum(flags);
1039
-
1040
- for (auto &allocator : *allocators) {
1041
- allocator.Verify();
1042
- }
1043
1066
  }
1044
1067
 
1045
1068
  //===--------------------------------------------------------------------===//
@@ -1052,39 +1075,35 @@ void ART::InitializeMerge(ARTFlags &flags) {
1052
1075
 
1053
1076
  flags.merge_buffer_counts.reserve(allocators->size());
1054
1077
  for (auto &allocator : *allocators) {
1055
- flags.merge_buffer_counts.emplace_back(allocator.buffers.size());
1078
+ flags.merge_buffer_counts.emplace_back(allocator->GetUpperBoundBufferId());
1056
1079
  }
1057
1080
  }
1058
1081
 
1059
1082
  bool ART::MergeIndexes(IndexLock &state, Index &other_index) {
1060
1083
 
1061
1084
  auto &other_art = other_index.Cast<ART>();
1062
- if (!other_art.tree->IsSet()) {
1085
+ if (!other_art.tree.HasMetadata()) {
1063
1086
  return true;
1064
1087
  }
1065
1088
 
1066
1089
  if (other_art.owns_data) {
1067
- if (tree->IsSet()) {
1090
+ if (tree.HasMetadata()) {
1068
1091
  // fully deserialize other_index, and traverse it to increment its buffer IDs
1069
1092
  ARTFlags flags;
1070
1093
  InitializeMerge(flags);
1071
- other_art.tree->InitializeMerge(other_art, flags);
1094
+ other_art.tree.InitializeMerge(other_art, flags);
1072
1095
  }
1073
1096
 
1074
1097
  // merge the node storage
1075
1098
  for (idx_t i = 0; i < allocators->size(); i++) {
1076
- (*allocators)[i].Merge((*other_art.allocators)[i]);
1099
+ (*allocators)[i]->Merge(*(*other_art.allocators)[i]);
1077
1100
  }
1078
1101
  }
1079
1102
 
1080
1103
  // merge the ARTs
1081
- if (!tree->Merge(*this, *other_art.tree)) {
1104
+ if (!tree.Merge(*this, other_art.tree)) {
1082
1105
  return false;
1083
1106
  }
1084
-
1085
- for (auto &allocator : *allocators) {
1086
- allocator.Verify();
1087
- }
1088
1107
  return true;
1089
1108
  }
1090
1109
 
@@ -1100,8 +1119,8 @@ string ART::VerifyAndToString(IndexLock &state, const bool only_verify) {
1100
1119
  }
1101
1120
 
1102
1121
  string ART::VerifyAndToStringInternal(const bool only_verify) {
1103
- if (tree->IsSet()) {
1104
- return "ART: " + tree->VerifyAndToString(*this, only_verify);
1122
+ if (tree.HasMetadata()) {
1123
+ return "ART: " + tree.VerifyAndToString(*this, only_verify);
1105
1124
  }
1106
1125
  return "[empty]";
1107
1126
  }