duckdb 0.7.2-dev2552.0 → 0.7.2-dev2675.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (66)
  1. package/binding.gyp +7 -7
  2. package/package.json +2 -2
  3. package/src/duckdb/extension/parquet/parquet_statistics.cpp +3 -0
  4. package/src/duckdb/src/catalog/catalog_entry/duck_table_entry.cpp +2 -2
  5. package/src/duckdb/src/common/radix_partitioning.cpp +1 -1
  6. package/src/duckdb/src/execution/index/art/art.cpp +286 -269
  7. package/src/duckdb/src/execution/index/art/art_key.cpp +22 -32
  8. package/src/duckdb/src/execution/index/art/fixed_size_allocator.cpp +224 -0
  9. package/src/duckdb/src/execution/index/art/iterator.cpp +142 -123
  10. package/src/duckdb/src/execution/index/art/leaf.cpp +319 -170
  11. package/src/duckdb/src/execution/index/art/leaf_segment.cpp +42 -0
  12. package/src/duckdb/src/execution/index/art/node.cpp +444 -379
  13. package/src/duckdb/src/execution/index/art/node16.cpp +178 -114
  14. package/src/duckdb/src/execution/index/art/node256.cpp +117 -79
  15. package/src/duckdb/src/execution/index/art/node4.cpp +169 -114
  16. package/src/duckdb/src/execution/index/art/node48.cpp +175 -105
  17. package/src/duckdb/src/execution/index/art/prefix.cpp +405 -127
  18. package/src/duckdb/src/execution/index/art/prefix_segment.cpp +42 -0
  19. package/src/duckdb/src/execution/index/art/swizzleable_pointer.cpp +10 -85
  20. package/src/duckdb/src/execution/operator/join/physical_index_join.cpp +2 -1
  21. package/src/duckdb/src/execution/operator/persistent/base_csv_reader.cpp +2 -2
  22. package/src/duckdb/src/execution/operator/persistent/csv_reader_options.cpp +2 -0
  23. package/src/duckdb/src/execution/operator/schema/physical_create_index.cpp +11 -12
  24. package/src/duckdb/src/function/table/read_csv.cpp +5 -1
  25. package/src/duckdb/src/function/table/version/pragma_version.cpp +2 -2
  26. package/src/duckdb/src/include/duckdb/common/queue.hpp +1 -1
  27. package/src/duckdb/src/include/duckdb/execution/index/art/art.hpp +53 -45
  28. package/src/duckdb/src/include/duckdb/execution/index/art/art_key.hpp +29 -24
  29. package/src/duckdb/src/include/duckdb/execution/index/art/fixed_size_allocator.hpp +114 -0
  30. package/src/duckdb/src/include/duckdb/execution/index/art/iterator.hpp +26 -20
  31. package/src/duckdb/src/include/duckdb/execution/index/art/leaf.hpp +63 -39
  32. package/src/duckdb/src/include/duckdb/execution/index/art/leaf_segment.hpp +36 -0
  33. package/src/duckdb/src/include/duckdb/execution/index/art/node.hpp +98 -116
  34. package/src/duckdb/src/include/duckdb/execution/index/art/node16.hpp +48 -36
  35. package/src/duckdb/src/include/duckdb/execution/index/art/node256.hpp +52 -35
  36. package/src/duckdb/src/include/duckdb/execution/index/art/node4.hpp +46 -36
  37. package/src/duckdb/src/include/duckdb/execution/index/art/node48.hpp +57 -35
  38. package/src/duckdb/src/include/duckdb/execution/index/art/prefix.hpp +57 -50
  39. package/src/duckdb/src/include/duckdb/execution/index/art/prefix_segment.hpp +40 -0
  40. package/src/duckdb/src/include/duckdb/execution/index/art/swizzleable_pointer.hpp +38 -31
  41. package/src/duckdb/src/include/duckdb/execution/operator/persistent/csv_file_handle.hpp +2 -1
  42. package/src/duckdb/src/include/duckdb/execution/operator/persistent/csv_reader_options.hpp +2 -0
  43. package/src/duckdb/src/include/duckdb/parser/statement/insert_statement.hpp +4 -1
  44. package/src/duckdb/src/include/duckdb/parser/transformer.hpp +2 -1
  45. package/src/duckdb/src/include/duckdb/storage/buffer_manager.hpp +0 -5
  46. package/src/duckdb/src/include/duckdb/storage/index.hpp +13 -28
  47. package/src/duckdb/src/include/duckdb/storage/standard_buffer_manager.hpp +0 -2
  48. package/src/duckdb/src/include/duckdb/transaction/cleanup_state.hpp +5 -0
  49. package/src/duckdb/src/include/duckdb.h +26 -0
  50. package/src/duckdb/src/main/capi/helper-c.cpp +7 -0
  51. package/src/duckdb/src/parser/statement/insert_statement.cpp +15 -6
  52. package/src/duckdb/src/parser/transform/constraint/transform_constraint.cpp +1 -1
  53. package/src/duckdb/src/parser/transform/expression/transform_function.cpp +18 -5
  54. package/src/duckdb/src/parser/transform/statement/transform_insert.cpp +5 -7
  55. package/src/duckdb/src/planner/binder/statement/bind_create.cpp +20 -7
  56. package/src/duckdb/src/planner/binder/statement/bind_insert.cpp +14 -9
  57. package/src/duckdb/src/storage/checkpoint_manager.cpp +11 -9
  58. package/src/duckdb/src/storage/data_table.cpp +6 -3
  59. package/src/duckdb/src/storage/index.cpp +18 -6
  60. package/src/duckdb/src/storage/local_storage.cpp +8 -2
  61. package/src/duckdb/src/storage/standard_buffer_manager.cpp +0 -9
  62. package/src/duckdb/src/storage/wal_replay.cpp +1 -1
  63. package/src/duckdb/src/transaction/cleanup_state.cpp +6 -0
  64. package/src/duckdb/src/transaction/undo_buffer.cpp +8 -0
  65. package/src/duckdb/ub_extension_icu_third_party_icu_i18n.cpp +4 -4
  66. package/src/duckdb/ub_src_execution_index_art.cpp +7 -1
@@ -1,528 +1,593 @@
1
- #include "duckdb/execution/index/art/node.hpp"
2
-
3
- #include "duckdb/common/exception.hpp"
1
+ #include "duckdb/common/limits.hpp"
4
2
  #include "duckdb/common/swap.hpp"
5
3
  #include "duckdb/execution/index/art/art.hpp"
6
- #include "duckdb/execution/index/art/swizzleable_pointer.hpp"
7
- #include "duckdb/storage/storage_manager.hpp"
4
+ #include "duckdb/execution/index/art/leaf.hpp"
5
+ #include "duckdb/execution/index/art/leaf_segment.hpp"
6
+ #include "duckdb/execution/index/art/node.hpp"
7
+ #include "duckdb/execution/index/art/node16.hpp"
8
+ #include "duckdb/execution/index/art/node256.hpp"
9
+ #include "duckdb/execution/index/art/node4.hpp"
10
+ #include "duckdb/execution/index/art/node48.hpp"
11
+ #include "duckdb/execution/index/art/prefix.hpp"
12
+ #include "duckdb/execution/index/art/prefix_segment.hpp"
13
+ #include "duckdb/storage/meta_block_reader.hpp"
14
+ #include "duckdb/storage/meta_block_writer.hpp"
15
+ #include "duckdb/storage/table_io_manager.hpp"
8
16
 
9
17
  namespace duckdb {
10
18
 
11
- InternalType::InternalType(Node *n) {
12
- switch (n->type) {
13
- case NodeType::N4: {
14
- auto n4 = (Node4 *)n;
15
- Set(n4->key, 4, n4->children, 4);
16
- break;
17
- }
18
- case NodeType::N16: {
19
- auto n16 = (Node16 *)n;
20
- Set(n16->key, 16, n16->children, 16);
21
- break;
22
- }
23
- case NodeType::N48: {
24
- auto n48 = (Node48 *)n;
25
- Set(n48->child_index, 256, n48->children, 48);
26
- break;
27
- }
28
- case NodeType::N256: {
29
- auto n256 = (Node256 *)n;
30
- Set(nullptr, 0, n256->children, 256);
31
- break;
32
- }
33
- default:
34
- throw InternalException("This is not an Internal ART Node Type");
35
- }
36
- }
37
-
38
- void InternalType::Set(uint8_t *key_p, uint16_t key_size_p, ARTPointer *children_p, uint16_t children_size_p) {
39
- key = key_p;
40
- key_size = key_size_p;
41
- children = children_p;
42
- children_size = children_size_p;
43
- }
44
-
45
- Node::Node(NodeType type) : count(0), type(type) {
46
- }
47
-
48
- // LCOV_EXCL_START
49
- idx_t Node::MemorySize(ART &, const bool &) {
50
- throw InternalException("MemorySize not implemented for the specific node type.");
51
- }
52
-
53
- idx_t Node::GetMin() {
54
- throw InternalException("GetMin not implemented for the specific node type.");
55
- }
19
+ //===--------------------------------------------------------------------===//
20
+ // Constructors / Destructors
21
+ //===--------------------------------------------------------------------===//
56
22
 
57
- Node *Node::GetChild(ART &art, idx_t pos) {
58
- throw InternalException("GetChild not implemented for the specific node type.");
23
+ Node::Node() : SwizzleablePointer() {
59
24
  }
60
25
 
61
- void Node::ReplaceChildPointer(idx_t pos, Node *node) {
62
- throw InternalException("ReplaceChildPointer not implemented for the specific node type.");
26
+ Node::Node(MetaBlockReader &reader) : SwizzleablePointer(reader) {
63
27
  }
64
28
 
65
- bool Node::ChildIsInMemory(idx_t) {
66
- throw InternalException("ChildIsInMemory not implemented for the specific node type.");
67
- }
68
- // LCOV_EXCL_STOP
29
+ void Node::New(ART &art, Node &node, const NType type) {
69
30
 
70
- void Node::InsertChild(ART &art, Node *&node, uint8_t key_byte, Node *new_child) {
71
- switch (node->type) {
72
- case NodeType::N4:
73
- Node4::InsertChild(art, node, key_byte, new_child);
74
- break;
75
- case NodeType::N16:
76
- Node16::InsertChild(art, node, key_byte, new_child);
77
- break;
78
- case NodeType::N48:
79
- Node48::InsertChild(art, node, key_byte, new_child);
31
+ switch (type) {
32
+ case NType::PREFIX_SEGMENT:
33
+ PrefixSegment::New(art, node);
80
34
  break;
81
- case NodeType::N256:
82
- Node256::InsertChild(art, node, key_byte, new_child);
35
+ case NType::LEAF_SEGMENT:
36
+ LeafSegment::New(art, node);
83
37
  break;
84
- default:
85
- throw InternalException("Unrecognized node type for insert.");
86
- }
87
- }
88
-
89
- void Node::EraseChild(ART &art, Node *&node, idx_t pos) {
90
- switch (node->type) {
91
- case NodeType::N4: {
92
- Node4::EraseChild(art, node, pos);
38
+ case NType::NODE_4:
39
+ Node4::New(art, node);
93
40
  break;
94
- }
95
- case NodeType::N16: {
96
- Node16::EraseChild(art, node, pos);
41
+ case NType::NODE_16:
42
+ Node16::New(art, node);
97
43
  break;
98
- }
99
- case NodeType::N48: {
100
- Node48::EraseChild(art, node, pos);
44
+ case NType::NODE_48:
45
+ Node48::New(art, node);
101
46
  break;
102
- }
103
- case NodeType::N256:
104
- Node256::EraseChild(art, node, pos);
47
+ case NType::NODE_256:
48
+ Node256::New(art, node);
105
49
  break;
106
50
  default:
107
- throw InternalException("Unrecognized node type for erase.");
51
+ throw InternalException("Invalid node type for New.");
108
52
  }
109
53
  }
110
54
 
111
- NodeType Node::GetTypeBySize(idx_t size) {
55
+ void Node::Free(ART &art, Node &node) {
112
56
 
113
- if (size <= Node4::GetSize()) {
114
- return NodeType::N4;
115
- } else if (size <= Node16::GetSize()) {
116
- return NodeType::N16;
117
- } else if (size <= Node48::GetSize()) {
118
- return NodeType::N48;
119
- }
120
- D_ASSERT(size <= Node256::GetSize());
121
- return NodeType::N256;
122
- }
57
+ // recursively free all nodes that are in-memory, and skip swizzled and empty nodes
123
58
 
124
- void Node::New(const NodeType &type, Node *&node) {
125
- switch (type) {
126
- case NodeType::N4:
127
- node = (Node *)Node4::New();
59
+ if (!node.IsSet()) {
128
60
  return;
129
- case NodeType::N16:
130
- node = (Node *)Node16::New();
131
- return;
132
- case NodeType::N48:
133
- node = (Node *)Node48::New();
134
- return;
135
- case NodeType::N256:
136
- node = (Node *)Node256::New();
137
- return;
138
- default:
139
- throw InternalException("Unrecognized node type for new node creation.");
140
61
  }
141
- }
142
62
 
143
- Node4 *Node4::New() {
144
- return AllocateObject<Node4>();
145
- }
63
+ if (!node.IsSwizzled()) {
146
64
 
147
- Node16 *Node16::New() {
148
- return AllocateObject<Node16>();
149
- }
65
+ auto type = node.DecodeARTNodeType();
66
+ if (type != NType::PREFIX_SEGMENT && type != NType::LEAF_SEGMENT) {
67
+ node.GetPrefix(art).Free(art);
68
+ }
150
69
 
151
- Node48 *Node48::New() {
152
- return AllocateObject<Node48>();
153
- }
70
+ // free the prefixes and children of the nodes
71
+ switch (type) {
72
+ case NType::LEAF:
73
+ Leaf::Free(art, node);
74
+ break;
75
+ case NType::NODE_4:
76
+ Node4::Free(art, node);
77
+ break;
78
+ case NType::NODE_16:
79
+ Node16::Free(art, node);
80
+ break;
81
+ case NType::NODE_48:
82
+ Node48::Free(art, node);
83
+ break;
84
+ case NType::NODE_256:
85
+ Node256::Free(art, node);
86
+ break;
87
+ default:
88
+ break;
89
+ }
154
90
 
155
- Node256 *Node256::New() {
156
- return AllocateObject<Node256>();
157
- }
91
+ Node::GetAllocator(art, type).Free(node);
92
+ }
158
93
 
159
- Leaf *Leaf::New() {
160
- return AllocateObject<Leaf>();
94
+ // overwrite with an empty ART node
95
+ node.Reset();
161
96
  }
162
97
 
163
- Leaf *Leaf::New(Key &value, uint32_t depth, row_t row_id) {
164
- return AllocateObject<Leaf>(value, depth, row_id);
165
- }
98
+ //===--------------------------------------------------------------------===//
99
+ // Inserts
100
+ //===--------------------------------------------------------------------===//
101
+
102
+ void Node::ReplaceChild(const ART &art, const uint8_t byte, const Node child) {
166
103
 
167
- Leaf *Leaf::New(Key &value, uint32_t depth, row_t *row_ids, idx_t num_elements) {
168
- return AllocateObject<Leaf>(value, depth, row_ids, num_elements);
104
+ D_ASSERT(!IsSwizzled());
105
+
106
+ switch (DecodeARTNodeType()) {
107
+ case NType::NODE_4:
108
+ return Node4::Get(art, *this).ReplaceChild(byte, child);
109
+ case NType::NODE_16:
110
+ return Node16::Get(art, *this).ReplaceChild(byte, child);
111
+ case NType::NODE_48:
112
+ return Node48::Get(art, *this).ReplaceChild(byte, child);
113
+ case NType::NODE_256:
114
+ return Node256::Get(art, *this).ReplaceChild(byte, child);
115
+ default:
116
+ throw InternalException("Invalid node type for ReplaceChild.");
117
+ }
169
118
  }
170
119
 
171
- Leaf *Leaf::New(row_t *row_ids, idx_t num_elements, Prefix &prefix) {
172
- return AllocateObject<Leaf>(row_ids, num_elements, prefix);
120
+ void Node::InsertChild(ART &art, Node &node, const uint8_t byte, const Node child) {
121
+
122
+ switch (node.DecodeARTNodeType()) {
123
+ case NType::NODE_4:
124
+ return Node4::InsertChild(art, node, byte, child);
125
+ case NType::NODE_16:
126
+ return Node16::InsertChild(art, node, byte, child);
127
+ case NType::NODE_48:
128
+ return Node48::InsertChild(art, node, byte, child);
129
+ case NType::NODE_256:
130
+ return Node256::InsertChild(art, node, byte, child);
131
+ default:
132
+ throw InternalException("Invalid node type for InsertChild.");
133
+ }
173
134
  }
174
135
 
175
- Leaf *Leaf::New(row_t row_id, Prefix &prefix) {
176
- return AllocateObject<Leaf>(row_id, prefix);
136
+ //===--------------------------------------------------------------------===//
137
+ // Deletes
138
+ //===--------------------------------------------------------------------===//
139
+
140
+ void Node::DeleteChild(ART &art, Node &node, const uint8_t byte) {
141
+
142
+ switch (node.DecodeARTNodeType()) {
143
+ case NType::NODE_4:
144
+ return Node4::DeleteChild(art, node, byte);
145
+ case NType::NODE_16:
146
+ return Node16::DeleteChild(art, node, byte);
147
+ case NType::NODE_48:
148
+ return Node48::DeleteChild(art, node, byte);
149
+ case NType::NODE_256:
150
+ return Node256::DeleteChild(art, node, byte);
151
+ default:
152
+ throw InternalException("Invalid node type for DeleteChild.");
153
+ }
177
154
  }
178
155
 
179
- void Node::Delete(Node *ptr) {
180
- switch (ptr->type) {
181
- case NodeType::NLeaf:
182
- DestroyObject((Leaf *)ptr);
183
- break;
184
- case NodeType::N4:
185
- DestroyObject((Node4 *)ptr);
156
+ //===--------------------------------------------------------------------===//
157
+ // Get functions
158
+ //===--------------------------------------------------------------------===//
159
+
160
+ optional_ptr<Node> Node::GetChild(ART &art, const uint8_t byte) const {
161
+
162
+ D_ASSERT(!IsSwizzled());
163
+
164
+ optional_ptr<Node> child;
165
+ switch (DecodeARTNodeType()) {
166
+ case NType::NODE_4: {
167
+ child = Node4::Get(art, *this).GetChild(byte);
186
168
  break;
187
- case NodeType::N16:
188
- DestroyObject((Node16 *)ptr);
169
+ }
170
+ case NType::NODE_16: {
171
+ child = Node16::Get(art, *this).GetChild(byte);
189
172
  break;
190
- case NodeType::N48:
191
- DestroyObject((Node48 *)ptr);
173
+ }
174
+ case NType::NODE_48: {
175
+ child = Node48::Get(art, *this).GetChild(byte);
192
176
  break;
193
- case NodeType::N256:
194
- DestroyObject((Node256 *)ptr);
177
+ }
178
+ case NType::NODE_256: {
179
+ child = Node256::Get(art, *this).GetChild(byte);
195
180
  break;
181
+ }
196
182
  default:
197
- throw InternalException("Invalid node type for delete.");
183
+ throw InternalException("Invalid node type for GetChild.");
184
+ }
185
+
186
+ // unswizzle the ART node before returning it
187
+ if (child && child->IsSwizzled()) {
188
+ child->Deserialize(art);
198
189
  }
190
+ return child;
199
191
  }
200
192
 
201
- string Node::ToString(ART &art) {
193
+ optional_ptr<Node> Node::GetNextChild(ART &art, uint8_t &byte) const {
202
194
 
203
- string str = "Node";
204
- switch (this->type) {
205
- case NodeType::NLeaf:
206
- return Leaf::ToString(this);
207
- case NodeType::N4:
208
- str += to_string(Node4::GetSize());
195
+ D_ASSERT(!IsSwizzled());
196
+
197
+ optional_ptr<Node> child;
198
+ switch (DecodeARTNodeType()) {
199
+ case NType::NODE_4: {
200
+ child = Node4::Get(art, *this).GetNextChild(byte);
209
201
  break;
210
- case NodeType::N16:
211
- str += to_string(Node16::GetSize());
202
+ }
203
+ case NType::NODE_16: {
204
+ child = Node16::Get(art, *this).GetNextChild(byte);
212
205
  break;
213
- case NodeType::N48:
214
- str += to_string(Node48::GetSize());
206
+ }
207
+ case NType::NODE_48: {
208
+ child = Node48::Get(art, *this).GetNextChild(byte);
215
209
  break;
216
- case NodeType::N256:
217
- str += to_string(Node256::GetSize());
210
+ }
211
+ case NType::NODE_256: {
212
+ child = Node256::Get(art, *this).GetNextChild(byte);
218
213
  break;
219
214
  }
215
+ default:
216
+ throw InternalException("Invalid node type for GetNextChild.");
217
+ }
220
218
 
221
- str += ": [";
222
- auto next_pos = GetNextPos(DConstants::INVALID_INDEX);
223
- while (next_pos != DConstants::INVALID_INDEX) {
224
- auto child = GetChild(art, next_pos);
225
- str += "(" + to_string(next_pos) + ", " + child->ToString(art) + ")";
226
- next_pos = GetNextPos(next_pos);
219
+ // unswizzle the ART node before returning it
220
+ if (child && child->IsSwizzled()) {
221
+ child->Deserialize(art);
227
222
  }
228
- return str + "]";
223
+ return child;
229
224
  }
230
225
 
231
- BlockPointer Node::SerializeInternal(ART &art, duckdb::MetaBlockWriter &writer, InternalType &internal_type) {
226
+ //===--------------------------------------------------------------------===//
227
+ // (De)serialization
228
+ //===--------------------------------------------------------------------===//
232
229
 
233
- // iterate through children and annotate their offsets
234
- vector<BlockPointer> child_offsets;
235
- for (idx_t i = 0; i < internal_type.children_size; i++) {
236
- child_offsets.emplace_back(internal_type.children[i].Serialize(art, writer));
237
- }
238
- auto ptr = writer.GetBlockPointer();
230
+ BlockPointer Node::Serialize(ART &art, MetaBlockWriter &writer) {
239
231
 
240
- writer.Write(type);
241
- writer.Write<uint16_t>(count);
242
- prefix.Serialize(writer);
232
+ if (!IsSet()) {
233
+ return {(block_id_t)DConstants::INVALID_INDEX, 0};
234
+ }
243
235
 
244
- // write key values
245
- for (idx_t i = 0; i < internal_type.key_size; i++) {
246
- writer.Write(internal_type.key[i]);
236
+ if (IsSwizzled()) {
237
+ Deserialize(art);
247
238
  }
248
239
 
249
- // write child offsets
250
- for (auto &offsets : child_offsets) {
251
- writer.Write(offsets.block_id);
252
- writer.Write(offsets.offset);
240
+ switch (DecodeARTNodeType()) {
241
+ case NType::LEAF:
242
+ return Leaf::Get(art, *this).Serialize(art, writer);
243
+ case NType::NODE_4:
244
+ return Node4::Get(art, *this).Serialize(art, writer);
245
+ case NType::NODE_16:
246
+ return Node16::Get(art, *this).Serialize(art, writer);
247
+ case NType::NODE_48:
248
+ return Node48::Get(art, *this).Serialize(art, writer);
249
+ case NType::NODE_256:
250
+ return Node256::Get(art, *this).Serialize(art, writer);
251
+ default:
252
+ throw InternalException("Invalid node type for Serialize.");
253
253
  }
254
- return ptr;
255
254
  }
256
255
 
257
- BlockPointer Node::Serialize(ART &art, duckdb::MetaBlockWriter &writer) {
256
+ void Node::Deserialize(ART &art) {
257
+
258
+ MetaBlockReader reader(art.table_io_manager.GetIndexBlockManager(), buffer_id);
259
+ reader.offset = offset;
260
+ type = reader.Read<uint8_t>();
261
+ swizzle_flag = 0;
262
+
263
+ auto type = DecodeARTNodeType();
264
+ SetPtr(Node::GetAllocator(art, type).New());
258
265
 
259
266
  switch (type) {
260
- case NodeType::N4:
261
- case NodeType::N16:
262
- case NodeType::N48:
263
- case NodeType::N256: {
264
- InternalType internal_type(this);
265
- return SerializeInternal(art, writer, internal_type);
266
- }
267
- case NodeType::NLeaf: {
268
- auto leaf = (Leaf *)this;
269
- return leaf->Serialize(writer);
270
- }
267
+ case NType::LEAF:
268
+ return Leaf::Get(art, *this).Deserialize(art, reader);
269
+ case NType::NODE_4:
270
+ return Node4::Get(art, *this).Deserialize(art, reader);
271
+ case NType::NODE_16:
272
+ return Node16::Get(art, *this).Deserialize(art, reader);
273
+ case NType::NODE_48:
274
+ return Node48::Get(art, *this).Deserialize(art, reader);
275
+ case NType::NODE_256:
276
+ return Node256::Get(art, *this).Deserialize(art, reader);
271
277
  default:
272
- throw InternalException("Invalid ART node for serialize.");
278
+ throw InternalException("Invalid node type for Deserialize.");
273
279
  }
274
280
  }
275
281
 
276
- void Node::DeserializeInternal(ART &art, duckdb::MetaBlockReader &reader) {
282
+ //===--------------------------------------------------------------------===//
283
+ // Utility
284
+ //===--------------------------------------------------------------------===//
277
285
 
278
- InternalType internal_type(this);
279
- count = reader.Read<uint16_t>();
280
- prefix.Deserialize(reader);
286
+ string Node::ToString(ART &art) const {
281
287
 
282
- // read key values
283
- for (idx_t i = 0; i < internal_type.key_size; i++) {
284
- internal_type.key[i] = reader.Read<uint8_t>();
288
+ D_ASSERT(!IsSwizzled());
289
+
290
+ if (DecodeARTNodeType() == NType::LEAF) {
291
+ return Leaf::Get(art, *this).ToString(art);
285
292
  }
286
293
 
287
- // read child offsets
288
- for (idx_t i = 0; i < internal_type.children_size; i++) {
289
- internal_type.children[i] = ARTPointer(reader);
294
+ string str = "Node" + to_string(GetCapacity()) + ": [";
295
+
296
+ uint8_t byte = 0;
297
+ auto child = GetNextChild(art, byte);
298
+ while (child) {
299
+ str += "(" + to_string(byte) + ", " + child->ToString(art) + ")";
300
+ if (byte == NumericLimits<uint8_t>::Maximum()) {
301
+ break;
302
+ }
303
+ byte++;
304
+ child = GetNextChild(art, byte);
290
305
  }
291
- }
292
306
 
293
- Node *Node::Deserialize(ART &art, idx_t block_id, idx_t offset) {
307
+ return str + "]";
308
+ }
294
309
 
295
- MetaBlockReader reader(art.table_io_manager.GetIndexBlockManager(), block_id);
296
- reader.offset = offset;
310
+ idx_t Node::GetCapacity() const {
297
311
 
298
- auto n = reader.Read<uint8_t>();
299
- NodeType node_type((NodeType)(n));
312
+ D_ASSERT(!IsSwizzled());
300
313
 
301
- Node *deserialized_node = nullptr;
302
- switch (node_type) {
303
- case NodeType::NLeaf: {
304
- auto leaf = Leaf::New();
305
- leaf->Deserialize(art, reader);
306
- art.IncreaseMemorySize(leaf->MemorySize(art, false));
307
- return leaf;
308
- }
309
- case NodeType::N4: {
310
- deserialized_node = (Node *)Node4::New();
311
- break;
312
- }
313
- case NodeType::N16: {
314
- deserialized_node = (Node *)Node16::New();
315
- break;
316
- }
317
- case NodeType::N48: {
318
- deserialized_node = (Node *)Node48::New();
319
- break;
320
- }
321
- case NodeType::N256: {
322
- deserialized_node = (Node *)Node256::New();
323
- break;
324
- }
314
+ switch (DecodeARTNodeType()) {
315
+ case NType::NODE_4:
316
+ return Node::NODE_4_CAPACITY;
317
+ case NType::NODE_16:
318
+ return Node::NODE_16_CAPACITY;
319
+ case NType::NODE_48:
320
+ return Node::NODE_48_CAPACITY;
321
+ case NType::NODE_256:
322
+ return Node::NODE_256_CAPACITY;
325
323
  default:
326
- throw InternalException("Unrecognized node type");
324
+ throw InternalException("Invalid node type for GetCapacity.");
327
325
  }
328
- deserialized_node->DeserializeInternal(art, reader);
329
- art.IncreaseMemorySize(deserialized_node->MemorySize(art, false));
330
- return deserialized_node;
331
326
  }
332
327
 
333
- void UpdateParentsOfNodes(Node *&l_node, Node *&r_node, ParentsOfNodes &parents) {
334
- if (parents.l_parent) {
335
- parents.l_parent->ReplaceChildPointer(parents.l_pos, l_node);
328
+ Prefix &Node::GetPrefix(ART &art) {
329
+
330
+ if (IsSwizzled()) {
331
+ Deserialize(art);
336
332
  }
337
- if (parents.r_parent) {
338
- parents.r_parent->ReplaceChildPointer(parents.r_pos, r_node);
333
+
334
+ switch (DecodeARTNodeType()) {
335
+ case NType::LEAF:
336
+ return Leaf::Get(art, *this).prefix;
337
+ case NType::NODE_4:
338
+ return Node4::Get(art, *this).prefix;
339
+ case NType::NODE_16:
340
+ return Node16::Get(art, *this).prefix;
341
+ case NType::NODE_48:
342
+ return Node48::Get(art, *this).prefix;
343
+ case NType::NODE_256:
344
+ return Node256::Get(art, *this).prefix;
345
+ default:
346
+ throw InternalException("Invalid node type for GetPrefix.");
339
347
  }
340
348
  }
341
349
 
342
- // forward declaration
343
- bool ResolvePrefixesAndMerge(MergeInfo &info, ParentsOfNodes &parents);
350
+ NType Node::GetARTNodeTypeByCount(const idx_t count) {
344
351
 
345
- void SwapNodes(MergeInfo &info, ParentsOfNodes &parents) {
346
- // adjust the memory sizes
347
- auto l_node_memory_size = info.l_node->MemorySize(*info.l_art, true);
348
- auto r_node_memory_size = info.r_node->MemorySize(*info.r_art, true);
349
-
350
- info.root_l_art->DecreaseMemorySize(l_node_memory_size);
351
- info.root_r_art->DecreaseMemorySize(r_node_memory_size);
352
- info.root_l_art->IncreaseMemorySize(r_node_memory_size);
353
- info.root_r_art->IncreaseMemorySize(l_node_memory_size);
352
+ if (count <= NODE_4_CAPACITY) {
353
+ return NType::NODE_4;
354
+ } else if (count <= NODE_16_CAPACITY) {
355
+ return NType::NODE_16;
356
+ } else if (count <= NODE_48_CAPACITY) {
357
+ return NType::NODE_48;
358
+ }
359
+ return NType::NODE_256;
360
+ }
354
361
 
355
- // actual swap
356
- swap(info.l_art, info.r_art);
357
- swap(info.l_node, info.r_node);
358
- UpdateParentsOfNodes(info.l_node, info.r_node, parents);
362
+ FixedSizeAllocator &Node::GetAllocator(const ART &art, NType type) {
363
+ return *art.allocators[(uint8_t)type - 1];
359
364
  }
360
365
 
361
- bool Merge(MergeInfo &info, ParentsOfNodes &parents) {
366
+ //===--------------------------------------------------------------------===//
367
+ // Merging
368
+ //===--------------------------------------------------------------------===//
362
369
 
363
- D_ASSERT(info.l_node);
364
- D_ASSERT(info.r_node);
370
+ void Node::InitializeMerge(ART &art, const ARTFlags &flags) {
365
371
 
366
- // always try to merge the smaller node into the bigger node
367
- // because maybe there is enough free space in the bigger node to fit the smaller one
368
- // without too much recursion
372
+ if (!IsSet()) {
373
+ return;
374
+ }
369
375
 
370
- if (info.l_node->type < info.r_node->type) {
371
- // swap subtrees to ensure that l_node has the bigger node type
372
- SwapNodes(info, parents);
376
+ if (IsSwizzled()) {
377
+ Deserialize(art);
373
378
  }
374
379
 
375
- if (info.r_node->type == NodeType::NLeaf) {
376
- D_ASSERT(info.l_node->type == NodeType::NLeaf);
377
- D_ASSERT(info.r_node->type == NodeType::NLeaf);
378
- if (info.l_art->IsUnique()) {
379
- return false;
380
+ // if not all prefixes are inlined
381
+ if (flags.merge_buffer_counts[(uint8_t)NType::PREFIX_SEGMENT - 1] != 0) {
382
+ // initialize prefix segments
383
+ GetPrefix(art).InitializeMerge(art, flags.merge_buffer_counts[(uint8_t)NType::PREFIX_SEGMENT - 1]);
384
+ }
385
+
386
+ auto type = DecodeARTNodeType();
387
+ switch (type) {
388
+ case NType::LEAF:
389
+ // if not all leaves are inlined
390
+ if (flags.merge_buffer_counts[(uint8_t)NType::LEAF_SEGMENT - 1] != 0) {
391
+ // initialize leaf segments
392
+ Leaf::Get(art, *this).InitializeMerge(art, flags.merge_buffer_counts[(uint8_t)NType::LEAF_SEGMENT - 1]);
380
393
  }
381
- Leaf::Merge(*info.root_l_art, info.l_node, info.r_node);
382
- return true;
394
+ break;
395
+ case NType::NODE_4:
396
+ Node4::Get(art, *this).InitializeMerge(art, flags);
397
+ break;
398
+ case NType::NODE_16:
399
+ Node16::Get(art, *this).InitializeMerge(art, flags);
400
+ break;
401
+ case NType::NODE_48:
402
+ Node48::Get(art, *this).InitializeMerge(art, flags);
403
+ break;
404
+ case NType::NODE_256:
405
+ Node256::Get(art, *this).InitializeMerge(art, flags);
406
+ break;
407
+ default:
408
+ throw InternalException("Invalid node type for InitializeMerge.");
383
409
  }
384
410
 
385
- uint8_t key_byte;
386
- idx_t r_child_pos = DConstants::INVALID_INDEX;
411
+ buffer_id += flags.merge_buffer_counts[(uint8_t)type - 1];
412
+ }
387
413
 
388
- while (true) {
389
- r_child_pos = info.r_node->GetNextPosAndByte(r_child_pos, key_byte);
390
- if (r_child_pos == DConstants::INVALID_INDEX) {
391
- break;
392
- }
393
- auto r_child = info.r_node->GetChild(*info.r_art, r_child_pos);
394
- auto l_child_pos = info.l_node->GetChildPos(key_byte);
395
-
396
- if (l_child_pos == DConstants::INVALID_INDEX) {
397
- // insert child at empty position
398
- auto r_memory_size = r_child->MemorySize(*info.r_art, true);
399
- Node::InsertChild(*info.root_l_art, info.l_node, key_byte, r_child);
400
-
401
- info.root_l_art->IncreaseMemorySize(r_memory_size);
402
- info.root_r_art->DecreaseMemorySize(r_memory_size);
403
- if (parents.l_parent) {
404
- parents.l_parent->ReplaceChildPointer(parents.l_pos, info.l_node);
405
- }
406
- info.r_node->ReplaceChildPointer(r_child_pos, nullptr);
414
+ bool Node::Merge(ART &art, Node &other) {
407
415
 
408
- } else {
409
- // recurse
410
- auto l_child = info.l_node->GetChild(*info.l_art, l_child_pos);
411
- MergeInfo child_info(info.l_art, info.r_art, info.root_l_art, info.root_r_art, l_child, r_child);
412
- ParentsOfNodes child_parents(info.l_node, l_child_pos, info.r_node, r_child_pos);
413
- if (!ResolvePrefixesAndMerge(child_info, child_parents)) {
414
- return false;
415
- }
416
- }
416
+ if (!IsSet()) {
417
+ *this = other;
418
+ other = Node();
419
+ return true;
417
420
  }
418
- return true;
421
+
422
+ return ResolvePrefixes(art, other);
419
423
  }
420
424
 
421
- bool ResolvePrefixesAndMerge(MergeInfo &info, ParentsOfNodes &parents) {
425
+ bool Node::ResolvePrefixes(ART &art, Node &other) {
426
+
422
427
  // NOTE: we always merge into the left ART
423
428
 
424
- D_ASSERT(info.l_node);
425
- D_ASSERT(info.r_node);
429
+ D_ASSERT(IsSet());
430
+ D_ASSERT(other.IsSet());
426
431
 
427
432
  // make sure that r_node has the longer (or equally long) prefix
428
- if (info.l_node->prefix.Size() > info.r_node->prefix.Size()) {
429
- SwapNodes(info, parents);
433
+ if (GetPrefix(art).count > other.GetPrefix(art).count) {
434
+ swap(*this, other);
430
435
  }
431
436
 
432
- Node *null_parent = nullptr;
433
- auto &l_node = info.l_node;
434
- auto &r_node = info.r_node;
435
- auto l_prefix_size = l_node->prefix.Size();
436
- auto r_prefix_size = r_node->prefix.Size();
437
+ auto &l_node = *this;
438
+ auto &r_node = other;
439
+ auto &l_prefix = l_node.GetPrefix(art);
440
+ auto &r_prefix = r_node.GetPrefix(art);
437
441
 
438
- auto mismatch_pos = l_node->prefix.MismatchPosition(r_node->prefix);
442
+ auto mismatch_position = l_prefix.MismatchPosition(art, r_prefix);
439
443
 
440
444
  // both nodes have no prefix or the same prefix
441
- if (mismatch_pos == l_prefix_size && l_prefix_size == r_prefix_size) {
442
- return Merge(info, parents);
445
+ if (mismatch_position == l_prefix.count && l_prefix.count == r_prefix.count) {
446
+ return MergeInternal(art, r_node);
443
447
  }
444
448
 
445
- if (mismatch_pos == l_prefix_size) {
449
+ if (mismatch_position == l_prefix.count) {
446
450
  // r_node's prefix contains l_node's prefix
447
451
  // l_node cannot be a leaf, otherwise the key represented by l_node would be a subset of another key
448
452
  // which is not possible by our construction
449
- D_ASSERT(l_node->type != NodeType::NLeaf);
453
+ D_ASSERT(l_node.DecodeARTNodeType() != NType::LEAF);
450
454
 
451
- // test if the next byte (mismatch_pos) in r_node (longer prefix) exists in l_node
452
- auto mismatch_byte = r_node->prefix[mismatch_pos];
453
- auto child_pos = l_node->GetChildPos(mismatch_byte);
455
+ // test if the next byte (mismatch_position) in r_node (longer prefix) exists in l_node
456
+ auto mismatch_byte = r_prefix.GetByte(art, mismatch_position);
457
+ auto child_node = l_node.GetChild(art, mismatch_byte);
454
458
 
455
- // update the prefix of r_node to only consist of the bytes after mismatch_pos
456
- r_node->prefix.Reduce(*info.root_r_art, mismatch_pos);
459
+ // update the prefix of r_node to only consist of the bytes after mismatch_position
460
+ r_prefix.Reduce(art, mismatch_position);
457
461
 
458
462
  // insert r_node as a child of l_node at empty position
459
- if (child_pos == DConstants::INVALID_INDEX) {
460
-
461
- auto r_memory_size = r_node->MemorySize(*info.r_art, true);
462
- Node::InsertChild(*info.root_l_art, l_node, mismatch_byte, r_node);
463
-
464
- info.root_l_art->IncreaseMemorySize(r_memory_size);
465
- info.root_r_art->DecreaseMemorySize(r_memory_size);
466
- UpdateParentsOfNodes(l_node, null_parent, parents);
467
- r_node = nullptr;
463
+ if (!child_node) {
464
+ Node::InsertChild(art, l_node, mismatch_byte, r_node);
465
+ r_node.Reset();
468
466
  return true;
469
467
  }
470
468
 
471
469
  // recurse
472
- auto child_node = l_node->GetChild(*info.l_art, child_pos);
473
- MergeInfo child_info(info.l_art, info.r_art, info.root_l_art, info.root_r_art, child_node, r_node);
474
- ParentsOfNodes child_parents(l_node, child_pos, parents.r_parent, parents.r_pos);
475
- return ResolvePrefixesAndMerge(child_info, child_parents);
470
+ return child_node->ResolvePrefixes(art, r_node);
476
471
  }
477
472
 
478
473
  // prefixes differ, create new node and insert both nodes as children
479
474
 
480
475
  // create new node
481
- Node *new_node = Node4::New();
482
- new_node->prefix = Prefix(l_node->prefix, mismatch_pos);
483
- info.root_l_art->IncreaseMemorySize(new_node->MemorySize(*info.l_art, false));
476
+ auto old_l_node = l_node;
477
+ auto &new_n4 = Node4::New(art, l_node);
478
+ new_n4.prefix.Initialize(art, l_prefix, mismatch_position);
484
479
 
485
- // insert l_node, break up prefix of l_node
486
- auto key_byte = l_node->prefix.Reduce(*info.root_l_art, mismatch_pos);
487
- Node4::InsertChild(*info.root_l_art, new_node, key_byte, l_node);
480
+ // insert old l_node, break up prefix of old l_node
481
+ auto key_byte = l_prefix.Reduce(art, mismatch_position);
482
+ Node4::InsertChild(art, l_node, key_byte, old_l_node);
488
483
 
489
484
  // insert r_node, break up prefix of r_node
490
- key_byte = r_node->prefix.Reduce(*info.root_r_art, mismatch_pos);
491
- auto r_memory_size = r_node->MemorySize(*info.r_art, true);
492
- Node4::InsertChild(*info.root_l_art, new_node, key_byte, r_node);
485
+ key_byte = r_prefix.Reduce(art, mismatch_position);
486
+ Node4::InsertChild(art, l_node, key_byte, r_node);
493
487
 
494
- info.root_l_art->IncreaseMemorySize(r_memory_size);
495
- info.root_r_art->DecreaseMemorySize(r_memory_size);
496
-
497
- l_node = new_node;
498
- UpdateParentsOfNodes(l_node, null_parent, parents);
499
- r_node = nullptr;
488
+ r_node.Reset();
500
489
  return true;
501
490
  }
502
491
 
503
- bool Node::MergeARTs(ART *l_art, ART *r_art) {
492
+ bool Node::MergeInternal(ART &art, Node &other) {
504
493
 
505
- Node *null_parent = nullptr;
506
- MergeInfo info(l_art, r_art, l_art, r_art, l_art->tree, r_art->tree);
507
- ParentsOfNodes parents(null_parent, 0, null_parent, 0);
508
- return ResolvePrefixesAndMerge(info, parents);
509
- }
494
+ D_ASSERT(IsSet());
495
+ D_ASSERT(other.IsSet());
510
496
 
511
- idx_t Node::RecursiveMemorySize(ART &art) {
497
+ // always try to merge the smaller node into the bigger node
498
+ // because maybe there is enough free space in the bigger node to fit the smaller one
499
+ // without too much recursion
500
+ if (this->DecodeARTNodeType() < other.DecodeARTNodeType()) {
501
+ swap(*this, other);
502
+ }
512
503
 
513
- // get the size of all children
514
- auto memory_size_children = 0;
504
+ Node empty_node;
505
+ auto &l_node = *this;
506
+ auto &r_node = other;
507
+
508
+ if (r_node.DecodeARTNodeType() == NType::LEAF) {
509
+ D_ASSERT(l_node.DecodeARTNodeType() == NType::LEAF);
510
+
511
+ if (art.IsUnique()) {
512
+ return false;
513
+ }
514
+
515
+ Leaf::Get(art, *this).Merge(art, r_node);
516
+ return true;
517
+ }
518
+
519
+ uint8_t byte = 0;
520
+ auto r_child = r_node.GetNextChild(art, byte);
521
+
522
+ // while r_node still has children to merge
523
+ while (r_child) {
524
+ auto l_child = l_node.GetChild(art, byte);
525
+ if (!l_child) {
526
+ // insert child at empty byte
527
+ Node::InsertChild(art, l_node, byte, *r_child);
528
+ r_node.ReplaceChild(art, byte, empty_node);
529
+
530
+ } else {
531
+ // recurse
532
+ if (!l_child->ResolvePrefixes(art, *r_child)) {
533
+ return false;
534
+ }
535
+ }
515
536
 
516
- auto next_pos = GetNextPos(DConstants::INVALID_INDEX);
517
- while (next_pos != DConstants::INVALID_INDEX) {
518
- if (ChildIsInMemory(next_pos)) {
519
- auto child = GetChild(art, next_pos);
520
- memory_size_children += child->MemorySize(art, true);
537
+ if (byte == NumericLimits<uint8_t>::Maximum()) {
538
+ break;
521
539
  }
522
- next_pos = GetNextPos(next_pos);
540
+ byte++;
541
+ r_child = r_node.GetNextChild(art, byte);
523
542
  }
524
543
 
525
- return memory_size_children;
544
+ Node::Free(art, r_node);
545
+ return true;
546
+ }
547
+
548
+ //===--------------------------------------------------------------------===//
549
+ // Vacuum
550
+ //===--------------------------------------------------------------------===//
551
+
552
+ void Node::Vacuum(ART &art, Node &node, const ARTFlags &flags) {
553
+
554
+ if (node.IsSwizzled()) {
555
+ return;
556
+ }
557
+
558
+ // possibly vacuum prefix segments, if not all prefixes are inlined
559
+ bool needs_vacuum = flags.vacuum_flags[(uint8_t)NType::PREFIX_SEGMENT - 1];
560
+ if (needs_vacuum) {
561
+ // vacuum prefix segments
562
+ node.GetPrefix(art).Vacuum(art);
563
+ }
564
+
565
+ auto type = node.DecodeARTNodeType();
566
+ auto &allocator = Node::GetAllocator(art, type);
567
+ needs_vacuum = flags.vacuum_flags[node.type - 1] && allocator.NeedsVacuum(node);
568
+ if (needs_vacuum) {
569
+ node.SetPtr(allocator.VacuumPointer(node));
570
+ }
571
+
572
+ switch (type) {
573
+ case NType::LEAF: {
574
+ // possibly vacuum leaf segments, if not all leaves are inlined
575
+ if (flags.vacuum_flags[(uint8_t)NType::LEAF_SEGMENT - 1]) {
576
+ Leaf::Get(art, node).Vacuum(art);
577
+ }
578
+ return;
579
+ }
580
+ case NType::NODE_4:
581
+ return Node4::Get(art, node).Vacuum(art, flags);
582
+ case NType::NODE_16:
583
+ return Node16::Get(art, node).Vacuum(art, flags);
584
+ case NType::NODE_48:
585
+ return Node48::Get(art, node).Vacuum(art, flags);
586
+ case NType::NODE_256:
587
+ return Node256::Get(art, node).Vacuum(art, flags);
588
+ default:
589
+ throw InternalException("Invalid node type for Vacuum.");
590
+ }
526
591
  }
527
592
 
528
593
  } // namespace duckdb