duckdb 0.8.2-dev1791.0 → 0.8.2-dev1862.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (48):
  1. package/binding.gyp +1 -0
  2. package/package.json +1 -1
  3. package/src/duckdb/src/common/constants.cpp +2 -1
  4. package/src/duckdb/src/common/enum_util.cpp +5 -5
  5. package/src/duckdb/src/core_functions/function_list.cpp +2 -0
  6. package/src/duckdb/src/core_functions/scalar/debug/vector_type.cpp +23 -0
  7. package/src/duckdb/src/execution/index/art/art.cpp +49 -108
  8. package/src/duckdb/src/execution/index/art/art_key.cpp +0 -11
  9. package/src/duckdb/src/execution/index/art/fixed_size_allocator.cpp +10 -14
  10. package/src/duckdb/src/execution/index/art/iterator.cpp +13 -19
  11. package/src/duckdb/src/execution/index/art/leaf.cpp +290 -241
  12. package/src/duckdb/src/execution/index/art/node.cpp +104 -95
  13. package/src/duckdb/src/execution/index/art/node16.cpp +6 -6
  14. package/src/duckdb/src/execution/index/art/node256.cpp +6 -6
  15. package/src/duckdb/src/execution/index/art/node4.cpp +6 -6
  16. package/src/duckdb/src/execution/index/art/node48.cpp +6 -6
  17. package/src/duckdb/src/execution/index/art/prefix.cpp +49 -39
  18. package/src/duckdb/src/execution/operator/aggregate/physical_window.cpp +34 -1175
  19. package/src/duckdb/src/execution/operator/schema/physical_create_index.cpp +4 -14
  20. package/src/duckdb/src/execution/window_executor.cpp +1280 -0
  21. package/src/duckdb/src/execution/window_segment_tree.cpp +224 -117
  22. package/src/duckdb/src/function/table/read_csv.cpp +4 -3
  23. package/src/duckdb/src/function/table/version/pragma_version.cpp +2 -2
  24. package/src/duckdb/src/include/duckdb/common/constants.hpp +2 -0
  25. package/src/duckdb/src/include/duckdb/common/type_util.hpp +8 -0
  26. package/src/duckdb/src/include/duckdb/common/typedefs.hpp +8 -0
  27. package/src/duckdb/src/include/duckdb/core_functions/scalar/debug_functions.hpp +27 -0
  28. package/src/duckdb/src/include/duckdb/execution/index/art/art.hpp +1 -1
  29. package/src/duckdb/src/include/duckdb/execution/index/art/art_key.hpp +0 -1
  30. package/src/duckdb/src/include/duckdb/execution/index/art/fixed_size_allocator.hpp +22 -24
  31. package/src/duckdb/src/include/duckdb/execution/index/art/iterator.hpp +2 -2
  32. package/src/duckdb/src/include/duckdb/execution/index/art/leaf.hpp +43 -40
  33. package/src/duckdb/src/include/duckdb/execution/index/art/node.hpp +119 -40
  34. package/src/duckdb/src/include/duckdb/execution/index/art/node16.hpp +1 -0
  35. package/src/duckdb/src/include/duckdb/execution/index/art/node256.hpp +1 -0
  36. package/src/duckdb/src/include/duckdb/execution/index/art/node4.hpp +1 -0
  37. package/src/duckdb/src/include/duckdb/execution/index/art/node48.hpp +1 -0
  38. package/src/duckdb/src/include/duckdb/execution/index/art/prefix.hpp +4 -2
  39. package/src/duckdb/src/include/duckdb/execution/window_executor.hpp +313 -0
  40. package/src/duckdb/src/include/duckdb/execution/window_segment_tree.hpp +60 -53
  41. package/src/duckdb/src/storage/compression/rle.cpp +52 -12
  42. package/src/duckdb/ub_src_core_functions_scalar_debug.cpp +2 -0
  43. package/src/duckdb/ub_src_execution.cpp +2 -0
  44. package/src/duckdb/ub_src_execution_index_art.cpp +0 -4
  45. package/src/duckdb/src/execution/index/art/leaf_segment.cpp +0 -52
  46. package/src/duckdb/src/execution/index/art/swizzleable_pointer.cpp +0 -22
  47. package/src/duckdb/src/include/duckdb/execution/index/art/leaf_segment.hpp +0 -38
  48. package/src/duckdb/src/include/duckdb/execution/index/art/swizzleable_pointer.hpp +0 -58
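Diff shown below: package/src/duckdb/src/execution/index/art/leaf.cpp (file 11 in the list above, +290 -241)
@@ -2,361 +2,410 @@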
2
2
 
3
3
  #include "duckdb/execution/index/art/art.hpp"
4
4
  #include "duckdb/execution/index/art/node.hpp"
5
- #include "duckdb/execution/index/art/leaf_segment.hpp"
6
5
  #include "duckdb/storage/meta_block_reader.hpp"
7
6
  #include "duckdb/storage/meta_block_writer.hpp"
8
7
 
9
8
  namespace duckdb {
10
9
 
11
- Leaf &Leaf::New(ART &art, Node &node, const row_t row_id) {
10
+ void Leaf::New(Node &node, const row_t row_id) {
12
11
 
13
- node.SetPtr(Node::GetAllocator(art, NType::LEAF).New());
14
- node.type = (uint8_t)NType::LEAF;
15
- auto &leaf = Leaf::Get(art, node);
16
-
17
- // set the fields of the leaf
18
- leaf.count = 1;
19
- leaf.row_ids.inlined = row_id;
20
- return leaf;
12
+ // we directly inline this row ID into the node pointer
13
+ D_ASSERT(row_id < MAX_ROW_ID_LOCAL);
14
+ node.Reset();
15
+ node.SetType((uint8_t)NType::LEAF_INLINED);
16
+ node.SetRowId(row_id);
21
17
  }
22
18
 
23
- Leaf &Leaf::New(ART &art, Node &node, const row_t *row_ids, const idx_t count) {
19
+ void Leaf::New(ART &art, reference<Node> &node, const row_t *row_ids, idx_t count) {
24
20
 
25
21
  D_ASSERT(count > 1);
26
22
 
27
- node.SetPtr(Node::GetAllocator(art, NType::LEAF).New());
28
- node.type = (uint8_t)NType::LEAF;
29
- auto &leaf = Leaf::Get(art, node);
23
+ idx_t copy_count = 0;
24
+ while (count) {
25
+ node.get() = Node::GetAllocator(art, NType::LEAF).New();
26
+ node.get().SetType((uint8_t)NType::LEAF);
30
27
 
31
- // reset the count to copy the row IDs
32
- leaf.count = 0;
33
- reference<LeafSegment> segment(LeafSegment::New(art, leaf.row_ids.ptr));
34
- for (idx_t i = 0; i < count; i++) {
35
- segment = segment.get().Append(art, leaf.count, row_ids[i]);
36
- }
28
+ auto &leaf = Leaf::Get(art, node);
37
29
 
38
- return leaf;
39
- }
30
+ leaf.count = MinValue((idx_t)Node::LEAF_SIZE, count);
31
+ for (idx_t i = 0; i < leaf.count; i++) {
32
+ leaf.row_ids[i] = row_ids[copy_count + i];
33
+ }
40
34
 
41
- void Leaf::Free(ART &art, Node &node) {
35
+ copy_count += leaf.count;
36
+ count -= leaf.count;
42
37
 
43
- D_ASSERT(node.IsSet());
44
- D_ASSERT(!node.IsSwizzled());
45
-
46
- // free leaf segments
47
- auto &leaf = Leaf::Get(art, node);
48
- if (!leaf.IsInlined()) {
49
- Node::Free(art, leaf.row_ids.ptr);
38
+ node = leaf.ptr;
39
+ leaf.ptr.Reset();
50
40
  }
51
41
  }
52
42
 
53
- void Leaf::InitializeMerge(const ART &art, const idx_t buffer_count) {
43
+ void Leaf::Free(ART &art, Node &node) {
54
44
 
55
- if (IsInlined()) {
56
- return;
57
- }
45
+ D_ASSERT(node.IsSet() && !node.IsSerialized());
46
+ auto &child = Leaf::Get(art, node).ptr;
47
+ Node::Free(art, child);
48
+ }
49
+
50
+ void Leaf::InitializeMerge(ART &art, Node &node, const ARTFlags &flags) {
58
51
 
59
- reference<LeafSegment> segment(LeafSegment::Get(art, row_ids.ptr));
60
- row_ids.ptr.buffer_id += buffer_count;
52
+ D_ASSERT(node.IsSet() && !node.IsSerialized());
53
+ D_ASSERT(node.GetType() == NType::LEAF);
61
54
 
62
- auto ptr = segment.get().next;
63
- while (ptr.IsSet()) {
64
- segment.get().next.buffer_id += buffer_count;
65
- segment = LeafSegment::Get(art, ptr);
66
- ptr = segment.get().next;
55
+ auto &leaf = Leaf::Get(art, node);
56
+ if (leaf.ptr.IsSet()) {
57
+ leaf.ptr.InitializeMerge(art, flags);
67
58
  }
68
59
  }
69
60
 
70
- void Leaf::Merge(ART &art, Node &other) {
61
+ void Leaf::Merge(ART &art, Node &l_node, Node &r_node) {
71
62
 
72
- auto &other_leaf = Leaf::Get(art, other);
63
+ D_ASSERT(l_node.IsSet() && !l_node.IsSerialized());
64
+ D_ASSERT(r_node.IsSet() && !r_node.IsSerialized());
73
65
 
74
- // copy inlined row ID
75
- if (other_leaf.IsInlined()) {
76
- Insert(art, other_leaf.row_ids.inlined);
77
- Node::Free(art, other);
66
+ // copy inlined row ID of r_node
67
+ if (r_node.GetType() == NType::LEAF_INLINED) {
68
+ Leaf::Insert(art, l_node, r_node.GetRowId());
69
+ r_node.Reset();
78
70
  return;
79
71
  }
80
72
 
81
- // row ID was inlined, move to a new segment
82
- if (IsInlined()) {
83
- auto row_id = row_ids.inlined;
84
- auto &segment = LeafSegment::New(art, row_ids.ptr);
85
- segment.row_ids[0] = row_id;
73
+ // l_node has an inlined row ID, swap and insert
74
+ if (l_node.GetType() == NType::LEAF_INLINED) {
75
+ auto row_id = l_node.GetRowId();
76
+ l_node = r_node;
77
+ Leaf::Insert(art, l_node, row_id);
78
+ r_node.Reset();
79
+ return;
86
80
  }
87
81
 
88
- // get the first segment to copy to
89
- reference<LeafSegment> segment(LeafSegment::Get(art, row_ids.ptr).GetTail(art));
82
+ D_ASSERT(l_node.GetType() != NType::LEAF_INLINED);
83
+ D_ASSERT(r_node.GetType() != NType::LEAF_INLINED);
90
84
 
91
- // initialize loop variables
92
- auto other_ptr = other_leaf.row_ids.ptr;
93
- auto remaining = other_leaf.count;
85
+ reference<Node> l_node_ref(l_node);
86
+ reference<Leaf> l_leaf = Leaf::Get(art, l_node_ref);
94
87
 
95
- // copy row IDs
96
- while (other_ptr.IsSet()) {
97
- auto &other_segment = LeafSegment::Get(art, other_ptr);
98
- auto copy_count = MinValue(Node::LEAF_SEGMENT_SIZE, remaining);
88
+ // find a non-full node
89
+ while (l_leaf.get().count == Node::LEAF_SIZE) {
90
+ l_node_ref = l_leaf.get().ptr;
99
91
 
100
- // copy the data
101
- for (idx_t i = 0; i < copy_count; i++) {
102
- segment = segment.get().Append(art, count, other_segment.row_ids[i]);
92
+ // the last leaf is full
93
+ if (!l_leaf.get().ptr.IsSet()) {
94
+ break;
103
95
  }
104
-
105
- // adjust the loop variables
106
- other_ptr = other_segment.next;
107
- remaining -= copy_count;
96
+ l_leaf = Leaf::Get(art, l_node_ref);
108
97
  }
109
- D_ASSERT(remaining == 0);
110
98
 
111
- Node::Free(art, other);
99
+ // store the last leaf and then append r_node
100
+ auto last_leaf_node = l_node_ref.get();
101
+ l_node_ref.get() = r_node;
102
+ r_node.Reset();
103
+
104
+ // append the remaining row IDs of the last leaf node
105
+ if (last_leaf_node.IsSet()) {
106
+ // find the tail
107
+ l_leaf = Leaf::Get(art, l_node_ref);
108
+ while (l_leaf.get().ptr.IsSet()) {
109
+ l_leaf = Leaf::Get(art, l_leaf.get().ptr);
110
+ }
111
+ // append the row IDs
112
+ auto &last_leaf = Leaf::Get(art, last_leaf_node);
113
+ for (idx_t i = 0; i < last_leaf.count; i++) {
114
+ l_leaf = l_leaf.get().Append(art, last_leaf.row_ids[i]);
115
+ }
116
+ Node::GetAllocator(art, NType::LEAF).Free(last_leaf_node);
117
+ }
112
118
  }
113
119
 
114
- void Leaf::Insert(ART &art, const row_t row_id) {
120
+ void Leaf::Insert(ART &art, Node &node, const row_t row_id) {
121
+
122
+ D_ASSERT(node.IsSet() && !node.IsSerialized());
115
123
 
116
- D_ASSERT(count != 0);
117
- if (count == 1) {
118
- MoveInlinedToSegment(art);
124
+ if (node.GetType() == NType::LEAF_INLINED) {
125
+ Leaf::MoveInlinedToLeaf(art, node);
126
+ Leaf::Insert(art, node, row_id);
127
+ return;
119
128
  }
120
129
 
121
130
  // append to the tail
122
- auto &first_segment = LeafSegment::Get(art, row_ids.ptr);
123
- auto &tail = first_segment.GetTail(art);
124
- tail.Append(art, count, row_id);
131
+ reference<Leaf> leaf = Leaf::Get(art, node);
132
+ while (leaf.get().ptr.IsSet()) {
133
+ if (leaf.get().ptr.IsSerialized()) {
134
+ leaf.get().ptr.Deserialize(art);
135
+ }
136
+ leaf = Leaf::Get(art, leaf.get().ptr);
137
+ }
138
+ leaf.get().Append(art, row_id);
125
139
  }
126
140
 
127
- void Leaf::Remove(ART &art, const row_t row_id) {
141
+ bool Leaf::Remove(ART &art, reference<Node> &node, const row_t row_id) {
128
142
 
129
- if (count == 0) {
130
- return;
143
+ D_ASSERT(node.get().IsSet() && !node.get().IsSerialized());
144
+
145
+ if (node.get().GetType() == NType::LEAF_INLINED) {
146
+ if (node.get().GetRowId() == row_id) {
147
+ return true;
148
+ }
149
+ return false;
131
150
  }
132
151
 
133
- if (IsInlined()) {
134
- if (row_ids.inlined == row_id) {
135
- count--;
152
+ reference<Leaf> leaf = Leaf::Get(art, node);
153
+
154
+ // inline the remaining row ID
155
+ if (leaf.get().count == 2) {
156
+ if (leaf.get().row_ids[0] == row_id || leaf.get().row_ids[1] == row_id) {
157
+ auto remaining_row_id = leaf.get().row_ids[0] == row_id ? leaf.get().row_ids[1] : leaf.get().row_ids[0];
158
+ Node::Free(art, node);
159
+ Leaf::New(node, remaining_row_id);
136
160
  }
137
- return;
161
+ return false;
138
162
  }
139
163
 
140
- // possibly inline the row ID
141
- if (count == 2) {
142
- auto &segment = LeafSegment::Get(art, row_ids.ptr);
143
- D_ASSERT(segment.row_ids[0] == row_id || segment.row_ids[1] == row_id);
144
- auto remaining_row_id = segment.row_ids[0] == row_id ? segment.row_ids[1] : segment.row_ids[0];
145
- Node::Free(art, row_ids.ptr);
146
- row_ids.inlined = remaining_row_id;
147
- count--;
148
- return;
164
+ // get the last row ID (the order within a leaf does not matter)
165
+ // because we want to overwrite the row ID to remove with that one
166
+
167
+ // go to the tail and keep track of the previous leaf node
168
+ reference<Leaf> prev_leaf(leaf);
169
+ while (leaf.get().ptr.IsSet()) {
170
+ prev_leaf = leaf;
171
+ if (leaf.get().ptr.IsSerialized()) {
172
+ leaf.get().ptr.Deserialize(art);
173
+ }
174
+ leaf = Leaf::Get(art, leaf.get().ptr);
149
175
  }
150
176
 
151
- // find the row ID, and the segment containing that row ID (stored in ptr)
152
- auto ptr = row_ids.ptr;
153
- auto copy_idx = FindRowId(art, ptr, row_id);
154
- D_ASSERT(copy_idx != (uint32_t)DConstants::INVALID_INDEX);
155
- copy_idx++;
156
-
157
- // iterate all remaining segments and move the row IDs one field to the left
158
- reference<LeafSegment> segment(LeafSegment::Get(art, ptr));
159
- reference<LeafSegment> prev_segment(LeafSegment::Get(art, ptr));
160
- while (copy_idx < count) {
161
-
162
- auto copy_start = copy_idx % Node::LEAF_SEGMENT_SIZE;
163
- D_ASSERT(copy_start != 0);
164
- auto copy_end = MinValue(copy_start + count - copy_idx, Node::LEAF_SEGMENT_SIZE);
165
-
166
- // copy row IDs
167
- for (idx_t i = copy_start; i < copy_end; i++) {
168
- segment.get().row_ids[i - 1] = segment.get().row_ids[i];
169
- copy_idx++;
177
+ auto last_idx = leaf.get().count;
178
+ auto last_row_id = leaf.get().row_ids[last_idx - 1];
179
+
180
+ // only one row ID in this leaf segment, free it
181
+ if (leaf.get().count == 1) {
182
+ Node::Free(art, prev_leaf.get().ptr);
183
+ if (last_row_id == row_id) {
184
+ return false;
170
185
  }
186
+ } else {
187
+ leaf.get().count--;
188
+ }
171
189
 
172
- // adjust loop variables
173
- if (segment.get().next.IsSet()) {
174
- prev_segment = segment;
175
- segment = LeafSegment::Get(art, segment.get().next);
176
- // this segment has at least one element, and we need to copy it into the previous segment
177
- prev_segment.get().row_ids[Node::LEAF_SEGMENT_SIZE - 1] = segment.get().row_ids[0];
178
- copy_idx++;
190
+ // find the row ID and copy the last row ID to that position
191
+ while (node.get().IsSet()) {
192
+ D_ASSERT(!node.get().IsSerialized());
193
+ leaf = Leaf::Get(art, node);
194
+ for (idx_t i = 0; i < leaf.get().count; i++) {
195
+ if (leaf.get().row_ids[i] == row_id) {
196
+ leaf.get().row_ids[i] = last_row_id;
197
+ return false;
198
+ }
179
199
  }
200
+ node = leaf.get().ptr;
180
201
  }
202
+ return false;
203
+ }
181
204
 
182
- // this evaluates to true, if we need to delete the last segment
183
- if (count % Node::LEAF_SEGMENT_SIZE == 1) {
184
- ptr = row_ids.ptr;
185
- while (ptr.IsSet()) {
205
+ idx_t Leaf::TotalCount(ART &art, Node &node) {
186
206
 
187
- // get the segment succeeding the current segment
188
- auto &current_segment = LeafSegment::Get(art, ptr);
189
- D_ASSERT(current_segment.next.IsSet());
190
- auto &next_segment = LeafSegment::Get(art, current_segment.next);
207
+ // NOTE: first leaf in the leaf chain is already deserialized
208
+ D_ASSERT(node.IsSet() && !node.IsSerialized());
191
209
 
192
- // next_segment is the tail of the segment list
193
- if (!next_segment.next.IsSet()) {
194
- Node::Free(art, current_segment.next);
195
- }
210
+ if (node.GetType() == NType::LEAF_INLINED) {
211
+ return 1;
212
+ }
196
213
 
197
- // adjust loop variables
198
- ptr = current_segment.next;
214
+ idx_t count = 0;
215
+ reference<Node> node_ref(node);
216
+ while (node_ref.get().IsSet()) {
217
+ auto &leaf = Leaf::Get(art, node_ref);
218
+ count += leaf.count;
219
+
220
+ if (leaf.ptr.IsSerialized()) {
221
+ leaf.ptr.Deserialize(art);
199
222
  }
223
+ node_ref = leaf.ptr;
200
224
  }
201
- count--;
225
+ return count;
202
226
  }
203
227
 
204
- row_t Leaf::GetRowId(const ART &art, const idx_t position) const {
228
+ bool Leaf::GetRowIds(ART &art, Node &node, vector<row_t> &result_ids, idx_t max_count) {
205
229
 
206
- D_ASSERT(position < count);
207
- if (IsInlined()) {
208
- return row_ids.inlined;
230
+ // adding more elements would exceed the maximum count
231
+ D_ASSERT(node.IsSet());
232
+ if (result_ids.size() + Leaf::TotalCount(art, node) > max_count) {
233
+ return false;
209
234
  }
210
235
 
211
- // get the correct segment
212
- reference<LeafSegment> segment(LeafSegment::Get(art, row_ids.ptr));
213
- for (idx_t i = 0; i < position / Node::LEAF_SEGMENT_SIZE; i++) {
214
- D_ASSERT(segment.get().next.IsSet());
215
- segment = LeafSegment::Get(art, segment.get().next);
236
+ // NOTE: Leaf::TotalCount fully deserializes the leaf
237
+ D_ASSERT(!node.IsSerialized());
238
+
239
+ if (node.GetType() == NType::LEAF_INLINED) {
240
+ // push back the inlined row ID of this leaf
241
+ result_ids.push_back(node.GetRowId());
242
+
243
+ } else {
244
+ // push back all the row IDs of this leaf
245
+ reference<Node> last_leaf_ref(node);
246
+ while (last_leaf_ref.get().IsSet()) {
247
+ auto &leaf = Leaf::Get(art, last_leaf_ref);
248
+ for (idx_t i = 0; i < leaf.count; i++) {
249
+ result_ids.push_back(leaf.row_ids[i]);
250
+ }
251
+
252
+ D_ASSERT(!leaf.ptr.IsSerialized());
253
+ last_leaf_ref = leaf.ptr;
254
+ }
216
255
  }
217
256
 
218
- return segment.get().row_ids[position % Node::LEAF_SEGMENT_SIZE];
257
+ return true;
219
258
  }
220
259
 
221
- uint32_t Leaf::FindRowId(const ART &art, Node &ptr, const row_t row_id) const {
222
-
223
- D_ASSERT(!IsInlined());
260
+ bool Leaf::ContainsRowId(ART &art, Node &node, const row_t row_id) {
224
261
 
225
- auto remaining = count;
226
- while (ptr.IsSet()) {
262
+ // NOTE: we either just removed a row ID from this leaf (by copying the
263
+ // last row ID at a different position) or inserted a row ID into this leaf
264
+ // (at the end), so the whole leaf is deserialized
265
+ D_ASSERT(node.IsSet() && !node.IsSerialized());
227
266
 
228
- auto &segment = LeafSegment::Get(art, ptr);
229
- auto search_count = MinValue(Node::LEAF_SEGMENT_SIZE, remaining);
267
+ if (node.GetType() == NType::LEAF_INLINED) {
268
+ return node.GetRowId() == row_id;
269
+ }
230
270
 
231
- // search in this segment
232
- for (idx_t i = 0; i < search_count; i++) {
233
- if (segment.row_ids[i] == row_id) {
234
- return count - remaining + i;
271
+ reference<Node> ref_node(node);
272
+ while (ref_node.get().IsSet()) {
273
+ auto &leaf = Leaf::Get(art, ref_node);
274
+ for (idx_t i = 0; i < leaf.count; i++) {
275
+ if (leaf.row_ids[i] == row_id) {
276
+ return true;
235
277
  }
236
278
  }
237
279
 
238
- // adjust loop variables
239
- remaining -= search_count;
240
- ptr = segment.next;
280
+ D_ASSERT(!leaf.ptr.IsSerialized());
281
+ ref_node = leaf.ptr;
241
282
  }
242
- return (uint32_t)DConstants::INVALID_INDEX;
283
+
284
+ return false;
243
285
  }
244
286
 
245
- string Leaf::VerifyAndToString(const ART &art, const bool only_verify) const {
287
+ string Leaf::VerifyAndToString(ART &art, Node &node) {
246
288
 
247
- if (IsInlined()) {
248
- return only_verify ? "" : "Leaf [count: 1, row ID: " + to_string(row_ids.inlined) + "]";
289
+ if (node.GetType() == NType::LEAF_INLINED) {
290
+ return "Leaf [count: 1, row ID: " + to_string(node.GetRowId()) + "]";
249
291
  }
250
292
 
251
- auto ptr = row_ids.ptr;
252
- auto remaining = count;
293
+ // NOTE: we could do this recursively, but the function-call overhead can become kinda crazy
253
294
  string str = "";
254
- uint32_t this_count = 0;
255
- while (ptr.IsSet()) {
256
- auto &segment = LeafSegment::Get(art, ptr);
257
- auto to_string_count = Node::LEAF_SEGMENT_SIZE < remaining ? Node::LEAF_SEGMENT_SIZE : remaining;
258
-
259
- for (idx_t i = 0; i < to_string_count; i++) {
260
- str += ", " + to_string(segment.row_ids[i]);
261
- this_count++;
295
+
296
+ reference<Node> node_ref(node);
297
+ while (node_ref.get().IsSet()) {
298
+
299
+ auto &leaf = Leaf::Get(art, node_ref);
300
+ D_ASSERT(leaf.count <= Node::LEAF_SIZE);
301
+
302
+ str += "Leaf [count: " + to_string(leaf.count) + ", row IDs: ";
303
+ for (idx_t i = 0; i < leaf.count; i++) {
304
+ str += to_string(leaf.row_ids[i]) + "-";
262
305
  }
263
- remaining -= to_string_count;
264
- ptr = segment.next;
265
- }
306
+ str += "] ";
266
307
 
267
- D_ASSERT(remaining == 0);
268
- (void)this_count;
269
- D_ASSERT(this_count == count);
270
- return only_verify ? "" : "Leaf [count: " + to_string(count) + ", row IDs: " + str + "] \n";
308
+ // NOTE: we are currently only calling this function during CREATE INDEX
309
+ // statements (and debugging), so the index is never serialized
310
+ D_ASSERT(!leaf.ptr.IsSerialized());
311
+ node_ref = leaf.ptr;
312
+ }
313
+ return str;
271
314
  }
272
315
 
273
- BlockPointer Leaf::Serialize(const ART &art, MetaBlockWriter &writer) const {
274
-
275
- // get pointer and write fields
276
- auto block_pointer = writer.GetBlockPointer();
277
- writer.Write(NType::LEAF);
278
- writer.Write<uint32_t>(count);
316
+ BlockPointer Leaf::Serialize(ART &art, Node &node, MetaBlockWriter &writer) {
279
317
 
280
- if (IsInlined()) {
281
- writer.Write(row_ids.inlined);
318
+ if (node.GetType() == NType::LEAF_INLINED) {
319
+ auto block_pointer = writer.GetBlockPointer();
320
+ writer.Write(NType::LEAF_INLINED);
321
+ writer.Write(node.GetRowId());
282
322
  return block_pointer;
283
323
  }
284
324
 
285
- D_ASSERT(row_ids.ptr.IsSet());
286
- auto ptr = row_ids.ptr;
287
- auto remaining = count;
288
-
289
- // iterate all leaf segments and write their row IDs
290
- while (ptr.IsSet()) {
291
- auto &segment = LeafSegment::Get(art, ptr);
292
- auto write_count = MinValue(Node::LEAF_SEGMENT_SIZE, remaining);
325
+ // recurse into the child and retrieve its block pointer
326
+ auto &leaf = Leaf::Get(art, node);
327
+ auto child_block_pointer = leaf.ptr.Serialize(art, writer);
293
328
 
294
- // write the row IDs
295
- for (idx_t i = 0; i < write_count; i++) {
296
- writer.Write(segment.row_ids[i]);
297
- }
329
+ // get pointer and write fields
330
+ auto block_pointer = writer.GetBlockPointer();
331
+ writer.Write(NType::LEAF);
332
+ writer.Write<uint8_t>(leaf.count);
298
333
 
299
- // adjust loop variables
300
- remaining -= write_count;
301
- ptr = segment.next;
334
+ // write row IDs
335
+ for (idx_t i = 0; i < leaf.count; i++) {
336
+ writer.Write(leaf.row_ids[i]);
302
337
  }
303
- D_ASSERT(remaining == 0);
338
+
339
+ // write child block pointer
340
+ writer.Write(child_block_pointer.block_id);
341
+ writer.Write(child_block_pointer.offset);
304
342
 
305
343
  return block_pointer;
306
344
  }
307
345
 
308
- void Leaf::Deserialize(ART &art, MetaBlockReader &reader) {
346
+ void Leaf::Deserialize(ART &art, Node &node, MetaBlockReader &reader) {
309
347
 
310
- auto count_p = reader.Read<uint32_t>();
348
+ D_ASSERT(node.GetType() == NType::LEAF);
311
349
 
312
- // inlined
313
- if (count_p == 1) {
314
- row_ids.inlined = reader.Read<row_t>();
315
- count = count_p;
316
- return;
317
- }
350
+ auto &leaf = Leaf::Get(art, node);
351
+ leaf.count = reader.Read<uint8_t>();
318
352
 
319
- // copy into segments
320
- count = 0;
321
- reference<LeafSegment> segment(LeafSegment::New(art, row_ids.ptr));
322
- for (idx_t i = 0; i < count_p; i++) {
323
- segment = segment.get().Append(art, count, reader.Read<row_t>());
353
+ // read row IDs
354
+ for (idx_t i = 0; i < leaf.count; i++) {
355
+ leaf.row_ids[i] = reader.Read<row_t>();
324
356
  }
325
- D_ASSERT(count_p == count);
326
- }
327
357
 
328
- void Leaf::Vacuum(ART &art) {
358
+ // read child block pointer
359
+ leaf.ptr = Node(reader);
360
+ }
329
361
 
330
- if (IsInlined()) {
331
- return;
332
- }
362
+ void Leaf::Vacuum(ART &art, Node &node) {
333
363
 
334
- // first pointer has special treatment because we don't obtain it from a leaf segment
335
- auto &allocator = Node::GetAllocator(art, NType::LEAF_SEGMENT);
336
- if (allocator.NeedsVacuum(row_ids.ptr)) {
337
- row_ids.ptr.SetPtr(allocator.VacuumPointer(row_ids.ptr));
338
- row_ids.ptr.type = (uint8_t)NType::LEAF_SEGMENT;
339
- }
364
+ // NOTE: we could do this recursively, but the function-call overhead can become kinda crazy
365
+ auto &allocator = Node::GetAllocator(art, NType::LEAF);
340
366
 
341
- auto ptr = row_ids.ptr;
342
- while (ptr.IsSet()) {
343
- auto &segment = LeafSegment::Get(art, ptr);
344
- ptr = segment.next;
345
- if (ptr.IsSet() && allocator.NeedsVacuum(ptr)) {
346
- segment.next.SetPtr(allocator.VacuumPointer(ptr));
347
- segment.next.type = (uint8_t)NType::LEAF_SEGMENT;
348
- ptr = segment.next;
367
+ reference<Node> node_ref(node);
368
+ while (node_ref.get().IsSet() && !node_ref.get().IsSerialized()) {
369
+ if (allocator.NeedsVacuum(node_ref)) {
370
+ node_ref.get() = allocator.VacuumPointer(node_ref);
371
+ node_ref.get().SetType((uint8_t)NType::LEAF);
349
372
  }
373
+ auto &leaf = Leaf::Get(art, node_ref);
374
+ node_ref = leaf.ptr;
350
375
  }
376
+ return;
377
+ }
378
+
379
+ void Leaf::MoveInlinedToLeaf(ART &art, Node &node) {
380
+
381
+ D_ASSERT(node.GetType() == NType::LEAF_INLINED);
382
+ auto row_id = node.GetRowId();
383
+ node = Node::GetAllocator(art, NType::LEAF).New();
384
+ node.SetType((uint8_t)NType::LEAF);
385
+
386
+ auto &leaf = Leaf::Get(art, node);
387
+ leaf.count = 1;
388
+ leaf.row_ids[0] = row_id;
389
+ leaf.ptr.Reset();
351
390
  }
352
391
 
353
- void Leaf::MoveInlinedToSegment(ART &art) {
392
+ Leaf &Leaf::Append(ART &art, const row_t row_id) {
393
+
394
+ reference<Leaf> leaf(*this);
354
395
 
355
- D_ASSERT(IsInlined());
396
+ // we need a new leaf node
397
+ if (leaf.get().count == Node::LEAF_SIZE) {
398
+ leaf.get().ptr = Node::GetAllocator(art, NType::LEAF).New();
399
+ leaf.get().ptr.SetType((uint8_t)NType::LEAF);
400
+
401
+ leaf = Leaf::Get(art, leaf.get().ptr);
402
+ leaf.get().count = 0;
403
+ leaf.get().ptr.Reset();
404
+ }
356
405
 
357
- auto row_id = row_ids.inlined;
358
- auto &segment = LeafSegment::New(art, row_ids.ptr);
359
- segment.row_ids[0] = row_id;
406
+ leaf.get().row_ids[leaf.get().count] = row_id;
407
+ leaf.get().count++;
408
+ return leaf.get();
360
409
  }
361
410
 
362
411
  } // namespace duckdb