duckdb 0.7.2-dev2552.0 → 0.7.2-dev2699.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (71)
  1. package/binding.gyp +7 -7
  2. package/package.json +2 -2
  3. package/src/duckdb/extension/parquet/parquet_statistics.cpp +3 -0
  4. package/src/duckdb/src/catalog/catalog_entry/duck_table_entry.cpp +2 -2
  5. package/src/duckdb/src/common/adbc/adbc.cpp +5 -2
  6. package/src/duckdb/src/common/radix_partitioning.cpp +1 -1
  7. package/src/duckdb/src/execution/index/art/art.cpp +286 -269
  8. package/src/duckdb/src/execution/index/art/art_key.cpp +22 -32
  9. package/src/duckdb/src/execution/index/art/fixed_size_allocator.cpp +224 -0
  10. package/src/duckdb/src/execution/index/art/iterator.cpp +142 -123
  11. package/src/duckdb/src/execution/index/art/leaf.cpp +319 -170
  12. package/src/duckdb/src/execution/index/art/leaf_segment.cpp +42 -0
  13. package/src/duckdb/src/execution/index/art/node.cpp +444 -379
  14. package/src/duckdb/src/execution/index/art/node16.cpp +178 -114
  15. package/src/duckdb/src/execution/index/art/node256.cpp +117 -79
  16. package/src/duckdb/src/execution/index/art/node4.cpp +169 -114
  17. package/src/duckdb/src/execution/index/art/node48.cpp +175 -105
  18. package/src/duckdb/src/execution/index/art/prefix.cpp +405 -127
  19. package/src/duckdb/src/execution/index/art/prefix_segment.cpp +42 -0
  20. package/src/duckdb/src/execution/index/art/swizzleable_pointer.cpp +10 -85
  21. package/src/duckdb/src/execution/operator/join/physical_index_join.cpp +2 -1
  22. package/src/duckdb/src/execution/operator/persistent/base_csv_reader.cpp +2 -2
  23. package/src/duckdb/src/execution/operator/persistent/csv_reader_options.cpp +2 -0
  24. package/src/duckdb/src/execution/operator/persistent/parallel_csv_reader.cpp +4 -0
  25. package/src/duckdb/src/execution/operator/schema/physical_create_index.cpp +11 -12
  26. package/src/duckdb/src/function/table/read_csv.cpp +5 -1
  27. package/src/duckdb/src/function/table/version/pragma_version.cpp +2 -2
  28. package/src/duckdb/src/include/duckdb/common/queue.hpp +1 -1
  29. package/src/duckdb/src/include/duckdb/execution/index/art/art.hpp +53 -45
  30. package/src/duckdb/src/include/duckdb/execution/index/art/art_key.hpp +29 -24
  31. package/src/duckdb/src/include/duckdb/execution/index/art/fixed_size_allocator.hpp +114 -0
  32. package/src/duckdb/src/include/duckdb/execution/index/art/iterator.hpp +26 -20
  33. package/src/duckdb/src/include/duckdb/execution/index/art/leaf.hpp +63 -39
  34. package/src/duckdb/src/include/duckdb/execution/index/art/leaf_segment.hpp +36 -0
  35. package/src/duckdb/src/include/duckdb/execution/index/art/node.hpp +98 -116
  36. package/src/duckdb/src/include/duckdb/execution/index/art/node16.hpp +48 -36
  37. package/src/duckdb/src/include/duckdb/execution/index/art/node256.hpp +52 -35
  38. package/src/duckdb/src/include/duckdb/execution/index/art/node4.hpp +46 -36
  39. package/src/duckdb/src/include/duckdb/execution/index/art/node48.hpp +57 -35
  40. package/src/duckdb/src/include/duckdb/execution/index/art/prefix.hpp +57 -50
  41. package/src/duckdb/src/include/duckdb/execution/index/art/prefix_segment.hpp +40 -0
  42. package/src/duckdb/src/include/duckdb/execution/index/art/swizzleable_pointer.hpp +38 -31
  43. package/src/duckdb/src/include/duckdb/execution/operator/persistent/csv_file_handle.hpp +2 -1
  44. package/src/duckdb/src/include/duckdb/execution/operator/persistent/csv_reader_options.hpp +2 -0
  45. package/src/duckdb/src/include/duckdb/main/query_result.hpp +1 -1
  46. package/src/duckdb/src/include/duckdb/parser/statement/insert_statement.hpp +4 -1
  47. package/src/duckdb/src/include/duckdb/parser/transformer.hpp +2 -1
  48. package/src/duckdb/src/include/duckdb/storage/buffer_manager.hpp +0 -5
  49. package/src/duckdb/src/include/duckdb/storage/index.hpp +13 -28
  50. package/src/duckdb/src/include/duckdb/storage/standard_buffer_manager.hpp +0 -2
  51. package/src/duckdb/src/include/duckdb/transaction/cleanup_state.hpp +5 -0
  52. package/src/duckdb/src/include/duckdb.h +26 -0
  53. package/src/duckdb/src/main/capi/helper-c.cpp +7 -0
  54. package/src/duckdb/src/main/client_context.cpp +1 -1
  55. package/src/duckdb/src/main/query_result.cpp +1 -1
  56. package/src/duckdb/src/parser/statement/insert_statement.cpp +15 -6
  57. package/src/duckdb/src/parser/transform/constraint/transform_constraint.cpp +1 -1
  58. package/src/duckdb/src/parser/transform/expression/transform_function.cpp +18 -5
  59. package/src/duckdb/src/parser/transform/statement/transform_insert.cpp +5 -7
  60. package/src/duckdb/src/planner/binder/statement/bind_create.cpp +20 -7
  61. package/src/duckdb/src/planner/binder/statement/bind_insert.cpp +14 -9
  62. package/src/duckdb/src/storage/checkpoint_manager.cpp +11 -9
  63. package/src/duckdb/src/storage/data_table.cpp +6 -3
  64. package/src/duckdb/src/storage/index.cpp +18 -6
  65. package/src/duckdb/src/storage/local_storage.cpp +8 -2
  66. package/src/duckdb/src/storage/standard_buffer_manager.cpp +0 -9
  67. package/src/duckdb/src/storage/wal_replay.cpp +1 -1
  68. package/src/duckdb/src/transaction/cleanup_state.cpp +6 -0
  69. package/src/duckdb/src/transaction/undo_buffer.cpp +8 -0
  70. package/src/duckdb/ub_extension_icu_third_party_icu_i18n.cpp +4 -4
  71. package/src/duckdb/ub_src_execution_index_art.cpp +7 -1
@@ -1,245 +1,394 @@
1
1
  #include "duckdb/execution/index/art/leaf.hpp"
2
2
 
3
3
  #include "duckdb/execution/index/art/art.hpp"
4
+ #include "duckdb/execution/index/art/art_key.hpp"
5
+ #include "duckdb/execution/index/art/leaf_segment.hpp"
4
6
  #include "duckdb/execution/index/art/node.hpp"
5
- #include "duckdb/execution/index/art/prefix.hpp"
6
7
  #include "duckdb/storage/meta_block_reader.hpp"
7
- #include <cstring>
8
+ #include "duckdb/storage/meta_block_writer.hpp"
8
9
 
9
10
  namespace duckdb {
10
- idx_t Leaf::GetCapacity() const {
11
- return IsInlined() ? 1 : rowids.ptr[0];
12
- }
13
11
 
14
- bool Leaf::IsInlined() const {
15
- return count <= 1;
12
+ Leaf &Leaf::New(ART &art, Node &node, const ARTKey &key, const uint32_t depth, const row_t row_id) {
13
+
14
+ node.SetPtr(Node::GetAllocator(art, NType::LEAF).New());
15
+ node.type = (uint8_t)NType::LEAF;
16
+ auto &leaf = Leaf::Get(art, node);
17
+
18
+ // set the fields of the leaf
19
+ leaf.count = 1;
20
+ leaf.row_ids.inlined = row_id;
21
+
22
+ // initialize the prefix
23
+ D_ASSERT(key.len >= depth);
24
+ leaf.prefix.Initialize(art, key, depth, key.len - depth);
25
+
26
+ return leaf;
16
27
  }
17
28
 
18
- row_t Leaf::GetRowId(idx_t index) {
19
- D_ASSERT(index < count);
20
- if (IsInlined()) {
21
- return rowids.inlined;
22
- } else {
23
- D_ASSERT(rowids.ptr[0] >= count);
24
- return rowids.ptr[index + 1];
29
+ Leaf &Leaf::New(ART &art, Node &node, const ARTKey &key, const uint32_t depth, const row_t *row_ids,
30
+ const idx_t count) {
31
+
32
+ // inlined leaf
33
+ D_ASSERT(count >= 1);
34
+ if (count == 1) {
35
+ return Leaf::New(art, node, key, depth, row_ids[0]);
25
36
  }
26
- }
27
37
 
28
- row_t *Leaf::GetRowIds() {
29
- if (IsInlined()) {
30
- return &rowids.inlined;
31
- } else {
32
- return rowids.ptr + 1;
38
+ node.SetPtr(Node::GetAllocator(art, NType::LEAF).New());
39
+ node.type = (uint8_t)NType::LEAF;
40
+ auto &leaf = Leaf::Get(art, node);
41
+
42
+ // set the fields of the leaf
43
+ leaf.count = 0;
44
+
45
+ // copy the row IDs
46
+ reference<LeafSegment> segment(LeafSegment::New(art, leaf.row_ids.ptr));
47
+ for (idx_t i = 0; i < count; i++) {
48
+ segment = segment.get().Append(art, leaf.count, row_ids[i]);
33
49
  }
34
- }
35
50
 
36
- Leaf::Leaf() : Node(NodeType::NLeaf) {
37
- }
51
+ // set the prefix
52
+ D_ASSERT(key.len >= depth);
53
+ leaf.prefix.Initialize(art, key, depth, key.len - depth);
38
54
 
39
- Leaf::Leaf(Key &value, uint32_t depth, row_t row_id) : Node(NodeType::NLeaf) {
40
- count = 1;
41
- rowids.inlined = row_id;
42
- D_ASSERT(value.len >= depth);
43
- prefix = Prefix(value, depth, value.len - depth);
55
+ return leaf;
44
56
  }
45
57
 
46
- Leaf::Leaf(Key &value, uint32_t depth, row_t *row_ids_p, idx_t num_elements_p) : Node(NodeType::NLeaf) {
47
- D_ASSERT(num_elements_p >= 1);
48
- if (num_elements_p == 1) {
49
- // we can inline the row ids
50
- rowids.inlined = row_ids_p[0];
51
- } else {
52
- // new row ids of this leaf
53
- count = 0;
54
- Resize(row_ids_p, num_elements_p, num_elements_p);
55
- }
56
- count = num_elements_p;
57
- D_ASSERT(value.len >= depth);
58
- prefix = Prefix(value, depth, value.len - depth);
59
- }
58
+ void Leaf::Free(ART &art, Node &node) {
60
59
 
61
- Leaf::Leaf(row_t *row_ids_p, idx_t num_elements_p, Prefix &prefix_p) : Node(NodeType::NLeaf) {
62
- D_ASSERT(num_elements_p > 1);
63
- D_ASSERT(row_ids_p[0] == row_t(num_elements_p)); // first element should contain capacity
64
- rowids.ptr = row_ids_p;
65
- count = num_elements_p;
66
- prefix = prefix_p;
67
- }
60
+ D_ASSERT(node.IsSet());
61
+ D_ASSERT(!node.IsSwizzled());
68
62
 
69
- Leaf::Leaf(row_t row_id, Prefix &prefix_p) : Node(NodeType::NLeaf) {
70
- rowids.inlined = row_id;
71
- count = 1;
72
- prefix = prefix_p;
73
- }
63
+ auto &leaf = Leaf::Get(art, node);
74
64
 
75
- Leaf::~Leaf() {
76
- if (!IsInlined()) {
77
- DeleteArray<row_t>(rowids.ptr, rowids.ptr[0] + 1);
78
- count = 0;
65
+ // delete all leaf segments
66
+ if (!leaf.IsInlined()) {
67
+ auto ptr = leaf.row_ids.ptr;
68
+ while (ptr.IsSet()) {
69
+ auto next_ptr = LeafSegment::Get(art, ptr).next;
70
+ Node::Free(art, ptr);
71
+ ptr = next_ptr;
72
+ }
79
73
  }
80
74
  }
81
75
 
82
- idx_t Leaf::MemorySize(ART &, const bool &) {
76
+ void Leaf::InitializeMerge(const ART &art, const idx_t buffer_count) {
77
+
83
78
  if (IsInlined()) {
84
- return prefix.MemorySize() + sizeof(*this) + sizeof(row_t);
79
+ return;
85
80
  }
86
- return prefix.MemorySize() + sizeof(*this) + sizeof(row_t) * (GetCapacity() + 1);
87
- }
88
81
 
89
- row_t *Leaf::Resize(row_t *current_row_ids, uint32_t current_count, idx_t new_capacity) {
90
- D_ASSERT(new_capacity >= current_count);
91
- auto new_allocation = AllocateArray<row_t>(new_capacity + 1);
92
- new_allocation[0] = new_capacity;
93
- auto new_row_ids = new_allocation + 1;
94
- memcpy(new_row_ids, current_row_ids, current_count * sizeof(row_t));
95
- if (!IsInlined()) {
96
- // delete the old data
97
- DeleteArray<row_t>(rowids.ptr, rowids.ptr[0] + 1);
98
- }
99
- // set up the new pointers
100
- rowids.ptr = new_allocation;
101
- return new_row_ids;
82
+ reference<LeafSegment> segment(LeafSegment::Get(art, row_ids.ptr));
83
+ row_ids.ptr.buffer_id += buffer_count;
84
+
85
+ auto ptr = segment.get().next;
86
+ while (ptr.IsSet()) {
87
+ segment.get().next.buffer_id += buffer_count;
88
+ segment = LeafSegment::Get(art, ptr);
89
+ ptr = segment.get().next;
90
+ }
102
91
  }
103
92
 
104
- void Leaf::Insert(ART &art, row_t row_id) {
105
- auto capacity = GetCapacity();
106
- row_t *row_ids = GetRowIds();
107
- D_ASSERT(count <= capacity);
108
-
109
- if (count == capacity) {
110
- // grow array
111
- if (IsInlined()) {
112
- art.IncreaseMemorySize((capacity + 1) * sizeof(row_t));
113
- } else {
114
- art.IncreaseMemorySize(capacity * sizeof(row_t));
93
+ void Leaf::Merge(ART &art, Node &other) {
94
+
95
+ auto &other_leaf = Leaf::Get(art, other);
96
+
97
+ // copy inlined row ID
98
+ if (other_leaf.IsInlined()) {
99
+ Insert(art, other_leaf.row_ids.inlined);
100
+ Node::Free(art, other);
101
+ return;
102
+ }
103
+
104
+ // row ID was inlined, move to a new segment
105
+ if (IsInlined()) {
106
+ auto row_id = row_ids.inlined;
107
+ auto &segment = LeafSegment::New(art, row_ids.ptr);
108
+ segment.row_ids[0] = row_id;
109
+ }
110
+
111
+ // get the first segment to copy to
112
+ reference<LeafSegment> segment(LeafSegment::Get(art, row_ids.ptr).GetTail(art));
113
+
114
+ // initialize loop variables
115
+ auto other_ptr = other_leaf.row_ids.ptr;
116
+ auto remaining = other_leaf.count;
117
+
118
+ // copy row IDs
119
+ while (other_ptr.IsSet()) {
120
+ auto &other_segment = LeafSegment::Get(art, other_ptr);
121
+ auto copy_count = MinValue(Node::LEAF_SEGMENT_SIZE, remaining);
122
+
123
+ // copy the data
124
+ for (idx_t i = 0; i < copy_count; i++) {
125
+ segment = segment.get().Append(art, count, other_segment.row_ids[i]);
115
126
  }
116
- row_ids = Resize(row_ids, count, capacity * 2);
127
+
128
+ // adjust the loop variables
129
+ other_ptr = other_segment.next;
130
+ remaining -= copy_count;
117
131
  }
118
- // insert new row ID
119
- row_ids[count++] = row_id;
132
+ D_ASSERT(remaining == 0);
133
+
134
+ Node::Free(art, other);
120
135
  }
121
136
 
122
- void Leaf::Remove(ART &art, row_t row_id) {
123
- idx_t entry_offset = DConstants::INVALID_INDEX;
124
- row_t *row_ids = GetRowIds();
137
+ void Leaf::Insert(ART &art, const row_t row_id) {
125
138
 
126
- // find the row ID in the leaf
127
- for (idx_t i = 0; i < count; i++) {
128
- if (row_ids[i] == row_id) {
129
- entry_offset = i;
130
- break;
131
- }
139
+ if (count == 0) {
140
+ row_ids.inlined = row_id;
141
+ count++;
142
+ return;
132
143
  }
133
144
 
134
- // didn't find the row ID
135
- if (entry_offset == DConstants::INVALID_INDEX) {
145
+ if (count == 1) {
146
+ MoveInlinedToSegment(art);
147
+ }
148
+
149
+ // append to the tail
150
+ auto &first_segment = LeafSegment::Get(art, row_ids.ptr);
151
+ auto &tail = first_segment.GetTail(art);
152
+ tail.Append(art, count, row_id);
153
+ }
154
+
155
+ void Leaf::Remove(ART &art, const row_t row_id) {
156
+
157
+ if (count == 0) {
136
158
  return;
137
159
  }
138
160
 
139
- // now empty leaf
140
161
  if (IsInlined()) {
141
- D_ASSERT(count == 1);
162
+ if (row_ids.inlined == row_id) {
163
+ count--;
164
+ }
165
+ return;
166
+ }
167
+
168
+ // possibly inline the row ID
169
+ if (count == 2) {
170
+ auto &segment = LeafSegment::Get(art, row_ids.ptr);
171
+ if (segment.row_ids[0] != row_id && segment.row_ids[1] != row_id) {
172
+ return;
173
+ }
174
+
175
+ auto remaining_row_id = segment.row_ids[0] == row_id ? segment.row_ids[1] : segment.row_ids[0];
176
+ Node::Free(art, row_ids.ptr);
177
+ row_ids.inlined = remaining_row_id;
142
178
  count--;
143
179
  return;
144
180
  }
145
181
 
146
- auto capacity = GetCapacity();
147
- count--;
148
- if (count == 1) {
149
- // after erasing we can now inline the leaf
150
- // delete the pointer and inline the remaining rowid
151
- auto remaining_row_id = row_ids[0] == row_id ? row_ids[1] : row_ids[0];
152
- DeleteArray<row_t>(rowids.ptr, rowids.ptr[0] + 1);
153
- rowids.inlined = remaining_row_id;
154
- art.DecreaseMemorySize(capacity * sizeof(row_t));
182
+ // find the row ID, and the segment containing that row ID (stored in ptr)
183
+ auto ptr = row_ids.ptr;
184
+ auto copy_idx = FindRowId(art, ptr, row_id);
185
+ if (copy_idx == (uint32_t)DConstants::INVALID_INDEX) {
155
186
  return;
156
187
  }
188
+ copy_idx++;
189
+
190
+ // iterate all remaining segments and move the row IDs one field to the left
191
+ reference<LeafSegment> segment(LeafSegment::Get(art, ptr));
192
+ reference<LeafSegment> prev_segment(LeafSegment::Get(art, ptr));
193
+ while (copy_idx < count) {
157
194
 
158
- // shrink array, if less than half full
159
- capacity = GetCapacity();
160
- if (capacity > 2 && count < capacity / 2) {
195
+ // calculate the copy count
196
+ auto copy_count = count - copy_idx;
197
+ if (Node::LEAF_SEGMENT_SIZE - 1 < copy_count) {
198
+ copy_count = Node::LEAF_SEGMENT_SIZE - 1;
199
+ }
161
200
 
162
- auto new_capacity = capacity / 2;
163
- art.DecreaseMemorySize((capacity - new_capacity) * sizeof(row_t));
201
+ // copy row IDs
202
+ D_ASSERT((copy_idx % Node::LEAF_SEGMENT_SIZE) != 0);
203
+ for (idx_t i = copy_idx % Node::LEAF_SEGMENT_SIZE; i <= copy_count; i++) {
204
+ segment.get().row_ids[i - 1] = segment.get().row_ids[i];
205
+ copy_idx++;
206
+ }
207
+
208
+ // adjust loop variables
209
+ if (segment.get().next.IsSet()) {
210
+ prev_segment = segment;
211
+ segment = LeafSegment::Get(art, segment.get().next);
212
+ // this segment has at least one element, and we need to copy it into the previous segment
213
+ prev_segment.get().row_ids[Node::LEAF_SEGMENT_SIZE - 1] = segment.get().row_ids[0];
214
+ copy_idx++;
215
+ }
216
+ }
164
217
 
165
- auto new_allocation = AllocateArray<row_t>(new_capacity + 1);
166
- new_allocation[0] = new_capacity;
218
+ // this evaluates to true, if we need to delete the last segment
219
+ if (count % Node::LEAF_SEGMENT_SIZE == 1) {
220
+ ptr = row_ids.ptr;
221
+ while (ptr.IsSet()) {
167
222
 
168
- auto new_row_ids = new_allocation + 1;
169
- memcpy(new_row_ids, row_ids, entry_offset * sizeof(row_t));
170
- memcpy(new_row_ids + entry_offset, row_ids + entry_offset + 1, (count - entry_offset) * sizeof(row_t));
223
+ // get the segment succeeding the current segment
224
+ auto &current_segment = LeafSegment::Get(art, ptr);
225
+ D_ASSERT(current_segment.next.IsSet());
226
+ auto &next_segment = LeafSegment::Get(art, current_segment.next);
171
227
 
172
- DeleteArray<row_t>(rowids.ptr, rowids.ptr[0] + 1);
173
- rowids.ptr = new_allocation;
228
+ // next_segment is the tail of the segment list
229
+ if (!next_segment.next.IsSet()) {
230
+ Node::Free(art, current_segment.next);
231
+ }
174
232
 
175
- } else {
176
- // move the trailing row IDs (after entry_offset)
177
- memmove(row_ids + entry_offset, row_ids + entry_offset + 1, (count - entry_offset) * sizeof(row_t));
233
+ // adjust loop variables
234
+ ptr = current_segment.next;
235
+ }
178
236
  }
237
+ count--;
179
238
  }
180
239
 
181
- string Leaf::ToString(Node *node) {
182
- Leaf *leaf = (Leaf *)node;
183
- string str = "Leaf: [";
184
- auto row_ids = leaf->GetRowIds();
185
- for (idx_t i = 0; i < leaf->count; i++) {
186
- str += i == 0 ? to_string(row_ids[i]) : ", " + to_string(row_ids[i]);
240
+ row_t Leaf::GetRowId(const ART &art, const idx_t position) const {
241
+
242
+ D_ASSERT(position < count);
243
+ if (IsInlined()) {
244
+ return row_ids.inlined;
187
245
  }
188
- return str + "]";
246
+
247
+ // get the correct segment
248
+ reference<LeafSegment> segment(LeafSegment::Get(art, row_ids.ptr));
249
+ for (idx_t i = 0; i < position / Node::LEAF_SEGMENT_SIZE; i++) {
250
+ D_ASSERT(segment.get().next.IsSet());
251
+ segment = LeafSegment::Get(art, segment.get().next);
252
+ }
253
+
254
+ return segment.get().row_ids[position % Node::LEAF_SEGMENT_SIZE];
189
255
  }
190
256
 
191
- void Leaf::Merge(ART &art, Node *&l_node, Node *&r_node) {
192
- Leaf *l_n = (Leaf *)l_node;
193
- Leaf *r_n = (Leaf *)r_node;
257
+ uint32_t Leaf::FindRowId(const ART &art, Node &ptr, const row_t row_id) const {
194
258
 
195
- auto l_capacity = l_n->GetCapacity();
196
- auto l_row_ids = l_n->GetRowIds();
197
- auto r_row_ids = r_n->GetRowIds();
259
+ D_ASSERT(!IsInlined());
198
260
 
199
- if (l_n->count + r_n->count > l_capacity) {
200
- auto capacity = l_n->GetCapacity();
201
- auto new_capacity = NextPowerOfTwo(l_n->count + r_n->count);
202
- art.IncreaseMemorySize(sizeof(row_t) * (new_capacity - capacity));
203
- l_row_ids = l_n->Resize(l_row_ids, l_n->count, new_capacity);
261
+ auto remaining = count;
262
+ while (ptr.IsSet()) {
263
+
264
+ auto &segment = LeafSegment::Get(art, ptr);
265
+ auto search_count = MinValue(Node::LEAF_SEGMENT_SIZE, remaining);
266
+
267
+ // search in this segment
268
+ for (idx_t i = 0; i < search_count; i++) {
269
+ if (segment.row_ids[i] == row_id) {
270
+ return count - remaining + i;
271
+ }
272
+ }
273
+
274
+ // adjust loop variables
275
+ remaining -= search_count;
276
+ ptr = segment.next;
204
277
  }
278
+ return (uint32_t)DConstants::INVALID_INDEX;
279
+ }
280
+
281
+ string Leaf::ToString(const ART &art) const {
205
282
 
206
- // append row_ids to l_n
207
- memcpy(l_row_ids + l_n->count, r_row_ids, r_n->count * sizeof(row_t));
208
- l_n->count += r_n->count;
283
+ if (IsInlined()) {
284
+ return "Leaf (" + to_string(count) + "): [" + to_string(row_ids.inlined) + "]";
285
+ }
286
+
287
+ auto ptr = row_ids.ptr;
288
+ auto remaining = count;
289
+ string str = "";
290
+ uint32_t this_count = 0;
291
+ while (ptr.IsSet()) {
292
+ auto &segment = LeafSegment::Get(art, ptr);
293
+ auto to_string_count = Node::LEAF_SEGMENT_SIZE < remaining ? Node::LEAF_SEGMENT_SIZE : remaining;
294
+
295
+ for (idx_t i = 0; i < to_string_count; i++) {
296
+ str += ", " + to_string(segment.row_ids[i]);
297
+ this_count++;
298
+ }
299
+ remaining -= to_string_count;
300
+ ptr = segment.next;
301
+ }
302
+ return "Leaf (" + to_string(this_count) + ", " + to_string(count) + "): [" + str + "] \n";
209
303
  }
210
304
 
211
- BlockPointer Leaf::Serialize(duckdb::MetaBlockWriter &writer) {
305
+ BlockPointer Leaf::Serialize(const ART &art, MetaBlockWriter &writer) const {
212
306
 
213
- auto ptr = writer.GetBlockPointer();
214
- writer.Write(type);
215
- prefix.Serialize(writer);
216
- writer.Write<uint16_t>(count);
307
+ // get pointer and write fields
308
+ auto block_pointer = writer.GetBlockPointer();
309
+ writer.Write(NType::LEAF);
310
+ writer.Write<uint32_t>(count);
311
+ prefix.Serialize(art, writer);
217
312
 
218
- auto row_ids = GetRowIds();
219
- for (idx_t i = 0; i < count; i++) {
220
- writer.Write(row_ids[i]);
313
+ if (IsInlined()) {
314
+ writer.Write(row_ids.inlined);
315
+ return block_pointer;
221
316
  }
222
- return ptr;
317
+
318
+ D_ASSERT(row_ids.ptr.IsSet());
319
+ auto ptr = row_ids.ptr;
320
+ auto remaining = count;
321
+
322
+ // iterate all leaf segments and write their row IDs
323
+ while (ptr.IsSet()) {
324
+ auto &segment = LeafSegment::Get(art, ptr);
325
+ auto write_count = MinValue(Node::LEAF_SEGMENT_SIZE, remaining);
326
+
327
+ // write the row IDs
328
+ for (idx_t i = 0; i < write_count; i++) {
329
+ writer.Write(segment.row_ids[i]);
330
+ }
331
+
332
+ // adjust loop variables
333
+ remaining -= write_count;
334
+ ptr = segment.next;
335
+ }
336
+ D_ASSERT(remaining == 0);
337
+
338
+ return block_pointer;
223
339
  }
224
340
 
225
341
  void Leaf::Deserialize(ART &art, MetaBlockReader &reader) {
226
342
 
227
- prefix.Deserialize(reader);
228
- count = reader.Read<uint16_t>();
229
- if (count == 1) {
230
- // inlined
231
- auto row_id = reader.Read<row_t>();
232
- rowids.inlined = row_id;
233
-
234
- } else {
235
- // non-inlined
236
- auto row_ids = AllocateArray<row_t>(count + 1);
237
- row_ids[0] = count;
238
- for (idx_t i = 0; i < count; i++) {
239
- row_ids[i + 1] = reader.Read<row_t>();
343
+ auto count_p = reader.Read<uint32_t>();
344
+ prefix.Deserialize(art, reader);
345
+
346
+ // inlined
347
+ if (count_p == 1) {
348
+ row_ids.inlined = reader.Read<row_t>();
349
+ count = count_p;
350
+ return;
351
+ }
352
+
353
+ // copy into segments
354
+ count = 0;
355
+ reference<LeafSegment> segment(LeafSegment::New(art, row_ids.ptr));
356
+ for (idx_t i = 0; i < count_p; i++) {
357
+ segment = segment.get().Append(art, count, reader.Read<row_t>());
358
+ }
359
+ D_ASSERT(count_p == count);
360
+ }
361
+
362
+ void Leaf::Vacuum(ART &art) {
363
+
364
+ if (IsInlined()) {
365
+ return;
366
+ }
367
+
368
+ // first pointer has special treatment because we don't obtain it from a leaf segment
369
+ auto &allocator = Node::GetAllocator(art, NType::LEAF_SEGMENT);
370
+ if (allocator.NeedsVacuum(row_ids.ptr)) {
371
+ row_ids.ptr.SetPtr(allocator.VacuumPointer(row_ids.ptr));
372
+ }
373
+
374
+ auto ptr = row_ids.ptr;
375
+ while (ptr.IsSet()) {
376
+ auto &segment = LeafSegment::Get(art, ptr);
377
+ ptr = segment.next;
378
+ if (ptr.IsSet() && allocator.NeedsVacuum(ptr)) {
379
+ segment.next.SetPtr(allocator.VacuumPointer(ptr));
380
+ ptr = segment.next;
240
381
  }
241
- rowids.ptr = row_ids;
242
382
  }
243
383
  }
244
384
 
385
+ void Leaf::MoveInlinedToSegment(ART &art) {
386
+
387
+ D_ASSERT(IsInlined());
388
+
389
+ auto row_id = row_ids.inlined;
390
+ auto &segment = LeafSegment::New(art, row_ids.ptr);
391
+ segment.row_ids[0] = row_id;
392
+ }
393
+
245
394
  } // namespace duckdb
@@ -0,0 +1,42 @@
1
+ #include "duckdb/execution/index/art/leaf_segment.hpp"
2
+
3
+ #include "duckdb/execution/index/art/art.hpp"
4
+ #include "duckdb/execution/index/art/node.hpp"
5
+
6
+ namespace duckdb {
7
+
8
+ LeafSegment &LeafSegment::New(ART &art, Node &node) {
9
+
10
+ node.SetPtr(Node::GetAllocator(art, NType::LEAF_SEGMENT).New());
11
+ node.type = (uint8_t)NType::LEAF_SEGMENT;
12
+
13
+ auto &segment = LeafSegment::Get(art, node);
14
+ segment.next.Reset();
15
+ return segment;
16
+ }
17
+
18
+ LeafSegment &LeafSegment::Append(ART &art, uint32_t &count, const row_t row_id) {
19
+
20
+ reference<LeafSegment> segment(*this);
21
+ auto position = count % Node::LEAF_SEGMENT_SIZE;
22
+
23
+ // we need a new segment
24
+ if (position == 0 && count != 0) {
25
+ segment = LeafSegment::New(art, next);
26
+ }
27
+
28
+ segment.get().row_ids[position] = row_id;
29
+ count++;
30
+ return segment.get();
31
+ }
32
+
33
+ LeafSegment &LeafSegment::GetTail(const ART &art) {
34
+
35
+ reference<LeafSegment> segment(*this);
36
+ while (segment.get().next.IsSet()) {
37
+ segment = LeafSegment::Get(art, segment.get().next);
38
+ }
39
+ return segment.get();
40
+ }
41
+
42
+ } // namespace duckdb