duckdb 0.7.2-dev2552.0 → 0.7.2-dev2675.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/binding.gyp +7 -7
- package/package.json +2 -2
- package/src/duckdb/extension/parquet/parquet_statistics.cpp +3 -0
- package/src/duckdb/src/catalog/catalog_entry/duck_table_entry.cpp +2 -2
- package/src/duckdb/src/common/radix_partitioning.cpp +1 -1
- package/src/duckdb/src/execution/index/art/art.cpp +286 -269
- package/src/duckdb/src/execution/index/art/art_key.cpp +22 -32
- package/src/duckdb/src/execution/index/art/fixed_size_allocator.cpp +224 -0
- package/src/duckdb/src/execution/index/art/iterator.cpp +142 -123
- package/src/duckdb/src/execution/index/art/leaf.cpp +319 -170
- package/src/duckdb/src/execution/index/art/leaf_segment.cpp +42 -0
- package/src/duckdb/src/execution/index/art/node.cpp +444 -379
- package/src/duckdb/src/execution/index/art/node16.cpp +178 -114
- package/src/duckdb/src/execution/index/art/node256.cpp +117 -79
- package/src/duckdb/src/execution/index/art/node4.cpp +169 -114
- package/src/duckdb/src/execution/index/art/node48.cpp +175 -105
- package/src/duckdb/src/execution/index/art/prefix.cpp +405 -127
- package/src/duckdb/src/execution/index/art/prefix_segment.cpp +42 -0
- package/src/duckdb/src/execution/index/art/swizzleable_pointer.cpp +10 -85
- package/src/duckdb/src/execution/operator/join/physical_index_join.cpp +2 -1
- package/src/duckdb/src/execution/operator/persistent/base_csv_reader.cpp +2 -2
- package/src/duckdb/src/execution/operator/persistent/csv_reader_options.cpp +2 -0
- package/src/duckdb/src/execution/operator/schema/physical_create_index.cpp +11 -12
- package/src/duckdb/src/function/table/read_csv.cpp +5 -1
- package/src/duckdb/src/function/table/version/pragma_version.cpp +2 -2
- package/src/duckdb/src/include/duckdb/common/queue.hpp +1 -1
- package/src/duckdb/src/include/duckdb/execution/index/art/art.hpp +53 -45
- package/src/duckdb/src/include/duckdb/execution/index/art/art_key.hpp +29 -24
- package/src/duckdb/src/include/duckdb/execution/index/art/fixed_size_allocator.hpp +114 -0
- package/src/duckdb/src/include/duckdb/execution/index/art/iterator.hpp +26 -20
- package/src/duckdb/src/include/duckdb/execution/index/art/leaf.hpp +63 -39
- package/src/duckdb/src/include/duckdb/execution/index/art/leaf_segment.hpp +36 -0
- package/src/duckdb/src/include/duckdb/execution/index/art/node.hpp +98 -116
- package/src/duckdb/src/include/duckdb/execution/index/art/node16.hpp +48 -36
- package/src/duckdb/src/include/duckdb/execution/index/art/node256.hpp +52 -35
- package/src/duckdb/src/include/duckdb/execution/index/art/node4.hpp +46 -36
- package/src/duckdb/src/include/duckdb/execution/index/art/node48.hpp +57 -35
- package/src/duckdb/src/include/duckdb/execution/index/art/prefix.hpp +57 -50
- package/src/duckdb/src/include/duckdb/execution/index/art/prefix_segment.hpp +40 -0
- package/src/duckdb/src/include/duckdb/execution/index/art/swizzleable_pointer.hpp +38 -31
- package/src/duckdb/src/include/duckdb/execution/operator/persistent/csv_file_handle.hpp +2 -1
- package/src/duckdb/src/include/duckdb/execution/operator/persistent/csv_reader_options.hpp +2 -0
- package/src/duckdb/src/include/duckdb/parser/statement/insert_statement.hpp +4 -1
- package/src/duckdb/src/include/duckdb/parser/transformer.hpp +2 -1
- package/src/duckdb/src/include/duckdb/storage/buffer_manager.hpp +0 -5
- package/src/duckdb/src/include/duckdb/storage/index.hpp +13 -28
- package/src/duckdb/src/include/duckdb/storage/standard_buffer_manager.hpp +0 -2
- package/src/duckdb/src/include/duckdb/transaction/cleanup_state.hpp +5 -0
- package/src/duckdb/src/include/duckdb.h +26 -0
- package/src/duckdb/src/main/capi/helper-c.cpp +7 -0
- package/src/duckdb/src/parser/statement/insert_statement.cpp +15 -6
- package/src/duckdb/src/parser/transform/constraint/transform_constraint.cpp +1 -1
- package/src/duckdb/src/parser/transform/expression/transform_function.cpp +18 -5
- package/src/duckdb/src/parser/transform/statement/transform_insert.cpp +5 -7
- package/src/duckdb/src/planner/binder/statement/bind_create.cpp +20 -7
- package/src/duckdb/src/planner/binder/statement/bind_insert.cpp +14 -9
- package/src/duckdb/src/storage/checkpoint_manager.cpp +11 -9
- package/src/duckdb/src/storage/data_table.cpp +6 -3
- package/src/duckdb/src/storage/index.cpp +18 -6
- package/src/duckdb/src/storage/local_storage.cpp +8 -2
- package/src/duckdb/src/storage/standard_buffer_manager.cpp +0 -9
- package/src/duckdb/src/storage/wal_replay.cpp +1 -1
- package/src/duckdb/src/transaction/cleanup_state.cpp +6 -0
- package/src/duckdb/src/transaction/undo_buffer.cpp +8 -0
- package/src/duckdb/ub_extension_icu_third_party_icu_i18n.cpp +4 -4
- package/src/duckdb/ub_src_execution_index_art.cpp +7 -1
@@ -1,31 +1,34 @@
|
|
1
1
|
#include "duckdb/execution/index/art/iterator.hpp"
|
2
2
|
|
3
|
+
#include "duckdb/common/limits.hpp"
|
3
4
|
#include "duckdb/execution/index/art/art.hpp"
|
5
|
+
#include "duckdb/execution/index/art/node.hpp"
|
6
|
+
#include "duckdb/execution/index/art/prefix.hpp"
|
4
7
|
|
5
8
|
namespace duckdb {
|
6
|
-
uint8_t &IteratorCurrentKey::operator[](idx_t idx) {
|
7
|
-
if (idx >= key.size()) {
|
8
|
-
key.push_back(0);
|
9
|
-
}
|
10
|
-
D_ASSERT(idx < key.size());
|
11
|
-
return key[idx];
|
12
|
-
}
|
13
9
|
|
14
|
-
|
15
|
-
void IteratorCurrentKey::Push(uint8_t byte) {
|
10
|
+
void IteratorCurrentKey::Push(const uint8_t byte) {
|
16
11
|
if (cur_key_pos == key.size()) {
|
17
12
|
key.push_back(byte);
|
18
13
|
}
|
19
14
|
D_ASSERT(cur_key_pos < key.size());
|
20
15
|
key[cur_key_pos++] = byte;
|
21
16
|
}
|
22
|
-
|
23
|
-
void IteratorCurrentKey::Pop(idx_t n) {
|
17
|
+
|
18
|
+
void IteratorCurrentKey::Pop(const idx_t n) {
|
24
19
|
cur_key_pos -= n;
|
25
20
|
D_ASSERT(cur_key_pos <= key.size());
|
26
21
|
}
|
27
22
|
|
28
|
-
|
23
|
+
uint8_t &IteratorCurrentKey::operator[](idx_t idx) {
|
24
|
+
if (idx >= key.size()) {
|
25
|
+
key.push_back(0);
|
26
|
+
}
|
27
|
+
D_ASSERT(idx < key.size());
|
28
|
+
return key[idx];
|
29
|
+
}
|
30
|
+
|
31
|
+
bool IteratorCurrentKey::operator>(const ARTKey &k) const {
|
29
32
|
for (idx_t i = 0; i < MinValue<idx_t>(cur_key_pos, k.len); i++) {
|
30
33
|
if (key[i] > k.data[i]) {
|
31
34
|
return true;
|
@@ -36,7 +39,7 @@ bool IteratorCurrentKey::operator>(const Key &k) const {
|
|
36
39
|
return cur_key_pos > k.len;
|
37
40
|
}
|
38
41
|
|
39
|
-
bool IteratorCurrentKey::operator>=(const
|
42
|
+
bool IteratorCurrentKey::operator>=(const ARTKey &k) const {
|
40
43
|
for (idx_t i = 0; i < MinValue<idx_t>(cur_key_pos, k.len); i++) {
|
41
44
|
if (key[i] > k.data[i]) {
|
42
45
|
return true;
|
@@ -47,7 +50,7 @@ bool IteratorCurrentKey::operator>=(const Key &k) const {
|
|
47
50
|
return cur_key_pos >= k.len;
|
48
51
|
}
|
49
52
|
|
50
|
-
bool IteratorCurrentKey::operator==(const
|
53
|
+
bool IteratorCurrentKey::operator==(const ARTKey &k) const {
|
51
54
|
if (cur_key_pos != k.len) {
|
52
55
|
return false;
|
53
56
|
}
|
@@ -60,96 +63,76 @@ bool IteratorCurrentKey::operator==(const Key &k) const {
|
|
60
63
|
}
|
61
64
|
|
62
65
|
void Iterator::FindMinimum(Node &node) {
|
63
|
-
|
64
|
-
idx_t pos = 0;
|
66
|
+
|
65
67
|
// reconstruct the prefix
|
66
|
-
|
67
|
-
|
68
|
+
// FIXME: get all bytes at once to increase performance
|
69
|
+
auto &node_prefix = node.GetPrefix(*art);
|
70
|
+
for (idx_t i = 0; i < node_prefix.count; i++) {
|
71
|
+
cur_key.Push(node_prefix.GetByte(*art, i));
|
68
72
|
}
|
69
|
-
|
70
|
-
|
71
|
-
|
73
|
+
|
74
|
+
// found the minimum
|
75
|
+
if (node.DecodeARTNodeType() == NType::LEAF) {
|
76
|
+
last_leaf = Node::GetAllocator(*art, NType::LEAF).Get<Leaf>(node);
|
72
77
|
return;
|
73
|
-
case NodeType::N4: {
|
74
|
-
next = ((Node4 &)node).children[0].Unswizzle(*art);
|
75
|
-
cur_key.Push(((Node4 &)node).key[0]);
|
76
|
-
break;
|
77
|
-
}
|
78
|
-
case NodeType::N16: {
|
79
|
-
next = ((Node16 &)node).children[0].Unswizzle(*art);
|
80
|
-
cur_key.Push(((Node16 &)node).key[0]);
|
81
|
-
break;
|
82
|
-
}
|
83
|
-
case NodeType::N48: {
|
84
|
-
auto &n48 = (Node48 &)node;
|
85
|
-
while (n48.child_index[pos] == Node::EMPTY_MARKER) {
|
86
|
-
pos++;
|
87
|
-
}
|
88
|
-
cur_key.Push(pos);
|
89
|
-
next = n48.children[n48.child_index[pos]].Unswizzle(*art);
|
90
|
-
break;
|
91
|
-
}
|
92
|
-
case NodeType::N256: {
|
93
|
-
auto &n256 = (Node256 &)node;
|
94
|
-
while (!n256.children[pos]) {
|
95
|
-
pos++;
|
96
|
-
}
|
97
|
-
cur_key.Push(pos);
|
98
|
-
next = (Node *)n256.children[pos].Unswizzle(*art);
|
99
|
-
break;
|
100
|
-
}
|
101
78
|
}
|
102
|
-
|
79
|
+
|
80
|
+
// go to the leftmost entry in the current node
|
81
|
+
uint8_t byte = 0;
|
82
|
+
auto next = node.GetNextChild(*art, byte);
|
83
|
+
D_ASSERT(next);
|
84
|
+
cur_key.Push(byte);
|
85
|
+
|
86
|
+
// recurse
|
87
|
+
nodes.emplace(node, byte);
|
103
88
|
FindMinimum(*next);
|
104
89
|
}
|
105
90
|
|
106
|
-
void Iterator::PushKey(Node
|
107
|
-
|
108
|
-
|
109
|
-
cur_key.Push(((Node4 *)cur_node)->key[pos]);
|
110
|
-
break;
|
111
|
-
case NodeType::N16:
|
112
|
-
cur_key.Push(((Node16 *)cur_node)->key[pos]);
|
113
|
-
break;
|
114
|
-
case NodeType::N48:
|
115
|
-
case NodeType::N256:
|
116
|
-
cur_key.Push(pos);
|
117
|
-
break;
|
118
|
-
case NodeType::NLeaf:
|
119
|
-
break;
|
91
|
+
void Iterator::PushKey(const Node &node, const uint8_t byte) {
|
92
|
+
if (node.DecodeARTNodeType() != NType::LEAF) {
|
93
|
+
cur_key.Push(byte);
|
120
94
|
}
|
121
95
|
}
|
122
96
|
|
123
|
-
bool Iterator::Scan(
|
97
|
+
bool Iterator::Scan(const ARTKey &key, const idx_t &max_count, vector<row_t> &result_ids, const bool &is_inclusive) {
|
98
|
+
|
124
99
|
bool has_next;
|
125
100
|
do {
|
126
|
-
if (!
|
101
|
+
if (!key.Empty()) {
|
102
|
+
// no more row IDs within the key bounds
|
127
103
|
if (is_inclusive) {
|
128
|
-
if (cur_key >
|
129
|
-
|
104
|
+
if (cur_key > key) {
|
105
|
+
return true;
|
130
106
|
}
|
131
107
|
} else {
|
132
|
-
if (cur_key >=
|
133
|
-
|
108
|
+
if (cur_key >= key) {
|
109
|
+
return true;
|
134
110
|
}
|
135
111
|
}
|
136
112
|
}
|
113
|
+
|
114
|
+
// adding more elements would exceed the max count
|
137
115
|
if (result_ids.size() + last_leaf->count > max_count) {
|
138
|
-
// adding these elements would exceed the max count
|
139
116
|
return false;
|
140
117
|
}
|
118
|
+
|
119
|
+
// FIXME: copy all at once to improve performance
|
141
120
|
for (idx_t i = 0; i < last_leaf->count; i++) {
|
142
|
-
row_t row_id = last_leaf->GetRowId(i);
|
121
|
+
row_t row_id = last_leaf->GetRowId(*art, i);
|
143
122
|
result_ids.push_back(row_id);
|
144
123
|
}
|
124
|
+
|
125
|
+
// get the next leaf
|
145
126
|
has_next = Next();
|
127
|
+
|
146
128
|
} while (has_next);
|
129
|
+
|
147
130
|
return true;
|
148
131
|
}
|
149
132
|
|
150
133
|
void Iterator::PopNode() {
|
151
134
|
auto cur_node = nodes.top();
|
152
|
-
idx_t elements_to_pop = cur_node.node
|
135
|
+
idx_t elements_to_pop = cur_node.node.GetPrefix(*art).count + (nodes.size() != 1);
|
153
136
|
cur_key.Pop(elements_to_pop);
|
154
137
|
nodes.pop();
|
155
138
|
}
|
@@ -157,79 +140,110 @@ void Iterator::PopNode() {
|
|
157
140
|
bool Iterator::Next() {
|
158
141
|
if (!nodes.empty()) {
|
159
142
|
auto cur_node = nodes.top().node;
|
160
|
-
if (cur_node
|
161
|
-
//
|
143
|
+
if (cur_node.DecodeARTNodeType() == NType::LEAF) {
|
144
|
+
// pop leaf
|
145
|
+
// we must pop the prefix size + the key to the node, unless we are popping the root
|
162
146
|
PopNode();
|
163
147
|
}
|
164
148
|
}
|
165
149
|
|
166
|
-
//
|
150
|
+
// look for the next leaf
|
167
151
|
while (!nodes.empty()) {
|
152
|
+
|
168
153
|
// cur_node
|
169
154
|
auto &top = nodes.top();
|
170
|
-
Node
|
171
|
-
|
172
|
-
|
173
|
-
|
155
|
+
Node node = top.node;
|
156
|
+
|
157
|
+
// found a leaf: move to next node
|
158
|
+
if (node.DecodeARTNodeType() == NType::LEAF) {
|
159
|
+
last_leaf = Node::GetAllocator(*art, NType::LEAF).Get<Leaf>(node);
|
174
160
|
return true;
|
175
161
|
}
|
176
|
-
|
177
|
-
|
178
|
-
if (top.
|
179
|
-
//
|
180
|
-
|
181
|
-
|
162
|
+
|
163
|
+
// find next node
|
164
|
+
if (top.byte == NumericLimits<uint8_t>::Maximum()) {
|
165
|
+
// no node found: move up the tree, pop prefix and key of current node
|
166
|
+
PopNode();
|
167
|
+
continue;
|
168
|
+
}
|
169
|
+
|
170
|
+
top.byte == 0 ? top.byte : top.byte++;
|
171
|
+
auto next_node = node.GetNextChild(*art, top.byte);
|
172
|
+
|
173
|
+
if (next_node) {
|
174
|
+
// add the next node's key byte
|
175
|
+
PushKey(node, top.byte);
|
176
|
+
|
182
177
|
// add prefix of new node
|
183
|
-
|
184
|
-
|
178
|
+
// FIXME: get all bytes at once to increase performance
|
179
|
+
auto &next_node_prefix = next_node->GetPrefix(*art);
|
180
|
+
for (idx_t i = 0; i < next_node_prefix.count; i++) {
|
181
|
+
cur_key.Push(next_node_prefix.GetByte(*art, i));
|
185
182
|
}
|
183
|
+
|
186
184
|
// next node found: push it
|
187
|
-
nodes.
|
185
|
+
nodes.emplace(*next_node, 0);
|
188
186
|
} else {
|
189
|
-
|
187
|
+
|
188
|
+
// no node found: move up the tree, pop prefix and key of current node
|
190
189
|
PopNode();
|
191
190
|
}
|
192
191
|
}
|
193
192
|
return false;
|
194
193
|
}
|
195
194
|
|
196
|
-
bool Iterator::LowerBound(Node
|
197
|
-
|
198
|
-
if (!node) {
|
195
|
+
bool Iterator::LowerBound(Node node, const ARTKey &key, const bool &is_inclusive) {
|
196
|
+
|
197
|
+
if (!node.IsSet()) {
|
199
198
|
return false;
|
200
199
|
}
|
200
|
+
|
201
201
|
idx_t depth = 0;
|
202
|
+
bool equal = true;
|
202
203
|
while (true) {
|
203
|
-
|
204
|
+
|
205
|
+
nodes.emplace(node, 0);
|
204
206
|
auto &top = nodes.top();
|
207
|
+
|
205
208
|
// reconstruct the prefix
|
206
|
-
|
207
|
-
|
209
|
+
// FIXME: get all bytes at once to increase performance
|
210
|
+
reference<Prefix> node_prefix(top.node.GetPrefix(*art));
|
211
|
+
for (idx_t i = 0; i < node_prefix.get().count; i++) {
|
212
|
+
cur_key.Push(node_prefix.get().GetByte(*art, i));
|
208
213
|
}
|
214
|
+
|
209
215
|
// greater case: find leftmost leaf node directly
|
210
216
|
if (!equal) {
|
211
|
-
while (node
|
212
|
-
|
213
|
-
|
214
|
-
|
215
|
-
|
217
|
+
while (node.DecodeARTNodeType() != NType::LEAF) {
|
218
|
+
|
219
|
+
uint8_t byte = 0;
|
220
|
+
auto next_node = *node.GetNextChild(*art, byte);
|
221
|
+
D_ASSERT(next_node.IsSet());
|
222
|
+
|
223
|
+
PushKey(node, byte);
|
224
|
+
nodes.emplace(node, byte);
|
225
|
+
node = next_node;
|
226
|
+
|
216
227
|
// reconstruct the prefix
|
217
|
-
|
218
|
-
|
228
|
+
node_prefix = node.GetPrefix(*art);
|
229
|
+
for (idx_t i = 0; i < node_prefix.get().count; i++) {
|
230
|
+
cur_key.Push(node_prefix.get().GetByte(*art, i));
|
219
231
|
}
|
232
|
+
|
220
233
|
auto &c_top = nodes.top();
|
221
234
|
c_top.node = node;
|
222
235
|
}
|
223
236
|
}
|
224
|
-
|
237
|
+
|
238
|
+
if (node.DecodeARTNodeType() == NType::LEAF) {
|
225
239
|
// found a leaf node: check if it is bigger or equal than the current key
|
226
|
-
|
227
|
-
|
240
|
+
last_leaf = Node::GetAllocator(*art, NType::LEAF).Get<Leaf>(node);
|
241
|
+
|
228
242
|
// if the search is not inclusive the leaf node could still be equal to the current value
|
229
243
|
// check if leaf is equal to the current key
|
230
244
|
if (cur_key == key) {
|
231
245
|
// if it's not inclusive check if there is a next leaf
|
232
|
-
if (!
|
246
|
+
if (!is_inclusive && !Next()) {
|
233
247
|
return false;
|
234
248
|
} else {
|
235
249
|
return true;
|
@@ -248,33 +262,38 @@ bool Iterator::LowerBound(Node *node, Key &key, bool inclusive) {
|
|
248
262
|
|
249
263
|
return Next();
|
250
264
|
}
|
265
|
+
|
251
266
|
// equal case:
|
252
|
-
|
253
|
-
|
254
|
-
|
255
|
-
|
267
|
+
node_prefix = node.GetPrefix(*art);
|
268
|
+
auto mismatch_pos = node_prefix.get().KeyMismatchPosition(*art, key, depth);
|
269
|
+
if (mismatch_pos != node_prefix.get().count) {
|
270
|
+
if (node_prefix.get().GetByte(*art, mismatch_pos) < key[depth + mismatch_pos]) {
|
271
|
+
// less
|
256
272
|
PopNode();
|
257
273
|
return Next();
|
258
|
-
} else {
|
259
|
-
// Greater
|
260
|
-
top.pos = DConstants::INVALID_INDEX;
|
261
|
-
return Next();
|
262
274
|
}
|
275
|
+
// greater
|
276
|
+
top.byte = 0;
|
277
|
+
return Next();
|
263
278
|
}
|
264
279
|
|
265
280
|
// prefix matches, search inside the child for the key
|
266
|
-
depth +=
|
281
|
+
depth += node_prefix.get().count;
|
282
|
+
top.byte = key[depth];
|
283
|
+
auto child = node.GetNextChild(*art, top.byte);
|
284
|
+
equal = key[depth] == top.byte;
|
267
285
|
|
268
|
-
|
269
|
-
//
|
270
|
-
|
271
|
-
if (top.pos == DConstants::INVALID_INDEX) {
|
286
|
+
// the maximum key byte of the current node is less than the key
|
287
|
+
// fall back to the previous node
|
288
|
+
if (!child) {
|
272
289
|
PopNode();
|
273
290
|
return Next();
|
274
291
|
}
|
275
|
-
|
276
|
-
node
|
277
|
-
|
292
|
+
|
293
|
+
PushKey(node, top.byte);
|
294
|
+
node = *child;
|
295
|
+
|
296
|
+
// all children of this node qualify as greater or equal
|
278
297
|
depth++;
|
279
298
|
}
|
280
299
|
}
|