duckdb 0.8.2-dev2399.0 → 0.8.2-dev2669.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/binding.gyp +1 -0
- package/package.json +1 -1
- package/src/duckdb/extension/icu/icu-datepart.cpp +3 -3
- package/src/duckdb/src/catalog/catalog_entry/duck_table_entry.cpp +1 -1
- package/src/duckdb/src/catalog/default/default_functions.cpp +5 -0
- package/src/duckdb/src/common/enum_util.cpp +35 -1
- package/src/duckdb/src/common/http_state.cpp +78 -0
- package/src/duckdb/src/core_functions/function_list.cpp +2 -2
- package/src/duckdb/src/core_functions/scalar/list/array_slice.cpp +314 -82
- package/src/duckdb/src/execution/expression_executor/execute_parameter.cpp +2 -2
- package/src/duckdb/src/execution/index/art/art.cpp +43 -31
- package/src/duckdb/src/execution/index/art/leaf.cpp +47 -33
- package/src/duckdb/src/execution/index/art/node.cpp +31 -24
- package/src/duckdb/src/execution/index/art/prefix.cpp +100 -16
- package/src/duckdb/src/execution/operator/schema/physical_create_index.cpp +54 -31
- package/src/duckdb/src/execution/physical_plan/plan_create_index.cpp +32 -15
- package/src/duckdb/src/function/table/arrow/arrow_duck_schema.cpp +57 -0
- package/src/duckdb/src/function/table/arrow.cpp +95 -92
- package/src/duckdb/src/function/table/arrow_conversion.cpp +45 -68
- package/src/duckdb/src/function/table/version/pragma_version.cpp +2 -2
- package/src/duckdb/src/include/duckdb/common/case_insensitive_map.hpp +1 -0
- package/src/duckdb/src/include/duckdb/common/enum_util.hpp +8 -0
- package/src/duckdb/src/include/duckdb/common/helper.hpp +8 -3
- package/src/duckdb/src/include/duckdb/common/http_state.hpp +61 -28
- package/src/duckdb/src/include/duckdb/common/types/value.hpp +4 -1
- package/src/duckdb/src/include/duckdb/core_functions/scalar/list_functions.hpp +4 -4
- package/src/duckdb/src/include/duckdb/execution/index/art/art.hpp +7 -5
- package/src/duckdb/src/include/duckdb/execution/index/art/leaf.hpp +6 -6
- package/src/duckdb/src/include/duckdb/execution/index/art/node.hpp +6 -0
- package/src/duckdb/src/include/duckdb/execution/index/art/prefix.hpp +9 -11
- package/src/duckdb/src/include/duckdb/execution/operator/schema/physical_create_index.hpp +8 -1
- package/src/duckdb/src/include/duckdb/function/table/arrow/arrow_duck_schema.hpp +99 -0
- package/src/duckdb/src/include/duckdb/function/table/arrow.hpp +6 -36
- package/src/duckdb/src/include/duckdb/main/capi/capi_internal.hpp +3 -1
- package/src/duckdb/src/include/duckdb/main/client_context.hpp +15 -14
- package/src/duckdb/src/include/duckdb/main/prepared_statement.hpp +73 -5
- package/src/duckdb/src/include/duckdb/main/prepared_statement_data.hpp +6 -6
- package/src/duckdb/src/include/duckdb/parser/expression/operator_expression.hpp +20 -3
- package/src/duckdb/src/include/duckdb/parser/expression/parameter_expression.hpp +17 -1
- package/src/duckdb/src/include/duckdb/parser/statement/execute_statement.hpp +1 -1
- package/src/duckdb/src/include/duckdb/parser/transformer.hpp +5 -3
- package/src/duckdb/src/include/duckdb/planner/bound_parameter_map.hpp +2 -1
- package/src/duckdb/src/include/duckdb/planner/expression/bound_parameter_data.hpp +20 -5
- package/src/duckdb/src/include/duckdb/planner/expression/bound_parameter_expression.hpp +3 -3
- package/src/duckdb/src/include/duckdb/planner/planner.hpp +4 -3
- package/src/duckdb/src/include/duckdb/storage/object_cache.hpp +1 -1
- package/src/duckdb/src/include/duckdb/verification/prepared_statement_verifier.hpp +1 -1
- package/src/duckdb/src/include/duckdb.h +16 -0
- package/src/duckdb/src/main/capi/pending-c.cpp +6 -0
- package/src/duckdb/src/main/capi/prepared-c.cpp +52 -4
- package/src/duckdb/src/main/client_context.cpp +27 -17
- package/src/duckdb/src/main/client_verify.cpp +17 -0
- package/src/duckdb/src/main/extension/extension_helper.cpp +2 -1
- package/src/duckdb/src/main/prepared_statement.cpp +38 -11
- package/src/duckdb/src/main/prepared_statement_data.cpp +23 -18
- package/src/duckdb/src/parser/expression/parameter_expression.cpp +7 -7
- package/src/duckdb/src/parser/statement/execute_statement.cpp +2 -2
- package/src/duckdb/src/parser/transform/expression/transform_array_access.cpp +13 -4
- package/src/duckdb/src/parser/transform/expression/transform_param_ref.cpp +45 -26
- package/src/duckdb/src/parser/transform/statement/transform_prepare.cpp +28 -6
- package/src/duckdb/src/parser/transformer.cpp +27 -9
- package/src/duckdb/src/planner/binder/expression/bind_parameter_expression.cpp +10 -10
- package/src/duckdb/src/planner/binder/statement/bind_execute.cpp +13 -7
- package/src/duckdb/src/planner/expression/bound_parameter_expression.cpp +13 -13
- package/src/duckdb/src/planner/planner.cpp +7 -6
- package/src/duckdb/src/storage/checkpoint_manager.cpp +1 -1
- package/src/duckdb/src/storage/serialization/serialize_expression.cpp +3 -3
- package/src/duckdb/src/storage/serialization/serialize_parsed_expression.cpp +2 -2
- package/src/duckdb/src/verification/prepared_statement_verifier.cpp +16 -11
- package/src/duckdb/third_party/libpg_query/include/nodes/parsenodes.hpp +1 -0
- package/src/duckdb/third_party/libpg_query/src_backend_parser_gram.cpp +12855 -12282
- package/src/duckdb/ub_src_common.cpp +2 -0
- package/src/duckdb/ub_src_function_table_arrow.cpp +2 -0
package/src/duckdb/src/execution/index/art/leaf.cpp

@@ -42,19 +42,30 @@ void Leaf::New(ART &art, reference<Node> &node, const row_t *row_ids, idx_t coun

 void Leaf::Free(ART &art, Node &node) {

-
-
-
+	Node current_node = node;
+	Node next_node;
+	while (current_node.IsSet() && !current_node.IsSerialized()) {
+		next_node = Leaf::Get(art, current_node).ptr;
+		Node::GetAllocator(art, NType::LEAF).Free(current_node);
+		current_node = next_node;
+	}
+
+	node.Reset();
 }

 void Leaf::InitializeMerge(ART &art, Node &node, const ARTFlags &flags) {

-
-	D_ASSERT(node.GetType() == NType::LEAF);
+	auto merge_buffer_count = flags.merge_buffer_counts[(uint8_t)NType::LEAF - 1];

-
-
-
+	Node next_node = node;
+	node.AddToBufferID(merge_buffer_count);
+
+	while (next_node.IsSet()) {
+		auto &leaf = Leaf::Get(art, next_node);
+		next_node = leaf.ptr;
+		if (leaf.ptr.IsSet()) {
+			leaf.ptr.AddToBufferID(merge_buffer_count);
+		}
 	}
 }

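The hunk above (like the Prefix and Node hunks further down) replaces recursive destruction of a chained node list with an explicit loop that remembers the successor before freeing the current link, so the call stack no longer grows with the chain length. A minimal, self-contained sketch of that idiom, using a hypothetical ChainNode type rather than DuckDB's Node class:

#include <iostream>

// Hypothetical stand-in for a chained ART node; not a DuckDB type.
struct ChainNode {
	int payload;
	ChainNode *next;
};

// Iterative free: stash the successor before releasing the current link.
void FreeChain(ChainNode *&head) {
	ChainNode *current = head;
	while (current) {
		ChainNode *next = current->next;
		delete current;
		current = next;
	}
	head = nullptr; // analogous to node.Reset() after the loop in the hunk above
}

int main() {
	auto *head = new ChainNode{1, new ChainNode{2, new ChainNode{3, nullptr}}};
	FreeChain(head);
	std::cout << (head == nullptr) << "\n"; // prints 1
}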
@@ -290,7 +301,6 @@ string Leaf::VerifyAndToString(ART &art, Node &node) {
 		return "Leaf [count: 1, row ID: " + to_string(node.GetRowId()) + "]";
 	}

-	// NOTE: we could do this recursively, but the function-call overhead can become kinda crazy
 	string str = "";

 	reference<Node> node_ref(node);
@@ -322,46 +332,51 @@ BlockPointer Leaf::Serialize(ART &art, Node &node, MetaBlockWriter &writer) {
 		return block_pointer;
 	}

-	// recurse into the child and retrieve its block pointer
-	auto &leaf = Leaf::Get(art, node);
-	auto child_block_pointer = leaf.ptr.Serialize(art, writer);
-
-	// get pointer and write fields
 	auto block_pointer = writer.GetBlockPointer();
 	writer.Write(NType::LEAF);
-
+	idx_t total_count = Leaf::TotalCount(art, node);
+	writer.Write<idx_t>(total_count);

-	// write row IDs
-
-
-
+	// iterate all leaves and write their row IDs
+	reference<Node> ref_node(node);
+	while (ref_node.get().IsSet()) {
+		D_ASSERT(!ref_node.get().IsSerialized());
+		auto &leaf = Leaf::Get(art, ref_node);

-
-
-
+		// write row IDs
+		for (idx_t i = 0; i < leaf.count; i++) {
+			writer.Write(leaf.row_ids[i]);
+		}
+		ref_node = leaf.ptr;
+	}

 	return block_pointer;
 }

 void Leaf::Deserialize(ART &art, Node &node, MetaBlockReader &reader) {

-
+	auto total_count = reader.Read<idx_t>();
+	reference<Node> ref_node(node);

-
-
+	while (total_count) {
+		ref_node.get() = Node::GetAllocator(art, NType::LEAF).New();
+		ref_node.get().SetType((uint8_t)NType::LEAF);

-
-
-		leaf.
-
+		auto &leaf = Leaf::Get(art, ref_node);
+
+		leaf.count = MinValue((idx_t)Node::LEAF_SIZE, total_count);
+		for (idx_t i = 0; i < leaf.count; i++) {
+			leaf.row_ids[i] = reader.Read<row_t>();
+		}

-
-
+		total_count -= leaf.count;
+		ref_node = leaf.ptr;
+		leaf.ptr.Reset();
+	}
 }

 void Leaf::Vacuum(ART &art, Node &node) {

-	// NOTE: we could do this recursively, but the function-call overhead can become kinda crazy
 	auto &allocator = Node::GetAllocator(art, NType::LEAF);

 	reference<Node> node_ref(node);
@@ -373,7 +388,6 @@ void Leaf::Vacuum(ART &art, Node &node) {
 		auto &leaf = Leaf::Get(art, node_ref);
 		node_ref = leaf.ptr;
 	}
-	return;
 }

 void Leaf::MoveInlinedToLeaf(ART &art, Node &node) {
package/src/duckdb/src/execution/index/art/node.cpp

@@ -61,7 +61,6 @@ void Node::New(ART &art, Node &node, const NType type) {

 void Node::Free(ART &art, Node &node) {

-	// recursively free all nodes that are in-memory, and skip serialized and empty nodes
 	if (!node.IsSet()) {
 		return;
 	}
@@ -72,11 +71,11 @@ void Node::Free(ART &art, Node &node) {
 	auto type = node.GetType();
 	switch (type) {
 	case NType::PREFIX:
-
-
+		// iterative
+		return Prefix::Free(art, node);
 	case NType::LEAF:
-
-
+		// iterative
+		return Leaf::Free(art, node);
 	case NType::NODE_4:
 		Node4::Free(art, node);
 		break;
@@ -90,8 +89,7 @@ void Node::Free(ART &art, Node &node) {
 		Node256::Free(art, node);
 		break;
 	case NType::LEAF_INLINED:
-		node.Reset();
-		return;
+		return node.Reset();
 	}

 	Node::GetAllocator(art, type).Free(node);
@@ -236,8 +234,10 @@ BlockPointer Node::Serialize(ART &art, MetaBlockWriter &writer) {

 	switch (GetType()) {
 	case NType::PREFIX:
-
+		// iterative
+		return Prefix::Serialize(art, *this, writer);
 	case NType::LEAF:
+		// iterative
 		return Leaf::Serialize(art, *this, writer);
 	case NType::NODE_4:
 		return Node4::Get(art, *this).Serialize(art, writer);
@@ -263,19 +263,23 @@ void Node::Deserialize(ART &art) {
 	SetType(reader.Read<uint8_t>());

 	auto decoded_type = GetType();
+
+	// iterative functions
+	if (decoded_type == NType::PREFIX) {
+		return Prefix::Deserialize(art, *this, reader);
+	}
 	if (decoded_type == NType::LEAF_INLINED) {
-		SetRowId(reader.Read<row_t>());
-
+		return SetRowId(reader.Read<row_t>());
+	}
+	if (decoded_type == NType::LEAF) {
+		return Leaf::Deserialize(art, *this, reader);
 	}

 	*this = Node::GetAllocator(art, decoded_type).New();
 	SetType((uint8_t)decoded_type);

+	// recursive functions
 	switch (decoded_type) {
-	case NType::PREFIX:
-		return Prefix::Get(art, *this).Deserialize(reader);
-	case NType::LEAF:
-		return Leaf::Deserialize(art, *this, reader);
 	case NType::NODE_4:
 		return Node4::Get(art, *this).Deserialize(reader);
 	case NType::NODE_16:
@@ -363,7 +367,7 @@ NType Node::GetARTNodeTypeByCount(const idx_t count) {
 }

 FixedSizeAllocator &Node::GetAllocator(const ART &art, NType type) {
-	return *art.allocators[(uint8_t)type - 1];
+	return (*art.allocators)[(uint8_t)type - 1];
 }

 //===--------------------------------------------------------------------===//
@@ -377,11 +381,11 @@ void Node::InitializeMerge(ART &art, const ARTFlags &flags) {

 	switch (GetType()) {
 	case NType::PREFIX:
-
-
+		// iterative
+		return Prefix::InitializeMerge(art, *this, flags);
 	case NType::LEAF:
-
-
+		// iterative
+		return Leaf::InitializeMerge(art, *this, flags);
 	case NType::NODE_4:
 		Node4::Get(art, *this).InitializeMerge(art, flags);
 		break;
@@ -398,8 +402,7 @@ void Node::InitializeMerge(ART &art, const ARTFlags &flags) {
 		return;
 	}

-
-	data += flags.merge_buffer_counts[(uint8_t)GetType() - 1];
+	AddToBufferID(flags.merge_buffer_counts[(uint8_t)GetType() - 1]);
 }

 bool Node::Merge(ART &art, Node &other) {
@@ -572,11 +575,16 @@ void Node::Vacuum(ART &art, const ARTFlags &flags) {
 	}

 	auto node_type = GetType();
+
+	// iterative functions
+	if (node_type == NType::PREFIX) {
+		return Prefix::Vacuum(art, *this, flags);
+	}
 	if (node_type == NType::LEAF_INLINED) {
 		return;
 	}
 	if (node_type == NType::LEAF) {
-		if (flags.vacuum_flags[(uint8_t)
+		if (flags.vacuum_flags[(uint8_t)node_type - 1]) {
 			Leaf::Vacuum(art, *this);
 		}
 		return;
@@ -589,9 +597,8 @@ void Node::Vacuum(ART &art, const ARTFlags &flags) {
 		SetType((uint8_t)node_type);
 	}

+	// recursive functions
 	switch (node_type) {
-	case NType::PREFIX:
-		return Prefix::Get(art, *this).Vacuum(art, flags);
 	case NType::NODE_4:
 		return Node4::Get(art, *this).Vacuum(art, flags);
 	case NType::NODE_16:
package/src/duckdb/src/execution/index/art/prefix.cpp

@@ -55,9 +55,35 @@ void Prefix::New(ART &art, reference<Node> &node, const ARTKey &key, const uint3

 void Prefix::Free(ART &art, Node &node) {

-
-
-
+	Node current_node = node;
+	Node next_node;
+	while (current_node.IsSet() && !current_node.IsSerialized() && current_node.GetType() == NType::PREFIX) {
+		next_node = Prefix::Get(art, current_node).ptr;
+		Node::GetAllocator(art, NType::PREFIX).Free(current_node);
+		current_node = next_node;
+	}
+
+	Node::Free(art, current_node);
+	node.Reset();
+}
+
+void Prefix::InitializeMerge(ART &art, Node &node, const ARTFlags &flags) {
+
+	auto merge_buffer_count = flags.merge_buffer_counts[(uint8_t)NType::PREFIX - 1];
+
+	Node next_node = node;
+	reference<Prefix> prefix = Prefix::Get(art, next_node);
+
+	while (next_node.GetType() == NType::PREFIX) {
+		next_node = prefix.get().ptr;
+		if (prefix.get().ptr.GetType() == NType::PREFIX) {
+			prefix.get().ptr.AddToBufferID(merge_buffer_count);
+			prefix = Prefix::Get(art, next_node);
+		}
+	}
+
+	node.AddToBufferID(merge_buffer_count);
+	prefix.get().ptr.InitializeMerge(art, flags);
 }

 void Prefix::Concatenate(ART &art, Node &prefix_node, const uint8_t byte, Node &child_prefix_node) {
@@ -280,19 +306,28 @@ string Prefix::VerifyAndToString(ART &art, Node &node, const bool only_verify) {
 	return str + node_ref.get().VerifyAndToString(art, only_verify);
 }

-BlockPointer Prefix::Serialize(ART &art, MetaBlockWriter &writer) {
+BlockPointer Prefix::Serialize(ART &art, Node &node, MetaBlockWriter &writer) {

-
-
+	reference<Node> first_non_prefix(node);
+	idx_t total_count = Prefix::TotalCount(art, first_non_prefix);
+	auto child_block_pointer = first_non_prefix.get().Serialize(art, writer);

 	// get pointer and write fields
 	auto block_pointer = writer.GetBlockPointer();
 	writer.Write(NType::PREFIX);
-	writer.Write<
+	writer.Write<idx_t>(total_count);
+
+	reference<Node> current_node(node);
+	while (current_node.get().GetType() == NType::PREFIX) {

-
-
-
+		// write prefix bytes
+		D_ASSERT(!current_node.get().IsSerialized());
+		auto &prefix = Prefix::Get(art, current_node);
+		for (idx_t i = 0; i < prefix.data[Node::PREFIX_SIZE]; i++) {
+			writer.Write(prefix.data[i]);
+		}
+
+		current_node = prefix.ptr;
 	}

 	// write child block pointer
@@ -302,17 +337,48 @@ BlockPointer Prefix::Serialize(ART &art, MetaBlockWriter &writer) {
 	return block_pointer;
 }

-void Prefix::Deserialize(MetaBlockReader &reader) {
+void Prefix::Deserialize(ART &art, Node &node, MetaBlockReader &reader) {
+
+	auto total_count = reader.Read<idx_t>();
+	reference<Node> current_node(node);
+
+	while (total_count) {
+		current_node.get() = Node::GetAllocator(art, NType::PREFIX).New();
+		current_node.get().SetType((uint8_t)NType::PREFIX);

-
+		auto &prefix = Prefix::Get(art, current_node);
+		prefix.data[Node::PREFIX_SIZE] = MinValue((idx_t)Node::PREFIX_SIZE, total_count);

-
-
-
+		// read bytes
+		for (idx_t i = 0; i < prefix.data[Node::PREFIX_SIZE]; i++) {
+			prefix.data[i] = reader.Read<uint8_t>();
+		}
+
+		total_count -= prefix.data[Node::PREFIX_SIZE];
+		current_node = prefix.ptr;
+		prefix.ptr.Reset();
 	}

 	// read child block pointer
-
+	current_node.get() = Node(reader);
+}
+
+void Prefix::Vacuum(ART &art, Node &node, const ARTFlags &flags) {
+
+	bool flag_set = flags.vacuum_flags[(uint8_t)NType::PREFIX - 1];
+	auto &allocator = Node::GetAllocator(art, NType::PREFIX);
+
+	reference<Node> node_ref(node);
+	while (!node_ref.get().IsSerialized() && node_ref.get().GetType() == NType::PREFIX) {
+		if (flag_set && allocator.NeedsVacuum(node_ref)) {
+			node_ref.get() = allocator.VacuumPointer(node_ref);
+			node_ref.get().SetType((uint8_t)NType::PREFIX);
+		}
+		auto &prefix = Prefix::Get(art, node_ref);
+		node_ref = prefix.ptr;
+	}
+
+	node_ref.get().Vacuum(art, flags);
 }

 Prefix &Prefix::Append(ART &art, const uint8_t byte) {
@@ -356,4 +422,22 @@ void Prefix::Append(ART &art, Node other_prefix) {
 	D_ASSERT(prefix.get().ptr.GetType() != NType::PREFIX);
 }

+idx_t Prefix::TotalCount(ART &art, reference<Node> &node) {
+
+	// NOTE: first prefix in the prefix chain is already deserialized
+	D_ASSERT(node.get().IsSet() && !node.get().IsSerialized());
+
+	idx_t count = 0;
+	while (node.get().GetType() == NType::PREFIX) {
+		auto &prefix = Prefix::Get(art, node);
+		count += prefix.data[Node::PREFIX_SIZE];
+
+		if (prefix.ptr.IsSerialized()) {
+			prefix.ptr.Deserialize(art);
+		}
+		node = prefix.ptr;
+	}
+	return count;
+}
+
 } // namespace duckdb
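Both the Leaf and Prefix hunks move to the same storage layout: the total element count is written once, followed by the elements from every link in the chain, so deserialization can rebuild fixed-size links until the count is exhausted. A rough, self-contained sketch of that layout, using a plain byte buffer and a hypothetical Segment type instead of DuckDB's MetaBlockWriter/MetaBlockReader:

#include <algorithm>
#include <cstdint>
#include <cstring>
#include <memory>
#include <vector>

// Each segment stores up to kSegmentSize bytes plus a pointer to the next
// segment, mirroring fixed-size nodes chained through a `ptr` field.
constexpr size_t kSegmentSize = 15;

struct Segment {
	uint8_t data[kSegmentSize];
	uint8_t count = 0;
	std::unique_ptr<Segment> next;
};

// Serialize: write the total byte count once, then walk the chain and append the bytes.
std::vector<uint8_t> Serialize(const Segment &head) {
	uint64_t total = 0;
	for (const Segment *s = &head; s; s = s->next.get()) {
		total += s->count;
	}
	std::vector<uint8_t> out(sizeof(uint64_t));
	std::memcpy(out.data(), &total, sizeof(total));
	for (const Segment *s = &head; s; s = s->next.get()) {
		out.insert(out.end(), s->data, s->data + s->count);
	}
	return out;
}

// Deserialize: read the count, then rebuild segments of at most kSegmentSize bytes each.
std::unique_ptr<Segment> Deserialize(const std::vector<uint8_t> &in) {
	uint64_t total = 0;
	std::memcpy(&total, in.data(), sizeof(total));
	size_t offset = sizeof(total);
	std::unique_ptr<Segment> head;
	Segment *tail = nullptr;
	while (total > 0) {
		auto seg = std::make_unique<Segment>();
		seg->count = static_cast<uint8_t>(std::min<uint64_t>(kSegmentSize, total));
		std::memcpy(seg->data, in.data() + offset, seg->count);
		offset += seg->count;
		total -= seg->count;
		Segment *raw = seg.get();
		if (!tail) {
			head = std::move(seg);
		} else {
			tail->next = std::move(seg);
		}
		tail = raw;
	}
	return head;
}

int main() {
	Segment head;
	head.count = 3;
	head.data[0] = 'a'; head.data[1] = 'b'; head.data[2] = 'c';
	head.next = std::make_unique<Segment>();
	head.next->count = 2;
	head.next->data[0] = 'd'; head.next->data[1] = 'e';

	auto buffer = Serialize(head);      // 8-byte count, then "abcde"
	auto rebuilt = Deserialize(buffer); // one link, since 5 <= kSegmentSize
	return rebuilt && rebuilt->count == 5 ? 0 : 1;
}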
package/src/duckdb/src/execution/operator/schema/physical_create_index.cpp

@@ -4,7 +4,9 @@
 #include "duckdb/catalog/catalog_entry/table_catalog_entry.hpp"
 #include "duckdb/catalog/catalog_entry/duck_index_entry.hpp"
 #include "duckdb/main/client_context.hpp"
+#include "duckdb/storage/index.hpp"
 #include "duckdb/storage/storage_manager.hpp"
+#include "duckdb/storage/table/append_state.hpp"
 #include "duckdb/main/database_manager.hpp"
 #include "duckdb/execution/index/art/art_key.hpp"
 #include "duckdb/execution/index/art/node.hpp"
@@ -15,10 +17,10 @@ namespace duckdb {
 PhysicalCreateIndex::PhysicalCreateIndex(LogicalOperator &op, TableCatalogEntry &table_p,
                                          const vector<column_t> &column_ids, unique_ptr<CreateIndexInfo> info,
                                          vector<unique_ptr<Expression>> unbound_expressions,
-                                         idx_t estimated_cardinality)
+                                         idx_t estimated_cardinality, const bool sorted)
     : PhysicalOperator(PhysicalOperatorType::CREATE_INDEX, op.types, estimated_cardinality),
-      table(table_p.Cast<DuckTableEntry>()), info(std::move(info)),
-
+      table(table_p.Cast<DuckTableEntry>()), info(std::move(info)), unbound_expressions(std::move(unbound_expressions)),
+      sorted(sorted) {
 	// convert virtual column ids to storage column ids
 	for (auto &column_id : column_ids) {
 		storage_ids.push_back(table.GetColumns().LogicalToPhysical(LogicalIndex(column_id)).index);
@@ -86,43 +88,65 @@ unique_ptr<LocalSinkState> PhysicalCreateIndex::GetLocalSinkState(ExecutionConte
 	return std::move(state);
 }

-SinkResultType PhysicalCreateIndex::
+SinkResultType PhysicalCreateIndex::SinkUnsorted(Vector &row_identifiers, OperatorSinkInput &input) const {

-
-	auto
-	auto &row_identifiers = chunk.data[chunk.ColumnCount() - 1];
+	auto &l_state = input.local_state.Cast<CreateIndexLocalSinkState>();
+	auto count = l_state.key_chunk.size();

-	//
-
-
-
+	// get the corresponding row IDs
+	row_identifiers.Flatten(count);
+	auto row_ids = FlatVector::GetData<row_t>(row_identifiers);
+
+	// insert the row IDs
+	auto &art = l_state.local_index->Cast<ART>();
+	for (idx_t i = 0; i < count; i++) {
+		if (!art.Insert(*art.tree, l_state.keys[i], 0, row_ids[i])) {
+			throw ConstraintException("Data contains duplicates on indexed column(s)");
+		}
+	}

+	return SinkResultType::NEED_MORE_INPUT;
+}
+
+SinkResultType PhysicalCreateIndex::SinkSorted(Vector &row_identifiers, OperatorSinkInput &input) const {
+
+	auto &l_state = input.local_state.Cast<CreateIndexLocalSinkState>();
 	auto &storage = table.GetStorage();
-	auto
-
-
+	auto &l_index = l_state.local_index;
+
+	// create an ART from the chunk
+	auto art = make_uniq<ART>(l_index->column_ids, l_index->table_io_manager, l_index->unbound_expressions,
+	                          l_index->constraint_type, storage.db, l_index->Cast<ART>().allocators);
+	if (!art->ConstructFromSorted(l_state.key_chunk.size(), l_state.keys, row_identifiers)) {
 		throw ConstraintException("Data contains duplicates on indexed column(s)");
 	}

 	// merge into the local ART
-	if (!
+	if (!l_index->MergeIndexes(*art)) {
 		throw ConstraintException("Data contains duplicates on indexed column(s)");
 	}

-#ifdef DEBUG
-	// ensure that all row IDs of this chunk exist in the ART
-	auto &local_art = lstate.local_index->Cast<ART>();
-	auto row_ids = FlatVector::GetData<row_t>(row_identifiers);
-	for (idx_t i = 0; i < lstate.key_chunk.size(); i++) {
-		auto leaf = local_art.Lookup(*local_art.tree, lstate.keys[i], 0);
-		D_ASSERT(leaf.IsSet());
-		D_ASSERT(Leaf::ContainsRowId(local_art, leaf, row_ids[i]));
-	}
-#endif
-
 	return SinkResultType::NEED_MORE_INPUT;
 }

+SinkResultType PhysicalCreateIndex::Sink(ExecutionContext &context, DataChunk &chunk, OperatorSinkInput &input) const {
+
+	D_ASSERT(chunk.ColumnCount() >= 2);
+
+	// generate the keys for the given input
+	auto &l_state = input.local_state.Cast<CreateIndexLocalSinkState>();
+	l_state.key_chunk.ReferenceColumns(chunk, l_state.key_column_ids);
+	l_state.arena_allocator.Reset();
+	ART::GenerateKeys(l_state.arena_allocator, l_state.key_chunk, l_state.keys);
+
+	// insert the keys and their corresponding row IDs
+	auto &row_identifiers = chunk.data[chunk.ColumnCount() - 1];
+	if (sorted) {
+		return SinkSorted(row_identifiers, input);
+	}
+	return SinkUnsorted(row_identifiers, input);
+}
+
 SinkCombineResultType PhysicalCreateIndex::Combine(ExecutionContext &context, OperatorSinkCombineInput &input) const {

 	auto &gstate = input.global_state.Cast<CreateIndexGlobalSinkState>();
@@ -133,18 +157,17 @@ SinkCombineResultType PhysicalCreateIndex::Combine(ExecutionContext &context, Op
 		throw ConstraintException("Data contains duplicates on indexed column(s)");
 	}

-	// vacuum excess memory
-	gstate.global_index->Vacuum();
-
 	return SinkCombineResultType::FINISHED;
 }

 SinkFinalizeType PhysicalCreateIndex::Finalize(Pipeline &pipeline, Event &event, ClientContext &context,
                                                OperatorSinkFinalizeInput &input) const {

-	// here, we
-
+	// here, we set the resulting global index as the newly created index of the table
 	auto &state = input.global_state.Cast<CreateIndexGlobalSinkState>();
+
+	// vacuum excess memory and verify
+	state.global_index->Vacuum();
 	D_ASSERT(!state.global_index->VerifyAndToString(true).empty());

 	auto &storage = table.GetStorage();
package/src/duckdb/src/execution/physical_plan/plan_create_index.cpp

@@ -68,27 +68,44 @@ unique_ptr<PhysicalOperator> PhysicalPlanGenerator::CreatePlan(LogicalCreateInde
 	null_filter->types.emplace_back(LogicalType::ROW_TYPE);
 	null_filter->children.push_back(std::move(projection));

-	//
-
-
-
-
-
-
-		projections.emplace_back(i);
+	// determine if we sort the data prior to index creation
+	// we don't sort, if either VARCHAR or compound key
+	auto perform_sorting = true;
+	if (op.unbound_expressions.size() > 1) {
+		perform_sorting = false;
+	} else if (op.unbound_expressions[0]->return_type.InternalType() == PhysicalType::VARCHAR) {
+		perform_sorting = false;
 	}
-	projections.emplace_back(new_column_types.size() - 1);
-
-	auto physical_order =
-	    make_uniq<PhysicalOrder>(new_column_types, std::move(orders), std::move(projections), op.estimated_cardinality);
-	physical_order->children.push_back(std::move(null_filter));

 	// actual physical create index operator

 	auto physical_create_index =
 	    make_uniq<PhysicalCreateIndex>(op, op.table, op.info->column_ids, std::move(op.info),
-	                                   std::move(op.unbound_expressions), op.estimated_cardinality);
-
+	                                   std::move(op.unbound_expressions), op.estimated_cardinality, perform_sorting);
+
+	if (perform_sorting) {
+
+		// optional order operator
+		vector<BoundOrderByNode> orders;
+		vector<idx_t> projections;
+		for (idx_t i = 0; i < new_column_types.size() - 1; i++) {
+			auto col_expr = make_uniq_base<Expression, BoundReferenceExpression>(new_column_types[i], i);
+			orders.emplace_back(OrderType::ASCENDING, OrderByNullType::NULLS_FIRST, std::move(col_expr));
+			projections.emplace_back(i);
+		}
+		projections.emplace_back(new_column_types.size() - 1);
+
+		auto physical_order = make_uniq<PhysicalOrder>(new_column_types, std::move(orders), std::move(projections),
+		                                               op.estimated_cardinality);
+		physical_order->children.push_back(std::move(null_filter));
+
+		physical_create_index->children.push_back(std::move(physical_order));
+	} else {
+
+		// no ordering
+		physical_create_index->children.push_back(std::move(null_filter));
+	}
+
 	return std::move(physical_create_index);
 }

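The planner rule in the hunk above can be read as a small predicate: the sorted path (and therefore ConstructFromSorted in the PhysicalCreateIndex hunk) is only used for a single, fixed-size key column, while compound keys and VARCHAR keys fall back to per-row insertion. A sketch of that decision in isolation, with a hypothetical PhysType enum standing in for DuckDB's PhysicalType:

#include <vector>

// Hypothetical stand-in for the key columns' physical types; not the DuckDB enum.
enum class PhysType { VARCHAR, FIXED_SIZE };

// Mirrors the rule above: sort before index creation only when the index has
// exactly one key column and that column is not a VARCHAR.
bool ShouldSortBeforeIndexCreation(const std::vector<PhysType> &key_types) {
	if (key_types.size() != 1) {
		return false; // compound key
	}
	return key_types[0] != PhysType::VARCHAR;
}

int main() {
	// single fixed-size key -> sorted path; VARCHAR or compound key -> unsorted path
	bool ok = ShouldSortBeforeIndexCreation({PhysType::FIXED_SIZE}) &&
	          !ShouldSortBeforeIndexCreation({PhysType::VARCHAR}) &&
	          !ShouldSortBeforeIndexCreation({PhysType::FIXED_SIZE, PhysType::FIXED_SIZE});
	return ok ? 0 : 1;
}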
package/src/duckdb/src/function/table/arrow/arrow_duck_schema.cpp

@@ -0,0 +1,57 @@
+#include "duckdb/function/table/arrow/arrow_duck_schema.hpp"
+#include "duckdb/common/arrow/arrow.hpp"
+#include "duckdb/common/exception.hpp"
+
+namespace duckdb {
+
+void ArrowTableType::AddColumn(idx_t index, unique_ptr<ArrowType> type) {
+	D_ASSERT(arrow_convert_data.find(index) == arrow_convert_data.end());
+	arrow_convert_data.emplace(std::make_pair(index, std::move(type)));
+}
+
+const arrow_column_map_t &ArrowTableType::GetColumns() const {
+	return arrow_convert_data;
+}
+
+void ArrowType::AddChild(unique_ptr<ArrowType> child) {
+	children.emplace_back(std::move(child));
+}
+
+void ArrowType::AssignChildren(vector<unique_ptr<ArrowType>> children) {
+	D_ASSERT(this->children.empty());
+	this->children = std::move(children);
+}
+
+void ArrowType::SetDictionary(unique_ptr<ArrowType> dictionary) {
+	D_ASSERT(!this->dictionary_type);
+	dictionary_type = std::move(dictionary);
+}
+
+const ArrowType &ArrowType::GetDictionary() const {
+	D_ASSERT(dictionary_type);
+	return *dictionary_type;
+}
+
+const LogicalType &ArrowType::GetDuckType() const {
+	return type;
+}
+
+ArrowVariableSizeType ArrowType::GetSizeType() const {
+	return size_type;
+}
+
+ArrowDateTimeType ArrowType::GetDateTimeType() const {
+	return date_time_precision;
+}
+
+const ArrowType &ArrowType::operator[](idx_t index) const {
+	D_ASSERT(index < children.size());
+	return *children[index];
+}
+
+idx_t ArrowType::FixedSize() const {
+	D_ASSERT(size_type == ArrowVariableSizeType::FIXED_SIZE);
+	return fixed_size;
+}
+
+} // namespace duckdb