duckdb 0.7.2-dev2552.0 → 0.7.2-dev2699.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/binding.gyp +7 -7
- package/package.json +2 -2
- package/src/duckdb/extension/parquet/parquet_statistics.cpp +3 -0
- package/src/duckdb/src/catalog/catalog_entry/duck_table_entry.cpp +2 -2
- package/src/duckdb/src/common/adbc/adbc.cpp +5 -2
- package/src/duckdb/src/common/radix_partitioning.cpp +1 -1
- package/src/duckdb/src/execution/index/art/art.cpp +286 -269
- package/src/duckdb/src/execution/index/art/art_key.cpp +22 -32
- package/src/duckdb/src/execution/index/art/fixed_size_allocator.cpp +224 -0
- package/src/duckdb/src/execution/index/art/iterator.cpp +142 -123
- package/src/duckdb/src/execution/index/art/leaf.cpp +319 -170
- package/src/duckdb/src/execution/index/art/leaf_segment.cpp +42 -0
- package/src/duckdb/src/execution/index/art/node.cpp +444 -379
- package/src/duckdb/src/execution/index/art/node16.cpp +178 -114
- package/src/duckdb/src/execution/index/art/node256.cpp +117 -79
- package/src/duckdb/src/execution/index/art/node4.cpp +169 -114
- package/src/duckdb/src/execution/index/art/node48.cpp +175 -105
- package/src/duckdb/src/execution/index/art/prefix.cpp +405 -127
- package/src/duckdb/src/execution/index/art/prefix_segment.cpp +42 -0
- package/src/duckdb/src/execution/index/art/swizzleable_pointer.cpp +10 -85
- package/src/duckdb/src/execution/operator/join/physical_index_join.cpp +2 -1
- package/src/duckdb/src/execution/operator/persistent/base_csv_reader.cpp +2 -2
- package/src/duckdb/src/execution/operator/persistent/csv_reader_options.cpp +2 -0
- package/src/duckdb/src/execution/operator/persistent/parallel_csv_reader.cpp +4 -0
- package/src/duckdb/src/execution/operator/schema/physical_create_index.cpp +11 -12
- package/src/duckdb/src/function/table/read_csv.cpp +5 -1
- package/src/duckdb/src/function/table/version/pragma_version.cpp +2 -2
- package/src/duckdb/src/include/duckdb/common/queue.hpp +1 -1
- package/src/duckdb/src/include/duckdb/execution/index/art/art.hpp +53 -45
- package/src/duckdb/src/include/duckdb/execution/index/art/art_key.hpp +29 -24
- package/src/duckdb/src/include/duckdb/execution/index/art/fixed_size_allocator.hpp +114 -0
- package/src/duckdb/src/include/duckdb/execution/index/art/iterator.hpp +26 -20
- package/src/duckdb/src/include/duckdb/execution/index/art/leaf.hpp +63 -39
- package/src/duckdb/src/include/duckdb/execution/index/art/leaf_segment.hpp +36 -0
- package/src/duckdb/src/include/duckdb/execution/index/art/node.hpp +98 -116
- package/src/duckdb/src/include/duckdb/execution/index/art/node16.hpp +48 -36
- package/src/duckdb/src/include/duckdb/execution/index/art/node256.hpp +52 -35
- package/src/duckdb/src/include/duckdb/execution/index/art/node4.hpp +46 -36
- package/src/duckdb/src/include/duckdb/execution/index/art/node48.hpp +57 -35
- package/src/duckdb/src/include/duckdb/execution/index/art/prefix.hpp +57 -50
- package/src/duckdb/src/include/duckdb/execution/index/art/prefix_segment.hpp +40 -0
- package/src/duckdb/src/include/duckdb/execution/index/art/swizzleable_pointer.hpp +38 -31
- package/src/duckdb/src/include/duckdb/execution/operator/persistent/csv_file_handle.hpp +2 -1
- package/src/duckdb/src/include/duckdb/execution/operator/persistent/csv_reader_options.hpp +2 -0
- package/src/duckdb/src/include/duckdb/main/query_result.hpp +1 -1
- package/src/duckdb/src/include/duckdb/parser/statement/insert_statement.hpp +4 -1
- package/src/duckdb/src/include/duckdb/parser/transformer.hpp +2 -1
- package/src/duckdb/src/include/duckdb/storage/buffer_manager.hpp +0 -5
- package/src/duckdb/src/include/duckdb/storage/index.hpp +13 -28
- package/src/duckdb/src/include/duckdb/storage/standard_buffer_manager.hpp +0 -2
- package/src/duckdb/src/include/duckdb/transaction/cleanup_state.hpp +5 -0
- package/src/duckdb/src/include/duckdb.h +26 -0
- package/src/duckdb/src/main/capi/helper-c.cpp +7 -0
- package/src/duckdb/src/main/client_context.cpp +1 -1
- package/src/duckdb/src/main/query_result.cpp +1 -1
- package/src/duckdb/src/parser/statement/insert_statement.cpp +15 -6
- package/src/duckdb/src/parser/transform/constraint/transform_constraint.cpp +1 -1
- package/src/duckdb/src/parser/transform/expression/transform_function.cpp +18 -5
- package/src/duckdb/src/parser/transform/statement/transform_insert.cpp +5 -7
- package/src/duckdb/src/planner/binder/statement/bind_create.cpp +20 -7
- package/src/duckdb/src/planner/binder/statement/bind_insert.cpp +14 -9
- package/src/duckdb/src/storage/checkpoint_manager.cpp +11 -9
- package/src/duckdb/src/storage/data_table.cpp +6 -3
- package/src/duckdb/src/storage/index.cpp +18 -6
- package/src/duckdb/src/storage/local_storage.cpp +8 -2
- package/src/duckdb/src/storage/standard_buffer_manager.cpp +0 -9
- package/src/duckdb/src/storage/wal_replay.cpp +1 -1
- package/src/duckdb/src/transaction/cleanup_state.cpp +6 -0
- package/src/duckdb/src/transaction/undo_buffer.cpp +8 -0
- package/src/duckdb/ub_extension_icu_third_party_icu_i18n.cpp +4 -4
- package/src/duckdb/ub_src_execution_index_art.cpp +7 -1
@@ -0,0 +1,114 @@
|
|
1
|
+
//===----------------------------------------------------------------------===//
|
2
|
+
// DuckDB
|
3
|
+
//
|
4
|
+
// duckdb/execution/index/art/fixed_size_allocator.hpp
|
5
|
+
//
|
6
|
+
//
|
7
|
+
//===----------------------------------------------------------------------===//
|
8
|
+
|
9
|
+
#pragma once
|
10
|
+
|
11
|
+
#include "duckdb/common/unordered_set.hpp"
|
12
|
+
#include "duckdb/common/constants.hpp"
|
13
|
+
#include "duckdb/common/vector.hpp"
|
14
|
+
#include "duckdb/common/assert.hpp"
|
15
|
+
#include "duckdb/common/types/validity_mask.hpp"
|
16
|
+
#include "duckdb/storage/buffer_manager.hpp"
|
17
|
+
#include "duckdb/execution/index/art/swizzleable_pointer.hpp"
|
18
|
+
|
19
|
+
namespace duckdb {
|
20
|
+
|
21
|
+
struct BufferEntry {
|
22
|
+
BufferEntry(const data_ptr_t &ptr, const idx_t &allocation_count) : ptr(ptr), allocation_count(allocation_count) {
|
23
|
+
}
|
24
|
+
data_ptr_t ptr;
|
25
|
+
idx_t allocation_count;
|
26
|
+
};
|
27
|
+
|
28
|
+
//! The FixedSizeAllocator provides pointers to fixed-size sections of pre-allocated memory buffers.
|
29
|
+
//! The pointers are SwizzleablePointers, and the leftmost byte (swizzle flag and type) must always be zero.
|
30
|
+
class FixedSizeAllocator {
|
31
|
+
public:
|
32
|
+
//! Fixed size of the buffers
|
33
|
+
static constexpr idx_t BUFFER_ALLOC_SIZE = Storage::BLOCK_ALLOC_SIZE;
|
34
|
+
//! We can vacuum 10% or more of the total memory usage of the allocator
|
35
|
+
static constexpr uint8_t VACUUM_THRESHOLD = 10;
|
36
|
+
|
37
|
+
//! Constants for fast offset calculations in the bitmask
|
38
|
+
static constexpr idx_t BASE[] = {0x00000000FFFFFFFF, 0x0000FFFF, 0x00FF, 0x0F, 0x3, 0x1};
|
39
|
+
static constexpr uint8_t SHIFT[] = {32, 16, 8, 4, 2, 1};
|
40
|
+
|
41
|
+
public:
|
42
|
+
explicit FixedSizeAllocator(const idx_t allocation_size, Allocator &allocator);
|
43
|
+
~FixedSizeAllocator();
|
44
|
+
|
45
|
+
//! Allocation size of one element in a buffer
|
46
|
+
idx_t allocation_size;
|
47
|
+
//! Total number of allocations
|
48
|
+
idx_t total_allocations;
|
49
|
+
//! Number of validity_t values in the bitmask
|
50
|
+
idx_t bitmask_count;
|
51
|
+
//! First starting byte of the payload
|
52
|
+
idx_t allocation_offset;
|
53
|
+
//! Number of possible allocations per buffer
|
54
|
+
idx_t allocations_per_buffer;
|
55
|
+
|
56
|
+
//! Buffers containing the data
|
57
|
+
vector<BufferEntry> buffers;
|
58
|
+
//! Buffers with free space
|
59
|
+
unordered_set<idx_t> buffers_with_free_space;
|
60
|
+
|
61
|
+
//! Minimum buffer ID of buffers that can be vacuumed
|
62
|
+
idx_t min_vacuum_buffer_id;
|
63
|
+
|
64
|
+
//! Allocator passed in at construction (an Allocator, not the buffer manager)
|
65
|
+
Allocator &allocator;
|
66
|
+
|
67
|
+
public:
|
68
|
+
//! Get a new pointer to data, might cause a new buffer allocation
|
69
|
+
SwizzleablePointer New();
|
70
|
+
//! Free the data of the pointer
|
71
|
+
void Free(const SwizzleablePointer ptr);
|
72
|
+
//! Get the data of the pointer
|
73
|
+
template <class T>
|
74
|
+
inline T *Get(const SwizzleablePointer ptr) const {
|
75
|
+
return (T *)Get(ptr);
|
76
|
+
}
|
77
|
+
|
78
|
+
//! Resets the allocator, which e.g. becomes necessary during DELETE FROM table
|
79
|
+
void Reset();
|
80
|
+
|
81
|
+
//! Returns the allocated memory size in bytes
|
82
|
+
inline idx_t GetMemoryUsage() const {
|
83
|
+
return buffers.size() * BUFFER_ALLOC_SIZE;
|
84
|
+
}
|
85
|
+
|
86
|
+
//! Merge another FixedSizeAllocator with this allocator. Both must have the same allocation size
|
87
|
+
void Merge(FixedSizeAllocator &other);
|
88
|
+
|
89
|
+
//! Initialize a vacuum operation, and return true, if the allocator needs a vacuum
|
90
|
+
bool InitializeVacuum();
|
91
|
+
//! Finalize a vacuum operation by freeing all buffers exceeding the min_vacuum_buffer_id
|
92
|
+
void FinalizeVacuum();
|
93
|
+
//! Returns true, if a pointer qualifies for a vacuum operation, and false otherwise
|
94
|
+
inline bool NeedsVacuum(const SwizzleablePointer ptr) const {
|
95
|
+
if (ptr.buffer_id >= min_vacuum_buffer_id) {
|
96
|
+
return true;
|
97
|
+
}
|
98
|
+
return false;
|
99
|
+
}
|
100
|
+
//! Vacuums a pointer
|
101
|
+
SwizzleablePointer VacuumPointer(const SwizzleablePointer ptr);
|
102
|
+
|
103
|
+
private:
|
104
|
+
//! Returns the data_ptr_t of a pointer
|
105
|
+
inline data_ptr_t Get(const SwizzleablePointer ptr) const {
|
106
|
+
D_ASSERT(ptr.buffer_id < buffers.size());
|
107
|
+
D_ASSERT(ptr.offset < allocations_per_buffer);
|
108
|
+
return buffers[ptr.buffer_id].ptr + ptr.offset * allocation_size + allocation_offset;
|
109
|
+
}
|
110
|
+
//! Returns the first free offset in a bitmask
|
111
|
+
uint32_t GetOffset(ValidityMask &mask, const idx_t allocation_count);
|
112
|
+
};
|
113
|
+
|
114
|
+
} // namespace duckdb
|
@@ -8,6 +8,7 @@
|
|
8
8
|
#pragma once
|
9
9
|
|
10
10
|
#include "duckdb/common/stack.hpp"
|
11
|
+
#include "duckdb/execution/index/art/art_key.hpp"
|
11
12
|
#include "duckdb/execution/index/art/leaf.hpp"
|
12
13
|
#include "duckdb/execution/index/art/node.hpp"
|
13
14
|
|
@@ -16,57 +17,62 @@ namespace duckdb {
|
|
16
17
|
struct IteratorEntry {
|
17
18
|
IteratorEntry() {
|
18
19
|
}
|
19
|
-
IteratorEntry(Node
|
20
|
+
IteratorEntry(Node node, uint8_t byte) : node(node), byte(byte) {
|
20
21
|
}
|
21
22
|
|
22
|
-
Node
|
23
|
-
|
23
|
+
Node node;
|
24
|
+
uint8_t byte = 0;
|
24
25
|
};
|
25
26
|
|
26
27
|
//! Keeps track of the current key in the iterator
|
27
28
|
class IteratorCurrentKey {
|
28
29
|
public:
|
29
|
-
//! Push
|
30
|
-
void Push(uint8_t key);
|
31
|
-
//! Pops n elements
|
32
|
-
void Pop(idx_t n);
|
30
|
+
//! Push byte into current key
|
31
|
+
void Push(const uint8_t key);
|
32
|
+
//! Pops n elements from the key
|
33
|
+
void Pop(const idx_t n);
|
34
|
+
|
33
35
|
//! Subscript operator
|
34
36
|
uint8_t &operator[](idx_t idx);
|
35
|
-
|
36
|
-
bool operator
|
37
|
-
|
37
|
+
//! Greater than operator
|
38
|
+
bool operator>(const ARTKey &k) const;
|
39
|
+
//! Greater than or equal to operator
|
40
|
+
bool operator>=(const ARTKey &k) const;
|
41
|
+
//! Equal to operator
|
42
|
+
bool operator==(const ARTKey &k) const;
|
38
43
|
|
39
44
|
private:
|
40
45
|
//! The current key position
|
41
46
|
idx_t cur_key_pos = 0;
|
42
|
-
//! The current key
|
47
|
+
//! The current key corresponding to the current leaf
|
43
48
|
vector<uint8_t> key;
|
44
49
|
};
|
45
50
|
|
46
51
|
class Iterator {
|
47
52
|
public:
|
48
|
-
//!
|
53
|
+
//! All information about the current key
|
49
54
|
IteratorCurrentKey cur_key;
|
50
|
-
//! Pointer to the ART
|
55
|
+
//! Pointer to the ART
|
51
56
|
ART *art = nullptr;
|
52
57
|
|
53
58
|
//! Scan the tree
|
54
|
-
bool Scan(
|
55
|
-
//! Finds minimum value of the tree
|
59
|
+
bool Scan(const ARTKey &key, const idx_t &max_count, vector<row_t> &result_ids, const bool &is_inclusive);
|
60
|
+
//! Finds the minimum value of the tree
|
56
61
|
void FindMinimum(Node &node);
|
57
|
-
//! Goes to lower bound
|
58
|
-
bool LowerBound(Node
|
62
|
+
//! Goes to the lower bound of the tree
|
63
|
+
bool LowerBound(Node node, const ARTKey &key, const bool &is_inclusive);
|
59
64
|
|
60
65
|
private:
|
61
66
|
//! Stack of iterator entries
|
62
67
|
stack<IteratorEntry> nodes;
|
63
68
|
//! Last visited leaf
|
64
69
|
Leaf *last_leaf = nullptr;
|
70
|
+
|
65
71
|
//! Go to the next node
|
66
72
|
bool Next();
|
67
|
-
//! Push part of the key to
|
68
|
-
void PushKey(Node
|
69
|
-
//! Pop node
|
73
|
+
//! Push part of the key to the current key
|
74
|
+
void PushKey(const Node &node, const uint8_t byte);
|
75
|
+
//! Pop node from the stack of iterator entries
|
70
76
|
void PopNode();
|
71
77
|
};
|
72
78
|
} // namespace duckdb
|
@@ -8,61 +8,85 @@
|
|
8
8
|
|
9
9
|
#pragma once
|
10
10
|
|
11
|
+
#include "duckdb/execution/index/art/art.hpp"
|
12
|
+
#include "duckdb/execution/index/art/fixed_size_allocator.hpp"
|
11
13
|
#include "duckdb/execution/index/art/node.hpp"
|
12
|
-
#include "duckdb/
|
14
|
+
#include "duckdb/execution/index/art/prefix.hpp"
|
13
15
|
|
14
16
|
namespace duckdb {
|
15
17
|
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
18
|
+
// classes
|
19
|
+
class Node;
|
20
|
+
class ARTKey;
|
21
|
+
class MetaBlockWriter;
|
22
|
+
class MetaBlockReader;
|
23
|
+
|
24
|
+
// structs
|
25
|
+
struct BlockPointer;
|
24
26
|
|
25
|
-
|
26
|
-
|
27
|
-
//!
|
28
|
-
|
29
|
-
//!
|
30
|
-
|
31
|
-
|
32
|
-
|
27
|
+
class Leaf {
|
28
|
+
public:
|
29
|
+
//! Number of row IDs
|
30
|
+
uint32_t count;
|
31
|
+
//! Compressed path (prefix)
|
32
|
+
Prefix prefix;
|
33
|
+
union {
|
34
|
+
//! The pointer to the head of the list of leaf segments
|
35
|
+
Node ptr;
|
36
|
+
//! Inlined row ID
|
37
|
+
row_t inlined;
|
38
|
+
} row_ids;
|
33
39
|
|
34
40
|
public:
|
35
|
-
|
36
|
-
|
37
|
-
static Leaf
|
38
|
-
|
39
|
-
|
41
|
+
//! Get a new leaf node, might cause a new buffer allocation, and initializes a leaf holding one
|
42
|
+
//! row ID and a prefix starting at depth
|
43
|
+
static Leaf &New(ART &art, Node &node, const ARTKey &key, const uint32_t depth, const row_t row_id);
|
44
|
+
//! Get a new leaf node, might cause a new buffer allocation, and initializes a leaf holding
|
45
|
+
//! count row IDs (taken from row_ids) and a prefix starting at depth
|
46
|
+
static Leaf &New(ART &art, Node &node, const ARTKey &key, const uint32_t depth, const row_t *row_ids,
|
47
|
+
const idx_t count);
|
48
|
+
//! Free the leaf
|
49
|
+
static void Free(ART &art, Node &node);
|
50
|
+
//! Get a reference to the leaf
|
51
|
+
static inline Leaf &Get(const ART &art, const Node ptr) {
|
52
|
+
return *Node::GetAllocator(art, NType::LEAF).Get<Leaf>(ptr);
|
53
|
+
}
|
54
|
+
|
55
|
+
//! Initializes a merge by incrementing the buffer IDs of the leaf segments
|
56
|
+
void InitializeMerge(const ART &art, const idx_t buffer_count);
|
57
|
+
//! Merge leaves
|
58
|
+
void Merge(ART &art, Node &other);
|
40
59
|
|
41
|
-
//! Returns the memory size of the leaf
|
42
|
-
idx_t MemorySize(ART &art, const bool &recurse) override;
|
43
60
|
//! Insert a row ID into a leaf
|
44
|
-
void Insert(ART &art, row_t row_id);
|
61
|
+
void Insert(ART &art, const row_t row_id);
|
45
62
|
//! Remove a row ID from a leaf
|
46
|
-
void Remove(ART &art, row_t row_id);
|
63
|
+
void Remove(ART &art, const row_t row_id);
|
64
|
+
|
65
|
+
//! Returns whether this leaf is inlined
|
66
|
+
inline bool IsInlined() const {
|
67
|
+
return count <= 1;
|
68
|
+
}
|
69
|
+
//! Get the row ID at the position
|
70
|
+
row_t GetRowId(const ART &art, const idx_t position) const;
|
71
|
+
//! Returns the position of a row ID, or an invalid index if the leaf does not contain the row ID,
|
72
|
+
//! and sets the ptr to point to the segment containing the row ID
|
73
|
+
uint32_t FindRowId(const ART &art, Node &ptr, const row_t row_id) const;
|
47
74
|
|
48
75
|
//! Returns the string representation of a leaf
|
49
|
-
|
50
|
-
//! Merge two NLeaf nodes
|
51
|
-
static void Merge(ART &art, Node *&l_node, Node *&r_node);
|
76
|
+
string ToString(const ART &art) const;
|
52
77
|
|
53
|
-
//! Serialize
|
54
|
-
BlockPointer Serialize(
|
55
|
-
//! Deserialize
|
56
|
-
void Deserialize(ART &art,
|
78
|
+
//! Serialize this leaf
|
79
|
+
BlockPointer Serialize(const ART &art, MetaBlockWriter &writer) const;
|
80
|
+
//! Deserialize this leaf
|
81
|
+
void Deserialize(ART &art, MetaBlockReader &reader);
|
57
82
|
|
58
|
-
|
59
|
-
|
60
|
-
row_t inlined;
|
61
|
-
row_t *ptr;
|
62
|
-
} rowids;
|
83
|
+
//! Vacuum the leaf segments of a leaf, if not inlined
|
84
|
+
void Vacuum(ART &art);
|
63
85
|
|
64
86
|
private:
|
65
|
-
|
87
|
+
//! Moves the inlined row ID onto a leaf segment, does not change the size
|
88
|
+
//! so this will be a (temporarily) invalid leaf
|
89
|
+
void MoveInlinedToSegment(ART &art);
|
66
90
|
};
|
67
91
|
|
68
92
|
} // namespace duckdb
|
@@ -0,0 +1,36 @@
|
|
1
|
+
//===----------------------------------------------------------------------===//
|
2
|
+
// DuckDB
|
3
|
+
//
|
4
|
+
// duckdb/execution/index/art/leaf_segment.hpp
|
5
|
+
//
|
6
|
+
//
|
7
|
+
//===----------------------------------------------------------------------===//
|
8
|
+
#pragma once
|
9
|
+
|
10
|
+
#include "duckdb/execution/index/art/art.hpp"
|
11
|
+
#include "duckdb/execution/index/art/node.hpp"
|
12
|
+
|
13
|
+
namespace duckdb {
|
14
|
+
|
15
|
+
class LeafSegment {
|
16
|
+
public:
|
17
|
+
//! The row IDs stored in this segment
|
18
|
+
row_t row_ids[Node::LEAF_SEGMENT_SIZE];
|
19
|
+
//! The pointer to the next segment, if the row IDs exceed this segment
|
20
|
+
Node next;
|
21
|
+
|
22
|
+
public:
|
23
|
+
//! Get a new leaf segment node, might cause a new buffer allocation, and initialize it
|
24
|
+
static LeafSegment &New(ART &art, Node &node);
|
25
|
+
//! Get a reference to the leaf segment
|
26
|
+
static inline LeafSegment &Get(const ART &art, const Node ptr) {
|
27
|
+
return *Node::GetAllocator(art, NType::LEAF_SEGMENT).Get<LeafSegment>(ptr);
|
28
|
+
}
|
29
|
+
|
30
|
+
//! Append a row ID to the current segment, or create a new segment containing that row ID
|
31
|
+
LeafSegment &Append(ART &art, uint32_t &count, const row_t row_id);
|
32
|
+
//! Get the tail of a list of segments
|
33
|
+
LeafSegment &GetTail(const ART &art);
|
34
|
+
};
|
35
|
+
|
36
|
+
} // namespace duckdb
|
@@ -1,138 +1,120 @@
|
|
1
1
|
//===----------------------------------------------------------------------===//
|
2
2
|
// DuckDB
|
3
3
|
//
|
4
|
-
// duckdb/execution/index/art/
|
4
|
+
// duckdb/execution/index/art/node.hpp
|
5
5
|
//
|
6
6
|
//
|
7
7
|
//===----------------------------------------------------------------------===//
|
8
8
|
|
9
9
|
#pragma once
|
10
10
|
|
11
|
-
#include "duckdb/
|
12
|
-
#include "duckdb/execution/index/art/
|
13
|
-
#include "duckdb/execution/index/art/prefix.hpp"
|
14
|
-
#include "duckdb/storage/index.hpp"
|
15
|
-
#include "duckdb/storage/meta_block_reader.hpp"
|
16
|
-
#include "duckdb/storage/meta_block_writer.hpp"
|
17
|
-
#include "duckdb/common/allocator.hpp"
|
11
|
+
#include "duckdb/execution/index/art/fixed_size_allocator.hpp"
|
12
|
+
#include "duckdb/execution/index/art/swizzleable_pointer.hpp"
|
18
13
|
|
19
14
|
namespace duckdb {
|
20
|
-
enum class NodeType : uint8_t { NLeaf = 0, N4 = 1, N16 = 2, N48 = 3, N256 = 4 };
|
21
|
-
class ART;
|
22
|
-
class Node;
|
23
|
-
|
24
|
-
// Note: SwizzleablePointer assumes top 33 bits of the block_id are 0. Use a different
|
25
|
-
// pointer implementation if that does not hold.
|
26
|
-
class SwizzleablePointer;
|
27
|
-
using ARTPointer = SwizzleablePointer;
|
28
|
-
|
29
|
-
struct InternalType {
|
30
|
-
explicit InternalType(Node *n);
|
31
|
-
|
32
|
-
void Set(uint8_t *key_p, uint16_t key_size_p, ARTPointer *children_p, uint16_t children_size_p);
|
33
|
-
uint8_t *key;
|
34
|
-
uint16_t key_size;
|
35
|
-
ARTPointer *children;
|
36
|
-
uint16_t children_size;
|
37
|
-
};
|
38
|
-
|
39
|
-
struct MergeInfo {
|
40
|
-
MergeInfo(ART *l_art, ART *r_art, ART *root_l_art, ART *root_r_art, Node *&l_node, Node *&r_node)
|
41
|
-
: l_art(l_art), r_art(r_art), root_l_art(root_l_art), root_r_art(root_r_art), l_node(l_node), r_node(r_node) {};
|
42
|
-
ART *l_art;
|
43
|
-
ART *r_art;
|
44
|
-
ART *root_l_art;
|
45
|
-
ART *root_r_art;
|
46
|
-
Node *&l_node;
|
47
|
-
Node *&r_node;
|
48
|
-
};
|
49
15
|
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
16
|
+
// classes
|
17
|
+
enum class NType : uint8_t {
|
18
|
+
PREFIX_SEGMENT = 1,
|
19
|
+
LEAF_SEGMENT = 2,
|
20
|
+
LEAF = 3,
|
21
|
+
NODE_4 = 4,
|
22
|
+
NODE_16 = 5,
|
23
|
+
NODE_48 = 6,
|
24
|
+
NODE_256 = 7
|
57
25
|
};
|
58
|
-
|
59
|
-
class Node
|
26
|
+
class ART;
|
27
|
+
class Node;
|
28
|
+
class Prefix;
|
29
|
+
class MetaBlockReader;
|
30
|
+
class MetaBlockWriter;
|
31
|
+
|
32
|
+
// structs
|
33
|
+
struct BlockPointer;
|
34
|
+
struct ARTFlags;
|
35
|
+
|
36
|
+
//! The Node is the swizzleable pointer class of the ART index.
|
37
|
+
//! If the Node pointer is not swizzled, then the leftmost byte identifies the NType.
|
38
|
+
//! The remaining bytes are the position in the respective ART buffer.
|
39
|
+
class Node : public SwizzleablePointer {
|
60
40
|
public:
|
61
|
-
|
62
|
-
|
63
|
-
|
41
|
+
// constants (this allows testing performance with different ART node sizes)
|
42
|
+
|
43
|
+
//! Node prefixes (NOTE: this should always hold: PREFIX_SEGMENT_SIZE >= PREFIX_INLINE_BYTES)
|
44
|
+
static constexpr uint32_t PREFIX_INLINE_BYTES = 8;
|
45
|
+
static constexpr uint32_t PREFIX_SEGMENT_SIZE = 32;
|
46
|
+
//! Node thresholds
|
47
|
+
static constexpr uint8_t NODE_48_SHRINK_THRESHOLD = 12;
|
48
|
+
static constexpr uint8_t NODE_256_SHRINK_THRESHOLD = 36;
|
49
|
+
//! Node sizes
|
50
|
+
static constexpr uint8_t NODE_4_CAPACITY = 4;
|
51
|
+
static constexpr uint8_t NODE_16_CAPACITY = 16;
|
52
|
+
static constexpr uint8_t NODE_48_CAPACITY = 48;
|
53
|
+
static constexpr uint16_t NODE_256_CAPACITY = 256;
|
54
|
+
//! Other constants
|
55
|
+
static constexpr uint8_t EMPTY_MARKER = 48;
|
56
|
+
static constexpr uint32_t LEAF_SEGMENT_SIZE = 8;
|
64
57
|
|
65
58
|
public:
|
66
|
-
|
67
|
-
|
59
|
+
//! Constructs an empty Node
|
60
|
+
Node();
|
61
|
+
//! Constructs a swizzled pointer from a block ID and an offset
|
62
|
+
explicit Node(MetaBlockReader &reader);
|
63
|
+
//! Get a new pointer to a node, might cause a new buffer allocation, and initialize it
|
64
|
+
static void New(ART &art, Node &node, const NType type);
|
65
|
+
//! Free the node (and its subtree)
|
66
|
+
static void Free(ART &art, Node &node);
|
67
|
+
|
68
|
+
//! Retrieve the node type from the leftmost byte
|
69
|
+
inline NType DecodeARTNodeType() const {
|
70
|
+
return NType(type);
|
68
71
|
}
|
69
72
|
|
70
|
-
//!
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
//! Compressed path (prefix)
|
75
|
-
Prefix prefix;
|
76
|
-
|
77
|
-
static void Delete(Node *node);
|
78
|
-
//! Returns the memory size of the node
|
79
|
-
virtual idx_t MemorySize(ART &art, const bool &recurse);
|
80
|
-
//! Get the position of a child corresponding exactly to the specific byte, returns DConstants::INVALID_INDEX if not
|
81
|
-
//! exists
|
82
|
-
virtual idx_t GetChildPos(uint8_t k) {
|
83
|
-
return DConstants::INVALID_INDEX;
|
84
|
-
}
|
85
|
-
//! Get the position of the first child that is greater or equal to the specific byte, or DConstants::INVALID_INDEX
|
86
|
-
//! if there are no children matching the criteria
|
87
|
-
virtual idx_t GetChildGreaterEqual(uint8_t k, bool &equal) {
|
88
|
-
throw InternalException("Unimplemented GetChildGreaterEqual for ART node");
|
73
|
+
//! Set the pointer
|
74
|
+
inline void SetPtr(const SwizzleablePointer ptr) {
|
75
|
+
offset = ptr.offset;
|
76
|
+
buffer_id = ptr.buffer_id;
|
89
77
|
}
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
//!
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
//!
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
//! the
|
104
|
-
|
105
|
-
//!
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
//!
|
111
|
-
|
112
|
-
//!
|
113
|
-
|
114
|
-
//!
|
115
|
-
static
|
116
|
-
//!
|
117
|
-
static
|
118
|
-
|
119
|
-
//!
|
120
|
-
|
121
|
-
//!
|
122
|
-
|
123
|
-
//!
|
124
|
-
|
125
|
-
|
126
|
-
|
127
|
-
|
128
|
-
//!
|
129
|
-
static
|
130
|
-
|
131
|
-
private:
|
132
|
-
//! Serialize internal nodes
|
133
|
-
BlockPointer SerializeInternal(ART &art, duckdb::MetaBlockWriter &writer, InternalType &internal_type);
|
134
|
-
//! Deserialize internal nodes
|
135
|
-
void DeserializeInternal(ART &art, duckdb::MetaBlockReader &reader);
|
78
|
+
|
79
|
+
//! Replace the child node at the respective byte
|
80
|
+
void ReplaceChild(const ART &art, const uint8_t byte, const Node child);
|
81
|
+
//! Insert the child node at byte
|
82
|
+
static void InsertChild(ART &art, Node &node, const uint8_t byte, const Node child);
|
83
|
+
//! Delete the child node at the respective byte
|
84
|
+
static void DeleteChild(ART &art, Node &node, const uint8_t byte);
|
85
|
+
|
86
|
+
//! Get the child for the respective byte in the node
|
87
|
+
optional_ptr<Node> GetChild(ART &art, const uint8_t byte) const;
|
88
|
+
//! Get the first child that is greater or equal to the specific byte
|
89
|
+
optional_ptr<Node> GetNextChild(ART &art, uint8_t &byte) const;
|
90
|
+
|
91
|
+
//! Serialize the node
|
92
|
+
BlockPointer Serialize(ART &art, MetaBlockWriter &writer);
|
93
|
+
//! Deserialize the node
|
94
|
+
void Deserialize(ART &art);
|
95
|
+
|
96
|
+
//! Returns the string representation of the node
|
97
|
+
string ToString(ART &art) const;
|
98
|
+
//! Returns the capacity of the node
|
99
|
+
idx_t GetCapacity() const;
|
100
|
+
//! Returns a reference to the prefix of the node
|
101
|
+
Prefix &GetPrefix(ART &art);
|
102
|
+
//! Returns the matching node type for a given count
|
103
|
+
static NType GetARTNodeTypeByCount(const idx_t count);
|
104
|
+
//! Get references to the different allocators
|
105
|
+
static FixedSizeAllocator &GetAllocator(const ART &art, NType type);
|
106
|
+
|
107
|
+
//! Initializes a merge by fully deserializing the subtree of the node and incrementing its buffer IDs
|
108
|
+
void InitializeMerge(ART &art, const ARTFlags &flags);
|
109
|
+
//! Merge another node into this node
|
110
|
+
bool Merge(ART &art, Node &other);
|
111
|
+
//! Merge two nodes by first resolving their prefixes
|
112
|
+
bool ResolvePrefixes(ART &art, Node &other);
|
113
|
+
//! Merge two nodes that have no prefix or the same prefix
|
114
|
+
bool MergeInternal(ART &art, Node &other);
|
115
|
+
|
116
|
+
//! Vacuum all nodes that exceed their respective vacuum thresholds
|
117
|
+
static void Vacuum(ART &art, Node &node, const ARTFlags &flags);
|
136
118
|
};
|
137
119
|
|
138
120
|
} // namespace duckdb
|