duckdb 0.8.2-dev4025.0 → 0.8.2-dev4126.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/binding.gyp +1 -0
- package/package.json +1 -1
- package/src/duckdb/extension/json/buffered_json_reader.cpp +76 -74
- package/src/duckdb/extension/json/include/buffered_json_reader.hpp +35 -32
- package/src/duckdb/extension/json/include/json_scan.hpp +9 -6
- package/src/duckdb/extension/json/json_scan.cpp +124 -121
- package/src/duckdb/src/catalog/catalog_entry/duck_index_entry.cpp +5 -0
- package/src/duckdb/src/common/radix_partitioning.cpp +1 -1
- package/src/duckdb/src/common/sort/partition_state.cpp +5 -1
- package/src/duckdb/src/core_functions/aggregate/holistic/mode.cpp +1 -1
- package/src/duckdb/src/core_functions/function_list.cpp +7 -0
- package/src/duckdb/src/core_functions/scalar/list/list_cosine_similarity.cpp +78 -0
- package/src/duckdb/src/core_functions/scalar/list/list_distance.cpp +72 -0
- package/src/duckdb/src/core_functions/scalar/list/list_inner_product.cpp +70 -0
- package/src/duckdb/src/execution/index/art/art.cpp +111 -92
- package/src/duckdb/src/execution/index/art/iterator.cpp +21 -27
- package/src/duckdb/src/execution/index/art/leaf.cpp +72 -153
- package/src/duckdb/src/execution/index/art/node.cpp +109 -203
- package/src/duckdb/src/execution/index/art/node16.cpp +32 -64
- package/src/duckdb/src/execution/index/art/node256.cpp +38 -53
- package/src/duckdb/src/execution/index/art/node4.cpp +31 -62
- package/src/duckdb/src/execution/index/art/node48.cpp +43 -65
- package/src/duckdb/src/execution/index/art/prefix.cpp +70 -141
- package/src/duckdb/src/execution/index/fixed_size_allocator.cpp +345 -0
- package/src/duckdb/src/execution/index/fixed_size_buffer.cpp +74 -0
- package/src/duckdb/src/execution/operator/aggregate/physical_window.cpp +1 -1
- package/src/duckdb/src/execution/operator/schema/physical_create_art_index.cpp +1 -1
- package/src/duckdb/src/function/table/system/duckdb_columns.cpp +3 -1
- package/src/duckdb/src/function/table/version/pragma_version.cpp +2 -2
- package/src/duckdb/src/include/duckdb/catalog/catalog_entry/duck_index_entry.hpp +2 -0
- package/src/duckdb/src/include/duckdb/common/optional_idx.hpp +1 -1
- package/src/duckdb/src/include/duckdb/core_functions/scalar/list_functions.hpp +51 -0
- package/src/duckdb/src/include/duckdb/execution/index/art/art.hpp +17 -7
- package/src/duckdb/src/include/duckdb/execution/index/art/iterator.hpp +5 -5
- package/src/duckdb/src/include/duckdb/execution/index/art/leaf.hpp +10 -16
- package/src/duckdb/src/include/duckdb/execution/index/art/node.hpp +38 -116
- package/src/duckdb/src/include/duckdb/execution/index/art/node16.hpp +17 -18
- package/src/duckdb/src/include/duckdb/execution/index/art/node256.hpp +17 -23
- package/src/duckdb/src/include/duckdb/execution/index/art/node4.hpp +17 -18
- package/src/duckdb/src/include/duckdb/execution/index/art/node48.hpp +17 -24
- package/src/duckdb/src/include/duckdb/execution/index/art/prefix.hpp +16 -22
- package/src/duckdb/src/include/duckdb/execution/index/fixed_size_allocator.hpp +126 -0
- package/src/duckdb/src/include/duckdb/execution/index/fixed_size_buffer.hpp +79 -0
- package/src/duckdb/src/include/duckdb/execution/index/index_pointer.hpp +96 -0
- package/src/duckdb/src/include/duckdb/parallel/task_scheduler.hpp +1 -1
- package/src/duckdb/src/include/duckdb/storage/block.hpp +1 -1
- package/src/duckdb/src/include/duckdb/storage/index.hpp +10 -8
- package/src/duckdb/src/include/duckdb/storage/metadata/metadata_writer.hpp +3 -0
- package/src/duckdb/src/planner/binder/expression/bind_function_expression.cpp +14 -5
- package/src/duckdb/src/storage/checkpoint/table_data_writer.cpp +2 -3
- package/src/duckdb/src/storage/checkpoint_manager.cpp +16 -21
- package/src/duckdb/src/storage/data_table.cpp +3 -3
- package/src/duckdb/src/storage/index.cpp +7 -1
- package/src/duckdb/src/storage/metadata/metadata_manager.cpp +21 -21
- package/src/duckdb/src/storage/standard_buffer_manager.cpp +0 -8
- package/src/duckdb/src/storage/storage_info.cpp +1 -1
- package/src/duckdb/src/storage/table_index_list.cpp +1 -1
- package/src/duckdb/src/transaction/commit_state.cpp +5 -1
- package/src/duckdb/ub_src_core_functions_scalar_list.cpp +6 -0
- package/src/duckdb/ub_src_execution_index.cpp +4 -0
- package/src/duckdb/ub_src_execution_index_art.cpp +0 -2
- package/src/duckdb/src/execution/index/art/fixed_size_allocator.cpp +0 -238
- package/src/duckdb/src/include/duckdb/execution/index/art/fixed_size_allocator.hpp +0 -115
@@ -7,8 +7,8 @@
|
|
7
7
|
//===----------------------------------------------------------------------===//
|
8
8
|
#pragma once
|
9
9
|
|
10
|
+
#include "duckdb/execution/index/fixed_size_allocator.hpp"
|
10
11
|
#include "duckdb/execution/index/art/art.hpp"
|
11
|
-
#include "duckdb/execution/index/art/fixed_size_allocator.hpp"
|
12
12
|
#include "duckdb/execution/index/art/node.hpp"
|
13
13
|
|
14
14
|
namespace duckdb {
|
@@ -17,30 +17,28 @@ namespace duckdb {
|
|
17
17
|
class ARTKey;
|
18
18
|
|
19
19
|
//! The Prefix is a special node type that contains up to PREFIX_SIZE bytes, and one byte for the count,
|
20
|
-
//! and a Node pointer. This pointer either points to another
|
21
|
-
//! node or the 'actual' ART node.
|
20
|
+
//! and a Node pointer. This pointer either points to a prefix node or another Node.
|
22
21
|
class Prefix {
|
23
22
|
public:
|
23
|
+
//! Delete copy constructors, as any Prefix can never own its memory
|
24
|
+
Prefix(const Prefix &) = delete;
|
25
|
+
Prefix &operator=(const Prefix &) = delete;
|
26
|
+
|
24
27
|
//! Up to PREFIX_SIZE bytes of prefix data and the count
|
25
28
|
uint8_t data[Node::PREFIX_SIZE + 1];
|
26
|
-
//! A pointer to the next
|
29
|
+
//! A pointer to the next Node
|
27
30
|
Node ptr;
|
28
31
|
|
29
32
|
public:
|
30
33
|
//! Get a new empty prefix node, might cause a new buffer allocation
|
31
34
|
static Prefix &New(ART &art, Node &node);
|
32
35
|
//! Create a new prefix node containing a single byte and a pointer to a next node
|
33
|
-
static Prefix &New(ART &art, Node &node, uint8_t byte, Node next);
|
36
|
+
static Prefix &New(ART &art, Node &node, uint8_t byte, const Node &next = Node());
|
34
37
|
//! Get a new chain of prefix nodes, might cause new buffer allocations,
|
35
38
|
//! with the node parameter holding the tail of the chain
|
36
39
|
static void New(ART &art, reference<Node> &node, const ARTKey &key, const uint32_t depth, uint32_t count);
|
37
40
|
//! Free the node (and its subtree)
|
38
41
|
static void Free(ART &art, Node &node);
|
39
|
-
//! Get a reference to the prefix
|
40
|
-
static inline Prefix &Get(const ART &art, const Node ptr) {
|
41
|
-
D_ASSERT(!ptr.IsSerialized());
|
42
|
-
return *Node::GetAllocator(art, NType::PREFIX).Get<Prefix>(ptr);
|
43
|
-
}
|
44
42
|
|
45
43
|
//! Initializes a merge by incrementing the buffer ID of the prefix and its child node(s)
|
46
44
|
static void InitializeMerge(ART &art, Node &node, const ARTFlags &flags);
|
@@ -51,13 +49,17 @@ public:
|
|
51
49
|
static void Concatenate(ART &art, Node &prefix_node, const uint8_t byte, Node &child_prefix_node);
|
52
50
|
//! Traverse a prefix and a key until (1) encountering a non-prefix node, or (2) encountering
|
53
51
|
//! a mismatching byte, in which case depth indexes the mismatching byte in the key
|
54
|
-
static idx_t Traverse(ART &art, reference<Node> &prefix_node, const ARTKey &key, idx_t &depth);
|
52
|
+
static idx_t Traverse(ART &art, reference<const Node> &prefix_node, const ARTKey &key, idx_t &depth);
|
53
|
+
//! Mutable variant of Traverse: traverse a prefix and a key until (1) encountering a non-prefix node, or (2) encountering
|
54
|
+
//! a mismatching byte, in which case depth indexes the mismatching byte in the key
|
55
|
+
static idx_t TraverseMutable(ART &art, reference<Node> &prefix_node, const ARTKey &key, idx_t &depth);
|
55
56
|
//! Traverse two prefixes to find (1) that they match (so far), or (2) that they have a mismatching position,
|
56
|
-
//! or (3) that one prefix contains the other prefix
|
57
|
+
//! or (3) that one prefix contains the other prefix. This function aids in merging Nodes, and, therefore,
|
58
|
+
//! the nodes are not const
|
57
59
|
static bool Traverse(ART &art, reference<Node> &l_node, reference<Node> &r_node, idx_t &mismatch_position);
|
58
60
|
//! Returns the byte at position
|
59
61
|
static inline uint8_t GetByte(const ART &art, const Node &prefix_node, const idx_t position) {
|
60
|
-
auto prefix = Prefix
|
62
|
+
auto &prefix = Node::Ref<const Prefix>(art, prefix_node, NType::PREFIX);
|
61
63
|
D_ASSERT(position < Node::PREFIX_SIZE);
|
62
64
|
D_ASSERT(position < prefix.data[Node::PREFIX_SIZE]);
|
63
65
|
return prefix.data[position];
|
@@ -71,12 +73,7 @@ public:
|
|
71
73
|
static void Split(ART &art, reference<Node> &prefix_node, Node &child_node, idx_t position);
|
72
74
|
|
73
75
|
//! Returns the string representation of the node, or only traverses and verifies the node and its subtree
|
74
|
-
static string VerifyAndToString(ART &art, Node &node, const bool only_verify);
|
75
|
-
|
76
|
-
//! Serialize this node and all subsequent nodes
|
77
|
-
static BlockPointer Serialize(ART &art, Node &node, MetadataWriter &writer);
|
78
|
-
//! Deserialize this node and all subsequent prefix nodes
|
79
|
-
static void Deserialize(ART &art, Node &node, MetadataReader &reader);
|
76
|
+
static string VerifyAndToString(ART &art, const Node &node, const bool only_verify);
|
80
77
|
|
81
78
|
//! Vacuum the child of the node
|
82
79
|
static void Vacuum(ART &art, Node &node, const ARTFlags &flags);
|
@@ -88,8 +85,5 @@ private:
|
|
88
85
|
//! Appends the other_prefix and all its subsequent prefix nodes to this prefix node.
|
89
86
|
//! Also frees all copied/appended nodes
|
90
87
|
void Append(ART &art, Node other_prefix);
|
91
|
-
//! Get the total count of bytes in the chain of prefixes, with the node reference pointing to first non-prefix node
|
92
|
-
static idx_t TotalCount(ART &art, reference<Node> &node);
|
93
88
|
};
|
94
|
-
|
95
89
|
} // namespace duckdb
|
@@ -0,0 +1,126 @@
|
|
1
|
+
//===----------------------------------------------------------------------===//
|
2
|
+
// DuckDB
|
3
|
+
//
|
4
|
+
// duckdb/execution/index/fixed_size_allocator.hpp
|
5
|
+
//
|
6
|
+
//
|
7
|
+
//===----------------------------------------------------------------------===//
|
8
|
+
|
9
|
+
#pragma once
|
10
|
+
|
11
|
+
#include "duckdb/common/types/validity_mask.hpp"
|
12
|
+
#include "duckdb/common/unordered_set.hpp"
|
13
|
+
#include "duckdb/storage/buffer_manager.hpp"
|
14
|
+
#include "duckdb/storage/metadata/metadata_manager.hpp"
|
15
|
+
#include "duckdb/storage/metadata/metadata_writer.hpp"
|
16
|
+
#include "duckdb/execution/index/fixed_size_buffer.hpp"
|
17
|
+
#include "duckdb/execution/index/index_pointer.hpp"
|
18
|
+
#include "duckdb/common/unordered_map.hpp"
|
19
|
+
#include "duckdb/common/constants.hpp"
|
20
|
+
#include "duckdb/common/map.hpp"
|
21
|
+
|
22
|
+
namespace duckdb {
|
23
|
+
|
24
|
+
//! The FixedSizeAllocator provides pointers to fixed-size memory segments of pre-allocated memory buffers.
|
25
|
+
//! The pointers are IndexPointers, and the leftmost byte (metadata) must always be zero.
|
26
|
+
//! It is also possible to directly request a C++ pointer to the underlying segment of an index pointer.
|
27
|
+
class FixedSizeAllocator {
|
28
|
+
public:
|
29
|
+
//! We can vacuum 10% or more of the total in-memory footprint
|
30
|
+
static constexpr uint8_t VACUUM_THRESHOLD = 10;
|
31
|
+
|
32
|
+
//! Constants for fast offset calculations in the bitmask
|
33
|
+
static constexpr idx_t BASE[] = {0x00000000FFFFFFFF, 0x0000FFFF, 0x00FF, 0x0F, 0x3, 0x1};
|
34
|
+
static constexpr uint8_t SHIFT[] = {32, 16, 8, 4, 2, 1};
|
35
|
+
|
36
|
+
public:
|
37
|
+
FixedSizeAllocator(const idx_t segment_size, BlockManager &block_manager);
|
38
|
+
|
39
|
+
//! Block manager of the database instance
|
40
|
+
BlockManager &block_manager;
|
41
|
+
//! Buffer manager of the database instance
|
42
|
+
BufferManager &buffer_manager;
|
43
|
+
//! Metadata manager for (de)serialization
|
44
|
+
MetadataManager &metadata_manager;
|
45
|
+
|
46
|
+
public:
|
47
|
+
//! Get a new IndexPointer to a segment, might cause a new buffer allocation
|
48
|
+
IndexPointer New();
|
49
|
+
//! Free the segment of the IndexPointer
|
50
|
+
void Free(const IndexPointer ptr);
|
51
|
+
//! Returns a pointer of type T to a segment. If dirty is false, then T should be a const class
|
52
|
+
template <class T>
|
53
|
+
inline T *Get(const IndexPointer ptr, const bool dirty = true) {
|
54
|
+
return (T *)Get(ptr, dirty);
|
55
|
+
}
|
56
|
+
|
57
|
+
//! Resets the allocator, e.g., during 'DELETE FROM table'
|
58
|
+
void Reset();
|
59
|
+
|
60
|
+
//! Returns the in-memory usage in bytes
|
61
|
+
inline idx_t GetMemoryUsage() const;
|
62
|
+
|
63
|
+
//! Returns the upper bound of the available buffer IDs, i.e., upper_bound > max_buffer_id
|
64
|
+
idx_t GetUpperBoundBufferId() const;
|
65
|
+
//! Merge another FixedSizeAllocator into this allocator. Both must have the same segment size
|
66
|
+
void Merge(FixedSizeAllocator &other);
|
67
|
+
|
68
|
+
//! Initialize a vacuum operation, and return true, if the allocator needs a vacuum
|
69
|
+
bool InitializeVacuum();
|
70
|
+
//! Finalize a vacuum operation by freeing all vacuumed buffers
|
71
|
+
void FinalizeVacuum();
|
72
|
+
//! Returns true, if an IndexPointer qualifies for a vacuum operation, and false otherwise
|
73
|
+
inline bool NeedsVacuum(const IndexPointer ptr) const {
|
74
|
+
if (vacuum_buffers.find(ptr.GetBufferId()) != vacuum_buffers.end()) {
|
75
|
+
return true;
|
76
|
+
}
|
77
|
+
return false;
|
78
|
+
}
|
79
|
+
//! Vacuums an IndexPointer
|
80
|
+
IndexPointer VacuumPointer(const IndexPointer ptr);
|
81
|
+
|
82
|
+
//! Serializes all in-memory buffers and the metadata
|
83
|
+
BlockPointer Serialize(MetadataWriter &writer);
|
84
|
+
//! Deserializes all metadata
|
85
|
+
void Deserialize(const BlockPointer &block_pointer);
|
86
|
+
|
87
|
+
private:
|
88
|
+
//! Allocation size of one segment in a buffer
|
89
|
+
//! We only need this value to calculate bitmask_count, bitmask_offset, and
|
90
|
+
//! available_segments_per_buffer
|
91
|
+
idx_t segment_size;
|
92
|
+
|
93
|
+
//! Number of validity_t values in the bitmask
|
94
|
+
idx_t bitmask_count;
|
95
|
+
//! First starting byte of the payload (segments)
|
96
|
+
idx_t bitmask_offset;
|
97
|
+
//! Number of possible segment allocations per buffer
|
98
|
+
idx_t available_segments_per_buffer;
|
99
|
+
|
100
|
+
//! Total number of allocated segments in all buffers
|
101
|
+
//! We can recalculate this by iterating over all buffers
|
102
|
+
idx_t total_segment_count;
|
103
|
+
|
104
|
+
//! Buffers containing the segments
|
105
|
+
unordered_map<idx_t, FixedSizeBuffer> buffers;
|
106
|
+
//! Buffers with free space
|
107
|
+
unordered_set<idx_t> buffers_with_free_space;
|
108
|
+
//! Buffers qualifying for a vacuum (helper field to allow for fast NeedsVacuum checks)
|
109
|
+
unordered_set<idx_t> vacuum_buffers;
|
110
|
+
|
111
|
+
private:
|
112
|
+
//! Returns the data_ptr_t to a segment, and, if dirty is true, sets the dirty flag of the buffer containing that segment
|
113
|
+
inline data_ptr_t Get(const IndexPointer ptr, const bool dirty = true) {
|
114
|
+
D_ASSERT(ptr.GetOffset() < available_segments_per_buffer);
|
115
|
+
D_ASSERT(buffers.find(ptr.GetBufferId()) != buffers.end());
|
116
|
+
auto &buffer = buffers.find(ptr.GetBufferId())->second;
|
117
|
+
auto buffer_ptr = buffer.Get(dirty);
|
118
|
+
return buffer_ptr + ptr.GetOffset() * segment_size + bitmask_offset;
|
119
|
+
}
|
120
|
+
//! Returns the first free offset in a bitmask
|
121
|
+
uint32_t GetOffset(ValidityMask &mask, const idx_t segment_count);
|
122
|
+
//! Returns an available buffer id
|
123
|
+
idx_t GetAvailableBufferId() const;
|
124
|
+
};
|
125
|
+
|
126
|
+
} // namespace duckdb
|
@@ -0,0 +1,79 @@
|
|
1
|
+
//===----------------------------------------------------------------------===//
|
2
|
+
// DuckDB
|
3
|
+
//
|
4
|
+
// duckdb/execution/index/fixed_size_buffer.hpp
|
5
|
+
//
|
6
|
+
//
|
7
|
+
//===----------------------------------------------------------------------===//
|
8
|
+
|
9
|
+
#pragma once
|
10
|
+
|
11
|
+
#include "duckdb/common/typedefs.hpp"
|
12
|
+
#include "duckdb/storage/buffer/block_handle.hpp"
|
13
|
+
#include "duckdb/storage/buffer/buffer_handle.hpp"
|
14
|
+
|
15
|
+
namespace duckdb {
|
16
|
+
|
17
|
+
class FixedSizeAllocator;
|
18
|
+
class MetadataWriter;
|
19
|
+
|
20
|
+
//! A fixed-size buffer holds fixed-size segments of data. It lazily deserializes a buffer, if on-disk and not
|
21
|
+
//! yet in memory, and it only serializes dirty and non-written buffers to disk during
|
22
|
+
//! serialization.
|
23
|
+
class FixedSizeBuffer {
|
24
|
+
public:
|
25
|
+
//! Constructor for a new in-memory buffer
|
26
|
+
explicit FixedSizeBuffer(BlockManager &block_manager);
|
27
|
+
//! Constructor for deserializing buffer metadata from disk
|
28
|
+
FixedSizeBuffer(BlockManager &block_manager, const idx_t segment_count, const block_id_t &block_id);
|
29
|
+
|
30
|
+
//! Block manager of the database instance
|
31
|
+
BlockManager &block_manager;
|
32
|
+
|
33
|
+
//! The number of allocated segments
|
34
|
+
idx_t segment_count;
|
35
|
+
|
36
|
+
//! True: the in-memory buffer is no longer consistent with a (possibly existing) copy on disk
|
37
|
+
bool dirty;
|
38
|
+
//! True: can be vacuumed after the vacuum operation
|
39
|
+
bool vacuum;
|
40
|
+
|
41
|
+
public:
|
42
|
+
//! Returns true, if the buffer is in-memory
|
43
|
+
inline bool InMemory() const {
|
44
|
+
return buffer_handle.IsValid();
|
45
|
+
}
|
46
|
+
//! Returns true, if the block is on-disk
|
47
|
+
inline bool OnDisk() const {
|
48
|
+
return (block_handle != nullptr) && (block_handle->BlockId() < MAXIMUM_BLOCK);
|
49
|
+
}
|
50
|
+
//! Returns the block ID
|
51
|
+
inline block_id_t BlockId() const {
|
52
|
+
D_ASSERT(OnDisk());
|
53
|
+
return block_handle->BlockId();
|
54
|
+
}
|
55
|
+
//! Returns a pointer to the buffer in memory, and calls Pin, if the buffer is not in memory
|
56
|
+
inline data_ptr_t Get(const bool dirty_p = true) {
|
57
|
+
if (!InMemory()) {
|
58
|
+
Pin();
|
59
|
+
}
|
60
|
+
if (dirty_p) {
|
61
|
+
dirty = dirty_p;
|
62
|
+
}
|
63
|
+
return buffer_handle.Ptr();
|
64
|
+
}
|
65
|
+
//! Destroys the in-memory buffer and the on-disk block
|
66
|
+
void Destroy();
|
67
|
+
//! Serializes a buffer (if dirty or not on disk)
|
68
|
+
void Serialize();
|
69
|
+
//! Pin a buffer (if not in-memory)
|
70
|
+
void Pin();
|
71
|
+
|
72
|
+
private:
|
73
|
+
//! The buffer handle of the in-memory buffer
|
74
|
+
BufferHandle buffer_handle;
|
75
|
+
//! The block handle of the on-disk buffer
|
76
|
+
shared_ptr<BlockHandle> block_handle;
|
77
|
+
};
|
78
|
+
|
79
|
+
} // namespace duckdb
|
@@ -0,0 +1,96 @@
|
|
1
|
+
//===----------------------------------------------------------------------===//
|
2
|
+
// DuckDB
|
3
|
+
//
|
4
|
+
// duckdb/execution/index/index_pointer.hpp
|
5
|
+
//
|
6
|
+
//
|
7
|
+
//===----------------------------------------------------------------------===//
|
8
|
+
|
9
|
+
#pragma once
|
10
|
+
|
11
|
+
#include "duckdb/common/typedefs.hpp"
|
12
|
+
|
13
|
+
namespace duckdb {
|
14
|
+
|
15
|
+
class IndexPointer {
|
16
|
+
public:
|
17
|
+
//! Bit-shifting
|
18
|
+
static constexpr idx_t SHIFT_OFFSET = 32;
|
19
|
+
static constexpr idx_t SHIFT_METADATA = 56;
|
20
|
+
//! AND operations
|
21
|
+
static constexpr idx_t AND_OFFSET = 0x0000000000FFFFFF;
|
22
|
+
static constexpr idx_t AND_BUFFER_ID = 0x00000000FFFFFFFF;
|
23
|
+
static constexpr idx_t AND_METADATA = 0xFF00000000000000;
|
24
|
+
|
25
|
+
public:
|
26
|
+
//! Constructs an empty IndexPointer
|
27
|
+
IndexPointer() : data(0) {};
|
28
|
+
//! Constructs an in-memory IndexPointer with a buffer ID and an offset
|
29
|
+
IndexPointer(const uint32_t buffer_id, const uint32_t offset) : data(0) {
|
30
|
+
auto shifted_offset = ((idx_t)offset) << SHIFT_OFFSET;
|
31
|
+
data += shifted_offset;
|
32
|
+
data += buffer_id;
|
33
|
+
};
|
34
|
+
|
35
|
+
public:
|
36
|
+
//! Get data (all 64 bits)
|
37
|
+
inline idx_t Get() const {
|
38
|
+
return data;
|
39
|
+
}
|
40
|
+
//! Set data (all 64 bits)
|
41
|
+
inline void Set(const idx_t data_p) {
|
42
|
+
data = data_p;
|
43
|
+
}
|
44
|
+
|
45
|
+
//! Returns false, if the metadata is empty
|
46
|
+
inline bool HasMetadata() const {
|
47
|
+
return data & AND_METADATA;
|
48
|
+
}
|
49
|
+
//! Get metadata (zero to 7th bit)
|
50
|
+
inline uint8_t GetMetadata() const {
|
51
|
+
return data >> SHIFT_METADATA;
|
52
|
+
}
|
53
|
+
//! Set metadata (zero to 7th bit)
|
54
|
+
inline void SetMetadata(const uint8_t metadata) {
|
55
|
+
data += (idx_t)metadata << SHIFT_METADATA;
|
56
|
+
}
|
57
|
+
|
58
|
+
//! Get the offset (8th to 31st bit)
|
59
|
+
inline idx_t GetOffset() const {
|
60
|
+
auto offset = data >> SHIFT_OFFSET;
|
61
|
+
return offset & AND_OFFSET;
|
62
|
+
}
|
63
|
+
//! Get the buffer ID (32nd to 63rd bit)
|
64
|
+
inline idx_t GetBufferId() const {
|
65
|
+
return data & AND_BUFFER_ID;
|
66
|
+
}
|
67
|
+
|
68
|
+
//! Resets the IndexPointer
|
69
|
+
inline void Clear() {
|
70
|
+
data = 0;
|
71
|
+
}
|
72
|
+
|
73
|
+
//! Adds an idx_t to a buffer ID, the rightmost 32 bits of data contain the buffer ID
|
74
|
+
inline void IncreaseBufferId(const idx_t summand) {
|
75
|
+
data += summand;
|
76
|
+
}
|
77
|
+
|
78
|
+
//! Comparison operator
|
79
|
+
inline bool operator==(const IndexPointer &ptr) const {
|
80
|
+
return data == ptr.data;
|
81
|
+
}
|
82
|
+
|
83
|
+
private:
|
84
|
+
//! Data holds all the information contained in an IndexPointer
|
85
|
+
//! [0 - 7: metadata,
|
86
|
+
//! 8 - 31: offset, 32 - 63: buffer ID]
|
87
|
+
//! NOTE: we do not use bit fields because when using bit fields Windows compiles
|
88
|
+
//! the IndexPointer class into 16 bytes instead of the intended 8 bytes, doubling the
|
89
|
+
//! space requirements
|
90
|
+
//! https://learn.microsoft.com/en-us/cpp/cpp/cpp-bit-fields?view=msvc-170
|
91
|
+
idx_t data;
|
92
|
+
};
|
93
|
+
|
94
|
+
static_assert(sizeof(IndexPointer) == sizeof(idx_t), "Invalid size for IndexPointer.");
|
95
|
+
|
96
|
+
} // namespace duckdb
|
@@ -54,8 +54,6 @@ public:
|
|
54
54
|
|
55
55
|
//! Attached database instance
|
56
56
|
AttachedDatabase &db;
|
57
|
-
//! Buffer manager of the database instance
|
58
|
-
BufferManager &buffer_manager;
|
59
57
|
|
60
58
|
public:
|
61
59
|
//! Initialize a single predicate scan on the index with the given expression and column IDs
|
@@ -85,6 +83,10 @@ public:
|
|
85
83
|
//! Performs constraint checking for a chunk of input data
|
86
84
|
virtual void CheckConstraintsForChunk(DataChunk &input, ConflictManager &conflict_manager) = 0;
|
87
85
|
|
86
|
+
//! Deletes all data from the index. The lock obtained from InitializeLock must be held
|
87
|
+
virtual void CommitDrop(IndexLock &index_lock) = 0;
|
88
|
+
//! Deletes all data from the index
|
89
|
+
void CommitDrop();
|
88
90
|
//! Delete a chunk of entries from the index. The lock obtained from InitializeLock must be held
|
89
91
|
virtual void Delete(IndexLock &state, DataChunk &entries, Vector &row_identifiers) = 0;
|
90
92
|
//! Obtains a lock and calls Delete while holding that lock
|
@@ -125,11 +127,11 @@ public:
|
|
125
127
|
return (constraint_type == IndexConstraintType::FOREIGN);
|
126
128
|
}
|
127
129
|
|
128
|
-
//! Serializes the index
|
130
|
+
//! Serializes the index to disk
|
129
131
|
virtual BlockPointer Serialize(MetadataWriter &writer);
|
130
|
-
//! Returns the serialized
|
131
|
-
BlockPointer
|
132
|
-
return
|
132
|
+
//! Returns the serialized root block pointer
|
133
|
+
BlockPointer GetRootBlockPointer() const {
|
134
|
+
return root_block_pointer;
|
133
135
|
}
|
134
136
|
|
135
137
|
//! Execute the index expressions on an input chunk
|
@@ -139,8 +141,8 @@ public:
|
|
139
141
|
protected:
|
140
142
|
//! Lock used for any changes to the index
|
141
143
|
mutex lock;
|
142
|
-
//! Pointer to
|
143
|
-
BlockPointer
|
144
|
+
//! Pointer to the index on disk
|
145
|
+
BlockPointer root_block_pointer;
|
144
146
|
|
145
147
|
private:
|
146
148
|
//! Bound expressions used during expression execution
|
@@ -14,6 +14,9 @@ namespace duckdb {
|
|
14
14
|
|
15
15
|
class MetadataWriter : public Serializer {
|
16
16
|
public:
|
17
|
+
MetadataWriter(const MetadataWriter &) = delete;
|
18
|
+
MetadataWriter &operator=(const MetadataWriter &) = delete;
|
19
|
+
|
17
20
|
explicit MetadataWriter(MetadataManager &manager);
|
18
21
|
~MetadataWriter() override;
|
19
22
|
|
@@ -69,21 +69,30 @@ BindResult ExpressionBinder::BindExpression(FunctionExpression &function, idx_t
|
|
69
69
|
}
|
70
70
|
|
71
71
|
switch (func->type) {
|
72
|
-
case CatalogType::SCALAR_FUNCTION_ENTRY:
|
72
|
+
case CatalogType::SCALAR_FUNCTION_ENTRY: {
|
73
73
|
// scalar function
|
74
74
|
|
75
75
|
// check for lambda parameters, ignore ->> operator (JSON extension)
|
76
|
+
bool try_bind_lambda = false;
|
76
77
|
if (function.function_name != "->>") {
|
77
78
|
for (auto &child : function.children) {
|
78
79
|
if (child->expression_class == ExpressionClass::LAMBDA) {
|
79
|
-
|
80
|
+
try_bind_lambda = true;
|
80
81
|
}
|
81
82
|
}
|
82
83
|
}
|
83
84
|
|
85
|
+
if (try_bind_lambda) {
|
86
|
+
auto result = BindLambdaFunction(function, func->Cast<ScalarFunctionCatalogEntry>(), depth);
|
87
|
+
if (!result.HasError()) {
|
88
|
+
// Lambda bind successful
|
89
|
+
return result;
|
90
|
+
}
|
91
|
+
}
|
92
|
+
|
84
93
|
// other scalar function
|
85
94
|
return BindFunction(function, func->Cast<ScalarFunctionCatalogEntry>(), depth);
|
86
|
-
|
95
|
+
}
|
87
96
|
case CatalogType::MACRO_ENTRY:
|
88
97
|
// macro function
|
89
98
|
return BindMacro(function, func->Cast<ScalarMacroCatalogEntry>(), depth, expr_ptr);
|
@@ -134,7 +143,7 @@ BindResult ExpressionBinder::BindLambdaFunction(FunctionExpression &function, Sc
|
|
134
143
|
string error;
|
135
144
|
|
136
145
|
if (function.children.size() != 2) {
|
137
|
-
|
146
|
+
return BindResult("Invalid function arguments!");
|
138
147
|
}
|
139
148
|
D_ASSERT(function.children[1]->GetExpressionClass() == ExpressionClass::LAMBDA);
|
140
149
|
|
@@ -148,7 +157,7 @@ BindResult ExpressionBinder::BindLambdaFunction(FunctionExpression &function, Sc
|
|
148
157
|
auto &list_child = BoundExpression::GetExpression(*function.children[0]);
|
149
158
|
if (list_child->return_type.id() != LogicalTypeId::LIST && list_child->return_type.id() != LogicalTypeId::SQLNULL &&
|
150
159
|
list_child->return_type.id() != LogicalTypeId::UNKNOWN) {
|
151
|
-
|
160
|
+
return BindResult(" Invalid LIST argument to " + function.function_name + "!");
|
152
161
|
}
|
153
162
|
|
154
163
|
LogicalType list_child_type = list_child->return_type.id();
|
@@ -69,9 +69,8 @@ void SingleFileTableDataWriter::FinalizeTable(TableStatistics &&global_stats, Da
|
|
69
69
|
|
70
70
|
// Write-off to metadata block ids and offsets of indexes
|
71
71
|
meta_data_writer.Write<idx_t>(index_pointers.size());
|
72
|
-
for (auto &
|
73
|
-
meta_data_writer.Write<
|
74
|
-
meta_data_writer.Write<uint32_t>(block_info.offset);
|
72
|
+
for (const auto &index_pointer : index_pointers) {
|
73
|
+
meta_data_writer.Write<BlockPointer>(index_pointer);
|
75
74
|
}
|
76
75
|
}
|
77
76
|
|
@@ -262,7 +262,8 @@ void CheckpointWriter::WriteSchema(SchemaCatalogEntry &schema) {
|
|
262
262
|
void CheckpointReader::ReadSchema(ClientContext &context, MetadataReader &reader) {
|
263
263
|
// read the schema and create it in the catalog
|
264
264
|
auto info = CatalogEntry::Deserialize(reader);
|
265
|
-
|
265
|
+
|
266
|
+
// we set create conflict to IGNORE_ON_CONFLICT, so that we can ignore a failure when recreating the main schema
|
266
267
|
info->on_conflict = OnCreateConflict::IGNORE_ON_CONFLICT;
|
267
268
|
catalog.CreateSchema(context, info->Cast<CreateSchemaInfo>());
|
268
269
|
|
@@ -336,14 +337,12 @@ void CheckpointReader::ReadSequence(ClientContext &context, MetadataReader &read
|
|
336
337
|
// Indexes
|
337
338
|
//===--------------------------------------------------------------------===//
|
338
339
|
void CheckpointWriter::WriteIndex(IndexCatalogEntry &index_catalog) {
|
339
|
-
//
|
340
|
-
//
|
341
|
-
auto
|
340
|
+
// we write the index data in WriteTableData
|
341
|
+
// here, we only write the root pointer
|
342
|
+
const auto root_block_pointer = index_catalog.index->GetRootBlockPointer();
|
342
343
|
auto &metadata_writer = GetMetadataWriter();
|
343
344
|
index_catalog.Serialize(metadata_writer);
|
344
|
-
|
345
|
-
metadata_writer.Write(root_offset.block_id);
|
346
|
-
metadata_writer.Write(root_offset.offset);
|
345
|
+
metadata_writer.Write(root_block_pointer);
|
347
346
|
}
|
348
347
|
|
349
348
|
void CheckpointReader::ReadIndex(ClientContext &context, MetadataReader &reader) {
|
@@ -358,10 +357,8 @@ void CheckpointReader::ReadIndex(ClientContext &context, MetadataReader &reader)
|
|
358
357
|
auto &index_catalog = schema_catalog.CreateIndex(context, index_info, table_catalog)->Cast<DuckIndexEntry>();
|
359
358
|
index_catalog.info = table_catalog.GetStorage().info;
|
360
359
|
|
361
|
-
// we deserialize the index lazily, i.e., we
|
362
|
-
|
363
|
-
auto root_block_id = reader.Read<block_id_t>();
|
364
|
-
auto root_offset = reader.Read<uint32_t>();
|
360
|
+
// we deserialize the index lazily, i.e., we only load the root block pointer
|
361
|
+
const auto index_block_pointer = reader.Read<BlockPointer>();
|
365
362
|
|
366
363
|
// obtain the expressions of the ART from the index metadata
|
367
364
|
vector<unique_ptr<Expression>> unbound_expressions;
|
@@ -401,9 +398,8 @@ void CheckpointReader::ReadIndex(ClientContext &context, MetadataReader &reader)
|
|
401
398
|
switch (index_info.index_type) {
|
402
399
|
case IndexType::ART: {
|
403
400
|
auto &storage = table_catalog.GetStorage();
|
404
|
-
auto art =
|
405
|
-
|
406
|
-
index_info.constraint_type, storage.db, nullptr, BlockPointer(root_block_id, root_offset));
|
401
|
+
auto art = make_uniq<ART>(index_info.column_ids, TableIOManager::Get(storage), std::move(unbound_expressions),
|
402
|
+
index_info.constraint_type, storage.db, nullptr, index_block_pointer);
|
407
403
|
index_catalog.index = art.get();
|
408
404
|
storage.info->indexes.AddIndex(std::move(art));
|
409
405
|
break;
|
@@ -450,7 +446,7 @@ void CheckpointReader::ReadTableMacro(ClientContext &context, MetadataReader &re
|
|
450
446
|
// Table Metadata
|
451
447
|
//===--------------------------------------------------------------------===//
|
452
448
|
void CheckpointWriter::WriteTable(TableCatalogEntry &table) {
|
453
|
-
// write the table
|
449
|
+
// write the table metadata
|
454
450
|
table.Serialize(GetMetadataWriter());
|
455
451
|
// now we need to write the table data.
|
456
452
|
if (auto writer = GetTableDataWriter(table)) {
|
@@ -483,12 +479,11 @@ void CheckpointReader::ReadTableData(ClientContext &context, MetadataReader &rea
|
|
483
479
|
data_reader.ReadTableData();
|
484
480
|
bound_info.data->total_rows = reader.Read<idx_t>();
|
485
481
|
|
486
|
-
//
|
487
|
-
idx_t
|
488
|
-
for (idx_t i = 0; i <
|
489
|
-
auto
|
490
|
-
|
491
|
-
bound_info.indexes.emplace_back(idx_block_id, idx_offset);
|
482
|
+
// get the root block pointers of each index
|
483
|
+
idx_t index_count = reader.Read<idx_t>();
|
484
|
+
for (idx_t i = 0; i < index_count; i++) {
|
485
|
+
const auto index_pointer = reader.Read<BlockPointer>();
|
486
|
+
bound_info.indexes.emplace_back(index_pointer);
|
492
487
|
}
|
493
488
|
}
|
494
489
|
|
@@ -429,13 +429,13 @@ void DataTable::VerifyForeignKeyConstraint(const BoundForeignKeyConstraint &bfk,
|
|
429
429
|
data_table.info->indexes.VerifyForeignKey(*dst_keys_ptr, dst_chunk, regular_conflicts);
|
430
430
|
regular_conflicts.Finalize();
|
431
431
|
auto ®ular_matches = regular_conflicts.Conflicts();
|
432
|
-
// check whether or not the chunk can be inserted or deleted into the referenced table' transaction local storage
|
433
|
-
auto &local_storage = LocalStorage::Get(context, db);
|
434
432
|
|
433
|
+
// check if we can insert the chunk into the reference table's local storage
|
434
|
+
auto &local_storage = LocalStorage::Get(context, db);
|
435
435
|
bool error = IsForeignKeyConstraintError(is_append, count, regular_matches);
|
436
436
|
bool transaction_error = false;
|
437
|
-
|
438
437
|
bool transaction_check = local_storage.Find(data_table);
|
438
|
+
|
439
439
|
if (transaction_check) {
|
440
440
|
auto &transact_index = local_storage.GetIndexes(data_table);
|
441
441
|
transact_index.VerifyForeignKey(*dst_keys_ptr, dst_chunk, transaction_conflicts);
|
@@ -12,7 +12,7 @@ Index::Index(AttachedDatabase &db, IndexType type, TableIOManager &table_io_mana
|
|
12
12
|
IndexConstraintType constraint_type_p)
|
13
13
|
|
14
14
|
: type(type), table_io_manager(table_io_manager), column_ids(column_ids_p), constraint_type(constraint_type_p),
|
15
|
-
db(db)
|
15
|
+
db(db) {
|
16
16
|
|
17
17
|
for (auto &expr : unbound_expressions) {
|
18
18
|
types.push_back(expr->return_type.InternalType());
|
@@ -39,6 +39,12 @@ PreservedError Index::Append(DataChunk &entries, Vector &row_identifiers) {
|
|
39
39
|
return Append(state, entries, row_identifiers);
|
40
40
|
}
|
41
41
|
|
42
|
+
void Index::CommitDrop() {
|
43
|
+
IndexLock index_lock;
|
44
|
+
InitializeLock(index_lock);
|
45
|
+
CommitDrop(index_lock);
|
46
|
+
}
|
47
|
+
|
42
48
|
void Index::Delete(DataChunk &entries, Vector &row_identifiers) {
|
43
49
|
IndexLock state;
|
44
50
|
InitializeLock(state);
|