duckdb 0.8.2-dev3989.0 → 0.8.2-dev4126.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/binding.gyp +8 -7
- package/package.json +1 -1
- package/src/duckdb/extension/json/buffered_json_reader.cpp +76 -74
- package/src/duckdb/extension/json/include/buffered_json_reader.hpp +35 -32
- package/src/duckdb/extension/json/include/json_scan.hpp +9 -6
- package/src/duckdb/extension/json/json_scan.cpp +124 -121
- package/src/duckdb/extension/parquet/parquet_extension.cpp +23 -13
- package/src/duckdb/src/catalog/catalog_entry/duck_index_entry.cpp +5 -0
- package/src/duckdb/src/common/crypto/md5.cpp +2 -12
- package/src/duckdb/src/common/radix_partitioning.cpp +1 -1
- package/src/duckdb/src/common/sort/partition_state.cpp +5 -1
- package/src/duckdb/src/core_functions/aggregate/holistic/mode.cpp +1 -1
- package/src/duckdb/src/core_functions/function_list.cpp +8 -0
- package/src/duckdb/src/core_functions/scalar/list/list_cosine_similarity.cpp +78 -0
- package/src/duckdb/src/core_functions/scalar/list/list_distance.cpp +72 -0
- package/src/duckdb/src/core_functions/scalar/list/list_inner_product.cpp +70 -0
- package/src/duckdb/src/core_functions/scalar/string/sha256.cpp +32 -0
- package/src/duckdb/src/execution/index/art/art.cpp +111 -92
- package/src/duckdb/src/execution/index/art/iterator.cpp +21 -27
- package/src/duckdb/src/execution/index/art/leaf.cpp +72 -153
- package/src/duckdb/src/execution/index/art/node.cpp +109 -203
- package/src/duckdb/src/execution/index/art/node16.cpp +32 -64
- package/src/duckdb/src/execution/index/art/node256.cpp +38 -53
- package/src/duckdb/src/execution/index/art/node4.cpp +31 -62
- package/src/duckdb/src/execution/index/art/node48.cpp +43 -65
- package/src/duckdb/src/execution/index/art/prefix.cpp +70 -141
- package/src/duckdb/src/execution/index/fixed_size_allocator.cpp +345 -0
- package/src/duckdb/src/execution/index/fixed_size_buffer.cpp +74 -0
- package/src/duckdb/src/execution/operator/aggregate/physical_window.cpp +1 -1
- package/src/duckdb/src/execution/operator/schema/physical_create_art_index.cpp +1 -1
- package/src/duckdb/src/function/scalar/string/suffix.cpp +1 -1
- package/src/duckdb/src/function/table/system/duckdb_columns.cpp +3 -1
- package/src/duckdb/src/function/table/version/pragma_version.cpp +2 -2
- package/src/duckdb/src/include/duckdb/catalog/catalog_entry/duck_index_entry.hpp +2 -0
- package/src/duckdb/src/include/duckdb/common/optional_idx.hpp +1 -1
- package/src/duckdb/src/include/duckdb/core_functions/scalar/list_functions.hpp +51 -0
- package/src/duckdb/src/include/duckdb/core_functions/scalar/string_functions.hpp +9 -0
- package/src/duckdb/src/include/duckdb/execution/index/art/art.hpp +17 -7
- package/src/duckdb/src/include/duckdb/execution/index/art/iterator.hpp +5 -5
- package/src/duckdb/src/include/duckdb/execution/index/art/leaf.hpp +10 -16
- package/src/duckdb/src/include/duckdb/execution/index/art/node.hpp +38 -116
- package/src/duckdb/src/include/duckdb/execution/index/art/node16.hpp +17 -18
- package/src/duckdb/src/include/duckdb/execution/index/art/node256.hpp +17 -23
- package/src/duckdb/src/include/duckdb/execution/index/art/node4.hpp +17 -18
- package/src/duckdb/src/include/duckdb/execution/index/art/node48.hpp +17 -24
- package/src/duckdb/src/include/duckdb/execution/index/art/prefix.hpp +16 -22
- package/src/duckdb/src/include/duckdb/execution/index/fixed_size_allocator.hpp +126 -0
- package/src/duckdb/src/include/duckdb/execution/index/fixed_size_buffer.hpp +79 -0
- package/src/duckdb/src/include/duckdb/execution/index/index_pointer.hpp +96 -0
- package/src/duckdb/src/include/duckdb/parallel/task_scheduler.hpp +1 -1
- package/src/duckdb/src/include/duckdb/planner/operator/logical_join.hpp +1 -1
- package/src/duckdb/src/include/duckdb/storage/block.hpp +1 -1
- package/src/duckdb/src/include/duckdb/storage/index.hpp +10 -8
- package/src/duckdb/src/include/duckdb/storage/metadata/metadata_writer.hpp +3 -0
- package/src/duckdb/src/main/extension/extension_helper.cpp +15 -1
- package/src/duckdb/src/planner/binder/expression/bind_function_expression.cpp +14 -5
- package/src/duckdb/src/storage/checkpoint/table_data_writer.cpp +2 -3
- package/src/duckdb/src/storage/checkpoint_manager.cpp +16 -21
- package/src/duckdb/src/storage/data_table.cpp +3 -3
- package/src/duckdb/src/storage/index.cpp +7 -1
- package/src/duckdb/src/storage/metadata/metadata_manager.cpp +21 -21
- package/src/duckdb/src/storage/standard_buffer_manager.cpp +10 -16
- package/src/duckdb/src/storage/storage_info.cpp +1 -1
- package/src/duckdb/src/storage/table_index_list.cpp +1 -1
- package/src/duckdb/src/transaction/commit_state.cpp +5 -1
- package/src/duckdb/third_party/mbedtls/include/mbedtls_wrapper.hpp +4 -1
- package/src/duckdb/third_party/mbedtls/mbedtls_wrapper.cpp +24 -2
- package/src/duckdb/ub_extension_icu_third_party_icu_i18n.cpp +5 -5
- package/src/duckdb/ub_src_core_functions_scalar_list.cpp +6 -0
- package/src/duckdb/ub_src_core_functions_scalar_string.cpp +2 -0
- package/src/duckdb/ub_src_execution_index.cpp +4 -0
- package/src/duckdb/ub_src_execution_index_art.cpp +0 -2
- package/src/duckdb/src/execution/index/art/fixed_size_allocator.cpp +0 -238
- package/src/duckdb/src/include/duckdb/execution/index/art/fixed_size_allocator.hpp +0 -115
@@ -7,8 +7,8 @@
|
|
7
7
|
//===----------------------------------------------------------------------===//
|
8
8
|
#pragma once
|
9
9
|
|
10
|
+
#include "duckdb/execution/index/fixed_size_allocator.hpp"
|
10
11
|
#include "duckdb/execution/index/art/art.hpp"
|
11
|
-
#include "duckdb/execution/index/art/fixed_size_allocator.hpp"
|
12
12
|
#include "duckdb/execution/index/art/node.hpp"
|
13
13
|
|
14
14
|
namespace duckdb {
|
@@ -17,30 +17,28 @@ namespace duckdb {
|
|
17
17
|
class ARTKey;
|
18
18
|
|
19
19
|
//! The Prefix is a special node type that contains up to PREFIX_SIZE bytes, and one byte for the count,
|
20
|
-
//! and a Node pointer. This pointer either points to another
|
21
|
-
//! node or the 'actual' ART node.
|
20
|
+
//! and a Node pointer. This pointer either points to a prefix node or another Node.
|
22
21
|
class Prefix {
|
23
22
|
public:
|
23
|
+
//! Delete copy constructors, as any Prefix can never own its memory
|
24
|
+
Prefix(const Prefix &) = delete;
|
25
|
+
Prefix &operator=(const Prefix &) = delete;
|
26
|
+
|
24
27
|
//! Up to PREFIX_SIZE bytes of prefix data and the count
|
25
28
|
uint8_t data[Node::PREFIX_SIZE + 1];
|
26
|
-
//! A pointer to the next
|
29
|
+
//! A pointer to the next Node
|
27
30
|
Node ptr;
|
28
31
|
|
29
32
|
public:
|
30
33
|
//! Get a new empty prefix node, might cause a new buffer allocation
|
31
34
|
static Prefix &New(ART &art, Node &node);
|
32
35
|
//! Create a new prefix node containing a single byte and a pointer to a next node
|
33
|
-
static Prefix &New(ART &art, Node &node, uint8_t byte, Node next);
|
36
|
+
static Prefix &New(ART &art, Node &node, uint8_t byte, const Node &next = Node());
|
34
37
|
//! Get a new chain of prefix nodes, might cause new buffer allocations,
|
35
38
|
//! with the node parameter holding the tail of the chain
|
36
39
|
static void New(ART &art, reference<Node> &node, const ARTKey &key, const uint32_t depth, uint32_t count);
|
37
40
|
//! Free the node (and its subtree)
|
38
41
|
static void Free(ART &art, Node &node);
|
39
|
-
//! Get a reference to the prefix
|
40
|
-
static inline Prefix &Get(const ART &art, const Node ptr) {
|
41
|
-
D_ASSERT(!ptr.IsSerialized());
|
42
|
-
return *Node::GetAllocator(art, NType::PREFIX).Get<Prefix>(ptr);
|
43
|
-
}
|
44
42
|
|
45
43
|
//! Initializes a merge by incrementing the buffer ID of the prefix and its child node(s)
|
46
44
|
static void InitializeMerge(ART &art, Node &node, const ARTFlags &flags);
|
@@ -51,13 +49,17 @@ public:
|
|
51
49
|
static void Concatenate(ART &art, Node &prefix_node, const uint8_t byte, Node &child_prefix_node);
|
52
50
|
//! Traverse a prefix and a key until (1) encountering a non-prefix node, or (2) encountering
|
53
51
|
//! a mismatching byte, in which case depth indexes the mismatching byte in the key
|
54
|
-
static idx_t Traverse(ART &art, reference<Node> &prefix_node, const ARTKey &key, idx_t &depth);
|
52
|
+
static idx_t Traverse(ART &art, reference<const Node> &prefix_node, const ARTKey &key, idx_t &depth);
|
53
|
+
//! Traverse a prefix and a key until (1) encountering a non-prefix node, or (2) encountering
|
54
|
+
//! a mismatching byte, in which case depth indexes the mismatching byte in the key
|
55
|
+
static idx_t TraverseMutable(ART &art, reference<Node> &prefix_node, const ARTKey &key, idx_t &depth);
|
55
56
|
//! Traverse two prefixes to find (1) that they match (so far), or (2) that they have a mismatching position,
|
56
|
-
//! or (3) that one prefix contains the other prefix
|
57
|
+
//! or (3) that one prefix contains the other prefix. This function aids in merging Nodes, and, therefore,
|
58
|
+
//! the nodes are not const
|
57
59
|
static bool Traverse(ART &art, reference<Node> &l_node, reference<Node> &r_node, idx_t &mismatch_position);
|
58
60
|
//! Returns the byte at position
|
59
61
|
static inline uint8_t GetByte(const ART &art, const Node &prefix_node, const idx_t position) {
|
60
|
-
auto prefix = Prefix
|
62
|
+
auto &prefix = Node::Ref<const Prefix>(art, prefix_node, NType::PREFIX);
|
61
63
|
D_ASSERT(position < Node::PREFIX_SIZE);
|
62
64
|
D_ASSERT(position < prefix.data[Node::PREFIX_SIZE]);
|
63
65
|
return prefix.data[position];
|
@@ -71,12 +73,7 @@ public:
|
|
71
73
|
static void Split(ART &art, reference<Node> &prefix_node, Node &child_node, idx_t position);
|
72
74
|
|
73
75
|
//! Returns the string representation of the node, or only traverses and verifies the node and its subtree
|
74
|
-
static string VerifyAndToString(ART &art, Node &node, const bool only_verify);
|
75
|
-
|
76
|
-
//! Serialize this node and all subsequent nodes
|
77
|
-
static BlockPointer Serialize(ART &art, Node &node, MetadataWriter &writer);
|
78
|
-
//! Deserialize this node and all subsequent prefix nodes
|
79
|
-
static void Deserialize(ART &art, Node &node, MetadataReader &reader);
|
76
|
+
static string VerifyAndToString(ART &art, const Node &node, const bool only_verify);
|
80
77
|
|
81
78
|
//! Vacuum the child of the node
|
82
79
|
static void Vacuum(ART &art, Node &node, const ARTFlags &flags);
|
@@ -88,8 +85,5 @@ private:
|
|
88
85
|
//! Appends the other_prefix and all its subsequent prefix nodes to this prefix node.
|
89
86
|
//! Also frees all copied/appended nodes
|
90
87
|
void Append(ART &art, Node other_prefix);
|
91
|
-
//! Get the total count of bytes in the chain of prefixes, with the node reference pointing to first non-prefix node
|
92
|
-
static idx_t TotalCount(ART &art, reference<Node> &node);
|
93
88
|
};
|
94
|
-
|
95
89
|
} // namespace duckdb
|
@@ -0,0 +1,126 @@
|
|
1
|
+
//===----------------------------------------------------------------------===//
|
2
|
+
// DuckDB
|
3
|
+
//
|
4
|
+
// duckdb/execution/index/fixed_size_allocator.hpp
|
5
|
+
//
|
6
|
+
//
|
7
|
+
//===----------------------------------------------------------------------===//
|
8
|
+
|
9
|
+
#pragma once
|
10
|
+
|
11
|
+
#include "duckdb/common/types/validity_mask.hpp"
|
12
|
+
#include "duckdb/common/unordered_set.hpp"
|
13
|
+
#include "duckdb/storage/buffer_manager.hpp"
|
14
|
+
#include "duckdb/storage/metadata/metadata_manager.hpp"
|
15
|
+
#include "duckdb/storage/metadata/metadata_writer.hpp"
|
16
|
+
#include "duckdb/execution/index/fixed_size_buffer.hpp"
|
17
|
+
#include "duckdb/execution/index/index_pointer.hpp"
|
18
|
+
#include "duckdb/common/unordered_map.hpp"
|
19
|
+
#include "duckdb/common/constants.hpp"
|
20
|
+
#include "duckdb/common/map.hpp"
|
21
|
+
|
22
|
+
namespace duckdb {
|
23
|
+
|
24
|
+
//! The FixedSizeAllocator provides pointers to fixed-size memory segments of pre-allocated memory buffers.
|
25
|
+
//! The pointers are IndexPointers, and the leftmost byte (metadata) must always be zero.
|
26
|
+
//! It is also possible to directly request a C++ pointer to the underlying segment of an index pointer.
|
27
|
+
class FixedSizeAllocator {
|
28
|
+
public:
|
29
|
+
//! We can vacuum 10% or more of the total in-memory footprint
|
30
|
+
static constexpr uint8_t VACUUM_THRESHOLD = 10;
|
31
|
+
|
32
|
+
//! Constants for fast offset calculations in the bitmask
|
33
|
+
static constexpr idx_t BASE[] = {0x00000000FFFFFFFF, 0x0000FFFF, 0x00FF, 0x0F, 0x3, 0x1};
|
34
|
+
static constexpr uint8_t SHIFT[] = {32, 16, 8, 4, 2, 1};
|
35
|
+
|
36
|
+
public:
|
37
|
+
FixedSizeAllocator(const idx_t segment_size, BlockManager &block_manager);
|
38
|
+
|
39
|
+
//! Block manager of the database instance
|
40
|
+
BlockManager &block_manager;
|
41
|
+
//! Buffer manager of the database instance
|
42
|
+
BufferManager &buffer_manager;
|
43
|
+
//! Metadata manager for (de)serialization
|
44
|
+
MetadataManager &metadata_manager;
|
45
|
+
|
46
|
+
public:
|
47
|
+
//! Get a new IndexPointer to a segment, might cause a new buffer allocation
|
48
|
+
IndexPointer New();
|
49
|
+
//! Free the segment of the IndexPointer
|
50
|
+
void Free(const IndexPointer ptr);
|
51
|
+
//! Returns a pointer of type T to a segment. If dirty is false, then T should be a const class
|
52
|
+
template <class T>
|
53
|
+
inline T *Get(const IndexPointer ptr, const bool dirty = true) {
|
54
|
+
return (T *)Get(ptr, dirty);
|
55
|
+
}
|
56
|
+
|
57
|
+
//! Resets the allocator, e.g., during 'DELETE FROM table'
|
58
|
+
void Reset();
|
59
|
+
|
60
|
+
//! Returns the in-memory usage in bytes
|
61
|
+
inline idx_t GetMemoryUsage() const;
|
62
|
+
|
63
|
+
//! Returns the upper bound of the available buffer IDs, i.e., upper_bound > max_buffer_id
|
64
|
+
idx_t GetUpperBoundBufferId() const;
|
65
|
+
//! Merge another FixedSizeAllocator into this allocator. Both must have the same segment size
|
66
|
+
void Merge(FixedSizeAllocator &other);
|
67
|
+
|
68
|
+
//! Initialize a vacuum operation, and return true, if the allocator needs a vacuum
|
69
|
+
bool InitializeVacuum();
|
70
|
+
//! Finalize a vacuum operation by freeing all vacuumed buffers
|
71
|
+
void FinalizeVacuum();
|
72
|
+
//! Returns true, if an IndexPointer qualifies for a vacuum operation, and false otherwise
|
73
|
+
inline bool NeedsVacuum(const IndexPointer ptr) const {
|
74
|
+
if (vacuum_buffers.find(ptr.GetBufferId()) != vacuum_buffers.end()) {
|
75
|
+
return true;
|
76
|
+
}
|
77
|
+
return false;
|
78
|
+
}
|
79
|
+
//! Vacuums an IndexPointer
|
80
|
+
IndexPointer VacuumPointer(const IndexPointer ptr);
|
81
|
+
|
82
|
+
//! Serializes all in-memory buffers and the metadata
|
83
|
+
BlockPointer Serialize(MetadataWriter &writer);
|
84
|
+
//! Deserializes all metadata
|
85
|
+
void Deserialize(const BlockPointer &block_pointer);
|
86
|
+
|
87
|
+
private:
|
88
|
+
//! Allocation size of one segment in a buffer
|
89
|
+
//! We only need this value to calculate bitmask_count, bitmask_offset, and
|
90
|
+
//! available_segments_per_buffer
|
91
|
+
idx_t segment_size;
|
92
|
+
|
93
|
+
//! Number of validity_t values in the bitmask
|
94
|
+
idx_t bitmask_count;
|
95
|
+
//! First starting byte of the payload (segments)
|
96
|
+
idx_t bitmask_offset;
|
97
|
+
//! Number of possible segment allocations per buffer
|
98
|
+
idx_t available_segments_per_buffer;
|
99
|
+
|
100
|
+
//! Total number of allocated segments in all buffers
|
101
|
+
//! We can recalculate this by iterating over all buffers
|
102
|
+
idx_t total_segment_count;
|
103
|
+
|
104
|
+
//! Buffers containing the segments
|
105
|
+
unordered_map<idx_t, FixedSizeBuffer> buffers;
|
106
|
+
//! Buffers with free space
|
107
|
+
unordered_set<idx_t> buffers_with_free_space;
|
108
|
+
//! Buffers qualifying for a vacuum (helper field to allow for fast NeedsVacuum checks)
|
109
|
+
unordered_set<idx_t> vacuum_buffers;
|
110
|
+
|
111
|
+
private:
|
112
|
+
//! Returns the data_ptr_t to a segment, and sets the dirty flag of the buffer containing that segment
|
113
|
+
inline data_ptr_t Get(const IndexPointer ptr, const bool dirty = true) {
|
114
|
+
D_ASSERT(ptr.GetOffset() < available_segments_per_buffer);
|
115
|
+
D_ASSERT(buffers.find(ptr.GetBufferId()) != buffers.end());
|
116
|
+
auto &buffer = buffers.find(ptr.GetBufferId())->second;
|
117
|
+
auto buffer_ptr = buffer.Get(dirty);
|
118
|
+
return buffer_ptr + ptr.GetOffset() * segment_size + bitmask_offset;
|
119
|
+
}
|
120
|
+
//! Returns the first free offset in a bitmask
|
121
|
+
uint32_t GetOffset(ValidityMask &mask, const idx_t segment_count);
|
122
|
+
//! Returns an available buffer id
|
123
|
+
idx_t GetAvailableBufferId() const;
|
124
|
+
};
|
125
|
+
|
126
|
+
} // namespace duckdb
|
@@ -0,0 +1,79 @@
|
|
1
|
+
//===----------------------------------------------------------------------===//
|
2
|
+
// DuckDB
|
3
|
+
//
|
4
|
+
// duckdb/execution/index/fixed_size_buffer.hpp
|
5
|
+
//
|
6
|
+
//
|
7
|
+
//===----------------------------------------------------------------------===//
|
8
|
+
|
9
|
+
#pragma once
|
10
|
+
|
11
|
+
#include "duckdb/common/typedefs.hpp"
|
12
|
+
#include "duckdb/storage/buffer/block_handle.hpp"
|
13
|
+
#include "duckdb/storage/buffer/buffer_handle.hpp"
|
14
|
+
|
15
|
+
namespace duckdb {
|
16
|
+
|
17
|
+
class FixedSizeAllocator;
|
18
|
+
class MetadataWriter;
|
19
|
+
|
20
|
+
//! A fixed-size buffer holds fixed-size segments of data. It lazily deserializes a buffer, if on-disk and not
|
21
|
+
//! yet in memory, and it only serializes dirty and non-written buffers to disk during
|
22
|
+
//! serialization.
|
23
|
+
class FixedSizeBuffer {
|
24
|
+
public:
|
25
|
+
//! Constructor for a new in-memory buffer
|
26
|
+
explicit FixedSizeBuffer(BlockManager &block_manager);
|
27
|
+
//! Constructor for deserializing buffer metadata from disk
|
28
|
+
FixedSizeBuffer(BlockManager &block_manager, const idx_t segment_count, const block_id_t &block_id);
|
29
|
+
|
30
|
+
//! Block manager of the database instance
|
31
|
+
BlockManager &block_manager;
|
32
|
+
|
33
|
+
//! The number of allocated segments
|
34
|
+
idx_t segment_count;
|
35
|
+
|
36
|
+
//! True: the in-memory buffer is no longer consistent with a (possibly existing) copy on disk
|
37
|
+
bool dirty;
|
38
|
+
//! True: can be vacuumed after the vacuum operation
|
39
|
+
bool vacuum;
|
40
|
+
|
41
|
+
public:
|
42
|
+
//! Returns true, if the buffer is in-memory
|
43
|
+
inline bool InMemory() const {
|
44
|
+
return buffer_handle.IsValid();
|
45
|
+
}
|
46
|
+
//! Returns true, if the block is on-disk
|
47
|
+
inline bool OnDisk() const {
|
48
|
+
return (block_handle != nullptr) && (block_handle->BlockId() < MAXIMUM_BLOCK);
|
49
|
+
}
|
50
|
+
//! Returns the block ID
|
51
|
+
inline block_id_t BlockId() const {
|
52
|
+
D_ASSERT(OnDisk());
|
53
|
+
return block_handle->BlockId();
|
54
|
+
}
|
55
|
+
//! Returns a pointer to the buffer in memory, and calls Deserialize, if the buffer is not in memory
|
56
|
+
inline data_ptr_t Get(const bool dirty_p = true) {
|
57
|
+
if (!InMemory()) {
|
58
|
+
Pin();
|
59
|
+
}
|
60
|
+
if (dirty_p) {
|
61
|
+
dirty = dirty_p;
|
62
|
+
}
|
63
|
+
return buffer_handle.Ptr();
|
64
|
+
}
|
65
|
+
//! Destroys the in-memory buffer and the on-disk block
|
66
|
+
void Destroy();
|
67
|
+
//! Serializes a buffer (if dirty or not on disk)
|
68
|
+
void Serialize();
|
69
|
+
//! Pin a buffer (if not in-memory)
|
70
|
+
void Pin();
|
71
|
+
|
72
|
+
private:
|
73
|
+
//! The buffer handle of the in-memory buffer
|
74
|
+
BufferHandle buffer_handle;
|
75
|
+
//! The block handle of the on-disk buffer
|
76
|
+
shared_ptr<BlockHandle> block_handle;
|
77
|
+
};
|
78
|
+
|
79
|
+
} // namespace duckdb
|
@@ -0,0 +1,96 @@
|
|
1
|
+
//===----------------------------------------------------------------------===//
|
2
|
+
// DuckDB
|
3
|
+
//
|
4
|
+
// duckdb/execution/index/index_pointer.hpp
|
5
|
+
//
|
6
|
+
//
|
7
|
+
//===----------------------------------------------------------------------===//
|
8
|
+
|
9
|
+
#pragma once
|
10
|
+
|
11
|
+
#include "duckdb/common/typedefs.hpp"
|
12
|
+
|
13
|
+
namespace duckdb {
|
14
|
+
|
15
|
+
class IndexPointer {
|
16
|
+
public:
|
17
|
+
//! Bit-shifting
|
18
|
+
static constexpr idx_t SHIFT_OFFSET = 32;
|
19
|
+
static constexpr idx_t SHIFT_METADATA = 56;
|
20
|
+
//! AND operations
|
21
|
+
static constexpr idx_t AND_OFFSET = 0x0000000000FFFFFF;
|
22
|
+
static constexpr idx_t AND_BUFFER_ID = 0x00000000FFFFFFFF;
|
23
|
+
static constexpr idx_t AND_METADATA = 0xFF00000000000000;
|
24
|
+
|
25
|
+
public:
|
26
|
+
//! Constructs an empty IndexPointer
|
27
|
+
IndexPointer() : data(0) {};
|
28
|
+
//! Constructs an in-memory IndexPointer with a buffer ID and an offset
|
29
|
+
IndexPointer(const uint32_t buffer_id, const uint32_t offset) : data(0) {
|
30
|
+
auto shifted_offset = ((idx_t)offset) << SHIFT_OFFSET;
|
31
|
+
data += shifted_offset;
|
32
|
+
data += buffer_id;
|
33
|
+
};
|
34
|
+
|
35
|
+
public:
|
36
|
+
//! Get data (all 64 bits)
|
37
|
+
inline idx_t Get() const {
|
38
|
+
return data;
|
39
|
+
}
|
40
|
+
//! Set data (all 64 bits)
|
41
|
+
inline void Set(const idx_t data_p) {
|
42
|
+
data = data_p;
|
43
|
+
}
|
44
|
+
|
45
|
+
//! Returns false, if the metadata is empty
|
46
|
+
inline bool HasMetadata() const {
|
47
|
+
return data & AND_METADATA;
|
48
|
+
}
|
49
|
+
//! Get metadata (zero to 7th bit)
|
50
|
+
inline uint8_t GetMetadata() const {
|
51
|
+
return data >> SHIFT_METADATA;
|
52
|
+
}
|
53
|
+
//! Set metadata (zero to 7th bit)
|
54
|
+
inline void SetMetadata(const uint8_t metadata) {
|
55
|
+
data += (idx_t)metadata << SHIFT_METADATA;
|
56
|
+
}
|
57
|
+
|
58
|
+
//! Get the offset (8th to 23rd bit)
|
59
|
+
inline idx_t GetOffset() const {
|
60
|
+
auto offset = data >> SHIFT_OFFSET;
|
61
|
+
return offset & AND_OFFSET;
|
62
|
+
}
|
63
|
+
//! Get the buffer ID (24th to 63rd bit)
|
64
|
+
inline idx_t GetBufferId() const {
|
65
|
+
return data & AND_BUFFER_ID;
|
66
|
+
}
|
67
|
+
|
68
|
+
//! Resets the IndexPointer
|
69
|
+
inline void Clear() {
|
70
|
+
data = 0;
|
71
|
+
}
|
72
|
+
|
73
|
+
//! Adds an idx_t to a buffer ID, the rightmost 32 bits of data contain the buffer ID
|
74
|
+
inline void IncreaseBufferId(const idx_t summand) {
|
75
|
+
data += summand;
|
76
|
+
}
|
77
|
+
|
78
|
+
//! Comparison operator
|
79
|
+
inline bool operator==(const IndexPointer &ptr) const {
|
80
|
+
return data == ptr.data;
|
81
|
+
}
|
82
|
+
|
83
|
+
private:
|
84
|
+
//! Data holds all the information contained in an IndexPointer
|
85
|
+
//! [0 - 7: metadata,
|
86
|
+
//! 8 - 23: offset, 24 - 63: buffer ID]
|
87
|
+
//! NOTE: we do not use bit fields because when using bit fields Windows compiles
|
88
|
+
//! the IndexPointer class into 16 bytes instead of the intended 8 bytes, doubling the
|
89
|
+
//! space requirements
|
90
|
+
//! https://learn.microsoft.com/en-us/cpp/cpp/cpp-bit-fields?view=msvc-170
|
91
|
+
idx_t data;
|
92
|
+
};
|
93
|
+
|
94
|
+
static_assert(sizeof(IndexPointer) == sizeof(idx_t), "Invalid size for IndexPointer.");
|
95
|
+
|
96
|
+
} // namespace duckdb
|
@@ -30,7 +30,7 @@ public:
|
|
30
30
|
//! The type of the join (INNER, OUTER, etc...)
|
31
31
|
JoinType join_type;
|
32
32
|
//! Table index used to refer to the MARK column (in case of a MARK join)
|
33
|
-
idx_t mark_index;
|
33
|
+
idx_t mark_index {};
|
34
34
|
//! The columns of the LHS that are output by the join
|
35
35
|
vector<idx_t> left_projection_map;
|
36
36
|
//! The columns of the RHS that are output by the join
|
@@ -54,8 +54,6 @@ public:
|
|
54
54
|
|
55
55
|
//! Attached database instance
|
56
56
|
AttachedDatabase &db;
|
57
|
-
//! Buffer manager of the database instance
|
58
|
-
BufferManager &buffer_manager;
|
59
57
|
|
60
58
|
public:
|
61
59
|
//! Initialize a single predicate scan on the index with the given expression and column IDs
|
@@ -85,6 +83,10 @@ public:
|
|
85
83
|
//! Performs constraint checking for a chunk of input data
|
86
84
|
virtual void CheckConstraintsForChunk(DataChunk &input, ConflictManager &conflict_manager) = 0;
|
87
85
|
|
86
|
+
//! Deletes all data from the index. The lock obtained from InitializeLock must be held
|
87
|
+
virtual void CommitDrop(IndexLock &index_lock) = 0;
|
88
|
+
//! Deletes all data from the index
|
89
|
+
void CommitDrop();
|
88
90
|
//! Delete a chunk of entries from the index. The lock obtained from InitializeLock must be held
|
89
91
|
virtual void Delete(IndexLock &state, DataChunk &entries, Vector &row_identifiers) = 0;
|
90
92
|
//! Obtains a lock and calls Delete while holding that lock
|
@@ -125,11 +127,11 @@ public:
|
|
125
127
|
return (constraint_type == IndexConstraintType::FOREIGN);
|
126
128
|
}
|
127
129
|
|
128
|
-
//! Serializes the index
|
130
|
+
//! Serializes the index to disk
|
129
131
|
virtual BlockPointer Serialize(MetadataWriter &writer);
|
130
|
-
//! Returns the serialized
|
131
|
-
BlockPointer
|
132
|
-
return
|
132
|
+
//! Returns the serialized root block pointer
|
133
|
+
BlockPointer GetRootBlockPointer() const {
|
134
|
+
return root_block_pointer;
|
133
135
|
}
|
134
136
|
|
135
137
|
//! Execute the index expressions on an input chunk
|
@@ -139,8 +141,8 @@ public:
|
|
139
141
|
protected:
|
140
142
|
//! Lock used for any changes to the index
|
141
143
|
mutex lock;
|
142
|
-
//! Pointer to
|
143
|
-
BlockPointer
|
144
|
+
//! Pointer to the index on disk
|
145
|
+
BlockPointer root_block_pointer;
|
144
146
|
|
145
147
|
private:
|
146
148
|
//! Bound expressions used during expression execution
|
@@ -14,6 +14,9 @@ namespace duckdb {
|
|
14
14
|
|
15
15
|
class MetadataWriter : public Serializer {
|
16
16
|
public:
|
17
|
+
MetadataWriter(const MetadataWriter &) = delete;
|
18
|
+
MetadataWriter &operator=(const MetadataWriter &) = delete;
|
19
|
+
|
17
20
|
explicit MetadataWriter(MetadataManager &manager);
|
18
21
|
~MetadataWriter() override;
|
19
22
|
|
@@ -12,6 +12,10 @@
|
|
12
12
|
#define DUCKDB_EXTENSION_ICU_LINKED false
|
13
13
|
#endif
|
14
14
|
|
15
|
+
#ifndef DUCKDB_EXTENSION_EXCEL_LINKED
|
16
|
+
#define DUCKDB_EXTENSION_EXCEL_LINKED false
|
17
|
+
#endif
|
18
|
+
|
15
19
|
#ifndef DUCKDB_EXTENSION_PARQUET_LINKED
|
16
20
|
#define DUCKDB_EXTENSION_PARQUET_LINKED false
|
17
21
|
#endif
|
@@ -54,6 +58,10 @@
|
|
54
58
|
#include "icu_extension.hpp"
|
55
59
|
#endif
|
56
60
|
|
61
|
+
#if DUCKDB_EXTENSION_EXCEL_LINKED
|
62
|
+
#include "excel_extension.hpp"
|
63
|
+
#endif
|
64
|
+
|
57
65
|
#if DUCKDB_EXTENSION_PARQUET_LINKED
|
58
66
|
#include "parquet_extension.hpp"
|
59
67
|
#endif
|
@@ -94,6 +102,7 @@ namespace duckdb {
|
|
94
102
|
//===--------------------------------------------------------------------===//
|
95
103
|
static DefaultExtension internal_extensions[] = {
|
96
104
|
{"icu", "Adds support for time zones and collations using the ICU library", DUCKDB_EXTENSION_ICU_LINKED},
|
105
|
+
{"excel", "Adds support for Excel-like format strings", DUCKDB_EXTENSION_EXCEL_LINKED},
|
97
106
|
{"parquet", "Adds support for reading and writing parquet files", DUCKDB_EXTENSION_PARQUET_LINKED},
|
98
107
|
{"tpch", "Adds TPC-H data generation and query support", DUCKDB_EXTENSION_TPCH_LINKED},
|
99
108
|
{"tpcds", "Adds TPC-DS data generation and query support", DUCKDB_EXTENSION_TPCDS_LINKED},
|
@@ -101,12 +110,17 @@ static DefaultExtension internal_extensions[] = {
|
|
101
110
|
{"httpfs", "Adds support for reading and writing files over a HTTP(S) connection", DUCKDB_EXTENSION_HTTPFS_LINKED},
|
102
111
|
{"json", "Adds support for JSON operations", DUCKDB_EXTENSION_JSON_LINKED},
|
103
112
|
{"jemalloc", "Overwrites system allocator with JEMalloc", DUCKDB_EXTENSION_JEMALLOC_LINKED},
|
104
|
-
{"autocomplete", "
|
113
|
+
{"autocomplete", "Adds support for autocomplete in the shell", DUCKDB_EXTENSION_AUTOCOMPLETE_LINKED},
|
105
114
|
{"motherduck", "Enables motherduck integration with the system", false},
|
106
115
|
{"sqlite_scanner", "Adds support for reading SQLite database files", false},
|
107
116
|
{"postgres_scanner", "Adds support for reading from a Postgres database", false},
|
108
117
|
{"inet", "Adds support for IP-related data types and functions", false},
|
109
118
|
{"spatial", "Geospatial extension that adds support for working with spatial data and functions", false},
|
119
|
+
{"substrait", "Adds support for the Substrait integration", false},
|
120
|
+
{"aws", "Provides features that depend on the AWS SDK", false},
|
121
|
+
{"azure", "Adds a filesystem abstraction for Azure blob storage to DuckDB", false},
|
122
|
+
{"iceberg", "Adds support for Apache Iceberg", false},
|
123
|
+
{"visualizer", "Creates an HTML-based visualization of the query plan", false},
|
110
124
|
{nullptr, nullptr, false}};
|
111
125
|
|
112
126
|
idx_t ExtensionHelper::DefaultExtensionCount() {
|
@@ -69,21 +69,30 @@ BindResult ExpressionBinder::BindExpression(FunctionExpression &function, idx_t
|
|
69
69
|
}
|
70
70
|
|
71
71
|
switch (func->type) {
|
72
|
-
case CatalogType::SCALAR_FUNCTION_ENTRY:
|
72
|
+
case CatalogType::SCALAR_FUNCTION_ENTRY: {
|
73
73
|
// scalar function
|
74
74
|
|
75
75
|
// check for lambda parameters, ignore ->> operator (JSON extension)
|
76
|
+
bool try_bind_lambda = false;
|
76
77
|
if (function.function_name != "->>") {
|
77
78
|
for (auto &child : function.children) {
|
78
79
|
if (child->expression_class == ExpressionClass::LAMBDA) {
|
79
|
-
|
80
|
+
try_bind_lambda = true;
|
80
81
|
}
|
81
82
|
}
|
82
83
|
}
|
83
84
|
|
85
|
+
if (try_bind_lambda) {
|
86
|
+
auto result = BindLambdaFunction(function, func->Cast<ScalarFunctionCatalogEntry>(), depth);
|
87
|
+
if (!result.HasError()) {
|
88
|
+
// Lambda bind successful
|
89
|
+
return result;
|
90
|
+
}
|
91
|
+
}
|
92
|
+
|
84
93
|
// other scalar function
|
85
94
|
return BindFunction(function, func->Cast<ScalarFunctionCatalogEntry>(), depth);
|
86
|
-
|
95
|
+
}
|
87
96
|
case CatalogType::MACRO_ENTRY:
|
88
97
|
// macro function
|
89
98
|
return BindMacro(function, func->Cast<ScalarMacroCatalogEntry>(), depth, expr_ptr);
|
@@ -134,7 +143,7 @@ BindResult ExpressionBinder::BindLambdaFunction(FunctionExpression &function, Sc
|
|
134
143
|
string error;
|
135
144
|
|
136
145
|
if (function.children.size() != 2) {
|
137
|
-
|
146
|
+
return BindResult("Invalid function arguments!");
|
138
147
|
}
|
139
148
|
D_ASSERT(function.children[1]->GetExpressionClass() == ExpressionClass::LAMBDA);
|
140
149
|
|
@@ -148,7 +157,7 @@ BindResult ExpressionBinder::BindLambdaFunction(FunctionExpression &function, Sc
|
|
148
157
|
auto &list_child = BoundExpression::GetExpression(*function.children[0]);
|
149
158
|
if (list_child->return_type.id() != LogicalTypeId::LIST && list_child->return_type.id() != LogicalTypeId::SQLNULL &&
|
150
159
|
list_child->return_type.id() != LogicalTypeId::UNKNOWN) {
|
151
|
-
|
160
|
+
return BindResult(" Invalid LIST argument to " + function.function_name + "!");
|
152
161
|
}
|
153
162
|
|
154
163
|
LogicalType list_child_type = list_child->return_type.id();
|
@@ -69,9 +69,8 @@ void SingleFileTableDataWriter::FinalizeTable(TableStatistics &&global_stats, Da
|
|
69
69
|
|
70
70
|
// Write-off to metadata block ids and offsets of indexes
|
71
71
|
meta_data_writer.Write<idx_t>(index_pointers.size());
|
72
|
-
for (auto &
|
73
|
-
meta_data_writer.Write<
|
74
|
-
meta_data_writer.Write<uint32_t>(block_info.offset);
|
72
|
+
for (const auto &index_pointer : index_pointers) {
|
73
|
+
meta_data_writer.Write<BlockPointer>(index_pointer);
|
75
74
|
}
|
76
75
|
}
|
77
76
|
|