duckdb 0.8.2-dev4025.0 → 0.8.2-dev4126.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (63) hide show
  1. package/binding.gyp +1 -0
  2. package/package.json +1 -1
  3. package/src/duckdb/extension/json/buffered_json_reader.cpp +76 -74
  4. package/src/duckdb/extension/json/include/buffered_json_reader.hpp +35 -32
  5. package/src/duckdb/extension/json/include/json_scan.hpp +9 -6
  6. package/src/duckdb/extension/json/json_scan.cpp +124 -121
  7. package/src/duckdb/src/catalog/catalog_entry/duck_index_entry.cpp +5 -0
  8. package/src/duckdb/src/common/radix_partitioning.cpp +1 -1
  9. package/src/duckdb/src/common/sort/partition_state.cpp +5 -1
  10. package/src/duckdb/src/core_functions/aggregate/holistic/mode.cpp +1 -1
  11. package/src/duckdb/src/core_functions/function_list.cpp +7 -0
  12. package/src/duckdb/src/core_functions/scalar/list/list_cosine_similarity.cpp +78 -0
  13. package/src/duckdb/src/core_functions/scalar/list/list_distance.cpp +72 -0
  14. package/src/duckdb/src/core_functions/scalar/list/list_inner_product.cpp +70 -0
  15. package/src/duckdb/src/execution/index/art/art.cpp +111 -92
  16. package/src/duckdb/src/execution/index/art/iterator.cpp +21 -27
  17. package/src/duckdb/src/execution/index/art/leaf.cpp +72 -153
  18. package/src/duckdb/src/execution/index/art/node.cpp +109 -203
  19. package/src/duckdb/src/execution/index/art/node16.cpp +32 -64
  20. package/src/duckdb/src/execution/index/art/node256.cpp +38 -53
  21. package/src/duckdb/src/execution/index/art/node4.cpp +31 -62
  22. package/src/duckdb/src/execution/index/art/node48.cpp +43 -65
  23. package/src/duckdb/src/execution/index/art/prefix.cpp +70 -141
  24. package/src/duckdb/src/execution/index/fixed_size_allocator.cpp +345 -0
  25. package/src/duckdb/src/execution/index/fixed_size_buffer.cpp +74 -0
  26. package/src/duckdb/src/execution/operator/aggregate/physical_window.cpp +1 -1
  27. package/src/duckdb/src/execution/operator/schema/physical_create_art_index.cpp +1 -1
  28. package/src/duckdb/src/function/table/system/duckdb_columns.cpp +3 -1
  29. package/src/duckdb/src/function/table/version/pragma_version.cpp +2 -2
  30. package/src/duckdb/src/include/duckdb/catalog/catalog_entry/duck_index_entry.hpp +2 -0
  31. package/src/duckdb/src/include/duckdb/common/optional_idx.hpp +1 -1
  32. package/src/duckdb/src/include/duckdb/core_functions/scalar/list_functions.hpp +51 -0
  33. package/src/duckdb/src/include/duckdb/execution/index/art/art.hpp +17 -7
  34. package/src/duckdb/src/include/duckdb/execution/index/art/iterator.hpp +5 -5
  35. package/src/duckdb/src/include/duckdb/execution/index/art/leaf.hpp +10 -16
  36. package/src/duckdb/src/include/duckdb/execution/index/art/node.hpp +38 -116
  37. package/src/duckdb/src/include/duckdb/execution/index/art/node16.hpp +17 -18
  38. package/src/duckdb/src/include/duckdb/execution/index/art/node256.hpp +17 -23
  39. package/src/duckdb/src/include/duckdb/execution/index/art/node4.hpp +17 -18
  40. package/src/duckdb/src/include/duckdb/execution/index/art/node48.hpp +17 -24
  41. package/src/duckdb/src/include/duckdb/execution/index/art/prefix.hpp +16 -22
  42. package/src/duckdb/src/include/duckdb/execution/index/fixed_size_allocator.hpp +126 -0
  43. package/src/duckdb/src/include/duckdb/execution/index/fixed_size_buffer.hpp +79 -0
  44. package/src/duckdb/src/include/duckdb/execution/index/index_pointer.hpp +96 -0
  45. package/src/duckdb/src/include/duckdb/parallel/task_scheduler.hpp +1 -1
  46. package/src/duckdb/src/include/duckdb/storage/block.hpp +1 -1
  47. package/src/duckdb/src/include/duckdb/storage/index.hpp +10 -8
  48. package/src/duckdb/src/include/duckdb/storage/metadata/metadata_writer.hpp +3 -0
  49. package/src/duckdb/src/planner/binder/expression/bind_function_expression.cpp +14 -5
  50. package/src/duckdb/src/storage/checkpoint/table_data_writer.cpp +2 -3
  51. package/src/duckdb/src/storage/checkpoint_manager.cpp +16 -21
  52. package/src/duckdb/src/storage/data_table.cpp +3 -3
  53. package/src/duckdb/src/storage/index.cpp +7 -1
  54. package/src/duckdb/src/storage/metadata/metadata_manager.cpp +21 -21
  55. package/src/duckdb/src/storage/standard_buffer_manager.cpp +0 -8
  56. package/src/duckdb/src/storage/storage_info.cpp +1 -1
  57. package/src/duckdb/src/storage/table_index_list.cpp +1 -1
  58. package/src/duckdb/src/transaction/commit_state.cpp +5 -1
  59. package/src/duckdb/ub_src_core_functions_scalar_list.cpp +6 -0
  60. package/src/duckdb/ub_src_execution_index.cpp +4 -0
  61. package/src/duckdb/ub_src_execution_index_art.cpp +0 -2
  62. package/src/duckdb/src/execution/index/art/fixed_size_allocator.cpp +0 -238
  63. package/src/duckdb/src/include/duckdb/execution/index/art/fixed_size_allocator.hpp +0 -115
@@ -7,8 +7,8 @@
7
7
  //===----------------------------------------------------------------------===//
8
8
  #pragma once
9
9
 
10
+ #include "duckdb/execution/index/fixed_size_allocator.hpp"
10
11
  #include "duckdb/execution/index/art/art.hpp"
11
- #include "duckdb/execution/index/art/fixed_size_allocator.hpp"
12
12
  #include "duckdb/execution/index/art/node.hpp"
13
13
 
14
14
  namespace duckdb {
@@ -17,30 +17,28 @@ namespace duckdb {
17
17
  class ARTKey;
18
18
 
19
19
  //! The Prefix is a special node type that contains up to PREFIX_SIZE bytes, and one byte for the count,
20
- //! and a Node pointer. This pointer either points to another prefix
21
- //! node or the 'actual' ART node.
20
+ //! and a Node pointer. This pointer either points to a prefix node or another Node.
22
21
  class Prefix {
23
22
  public:
23
+ //! Delete copy constructors, as any Prefix can never own its memory
24
+ Prefix(const Prefix &) = delete;
25
+ Prefix &operator=(const Prefix &) = delete;
26
+
24
27
  //! Up to PREFIX_SIZE bytes of prefix data and the count
25
28
  uint8_t data[Node::PREFIX_SIZE + 1];
26
- //! A pointer to the next ART node
29
+ //! A pointer to the next Node
27
30
  Node ptr;
28
31
 
29
32
  public:
30
33
  //! Get a new empty prefix node, might cause a new buffer allocation
31
34
  static Prefix &New(ART &art, Node &node);
32
35
  //! Create a new prefix node containing a single byte and a pointer to a next node
33
- static Prefix &New(ART &art, Node &node, uint8_t byte, Node next);
36
+ static Prefix &New(ART &art, Node &node, uint8_t byte, const Node &next = Node());
34
37
  //! Get a new chain of prefix nodes, might cause new buffer allocations,
35
38
  //! with the node parameter holding the tail of the chain
36
39
  static void New(ART &art, reference<Node> &node, const ARTKey &key, const uint32_t depth, uint32_t count);
37
40
  //! Free the node (and its subtree)
38
41
  static void Free(ART &art, Node &node);
39
- //! Get a reference to the prefix
40
- static inline Prefix &Get(const ART &art, const Node ptr) {
41
- D_ASSERT(!ptr.IsSerialized());
42
- return *Node::GetAllocator(art, NType::PREFIX).Get<Prefix>(ptr);
43
- }
44
42
 
45
43
  //! Initializes a merge by incrementing the buffer ID of the prefix and its child node(s)
46
44
  static void InitializeMerge(ART &art, Node &node, const ARTFlags &flags);
@@ -51,13 +49,17 @@ public:
51
49
  static void Concatenate(ART &art, Node &prefix_node, const uint8_t byte, Node &child_prefix_node);
52
50
  //! Traverse a prefix and a key until (1) encountering a non-prefix node, or (2) encountering
53
51
  //! a mismatching byte, in which case depth indexes the mismatching byte in the key
54
- static idx_t Traverse(ART &art, reference<Node> &prefix_node, const ARTKey &key, idx_t &depth);
52
+ static idx_t Traverse(ART &art, reference<const Node> &prefix_node, const ARTKey &key, idx_t &depth);
53
+ //! Same as Traverse, but takes a non-const node reference: traverse a prefix and a key until (1) encountering a non-prefix node, or (2) encountering
54
+ //! a mismatching byte, in which case depth indexes the mismatching byte in the key
55
+ static idx_t TraverseMutable(ART &art, reference<Node> &prefix_node, const ARTKey &key, idx_t &depth);
55
56
  //! Traverse two prefixes to find (1) that they match (so far), or (2) that they have a mismatching position,
56
- //! or (3) that one prefix contains the other prefix
57
+ //! or (3) that one prefix contains the other prefix. This function aids in merging Nodes, and, therefore,
58
+ //! the nodes are not const
57
59
  static bool Traverse(ART &art, reference<Node> &l_node, reference<Node> &r_node, idx_t &mismatch_position);
58
60
  //! Returns the byte at position
59
61
  static inline uint8_t GetByte(const ART &art, const Node &prefix_node, const idx_t position) {
60
- auto prefix = Prefix::Get(art, prefix_node);
62
+ auto &prefix = Node::Ref<const Prefix>(art, prefix_node, NType::PREFIX);
61
63
  D_ASSERT(position < Node::PREFIX_SIZE);
62
64
  D_ASSERT(position < prefix.data[Node::PREFIX_SIZE]);
63
65
  return prefix.data[position];
@@ -71,12 +73,7 @@ public:
71
73
  static void Split(ART &art, reference<Node> &prefix_node, Node &child_node, idx_t position);
72
74
 
73
75
  //! Returns the string representation of the node, or only traverses and verifies the node and its subtree
74
- static string VerifyAndToString(ART &art, Node &node, const bool only_verify);
75
-
76
- //! Serialize this node and all subsequent nodes
77
- static BlockPointer Serialize(ART &art, Node &node, MetadataWriter &writer);
78
- //! Deserialize this node and all subsequent prefix nodes
79
- static void Deserialize(ART &art, Node &node, MetadataReader &reader);
76
+ static string VerifyAndToString(ART &art, const Node &node, const bool only_verify);
80
77
 
81
78
  //! Vacuum the child of the node
82
79
  static void Vacuum(ART &art, Node &node, const ARTFlags &flags);
@@ -88,8 +85,5 @@ private:
88
85
  //! Appends the other_prefix and all its subsequent prefix nodes to this prefix node.
89
86
  //! Also frees all copied/appended nodes
90
87
  void Append(ART &art, Node other_prefix);
91
- //! Get the total count of bytes in the chain of prefixes, with the node reference pointing to first non-prefix node
92
- static idx_t TotalCount(ART &art, reference<Node> &node);
93
88
  };
94
-
95
89
  } // namespace duckdb
@@ -0,0 +1,126 @@
1
+ //===----------------------------------------------------------------------===//
2
+ // DuckDB
3
+ //
4
+ // duckdb/execution/index/fixed_size_allocator.hpp
5
+ //
6
+ //
7
+ //===----------------------------------------------------------------------===//
8
+
9
+ #pragma once
10
+
11
+ #include "duckdb/common/types/validity_mask.hpp"
12
+ #include "duckdb/common/unordered_set.hpp"
13
+ #include "duckdb/storage/buffer_manager.hpp"
14
+ #include "duckdb/storage/metadata/metadata_manager.hpp"
15
+ #include "duckdb/storage/metadata/metadata_writer.hpp"
16
+ #include "duckdb/execution/index/fixed_size_buffer.hpp"
17
+ #include "duckdb/execution/index/index_pointer.hpp"
18
+ #include "duckdb/common/unordered_map.hpp"
19
+ #include "duckdb/common/constants.hpp"
20
+ #include "duckdb/common/map.hpp"
21
+
22
+ namespace duckdb {
23
+
24
+ //! The FixedSizeAllocator provides pointers to fixed-size memory segments of pre-allocated memory buffers.
25
+ //! The pointers are IndexPointers, and the leftmost byte (metadata) must always be zero.
26
+ //! It is also possible to directly request a C++ pointer to the underlying segment of an index pointer.
27
+ class FixedSizeAllocator {
28
+ public:
29
+ //! We can vacuum 10% or more of the total in-memory footprint
30
+ static constexpr uint8_t VACUUM_THRESHOLD = 10;
31
+
32
+ //! Constants for fast offset calculations in the bitmask
33
+ static constexpr idx_t BASE[] = {0x00000000FFFFFFFF, 0x0000FFFF, 0x00FF, 0x0F, 0x3, 0x1};
34
+ static constexpr uint8_t SHIFT[] = {32, 16, 8, 4, 2, 1};
35
+
36
+ public:
37
+ FixedSizeAllocator(const idx_t segment_size, BlockManager &block_manager);
38
+
39
+ //! Block manager of the database instance
40
+ BlockManager &block_manager;
41
+ //! Buffer manager of the database instance
42
+ BufferManager &buffer_manager;
43
+ //! Metadata manager for (de)serialization
44
+ MetadataManager &metadata_manager;
45
+
46
+ public:
47
+ //! Get a new IndexPointer to a segment, might cause a new buffer allocation
48
+ IndexPointer New();
49
+ //! Free the segment of the IndexPointer
50
+ void Free(const IndexPointer ptr);
51
+ //! Returns a pointer of type T to a segment. If dirty is false, then T should be a const class
52
+ template <class T>
53
+ inline T *Get(const IndexPointer ptr, const bool dirty = true) {
54
+ return (T *)Get(ptr, dirty);
55
+ }
56
+
57
+ //! Resets the allocator, e.g., during 'DELETE FROM table'
58
+ void Reset();
59
+
60
+ //! Returns the in-memory usage in bytes
61
+ inline idx_t GetMemoryUsage() const;
62
+
63
+ //! Returns the upper bound of the available buffer IDs, i.e., upper_bound > max_buffer_id
64
+ idx_t GetUpperBoundBufferId() const;
65
+ //! Merge another FixedSizeAllocator into this allocator. Both must have the same segment size
66
+ void Merge(FixedSizeAllocator &other);
67
+
68
+ //! Initialize a vacuum operation, and return true, if the allocator needs a vacuum
69
+ bool InitializeVacuum();
70
+ //! Finalize a vacuum operation by freeing all vacuumed buffers
71
+ void FinalizeVacuum();
72
+ //! Returns true, if an IndexPointer qualifies for a vacuum operation, and false otherwise
73
+ inline bool NeedsVacuum(const IndexPointer ptr) const {
74
+ if (vacuum_buffers.find(ptr.GetBufferId()) != vacuum_buffers.end()) {
75
+ return true;
76
+ }
77
+ return false;
78
+ }
79
+ //! Vacuums an IndexPointer
80
+ IndexPointer VacuumPointer(const IndexPointer ptr);
81
+
82
+ //! Serializes all in-memory buffers and the metadata
83
+ BlockPointer Serialize(MetadataWriter &writer);
84
+ //! Deserializes all metadata
85
+ void Deserialize(const BlockPointer &block_pointer);
86
+
87
+ private:
88
+ //! Allocation size of one segment in a buffer
89
+ //! We only need this value to calculate bitmask_count, bitmask_offset, and
90
+ //! available_segments_per_buffer
91
+ idx_t segment_size;
92
+
93
+ //! Number of validity_t values in the bitmask
94
+ idx_t bitmask_count;
95
+ //! First starting byte of the payload (segments)
96
+ idx_t bitmask_offset;
97
+ //! Number of possible segment allocations per buffer
98
+ idx_t available_segments_per_buffer;
99
+
100
+ //! Total number of allocated segments in all buffers
101
+ //! We can recalculate this by iterating over all buffers
102
+ idx_t total_segment_count;
103
+
104
+ //! Buffers containing the segments
105
+ unordered_map<idx_t, FixedSizeBuffer> buffers;
106
+ //! Buffers with free space
107
+ unordered_set<idx_t> buffers_with_free_space;
108
+ //! Buffers qualifying for a vacuum (helper field to allow for fast NeedsVacuum checks)
109
+ unordered_set<idx_t> vacuum_buffers;
110
+
111
+ private:
112
+ //! Returns the data_ptr_t to a segment, and, if dirty is true, sets the dirty flag of the buffer containing that segment
113
+ inline data_ptr_t Get(const IndexPointer ptr, const bool dirty = true) {
114
+ D_ASSERT(ptr.GetOffset() < available_segments_per_buffer);
115
+ D_ASSERT(buffers.find(ptr.GetBufferId()) != buffers.end());
116
+ auto &buffer = buffers.find(ptr.GetBufferId())->second;
117
+ auto buffer_ptr = buffer.Get(dirty);
118
+ return buffer_ptr + ptr.GetOffset() * segment_size + bitmask_offset;
119
+ }
120
+ //! Returns the first free offset in a bitmask
121
+ uint32_t GetOffset(ValidityMask &mask, const idx_t segment_count);
122
+ //! Returns an available buffer id
123
+ idx_t GetAvailableBufferId() const;
124
+ };
125
+
126
+ } // namespace duckdb
@@ -0,0 +1,79 @@
1
+ //===----------------------------------------------------------------------===//
2
+ // DuckDB
3
+ //
4
+ // duckdb/execution/index/fixed_size_buffer.hpp
5
+ //
6
+ //
7
+ //===----------------------------------------------------------------------===//
8
+
9
+ #pragma once
10
+
11
+ #include "duckdb/common/typedefs.hpp"
12
+ #include "duckdb/storage/buffer/block_handle.hpp"
13
+ #include "duckdb/storage/buffer/buffer_handle.hpp"
14
+
15
+ namespace duckdb {
16
+
17
+ class FixedSizeAllocator;
18
+ class MetadataWriter;
19
+
20
+ //! A fixed-size buffer holds fixed-size segments of data. It lazily deserializes a buffer, if on-disk and not
21
+ //! yet in memory, and it only serializes dirty and non-written buffers to disk during
22
+ //! serialization.
23
+ class FixedSizeBuffer {
24
+ public:
25
+ //! Constructor for a new in-memory buffer
26
+ explicit FixedSizeBuffer(BlockManager &block_manager);
27
+ //! Constructor for deserializing buffer metadata from disk
28
+ FixedSizeBuffer(BlockManager &block_manager, const idx_t segment_count, const block_id_t &block_id);
29
+
30
+ //! Block manager of the database instance
31
+ BlockManager &block_manager;
32
+
33
+ //! The number of allocated segments
34
+ idx_t segment_count;
35
+
36
+ //! True: the in-memory buffer is no longer consistent with a (possibly existing) copy on disk
37
+ bool dirty;
38
+ //! True: can be vacuumed after the vacuum operation
39
+ bool vacuum;
40
+
41
+ public:
42
+ //! Returns true, if the buffer is in-memory
43
+ inline bool InMemory() const {
44
+ return buffer_handle.IsValid();
45
+ }
46
+ //! Returns true, if the block is on-disk
47
+ inline bool OnDisk() const {
48
+ return (block_handle != nullptr) && (block_handle->BlockId() < MAXIMUM_BLOCK);
49
+ }
50
+ //! Returns the block ID
51
+ inline block_id_t BlockId() const {
52
+ D_ASSERT(OnDisk());
53
+ return block_handle->BlockId();
54
+ }
55
+ //! Returns a pointer to the buffer in memory, and calls Pin, if the buffer is not in memory
56
+ inline data_ptr_t Get(const bool dirty_p = true) {
57
+ if (!InMemory()) {
58
+ Pin();
59
+ }
60
+ if (dirty_p) {
61
+ dirty = dirty_p;
62
+ }
63
+ return buffer_handle.Ptr();
64
+ }
65
+ //! Destroys the in-memory buffer and the on-disk block
66
+ void Destroy();
67
+ //! Serializes a buffer (if dirty or not on disk)
68
+ void Serialize();
69
+ //! Pin a buffer (if not in-memory)
70
+ void Pin();
71
+
72
+ private:
73
+ //! The buffer handle of the in-memory buffer
74
+ BufferHandle buffer_handle;
75
+ //! The block handle of the on-disk buffer
76
+ shared_ptr<BlockHandle> block_handle;
77
+ };
78
+
79
+ } // namespace duckdb
@@ -0,0 +1,96 @@
1
+ //===----------------------------------------------------------------------===//
2
+ // DuckDB
3
+ //
4
+ // duckdb/execution/index/index_pointer.hpp
5
+ //
6
+ //
7
+ //===----------------------------------------------------------------------===//
8
+
9
+ #pragma once
10
+
11
+ #include "duckdb/common/typedefs.hpp"
12
+
13
+ namespace duckdb {
14
+
15
+ class IndexPointer {
16
+ public:
17
+ //! Bit-shifting
18
+ static constexpr idx_t SHIFT_OFFSET = 32;
19
+ static constexpr idx_t SHIFT_METADATA = 56;
20
+ //! AND operations
21
+ static constexpr idx_t AND_OFFSET = 0x0000000000FFFFFF;
22
+ static constexpr idx_t AND_BUFFER_ID = 0x00000000FFFFFFFF;
23
+ static constexpr idx_t AND_METADATA = 0xFF00000000000000;
24
+
25
+ public:
26
+ //! Constructs an empty IndexPointer
27
+ IndexPointer() : data(0) {};
28
+ //! Constructs an in-memory IndexPointer with a buffer ID and an offset
29
+ IndexPointer(const uint32_t buffer_id, const uint32_t offset) : data(0) {
30
+ auto shifted_offset = ((idx_t)offset) << SHIFT_OFFSET;
31
+ data += shifted_offset;
32
+ data += buffer_id;
33
+ };
34
+
35
+ public:
36
+ //! Get data (all 64 bits)
37
+ inline idx_t Get() const {
38
+ return data;
39
+ }
40
+ //! Set data (all 64 bits)
41
+ inline void Set(const idx_t data_p) {
42
+ data = data_p;
43
+ }
44
+
45
+ //! Returns false, if the metadata is empty
46
+ inline bool HasMetadata() const {
47
+ return data & AND_METADATA;
48
+ }
49
+ //! Get metadata (zero to 7th bit)
50
+ inline uint8_t GetMetadata() const {
51
+ return data >> SHIFT_METADATA;
52
+ }
53
+ //! Set metadata (zero to 7th bit)
54
+ inline void SetMetadata(const uint8_t metadata) {
55
+ data += (idx_t)metadata << SHIFT_METADATA;
56
+ }
57
+
58
+ //! Get the offset (8th to 31st bit)
59
+ inline idx_t GetOffset() const {
60
+ auto offset = data >> SHIFT_OFFSET;
61
+ return offset & AND_OFFSET;
62
+ }
63
+ //! Get the buffer ID (32nd to 63rd bit)
64
+ inline idx_t GetBufferId() const {
65
+ return data & AND_BUFFER_ID;
66
+ }
67
+
68
+ //! Resets the IndexPointer
69
+ inline void Clear() {
70
+ data = 0;
71
+ }
72
+
73
+ //! Adds an idx_t to a buffer ID, the rightmost 32 bits of data contain the buffer ID
74
+ inline void IncreaseBufferId(const idx_t summand) {
75
+ data += summand;
76
+ }
77
+
78
+ //! Comparison operator
79
+ inline bool operator==(const IndexPointer &ptr) const {
80
+ return data == ptr.data;
81
+ }
82
+
83
+ private:
84
+ //! Data holds all the information contained in an IndexPointer
85
+ //! [0 - 7: metadata,
86
+ //! 8 - 31: offset, 32 - 63: buffer ID]
87
+ //! NOTE: we do not use bit fields because when using bit fields Windows compiles
88
+ //! the IndexPointer class into 16 bytes instead of the intended 8 bytes, doubling the
89
+ //! space requirements
90
+ //! https://learn.microsoft.com/en-us/cpp/cpp/cpp-bit-fields?view=msvc-170
91
+ idx_t data;
92
+ };
93
+
94
+ static_assert(sizeof(IndexPointer) == sizeof(idx_t), "Invalid size for IndexPointer.");
95
+
96
+ } // namespace duckdb
@@ -68,7 +68,7 @@ public:
68
68
  void Signal(idx_t n);
69
69
 
70
70
  //! Yield to other threads
71
- void YieldThread();
71
+ static void YieldThread();
72
72
 
73
73
  //! Set the allocator flush threshold
74
74
  void SetAllocatorFlushTreshold(idx_t threshold);
@@ -35,7 +35,7 @@ struct BlockPointer {
35
35
  block_id_t block_id;
36
36
  uint32_t offset;
37
37
 
38
- bool IsValid() {
38
+ bool IsValid() const {
39
39
  return block_id != INVALID_BLOCK;
40
40
  }
41
41
  };
@@ -54,8 +54,6 @@ public:
54
54
 
55
55
  //! Attached database instance
56
56
  AttachedDatabase &db;
57
- //! Buffer manager of the database instance
58
- BufferManager &buffer_manager;
59
57
 
60
58
  public:
61
59
  //! Initialize a single predicate scan on the index with the given expression and column IDs
@@ -85,6 +83,10 @@ public:
85
83
  //! Performs constraint checking for a chunk of input data
86
84
  virtual void CheckConstraintsForChunk(DataChunk &input, ConflictManager &conflict_manager) = 0;
87
85
 
86
+ //! Deletes all data from the index. The lock obtained from InitializeLock must be held
87
+ virtual void CommitDrop(IndexLock &index_lock) = 0;
88
+ //! Deletes all data from the index
89
+ void CommitDrop();
88
90
  //! Delete a chunk of entries from the index. The lock obtained from InitializeLock must be held
89
91
  virtual void Delete(IndexLock &state, DataChunk &entries, Vector &row_identifiers) = 0;
90
92
  //! Obtains a lock and calls Delete while holding that lock
@@ -125,11 +127,11 @@ public:
125
127
  return (constraint_type == IndexConstraintType::FOREIGN);
126
128
  }
127
129
 
128
- //! Serializes the index and returns the pair of block_id offset positions
130
+ //! Serializes the index to disk
129
131
  virtual BlockPointer Serialize(MetadataWriter &writer);
130
- //! Returns the serialized data pointer to the block and offset of the serialized index
131
- BlockPointer GetSerializedDataPointer() const {
132
- return serialized_data_pointer;
132
+ //! Returns the serialized root block pointer
133
+ BlockPointer GetRootBlockPointer() const {
134
+ return root_block_pointer;
133
135
  }
134
136
 
135
137
  //! Execute the index expressions on an input chunk
@@ -139,8 +141,8 @@ public:
139
141
  protected:
140
142
  //! Lock used for any changes to the index
141
143
  mutex lock;
142
- //! Pointer to serialized index data
143
- BlockPointer serialized_data_pointer;
144
+ //! Pointer to the index on disk
145
+ BlockPointer root_block_pointer;
144
146
 
145
147
  private:
146
148
  //! Bound expressions used during expression execution
@@ -14,6 +14,9 @@ namespace duckdb {
14
14
 
15
15
  class MetadataWriter : public Serializer {
16
16
  public:
17
+ MetadataWriter(const MetadataWriter &) = delete;
18
+ MetadataWriter &operator=(const MetadataWriter &) = delete;
19
+
17
20
  explicit MetadataWriter(MetadataManager &manager);
18
21
  ~MetadataWriter() override;
19
22
 
@@ -69,21 +69,30 @@ BindResult ExpressionBinder::BindExpression(FunctionExpression &function, idx_t
69
69
  }
70
70
 
71
71
  switch (func->type) {
72
- case CatalogType::SCALAR_FUNCTION_ENTRY:
72
+ case CatalogType::SCALAR_FUNCTION_ENTRY: {
73
73
  // scalar function
74
74
 
75
75
  // check for lambda parameters, ignore ->> operator (JSON extension)
76
+ bool try_bind_lambda = false;
76
77
  if (function.function_name != "->>") {
77
78
  for (auto &child : function.children) {
78
79
  if (child->expression_class == ExpressionClass::LAMBDA) {
79
- return BindLambdaFunction(function, func->Cast<ScalarFunctionCatalogEntry>(), depth);
80
+ try_bind_lambda = true;
80
81
  }
81
82
  }
82
83
  }
83
84
 
85
+ if (try_bind_lambda) {
86
+ auto result = BindLambdaFunction(function, func->Cast<ScalarFunctionCatalogEntry>(), depth);
87
+ if (!result.HasError()) {
88
+ // Lambda bind successful
89
+ return result;
90
+ }
91
+ }
92
+
84
93
  // other scalar function
85
94
  return BindFunction(function, func->Cast<ScalarFunctionCatalogEntry>(), depth);
86
-
95
+ }
87
96
  case CatalogType::MACRO_ENTRY:
88
97
  // macro function
89
98
  return BindMacro(function, func->Cast<ScalarMacroCatalogEntry>(), depth, expr_ptr);
@@ -134,7 +143,7 @@ BindResult ExpressionBinder::BindLambdaFunction(FunctionExpression &function, Sc
134
143
  string error;
135
144
 
136
145
  if (function.children.size() != 2) {
137
- throw BinderException("Invalid function arguments!");
146
+ return BindResult("Invalid function arguments!");
138
147
  }
139
148
  D_ASSERT(function.children[1]->GetExpressionClass() == ExpressionClass::LAMBDA);
140
149
 
@@ -148,7 +157,7 @@ BindResult ExpressionBinder::BindLambdaFunction(FunctionExpression &function, Sc
148
157
  auto &list_child = BoundExpression::GetExpression(*function.children[0]);
149
158
  if (list_child->return_type.id() != LogicalTypeId::LIST && list_child->return_type.id() != LogicalTypeId::SQLNULL &&
150
159
  list_child->return_type.id() != LogicalTypeId::UNKNOWN) {
151
- throw BinderException(" Invalid LIST argument to " + function.function_name + "!");
160
+ return BindResult(" Invalid LIST argument to " + function.function_name + "!");
152
161
  }
153
162
 
154
163
  LogicalType list_child_type = list_child->return_type.id();
@@ -69,9 +69,8 @@ void SingleFileTableDataWriter::FinalizeTable(TableStatistics &&global_stats, Da
69
69
 
70
70
  // Write the block pointers (block ids and offsets) of the indexes to the metadata
71
71
  meta_data_writer.Write<idx_t>(index_pointers.size());
72
- for (auto &block_info : index_pointers) {
73
- meta_data_writer.Write<block_id_t>(block_info.block_id);
74
- meta_data_writer.Write<uint32_t>(block_info.offset);
72
+ for (const auto &index_pointer : index_pointers) {
73
+ meta_data_writer.Write<BlockPointer>(index_pointer);
75
74
  }
76
75
  }
77
76
 
@@ -262,7 +262,8 @@ void CheckpointWriter::WriteSchema(SchemaCatalogEntry &schema) {
262
262
  void CheckpointReader::ReadSchema(ClientContext &context, MetadataReader &reader) {
263
263
  // read the schema and create it in the catalog
264
264
  auto info = CatalogEntry::Deserialize(reader);
265
- // we set create conflict to ignore to ignore the failure of recreating the main schema
265
+
266
+ // we set create conflict to IGNORE_ON_CONFLICT, so that we can ignore a failure when recreating the main schema
266
267
  info->on_conflict = OnCreateConflict::IGNORE_ON_CONFLICT;
267
268
  catalog.CreateSchema(context, info->Cast<CreateSchemaInfo>());
268
269
 
@@ -336,14 +337,12 @@ void CheckpointReader::ReadSequence(ClientContext &context, MetadataReader &read
336
337
  // Indexes
337
338
  //===--------------------------------------------------------------------===//
338
339
  void CheckpointWriter::WriteIndex(IndexCatalogEntry &index_catalog) {
339
- // The index data should already have been written as part of WriteTableData.
340
- // Here, we need only serialize the pointer to that data.
341
- auto root_offset = index_catalog.index->GetSerializedDataPointer();
340
+ // we write the index data in WriteTableData
341
+ // here, we only write the root pointer
342
+ const auto root_block_pointer = index_catalog.index->GetRootBlockPointer();
342
343
  auto &metadata_writer = GetMetadataWriter();
343
344
  index_catalog.Serialize(metadata_writer);
344
- // Serialize the Block id and offset of root node
345
- metadata_writer.Write(root_offset.block_id);
346
- metadata_writer.Write(root_offset.offset);
345
+ metadata_writer.Write(root_block_pointer);
347
346
  }
348
347
 
349
348
  void CheckpointReader::ReadIndex(ClientContext &context, MetadataReader &reader) {
@@ -358,10 +357,8 @@ void CheckpointReader::ReadIndex(ClientContext &context, MetadataReader &reader)
358
357
  auto &index_catalog = schema_catalog.CreateIndex(context, index_info, table_catalog)->Cast<DuckIndexEntry>();
359
358
  index_catalog.info = table_catalog.GetStorage().info;
360
359
 
361
- // we deserialize the index lazily, i.e., we do not need to load any node information
362
- // except the root block id and offset
363
- auto root_block_id = reader.Read<block_id_t>();
364
- auto root_offset = reader.Read<uint32_t>();
360
+ // we deserialize the index lazily, i.e., we only load the root block pointer
361
+ const auto index_block_pointer = reader.Read<BlockPointer>();
365
362
 
366
363
  // obtain the expressions of the ART from the index metadata
367
364
  vector<unique_ptr<Expression>> unbound_expressions;
@@ -401,9 +398,8 @@ void CheckpointReader::ReadIndex(ClientContext &context, MetadataReader &reader)
401
398
  switch (index_info.index_type) {
402
399
  case IndexType::ART: {
403
400
  auto &storage = table_catalog.GetStorage();
404
- auto art =
405
- make_uniq<ART>(index_info.column_ids, TableIOManager::Get(storage), std::move(unbound_expressions),
406
- index_info.constraint_type, storage.db, nullptr, BlockPointer(root_block_id, root_offset));
401
+ auto art = make_uniq<ART>(index_info.column_ids, TableIOManager::Get(storage), std::move(unbound_expressions),
402
+ index_info.constraint_type, storage.db, nullptr, index_block_pointer);
407
403
  index_catalog.index = art.get();
408
404
  storage.info->indexes.AddIndex(std::move(art));
409
405
  break;
@@ -450,7 +446,7 @@ void CheckpointReader::ReadTableMacro(ClientContext &context, MetadataReader &re
450
446
  // Table Metadata
451
447
  //===--------------------------------------------------------------------===//
452
448
  void CheckpointWriter::WriteTable(TableCatalogEntry &table) {
453
- // write the table meta data
449
+ // write the table metadata
454
450
  table.Serialize(GetMetadataWriter());
455
451
  // now we need to write the table data.
456
452
  if (auto writer = GetTableDataWriter(table)) {
@@ -483,12 +479,11 @@ void CheckpointReader::ReadTableData(ClientContext &context, MetadataReader &rea
483
479
  data_reader.ReadTableData();
484
480
  bound_info.data->total_rows = reader.Read<idx_t>();
485
481
 
486
- // Get any indexes block info
487
- idx_t num_indexes = reader.Read<idx_t>();
488
- for (idx_t i = 0; i < num_indexes; i++) {
489
- auto idx_block_id = reader.Read<block_id_t>();
490
- auto idx_offset = reader.Read<uint32_t>();
491
- bound_info.indexes.emplace_back(idx_block_id, idx_offset);
482
+ // get the root block pointers of each index
483
+ idx_t index_count = reader.Read<idx_t>();
484
+ for (idx_t i = 0; i < index_count; i++) {
485
+ const auto index_pointer = reader.Read<BlockPointer>();
486
+ bound_info.indexes.emplace_back(index_pointer);
492
487
  }
493
488
  }
494
489
 
@@ -429,13 +429,13 @@ void DataTable::VerifyForeignKeyConstraint(const BoundForeignKeyConstraint &bfk,
429
429
  data_table.info->indexes.VerifyForeignKey(*dst_keys_ptr, dst_chunk, regular_conflicts);
430
430
  regular_conflicts.Finalize();
431
431
  auto &regular_matches = regular_conflicts.Conflicts();
432
- // check whether or not the chunk can be inserted or deleted into the referenced table' transaction local storage
433
- auto &local_storage = LocalStorage::Get(context, db);
434
432
 
433
+ // check if we can insert the chunk into the referenced table's local storage
434
+ auto &local_storage = LocalStorage::Get(context, db);
435
435
  bool error = IsForeignKeyConstraintError(is_append, count, regular_matches);
436
436
  bool transaction_error = false;
437
-
438
437
  bool transaction_check = local_storage.Find(data_table);
438
+
439
439
  if (transaction_check) {
440
440
  auto &transact_index = local_storage.GetIndexes(data_table);
441
441
  transact_index.VerifyForeignKey(*dst_keys_ptr, dst_chunk, transaction_conflicts);
@@ -12,7 +12,7 @@ Index::Index(AttachedDatabase &db, IndexType type, TableIOManager &table_io_mana
12
12
  IndexConstraintType constraint_type_p)
13
13
 
14
14
  : type(type), table_io_manager(table_io_manager), column_ids(column_ids_p), constraint_type(constraint_type_p),
15
- db(db), buffer_manager(BufferManager::GetBufferManager(db)) {
15
+ db(db) {
16
16
 
17
17
  for (auto &expr : unbound_expressions) {
18
18
  types.push_back(expr->return_type.InternalType());
@@ -39,6 +39,12 @@ PreservedError Index::Append(DataChunk &entries, Vector &row_identifiers) {
39
39
  return Append(state, entries, row_identifiers);
40
40
  }
41
41
 
42
+ void Index::CommitDrop() {
43
+ IndexLock index_lock;
44
+ InitializeLock(index_lock);
45
+ CommitDrop(index_lock);
46
+ }
47
+
42
48
  void Index::Delete(DataChunk &entries, Vector &row_identifiers) {
43
49
  IndexLock state;
44
50
  InitializeLock(state);