duckdb 0.8.2-dev4203.0 → 0.8.2-dev4376.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (72)
  1. package/package.json +1 -1
  2. package/src/duckdb/extension/parquet/parquet_extension.cpp +1 -1
  3. package/src/duckdb/src/common/enum_util.cpp +5 -0
  4. package/src/duckdb/src/common/file_buffer.cpp +1 -1
  5. package/src/duckdb/src/common/sort/partition_state.cpp +107 -29
  6. package/src/duckdb/src/common/types/validity_mask.cpp +56 -0
  7. package/src/duckdb/src/execution/index/art/art.cpp +5 -1
  8. package/src/duckdb/src/execution/index/art/leaf.cpp +13 -10
  9. package/src/duckdb/src/execution/index/art/node48.cpp +0 -2
  10. package/src/duckdb/src/execution/index/fixed_size_allocator.cpp +38 -73
  11. package/src/duckdb/src/execution/index/fixed_size_buffer.cpp +245 -27
  12. package/src/duckdb/src/execution/operator/aggregate/physical_window.cpp +2 -3
  13. package/src/duckdb/src/execution/operator/join/physical_asof_join.cpp +35 -20
  14. package/src/duckdb/src/execution/operator/persistent/physical_batch_insert.cpp +1 -1
  15. package/src/duckdb/src/execution/operator/persistent/physical_insert.cpp +1 -1
  16. package/src/duckdb/src/function/table/arrow_conversion.cpp +9 -1
  17. package/src/duckdb/src/function/table/version/pragma_version.cpp +2 -2
  18. package/src/duckdb/src/include/duckdb/common/constants.hpp +0 -15
  19. package/src/duckdb/src/include/duckdb/common/serializer/memory_stream.hpp +1 -1
  20. package/src/duckdb/src/include/duckdb/common/sort/partition_state.hpp +14 -4
  21. package/src/duckdb/src/include/duckdb/common/types/validity_mask.hpp +3 -0
  22. package/src/duckdb/src/include/duckdb/execution/index/art/leaf.hpp +2 -0
  23. package/src/duckdb/src/include/duckdb/execution/index/fixed_size_allocator.hpp +1 -7
  24. package/src/duckdb/src/include/duckdb/execution/index/fixed_size_buffer.hpp +38 -8
  25. package/src/duckdb/src/include/duckdb/function/table/arrow.hpp +3 -0
  26. package/src/duckdb/src/include/duckdb/main/relation.hpp +9 -2
  27. package/src/duckdb/src/include/duckdb/storage/block.hpp +3 -3
  28. package/src/duckdb/src/include/duckdb/storage/compression/bitpacking.hpp +1 -8
  29. package/src/duckdb/src/include/duckdb/storage/data_pointer.hpp +2 -2
  30. package/src/duckdb/src/include/duckdb/storage/metadata/metadata_manager.hpp +2 -0
  31. package/src/duckdb/src/include/duckdb/storage/metadata/metadata_reader.hpp +2 -0
  32. package/src/duckdb/src/include/duckdb/storage/metadata/metadata_writer.hpp +6 -2
  33. package/src/duckdb/src/include/duckdb/storage/partial_block_manager.hpp +35 -19
  34. package/src/duckdb/src/include/duckdb/storage/storage_info.hpp +19 -0
  35. package/src/duckdb/src/include/duckdb/storage/table/chunk_info.hpp +19 -13
  36. package/src/duckdb/src/include/duckdb/storage/table/column_checkpoint_state.hpp +4 -19
  37. package/src/duckdb/src/include/duckdb/storage/table/column_data.hpp +1 -1
  38. package/src/duckdb/src/include/duckdb/storage/table/row_group.hpp +15 -15
  39. package/src/duckdb/src/include/duckdb/storage/table/row_version_manager.hpp +59 -0
  40. package/src/duckdb/src/include/duckdb/storage/table/update_segment.hpp +1 -1
  41. package/src/duckdb/src/include/duckdb/transaction/commit_state.hpp +1 -6
  42. package/src/duckdb/src/include/duckdb/transaction/delete_info.hpp +3 -2
  43. package/src/duckdb/src/include/duckdb/transaction/duck_transaction.hpp +4 -2
  44. package/src/duckdb/src/include/duckdb/transaction/local_storage.hpp +1 -1
  45. package/src/duckdb/src/include/duckdb/transaction/undo_buffer.hpp +0 -1
  46. package/src/duckdb/src/main/relation.cpp +15 -2
  47. package/src/duckdb/src/main/settings/settings.cpp +5 -10
  48. package/src/duckdb/src/optimizer/statistics/expression/propagate_cast.cpp +14 -0
  49. package/src/duckdb/src/storage/checkpoint/table_data_writer.cpp +0 -1
  50. package/src/duckdb/src/storage/compression/bitpacking.cpp +55 -48
  51. package/src/duckdb/src/storage/data_table.cpp +1 -1
  52. package/src/duckdb/src/storage/local_storage.cpp +1 -1
  53. package/src/duckdb/src/storage/metadata/metadata_manager.cpp +41 -2
  54. package/src/duckdb/src/storage/metadata/metadata_reader.cpp +12 -3
  55. package/src/duckdb/src/storage/metadata/metadata_writer.cpp +8 -2
  56. package/src/duckdb/src/storage/partial_block_manager.cpp +42 -15
  57. package/src/duckdb/src/storage/single_file_block_manager.cpp +1 -2
  58. package/src/duckdb/src/storage/storage_info.cpp +1 -1
  59. package/src/duckdb/src/storage/table/chunk_info.cpp +39 -33
  60. package/src/duckdb/src/storage/table/column_checkpoint_state.cpp +26 -32
  61. package/src/duckdb/src/storage/table/column_data.cpp +14 -9
  62. package/src/duckdb/src/storage/table/list_column_data.cpp +2 -2
  63. package/src/duckdb/src/storage/table/row_group.cpp +102 -192
  64. package/src/duckdb/src/storage/table/row_group_collection.cpp +2 -2
  65. package/src/duckdb/src/storage/table/row_version_manager.cpp +228 -0
  66. package/src/duckdb/src/storage/table/update_segment.cpp +2 -2
  67. package/src/duckdb/src/transaction/cleanup_state.cpp +2 -1
  68. package/src/duckdb/src/transaction/commit_state.cpp +5 -4
  69. package/src/duckdb/src/transaction/duck_transaction.cpp +4 -2
  70. package/src/duckdb/src/transaction/rollback_state.cpp +2 -1
  71. package/src/duckdb/src/transaction/undo_buffer.cpp +3 -5
  72. package/src/duckdb/ub_src_storage_table.cpp +2 -0
@@ -8,36 +8,61 @@
8
8
 
9
9
  #pragma once
10
10
 
11
+ #include "duckdb/storage/partial_block_manager.hpp"
11
12
  #include "duckdb/common/typedefs.hpp"
12
13
  #include "duckdb/storage/buffer/block_handle.hpp"
13
14
  #include "duckdb/storage/buffer/buffer_handle.hpp"
15
+ #include "duckdb/storage/block_manager.hpp"
14
16
 
15
17
  namespace duckdb {
16
18
 
17
19
  class FixedSizeAllocator;
18
20
  class MetadataWriter;
19
21
 
22
+ struct PartialBlockForIndex : public PartialBlock {
23
+ public:
24
+ PartialBlockForIndex(PartialBlockState state, BlockManager &block_manager,
25
+ const shared_ptr<BlockHandle> &block_handle);
26
+ ~PartialBlockForIndex() override {};
27
+
28
+ public:
29
+ void Flush(const idx_t free_space_left) override;
30
+ void Clear() override;
31
+ void Merge(PartialBlock &other, idx_t offset, idx_t other_size) override;
32
+ };
33
+
20
34
  //! A fixed-size buffer holds fixed-size segments of data. It lazily deserializes a buffer, if on-disk and not
21
35
  //! yet in memory, and it only serializes dirty and non-written buffers to disk during
22
36
  //! serialization.
23
37
  class FixedSizeBuffer {
38
+ public:
39
+ //! Constants for fast offset calculations in the bitmask
40
+ static constexpr idx_t BASE[] = {0x00000000FFFFFFFF, 0x0000FFFF, 0x00FF, 0x0F, 0x3, 0x1};
41
+ static constexpr uint8_t SHIFT[] = {32, 16, 8, 4, 2, 1};
42
+
24
43
  public:
25
44
  //! Constructor for a new in-memory buffer
26
45
  explicit FixedSizeBuffer(BlockManager &block_manager);
27
46
  //! Constructor for deserializing buffer metadata from disk
28
- FixedSizeBuffer(BlockManager &block_manager, const idx_t segment_count, const block_id_t &block_id);
47
+ FixedSizeBuffer(BlockManager &block_manager, const idx_t segment_count, const idx_t allocation_size,
48
+ const BlockPointer &block_pointer);
29
49
 
30
50
  //! Block manager of the database instance
31
51
  BlockManager &block_manager;
32
52
 
33
53
  //! The number of allocated segments
34
54
  idx_t segment_count;
55
+ //! The size of allocated memory in this buffer (necessary for copying while pinning)
56
+ idx_t allocation_size;
35
57
 
36
58
  //! True: the in-memory buffer is no longer consistent with a (possibly existing) copy on disk
37
59
  bool dirty;
38
60
  //! True: can be vacuumed after the vacuum operation
39
61
  bool vacuum;
40
62
 
63
+ //! Partial block id and offset
64
+ BlockPointer block_pointer;
65
+
41
66
  public:
42
67
  //! Returns true, if the buffer is in-memory
43
68
  inline bool InMemory() const {
@@ -45,12 +70,7 @@ public:
45
70
  }
46
71
  //! Returns true, if the block is on-disk
47
72
  inline bool OnDisk() const {
48
- return (block_handle != nullptr) && (block_handle->BlockId() < MAXIMUM_BLOCK);
49
- }
50
- //! Returns the block ID
51
- inline block_id_t BlockId() const {
52
- D_ASSERT(OnDisk());
53
- return block_handle->BlockId();
73
+ return block_pointer.IsValid();
54
74
  }
55
75
  //! Returns a pointer to the buffer in memory, and calls Deserialize, if the buffer is not in memory
56
76
  inline data_ptr_t Get(const bool dirty_p = true) {
@@ -65,15 +85,25 @@ public:
65
85
  //! Destroys the in-memory buffer and the on-disk block
66
86
  void Destroy();
67
87
  //! Serializes a buffer (if dirty or not on disk)
68
- void Serialize();
88
+ void Serialize(PartialBlockManager &partial_block_manager, const idx_t available_segments, const idx_t segment_size,
89
+ const idx_t bitmask_offset);
69
90
  //! Pin a buffer (if not in-memory)
70
91
  void Pin();
92
+ //! Returns the first free offset in a bitmask
93
+ uint32_t GetOffset(const idx_t bitmask_count);
71
94
 
72
95
  private:
73
96
  //! The buffer handle of the in-memory buffer
74
97
  BufferHandle buffer_handle;
75
98
  //! The block handle of the on-disk buffer
76
99
  shared_ptr<BlockHandle> block_handle;
100
+
101
+ private:
102
+ //! Returns the maximum non-free offset in a bitmask
103
+ uint32_t GetMaxOffset(const idx_t available_segments_per_buffer);
104
+ //! Sets all uninitialized regions of a buffer in the respective partial block allocation
105
+ void SetUninitializedRegions(PartialBlockForIndex &p_block_for_index, const idx_t segment_size, const idx_t offset,
106
+ const idx_t bitmask_offset);
77
107
  };
78
108
 
79
109
  } // namespace duckdb
@@ -67,6 +67,9 @@ struct ArrowScanLocalState : public LocalTableFunctionState {
67
67
 
68
68
  unique_ptr<ArrowArrayStreamWrapper> stream;
69
69
  shared_ptr<ArrowArrayWrapper> chunk;
70
+ // This map holds the Arrow vectors owned by DuckDB to allow for zero-copy
71
+ // Note that only DuckDB can release these vectors
72
+ unordered_map<idx_t, shared_ptr<ArrowArrayWrapper>> arrow_owned_data;
70
73
  idx_t chunk_offset = 0;
71
74
  idx_t batch_index = 0;
72
75
  vector<column_t> column_ids;
@@ -19,8 +19,11 @@
19
19
  #include "duckdb/main/client_context.hpp"
20
20
  #include "duckdb/main/external_dependencies.hpp"
21
21
  #include "duckdb/parser/statement/explain_statement.hpp"
22
-
23
- #include <memory>
22
+ #include "duckdb/parser/parsed_expression.hpp"
23
+ #include "duckdb/parser/result_modifier.hpp"
24
+ #include "duckdb/common/unique_ptr.hpp"
25
+ #include "duckdb/common/vector.hpp"
26
+ #include "duckdb/common/helper.hpp"
24
27
 
25
28
  namespace duckdb {
26
29
  struct BoundStatement;
@@ -81,9 +84,12 @@ public:
81
84
  DUCKDB_API shared_ptr<Relation> Project(const string &select_list, const vector<string> &aliases);
82
85
  DUCKDB_API shared_ptr<Relation> Project(const vector<string> &expressions);
83
86
  DUCKDB_API shared_ptr<Relation> Project(const vector<string> &expressions, const vector<string> &aliases);
87
+ DUCKDB_API shared_ptr<Relation> Project(vector<unique_ptr<ParsedExpression>> expressions,
88
+ const vector<string> &aliases);
84
89
 
85
90
  // FILTER
86
91
  DUCKDB_API shared_ptr<Relation> Filter(const string &expression);
92
+ DUCKDB_API shared_ptr<Relation> Filter(unique_ptr<ParsedExpression> expression);
87
93
  DUCKDB_API shared_ptr<Relation> Filter(const vector<string> &expressions);
88
94
 
89
95
  // LIMIT
@@ -92,6 +98,7 @@ public:
92
98
  // ORDER
93
99
  DUCKDB_API shared_ptr<Relation> Order(const string &expression);
94
100
  DUCKDB_API shared_ptr<Relation> Order(const vector<string> &expressions);
101
+ DUCKDB_API shared_ptr<Relation> Order(vector<OrderByNode> expressions);
95
102
 
96
103
  // JOIN operation
97
104
  DUCKDB_API shared_ptr<Relation> Join(const shared_ptr<Relation> &other, const string &condition,
@@ -52,11 +52,11 @@ struct MetaBlockPointer {
52
52
  idx_t block_pointer;
53
53
  uint32_t offset;
54
54
 
55
- bool IsValid() {
55
+ bool IsValid() const {
56
56
  return block_pointer != DConstants::INVALID_INDEX;
57
57
  }
58
- block_id_t GetBlockId();
59
- uint32_t GetBlockIndex();
58
+ block_id_t GetBlockId() const;
59
+ uint32_t GetBlockIndex() const;
60
60
 
61
61
  void Serialize(Serializer &serializer) const;
62
62
  static MetaBlockPointer Deserialize(Deserializer &source);
@@ -12,14 +12,7 @@
12
12
 
13
13
  namespace duckdb {
14
14
 
15
- enum class BitpackingMode : uint8_t {
16
- AUTO,
17
-
18
- CONSTANT,
19
- CONSTANT_DELTA,
20
- DELTA_FOR,
21
- FOR
22
- };
15
+ enum class BitpackingMode : uint8_t { INVALID, AUTO, CONSTANT, CONSTANT_DELTA, DELTA_FOR, FOR };
23
16
 
24
17
  BitpackingMode BitpackingModeFromString(const string &str);
25
18
  string BitpackingModeToString(const BitpackingMode &mode);
@@ -40,8 +40,8 @@ struct RowGroupPointer {
40
40
  uint64_t tuple_count;
41
41
  //! The data pointers of the column segments stored in the row group
42
42
  vector<MetaBlockPointer> data_pointers;
43
- //! The versions information of the row group (if any)
44
- shared_ptr<VersionNode> versions;
43
+ //! Data pointers to the delete information of the row group (if any)
44
+ vector<MetaBlockPointer> deletes_pointers;
45
45
  };
46
46
 
47
47
  } // namespace duckdb
@@ -64,6 +64,7 @@ public:
64
64
  void Flush();
65
65
 
66
66
  void MarkBlocksAsModified();
67
+ void ClearModifiedBlocks(const vector<MetaBlockPointer> &pointers);
67
68
 
68
69
  idx_t BlockCount();
69
70
 
@@ -82,6 +83,7 @@ protected:
82
83
 
83
84
  void AddBlock(MetadataBlock new_block, bool if_exists = false);
84
85
  void AddAndRegisterBlock(MetadataBlock block);
86
+ void ConvertToTransient(MetadataBlock &block);
85
87
  };
86
88
 
87
89
  } // namespace duckdb
@@ -18,6 +18,7 @@ enum class BlockReaderType { EXISTING_BLOCKS, REGISTER_BLOCKS };
18
18
  class MetadataReader : public ReadStream {
19
19
  public:
20
20
  MetadataReader(MetadataManager &manager, MetaBlockPointer pointer,
21
+ optional_ptr<vector<MetaBlockPointer>> read_pointers = nullptr,
21
22
  BlockReaderType type = BlockReaderType::EXISTING_BLOCKS);
22
23
  MetadataReader(MetadataManager &manager, BlockPointer pointer);
23
24
  ~MetadataReader() override;
@@ -46,6 +47,7 @@ private:
46
47
  MetadataHandle block;
47
48
  MetadataPointer next_pointer;
48
49
  bool has_next_block;
50
+ optional_ptr<vector<MetaBlockPointer>> read_pointers;
49
51
  idx_t index;
50
52
  idx_t offset;
51
53
  idx_t next_offset;
@@ -15,10 +15,10 @@ namespace duckdb {
15
15
 
16
16
  class MetadataWriter : public WriteStream {
17
17
  public:
18
+ explicit MetadataWriter(MetadataManager &manager,
19
+ optional_ptr<vector<MetaBlockPointer>> written_pointers = nullptr);
18
20
  MetadataWriter(const MetadataWriter &) = delete;
19
21
  MetadataWriter &operator=(const MetadataWriter &) = delete;
20
-
21
- explicit MetadataWriter(MetadataManager &manager);
22
22
  ~MetadataWriter() override;
23
23
 
24
24
  public:
@@ -27,6 +27,9 @@ public:
27
27
 
28
28
  BlockPointer GetBlockPointer();
29
29
  MetaBlockPointer GetMetaBlockPointer();
30
+ MetadataManager &GetManager() {
31
+ return manager;
32
+ }
30
33
 
31
34
  protected:
32
35
  virtual MetadataHandle NextHandle();
@@ -41,6 +44,7 @@ private:
41
44
  MetadataManager &manager;
42
45
  MetadataHandle block;
43
46
  MetadataPointer current_pointer;
47
+ optional_ptr<vector<MetaBlockPointer>> written_pointers;
44
48
  idx_t capacity;
45
49
  idx_t offset;
46
50
  };
@@ -25,30 +25,46 @@ class TableCatalogEntry;
25
25
  class ViewCatalogEntry;
26
26
  class TypeCatalogEntry;
27
27
 
28
+ //! Regions that require zero-initialization to avoid leaking memory
29
+ struct UninitializedRegion {
30
+ idx_t start;
31
+ idx_t end;
32
+ };
33
+
34
+ //! The current state of a partial block
28
35
  struct PartialBlockState {
36
+ //! The block id of the partial block
29
37
  block_id_t block_id;
30
- //! How big is the block we're writing to. (Total bytes to assign).
38
+ //! The total bytes that we can assign to this block
31
39
  uint32_t block_size;
32
- //! How many bytes of the allocation are used. (offset_in_block of next allocation)
33
- uint32_t offset_in_block;
34
- //! How many times has the block been used?
40
+ //! Next allocation offset, and also the current allocation size
41
+ uint32_t offset;
42
+ //! The number of times that this block has been used for partial allocations
35
43
  uint32_t block_use_count;
36
44
  };
37
45
 
38
46
  struct PartialBlock {
39
- explicit PartialBlock(PartialBlockState state) : state(std::move(state)) {
40
- }
47
+ PartialBlock(PartialBlockState state, BlockManager &block_manager, const shared_ptr<BlockHandle> &block_handle);
41
48
  virtual ~PartialBlock() {
42
49
  }
43
50
 
51
+ //! The current state of a partial block
44
52
  PartialBlockState state;
53
+ //! All uninitialized regions on this block, we need to zero-initialize them when flushing
54
+ vector<UninitializedRegion> uninitialized_regions;
55
+ //! The block manager of the partial block manager
56
+ BlockManager &block_manager;
57
+ //! The block handle of the underlying block that this partial block writes to
58
+ shared_ptr<BlockHandle> block_handle;
45
59
 
46
60
  public:
47
- virtual void AddUninitializedRegion(idx_t start, idx_t end) = 0;
48
- virtual void Flush(idx_t free_space_left) = 0;
49
- virtual void Clear() {
50
- }
51
- virtual void Merge(PartialBlock &other, idx_t offset, idx_t other_size);
61
+ //! Add regions that need zero-initialization to avoid leaking memory
62
+ void AddUninitializedRegion(const idx_t start, const idx_t end);
63
+ //! Flush the block to disk and zero-initialize any free space and uninitialized regions
64
+ virtual void Flush(const idx_t free_space_left) = 0;
65
+ void FlushInternal(const idx_t free_space_left);
66
+ virtual void Merge(PartialBlock &other, idx_t offset, idx_t other_size) = 0;
67
+ virtual void Clear() = 0;
52
68
 
53
69
  public:
54
70
  template <class TARGET>
@@ -59,13 +75,13 @@ public:
59
75
  };
60
76
 
61
77
  struct PartialBlockAllocation {
62
- // BlockManager owning the block_id
78
+ //! The BlockManager owning the block_id
63
79
  BlockManager *block_manager {nullptr};
64
- //! How many bytes assigned to the caller?
80
+ //! The number of bytes assigned to the caller
65
81
  uint32_t allocation_size;
66
- //! State of assigned block.
82
+ //! The current state of the partial block
67
83
  PartialBlockState state;
68
- //! Arbitrary state related to partial block storage.
84
+ //! Arbitrary state related to the partial block storage
69
85
  unique_ptr<PartialBlock> partial_block;
70
86
  };
71
87
 
@@ -76,12 +92,12 @@ enum class CheckpointType { FULL_CHECKPOINT, APPEND_TO_TABLE };
76
92
  //! In any case, they must share a block manager.
77
93
  class PartialBlockManager {
78
94
  public:
79
- // 20% free / 80% utilization
95
+ //! 20% free / 80% utilization
80
96
  static constexpr const idx_t DEFAULT_MAX_PARTIAL_BLOCK_SIZE = Storage::BLOCK_SIZE / 5 * 4;
81
- // Max number of shared references to a block. No effective limit by default.
97
+ //! Max number of shared references to a block. No effective limit by default.
82
98
  static constexpr const idx_t DEFAULT_MAX_USE_COUNT = 1u << 20;
83
- // No point letting map size grow unbounded. We'll drop blocks with the
84
- // least free space first.
99
+ //! No point letting map size grow unbounded. We'll drop blocks with the
100
+ //! least free space first.
85
101
  static constexpr const idx_t MAX_BLOCK_MAP_SIZE = 1u << 31;
86
102
 
87
103
  public:
@@ -23,6 +23,25 @@ struct FileHandle;
23
23
  #error Row group size should be cleanly divisible by vector size
24
24
  #endif
25
25
 
26
+ struct Storage {
27
+ //! The size of a hard disk sector, only really needed for Direct IO
28
+ constexpr static int SECTOR_SIZE = 4096;
29
+ //! Block header size for blocks written to the storage
30
+ constexpr static int BLOCK_HEADER_SIZE = sizeof(uint64_t);
31
+ // Size of a memory slot managed by the StorageManager. This is the quantum of allocation for Blocks on DuckDB. We
32
+ // default to 256KB. (1 << 18)
33
+ constexpr static int BLOCK_ALLOC_SIZE = 262144;
34
+ //! The actual memory space that is available within the blocks
35
+ constexpr static int BLOCK_SIZE = BLOCK_ALLOC_SIZE - BLOCK_HEADER_SIZE;
36
+ //! The size of the headers. This should be small and written more or less atomically by the hard disk. We default
37
+ //! to the page size, which is 4KB. (1 << 12)
38
+ constexpr static int FILE_HEADER_SIZE = 4096;
39
+ //! The number of rows per row group (must be a multiple of the vector size)
40
+ constexpr static const idx_t ROW_GROUP_SIZE = STANDARD_ROW_GROUPS_SIZE;
41
+ //! The number of vectors per row group
42
+ constexpr static const idx_t ROW_GROUP_VECTOR_COUNT = ROW_GROUP_SIZE / STANDARD_VECTOR_SIZE;
43
+ };
44
+
26
45
  //! The version number of the database storage format
27
46
  extern const uint64_t VERSION_NUMBER;
28
47
 
@@ -46,8 +46,10 @@ public:
46
46
  virtual void CommitAppend(transaction_t commit_id, idx_t start, idx_t end) = 0;
47
47
  virtual idx_t GetCommittedDeletedCount(idx_t max_count) = 0;
48
48
 
49
- virtual void Serialize(Serializer &serializer) const = 0;
50
- static unique_ptr<ChunkInfo> Deserialize(Deserializer &deserializer);
49
+ virtual bool HasDeletes() const = 0;
50
+
51
+ virtual void Write(WriteStream &writer) const;
52
+ static unique_ptr<ChunkInfo> Read(ReadStream &reader);
51
53
 
52
54
  public:
53
55
  template <class TARGET>
@@ -74,8 +76,8 @@ public:
74
76
  public:
75
77
  explicit ChunkConstantInfo(idx_t start);
76
78
 
77
- atomic<transaction_t> insert_id;
78
- atomic<transaction_t> delete_id;
79
+ transaction_t insert_id;
80
+ transaction_t delete_id;
79
81
 
80
82
  public:
81
83
  idx_t GetSelVector(TransactionData transaction, SelectionVector &sel_vector, idx_t max_count) override;
@@ -85,8 +87,10 @@ public:
85
87
  void CommitAppend(transaction_t commit_id, idx_t start, idx_t end) override;
86
88
  idx_t GetCommittedDeletedCount(idx_t max_count) override;
87
89
 
88
- void Serialize(Serializer &serializer) const override;
89
- static unique_ptr<ChunkInfo> Deserialize(Deserializer &deserializer);
90
+ bool HasDeletes() const override;
91
+
92
+ void Write(WriteStream &writer) const override;
93
+ static unique_ptr<ChunkInfo> Read(ReadStream &reader);
90
94
 
91
95
  private:
92
96
  template <class OP>
@@ -102,13 +106,13 @@ public:
102
106
  explicit ChunkVectorInfo(idx_t start);
103
107
 
104
108
  //! The transaction ids of the transactions that inserted the tuples (if any)
105
- atomic<transaction_t> inserted[STANDARD_VECTOR_SIZE];
106
- atomic<transaction_t> insert_id;
107
- atomic<bool> same_inserted_id;
109
+ transaction_t inserted[STANDARD_VECTOR_SIZE];
110
+ transaction_t insert_id;
111
+ bool same_inserted_id;
108
112
 
109
113
  //! The transaction ids of the transactions that deleted the tuples (if any)
110
- atomic<transaction_t> deleted[STANDARD_VECTOR_SIZE];
111
- atomic<bool> any_deleted;
114
+ transaction_t deleted[STANDARD_VECTOR_SIZE];
115
+ bool any_deleted;
112
116
 
113
117
  public:
114
118
  idx_t GetSelVector(transaction_t start_time, transaction_t transaction_id, SelectionVector &sel_vector,
@@ -130,8 +134,10 @@ public:
130
134
  idx_t Delete(transaction_t transaction_id, row_t rows[], idx_t count);
131
135
  void CommitDelete(transaction_t commit_id, row_t rows[], idx_t count);
132
136
 
133
- void Serialize(Serializer &serializer) const override;
134
- static unique_ptr<ChunkInfo> Deserialize(Deserializer &deserializer);
137
+ bool HasDeletes() const override;
138
+
139
+ void Write(WriteStream &writer) const override;
140
+ static unique_ptr<ChunkInfo> Read(ReadStream &reader);
135
141
 
136
142
  private:
137
143
  template <class OP>
@@ -68,37 +68,22 @@ struct PartialBlockForCheckpoint : public PartialBlock {
68
68
  };
69
69
 
70
70
  public:
71
- PartialBlockForCheckpoint(ColumnData &data, ColumnSegment &segment, BlockManager &block_manager,
72
- PartialBlockState state);
71
+ PartialBlockForCheckpoint(ColumnData &data, ColumnSegment &segment, PartialBlockState state,
72
+ BlockManager &block_manager);
73
73
  ~PartialBlockForCheckpoint() override;
74
74
 
75
75
  // We will copy all segment data into the memory of the shared block.
76
76
  // Once the block is full (or checkpoint is complete) we'll invoke Flush().
77
77
  // This will cause the block to get written to storage (via BlockManger::ConvertToPersistent),
78
78
  // and all segments to have their references updated (via ColumnSegment::ConvertToPersistent)
79
- BlockManager &block_manager;
80
- shared_ptr<BlockHandle> block;
81
79
  vector<PartialColumnSegment> segments;
82
80
 
83
- private:
84
- struct UninitializedRegion {
85
- idx_t start;
86
- idx_t end;
87
- };
88
- vector<UninitializedRegion> uninitialized_regions;
89
-
90
81
  public:
91
82
  bool IsFlushed();
92
-
93
- void AddUninitializedRegion(idx_t start, idx_t end) override;
94
-
95
- void Flush(idx_t free_space_left) override;
96
-
97
- void Clear() override;
98
-
83
+ void Flush(const idx_t free_space_left) override;
99
84
  void Merge(PartialBlock &other, idx_t offset, idx_t other_size) override;
100
-
101
85
  void AddSegmentToTail(ColumnData &data, ColumnSegment &segment, uint32_t offset_in_block);
86
+ void Clear() override;
102
87
  };
103
88
 
104
89
  } // namespace duckdb
@@ -151,7 +151,7 @@ protected:
151
151
  void AppendTransientSegment(SegmentLock &l, idx_t start_row);
152
152
 
153
153
  //! Scans a base vector from the column
154
- idx_t ScanVector(ColumnScanState &state, Vector &result, idx_t remaining);
154
+ idx_t ScanVector(ColumnScanState &state, Vector &result, idx_t remaining, bool has_updates);
155
155
  //! Scans a vector from the column merged with any potential updates
156
156
  //! If ALLOW_UPDATES is set to false, the function will instead throw an exception if any updates are found
157
157
  template <bool SCAN_COMMITTED, bool ALLOW_UPDATES>
@@ -36,11 +36,12 @@ class Vector;
36
36
  struct ColumnCheckpointState;
37
37
  struct RowGroupPointer;
38
38
  struct TransactionData;
39
- struct VersionNode;
40
39
  class CollectionScanState;
41
40
  class TableFilterSet;
42
41
  struct ColumnFetchState;
43
42
  struct RowGroupAppendState;
43
+ class MetadataManager;
44
+ class RowVersionManager;
44
45
 
45
46
  struct RowGroupWriteData {
46
47
  vector<unique_ptr<ColumnCheckpointState>> states;
@@ -50,11 +51,6 @@ struct RowGroupWriteData {
50
51
  class RowGroup : public SegmentBase<RowGroup> {
51
52
  public:
52
53
  friend class ColumnData;
53
- friend class VersionDeleteState;
54
-
55
- public:
56
- static constexpr const idx_t ROW_GROUP_SIZE = STANDARD_ROW_GROUPS_SIZE;
57
- static constexpr const idx_t ROW_GROUP_VECTOR_COUNT = ROW_GROUP_SIZE / STANDARD_VECTOR_SIZE;
58
54
 
59
55
  public:
60
56
  RowGroup(RowGroupCollection &collection, idx_t start, idx_t count);
@@ -65,7 +61,7 @@ private:
65
61
  //! The RowGroupCollection this row-group is a part of
66
62
  reference<RowGroupCollection> collection;
67
63
  //! The version info of the row_group (inserted and deleted tuple info)
68
- shared_ptr<VersionNode> version_info;
64
+ shared_ptr<RowVersionManager> version_info;
69
65
  //! The column data of the row_group
70
66
  vector<shared_ptr<ColumnData>> columns;
71
67
 
@@ -145,12 +141,17 @@ public:
145
141
 
146
142
  void NextVector(CollectionScanState &state);
147
143
 
144
+ idx_t DeleteRows(idx_t vector_idx, transaction_t transaction_id, row_t rows[], idx_t count);
145
+ RowVersionManager &GetOrCreateVersionInfo();
146
+
148
147
  // Serialization
149
148
  static void Serialize(RowGroupPointer &pointer, Serializer &serializer);
150
149
  static RowGroupPointer Deserialize(Deserializer &deserializer);
151
150
 
152
151
  private:
153
- ChunkInfo *GetChunkInfo(idx_t vector_idx);
152
+ shared_ptr<RowVersionManager> &GetVersionInfo();
153
+ shared_ptr<RowVersionManager> &GetOrCreateVersionInfoPtr();
154
+
154
155
  ColumnData &GetColumn(storage_t c);
155
156
  idx_t GetColumnCount() const;
156
157
  vector<shared_ptr<ColumnData>> &GetColumns();
@@ -158,18 +159,17 @@ private:
158
159
  template <TableScanType TYPE>
159
160
  void TemplatedScan(TransactionData transaction, CollectionScanState &state, DataChunk &result);
160
161
 
162
+ vector<MetaBlockPointer> CheckpointDeletes(MetadataManager &manager);
163
+
164
+ bool HasUnloadedDeletes() const;
165
+
161
166
  private:
162
167
  mutex row_group_lock;
163
168
  mutex stats_lock;
164
169
  vector<MetaBlockPointer> column_pointers;
165
170
  unique_ptr<atomic<bool>[]> is_loaded;
166
- };
167
-
168
- struct VersionNode {
169
- unique_ptr<ChunkInfo> info[RowGroup::ROW_GROUP_VECTOR_COUNT];
170
-
171
- void SetStart(idx_t start);
172
- idx_t GetCommittedDeletedCount(idx_t count);
171
+ vector<MetaBlockPointer> deletes_pointers;
172
+ atomic<bool> deletes_is_loaded;
173
173
  };
174
174
 
175
175
  } // namespace duckdb
@@ -0,0 +1,59 @@
1
+ //===----------------------------------------------------------------------===//
2
+ // DuckDB
3
+ //
4
+ // duckdb/storage/table/row_version_manager.hpp
5
+ //
6
+ //
7
+ //===----------------------------------------------------------------------===//
8
+
9
+ #pragma once
10
+
11
+ #include "duckdb/common/vector_size.hpp"
12
+ #include "duckdb/storage/table/chunk_info.hpp"
13
+ #include "duckdb/storage/storage_info.hpp"
14
+ #include "duckdb/common/mutex.hpp"
15
+
16
+ namespace duckdb {
17
+
18
+ class MetadataManager;
19
+ struct MetaBlockPointer;
20
+
21
+ class RowVersionManager {
22
+ public:
23
+ explicit RowVersionManager(idx_t start);
24
+
25
+ idx_t GetStart() {
26
+ return start;
27
+ }
28
+ void SetStart(idx_t start);
29
+ idx_t GetCommittedDeletedCount(idx_t count);
30
+
31
+ idx_t GetSelVector(TransactionData transaction, idx_t vector_idx, SelectionVector &sel_vector, idx_t max_count);
32
+ idx_t GetCommittedSelVector(transaction_t start_time, transaction_t transaction_id, idx_t vector_idx,
33
+ SelectionVector &sel_vector, idx_t max_count);
34
+ bool Fetch(TransactionData transaction, idx_t row);
35
+
36
+ void AppendVersionInfo(TransactionData transaction, idx_t count, idx_t row_group_start, idx_t row_group_end);
37
+ void CommitAppend(transaction_t commit_id, idx_t row_group_start, idx_t count);
38
+ void RevertAppend(idx_t start_row);
39
+
40
+ idx_t DeleteRows(idx_t vector_idx, transaction_t transaction_id, row_t rows[], idx_t count);
41
+ void CommitDelete(idx_t vector_idx, transaction_t commit_id, row_t rows[], idx_t count);
42
+
43
+ vector<MetaBlockPointer> Checkpoint(MetadataManager &manager);
44
+ static shared_ptr<RowVersionManager> Deserialize(MetaBlockPointer delete_pointer, MetadataManager &manager,
45
+ idx_t start);
46
+
47
+ private:
48
+ mutex version_lock;
49
+ idx_t start;
50
+ unique_ptr<ChunkInfo> vector_info[Storage::ROW_GROUP_VECTOR_COUNT];
51
+ bool has_changes;
52
+ vector<MetaBlockPointer> storage_pointers;
53
+
54
+ private:
55
+ optional_ptr<ChunkInfo> GetChunkInfo(idx_t vector_idx);
56
+ ChunkVectorInfo &GetVectorInfo(idx_t vector_idx);
57
+ };
58
+
59
+ } // namespace duckdb
@@ -101,7 +101,7 @@ struct UpdateNodeData {
101
101
  };
102
102
 
103
103
  struct UpdateNode {
104
- unique_ptr<UpdateNodeData> info[RowGroup::ROW_GROUP_VECTOR_COUNT];
104
+ unique_ptr<UpdateNodeData> info[Storage::ROW_GROUP_VECTOR_COUNT];
105
105
  };
106
106
 
107
107
  } // namespace duckdb