duckdb 0.8.2-dev4314.0 → 0.8.2-dev4376.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (54)
  1. package/package.json +1 -1
  2. package/src/duckdb/extension/parquet/parquet_extension.cpp +1 -1
  3. package/src/duckdb/src/common/enum_util.cpp +5 -0
  4. package/src/duckdb/src/common/file_buffer.cpp +1 -1
  5. package/src/duckdb/src/common/types/validity_mask.cpp +56 -0
  6. package/src/duckdb/src/execution/operator/persistent/physical_batch_insert.cpp +1 -1
  7. package/src/duckdb/src/execution/operator/persistent/physical_insert.cpp +1 -1
  8. package/src/duckdb/src/function/table/arrow_conversion.cpp +9 -1
  9. package/src/duckdb/src/function/table/version/pragma_version.cpp +2 -2
  10. package/src/duckdb/src/include/duckdb/common/constants.hpp +0 -15
  11. package/src/duckdb/src/include/duckdb/common/serializer/memory_stream.hpp +1 -1
  12. package/src/duckdb/src/include/duckdb/common/types/validity_mask.hpp +3 -0
  13. package/src/duckdb/src/include/duckdb/function/table/arrow.hpp +3 -0
  14. package/src/duckdb/src/include/duckdb/storage/block.hpp +3 -3
  15. package/src/duckdb/src/include/duckdb/storage/compression/bitpacking.hpp +1 -8
  16. package/src/duckdb/src/include/duckdb/storage/data_pointer.hpp +2 -2
  17. package/src/duckdb/src/include/duckdb/storage/metadata/metadata_manager.hpp +2 -0
  18. package/src/duckdb/src/include/duckdb/storage/metadata/metadata_reader.hpp +2 -0
  19. package/src/duckdb/src/include/duckdb/storage/metadata/metadata_writer.hpp +6 -2
  20. package/src/duckdb/src/include/duckdb/storage/storage_info.hpp +19 -0
  21. package/src/duckdb/src/include/duckdb/storage/table/chunk_info.hpp +19 -13
  22. package/src/duckdb/src/include/duckdb/storage/table/column_data.hpp +1 -1
  23. package/src/duckdb/src/include/duckdb/storage/table/row_group.hpp +15 -15
  24. package/src/duckdb/src/include/duckdb/storage/table/row_version_manager.hpp +59 -0
  25. package/src/duckdb/src/include/duckdb/storage/table/update_segment.hpp +1 -1
  26. package/src/duckdb/src/include/duckdb/transaction/commit_state.hpp +1 -6
  27. package/src/duckdb/src/include/duckdb/transaction/delete_info.hpp +3 -2
  28. package/src/duckdb/src/include/duckdb/transaction/duck_transaction.hpp +4 -2
  29. package/src/duckdb/src/include/duckdb/transaction/local_storage.hpp +1 -1
  30. package/src/duckdb/src/include/duckdb/transaction/undo_buffer.hpp +0 -1
  31. package/src/duckdb/src/main/settings/settings.cpp +5 -10
  32. package/src/duckdb/src/optimizer/statistics/expression/propagate_cast.cpp +14 -0
  33. package/src/duckdb/src/storage/checkpoint/table_data_writer.cpp +0 -1
  34. package/src/duckdb/src/storage/compression/bitpacking.cpp +55 -48
  35. package/src/duckdb/src/storage/data_table.cpp +1 -1
  36. package/src/duckdb/src/storage/local_storage.cpp +1 -1
  37. package/src/duckdb/src/storage/metadata/metadata_manager.cpp +41 -2
  38. package/src/duckdb/src/storage/metadata/metadata_reader.cpp +12 -3
  39. package/src/duckdb/src/storage/metadata/metadata_writer.cpp +8 -2
  40. package/src/duckdb/src/storage/single_file_block_manager.cpp +1 -2
  41. package/src/duckdb/src/storage/storage_info.cpp +1 -1
  42. package/src/duckdb/src/storage/table/chunk_info.cpp +39 -33
  43. package/src/duckdb/src/storage/table/column_data.cpp +14 -9
  44. package/src/duckdb/src/storage/table/list_column_data.cpp +2 -2
  45. package/src/duckdb/src/storage/table/row_group.cpp +102 -192
  46. package/src/duckdb/src/storage/table/row_group_collection.cpp +2 -2
  47. package/src/duckdb/src/storage/table/row_version_manager.cpp +228 -0
  48. package/src/duckdb/src/storage/table/update_segment.cpp +2 -2
  49. package/src/duckdb/src/transaction/cleanup_state.cpp +2 -1
  50. package/src/duckdb/src/transaction/commit_state.cpp +5 -4
  51. package/src/duckdb/src/transaction/duck_transaction.cpp +4 -2
  52. package/src/duckdb/src/transaction/rollback_state.cpp +2 -1
  53. package/src/duckdb/src/transaction/undo_buffer.cpp +3 -5
  54. package/src/duckdb/ub_src_storage_table.cpp +2 -0
package/package.json CHANGED
@@ -2,7 +2,7 @@
2
2
  "name": "duckdb",
3
3
  "main": "./lib/duckdb.js",
4
4
  "types": "./lib/duckdb.d.ts",
5
- "version": "0.8.2-dev4314.0",
5
+ "version": "0.8.2-dev4376.0",
6
6
  "description": "DuckDB node.js API",
7
7
  "gypfile": true,
8
8
  "dependencies": {
package/src/duckdb/extension/parquet/parquet_extension.cpp CHANGED
@@ -118,7 +118,7 @@ struct ParquetWriteBindData : public TableFunctionData {
118
118
  vector<LogicalType> sql_types;
119
119
  vector<string> column_names;
120
120
  duckdb_parquet::format::CompressionCodec::type codec = duckdb_parquet::format::CompressionCodec::SNAPPY;
121
- idx_t row_group_size = RowGroup::ROW_GROUP_SIZE;
121
+ idx_t row_group_size = Storage::ROW_GROUP_SIZE;
122
122
 
123
123
  //! If row_group_size_bytes is not set, we default to row_group_size * BYTES_PER_ROW
124
124
  static constexpr const idx_t BYTES_PER_ROW = 1024;
package/src/duckdb/src/common/enum_util.cpp CHANGED
@@ -551,6 +551,8 @@ BindingMode EnumUtil::FromString<BindingMode>(const char *value) {
551
551
  template<>
552
552
  const char* EnumUtil::ToChars<BitpackingMode>(BitpackingMode value) {
553
553
  switch(value) {
554
+ case BitpackingMode::INVALID:
555
+ return "INVALID";
554
556
  case BitpackingMode::AUTO:
555
557
  return "AUTO";
556
558
  case BitpackingMode::CONSTANT:
@@ -568,6 +570,9 @@ const char* EnumUtil::ToChars<BitpackingMode>(BitpackingMode value) {
568
570
 
569
571
  template<>
570
572
  BitpackingMode EnumUtil::FromString<BitpackingMode>(const char *value) {
573
+ if (StringUtil::Equals(value, "INVALID")) {
574
+ return BitpackingMode::INVALID;
575
+ }
571
576
  if (StringUtil::Equals(value, "AUTO")) {
572
577
  return BitpackingMode::AUTO;
573
578
  }
package/src/duckdb/src/common/file_buffer.cpp CHANGED
@@ -5,7 +5,7 @@
5
5
  #include "duckdb/common/exception.hpp"
6
6
  #include "duckdb/common/file_system.hpp"
7
7
  #include "duckdb/common/helper.hpp"
8
-
8
+ #include "duckdb/storage/storage_info.hpp"
9
9
  #include <cstring>
10
10
 
11
11
  namespace duckdb {
package/src/duckdb/src/common/types/validity_mask.cpp CHANGED
@@ -1,4 +1,7 @@
1
1
  #include "duckdb/common/types/validity_mask.hpp"
2
+ #include "duckdb/common/limits.hpp"
3
+ #include "duckdb/common/serializer/write_stream.hpp"
4
+ #include "duckdb/common/serializer/read_stream.hpp"
2
5
 
3
6
  namespace duckdb {
4
7
 
@@ -173,4 +176,57 @@ void ValidityMask::SliceInPlace(const ValidityMask &other, idx_t target_offset,
173
176
  #endif
174
177
  }
175
178
 
179
+ enum class ValiditySerialization : uint8_t { BITMASK = 0, VALID_VALUES = 1, INVALID_VALUES = 2 };
180
+
181
+ void ValidityMask::Write(WriteStream &writer, idx_t count) {
182
+ auto valid_values = CountValid(count);
183
+ auto invalid_values = count - valid_values;
184
+ auto bitmask_bytes = ValidityMask::ValidityMaskSize(count);
185
+ auto need_u32 = count >= NumericLimits<uint16_t>::Maximum();
186
+ auto bytes_per_value = need_u32 ? sizeof(uint32_t) : sizeof(uint16_t);
187
+ auto valid_value_size = bytes_per_value * valid_values + sizeof(uint32_t);
188
+ auto invalid_value_size = bytes_per_value * invalid_values + sizeof(uint32_t);
189
+ if (valid_value_size < bitmask_bytes || invalid_value_size < bitmask_bytes) {
190
+ auto serialize_valid = valid_value_size < invalid_value_size;
191
+ // serialize (in)valid value indexes as [COUNT][V0][V1][...][VN]
192
+ auto flag = serialize_valid ? ValiditySerialization::VALID_VALUES : ValiditySerialization::INVALID_VALUES;
193
+ writer.Write(flag);
194
+ writer.Write<uint32_t>(MinValue<uint32_t>(valid_values, invalid_values));
195
+ for (idx_t i = 0; i < count; i++) {
196
+ if (RowIsValid(i) == serialize_valid) {
197
+ if (need_u32) {
198
+ writer.Write<uint32_t>(i);
199
+ } else {
200
+ writer.Write<uint16_t>(i);
201
+ }
202
+ }
203
+ }
204
+ } else {
205
+ // serialize the entire bitmask
206
+ writer.Write(ValiditySerialization::BITMASK);
207
+ writer.WriteData(const_data_ptr_cast(GetData()), bitmask_bytes);
208
+ }
209
+ }
210
+
211
+ void ValidityMask::Read(ReadStream &reader, idx_t count) {
212
+ Initialize(count);
213
+ // deserialize the storage type
214
+ auto flag = reader.Read<ValiditySerialization>();
215
+ if (flag == ValiditySerialization::BITMASK) {
216
+ // deserialize the bitmask
217
+ reader.ReadData(data_ptr_cast(GetData()), ValidityMask::ValidityMaskSize(count));
218
+ return;
219
+ }
220
+ auto is_u32 = count >= NumericLimits<uint16_t>::Maximum();
221
+ auto is_valid = flag == ValiditySerialization::VALID_VALUES;
222
+ auto serialize_count = reader.Read<uint32_t>();
223
+ if (is_valid) {
224
+ SetAllInvalid(count);
225
+ }
226
+ for (idx_t i = 0; i < serialize_count; i++) {
227
+ idx_t index = is_u32 ? reader.Read<uint32_t>() : reader.Read<uint16_t>();
228
+ Set(index, is_valid);
229
+ }
230
+ }
231
+
176
232
  } // namespace duckdb
package/src/duckdb/src/execution/operator/persistent/physical_batch_insert.cpp CHANGED
@@ -194,7 +194,7 @@ public:
194
194
  }
195
195
  auto new_count = current_collection->GetTotalRows();
196
196
  auto batch_type =
197
- new_count < RowGroup::ROW_GROUP_SIZE ? RowGroupBatchType::NOT_FLUSHED : RowGroupBatchType::FLUSHED;
197
+ new_count < Storage::ROW_GROUP_SIZE ? RowGroupBatchType::NOT_FLUSHED : RowGroupBatchType::FLUSHED;
198
198
  if (batch_type == RowGroupBatchType::FLUSHED && writer) {
199
199
  writer->WriteLastRowGroup(*current_collection);
200
200
  }
package/src/duckdb/src/execution/operator/persistent/physical_insert.cpp CHANGED
@@ -482,7 +482,7 @@ SinkCombineResultType PhysicalInsert::Combine(ExecutionContext &context, Operato
482
482
 
483
483
  lock_guard<mutex> lock(gstate.lock);
484
484
  gstate.insert_count += append_count;
485
- if (append_count < RowGroup::ROW_GROUP_SIZE) {
485
+ if (append_count < Storage::ROW_GROUP_SIZE) {
486
486
  // we have few rows - append to the local storage directly
487
487
  auto &table = gstate.table;
488
488
  auto &storage = table.GetStorage();
package/src/duckdb/src/function/table/arrow_conversion.cpp CHANGED
@@ -837,7 +837,15 @@ void ArrowTableFunction::ArrowToDuckDB(ArrowScanLocalState &scan_state, const ar
837
837
  throw InvalidInputException("arrow_scan: array length mismatch");
838
838
  }
839
839
  // Make sure this Vector keeps the Arrow chunk alive in case we can zero-copy the data
840
- output.data[idx].GetBuffer()->SetAuxiliaryData(make_uniq<ArrowAuxiliaryData>(scan_state.chunk));
840
+ if (scan_state.arrow_owned_data.find(idx) == scan_state.arrow_owned_data.end()) {
841
+ auto arrow_data = make_shared<ArrowArrayWrapper>();
842
+ arrow_data->arrow_array = scan_state.chunk->arrow_array;
843
+ scan_state.chunk->arrow_array.release = nullptr;
844
+ scan_state.arrow_owned_data[idx] = arrow_data;
845
+ }
846
+
847
+ output.data[idx].GetBuffer()->SetAuxiliaryData(make_uniq<ArrowAuxiliaryData>(scan_state.arrow_owned_data[idx]));
848
+
841
849
  D_ASSERT(arrow_convert_data.find(col_idx) != arrow_convert_data.end());
842
850
  auto &arrow_type = *arrow_convert_data.at(col_idx);
843
851
  if (array.dictionary) {
package/src/duckdb/src/function/table/version/pragma_version.cpp CHANGED
@@ -1,8 +1,8 @@
1
1
  #ifndef DUCKDB_VERSION
2
- #define DUCKDB_VERSION "0.8.2-dev4314"
2
+ #define DUCKDB_VERSION "0.8.2-dev4376"
3
3
  #endif
4
4
  #ifndef DUCKDB_SOURCE_ID
5
- #define DUCKDB_SOURCE_ID "84a109bbee"
5
+ #define DUCKDB_SOURCE_ID "312b995450"
6
6
  #endif
7
7
  #include "duckdb/function/table/system_functions.hpp"
8
8
  #include "duckdb/main/database.hpp"
package/src/duckdb/src/include/duckdb/common/constants.hpp CHANGED
@@ -58,21 +58,6 @@ struct DConstants {
58
58
  static constexpr const idx_t INVALID_INDEX = idx_t(-1);
59
59
  };
60
60
 
61
- struct Storage {
62
- //! The size of a hard disk sector, only really needed for Direct IO
63
- constexpr static int SECTOR_SIZE = 4096;
64
- //! Block header size for blocks written to the storage
65
- constexpr static int BLOCK_HEADER_SIZE = sizeof(uint64_t);
66
- // Size of a memory slot managed by the StorageManager. This is the quantum of allocation for Blocks on DuckDB. We
67
- // default to 256KB. (1 << 18)
68
- constexpr static int BLOCK_ALLOC_SIZE = 262144;
69
- //! The actual memory space that is available within the blocks
70
- constexpr static int BLOCK_SIZE = BLOCK_ALLOC_SIZE - BLOCK_HEADER_SIZE;
71
- //! The size of the headers. This should be small and written more or less atomically by the hard disk. We default
72
- //! to the page size, which is 4KB. (1 << 12)
73
- constexpr static int FILE_HEADER_SIZE = 4096;
74
- };
75
-
76
61
  struct LogicalIndex {
77
62
  explicit LogicalIndex(idx_t index) : index(index) {
78
63
  }
package/src/duckdb/src/include/duckdb/common/serializer/memory_stream.hpp CHANGED
@@ -1,7 +1,7 @@
1
1
  //===----------------------------------------------------------------------===//
2
2
  // DuckDB
3
3
  //
4
- // duckdb/common/serializer/buffer_stream.hpp
4
+ // duckdb/common/serializer/memory_stream.hpp
5
5
  //
6
6
  //
7
7
  //===----------------------------------------------------------------------===//
package/src/duckdb/src/include/duckdb/common/types/validity_mask.hpp CHANGED
@@ -332,6 +332,9 @@ public:
332
332
  DUCKDB_API string ToString(idx_t count) const;
333
333
 
334
334
  DUCKDB_API static bool IsAligned(idx_t count);
335
+
336
+ void Write(WriteStream &writer, idx_t count);
337
+ void Read(ReadStream &reader, idx_t count);
335
338
  };
336
339
 
337
340
  } // namespace duckdb
package/src/duckdb/src/include/duckdb/function/table/arrow.hpp CHANGED
@@ -67,6 +67,9 @@ struct ArrowScanLocalState : public LocalTableFunctionState {
67
67
 
68
68
  unique_ptr<ArrowArrayStreamWrapper> stream;
69
69
  shared_ptr<ArrowArrayWrapper> chunk;
70
+ // This vector hold the Arrow Vectors owned by DuckDB to allow for zero-copy
71
+ // Note that only DuckDB can release these vectors
72
+ unordered_map<idx_t, shared_ptr<ArrowArrayWrapper>> arrow_owned_data;
70
73
  idx_t chunk_offset = 0;
71
74
  idx_t batch_index = 0;
72
75
  vector<column_t> column_ids;
package/src/duckdb/src/include/duckdb/storage/block.hpp CHANGED
@@ -52,11 +52,11 @@ struct MetaBlockPointer {
52
52
  idx_t block_pointer;
53
53
  uint32_t offset;
54
54
 
55
- bool IsValid() {
55
+ bool IsValid() const {
56
56
  return block_pointer != DConstants::INVALID_INDEX;
57
57
  }
58
- block_id_t GetBlockId();
59
- uint32_t GetBlockIndex();
58
+ block_id_t GetBlockId() const;
59
+ uint32_t GetBlockIndex() const;
60
60
 
61
61
  void Serialize(Serializer &serializer) const;
62
62
  static MetaBlockPointer Deserialize(Deserializer &source);
package/src/duckdb/src/include/duckdb/storage/compression/bitpacking.hpp CHANGED
@@ -12,14 +12,7 @@
12
12
 
13
13
  namespace duckdb {
14
14
 
15
- enum class BitpackingMode : uint8_t {
16
- AUTO,
17
-
18
- CONSTANT,
19
- CONSTANT_DELTA,
20
- DELTA_FOR,
21
- FOR
22
- };
15
+ enum class BitpackingMode : uint8_t { INVALID, AUTO, CONSTANT, CONSTANT_DELTA, DELTA_FOR, FOR };
23
16
 
24
17
  BitpackingMode BitpackingModeFromString(const string &str);
25
18
  string BitpackingModeToString(const BitpackingMode &mode);
package/src/duckdb/src/include/duckdb/storage/data_pointer.hpp CHANGED
@@ -40,8 +40,8 @@ struct RowGroupPointer {
40
40
  uint64_t tuple_count;
41
41
  //! The data pointers of the column segments stored in the row group
42
42
  vector<MetaBlockPointer> data_pointers;
43
- //! The versions information of the row group (if any)
44
- shared_ptr<VersionNode> versions;
43
+ //! Data pointers to the delete information of the row group (if any)
44
+ vector<MetaBlockPointer> deletes_pointers;
45
45
  };
46
46
 
47
47
  } // namespace duckdb
package/src/duckdb/src/include/duckdb/storage/metadata/metadata_manager.hpp CHANGED
@@ -64,6 +64,7 @@ public:
64
64
  void Flush();
65
65
 
66
66
  void MarkBlocksAsModified();
67
+ void ClearModifiedBlocks(const vector<MetaBlockPointer> &pointers);
67
68
 
68
69
  idx_t BlockCount();
69
70
 
@@ -82,6 +83,7 @@ protected:
82
83
 
83
84
  void AddBlock(MetadataBlock new_block, bool if_exists = false);
84
85
  void AddAndRegisterBlock(MetadataBlock block);
86
+ void ConvertToTransient(MetadataBlock &block);
85
87
  };
86
88
 
87
89
  } // namespace duckdb
package/src/duckdb/src/include/duckdb/storage/metadata/metadata_reader.hpp CHANGED
@@ -18,6 +18,7 @@ enum class BlockReaderType { EXISTING_BLOCKS, REGISTER_BLOCKS };
18
18
  class MetadataReader : public ReadStream {
19
19
  public:
20
20
  MetadataReader(MetadataManager &manager, MetaBlockPointer pointer,
21
+ optional_ptr<vector<MetaBlockPointer>> read_pointers = nullptr,
21
22
  BlockReaderType type = BlockReaderType::EXISTING_BLOCKS);
22
23
  MetadataReader(MetadataManager &manager, BlockPointer pointer);
23
24
  ~MetadataReader() override;
@@ -46,6 +47,7 @@ private:
46
47
  MetadataHandle block;
47
48
  MetadataPointer next_pointer;
48
49
  bool has_next_block;
50
+ optional_ptr<vector<MetaBlockPointer>> read_pointers;
49
51
  idx_t index;
50
52
  idx_t offset;
51
53
  idx_t next_offset;
package/src/duckdb/src/include/duckdb/storage/metadata/metadata_writer.hpp CHANGED
@@ -15,10 +15,10 @@ namespace duckdb {
15
15
 
16
16
  class MetadataWriter : public WriteStream {
17
17
  public:
18
+ explicit MetadataWriter(MetadataManager &manager,
19
+ optional_ptr<vector<MetaBlockPointer>> written_pointers = nullptr);
18
20
  MetadataWriter(const MetadataWriter &) = delete;
19
21
  MetadataWriter &operator=(const MetadataWriter &) = delete;
20
-
21
- explicit MetadataWriter(MetadataManager &manager);
22
22
  ~MetadataWriter() override;
23
23
 
24
24
  public:
@@ -27,6 +27,9 @@ public:
27
27
 
28
28
  BlockPointer GetBlockPointer();
29
29
  MetaBlockPointer GetMetaBlockPointer();
30
+ MetadataManager &GetManager() {
31
+ return manager;
32
+ }
30
33
 
31
34
  protected:
32
35
  virtual MetadataHandle NextHandle();
@@ -41,6 +44,7 @@ private:
41
44
  MetadataManager &manager;
42
45
  MetadataHandle block;
43
46
  MetadataPointer current_pointer;
47
+ optional_ptr<vector<MetaBlockPointer>> written_pointers;
44
48
  idx_t capacity;
45
49
  idx_t offset;
46
50
  };
package/src/duckdb/src/include/duckdb/storage/storage_info.hpp CHANGED
@@ -23,6 +23,25 @@ struct FileHandle;
23
23
  #error Row group size should be cleanly divisible by vector size
24
24
  #endif
25
25
 
26
+ struct Storage {
27
+ //! The size of a hard disk sector, only really needed for Direct IO
28
+ constexpr static int SECTOR_SIZE = 4096;
29
+ //! Block header size for blocks written to the storage
30
+ constexpr static int BLOCK_HEADER_SIZE = sizeof(uint64_t);
31
+ // Size of a memory slot managed by the StorageManager. This is the quantum of allocation for Blocks on DuckDB. We
32
+ // default to 256KB. (1 << 18)
33
+ constexpr static int BLOCK_ALLOC_SIZE = 262144;
34
+ //! The actual memory space that is available within the blocks
35
+ constexpr static int BLOCK_SIZE = BLOCK_ALLOC_SIZE - BLOCK_HEADER_SIZE;
36
+ //! The size of the headers. This should be small and written more or less atomically by the hard disk. We default
37
+ //! to the page size, which is 4KB. (1 << 12)
38
+ constexpr static int FILE_HEADER_SIZE = 4096;
39
+ //! The number of rows per row group (must be a multiple of the vector size)
40
+ constexpr static const idx_t ROW_GROUP_SIZE = STANDARD_ROW_GROUPS_SIZE;
41
+ //! The number of vectors per row group
42
+ constexpr static const idx_t ROW_GROUP_VECTOR_COUNT = ROW_GROUP_SIZE / STANDARD_VECTOR_SIZE;
43
+ };
44
+
26
45
  //! The version number of the database storage format
27
46
  extern const uint64_t VERSION_NUMBER;
28
47
 
package/src/duckdb/src/include/duckdb/storage/table/chunk_info.hpp CHANGED
@@ -46,8 +46,10 @@ public:
46
46
  virtual void CommitAppend(transaction_t commit_id, idx_t start, idx_t end) = 0;
47
47
  virtual idx_t GetCommittedDeletedCount(idx_t max_count) = 0;
48
48
 
49
- virtual void Serialize(Serializer &serializer) const = 0;
50
- static unique_ptr<ChunkInfo> Deserialize(Deserializer &deserializer);
49
+ virtual bool HasDeletes() const = 0;
50
+
51
+ virtual void Write(WriteStream &writer) const;
52
+ static unique_ptr<ChunkInfo> Read(ReadStream &reader);
51
53
 
52
54
  public:
53
55
  template <class TARGET>
@@ -74,8 +76,8 @@ public:
74
76
  public:
75
77
  explicit ChunkConstantInfo(idx_t start);
76
78
 
77
- atomic<transaction_t> insert_id;
78
- atomic<transaction_t> delete_id;
79
+ transaction_t insert_id;
80
+ transaction_t delete_id;
79
81
 
80
82
  public:
81
83
  idx_t GetSelVector(TransactionData transaction, SelectionVector &sel_vector, idx_t max_count) override;
@@ -85,8 +87,10 @@ public:
85
87
  void CommitAppend(transaction_t commit_id, idx_t start, idx_t end) override;
86
88
  idx_t GetCommittedDeletedCount(idx_t max_count) override;
87
89
 
88
- void Serialize(Serializer &serializer) const override;
89
- static unique_ptr<ChunkInfo> Deserialize(Deserializer &deserializer);
90
+ bool HasDeletes() const override;
91
+
92
+ void Write(WriteStream &writer) const override;
93
+ static unique_ptr<ChunkInfo> Read(ReadStream &reader);
90
94
 
91
95
  private:
92
96
  template <class OP>
@@ -102,13 +106,13 @@ public:
102
106
  explicit ChunkVectorInfo(idx_t start);
103
107
 
104
108
  //! The transaction ids of the transactions that inserted the tuples (if any)
105
- atomic<transaction_t> inserted[STANDARD_VECTOR_SIZE];
106
- atomic<transaction_t> insert_id;
107
- atomic<bool> same_inserted_id;
109
+ transaction_t inserted[STANDARD_VECTOR_SIZE];
110
+ transaction_t insert_id;
111
+ bool same_inserted_id;
108
112
 
109
113
  //! The transaction ids of the transactions that deleted the tuples (if any)
110
- atomic<transaction_t> deleted[STANDARD_VECTOR_SIZE];
111
- atomic<bool> any_deleted;
114
+ transaction_t deleted[STANDARD_VECTOR_SIZE];
115
+ bool any_deleted;
112
116
 
113
117
  public:
114
118
  idx_t GetSelVector(transaction_t start_time, transaction_t transaction_id, SelectionVector &sel_vector,
@@ -130,8 +134,10 @@ public:
130
134
  idx_t Delete(transaction_t transaction_id, row_t rows[], idx_t count);
131
135
  void CommitDelete(transaction_t commit_id, row_t rows[], idx_t count);
132
136
 
133
- void Serialize(Serializer &serializer) const override;
134
- static unique_ptr<ChunkInfo> Deserialize(Deserializer &deserializer);
137
+ bool HasDeletes() const override;
138
+
139
+ void Write(WriteStream &writer) const override;
140
+ static unique_ptr<ChunkInfo> Read(ReadStream &reader);
135
141
 
136
142
  private:
137
143
  template <class OP>
package/src/duckdb/src/include/duckdb/storage/table/column_data.hpp CHANGED
@@ -151,7 +151,7 @@ protected:
151
151
  void AppendTransientSegment(SegmentLock &l, idx_t start_row);
152
152
 
153
153
  //! Scans a base vector from the column
154
- idx_t ScanVector(ColumnScanState &state, Vector &result, idx_t remaining);
154
+ idx_t ScanVector(ColumnScanState &state, Vector &result, idx_t remaining, bool has_updates);
155
155
  //! Scans a vector from the column merged with any potential updates
156
156
  //! If ALLOW_UPDATES is set to false, the function will instead throw an exception if any updates are found
157
157
  template <bool SCAN_COMMITTED, bool ALLOW_UPDATES>
package/src/duckdb/src/include/duckdb/storage/table/row_group.hpp CHANGED
@@ -36,11 +36,12 @@ class Vector;
36
36
  struct ColumnCheckpointState;
37
37
  struct RowGroupPointer;
38
38
  struct TransactionData;
39
- struct VersionNode;
40
39
  class CollectionScanState;
41
40
  class TableFilterSet;
42
41
  struct ColumnFetchState;
43
42
  struct RowGroupAppendState;
43
+ class MetadataManager;
44
+ class RowVersionManager;
44
45
 
45
46
  struct RowGroupWriteData {
46
47
  vector<unique_ptr<ColumnCheckpointState>> states;
@@ -50,11 +51,6 @@ struct RowGroupWriteData {
50
51
  class RowGroup : public SegmentBase<RowGroup> {
51
52
  public:
52
53
  friend class ColumnData;
53
- friend class VersionDeleteState;
54
-
55
- public:
56
- static constexpr const idx_t ROW_GROUP_SIZE = STANDARD_ROW_GROUPS_SIZE;
57
- static constexpr const idx_t ROW_GROUP_VECTOR_COUNT = ROW_GROUP_SIZE / STANDARD_VECTOR_SIZE;
58
54
 
59
55
  public:
60
56
  RowGroup(RowGroupCollection &collection, idx_t start, idx_t count);
@@ -65,7 +61,7 @@ private:
65
61
  //! The RowGroupCollection this row-group is a part of
66
62
  reference<RowGroupCollection> collection;
67
63
  //! The version info of the row_group (inserted and deleted tuple info)
68
- shared_ptr<VersionNode> version_info;
64
+ shared_ptr<RowVersionManager> version_info;
69
65
  //! The column data of the row_group
70
66
  vector<shared_ptr<ColumnData>> columns;
71
67
 
@@ -145,12 +141,17 @@ public:
145
141
 
146
142
  void NextVector(CollectionScanState &state);
147
143
 
144
+ idx_t DeleteRows(idx_t vector_idx, transaction_t transaction_id, row_t rows[], idx_t count);
145
+ RowVersionManager &GetOrCreateVersionInfo();
146
+
148
147
  // Serialization
149
148
  static void Serialize(RowGroupPointer &pointer, Serializer &serializer);
150
149
  static RowGroupPointer Deserialize(Deserializer &deserializer);
151
150
 
152
151
  private:
153
- ChunkInfo *GetChunkInfo(idx_t vector_idx);
152
+ shared_ptr<RowVersionManager> &GetVersionInfo();
153
+ shared_ptr<RowVersionManager> &GetOrCreateVersionInfoPtr();
154
+
154
155
  ColumnData &GetColumn(storage_t c);
155
156
  idx_t GetColumnCount() const;
156
157
  vector<shared_ptr<ColumnData>> &GetColumns();
@@ -158,18 +159,17 @@ private:
158
159
  template <TableScanType TYPE>
159
160
  void TemplatedScan(TransactionData transaction, CollectionScanState &state, DataChunk &result);
160
161
 
162
+ vector<MetaBlockPointer> CheckpointDeletes(MetadataManager &manager);
163
+
164
+ bool HasUnloadedDeletes() const;
165
+
161
166
  private:
162
167
  mutex row_group_lock;
163
168
  mutex stats_lock;
164
169
  vector<MetaBlockPointer> column_pointers;
165
170
  unique_ptr<atomic<bool>[]> is_loaded;
166
- };
167
-
168
- struct VersionNode {
169
- unique_ptr<ChunkInfo> info[RowGroup::ROW_GROUP_VECTOR_COUNT];
170
-
171
- void SetStart(idx_t start);
172
- idx_t GetCommittedDeletedCount(idx_t count);
171
+ vector<MetaBlockPointer> deletes_pointers;
172
+ atomic<bool> deletes_is_loaded;
173
173
  };
174
174
 
175
175
  } // namespace duckdb
package/src/duckdb/src/include/duckdb/storage/table/row_version_manager.hpp ADDED
@@ -0,0 +1,59 @@
1
+ //===----------------------------------------------------------------------===//
2
+ // DuckDB
3
+ //
4
+ // duckdb/storage/table/row_version_manager.hpp
5
+ //
6
+ //
7
+ //===----------------------------------------------------------------------===//
8
+
9
+ #pragma once
10
+
11
+ #include "duckdb/common/vector_size.hpp"
12
+ #include "duckdb/storage/table/chunk_info.hpp"
13
+ #include "duckdb/storage/storage_info.hpp"
14
+ #include "duckdb/common/mutex.hpp"
15
+
16
+ namespace duckdb {
17
+
18
+ class MetadataManager;
19
+ struct MetaBlockPointer;
20
+
21
+ class RowVersionManager {
22
+ public:
23
+ explicit RowVersionManager(idx_t start);
24
+
25
+ idx_t GetStart() {
26
+ return start;
27
+ }
28
+ void SetStart(idx_t start);
29
+ idx_t GetCommittedDeletedCount(idx_t count);
30
+
31
+ idx_t GetSelVector(TransactionData transaction, idx_t vector_idx, SelectionVector &sel_vector, idx_t max_count);
32
+ idx_t GetCommittedSelVector(transaction_t start_time, transaction_t transaction_id, idx_t vector_idx,
33
+ SelectionVector &sel_vector, idx_t max_count);
34
+ bool Fetch(TransactionData transaction, idx_t row);
35
+
36
+ void AppendVersionInfo(TransactionData transaction, idx_t count, idx_t row_group_start, idx_t row_group_end);
37
+ void CommitAppend(transaction_t commit_id, idx_t row_group_start, idx_t count);
38
+ void RevertAppend(idx_t start_row);
39
+
40
+ idx_t DeleteRows(idx_t vector_idx, transaction_t transaction_id, row_t rows[], idx_t count);
41
+ void CommitDelete(idx_t vector_idx, transaction_t commit_id, row_t rows[], idx_t count);
42
+
43
+ vector<MetaBlockPointer> Checkpoint(MetadataManager &manager);
44
+ static shared_ptr<RowVersionManager> Deserialize(MetaBlockPointer delete_pointer, MetadataManager &manager,
45
+ idx_t start);
46
+
47
+ private:
48
+ mutex version_lock;
49
+ idx_t start;
50
+ unique_ptr<ChunkInfo> vector_info[Storage::ROW_GROUP_VECTOR_COUNT];
51
+ bool has_changes;
52
+ vector<MetaBlockPointer> storage_pointers;
53
+
54
+ private:
55
+ optional_ptr<ChunkInfo> GetChunkInfo(idx_t vector_idx);
56
+ ChunkVectorInfo &GetVectorInfo(idx_t vector_idx);
57
+ };
58
+
59
+ } // namespace duckdb
package/src/duckdb/src/include/duckdb/storage/table/update_segment.hpp CHANGED
@@ -101,7 +101,7 @@ struct UpdateNodeData {
101
101
  };
102
102
 
103
103
  struct UpdateNode {
104
- unique_ptr<UpdateNodeData> info[RowGroup::ROW_GROUP_VECTOR_COUNT];
104
+ unique_ptr<UpdateNodeData> info[Storage::ROW_GROUP_VECTOR_COUNT];
105
105
  };
106
106
 
107
107
  } // namespace duckdb
package/src/duckdb/src/include/duckdb/transaction/commit_state.hpp CHANGED
@@ -23,7 +23,7 @@ struct UpdateInfo;
23
23
 
24
24
  class CommitState {
25
25
  public:
26
- explicit CommitState(ClientContext &context, transaction_t commit_id, optional_ptr<WriteAheadLog> log = nullptr);
26
+ explicit CommitState(transaction_t commit_id, optional_ptr<WriteAheadLog> log = nullptr);
27
27
 
28
28
  optional_ptr<WriteAheadLog> log;
29
29
  transaction_t commit_id;
@@ -35,9 +35,6 @@ public:
35
35
  unique_ptr<DataChunk> delete_chunk;
36
36
  unique_ptr<DataChunk> update_chunk;
37
37
 
38
- private:
39
- ClientContext &context;
40
-
41
38
  public:
42
39
  template <bool HAS_LOG>
43
40
  void CommitEntry(UndoFlags type, data_ptr_t data);
@@ -49,8 +46,6 @@ private:
49
46
  void WriteCatalogEntry(CatalogEntry &entry, data_ptr_t extra_data);
50
47
  void WriteDelete(DeleteInfo &info);
51
48
  void WriteUpdate(UpdateInfo &info);
52
-
53
- void AppendRowId(row_t rowid);
54
49
  };
55
50
 
56
51
  } // namespace duckdb
package/src/duckdb/src/include/duckdb/transaction/delete_info.hpp CHANGED
@@ -11,12 +11,13 @@
11
11
  #include "duckdb/common/constants.hpp"
12
12
 
13
13
  namespace duckdb {
14
- class ChunkVectorInfo;
15
14
  class DataTable;
15
+ class RowVersionManager;
16
16
 
17
17
  struct DeleteInfo {
18
18
  DataTable *table;
19
- ChunkVectorInfo *vinfo;
19
+ RowVersionManager *version_info;
20
+ idx_t vector_idx;
20
21
  idx_t count;
21
22
  idx_t base_row;
22
23
  row_t rows[1];
package/src/duckdb/src/include/duckdb/transaction/duck_transaction.hpp CHANGED
@@ -11,12 +11,13 @@
11
11
  #include "duckdb/transaction/transaction.hpp"
12
12
 
13
13
  namespace duckdb {
14
+ class RowVersionManager;
14
15
 
15
16
  class DuckTransaction : public Transaction {
16
17
  public:
17
18
  DuckTransaction(TransactionManager &manager, ClientContext &context, transaction_t start_time,
18
19
  transaction_t transaction_id);
19
- ~DuckTransaction();
20
+ ~DuckTransaction() override;
20
21
 
21
22
  //! The start timestamp of this transaction
22
23
  transaction_t start_time;
@@ -49,7 +50,8 @@ public:
49
50
 
50
51
  bool ChangesMade();
51
52
 
52
- void PushDelete(DataTable &table, ChunkVectorInfo *vinfo, row_t rows[], idx_t count, idx_t base_row);
53
+ void PushDelete(DataTable &table, RowVersionManager &info, idx_t vector_idx, row_t rows[], idx_t count,
54
+ idx_t base_row);
53
55
  void PushAppend(DataTable &table, idx_t row_start, idx_t row_count);
54
56
  UpdateInfo *CreateUpdateInfo(idx_t type_size, idx_t entries);
55
57
 
package/src/duckdb/src/include/duckdb/transaction/local_storage.hpp CHANGED
@@ -88,7 +88,7 @@ private:
88
88
  class LocalStorage {
89
89
  public:
90
90
  // Threshold to merge row groups instead of appending
91
- static constexpr const idx_t MERGE_THRESHOLD = RowGroup::ROW_GROUP_SIZE;
91
+ static constexpr const idx_t MERGE_THRESHOLD = Storage::ROW_GROUP_SIZE;
92
92
 
93
93
  public:
94
94
  struct CommitState {