duckdb 0.8.2-dev4203.0 → 0.8.2-dev4376.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (72)
  1. package/package.json +1 -1
  2. package/src/duckdb/extension/parquet/parquet_extension.cpp +1 -1
  3. package/src/duckdb/src/common/enum_util.cpp +5 -0
  4. package/src/duckdb/src/common/file_buffer.cpp +1 -1
  5. package/src/duckdb/src/common/sort/partition_state.cpp +107 -29
  6. package/src/duckdb/src/common/types/validity_mask.cpp +56 -0
  7. package/src/duckdb/src/execution/index/art/art.cpp +5 -1
  8. package/src/duckdb/src/execution/index/art/leaf.cpp +13 -10
  9. package/src/duckdb/src/execution/index/art/node48.cpp +0 -2
  10. package/src/duckdb/src/execution/index/fixed_size_allocator.cpp +38 -73
  11. package/src/duckdb/src/execution/index/fixed_size_buffer.cpp +245 -27
  12. package/src/duckdb/src/execution/operator/aggregate/physical_window.cpp +2 -3
  13. package/src/duckdb/src/execution/operator/join/physical_asof_join.cpp +35 -20
  14. package/src/duckdb/src/execution/operator/persistent/physical_batch_insert.cpp +1 -1
  15. package/src/duckdb/src/execution/operator/persistent/physical_insert.cpp +1 -1
  16. package/src/duckdb/src/function/table/arrow_conversion.cpp +9 -1
  17. package/src/duckdb/src/function/table/version/pragma_version.cpp +2 -2
  18. package/src/duckdb/src/include/duckdb/common/constants.hpp +0 -15
  19. package/src/duckdb/src/include/duckdb/common/serializer/memory_stream.hpp +1 -1
  20. package/src/duckdb/src/include/duckdb/common/sort/partition_state.hpp +14 -4
  21. package/src/duckdb/src/include/duckdb/common/types/validity_mask.hpp +3 -0
  22. package/src/duckdb/src/include/duckdb/execution/index/art/leaf.hpp +2 -0
  23. package/src/duckdb/src/include/duckdb/execution/index/fixed_size_allocator.hpp +1 -7
  24. package/src/duckdb/src/include/duckdb/execution/index/fixed_size_buffer.hpp +38 -8
  25. package/src/duckdb/src/include/duckdb/function/table/arrow.hpp +3 -0
  26. package/src/duckdb/src/include/duckdb/main/relation.hpp +9 -2
  27. package/src/duckdb/src/include/duckdb/storage/block.hpp +3 -3
  28. package/src/duckdb/src/include/duckdb/storage/compression/bitpacking.hpp +1 -8
  29. package/src/duckdb/src/include/duckdb/storage/data_pointer.hpp +2 -2
  30. package/src/duckdb/src/include/duckdb/storage/metadata/metadata_manager.hpp +2 -0
  31. package/src/duckdb/src/include/duckdb/storage/metadata/metadata_reader.hpp +2 -0
  32. package/src/duckdb/src/include/duckdb/storage/metadata/metadata_writer.hpp +6 -2
  33. package/src/duckdb/src/include/duckdb/storage/partial_block_manager.hpp +35 -19
  34. package/src/duckdb/src/include/duckdb/storage/storage_info.hpp +19 -0
  35. package/src/duckdb/src/include/duckdb/storage/table/chunk_info.hpp +19 -13
  36. package/src/duckdb/src/include/duckdb/storage/table/column_checkpoint_state.hpp +4 -19
  37. package/src/duckdb/src/include/duckdb/storage/table/column_data.hpp +1 -1
  38. package/src/duckdb/src/include/duckdb/storage/table/row_group.hpp +15 -15
  39. package/src/duckdb/src/include/duckdb/storage/table/row_version_manager.hpp +59 -0
  40. package/src/duckdb/src/include/duckdb/storage/table/update_segment.hpp +1 -1
  41. package/src/duckdb/src/include/duckdb/transaction/commit_state.hpp +1 -6
  42. package/src/duckdb/src/include/duckdb/transaction/delete_info.hpp +3 -2
  43. package/src/duckdb/src/include/duckdb/transaction/duck_transaction.hpp +4 -2
  44. package/src/duckdb/src/include/duckdb/transaction/local_storage.hpp +1 -1
  45. package/src/duckdb/src/include/duckdb/transaction/undo_buffer.hpp +0 -1
  46. package/src/duckdb/src/main/relation.cpp +15 -2
  47. package/src/duckdb/src/main/settings/settings.cpp +5 -10
  48. package/src/duckdb/src/optimizer/statistics/expression/propagate_cast.cpp +14 -0
  49. package/src/duckdb/src/storage/checkpoint/table_data_writer.cpp +0 -1
  50. package/src/duckdb/src/storage/compression/bitpacking.cpp +55 -48
  51. package/src/duckdb/src/storage/data_table.cpp +1 -1
  52. package/src/duckdb/src/storage/local_storage.cpp +1 -1
  53. package/src/duckdb/src/storage/metadata/metadata_manager.cpp +41 -2
  54. package/src/duckdb/src/storage/metadata/metadata_reader.cpp +12 -3
  55. package/src/duckdb/src/storage/metadata/metadata_writer.cpp +8 -2
  56. package/src/duckdb/src/storage/partial_block_manager.cpp +42 -15
  57. package/src/duckdb/src/storage/single_file_block_manager.cpp +1 -2
  58. package/src/duckdb/src/storage/storage_info.cpp +1 -1
  59. package/src/duckdb/src/storage/table/chunk_info.cpp +39 -33
  60. package/src/duckdb/src/storage/table/column_checkpoint_state.cpp +26 -32
  61. package/src/duckdb/src/storage/table/column_data.cpp +14 -9
  62. package/src/duckdb/src/storage/table/list_column_data.cpp +2 -2
  63. package/src/duckdb/src/storage/table/row_group.cpp +102 -192
  64. package/src/duckdb/src/storage/table/row_group_collection.cpp +2 -2
  65. package/src/duckdb/src/storage/table/row_version_manager.cpp +228 -0
  66. package/src/duckdb/src/storage/table/update_segment.cpp +2 -2
  67. package/src/duckdb/src/transaction/cleanup_state.cpp +2 -1
  68. package/src/duckdb/src/transaction/commit_state.cpp +5 -4
  69. package/src/duckdb/src/transaction/duck_transaction.cpp +4 -2
  70. package/src/duckdb/src/transaction/rollback_state.cpp +2 -1
  71. package/src/duckdb/src/transaction/undo_buffer.cpp +3 -5
  72. package/src/duckdb/ub_src_storage_table.cpp +2 -0
@@ -23,7 +23,7 @@ struct UpdateInfo;
23
23
 
24
24
  class CommitState {
25
25
  public:
26
- explicit CommitState(ClientContext &context, transaction_t commit_id, optional_ptr<WriteAheadLog> log = nullptr);
26
+ explicit CommitState(transaction_t commit_id, optional_ptr<WriteAheadLog> log = nullptr);
27
27
 
28
28
  optional_ptr<WriteAheadLog> log;
29
29
  transaction_t commit_id;
@@ -35,9 +35,6 @@ public:
35
35
  unique_ptr<DataChunk> delete_chunk;
36
36
  unique_ptr<DataChunk> update_chunk;
37
37
 
38
- private:
39
- ClientContext &context;
40
-
41
38
  public:
42
39
  template <bool HAS_LOG>
43
40
  void CommitEntry(UndoFlags type, data_ptr_t data);
@@ -49,8 +46,6 @@ private:
49
46
  void WriteCatalogEntry(CatalogEntry &entry, data_ptr_t extra_data);
50
47
  void WriteDelete(DeleteInfo &info);
51
48
  void WriteUpdate(UpdateInfo &info);
52
-
53
- void AppendRowId(row_t rowid);
54
49
  };
55
50
 
56
51
  } // namespace duckdb
@@ -11,12 +11,13 @@
11
11
  #include "duckdb/common/constants.hpp"
12
12
 
13
13
  namespace duckdb {
14
- class ChunkVectorInfo;
15
14
  class DataTable;
15
+ class RowVersionManager;
16
16
 
17
17
  struct DeleteInfo {
18
18
  DataTable *table;
19
- ChunkVectorInfo *vinfo;
19
+ RowVersionManager *version_info;
20
+ idx_t vector_idx;
20
21
  idx_t count;
21
22
  idx_t base_row;
22
23
  row_t rows[1];
@@ -11,12 +11,13 @@
11
11
  #include "duckdb/transaction/transaction.hpp"
12
12
 
13
13
  namespace duckdb {
14
+ class RowVersionManager;
14
15
 
15
16
  class DuckTransaction : public Transaction {
16
17
  public:
17
18
  DuckTransaction(TransactionManager &manager, ClientContext &context, transaction_t start_time,
18
19
  transaction_t transaction_id);
19
- ~DuckTransaction();
20
+ ~DuckTransaction() override;
20
21
 
21
22
  //! The start timestamp of this transaction
22
23
  transaction_t start_time;
@@ -49,7 +50,8 @@ public:
49
50
 
50
51
  bool ChangesMade();
51
52
 
52
- void PushDelete(DataTable &table, ChunkVectorInfo *vinfo, row_t rows[], idx_t count, idx_t base_row);
53
+ void PushDelete(DataTable &table, RowVersionManager &info, idx_t vector_idx, row_t rows[], idx_t count,
54
+ idx_t base_row);
53
55
  void PushAppend(DataTable &table, idx_t row_start, idx_t row_count);
54
56
  UpdateInfo *CreateUpdateInfo(idx_t type_size, idx_t entries);
55
57
 
@@ -88,7 +88,7 @@ private:
88
88
  class LocalStorage {
89
89
  public:
90
90
  // Threshold to merge row groups instead of appending
91
- static constexpr const idx_t MERGE_THRESHOLD = RowGroup::ROW_GROUP_SIZE;
91
+ static constexpr const idx_t MERGE_THRESHOLD = Storage::ROW_GROUP_SIZE;
92
92
 
93
93
  public:
94
94
  struct CommitState {
@@ -48,7 +48,6 @@ public:
48
48
  void Rollback() noexcept;
49
49
 
50
50
  private:
51
- ClientContext &context;
52
51
  ArenaAllocator allocator;
53
52
 
54
53
  private:
@@ -47,6 +47,11 @@ shared_ptr<Relation> Relation::Project(const vector<string> &expressions) {
47
47
  return Project(expressions, aliases);
48
48
  }
49
49
 
50
+ shared_ptr<Relation> Relation::Project(vector<unique_ptr<ParsedExpression>> expressions,
51
+ const vector<string> &aliases) {
52
+ return make_shared<ProjectionRelation>(shared_from_this(), std::move(expressions), aliases);
53
+ }
54
+
50
55
  static vector<unique_ptr<ParsedExpression>> StringListToExpressionList(ClientContext &context,
51
56
  const vector<string> &expressions) {
52
57
  if (expressions.empty()) {
@@ -73,7 +78,11 @@ shared_ptr<Relation> Relation::Filter(const string &expression) {
73
78
  if (expression_list.size() != 1) {
74
79
  throw ParserException("Expected a single expression as filter condition");
75
80
  }
76
- return make_shared<FilterRelation>(shared_from_this(), std::move(expression_list[0]));
81
+ return Filter(std::move(expression_list[0]));
82
+ }
83
+
84
+ shared_ptr<Relation> Relation::Filter(unique_ptr<ParsedExpression> expression) {
85
+ return make_shared<FilterRelation>(shared_from_this(), std::move(expression));
77
86
  }
78
87
 
79
88
  shared_ptr<Relation> Relation::Filter(const vector<string> &expressions) {
@@ -95,6 +104,10 @@ shared_ptr<Relation> Relation::Limit(int64_t limit, int64_t offset) {
95
104
 
96
105
  shared_ptr<Relation> Relation::Order(const string &expression) {
97
106
  auto order_list = Parser::ParseOrderList(expression, context.GetContext()->GetParserOptions());
107
+ return Order(std::move(order_list));
108
+ }
109
+
110
+ shared_ptr<Relation> Relation::Order(vector<OrderByNode> order_list) {
98
111
  return make_shared<OrderRelation>(shared_from_this(), std::move(order_list));
99
112
  }
100
113
 
@@ -110,7 +123,7 @@ shared_ptr<Relation> Relation::Order(const vector<string> &expressions) {
110
123
  }
111
124
  order_list.push_back(std::move(inner_list[0]));
112
125
  }
113
- return make_shared<OrderRelation>(shared_from_this(), std::move(order_list));
126
+ return Order(std::move(order_list));
114
127
  }
115
128
 
116
129
  shared_ptr<Relation> Relation::Join(const shared_ptr<Relation> &other, const string &condition, JoinType type,
@@ -726,17 +726,12 @@ Value ForceCompressionSetting::GetSetting(ClientContext &context) {
726
726
  //===--------------------------------------------------------------------===//
727
727
  void ForceBitpackingModeSetting::SetGlobal(DatabaseInstance *db, DBConfig &config, const Value &input) {
728
728
  auto mode_str = StringUtil::Lower(input.ToString());
729
- if (mode_str == "none") {
730
- config.options.force_bitpacking_mode = BitpackingMode::AUTO;
731
- } else {
732
- auto mode = BitpackingModeFromString(mode_str);
733
- if (mode == BitpackingMode::AUTO) {
734
- throw ParserException(
735
- "Unrecognized option for force_bitpacking_mode, expected none, constant, constant_delta, "
736
- "delta_for, or for");
737
- }
738
- config.options.force_bitpacking_mode = mode;
729
+ auto mode = BitpackingModeFromString(mode_str);
730
+ if (mode == BitpackingMode::INVALID) {
731
+ throw ParserException("Unrecognized option for force_bitpacking_mode, expected none, constant, constant_delta, "
732
+ "delta_for, or for");
739
733
  }
734
+ config.options.force_bitpacking_mode = mode;
740
735
  }
741
736
 
742
737
  void ForceBitpackingModeSetting::ResetGlobal(DatabaseInstance *db, DBConfig &config) {
@@ -22,6 +22,20 @@ static unique_ptr<BaseStatistics> StatisticsOperationsNumericNumericCast(const B
22
22
  }
23
23
 
24
24
  static unique_ptr<BaseStatistics> StatisticsNumericCastSwitch(const BaseStatistics &input, const LogicalType &target) {
25
+ // Downcasting timestamps to times is not a truncation operation
26
+ switch (target.id()) {
27
+ case LogicalTypeId::TIME:
28
+ switch (input.GetType().id()) {
29
+ case LogicalTypeId::TIMESTAMP:
30
+ case LogicalTypeId::TIMESTAMP_TZ:
31
+ return nullptr;
32
+ default:
33
+ break;
34
+ }
35
+ default:
36
+ break;
37
+ }
38
+
25
39
  switch (target.InternalType()) {
26
40
  case PhysicalType::INT8:
27
41
  case PhysicalType::INT16:
@@ -66,7 +66,6 @@ void SingleFileTableDataWriter::FinalizeTable(TableStatistics &&global_stats, Da
66
66
  row_group_serializer.End();
67
67
  }
68
68
 
69
- // TODO: Serialize this:
70
69
  auto index_pointers = info->indexes.SerializeIndexes(table_data_writer);
71
70
 
72
71
  // Now begin the metadata as a unit
@@ -23,8 +23,7 @@ static constexpr const idx_t BITPACKING_METADATA_GROUP_SIZE = STANDARD_VECTOR_SI
23
23
 
24
24
  BitpackingMode BitpackingModeFromString(const string &str) {
25
25
  auto mode = StringUtil::Lower(str);
26
-
27
- if (mode == "auto") {
26
+ if (mode == "auto" || mode == "none") {
28
27
  return BitpackingMode::AUTO;
29
28
  } else if (mode == "constant") {
30
29
  return BitpackingMode::CONSTANT;
@@ -35,21 +34,21 @@ BitpackingMode BitpackingModeFromString(const string &str) {
35
34
  } else if (mode == "for") {
36
35
  return BitpackingMode::FOR;
37
36
  } else {
38
- return BitpackingMode::AUTO;
37
+ return BitpackingMode::INVALID;
39
38
  }
40
39
  }
41
40
 
42
41
  string BitpackingModeToString(const BitpackingMode &mode) {
43
42
  switch (mode) {
44
- case (BitpackingMode::AUTO):
43
+ case BitpackingMode::AUTO:
45
44
  return "auto";
46
- case (BitpackingMode::CONSTANT):
45
+ case BitpackingMode::CONSTANT:
47
46
  return "constant";
48
- case (BitpackingMode::CONSTANT_DELTA):
47
+ case BitpackingMode::CONSTANT_DELTA:
49
48
  return "constant_delta";
50
- case (BitpackingMode::DELTA_FOR):
49
+ case BitpackingMode::DELTA_FOR:
51
50
  return "delta_for";
52
- case (BitpackingMode::FOR):
51
+ case BitpackingMode::FOR:
53
52
  return "for";
54
53
  default:
55
54
  throw NotImplementedException("Unknown bitpacking mode: " + to_string((uint8_t)mode) + "\n");
@@ -161,7 +160,7 @@ public:
161
160
  // Don't delta encoding 1 value makes no sense
162
161
  if (compression_buffer_idx < 2) {
163
162
  return;
164
- };
163
+ }
165
164
 
166
165
  // TODO: handle NULLS here?
167
166
  // Currently we cannot handle nulls because we would need an additional step of patching for this.
@@ -686,48 +685,57 @@ public:
686
685
  }
687
686
 
688
687
  void Skip(ColumnSegment &segment, idx_t skip_count) {
689
- while (skip_count > 0) {
690
- if (current_group_offset + skip_count < BITPACKING_METADATA_GROUP_SIZE) {
691
- // Skipping Delta FOR requires a bit of decoding to figure out the new delta
692
- if (current_group.mode == BitpackingMode::DELTA_FOR) {
693
- // if current_group_offset points into the middle of a
694
- // BitpackingPrimitives::BITPACKING_ALGORITHM_GROUP_SIZE, we need to scan a few
695
- // values before current_group_offset to align with the algorithm groups
696
- idx_t extra_count = current_group_offset % BitpackingPrimitives::BITPACKING_ALGORITHM_GROUP_SIZE;
697
-
698
- // Calculate total offset and count to bitunpack
699
- idx_t base_decompress_count = BitpackingPrimitives::RoundUpToAlgorithmGroupSize(skip_count);
700
- idx_t decompress_count = base_decompress_count + extra_count;
701
- idx_t decompress_offset = current_group_offset - extra_count;
702
- bool skip_sign_extension = true;
703
-
704
- BitpackingPrimitives::UnPackBuffer<T>(data_ptr_cast(decompression_buffer),
705
- current_group_ptr + decompress_offset, decompress_count,
706
- current_width, skip_sign_extension);
707
-
708
- ApplyFrameOfReference<T_S>(reinterpret_cast<T_S *>(decompression_buffer + extra_count),
709
- current_frame_of_reference, skip_count);
710
- DeltaDecode<T_S>(reinterpret_cast<T_S *>(decompression_buffer + extra_count),
711
- static_cast<T_S>(current_delta_offset), skip_count);
712
- current_delta_offset = decompression_buffer[extra_count + skip_count - 1];
713
-
714
- current_group_offset += skip_count;
715
- } else {
716
- current_group_offset += skip_count;
717
- }
718
- break;
719
- } else {
720
- auto left_in_this_group = BITPACKING_METADATA_GROUP_SIZE - current_group_offset;
721
- auto number_of_groups_to_skip = (skip_count - left_in_this_group) / BITPACKING_METADATA_GROUP_SIZE;
722
-
723
- current_group_offset = 0;
724
- bitpacking_metadata_ptr -= number_of_groups_to_skip * sizeof(bitpacking_metadata_encoded_t);
688
+ bool skip_sign_extend = true;
725
689
 
690
+ idx_t skipped = 0;
691
+ while (skipped < skip_count) {
692
+ // Exhausted this metadata group, move pointers to next group and load metadata for next group.
693
+ if (current_group_offset >= BITPACKING_METADATA_GROUP_SIZE) {
726
694
  LoadNextGroup();
695
+ }
727
696
 
728
- skip_count -= left_in_this_group;
729
- skip_count -= number_of_groups_to_skip * BITPACKING_METADATA_GROUP_SIZE;
697
+ idx_t offset_in_compression_group =
698
+ current_group_offset % BitpackingPrimitives::BITPACKING_ALGORITHM_GROUP_SIZE;
699
+
700
+ if (current_group.mode == BitpackingMode::CONSTANT) {
701
+ idx_t remaining = skip_count - skipped;
702
+ idx_t to_skip = MinValue(remaining, BITPACKING_METADATA_GROUP_SIZE - current_group_offset);
703
+ skipped += to_skip;
704
+ current_group_offset += to_skip;
705
+ continue;
706
+ }
707
+ if (current_group.mode == BitpackingMode::CONSTANT_DELTA) {
708
+ idx_t remaining = skip_count - skipped;
709
+ idx_t to_skip = MinValue(remaining, BITPACKING_METADATA_GROUP_SIZE - current_group_offset);
710
+ skipped += to_skip;
711
+ current_group_offset += to_skip;
712
+ continue;
713
+ }
714
+ D_ASSERT(current_group.mode == BitpackingMode::FOR || current_group.mode == BitpackingMode::DELTA_FOR);
715
+
716
+ idx_t to_skip =
717
+ MinValue<idx_t>(skip_count - skipped,
718
+ BitpackingPrimitives::BITPACKING_ALGORITHM_GROUP_SIZE - offset_in_compression_group);
719
+ // Calculate start of compression algorithm group
720
+ if (current_group.mode == BitpackingMode::DELTA_FOR) {
721
+ data_ptr_t current_position_ptr = current_group_ptr + current_group_offset * current_width / 8;
722
+ data_ptr_t decompression_group_start_pointer =
723
+ current_position_ptr - offset_in_compression_group * current_width / 8;
724
+
725
+ BitpackingPrimitives::UnPackBlock<T>(data_ptr_cast(decompression_buffer),
726
+ decompression_group_start_pointer, current_width,
727
+ skip_sign_extend);
728
+
729
+ T *decompression_ptr = decompression_buffer + offset_in_compression_group;
730
+ ApplyFrameOfReference<T_S>(reinterpret_cast<T_S *>(decompression_ptr),
731
+ static_cast<T_S>(current_frame_of_reference), to_skip);
732
+ DeltaDecode<T_S>(reinterpret_cast<T_S *>(decompression_ptr), static_cast<T_S>(current_delta_offset),
733
+ to_skip);
734
+ current_delta_offset = decompression_ptr[to_skip - 1];
730
735
  }
736
+
737
+ skipped += to_skip;
738
+ current_group_offset += to_skip;
731
739
  }
732
740
  }
733
741
 
@@ -757,7 +765,6 @@ void BitpackingScanPartial(ColumnSegment &segment, ColumnScanState &state, idx_t
757
765
  bool skip_sign_extend = true;
758
766
 
759
767
  idx_t scanned = 0;
760
-
761
768
  while (scanned < scan_count) {
762
769
  // Exhausted this metadata group, move pointers to next group and load metadata for next group.
763
770
  if (scan_state.current_group_offset >= BITPACKING_METADATA_GROUP_SIZE) {
@@ -208,7 +208,7 @@ void DataTable::InitializeScanWithOffset(TableScanState &state, const vector<col
208
208
  }
209
209
 
210
210
  idx_t DataTable::MaxThreads(ClientContext &context) {
211
- idx_t parallel_scan_vector_count = RowGroup::ROW_GROUP_VECTOR_COUNT;
211
+ idx_t parallel_scan_vector_count = Storage::ROW_GROUP_VECTOR_COUNT;
212
212
  if (ClientConfig::GetConfig(context).verify_parallelism) {
213
213
  parallel_scan_vector_count = 1;
214
214
  }
@@ -102,7 +102,7 @@ void LocalTableStorage::WriteNewRowGroup() {
102
102
  }
103
103
 
104
104
  void LocalTableStorage::FlushBlocks() {
105
- if (!merged_storage && row_groups->GetTotalRows() > RowGroup::ROW_GROUP_SIZE) {
105
+ if (!merged_storage && row_groups->GetTotalRows() > Storage::ROW_GROUP_SIZE) {
106
106
  optimistic_writer.WriteLastRowGroup(*row_groups);
107
107
  }
108
108
  optimistic_writer.FinalFlush();
@@ -34,6 +34,12 @@ MetadataHandle MetadataManager::AllocateHandle() {
34
34
  MetadataPointer pointer;
35
35
  pointer.block_index = free_block;
36
36
  auto &block = blocks[free_block];
37
+ if (block.block->BlockId() < MAXIMUM_BLOCK) {
38
+ // this block is a disk-backed block, yet we are planning to write to it
39
+ // we need to convert it into a transient block before we can write to it
40
+ ConvertToTransient(block);
41
+ D_ASSERT(block.block->BlockId() >= MAXIMUM_BLOCK);
42
+ }
37
43
  D_ASSERT(!block.free_blocks.empty());
38
44
  pointer.index = block.free_blocks.back();
39
45
  // mark the block as used
@@ -54,6 +60,23 @@ MetadataHandle MetadataManager::Pin(MetadataPointer pointer) {
54
60
  return handle;
55
61
  }
56
62
 
63
+ void MetadataManager::ConvertToTransient(MetadataBlock &block) {
64
+ // pin the old block
65
+ auto old_buffer = buffer_manager.Pin(block.block);
66
+
67
+ // allocate a new transient block to replace it
68
+ shared_ptr<BlockHandle> new_block;
69
+ auto new_buffer = buffer_manager.Allocate(Storage::BLOCK_SIZE, false, &new_block);
70
+
71
+ // copy the data to the transient block
72
+ memcpy(new_buffer.Ptr(), old_buffer.Ptr(), Storage::BLOCK_SIZE);
73
+
74
+ block.block = std::move(new_block);
75
+
76
+ // unregister the old block
77
+ block_manager.UnregisterBlock(block.block_id, false);
78
+ }
79
+
57
80
  block_id_t MetadataManager::AllocateNewBlock() {
58
81
  auto new_block_id = GetNextBlockId();
59
82
 
@@ -91,11 +114,11 @@ MetaBlockPointer MetadataManager::GetDiskPointer(MetadataPointer pointer, uint32
91
114
  return MetaBlockPointer(block_pointer, offset);
92
115
  }
93
116
 
94
- block_id_t MetaBlockPointer::GetBlockId() {
117
+ block_id_t MetaBlockPointer::GetBlockId() const {
95
118
  return block_id_t(block_pointer & ~(idx_t(0xFF) << 56ULL));
96
119
  }
97
120
 
98
- uint32_t MetaBlockPointer::GetBlockIndex() {
121
+ uint32_t MetaBlockPointer::GetBlockIndex() const {
99
122
  return block_pointer >> 56ULL;
100
123
  }
101
124
 
@@ -262,6 +285,22 @@ void MetadataManager::MarkBlocksAsModified() {
262
285
  }
263
286
  }
264
287
 
288
+ void MetadataManager::ClearModifiedBlocks(const vector<MetaBlockPointer> &pointers) {
289
+ for (auto &pointer : pointers) {
290
+ auto block_id = pointer.GetBlockId();
291
+ auto block_index = pointer.GetBlockIndex();
292
+ auto entry = modified_blocks.find(block_id);
293
+ if (entry == modified_blocks.end()) {
294
+ throw InternalException("ClearModifiedBlocks - Block id %llu not found in modified_blocks", block_id);
295
+ }
296
+ auto &modified_list = entry->second;
297
+ // verify this block's bit is set in the mask (bitwise &: a logical && would pass for any non-zero mask)
298
+ D_ASSERT((modified_list & (1ULL << block_index)) != 0);
299
+ // clear only this block's bit, leaving the other bits of the mask untouched
300
+ modified_list &= ~(1ULL << block_index);
301
+ }
302
+ }
303
+
265
304
  block_id_t MetadataManager::GetNextBlockId() {
266
305
  return block_manager.GetFreeBlockId();
267
306
  }
@@ -2,9 +2,14 @@
2
2
 
3
3
  namespace duckdb {
4
4
 
5
- MetadataReader::MetadataReader(MetadataManager &manager, MetaBlockPointer pointer, BlockReaderType type)
6
- : manager(manager), type(type), next_pointer(FromDiskPointer(pointer)), has_next_block(true), index(0), offset(0),
7
- next_offset(pointer.offset), capacity(0) {
5
+ MetadataReader::MetadataReader(MetadataManager &manager, MetaBlockPointer pointer,
6
+ optional_ptr<vector<MetaBlockPointer>> read_pointers_p, BlockReaderType type)
7
+ : manager(manager), type(type), next_pointer(FromDiskPointer(pointer)), has_next_block(true),
8
+ read_pointers(read_pointers_p), index(0), offset(0), next_offset(pointer.offset), capacity(0) {
9
+ if (read_pointers) {
10
+ D_ASSERT(read_pointers->empty());
11
+ read_pointers->push_back(pointer);
12
+ }
8
13
  }
9
14
 
10
15
  MetadataReader::MetadataReader(MetadataManager &manager, BlockPointer pointer)
@@ -57,6 +62,10 @@ void MetadataReader::ReadNextBlock() {
57
62
  has_next_block = false;
58
63
  } else {
59
64
  next_pointer = FromDiskPointer(MetaBlockPointer(next_block, 0));
65
+ MetaBlockPointer next_block_pointer(next_block, 0);
66
+ if (read_pointers) {
67
+ read_pointers->push_back(next_block_pointer);
68
+ }
60
69
  }
61
70
  if (next_offset < sizeof(block_id_t)) {
62
71
  next_offset = sizeof(block_id_t);
@@ -3,7 +3,9 @@
3
3
 
4
4
  namespace duckdb {
5
5
 
6
- MetadataWriter::MetadataWriter(MetadataManager &manager) : manager(manager), capacity(0), offset(0) {
6
+ MetadataWriter::MetadataWriter(MetadataManager &manager, optional_ptr<vector<MetaBlockPointer>> written_pointers_p)
7
+ : manager(manager), written_pointers(written_pointers_p), capacity(0), offset(0) {
8
+ D_ASSERT(!written_pointers || written_pointers->empty());
7
9
  }
8
10
 
9
11
  MetadataWriter::~MetadataWriter() {
@@ -38,7 +40,8 @@ void MetadataWriter::NextBlock() {
38
40
 
39
41
  // write the block id of the new block to the start of the current block
40
42
  if (capacity > 0) {
41
- Store<idx_t>(manager.GetDiskPointer(new_handle.pointer).block_pointer, BasePtr());
43
+ auto disk_block = manager.GetDiskPointer(new_handle.pointer);
44
+ Store<idx_t>(disk_block.block_pointer, BasePtr());
42
45
  }
43
46
  // now update the block id of the block
44
47
  block = std::move(new_handle);
@@ -46,6 +49,9 @@ void MetadataWriter::NextBlock() {
46
49
  offset = sizeof(idx_t);
47
50
  capacity = MetadataManager::METADATA_BLOCK_SIZE;
48
51
  Store<idx_t>(-1, BasePtr());
52
+ if (written_pointers) {
53
+ written_pointers->push_back(manager.GetDiskPointer(current_pointer));
54
+ }
49
55
  }
50
56
 
51
57
  void MetadataWriter::WriteData(const_data_ptr_t buffer, idx_t write_size) {
@@ -2,6 +2,38 @@
2
2
 
3
3
  namespace duckdb {
4
4
 
5
+ //===--------------------------------------------------------------------===//
6
+ // PartialBlock
7
+ //===--------------------------------------------------------------------===//
8
+
9
+ PartialBlock::PartialBlock(PartialBlockState state, BlockManager &block_manager,
10
+ const shared_ptr<BlockHandle> &block_handle)
11
+ : state(state), block_manager(block_manager), block_handle(block_handle) {
12
+ }
13
+
14
+ void PartialBlock::AddUninitializedRegion(idx_t start, idx_t end) {
15
+ uninitialized_regions.push_back({start, end});
16
+ }
17
+
18
+ void PartialBlock::FlushInternal(const idx_t free_space_left) {
19
+
20
+ // ensure that we do not leak any data
21
+ if (free_space_left > 0 || !uninitialized_regions.empty()) {
22
+ auto buffer_handle = block_manager.buffer_manager.Pin(block_handle);
23
+
24
+ // memset any uninitialized regions
25
+ for (auto &uninitialized : uninitialized_regions) {
26
+ memset(buffer_handle.Ptr() + uninitialized.start, 0, uninitialized.end - uninitialized.start);
27
+ }
28
+ // memset any free space at the end of the block to 0 prior to writing to disk
29
+ memset(buffer_handle.Ptr() + Storage::BLOCK_SIZE - free_space_left, 0, free_space_left);
30
+ }
31
+ }
32
+
33
+ //===--------------------------------------------------------------------===//
34
+ // PartialBlockManager
35
+ //===--------------------------------------------------------------------===//
36
+
5
37
  PartialBlockManager::PartialBlockManager(BlockManager &block_manager, CheckpointType checkpoint_type,
6
38
  uint32_t max_partial_block_size, uint32_t max_use_count)
7
39
  : block_manager(block_manager), checkpoint_type(checkpoint_type), max_partial_block_size(max_partial_block_size),
@@ -9,9 +41,7 @@ PartialBlockManager::PartialBlockManager(BlockManager &block_manager, Checkpoint
9
41
  }
10
42
  PartialBlockManager::~PartialBlockManager() {
11
43
  }
12
- //===--------------------------------------------------------------------===//
13
- // Partial Blocks
14
- //===--------------------------------------------------------------------===//
44
+
15
45
  PartialBlockAllocation PartialBlockManager::GetBlockAllocation(uint32_t segment_size) {
16
46
  PartialBlockAllocation allocation;
17
47
  allocation.block_manager = &block_manager;
@@ -47,7 +77,7 @@ void PartialBlockManager::AllocateBlock(PartialBlockState &state, uint32_t segme
47
77
  state.block_id = INVALID_BLOCK;
48
78
  }
49
79
  state.block_size = Storage::BLOCK_SIZE;
50
- state.offset_in_block = 0;
80
+ state.offset = 0;
51
81
  state.block_use_count = 1;
52
82
  }
53
83
 
@@ -60,21 +90,22 @@ bool PartialBlockManager::GetPartialBlock(idx_t segment_size, unique_ptr<Partial
60
90
  partial_block = std::move(entry->second);
61
91
  partially_filled_blocks.erase(entry);
62
92
 
63
- D_ASSERT(partial_block->state.offset_in_block > 0);
64
- D_ASSERT(ValueIsAligned(partial_block->state.offset_in_block));
93
+ D_ASSERT(partial_block->state.offset > 0);
94
+ D_ASSERT(ValueIsAligned(partial_block->state.offset));
65
95
  return true;
66
96
  }
67
97
 
68
98
  void PartialBlockManager::RegisterPartialBlock(PartialBlockAllocation &&allocation) {
69
99
  auto &state = allocation.partial_block->state;
100
+ D_ASSERT(checkpoint_type != CheckpointType::FULL_CHECKPOINT || state.block_id >= 0);
70
101
  if (state.block_use_count < max_use_count) {
71
- auto unaligned_size = allocation.allocation_size + state.offset_in_block;
102
+ auto unaligned_size = allocation.allocation_size + state.offset;
72
103
  auto new_size = AlignValue(unaligned_size);
73
104
  if (new_size != unaligned_size) {
74
105
  // register the uninitialized region so we can correctly initialize it before writing to disk
75
106
  allocation.partial_block->AddUninitializedRegion(unaligned_size, new_size);
76
107
  }
77
- state.offset_in_block = new_size;
108
+ state.offset = new_size;
78
109
  auto new_space_left = state.block_size - new_size;
79
110
  // check if the block is STILL partially filled after adding the segment_size
80
111
  if (new_space_left >= Storage::BLOCK_SIZE - max_partial_block_size) {
@@ -82,7 +113,7 @@ void PartialBlockManager::RegisterPartialBlock(PartialBlockAllocation &&allocati
82
113
  partially_filled_blocks.insert(make_pair(new_space_left, std::move(allocation.partial_block)));
83
114
  }
84
115
  }
85
- idx_t free_space = state.block_size - state.offset_in_block;
116
+ idx_t free_space = state.block_size - state.offset;
86
117
  auto block_to_free = std::move(allocation.partial_block);
87
118
  if (!block_to_free && partially_filled_blocks.size() > MAX_BLOCK_MAP_SIZE) {
88
119
  // Free the page with the least space free.
@@ -98,10 +129,6 @@ void PartialBlockManager::RegisterPartialBlock(PartialBlockAllocation &&allocati
98
129
  }
99
130
  }
100
131
 
101
- void PartialBlock::Merge(PartialBlock &other, idx_t offset, idx_t other_size) {
102
- throw InternalException("PartialBlock::Merge not implemented for this block type");
103
- }
104
-
105
132
  void PartialBlockManager::Merge(PartialBlockManager &other) {
106
133
  if (&other == this) {
107
134
  throw InternalException("Cannot merge into itself");
@@ -117,10 +144,10 @@ void PartialBlockManager::Merge(PartialBlockManager &other) {
117
144
  // we can merge this block into an existing block - merge them
118
145
  // merge blocks
119
146
  auto allocation = GetBlockAllocation(used_space);
120
- allocation.partial_block->Merge(*e.second, allocation.state.offset_in_block, used_space);
147
+ allocation.partial_block->Merge(*e.second, allocation.state.offset, used_space);
121
148
 
122
149
  // re-register the partial block
123
- allocation.state.offset_in_block += used_space;
150
+ allocation.state.offset += used_space;
124
151
  RegisterPartialBlock(std::move(allocation));
125
152
  } else {
126
153
  // we cannot merge this block - append it directly to the current block manager
@@ -240,8 +240,7 @@ void SingleFileBlockManager::LoadFreeList() {
240
240
  // no free list
241
241
  return;
242
242
  }
243
-
244
- MetadataReader reader(GetMetadataManager(), free_pointer, BlockReaderType::REGISTER_BLOCKS);
243
+ MetadataReader reader(GetMetadataManager(), free_pointer, nullptr, BlockReaderType::REGISTER_BLOCKS);
245
244
  auto free_list_count = reader.Read<uint64_t>();
246
245
  free_list.clear();
247
246
  for (idx_t i = 0; i < free_list_count; i++) {
@@ -2,7 +2,7 @@
2
2
 
3
3
  namespace duckdb {
4
4
 
5
- const uint64_t VERSION_NUMBER = 59;
5
+ const uint64_t VERSION_NUMBER = 60;
6
6
 
7
7
  struct StorageVersionInfo {
8
8
  const char *version_name;