@nxtedition/rocksdb 11.0.3 → 11.0.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (134) hide show
  1. package/binding.cc +133 -122
  2. package/deps/rocksdb/rocksdb/db/column_family_test.cc +15 -7
  3. package/deps/rocksdb/rocksdb/db/compaction/compaction_job_test.cc +4 -2
  4. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_level.cc +8 -4
  5. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_test.cc +11 -7
  6. package/deps/rocksdb/rocksdb/db/compaction/compaction_service_job.cc +17 -11
  7. package/deps/rocksdb/rocksdb/db/compaction/compaction_service_test.cc +15 -0
  8. package/deps/rocksdb/rocksdb/db/db_basic_test.cc +155 -0
  9. package/deps/rocksdb/rocksdb/db/db_bloom_filter_test.cc +564 -461
  10. package/deps/rocksdb/rocksdb/db/db_follower_test.cc +8 -4
  11. package/deps/rocksdb/rocksdb/db/db_impl/db_impl.cc +40 -24
  12. package/deps/rocksdb/rocksdb/db/db_impl/db_impl.h +8 -1
  13. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_compaction_flush.cc +7 -4
  14. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_debug.cc +5 -0
  15. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_files.cc +3 -1
  16. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_open.cc +19 -1
  17. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_write.cc +20 -16
  18. package/deps/rocksdb/rocksdb/db/db_io_failure_test.cc +27 -0
  19. package/deps/rocksdb/rocksdb/db/db_kv_checksum_test.cc +10 -2
  20. package/deps/rocksdb/rocksdb/db/db_memtable_test.cc +85 -0
  21. package/deps/rocksdb/rocksdb/db/db_sst_test.cc +55 -2
  22. package/deps/rocksdb/rocksdb/db/db_test2.cc +231 -0
  23. package/deps/rocksdb/rocksdb/db/db_test_util.cc +5 -0
  24. package/deps/rocksdb/rocksdb/db/db_test_util.h +10 -1
  25. package/deps/rocksdb/rocksdb/db/db_universal_compaction_test.cc +0 -1
  26. package/deps/rocksdb/rocksdb/db/db_wal_test.cc +175 -1
  27. package/deps/rocksdb/rocksdb/db/db_with_timestamp_basic_test.cc +64 -0
  28. package/deps/rocksdb/rocksdb/db/dbformat.h +5 -6
  29. package/deps/rocksdb/rocksdb/db/dbformat_test.cc +8 -8
  30. package/deps/rocksdb/rocksdb/db/experimental.cc +3 -2
  31. package/deps/rocksdb/rocksdb/db/external_sst_file_test.cc +2 -4
  32. package/deps/rocksdb/rocksdb/db/flush_job.cc +7 -2
  33. package/deps/rocksdb/rocksdb/db/flush_job_test.cc +4 -2
  34. package/deps/rocksdb/rocksdb/db/listener_test.cc +5 -5
  35. package/deps/rocksdb/rocksdb/db/log_writer.cc +12 -3
  36. package/deps/rocksdb/rocksdb/db/memtable.cc +83 -23
  37. package/deps/rocksdb/rocksdb/db/memtable.h +11 -3
  38. package/deps/rocksdb/rocksdb/db/memtable_list.cc +7 -5
  39. package/deps/rocksdb/rocksdb/db/memtable_list_test.cc +21 -0
  40. package/deps/rocksdb/rocksdb/db/version_builder.cc +462 -33
  41. package/deps/rocksdb/rocksdb/db/version_builder.h +70 -23
  42. package/deps/rocksdb/rocksdb/db/version_edit_handler.cc +95 -207
  43. package/deps/rocksdb/rocksdb/db/version_edit_handler.h +54 -35
  44. package/deps/rocksdb/rocksdb/db/version_set.cc +13 -11
  45. package/deps/rocksdb/rocksdb/db/version_set_test.cc +313 -59
  46. package/deps/rocksdb/rocksdb/db/write_batch.cc +124 -64
  47. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_common.h +2 -3
  48. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_compaction_filter.h +1 -1
  49. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_driver.cc +4 -1
  50. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_gflags.cc +9 -0
  51. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_listener.h +4 -32
  52. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_shared_state.h +7 -3
  53. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_test_base.cc +60 -172
  54. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_test_base.h +57 -2
  55. package/deps/rocksdb/rocksdb/db_stress_tool/expected_state.cc +23 -15
  56. package/deps/rocksdb/rocksdb/db_stress_tool/expected_state.h +2 -3
  57. package/deps/rocksdb/rocksdb/db_stress_tool/expected_value.cc +1 -1
  58. package/deps/rocksdb/rocksdb/db_stress_tool/expected_value.h +4 -1
  59. package/deps/rocksdb/rocksdb/db_stress_tool/no_batched_ops_stress.cc +200 -92
  60. package/deps/rocksdb/rocksdb/env/file_system.cc +3 -3
  61. package/deps/rocksdb/rocksdb/file/delete_scheduler.cc +124 -23
  62. package/deps/rocksdb/rocksdb/file/delete_scheduler.h +61 -8
  63. package/deps/rocksdb/rocksdb/file/delete_scheduler_test.cc +141 -2
  64. package/deps/rocksdb/rocksdb/file/file_util.cc +17 -2
  65. package/deps/rocksdb/rocksdb/file/file_util.h +10 -0
  66. package/deps/rocksdb/rocksdb/file/filename.cc +11 -3
  67. package/deps/rocksdb/rocksdb/file/filename.h +2 -1
  68. package/deps/rocksdb/rocksdb/file/sst_file_manager_impl.cc +18 -0
  69. package/deps/rocksdb/rocksdb/file/sst_file_manager_impl.h +27 -4
  70. package/deps/rocksdb/rocksdb/file/writable_file_writer.h +8 -1
  71. package/deps/rocksdb/rocksdb/include/rocksdb/advanced_options.h +8 -13
  72. package/deps/rocksdb/rocksdb/include/rocksdb/env.h +4 -0
  73. package/deps/rocksdb/rocksdb/include/rocksdb/experimental.h +5 -0
  74. package/deps/rocksdb/rocksdb/include/rocksdb/file_system.h +5 -2
  75. package/deps/rocksdb/rocksdb/include/rocksdb/filter_policy.h +2 -1
  76. package/deps/rocksdb/rocksdb/include/rocksdb/memtablerep.h +34 -0
  77. package/deps/rocksdb/rocksdb/include/rocksdb/options.h +25 -1
  78. package/deps/rocksdb/rocksdb/include/rocksdb/statistics.h +5 -0
  79. package/deps/rocksdb/rocksdb/include/rocksdb/table.h +27 -9
  80. package/deps/rocksdb/rocksdb/include/rocksdb/table_properties.h +2 -0
  81. package/deps/rocksdb/rocksdb/include/rocksdb/types.h +12 -0
  82. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/transaction_db.h +21 -0
  83. package/deps/rocksdb/rocksdb/include/rocksdb/version.h +2 -2
  84. package/deps/rocksdb/rocksdb/include/rocksdb/write_batch.h +29 -1
  85. package/deps/rocksdb/rocksdb/memtable/inlineskiplist.h +102 -33
  86. package/deps/rocksdb/rocksdb/memtable/skiplistrep.cc +46 -3
  87. package/deps/rocksdb/rocksdb/monitoring/statistics.cc +4 -0
  88. package/deps/rocksdb/rocksdb/options/cf_options.cc +6 -0
  89. package/deps/rocksdb/rocksdb/options/cf_options.h +2 -0
  90. package/deps/rocksdb/rocksdb/options/db_options.cc +15 -1
  91. package/deps/rocksdb/rocksdb/options/db_options.h +2 -0
  92. package/deps/rocksdb/rocksdb/options/options_helper.cc +10 -0
  93. package/deps/rocksdb/rocksdb/options/options_parser.cc +3 -2
  94. package/deps/rocksdb/rocksdb/options/options_settable_test.cc +9 -2
  95. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_builder.cc +75 -35
  96. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_factory.cc +6 -0
  97. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader.cc +4 -0
  98. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader_sync_and_async.h +8 -1
  99. package/deps/rocksdb/rocksdb/table/block_based/filter_block.h +40 -15
  100. package/deps/rocksdb/rocksdb/table/block_based/filter_policy.cc +98 -17
  101. package/deps/rocksdb/rocksdb/table/block_based/filter_policy_internal.h +14 -2
  102. package/deps/rocksdb/rocksdb/table/block_based/full_filter_block.cc +21 -91
  103. package/deps/rocksdb/rocksdb/table/block_based/full_filter_block.h +13 -21
  104. package/deps/rocksdb/rocksdb/table/block_based/full_filter_block_test.cc +14 -5
  105. package/deps/rocksdb/rocksdb/table/block_based/index_builder.cc +62 -53
  106. package/deps/rocksdb/rocksdb/table/block_based/index_builder.h +60 -38
  107. package/deps/rocksdb/rocksdb/table/block_based/partitioned_filter_block.cc +175 -78
  108. package/deps/rocksdb/rocksdb/table/block_based/partitioned_filter_block.h +65 -36
  109. package/deps/rocksdb/rocksdb/table/block_based/partitioned_filter_block_test.cc +25 -15
  110. package/deps/rocksdb/rocksdb/table/block_fetcher.cc +13 -1
  111. package/deps/rocksdb/rocksdb/table/meta_blocks.cc +18 -4
  112. package/deps/rocksdb/rocksdb/table/meta_blocks.h +4 -0
  113. package/deps/rocksdb/rocksdb/tools/db_bench_tool.cc +11 -0
  114. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_db_test.cc +2 -2
  115. package/deps/rocksdb/rocksdb/utilities/checkpoint/checkpoint_impl.cc +47 -18
  116. package/deps/rocksdb/rocksdb/utilities/checkpoint/checkpoint_impl.h +1 -2
  117. package/deps/rocksdb/rocksdb/utilities/checkpoint/checkpoint_test.cc +95 -0
  118. package/deps/rocksdb/rocksdb/utilities/fault_injection_fs.cc +26 -15
  119. package/deps/rocksdb/rocksdb/utilities/fault_injection_fs.h +62 -19
  120. package/deps/rocksdb/rocksdb/utilities/transactions/pessimistic_transaction.cc +73 -34
  121. package/deps/rocksdb/rocksdb/utilities/transactions/pessimistic_transaction.h +5 -0
  122. package/deps/rocksdb/rocksdb/utilities/transactions/pessimistic_transaction_db.cc +10 -3
  123. package/deps/rocksdb/rocksdb/utilities/transactions/pessimistic_transaction_db.h +2 -1
  124. package/deps/rocksdb/rocksdb/utilities/transactions/transaction_util.cc +8 -5
  125. package/deps/rocksdb/rocksdb/utilities/transactions/transaction_util.h +7 -4
  126. package/deps/rocksdb/rocksdb/utilities/transactions/write_committed_transaction_ts_test.cc +225 -0
  127. package/deps/rocksdb/rocksdb/utilities/transactions/write_prepared_txn.cc +2 -1
  128. package/deps/rocksdb/rocksdb/utilities/transactions/write_prepared_txn_db.h +17 -0
  129. package/deps/rocksdb/rocksdb/utilities/transactions/write_unprepared_txn.cc +5 -2
  130. package/index.js +5 -17
  131. package/iterator.js +1 -1
  132. package/package.json +1 -1
  133. package/prebuilds/darwin-arm64/@nxtedition+rocksdb.node +0 -0
  134. package/prebuilds/linux-x64/@nxtedition+rocksdb.node +0 -0
@@ -26,6 +26,7 @@ struct FileMetaData;
26
26
  class InternalStats;
27
27
  class Version;
28
28
  class VersionSet;
29
+ class VersionEditHandler;
29
30
  class ColumnFamilyData;
30
31
  class CacheReservationManager;
31
32
 
@@ -38,22 +39,80 @@ class VersionBuilder {
38
39
  const ImmutableCFOptions* ioptions, TableCache* table_cache,
39
40
  VersionStorageInfo* base_vstorage, VersionSet* version_set,
40
41
  std::shared_ptr<CacheReservationManager>
41
- file_metadata_cache_res_mgr = nullptr);
42
+ file_metadata_cache_res_mgr = nullptr,
43
+ ColumnFamilyData* cfd = nullptr,
44
+ VersionEditHandler* version_edit_handler = nullptr,
45
+ bool track_found_and_missing_files = false,
46
+ bool allow_incomplete_valid_version = false);
42
47
  ~VersionBuilder();
43
48
 
44
49
  bool CheckConsistencyForNumLevels();
50
+
45
51
  Status Apply(const VersionEdit* edit);
52
+
53
+ // Save the current Version to the provided `vstorage`.
46
54
  Status SaveTo(VersionStorageInfo* vstorage) const;
55
+
56
+ // Load all the table handlers for the current Version in the builder.
47
57
  Status LoadTableHandlers(
48
58
  InternalStats* internal_stats, int max_threads,
49
59
  bool prefetch_index_and_filter_in_cache, bool is_initial_load,
50
60
  const std::shared_ptr<const SliceTransform>& prefix_extractor,
51
61
  size_t max_file_size_for_l0_meta_pin, const ReadOptions& read_options,
52
62
  uint8_t block_protection_bytes_per_key);
53
- uint64_t GetMinOldestBlobFileNumber() const;
63
+
64
+ //============APIs only used by VersionEditHandlerPointInTime ============//
65
+
66
+ // Creates a save point for the Version that has been built so far. Subsequent
67
+ // VersionEdits applied to the builder will not affect the Version in this
68
+ // save point. VersionBuilder currently only supports creating one save point,
69
+ // so when `CreateOrReplaceSavePoint` is called again, the previous save point
70
+ // is cleared. `ClearSavePoint` can be called explicitly to clear
71
+ // the save point too.
72
+ void CreateOrReplaceSavePoint();
73
+
74
+ // A valid Version is available if the builder can find all the files needed
75
+ // to build it, or if `allow_incomplete_valid_version_` is true, the version
76
+ // history is never edited in an atomic group, and only a suffix of L0 SST
77
+ // files and their associated blob files is missing.
78
+ // From the users' perspective, missing a suffix of L0 files means missing the
79
+ // user's most recently written data. So the remaining available files still
80
+ // present a valid point-in-time view, although for some previous time.
81
+ // This validity check result will be cached and reused if the Version is not
82
+ // updated between two validity checks.
83
+ bool ValidVersionAvailable();
84
+
85
+ bool HasMissingFiles() const;
86
+
87
+ // When applying a sequence of VersionEdits, intermediate files are the ones
88
+ // that are added and then deleted. The caller should clear the intermediate
89
+ // file tracking after calling this API so that the tracking for subsequent
90
+ // VersionEdits can start over with a clean state.
91
+ std::vector<std::string>& GetAndClearIntermediateFiles();
92
+
93
+ // Clears all the found files in this Version.
94
+ void ClearFoundFiles();
95
+
96
+ // Save the Version in the save point to the provided `vstorage`.
97
+ // Non-OK status will be returned if there is not a valid save point.
98
+ Status SaveSavePointTo(VersionStorageInfo* vstorage) const;
99
+
100
+ // Load all the table handlers for the Version in the save point.
101
+ // Non-OK status will be returned if there is not a valid save point.
102
+ Status LoadSavePointTableHandlers(
103
+ InternalStats* internal_stats, int max_threads,
104
+ bool prefetch_index_and_filter_in_cache, bool is_initial_load,
105
+ const std::shared_ptr<const SliceTransform>& prefix_extractor,
106
+ size_t max_file_size_for_l0_meta_pin, const ReadOptions& read_options,
107
+ uint8_t block_protection_bytes_per_key);
108
+
109
+ void ClearSavePoint();
110
+
111
+ //======= End of APIs only used by VersionEditHandlerPointInTime ==========//
54
112
 
55
113
  private:
56
114
  class Rep;
115
+ std::unique_ptr<Rep> savepoint_;
57
116
  std::unique_ptr<Rep> rep_;
58
117
  };
59
118
 
@@ -62,8 +121,15 @@ class VersionBuilder {
62
121
  // Both of the constructor and destructor need to be called inside DB Mutex.
63
122
  class BaseReferencedVersionBuilder {
64
123
  public:
65
- explicit BaseReferencedVersionBuilder(ColumnFamilyData* cfd);
66
- BaseReferencedVersionBuilder(ColumnFamilyData* cfd, Version* v);
124
+ explicit BaseReferencedVersionBuilder(
125
+ ColumnFamilyData* cfd, VersionEditHandler* version_edit_handler = nullptr,
126
+ bool track_found_and_missing_files = false,
127
+ bool allow_incomplete_valid_version = false);
128
+ BaseReferencedVersionBuilder(
129
+ ColumnFamilyData* cfd, Version* v,
130
+ VersionEditHandler* version_edit_handler = nullptr,
131
+ bool track_found_and_missing_files = false,
132
+ bool allow_incomplete_valid_version = false);
67
133
  ~BaseReferencedVersionBuilder();
68
134
  VersionBuilder* version_builder() const { return version_builder_.get(); }
69
135
 
@@ -71,23 +137,4 @@ class BaseReferencedVersionBuilder {
71
137
  std::unique_ptr<VersionBuilder> version_builder_;
72
138
  Version* version_;
73
139
  };
74
-
75
- class NewestFirstBySeqNo {
76
- public:
77
- bool operator()(const FileMetaData* lhs, const FileMetaData* rhs) const {
78
- assert(lhs);
79
- assert(rhs);
80
-
81
- if (lhs->fd.largest_seqno != rhs->fd.largest_seqno) {
82
- return lhs->fd.largest_seqno > rhs->fd.largest_seqno;
83
- }
84
-
85
- if (lhs->fd.smallest_seqno != rhs->fd.smallest_seqno) {
86
- return lhs->fd.smallest_seqno > rhs->fd.smallest_seqno;
87
- }
88
-
89
- // Break ties by file number
90
- return lhs->fd.GetNumber() > rhs->fd.GetNumber();
91
- }
92
- };
93
140
  } // namespace ROCKSDB_NAMESPACE
@@ -155,6 +155,7 @@ VersionEditHandler::VersionEditHandler(
155
155
  VersionSet* version_set, bool track_found_and_missing_files,
156
156
  bool no_error_if_files_missing, const std::shared_ptr<IOTracer>& io_tracer,
157
157
  const ReadOptions& read_options, bool skip_load_table_files,
158
+ bool allow_incomplete_valid_version,
158
159
  EpochNumberRequirement epoch_number_requirement)
159
160
  : VersionEditHandlerBase(read_options),
160
161
  read_only_(read_only),
@@ -165,6 +166,7 @@ VersionEditHandler::VersionEditHandler(
165
166
  io_tracer_(io_tracer),
166
167
  skip_load_table_files_(skip_load_table_files),
167
168
  initialized_(false),
169
+ allow_incomplete_valid_version_(allow_incomplete_valid_version),
168
170
  epoch_number_requirement_(epoch_number_requirement) {
169
171
  assert(version_set_ != nullptr);
170
172
  }
@@ -218,15 +220,15 @@ Status VersionEditHandler::ApplyVersionEdit(VersionEdit& edit,
218
220
 
219
221
  Status VersionEditHandler::OnColumnFamilyAdd(VersionEdit& edit,
220
222
  ColumnFamilyData** cfd) {
221
- bool cf_in_not_found = false;
223
+ bool do_not_open_cf = false;
222
224
  bool cf_in_builders = false;
223
- CheckColumnFamilyId(edit, &cf_in_not_found, &cf_in_builders);
225
+ CheckColumnFamilyId(edit, &do_not_open_cf, &cf_in_builders);
224
226
 
225
227
  assert(cfd != nullptr);
226
228
  *cfd = nullptr;
227
229
  const std::string& cf_name = edit.GetColumnFamilyName();
228
230
  Status s;
229
- if (cf_in_builders || cf_in_not_found) {
231
+ if (cf_in_builders || do_not_open_cf) {
230
232
  s = Status::Corruption("MANIFEST adding the same column family twice: " +
231
233
  cf_name);
232
234
  }
@@ -239,7 +241,7 @@ Status VersionEditHandler::OnColumnFamilyAdd(VersionEdit& edit,
239
241
  cf_name.compare(kPersistentStatsColumnFamilyName) == 0;
240
242
  if (cf_options == name_to_options_.end() &&
241
243
  !is_persistent_stats_column_family) {
242
- column_families_not_found_.emplace(edit.GetColumnFamily(), cf_name);
244
+ do_not_open_column_families_.emplace(edit.GetColumnFamily(), cf_name);
243
245
  } else {
244
246
  if (is_persistent_stats_column_family) {
245
247
  ColumnFamilyOptions cfo;
@@ -256,9 +258,9 @@ Status VersionEditHandler::OnColumnFamilyAdd(VersionEdit& edit,
256
258
 
257
259
  Status VersionEditHandler::OnColumnFamilyDrop(VersionEdit& edit,
258
260
  ColumnFamilyData** cfd) {
259
- bool cf_in_not_found = false;
261
+ bool do_not_open_cf = false;
260
262
  bool cf_in_builders = false;
261
- CheckColumnFamilyId(edit, &cf_in_not_found, &cf_in_builders);
263
+ CheckColumnFamilyId(edit, &do_not_open_cf, &cf_in_builders);
262
264
 
263
265
  assert(cfd != nullptr);
264
266
  *cfd = nullptr;
@@ -266,8 +268,8 @@ Status VersionEditHandler::OnColumnFamilyDrop(VersionEdit& edit,
266
268
  Status s;
267
269
  if (cf_in_builders) {
268
270
  tmp_cfd = DestroyCfAndCleanup(edit);
269
- } else if (cf_in_not_found) {
270
- column_families_not_found_.erase(edit.GetColumnFamily());
271
+ } else if (do_not_open_cf) {
272
+ do_not_open_column_families_.erase(edit.GetColumnFamily());
271
273
  } else {
272
274
  s = Status::Corruption("MANIFEST - dropping non-existing column family");
273
275
  }
@@ -288,22 +290,20 @@ Status VersionEditHandler::OnWalDeletion(VersionEdit& edit) {
288
290
 
289
291
  Status VersionEditHandler::OnNonCfOperation(VersionEdit& edit,
290
292
  ColumnFamilyData** cfd) {
291
- bool cf_in_not_found = false;
293
+ bool do_not_open_cf = false;
292
294
  bool cf_in_builders = false;
293
- CheckColumnFamilyId(edit, &cf_in_not_found, &cf_in_builders);
295
+ CheckColumnFamilyId(edit, &do_not_open_cf, &cf_in_builders);
294
296
 
295
297
  assert(cfd != nullptr);
296
298
  *cfd = nullptr;
297
299
  Status s;
298
- if (!cf_in_not_found) {
300
+ if (!do_not_open_cf) {
299
301
  if (!cf_in_builders) {
300
302
  s = Status::Corruption(
301
303
  "MANIFEST record referencing unknown column family");
302
304
  }
303
305
  ColumnFamilyData* tmp_cfd = nullptr;
304
306
  if (s.ok()) {
305
- auto builder_iter = builders_.find(edit.GetColumnFamily());
306
- assert(builder_iter != builders_.end());
307
307
  tmp_cfd = version_set_->GetColumnFamilySet()->GetColumnFamily(
308
308
  edit.GetColumnFamily());
309
309
  assert(tmp_cfd != nullptr);
@@ -318,56 +318,33 @@ Status VersionEditHandler::OnNonCfOperation(VersionEdit& edit,
318
318
  if (!s.ok()) {
319
319
  return s;
320
320
  }
321
- s = MaybeCreateVersion(edit, tmp_cfd, /*force_create_version=*/false);
322
- if (s.ok()) {
323
- s = builder_iter->second->version_builder()->Apply(&edit);
324
- }
321
+ s = MaybeCreateVersionBeforeApplyEdit(edit, tmp_cfd,
322
+ /*force_create_version=*/false);
325
323
  }
326
324
  *cfd = tmp_cfd;
327
325
  }
328
326
  return s;
329
327
  }
330
328
 
331
- // TODO maybe cache the computation result
332
- bool VersionEditHandler::HasMissingFiles() const {
333
- bool ret = false;
334
- for (const auto& elem : cf_to_missing_files_) {
335
- const auto& missing_files = elem.second;
336
- if (!missing_files.empty()) {
337
- ret = true;
338
- break;
339
- }
340
- }
341
- if (!ret) {
342
- for (const auto& elem : cf_to_missing_blob_files_high_) {
343
- if (elem.second != kInvalidBlobFileNumber) {
344
- ret = true;
345
- break;
346
- }
347
- }
348
- }
349
- return ret;
350
- }
351
-
352
329
  void VersionEditHandler::CheckColumnFamilyId(const VersionEdit& edit,
353
- bool* cf_in_not_found,
330
+ bool* do_not_open_cf,
354
331
  bool* cf_in_builders) const {
355
- assert(cf_in_not_found != nullptr);
332
+ assert(do_not_open_cf != nullptr);
356
333
  assert(cf_in_builders != nullptr);
357
334
  // Not found means that user didn't supply that column
358
335
  // family option AND we encountered column family add
359
336
  // record. Once we encounter column family drop record,
360
337
  // we will delete the column family from
361
- // column_families_not_found.
338
+ // do_not_open_column_families_.
362
339
  uint32_t cf_id = edit.GetColumnFamily();
363
- bool in_not_found = column_families_not_found_.find(cf_id) !=
364
- column_families_not_found_.end();
340
+ bool in_do_not_open = do_not_open_column_families_.find(cf_id) !=
341
+ do_not_open_column_families_.end();
365
342
  // in builders means that user supplied that column family
366
343
  // option AND that we encountered column family add record
367
344
  bool in_builders = builders_.find(cf_id) != builders_.end();
368
345
  // They cannot both be true
369
- assert(!(in_not_found && in_builders));
370
- *cf_in_not_found = in_not_found;
346
+ assert(!(in_do_not_open && in_builders));
347
+ *do_not_open_cf = in_do_not_open;
371
348
  *cf_in_builders = in_builders;
372
349
  }
373
350
 
@@ -396,9 +373,9 @@ void VersionEditHandler::CheckIterationResult(const log::Reader& reader,
396
373
  // There were some column families in the MANIFEST that weren't specified
397
374
  // in the argument. This is OK in read_only mode
398
375
  if (s->ok() && MustOpenAllColumnFamilies() &&
399
- !column_families_not_found_.empty()) {
376
+ !do_not_open_column_families_.empty()) {
400
377
  std::string msg;
401
- for (const auto& cf : column_families_not_found_) {
378
+ for (const auto& cf : do_not_open_column_families_) {
402
379
  msg.append(", ");
403
380
  msg.append(cf.second);
404
381
  }
@@ -453,7 +430,8 @@ void VersionEditHandler::CheckIterationResult(const log::Reader& reader,
453
430
  }
454
431
  assert(cfd->initialized());
455
432
  VersionEdit edit;
456
- *s = MaybeCreateVersion(edit, cfd, /*force_create_version=*/true);
433
+ *s = MaybeCreateVersionBeforeApplyEdit(edit, cfd,
434
+ /*force_create_version=*/true);
457
435
  if (!s->ok()) {
458
436
  break;
459
437
  }
@@ -498,13 +476,9 @@ ColumnFamilyData* VersionEditHandler::CreateCfAndInit(
498
476
  assert(cfd != nullptr);
499
477
  cfd->set_initialized();
500
478
  assert(builders_.find(cf_id) == builders_.end());
501
- builders_.emplace(cf_id,
502
- VersionBuilderUPtr(new BaseReferencedVersionBuilder(cfd)));
503
- if (track_found_and_missing_files_) {
504
- cf_to_found_files_.emplace(cf_id, std::unordered_set<uint64_t>());
505
- cf_to_missing_files_.emplace(cf_id, std::unordered_set<uint64_t>());
506
- cf_to_missing_blob_files_high_.emplace(cf_id, kInvalidBlobFileNumber);
507
- }
479
+ builders_.emplace(cf_id, VersionBuilderUPtr(new BaseReferencedVersionBuilder(
480
+ cfd, this, track_found_and_missing_files_,
481
+ allow_incomplete_valid_version_)));
508
482
  return cfd;
509
483
  }
510
484
 
@@ -514,21 +488,6 @@ ColumnFamilyData* VersionEditHandler::DestroyCfAndCleanup(
514
488
  auto builder_iter = builders_.find(cf_id);
515
489
  assert(builder_iter != builders_.end());
516
490
  builders_.erase(builder_iter);
517
- if (track_found_and_missing_files_) {
518
- auto found_files_iter = cf_to_found_files_.find(cf_id);
519
- assert(found_files_iter != cf_to_found_files_.end());
520
- cf_to_found_files_.erase(found_files_iter);
521
-
522
- auto missing_files_iter = cf_to_missing_files_.find(cf_id);
523
- assert(missing_files_iter != cf_to_missing_files_.end());
524
- cf_to_missing_files_.erase(missing_files_iter);
525
-
526
- auto missing_blob_files_high_iter =
527
- cf_to_missing_blob_files_high_.find(cf_id);
528
- assert(missing_blob_files_high_iter !=
529
- cf_to_missing_blob_files_high_.end());
530
- cf_to_missing_blob_files_high_.erase(missing_blob_files_high_iter);
531
- }
532
491
  ColumnFamilyData* ret =
533
492
  version_set_->GetColumnFamilySet()->GetColumnFamily(cf_id);
534
493
  assert(ret != nullptr);
@@ -538,15 +497,14 @@ ColumnFamilyData* VersionEditHandler::DestroyCfAndCleanup(
538
497
  return ret;
539
498
  }
540
499
 
541
- Status VersionEditHandler::MaybeCreateVersion(const VersionEdit& /*edit*/,
542
- ColumnFamilyData* cfd,
543
- bool force_create_version) {
500
+ Status VersionEditHandler::MaybeCreateVersionBeforeApplyEdit(
501
+ const VersionEdit& edit, ColumnFamilyData* cfd, bool force_create_version) {
544
502
  assert(cfd->initialized());
545
503
  Status s;
504
+ auto builder_iter = builders_.find(cfd->GetID());
505
+ assert(builder_iter != builders_.end());
506
+ auto* builder = builder_iter->second->version_builder();
546
507
  if (force_create_version) {
547
- auto builder_iter = builders_.find(cfd->GetID());
548
- assert(builder_iter != builders_.end());
549
- auto* builder = builder_iter->second->version_builder();
550
508
  auto* v = new Version(cfd, version_set_, version_set_->file_options_,
551
509
  *cfd->GetLatestMutableCFOptions(), io_tracer_,
552
510
  version_set_->current_version_number_++,
@@ -562,6 +520,7 @@ Status VersionEditHandler::MaybeCreateVersion(const VersionEdit& /*edit*/,
562
520
  delete v;
563
521
  }
564
522
  }
523
+ s = builder->Apply(&edit);
565
524
  return s;
566
525
  }
567
526
 
@@ -731,12 +690,13 @@ Status VersionEditHandler::MaybeHandleFileBoundariesForNewFiles(
731
690
  VersionEditHandlerPointInTime::VersionEditHandlerPointInTime(
732
691
  bool read_only, std::vector<ColumnFamilyDescriptor> column_families,
733
692
  VersionSet* version_set, const std::shared_ptr<IOTracer>& io_tracer,
734
- const ReadOptions& read_options,
693
+ const ReadOptions& read_options, bool allow_incomplete_valid_version,
735
694
  EpochNumberRequirement epoch_number_requirement)
736
695
  : VersionEditHandler(read_only, column_families, version_set,
737
696
  /*track_found_and_missing_files=*/true,
738
697
  /*no_error_if_files_missing=*/true, io_tracer,
739
- read_options, epoch_number_requirement) {}
698
+ read_options, allow_incomplete_valid_version,
699
+ epoch_number_requirement) {}
740
700
 
741
701
  VersionEditHandlerPointInTime::~VersionEditHandlerPointInTime() {
742
702
  for (const auto& cfid_and_version : atomic_update_versions_) {
@@ -762,7 +722,8 @@ Status VersionEditHandlerPointInTime::OnAtomicGroupReplayBegin() {
762
722
  assert(!cfd->IsDropped());
763
723
  assert(cfd->initialized());
764
724
  VersionEdit edit;
765
- Status s = MaybeCreateVersion(edit, cfd, true /* force_create_version */);
725
+ Status s = MaybeCreateVersionBeforeApplyEdit(
726
+ edit, cfd, true /* force_create_version */);
766
727
  if (!s.ok()) {
767
728
  return s;
768
729
  }
@@ -824,17 +785,17 @@ void VersionEditHandlerPointInTime::CheckIterationResult(
824
785
  }
825
786
  assert(cfd->initialized());
826
787
  auto v_iter = versions_.find(cfd->GetID());
788
+ auto builder_iter = builders_.find(cfd->GetID());
827
789
  if (v_iter != versions_.end()) {
828
790
  assert(v_iter->second != nullptr);
791
+ assert(builder_iter != builders_.end());
829
792
 
830
793
  version_set_->AppendVersion(cfd, v_iter->second);
831
794
  versions_.erase(v_iter);
832
795
  // Let's clear found_files, since any files in that are part of the
833
796
  // installed Version. Any files that got obsoleted would have already
834
797
  // been moved to intermediate_files_
835
- auto found_files_iter = cf_to_found_files_.find(cfd->GetID());
836
- assert(found_files_iter != cf_to_found_files_.end());
837
- found_files_iter->second.clear();
798
+ builder_iter->second->version_builder()->ClearFoundFiles();
838
799
  }
839
800
  }
840
801
  } else {
@@ -863,147 +824,50 @@ ColumnFamilyData* VersionEditHandlerPointInTime::DestroyCfAndCleanup(
863
824
  return cfd;
864
825
  }
865
826
 
866
- Status VersionEditHandlerPointInTime::MaybeCreateVersion(
827
+ Status VersionEditHandlerPointInTime::MaybeCreateVersionBeforeApplyEdit(
867
828
  const VersionEdit& edit, ColumnFamilyData* cfd, bool force_create_version) {
868
- TEST_SYNC_POINT("VersionEditHandlerPointInTime::MaybeCreateVersion:Begin1");
869
- TEST_SYNC_POINT("VersionEditHandlerPointInTime::MaybeCreateVersion:Begin2");
829
+ TEST_SYNC_POINT(
830
+ "VersionEditHandlerPointInTime::MaybeCreateVersionBeforeApplyEdit:"
831
+ "Begin1");
832
+ TEST_SYNC_POINT(
833
+ "VersionEditHandlerPointInTime::MaybeCreateVersionBeforeApplyEdit:"
834
+ "Begin2");
870
835
  assert(cfd != nullptr);
871
836
  if (!force_create_version) {
872
837
  assert(edit.GetColumnFamily() == cfd->GetID());
873
838
  }
874
- auto found_files_iter = cf_to_found_files_.find(cfd->GetID());
875
- assert(found_files_iter != cf_to_found_files_.end());
876
- std::unordered_set<uint64_t>& found_files = found_files_iter->second;
877
-
878
- auto missing_files_iter = cf_to_missing_files_.find(cfd->GetID());
879
- assert(missing_files_iter != cf_to_missing_files_.end());
880
- std::unordered_set<uint64_t>& missing_files = missing_files_iter->second;
881
-
882
- auto missing_blob_files_high_iter =
883
- cf_to_missing_blob_files_high_.find(cfd->GetID());
884
- assert(missing_blob_files_high_iter != cf_to_missing_blob_files_high_.end());
885
- const uint64_t prev_missing_blob_file_high =
886
- missing_blob_files_high_iter->second;
887
-
888
- VersionBuilder* builder = nullptr;
889
-
890
- if (prev_missing_blob_file_high != kInvalidBlobFileNumber) {
891
- auto builder_iter = builders_.find(cfd->GetID());
892
- assert(builder_iter != builders_.end());
893
- builder = builder_iter->second->version_builder();
894
- assert(builder != nullptr);
895
- }
896
-
897
- // At this point, we have not yet applied the new version edits read from the
898
- // MANIFEST. We check whether we have any missing table and blob files.
899
- const bool prev_has_missing_files =
900
- !missing_files.empty() ||
901
- (prev_missing_blob_file_high != kInvalidBlobFileNumber &&
902
- prev_missing_blob_file_high >= builder->GetMinOldestBlobFileNumber());
903
-
904
- for (const auto& file : edit.GetDeletedFiles()) {
905
- uint64_t file_num = file.second;
906
- auto fiter = missing_files.find(file_num);
907
- if (fiter != missing_files.end()) {
908
- missing_files.erase(fiter);
909
- } else {
910
- fiter = found_files.find(file_num);
911
- // Only mark new files added during this catchup attempt for deletion.
912
- // These files were never installed in VersionStorageInfo.
913
- // Already referenced files that are deleted by a VersionEdit will
914
- // be added to the VersionStorageInfo's obsolete files when the old
915
- // version is dereferenced.
916
- if (fiter != found_files.end()) {
917
- intermediate_files_.emplace_back(
918
- MakeTableFileName(cfd->ioptions()->cf_paths[0].path, file_num));
919
- found_files.erase(fiter);
920
- }
921
- }
922
- }
923
-
924
- assert(!cfd->ioptions()->cf_paths.empty());
925
- Status s;
926
- for (const auto& elem : edit.GetNewFiles()) {
927
- int level = elem.first;
928
- const FileMetaData& meta = elem.second;
929
- const FileDescriptor& fd = meta.fd;
930
- uint64_t file_num = fd.GetNumber();
931
- const std::string fpath =
932
- MakeTableFileName(cfd->ioptions()->cf_paths[0].path, file_num);
933
- s = VerifyFile(cfd, fpath, level, meta);
934
- if (s.IsPathNotFound() || s.IsNotFound() || s.IsCorruption()) {
935
- missing_files.insert(file_num);
936
- if (s.IsCorruption()) {
937
- found_files.insert(file_num);
938
- }
939
- s = Status::OK();
940
- } else if (!s.ok()) {
941
- break;
942
- } else {
943
- found_files.insert(file_num);
944
- }
945
- }
946
-
947
- uint64_t missing_blob_file_num = prev_missing_blob_file_high;
948
- for (const auto& elem : edit.GetBlobFileAdditions()) {
949
- uint64_t file_num = elem.GetBlobFileNumber();
950
- s = VerifyBlobFile(cfd, file_num, elem);
951
- if (s.IsPathNotFound() || s.IsNotFound() || s.IsCorruption()) {
952
- missing_blob_file_num = std::max(missing_blob_file_num, file_num);
953
- s = Status::OK();
954
- } else if (!s.ok()) {
955
- break;
956
- }
957
- }
958
-
959
- bool has_missing_blob_files = false;
960
- if (missing_blob_file_num != kInvalidBlobFileNumber &&
961
- missing_blob_file_num >= prev_missing_blob_file_high) {
962
- missing_blob_files_high_iter->second = missing_blob_file_num;
963
- has_missing_blob_files = true;
964
- } else if (missing_blob_file_num < prev_missing_blob_file_high) {
965
- assert(false);
966
- }
967
-
968
- // We still have not applied the new version edit, but have tried to add new
969
- // table and blob files after verifying their presence and consistency.
970
- // Therefore, we know whether we will see new missing table and blob files
971
- // later after actually applying the version edit. We perform the check here
972
- // and record the result.
973
- const bool has_missing_files =
974
- !missing_files.empty() || has_missing_blob_files;
975
839
 
976
840
  bool missing_info = !version_edit_params_.HasLogNumber() ||
977
841
  !version_edit_params_.HasNextFile() ||
978
842
  !version_edit_params_.HasLastSequence();
979
843
 
980
- // Create version before apply edit. The version will represent the state
981
- // before applying the version edit.
844
+ Status s;
845
+ auto builder_iter = builders_.find(cfd->GetID());
846
+ assert(builder_iter != builders_.end());
847
+ VersionBuilder* builder = builder_iter->second->version_builder();
848
+ const bool valid_pit_before_edit = builder->ValidVersionAvailable();
849
+ builder->CreateOrReplaceSavePoint();
850
+ s = builder->Apply(&edit);
851
+ const bool valid_pit_after_edit = builder->ValidVersionAvailable();
852
+
982
853
  // A new version will be created if:
983
854
  // 1) no error has occurred so far, and
984
855
  // 2) log_number_, next_file_number_ and last_sequence_ are known, and
985
856
  // 3) not in an AtomicGroup
986
857
  // 4) any of the following:
987
- // a) no missing file before, but will have missing file(s) after applying
988
- // this version edit.
989
- // b) no missing file after applying the version edit, and the caller
990
- // explicitly request that a new version be created.
858
+ // a) a valid Version is available before applying the edit
859
+ // and a valid Version is not available after the edit.
860
+ // b) a valid Version is available after the edit and the
861
+ // caller explicitly request that a new version be created.
991
862
  if (s.ok() && !missing_info && !in_atomic_group_ &&
992
- ((has_missing_files && !prev_has_missing_files) ||
993
- (!has_missing_files && force_create_version))) {
994
- if (!builder) {
995
- auto builder_iter = builders_.find(cfd->GetID());
996
- assert(builder_iter != builders_.end());
997
- builder = builder_iter->second->version_builder();
998
- assert(builder);
999
- }
1000
-
863
+ ((!valid_pit_after_edit && valid_pit_before_edit) ||
864
+ (valid_pit_after_edit && force_create_version))) {
1001
865
  const MutableCFOptions* cf_opts_ptr = cfd->GetLatestMutableCFOptions();
1002
866
  auto* version = new Version(cfd, version_set_, version_set_->file_options_,
1003
867
  *cf_opts_ptr, io_tracer_,
1004
868
  version_set_->current_version_number_++,
1005
869
  epoch_number_requirement_);
1006
- s = builder->LoadTableHandlers(
870
+ s = builder->LoadSavePointTableHandlers(
1007
871
  cfd->internal_stats(),
1008
872
  version_set_->db_options_->max_file_opening_threads, false, true,
1009
873
  cf_opts_ptr->prefix_extractor, MaxFileSizeForL0MetaPin(*cf_opts_ptr),
@@ -1015,7 +879,7 @@ Status VersionEditHandlerPointInTime::MaybeCreateVersion(
1015
879
  }
1016
880
  return s;
1017
881
  }
1018
- s = builder->SaveTo(version->storage_info());
882
+ s = builder->SaveSavePointTo(version->storage_info());
1019
883
  if (s.ok()) {
1020
884
  if (AtomicUpdateVersionsContains(cfd->GetID())) {
1021
885
  AtomicUpdateVersionsPut(version);
@@ -1038,6 +902,8 @@ Status VersionEditHandlerPointInTime::MaybeCreateVersion(
1038
902
  delete version;
1039
903
  }
1040
904
  }
905
+
906
+ builder->ClearSavePoint();
1041
907
  return s;
1042
908
  }
1043
909
 
@@ -1072,6 +938,15 @@ Status VersionEditHandlerPointInTime::LoadTables(
1072
938
  return Status::OK();
1073
939
  }
1074
940
 
941
+ bool VersionEditHandlerPointInTime::HasMissingFiles() const {
942
+ for (const auto& builder : builders_) {
943
+ if (builder.second->version_builder()->HasMissingFiles()) {
944
+ return true;
945
+ }
946
+ }
947
+ return false;
948
+ }
949
+
1075
950
  bool VersionEditHandlerPointInTime::AtomicUpdateVersionsCompleted() {
1076
951
  return atomic_update_versions_missing_ == 0;
1077
952
  }
@@ -1145,8 +1020,9 @@ Status ManifestTailer::Initialize() {
1145
1020
  Version* base_version = dummy_version->Next();
1146
1021
  assert(base_version);
1147
1022
  base_version->Ref();
1148
- VersionBuilderUPtr new_builder(
1149
- new BaseReferencedVersionBuilder(default_cfd, base_version));
1023
+ VersionBuilderUPtr new_builder(new BaseReferencedVersionBuilder(
1024
+ default_cfd, base_version, this, track_found_and_missing_files_,
1025
+ allow_incomplete_valid_version_));
1150
1026
  builder_iter->second = std::move(new_builder);
1151
1027
 
1152
1028
  initialized_ = true;
@@ -1189,8 +1065,8 @@ Status ManifestTailer::OnColumnFamilyAdd(VersionEdit& edit,
1189
1065
  Version* base_version = dummy_version->Next();
1190
1066
  assert(base_version);
1191
1067
  base_version->Ref();
1192
- VersionBuilderUPtr new_builder(
1193
- new BaseReferencedVersionBuilder(tmp_cfd, base_version));
1068
+ VersionBuilderUPtr new_builder(new BaseReferencedVersionBuilder(
1069
+ tmp_cfd, base_version, this, track_found_and_missing_files_));
1194
1070
  builder_iter->second = std::move(new_builder);
1195
1071
 
1196
1072
  #ifndef NDEBUG
@@ -1213,6 +1089,18 @@ void ManifestTailer::CheckIterationResult(const log::Reader& reader,
1213
1089
  }
1214
1090
  }
1215
1091
 
1092
+ std::vector<std::string> ManifestTailer::GetAndClearIntermediateFiles() {
1093
+ std::vector<std::string> res;
1094
+ for (const auto& builder : builders_) {
1095
+ auto files =
1096
+ builder.second->version_builder()->GetAndClearIntermediateFiles();
1097
+ res.insert(res.end(), std::make_move_iterator(files.begin()),
1098
+ std::make_move_iterator(files.end()));
1099
+ files.erase(files.begin(), files.end());
1100
+ }
1101
+ return res;
1102
+ }
1103
+
1216
1104
  Status ManifestTailer::VerifyFile(ColumnFamilyData* cfd,
1217
1105
  const std::string& fpath, int level,
1218
1106
  const FileMetaData& fmeta) {