@nxtedition/rocksdb 12.1.4 → 12.1.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (132)
  1. package/binding.cc +1 -1
  2. package/deps/rocksdb/rocksdb/Makefile +10 -5
  3. package/deps/rocksdb/rocksdb/TARGETS +9 -7
  4. package/deps/rocksdb/rocksdb/cache/cache.cc +15 -11
  5. package/deps/rocksdb/rocksdb/cache/cache_test.cc +26 -0
  6. package/deps/rocksdb/rocksdb/cache/clock_cache.cc +16 -0
  7. package/deps/rocksdb/rocksdb/cache/clock_cache.h +6 -0
  8. package/deps/rocksdb/rocksdb/cache/compressed_secondary_cache.cc +38 -8
  9. package/deps/rocksdb/rocksdb/cache/compressed_secondary_cache.h +2 -0
  10. package/deps/rocksdb/rocksdb/cache/compressed_secondary_cache_test.cc +4 -0
  11. package/deps/rocksdb/rocksdb/cache/lru_cache.cc +11 -0
  12. package/deps/rocksdb/rocksdb/cache/lru_cache.h +6 -0
  13. package/deps/rocksdb/rocksdb/cache/secondary_cache_adapter.cc +2 -1
  14. package/deps/rocksdb/rocksdb/cache/tiered_secondary_cache_test.cc +56 -0
  15. package/deps/rocksdb/rocksdb/db/arena_wrapped_db_iter.cc +12 -9
  16. package/deps/rocksdb/rocksdb/db/blob/blob_file_cache.cc +10 -0
  17. package/deps/rocksdb/rocksdb/db/blob/blob_file_cache.h +9 -0
  18. package/deps/rocksdb/rocksdb/db/c.cc +9 -0
  19. package/deps/rocksdb/rocksdb/db/c_test.c +12 -1
  20. package/deps/rocksdb/rocksdb/db/column_family.cc +6 -23
  21. package/deps/rocksdb/rocksdb/db/column_family.h +1 -2
  22. package/deps/rocksdb/rocksdb/db/compaction/compaction.cc +4 -5
  23. package/deps/rocksdb/rocksdb/db/compaction/compaction.h +4 -4
  24. package/deps/rocksdb/rocksdb/db/compaction/compaction_job.cc +14 -6
  25. package/deps/rocksdb/rocksdb/db/compaction/compaction_job.h +19 -16
  26. package/deps/rocksdb/rocksdb/db/compaction/compaction_job_test.cc +34 -30
  27. package/deps/rocksdb/rocksdb/db/compaction/compaction_outputs.h +2 -1
  28. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker.cc +2 -1
  29. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker.h +1 -1
  30. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_fifo.cc +16 -31
  31. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_fifo.h +2 -1
  32. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_test.cc +7 -50
  33. package/deps/rocksdb/rocksdb/db/compaction/compaction_service_job.cc +95 -84
  34. package/deps/rocksdb/rocksdb/db/compaction/compaction_service_test.cc +616 -5
  35. package/deps/rocksdb/rocksdb/db/compaction/compaction_state.cc +1 -1
  36. package/deps/rocksdb/rocksdb/db/compaction/subcompaction_state.cc +1 -1
  37. package/deps/rocksdb/rocksdb/db/compaction/subcompaction_state.h +1 -1
  38. package/deps/rocksdb/rocksdb/db/compaction/tiered_compaction_test.cc +8 -2
  39. package/deps/rocksdb/rocksdb/db/db_basic_test.cc +93 -69
  40. package/deps/rocksdb/rocksdb/db/db_bloom_filter_test.cc +353 -89
  41. package/deps/rocksdb/rocksdb/db/db_compaction_test.cc +4 -3
  42. package/deps/rocksdb/rocksdb/db/db_impl/db_impl.cc +116 -14
  43. package/deps/rocksdb/rocksdb/db/db_impl/db_impl.h +67 -8
  44. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_compaction_flush.cc +42 -14
  45. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_debug.cc +50 -0
  46. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_experimental.cc +1 -1
  47. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_files.cc +79 -32
  48. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_open.cc +36 -59
  49. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_secondary.cc +72 -39
  50. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_write.cc +14 -12
  51. package/deps/rocksdb/rocksdb/db/db_io_failure_test.cc +75 -0
  52. package/deps/rocksdb/rocksdb/db/db_iter.cc +7 -3
  53. package/deps/rocksdb/rocksdb/db/db_secondary_test.cc +1 -1
  54. package/deps/rocksdb/rocksdb/db/db_sst_test.cc +24 -0
  55. package/deps/rocksdb/rocksdb/db/db_test2.cc +36 -22
  56. package/deps/rocksdb/rocksdb/db/db_wal_test.cc +23 -0
  57. package/deps/rocksdb/rocksdb/db/db_with_timestamp_basic_test.cc +2 -0
  58. package/deps/rocksdb/rocksdb/db/error_handler.cc +28 -3
  59. package/deps/rocksdb/rocksdb/db/error_handler.h +2 -1
  60. package/deps/rocksdb/rocksdb/db/event_helpers.cc +1 -0
  61. package/deps/rocksdb/rocksdb/db/experimental.cc +165 -33
  62. package/deps/rocksdb/rocksdb/db/external_sst_file_ingestion_job.cc +13 -5
  63. package/deps/rocksdb/rocksdb/db/external_sst_file_test.cc +37 -28
  64. package/deps/rocksdb/rocksdb/db/flush_job.cc +11 -6
  65. package/deps/rocksdb/rocksdb/db/flush_job_test.cc +7 -6
  66. package/deps/rocksdb/rocksdb/db/forward_iterator.cc +14 -6
  67. package/deps/rocksdb/rocksdb/db/job_context.h +4 -0
  68. package/deps/rocksdb/rocksdb/db/memtable.cc +24 -14
  69. package/deps/rocksdb/rocksdb/db/memtable.h +2 -1
  70. package/deps/rocksdb/rocksdb/db/memtable_list.cc +61 -33
  71. package/deps/rocksdb/rocksdb/db/memtable_list.h +8 -0
  72. package/deps/rocksdb/rocksdb/db/repair.cc +4 -2
  73. package/deps/rocksdb/rocksdb/db/table_cache.cc +2 -0
  74. package/deps/rocksdb/rocksdb/db/version_builder.cc +14 -11
  75. package/deps/rocksdb/rocksdb/db/version_edit_handler.h +20 -4
  76. package/deps/rocksdb/rocksdb/db/version_set.cc +40 -30
  77. package/deps/rocksdb/rocksdb/db/version_set.h +13 -3
  78. package/deps/rocksdb/rocksdb/db/version_set_test.cc +8 -76
  79. package/deps/rocksdb/rocksdb/db/write_batch.cc +6 -2
  80. package/deps/rocksdb/rocksdb/db/write_batch_test.cc +1 -1
  81. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_common.h +1 -0
  82. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_gflags.cc +5 -1
  83. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_test_base.cc +2 -1
  84. package/deps/rocksdb/rocksdb/db_stress_tool/expected_state.cc +25 -2
  85. package/deps/rocksdb/rocksdb/env/fs_remap.cc +11 -0
  86. package/deps/rocksdb/rocksdb/env/fs_remap.h +5 -0
  87. package/deps/rocksdb/rocksdb/file/sst_file_manager_impl.cc +11 -1
  88. package/deps/rocksdb/rocksdb/file/sst_file_manager_impl.h +3 -1
  89. package/deps/rocksdb/rocksdb/include/rocksdb/advanced_cache.h +20 -1
  90. package/deps/rocksdb/rocksdb/include/rocksdb/advanced_options.h +10 -8
  91. package/deps/rocksdb/rocksdb/include/rocksdb/c.h +4 -0
  92. package/deps/rocksdb/rocksdb/include/rocksdb/compaction_job_stats.h +30 -28
  93. package/deps/rocksdb/rocksdb/include/rocksdb/comparator.h +10 -5
  94. package/deps/rocksdb/rocksdb/include/rocksdb/convenience.h +3 -1
  95. package/deps/rocksdb/rocksdb/include/rocksdb/experimental.h +287 -83
  96. package/deps/rocksdb/rocksdb/include/rocksdb/options.h +68 -36
  97. package/deps/rocksdb/rocksdb/include/rocksdb/table_properties.h +8 -0
  98. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/ldb_cmd.h +1 -0
  99. package/deps/rocksdb/rocksdb/include/rocksdb/version.h +2 -2
  100. package/deps/rocksdb/rocksdb/memtable/inlineskiplist.h +4 -4
  101. package/deps/rocksdb/rocksdb/options/customizable_test.cc +31 -0
  102. package/deps/rocksdb/rocksdb/options/db_options.cc +14 -0
  103. package/deps/rocksdb/rocksdb/options/db_options.h +2 -0
  104. package/deps/rocksdb/rocksdb/options/options_helper.cc +15 -4
  105. package/deps/rocksdb/rocksdb/options/options_helper.h +4 -0
  106. package/deps/rocksdb/rocksdb/options/options_parser.cc +5 -4
  107. package/deps/rocksdb/rocksdb/options/options_settable_test.cc +11 -1
  108. package/deps/rocksdb/rocksdb/options/options_test.cc +38 -45
  109. package/deps/rocksdb/rocksdb/port/port.h +16 -0
  110. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_builder.cc +8 -1
  111. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader.cc +10 -20
  112. package/deps/rocksdb/rocksdb/table/block_based/filter_policy.cc +15 -9
  113. package/deps/rocksdb/rocksdb/table/format.cc +32 -4
  114. package/deps/rocksdb/rocksdb/table/format.h +12 -1
  115. package/deps/rocksdb/rocksdb/table/iterator.cc +4 -0
  116. package/deps/rocksdb/rocksdb/table/meta_blocks.cc +214 -161
  117. package/deps/rocksdb/rocksdb/table/plain/plain_table_reader.cc +4 -2
  118. package/deps/rocksdb/rocksdb/table/table_properties.cc +4 -0
  119. package/deps/rocksdb/rocksdb/table/table_reader.h +2 -2
  120. package/deps/rocksdb/rocksdb/table/table_test.cc +5 -4
  121. package/deps/rocksdb/rocksdb/test_util/testutil.cc +2 -0
  122. package/deps/rocksdb/rocksdb/test_util/testutil.h +2 -0
  123. package/deps/rocksdb/rocksdb/tools/db_bench_tool.cc +11 -2
  124. package/deps/rocksdb/rocksdb/tools/ldb_cmd.cc +213 -22
  125. package/deps/rocksdb/rocksdb/tools/ldb_cmd_impl.h +3 -0
  126. package/deps/rocksdb/rocksdb/util/async_file_reader.h +1 -1
  127. package/deps/rocksdb/rocksdb/util/compaction_job_stats_impl.cc +3 -0
  128. package/deps/rocksdb/rocksdb/util/coro_utils.h +2 -2
  129. package/deps/rocksdb/rocksdb/utilities/fault_injection_fs.cc +3 -3
  130. package/package.json +1 -1
  131. package/prebuilds/darwin-arm64/@nxtedition+rocksdb.node +0 -0
  132. package/prebuilds/linux-x64/@nxtedition+rocksdb.node +0 -0
@@ -466,14 +466,27 @@ struct CompactionServiceJobInfo {
466
466
 
467
467
  Env::Priority priority;
468
468
 
469
+ // Additional Compaction Details that can be useful in the CompactionService
470
+ CompactionReason compaction_reason;
471
+ bool is_full_compaction;
472
+ bool is_manual_compaction;
473
+ bool bottommost_level;
474
+
469
475
  CompactionServiceJobInfo(std::string db_name_, std::string db_id_,
470
476
  std::string db_session_id_, uint64_t job_id_,
471
- Env::Priority priority_)
477
+ Env::Priority priority_,
478
+ CompactionReason compaction_reason_,
479
+ bool is_full_compaction_, bool is_manual_compaction_,
480
+ bool bottommost_level_)
472
481
  : db_name(std::move(db_name_)),
473
482
  db_id(std::move(db_id_)),
474
483
  db_session_id(std::move(db_session_id_)),
475
484
  job_id(job_id_),
476
- priority(priority_) {}
485
+ priority(priority_),
486
+ compaction_reason(compaction_reason_),
487
+ is_full_compaction(is_full_compaction_),
488
+ is_manual_compaction(is_manual_compaction_),
489
+ bottommost_level(bottommost_level_) {}
477
490
  };
478
491
 
479
492
  struct CompactionServiceScheduleResponse {
@@ -1380,16 +1393,35 @@ struct DBOptions {
1380
1393
  // ReadOptions::background_purge_on_iterator_cleanup.
1381
1394
  bool avoid_unnecessary_blocking_io = false;
1382
1395
 
1383
- // Historically DB ID has always been stored in Identity File in DB folder.
1384
- // If this flag is true, the DB ID is written to Manifest file in addition
1385
- // to the Identity file. By doing this 2 problems are solved
1386
- // 1. We don't checksum the Identity file where as Manifest file is.
1387
- // 2. Since the source of truth for DB is Manifest file DB ID will sit with
1388
- // the source of truth. Previously the Identity file could be copied
1389
- // independent of Manifest and that can result in wrong DB ID.
1390
- // We recommend setting this flag to true.
1391
- // Default: false
1392
- bool write_dbid_to_manifest = false;
1396
+ // The DB unique ID can be saved in the DB manifest (preferred, this option)
1397
+ // or an IDENTITY file (historical, deprecated), or both. If this option is
1398
+ // set to false (old behavior), then write_identity_file must be set to true.
1399
+ // The manifest is preferred because
1400
+ // 1. The IDENTITY file is not checksummed, so it is not as safe against
1401
+ // corruption.
1402
+ // 2. The IDENTITY file may or may not be copied with the DB (e.g. not
1403
+ // copied by BackupEngine), so is not reliable for the provenance of a DB.
1404
+ // This option might eventually be obsolete and removed as Identity files
1405
+ // are phased out.
1406
+ bool write_dbid_to_manifest = true;
1407
+
1408
+ // It is expected that the Identity file will be obsoleted by recording
1409
+ // DB ID in the manifest (see write_dbid_to_manifest). Setting this to true
1410
+ // maintains the historical behavior of writing an Identity file, while
1411
+ // setting to false is expected to be the future default. This option might
1412
+ // eventually be obsolete and removed as Identity files are phased out.
1413
+ bool write_identity_file = true;
1414
+
1415
+ // Historically, when prefix_extractor != nullptr, iterators have an
1416
+ // unfortunate default semantics of *possibly* only returning data
1417
+ // within the same prefix. To avoid "spooky action at a distance," iterator
1418
+ // bounds should come from the instantiation or seeking of the iterator,
1419
+ // not from a mutable column family option.
1420
+ //
1421
+ // When set to true, it is as if every iterator is created with
1422
+ // total_order_seek=true and only auto_prefix_mode=true and
1423
+ // prefix_same_as_start=true can take advantage of prefix seek optimizations.
1424
+ bool prefix_seek_opt_in_only = false;
1393
1425
 
1394
1426
  // The number of bytes to prefetch when reading the log. This is mostly useful
1395
1427
  // for reading a remotely located log, as it can save the number of
@@ -1840,10 +1872,10 @@ struct ReadOptions {
1840
1872
  bool auto_prefix_mode = false;
1841
1873
 
1842
1874
  // Enforce that the iterator only iterates over the same prefix as the seek.
1843
- // This option is effective only for prefix seeks, i.e. prefix_extractor is
1844
- // non-null for the column family and total_order_seek is false. Unlike
1845
- // iterate_upper_bound, prefix_same_as_start only works within a prefix
1846
- // but in both directions.
1875
+ // This makes the iterator bounds dependent on the column family's current
1876
+ // prefix_extractor, which is mutable. When SST files have been built with
1877
+ // the same prefix extractor, prefix filtering optimizations will be used
1878
+ // for both Seek and SeekForPrev.
1847
1879
  bool prefix_same_as_start = false;
1848
1880
 
1849
1881
  // Keep the blocks loaded by the iterator pinned in memory as long as the
@@ -2131,8 +2163,16 @@ struct CompactRangeOptions {
2131
2163
  // IngestExternalFileOptions is used by IngestExternalFile()
2132
2164
  struct IngestExternalFileOptions {
2133
2165
  // Can be set to true to move the files instead of copying them.
2166
+ // The input files will be unlinked after successful ingestion.
2167
+ // The implementation depends on hard links (LinkFile) instead of traditional
2168
+ // move (RenameFile) to maximize the chances to restore to the original
2169
+ // state upon failure.
2134
2170
  bool move_files = false;
2135
- // If set to true, ingestion falls back to copy when move fails.
2171
+ // Same as move_files except that input files will NOT be unlinked.
2172
+ // Only one of `move_files` and `link_files` can be set at the same time.
2173
+ bool link_files = false;
2174
+ // If set to true, ingestion falls back to copy when hard linking fails.
2175
+ // This applies to both `move_files` and `link_files`.
2136
2176
  bool failed_move_fall_back_to_copy = true;
2137
2177
  // If set to false, an ingested file's keys could appear in existing snapshots
2138
2178
  // that were created before the file was ingested.
@@ -2204,22 +2244,17 @@ struct IngestExternalFileOptions {
2204
2244
  // XXX: "bottommost" is obsolete/confusing terminology to refer to last level
2205
2245
  bool fail_if_not_bottommost_level = false;
2206
2246
  // EXPERIMENTAL
2207
- // If set to true, ingestion will
2208
- // - allow the files to not be generated by SstFileWriter, and
2209
- // - ignore cf_id mismatch between cf_id in the files and the CF they are
2210
- // being ingested into.
2211
- //
2212
- // REQUIRES:
2213
- // - files to be ingested do not overlap with existing keys.
2214
- // - write_global_seqno = false
2215
- // - move_files = false
2216
- //
2217
- // Warning: This ONLY works for SST files where all keys have sequence number
2218
- // zero and with no duplicated user keys (this should be guaranteed if the
2219
- // file is generated by a DB with zero as the largest sequence number).
2220
- // We scan the entire SST files to validate sequence numbers.
2221
- // Warning: If a DB contains ingested files generated by another DB/CF,
2222
- // RepairDB() may not correctly recover these files. It may lose these files.
2247
+ // Enables ingestion of files not generated by SstFileWriter. When true:
2248
+ // - Allows files to be ingested when their cf_id doesn't match the CF they
2249
+ // are being ingested into.
2250
+ // REQUIREMENTS:
2251
+ // - Ingested files must not overlap with existing keys.
2252
+ // - `write_global_seqno` must be false.
2253
+ // - All keys in ingested files should have sequence number 0. We fail
2254
+ // ingestion if any sequence number is non-zero.
2255
+ // WARNING: If a DB contains ingested files generated by another DB/CF,
2256
+ // RepairDB() may not recover these files correctly, potentially leading to
2257
+ // data loss.
2223
2258
  bool allow_db_generated_files = false;
2224
2259
  };
2225
2260
 
@@ -2284,9 +2319,6 @@ struct SizeApproximationOptions {
2284
2319
  };
2285
2320
 
2286
2321
  struct CompactionServiceOptionsOverride {
2287
- // Currently pointer configurations are not passed to compaction service
2288
- // compaction so the user needs to set it. It will be removed once pointer
2289
- // configuration passing is supported.
2290
2322
  Env* env = Env::Default();
2291
2323
  std::shared_ptr<FileChecksumGenFactory> file_checksum_gen_factory = nullptr;
2292
2324
 
@@ -74,6 +74,7 @@ struct TablePropertiesNames {
74
74
  static const std::string kSequenceNumberTimeMapping;
75
75
  static const std::string kTailStartOffset;
76
76
  static const std::string kUserDefinedTimestampsPersisted;
77
+ static const std::string kKeyLargestSeqno;
77
78
  };
78
79
 
79
80
  // `TablePropertiesCollector` provides the mechanism for users to collect
@@ -134,6 +135,7 @@ class TablePropertiesCollector {
134
135
 
135
136
  // Return the human-readable properties, where the key is property name and
136
137
  // the value is the human-readable form of value.
138
+ // Returned properties are used for logging.
137
139
  // It will only be called after Finish() has been called by RocksDB internal.
138
140
  virtual UserCollectedProperties GetReadableProperties() const = 0;
139
141
 
@@ -292,6 +294,12 @@ struct TableProperties {
292
294
  // it's explicitly written to meta properties block.
293
295
  uint64_t user_defined_timestamps_persisted = 1;
294
296
 
297
+ // The largest sequence number of keys in this file.
298
+ // UINT64_MAX means unknown.
299
+ // Only written to properties block if known (should be known unless the
300
+ // table is empty).
301
+ uint64_t key_largest_seqno = UINT64_MAX;
302
+
295
303
  // DB identity
296
304
  // db_id is an identifier generated the first time the DB is created
297
305
  // If DB identity is unset or unassigned, `db_id` will be an empty string.
@@ -74,6 +74,7 @@ class LDBCommand {
74
74
  static const std::string ARG_DECODE_BLOB_INDEX;
75
75
  static const std::string ARG_DUMP_UNCOMPRESSED_BLOBS;
76
76
  static const std::string ARG_READ_TIMESTAMP;
77
+ static const std::string ARG_GET_WRITE_UNIX_TIME;
77
78
 
78
79
  struct ParsedParams {
79
80
  std::string cmd;
@@ -12,8 +12,8 @@
12
12
  // NOTE: in 'main' development branch, this should be the *next*
13
13
  // minor or major version number planned for release.
14
14
  #define ROCKSDB_MAJOR 9
15
- #define ROCKSDB_MINOR 6
16
- #define ROCKSDB_PATCH 1
15
+ #define ROCKSDB_MINOR 7
16
+ #define ROCKSDB_PATCH 4
17
17
 
18
18
  // Do not use these. We made the mistake of declaring macros starting with
19
19
  // double underscore. Now we have to live with our choice. We'll deprecate these
@@ -969,12 +969,12 @@ bool InlineSkipList<Comparator>::Insert(const char* key, Splice* splice,
969
969
  while (true) {
970
970
  // Checking for duplicate keys on the level 0 is sufficient
971
971
  if (UNLIKELY(i == 0 && splice->next_[i] != nullptr &&
972
- compare_(x->Key(), splice->next_[i]->Key()) >= 0)) {
972
+ compare_(splice->next_[i]->Key(), key_decoded) <= 0)) {
973
973
  // duplicate key
974
974
  return false;
975
975
  }
976
976
  if (UNLIKELY(i == 0 && splice->prev_[i] != head_ &&
977
- compare_(splice->prev_[i]->Key(), x->Key()) >= 0)) {
977
+ compare_(splice->prev_[i]->Key(), key_decoded) >= 0)) {
978
978
  // duplicate key
979
979
  return false;
980
980
  }
@@ -1012,12 +1012,12 @@ bool InlineSkipList<Comparator>::Insert(const char* key, Splice* splice,
1012
1012
  }
1013
1013
  // Checking for duplicate keys on the level 0 is sufficient
1014
1014
  if (UNLIKELY(i == 0 && splice->next_[i] != nullptr &&
1015
- compare_(x->Key(), splice->next_[i]->Key()) >= 0)) {
1015
+ compare_(splice->next_[i]->Key(), key_decoded) <= 0)) {
1016
1016
  // duplicate key
1017
1017
  return false;
1018
1018
  }
1019
1019
  if (UNLIKELY(i == 0 && splice->prev_[i] != head_ &&
1020
- compare_(splice->prev_[i]->Key(), x->Key()) >= 0)) {
1020
+ compare_(splice->prev_[i]->Key(), key_decoded) >= 0)) {
1021
1021
  // duplicate key
1022
1022
  return false;
1023
1023
  }
@@ -1396,6 +1396,19 @@ class MockFilterPolicy : public FilterPolicy {
1396
1396
  }
1397
1397
  };
1398
1398
 
1399
+ class MockCache : public CacheWrapper {
1400
+ public:
1401
+ static const char* kClassName() { return "MockCache"; }
1402
+ const char* Name() const override { return kClassName(); }
1403
+
1404
+ MockCache()
1405
+ : CacheWrapper(NewLRUCache(LRUCacheOptions(100, 0, false, 0.0))) {}
1406
+
1407
+ bool IsInstanceOf(const std::string& name) const override {
1408
+ return name.find(Name()) == 0;
1409
+ }
1410
+ };
1411
+
1399
1412
  static int RegisterLocalObjects(ObjectLibrary& library,
1400
1413
  const std::string& /*arg*/) {
1401
1414
  size_t num_types;
@@ -1519,6 +1532,15 @@ static int RegisterLocalObjects(ObjectLibrary& library,
1519
1532
  return guard->get();
1520
1533
  });
1521
1534
 
1535
+ library.AddFactory<Cache>(
1536
+ ObjectLibrary::PatternEntry(MockCache::kClassName())
1537
+ .AddSeparator("://", /*at_least_one=*/false),
1538
+ [](const std::string& /*uri*/, std::unique_ptr<Cache>* guard,
1539
+ std::string* /* errmsg */) {
1540
+ guard->reset(new MockCache());
1541
+ return guard->get();
1542
+ });
1543
+
1522
1544
  return static_cast<int>(library.GetFactoryCount(&num_types));
1523
1545
  }
1524
1546
  } // namespace
@@ -2111,6 +2133,15 @@ TEST_F(LoadCustomizableTest, LoadFlushBlockPolicyFactoryTest) {
2111
2133
  }
2112
2134
  }
2113
2135
 
2136
+ TEST_F(LoadCustomizableTest, LoadCacheTest) {
2137
+ if (RegisterTests("Test")) {
2138
+ std::string uri(MockCache::kClassName());
2139
+ uri.append("://");
2140
+ auto cache = ExpectCreateShared<Cache>(uri);
2141
+ ASSERT_TRUE(cache->IsInstanceOf(MockCache::kClassName()));
2142
+ }
2143
+ }
2144
+
2114
2145
  } // namespace ROCKSDB_NAMESPACE
2115
2146
  int main(int argc, char** argv) {
2116
2147
  ::testing::InitGoogleTest(&argc, argv);
@@ -403,10 +403,18 @@ static std::unordered_map<std::string, OptionTypeInfo>
403
403
  {offsetof(struct ImmutableDBOptions, avoid_unnecessary_blocking_io),
404
404
  OptionType::kBoolean, OptionVerificationType::kNormal,
405
405
  OptionTypeFlags::kNone}},
406
+ {"prefix_seek_opt_in_only",
407
+ {offsetof(struct ImmutableDBOptions, prefix_seek_opt_in_only),
408
+ OptionType::kBoolean, OptionVerificationType::kNormal,
409
+ OptionTypeFlags::kNone}},
406
410
  {"write_dbid_to_manifest",
407
411
  {offsetof(struct ImmutableDBOptions, write_dbid_to_manifest),
408
412
  OptionType::kBoolean, OptionVerificationType::kNormal,
409
413
  OptionTypeFlags::kNone}},
414
+ {"write_identity_file",
415
+ {offsetof(struct ImmutableDBOptions, write_identity_file),
416
+ OptionType::kBoolean, OptionVerificationType::kNormal,
417
+ OptionTypeFlags::kNone}},
410
418
  {"log_readahead_size",
411
419
  {offsetof(struct ImmutableDBOptions, log_readahead_size),
412
420
  OptionType::kSizeT, OptionVerificationType::kNormal,
@@ -770,8 +778,10 @@ ImmutableDBOptions::ImmutableDBOptions(const DBOptions& options)
770
778
  background_close_inactive_wals(options.background_close_inactive_wals),
771
779
  atomic_flush(options.atomic_flush),
772
780
  avoid_unnecessary_blocking_io(options.avoid_unnecessary_blocking_io),
781
+ prefix_seek_opt_in_only(options.prefix_seek_opt_in_only),
773
782
  persist_stats_to_disk(options.persist_stats_to_disk),
774
783
  write_dbid_to_manifest(options.write_dbid_to_manifest),
784
+ write_identity_file(options.write_identity_file),
775
785
  log_readahead_size(options.log_readahead_size),
776
786
  file_checksum_gen_factory(options.file_checksum_gen_factory),
777
787
  best_efforts_recovery(options.best_efforts_recovery),
@@ -943,10 +953,14 @@ void ImmutableDBOptions::Dump(Logger* log) const {
943
953
  ROCKS_LOG_HEADER(log,
944
954
  " Options.avoid_unnecessary_blocking_io: %d",
945
955
  avoid_unnecessary_blocking_io);
956
+ ROCKS_LOG_HEADER(log, " Options.prefix_seek_opt_in_only: %d",
957
+ prefix_seek_opt_in_only);
946
958
  ROCKS_LOG_HEADER(log, " Options.persist_stats_to_disk: %u",
947
959
  persist_stats_to_disk);
948
960
  ROCKS_LOG_HEADER(log, " Options.write_dbid_to_manifest: %d",
949
961
  write_dbid_to_manifest);
962
+ ROCKS_LOG_HEADER(log, " Options.write_identity_file: %d",
963
+ write_identity_file);
950
964
  ROCKS_LOG_HEADER(
951
965
  log, " Options.log_readahead_size: %" ROCKSDB_PRIszt,
952
966
  log_readahead_size);
@@ -87,8 +87,10 @@ struct ImmutableDBOptions {
87
87
  bool background_close_inactive_wals;
88
88
  bool atomic_flush;
89
89
  bool avoid_unnecessary_blocking_io;
90
+ bool prefix_seek_opt_in_only;
90
91
  bool persist_stats_to_disk;
91
92
  bool write_dbid_to_manifest;
93
+ bool write_identity_file;
92
94
  size_t log_readahead_size;
93
95
  std::shared_ptr<FileChecksumGenFactory> file_checksum_gen_factory;
94
96
  bool best_efforts_recovery;
@@ -55,7 +55,13 @@ Status ValidateOptions(const DBOptions& db_opts,
55
55
  DBOptions BuildDBOptions(const ImmutableDBOptions& immutable_db_options,
56
56
  const MutableDBOptions& mutable_db_options) {
57
57
  DBOptions options;
58
+ BuildDBOptions(immutable_db_options, mutable_db_options, options);
59
+ return options;
60
+ }
58
61
 
62
+ void BuildDBOptions(const ImmutableDBOptions& immutable_db_options,
63
+ const MutableDBOptions& mutable_db_options,
64
+ DBOptions& options) {
59
65
  options.create_if_missing = immutable_db_options.create_if_missing;
60
66
  options.create_missing_column_families =
61
67
  immutable_db_options.create_missing_column_families;
@@ -88,9 +94,6 @@ DBOptions BuildDBOptions(const ImmutableDBOptions& immutable_db_options,
88
94
  options.max_background_jobs = mutable_db_options.max_background_jobs;
89
95
  options.max_background_compactions =
90
96
  mutable_db_options.max_background_compactions;
91
- options.bytes_per_sync = mutable_db_options.bytes_per_sync;
92
- options.wal_bytes_per_sync = mutable_db_options.wal_bytes_per_sync;
93
- options.strict_bytes_per_sync = mutable_db_options.strict_bytes_per_sync;
94
97
  options.max_subcompactions = mutable_db_options.max_subcompactions;
95
98
  options.max_background_flushes = mutable_db_options.max_background_flushes;
96
99
  options.max_log_file_size = immutable_db_options.max_log_file_size;
@@ -127,6 +130,9 @@ DBOptions BuildDBOptions(const ImmutableDBOptions& immutable_db_options,
127
130
  options.writable_file_max_buffer_size =
128
131
  mutable_db_options.writable_file_max_buffer_size;
129
132
  options.use_adaptive_mutex = immutable_db_options.use_adaptive_mutex;
133
+ options.bytes_per_sync = mutable_db_options.bytes_per_sync;
134
+ options.wal_bytes_per_sync = mutable_db_options.wal_bytes_per_sync;
135
+ options.strict_bytes_per_sync = mutable_db_options.strict_bytes_per_sync;
130
136
  options.listeners = immutable_db_options.listeners;
131
137
  options.enable_thread_tracking = immutable_db_options.enable_thread_tracking;
132
138
  options.delayed_write_rate = mutable_db_options.delayed_write_rate;
@@ -161,9 +167,15 @@ DBOptions BuildDBOptions(const ImmutableDBOptions& immutable_db_options,
161
167
  options.two_write_queues = immutable_db_options.two_write_queues;
162
168
  options.manual_wal_flush = immutable_db_options.manual_wal_flush;
163
169
  options.wal_compression = immutable_db_options.wal_compression;
170
+ options.background_close_inactive_wals =
171
+ immutable_db_options.background_close_inactive_wals;
164
172
  options.atomic_flush = immutable_db_options.atomic_flush;
165
173
  options.avoid_unnecessary_blocking_io =
166
174
  immutable_db_options.avoid_unnecessary_blocking_io;
175
+ options.write_dbid_to_manifest = immutable_db_options.write_dbid_to_manifest;
176
+ options.write_identity_file = immutable_db_options.write_identity_file;
177
+ options.prefix_seek_opt_in_only =
178
+ immutable_db_options.prefix_seek_opt_in_only;
167
179
  options.log_readahead_size = immutable_db_options.log_readahead_size;
168
180
  options.file_checksum_gen_factory =
169
181
  immutable_db_options.file_checksum_gen_factory;
@@ -189,7 +201,6 @@ DBOptions BuildDBOptions(const ImmutableDBOptions& immutable_db_options,
189
201
  options.metadata_write_temperature =
190
202
  immutable_db_options.metadata_write_temperature;
191
203
  options.wal_write_temperature = immutable_db_options.wal_write_temperature;
192
- return options;
193
204
  }
194
205
 
195
206
  ColumnFamilyOptions BuildColumnFamilyOptions(
@@ -44,6 +44,10 @@ Status ValidateOptions(const DBOptions& db_opts,
44
44
 
45
45
  DBOptions BuildDBOptions(const ImmutableDBOptions& immutable_db_options,
46
46
  const MutableDBOptions& mutable_db_options);
47
+ // Overwrites `options`
48
+ void BuildDBOptions(const ImmutableDBOptions& immutable_db_options,
49
+ const MutableDBOptions& mutable_db_options,
50
+ DBOptions& options);
47
51
 
48
52
  ColumnFamilyOptions BuildColumnFamilyOptions(
49
53
  const ColumnFamilyOptions& ioptions,
@@ -296,12 +296,13 @@ Status RocksDBOptionsParser::Parse(const ConfigOptions& config_options_in,
296
296
  return s;
297
297
  }
298
298
 
299
- // If the option file is not generated by a higher minor version,
300
- // there shouldn't be any unknown option.
299
+ // If the option file is not generated by a higher version, unknown
300
+ // options should only mean corruption.
301
301
  if (config_options.ignore_unknown_options &&
302
302
  section == kOptionSectionVersion) {
303
- if (db_version[0] < ROCKSDB_MAJOR || (db_version[0] == ROCKSDB_MAJOR &&
304
- db_version[1] <= ROCKSDB_MINOR)) {
303
+ using VTuple = std::tuple<int, int, int>;
304
+ if (VTuple(db_version[0], db_version[1], db_version[2]) <=
305
+ VTuple(ROCKSDB_MAJOR, ROCKSDB_MINOR, ROCKSDB_PATCH)) {
305
306
  config_options.ignore_unknown_options = false;
306
307
  }
307
308
  }
@@ -271,6 +271,12 @@ TEST_F(OptionsSettableTest, DBOptionsAllFieldsSettable) {
271
271
  ASSERT_GT(unset_bytes_base, 0);
272
272
  options->~DBOptions();
273
273
 
274
+ // Now also check that BuildDBOptions populates everything
275
+ FillWithSpecialChar(options_ptr, sizeof(DBOptions), kDBOptionsExcluded);
276
+ BuildDBOptions({}, {}, *options);
277
+ ASSERT_EQ(unset_bytes_base,
278
+ NumUnsetBytes(options_ptr, sizeof(DBOptions), kDBOptionsExcluded));
279
+
274
280
  options = new (options_ptr) DBOptions();
275
281
  FillWithSpecialChar(options_ptr, sizeof(DBOptions), kDBOptionsExcluded);
276
282
 
@@ -372,7 +378,11 @@ TEST_F(OptionsSettableTest, DBOptionsAllFieldsSettable) {
372
378
  "follower_catchup_retry_count=456;"
373
379
  "follower_catchup_retry_wait_ms=789;"
374
380
  "metadata_write_temperature=kCold;"
375
- "wal_write_temperature=kHot;",
381
+ "wal_write_temperature=kHot;"
382
+ "background_close_inactive_wals=true;"
383
+ "write_dbid_to_manifest=true;"
384
+ "write_identity_file=true;"
385
+ "prefix_seek_opt_in_only=true;",
376
386
  new_options));
377
387
 
378
388
  ASSERT_EQ(unset_bytes_base, NumUnsetBytes(new_options_ptr, sizeof(DBOptions),
@@ -3449,44 +3449,8 @@ TEST_F(OptionsParserTest, DuplicateCFOptions) {
3449
3449
  }
3450
3450
 
3451
3451
  TEST_F(OptionsParserTest, IgnoreUnknownOptions) {
3452
- for (int case_id = 0; case_id < 5; case_id++) {
3453
- DBOptions db_opt;
3454
- db_opt.max_open_files = 12345;
3455
- db_opt.max_background_flushes = 301;
3456
- db_opt.max_total_wal_size = 1024;
3457
- ColumnFamilyOptions cf_opt;
3458
-
3459
- std::string version_string;
3460
- bool should_ignore = true;
3461
- if (case_id == 0) {
3462
- // same version
3463
- should_ignore = false;
3464
- version_string = std::to_string(ROCKSDB_MAJOR) + "." +
3465
- std::to_string(ROCKSDB_MINOR) + ".0";
3466
- } else if (case_id == 1) {
3467
- // higher minor version
3468
- should_ignore = true;
3469
- version_string = std::to_string(ROCKSDB_MAJOR) + "." +
3470
- std::to_string(ROCKSDB_MINOR + 1) + ".0";
3471
- } else if (case_id == 2) {
3472
- // higher major version.
3473
- should_ignore = true;
3474
- version_string = std::to_string(ROCKSDB_MAJOR + 1) + ".0.0";
3475
- } else if (case_id == 3) {
3476
- // lower minor version
3477
- #if ROCKSDB_MINOR == 0
3478
- continue;
3479
- #else
3480
- version_string = std::to_string(ROCKSDB_MAJOR) + "." +
3481
- std::to_string(ROCKSDB_MINOR - 1) + ".0";
3482
- should_ignore = false;
3483
- #endif
3484
- } else {
3485
- // lower major version
3486
- should_ignore = false;
3487
- version_string = std::to_string(ROCKSDB_MAJOR - 1) + "." +
3488
- std::to_string(ROCKSDB_MINOR) + ".0";
3489
- }
3452
+ auto testCase = [&](bool should_ignore, const std::string& version_string) {
3453
+ SCOPED_TRACE(std::to_string(should_ignore) + ", " + version_string);
3490
3454
 
3491
3455
  std::string options_file_content =
3492
3456
  "# This is a testing option string.\n"
@@ -3519,16 +3483,45 @@ TEST_F(OptionsParserTest, IgnoreUnknownOptions) {
3519
3483
  RocksDBOptionsParser parser;
3520
3484
  ASSERT_NOK(parser.Parse(kTestFileName, fs_.get(), false,
3521
3485
  4096 /* readahead_size */));
3486
+ Status parse_status = parser.Parse(kTestFileName, fs_.get(),
3487
+ true /* ignore_unknown_options */,
3488
+ 4096 /* readahead_size */);
3522
3489
  if (should_ignore) {
3523
- ASSERT_OK(parser.Parse(kTestFileName, fs_.get(),
3524
- true /* ignore_unknown_options */,
3525
- 4096 /* readahead_size */));
3490
+ ASSERT_OK(parse_status);
3526
3491
  } else {
3527
- ASSERT_NOK(parser.Parse(kTestFileName, fs_.get(),
3528
- true /* ignore_unknown_options */,
3529
- 4096 /* readahead_size */));
3492
+ ASSERT_NOK(parse_status);
3530
3493
  }
3531
- }
3494
+ };
3495
+
3496
+ // Same version
3497
+ testCase(false, GetRocksVersionAsString());
3498
+ // Same except .0 patch
3499
+ testCase(false, std::to_string(ROCKSDB_MAJOR) + "." +
3500
+ std::to_string(ROCKSDB_MINOR) + ".0");
3501
+ // Higher major version
3502
+ testCase(true, std::to_string(ROCKSDB_MAJOR + 1) + "." +
3503
+ std::to_string(ROCKSDB_MINOR) + ".0");
3504
+ // Higher minor version
3505
+ testCase(true, std::to_string(ROCKSDB_MAJOR) + "." +
3506
+ std::to_string(ROCKSDB_MINOR + 1) + ".0");
3507
+ // Higher patch version
3508
+ testCase(true, std::to_string(ROCKSDB_MAJOR) + "." +
3509
+ std::to_string(ROCKSDB_MINOR) + "." +
3510
+ std::to_string(ROCKSDB_PATCH + 1));
3511
+ // Lower major version
3512
+ testCase(false, std::to_string(ROCKSDB_MAJOR - 1) + "." +
3513
+ std::to_string(ROCKSDB_MINOR) + ".0");
3514
+ #if ROCKSDB_MINOR > 0
3515
+ // Lower minor version
3516
+ testCase(false, std::to_string(ROCKSDB_MAJOR) + "." +
3517
+ std::to_string(ROCKSDB_MINOR - 1) + ".0");
3518
+ #endif
3519
+ #if ROCKSDB_PATCH > 0
3520
+ // Lower patch version: keep major and minor equal so that only the patch component decides the comparison
3521
+ testCase(false, std::to_string(ROCKSDB_MAJOR) + "." +
3522
+ std::to_string(ROCKSDB_MINOR) + "." +
3523
+ std::to_string(ROCKSDB_PATCH - 1));
3524
+ #endif
3532
3525
  }
3533
3526
 
3534
3527
  TEST_F(OptionsParserTest, ParseVersion) {
@@ -19,3 +19,19 @@
19
19
  #elif defined(OS_WIN)
20
20
  #include "port/win/port_win.h"
21
21
  #endif
22
+
23
+ #ifdef OS_LINUX
24
+ // A temporary hook into long-running RocksDB threads to support modifying their
25
+ // priority etc. This should become a public API hook once the requirements are
26
+ // better understood. RocksDbThreadYield is declared weak: the macro calls it only if its address is non-null, and the macro is an empty block when OS_LINUX is not defined.
27
+ extern "C" void RocksDbThreadYield() __attribute__((__weak__));
28
+ #define ROCKSDB_THREAD_YIELD_HOOK() \
29
+ { \
30
+ if (RocksDbThreadYield) { \
31
+ RocksDbThreadYield(); \
32
+ } \
33
+ }
34
+ #else
35
+ #define ROCKSDB_THREAD_YIELD_HOOK() \
36
+ {}
37
+ #endif
@@ -627,6 +627,9 @@ struct BlockBasedTableBuilder::Rep {
627
627
  if (!ReifyDbHostIdProperty(ioptions.env, &props.db_host_id).ok()) {
628
628
  ROCKS_LOG_INFO(ioptions.logger, "db_host_id property will not be set");
629
629
  }
630
+ // Default is UINT64_MAX for unknown. Setting it to 0 here
631
+ // to allow updating it by taking max in BlockBasedTableBuilder::Add().
632
+ props.key_largest_seqno = 0;
630
633
 
631
634
  if (FormatVersionUsesContextChecksum(table_options.format_version)) {
632
635
  // Must be non-zero and semi- or quasi-random
@@ -1014,7 +1017,10 @@ void BlockBasedTableBuilder::Add(const Slice& ikey, const Slice& value) {
1014
1017
  if (!ok()) {
1015
1018
  return;
1016
1019
  }
1017
- ValueType value_type = ExtractValueType(ikey);
1020
+ ValueType value_type;
1021
+ SequenceNumber seq;
1022
+ UnPackSequenceAndType(ExtractInternalKeyFooter(ikey), &seq, &value_type);
1023
+ r->props.key_largest_seqno = std::max(r->props.key_largest_seqno, seq);
1018
1024
  if (IsValueType(value_type)) {
1019
1025
  #ifndef NDEBUG
1020
1026
  if (r->props.num_entries > r->props.num_range_deletions) {
@@ -1781,6 +1787,7 @@ void BlockBasedTableBuilder::WritePropertiesBlock(
1781
1787
  rep_->props.user_defined_timestamps_persisted =
1782
1788
  rep_->persist_user_defined_timestamps;
1783
1789
 
1790
+ assert(IsEmpty() || rep_->props.key_largest_seqno != UINT64_MAX);
1784
1791
  // Add basic properties
1785
1792
  property_block_builder.AddTableProperty(rep_->props);
1786
1793