@nxtedition/rocksdb 7.0.4 → 7.0.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (128) hide show
  1. package/binding.cc +320 -324
  2. package/chained-batch.js +6 -1
  3. package/deps/rocksdb/rocksdb/CMakeLists.txt +8 -3
  4. package/deps/rocksdb/rocksdb/Makefile +10 -4
  5. package/deps/rocksdb/rocksdb/TARGETS +6 -4
  6. package/deps/rocksdb/rocksdb/cache/cache_bench_tool.cc +9 -0
  7. package/deps/rocksdb/rocksdb/cache/cache_test.cc +14 -0
  8. package/deps/rocksdb/rocksdb/cache/clock_cache.cc +8 -8
  9. package/deps/rocksdb/rocksdb/cache/fast_lru_cache.cc +272 -174
  10. package/deps/rocksdb/rocksdb/cache/fast_lru_cache.h +201 -57
  11. package/deps/rocksdb/rocksdb/cache/lru_cache.cc +19 -19
  12. package/deps/rocksdb/rocksdb/cache/lru_cache.h +2 -1
  13. package/deps/rocksdb/rocksdb/db/blob/blob_source.cc +170 -0
  14. package/deps/rocksdb/rocksdb/db/blob/blob_source.h +95 -0
  15. package/deps/rocksdb/rocksdb/db/blob/blob_source_test.cc +298 -0
  16. package/deps/rocksdb/rocksdb/db/blob/db_blob_basic_test.cc +172 -0
  17. package/deps/rocksdb/rocksdb/db/column_family.cc +8 -3
  18. package/deps/rocksdb/rocksdb/db/column_family.h +6 -3
  19. package/deps/rocksdb/rocksdb/db/compaction/compaction_job.cc +10 -0
  20. package/deps/rocksdb/rocksdb/db/compaction/compaction_job_test.cc +6 -6
  21. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_level.cc +22 -2
  22. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_test.cc +38 -0
  23. package/deps/rocksdb/rocksdb/db/db_basic_test.cc +17 -5
  24. package/deps/rocksdb/rocksdb/db/db_block_cache_test.cc +4 -7
  25. package/deps/rocksdb/rocksdb/db/db_bloom_filter_test.cc +74 -71
  26. package/deps/rocksdb/rocksdb/db/db_compaction_test.cc +70 -1
  27. package/deps/rocksdb/rocksdb/db/db_impl/db_impl.cc +13 -12
  28. package/deps/rocksdb/rocksdb/db/db_impl/db_impl.h +36 -0
  29. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_compaction_flush.cc +11 -4
  30. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_files.cc +1 -1
  31. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_open.cc +139 -91
  32. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_write.cc +48 -14
  33. package/deps/rocksdb/rocksdb/db/db_kv_checksum_test.cc +90 -55
  34. package/deps/rocksdb/rocksdb/db/db_rate_limiter_test.cc +9 -4
  35. package/deps/rocksdb/rocksdb/db/db_test.cc +3 -1
  36. package/deps/rocksdb/rocksdb/db/db_wal_test.cc +12 -7
  37. package/deps/rocksdb/rocksdb/db/db_write_test.cc +35 -0
  38. package/deps/rocksdb/rocksdb/db/dbformat.cc +3 -1
  39. package/deps/rocksdb/rocksdb/db/dbformat.h +5 -3
  40. package/deps/rocksdb/rocksdb/db/flush_job_test.cc +1 -1
  41. package/deps/rocksdb/rocksdb/db/memtable.cc +1 -0
  42. package/deps/rocksdb/rocksdb/db/memtable_list_test.cc +4 -2
  43. package/deps/rocksdb/rocksdb/db/repair.cc +1 -1
  44. package/deps/rocksdb/rocksdb/db/version_builder.cc +43 -1
  45. package/deps/rocksdb/rocksdb/db/version_edit.cc +13 -5
  46. package/deps/rocksdb/rocksdb/db/version_edit.h +22 -1
  47. package/deps/rocksdb/rocksdb/db/version_edit_handler.cc +4 -5
  48. package/deps/rocksdb/rocksdb/db/version_set.cc +109 -41
  49. package/deps/rocksdb/rocksdb/db/version_set.h +36 -3
  50. package/deps/rocksdb/rocksdb/db/version_set_sync_and_async.h +1 -4
  51. package/deps/rocksdb/rocksdb/db/version_set_test.cc +10 -10
  52. package/deps/rocksdb/rocksdb/db/version_util.h +1 -1
  53. package/deps/rocksdb/rocksdb/db/wal_manager_test.cc +1 -1
  54. package/deps/rocksdb/rocksdb/db/write_batch.cc +34 -10
  55. package/deps/rocksdb/rocksdb/db/write_batch_internal.h +2 -0
  56. package/deps/rocksdb/rocksdb/db/write_callback_test.cc +4 -0
  57. package/deps/rocksdb/rocksdb/db_stress_tool/batched_ops_stress.cc +2 -0
  58. package/deps/rocksdb/rocksdb/db_stress_tool/cf_consistency_stress.cc +4 -1
  59. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_common.h +1 -1
  60. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_gflags.cc +7 -5
  61. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_test_base.cc +5 -10
  62. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_tool.cc +0 -7
  63. package/deps/rocksdb/rocksdb/db_stress_tool/no_batched_ops_stress.cc +2 -0
  64. package/deps/rocksdb/rocksdb/file/random_access_file_reader.cc +24 -3
  65. package/deps/rocksdb/rocksdb/file/writable_file_writer.cc +8 -0
  66. package/deps/rocksdb/rocksdb/file/writable_file_writer.h +10 -0
  67. package/deps/rocksdb/rocksdb/include/rocksdb/advanced_options.h +5 -0
  68. package/deps/rocksdb/rocksdb/include/rocksdb/cache.h +4 -4
  69. package/deps/rocksdb/rocksdb/include/rocksdb/options.h +9 -5
  70. package/deps/rocksdb/rocksdb/include/rocksdb/statistics.h +5 -0
  71. package/deps/rocksdb/rocksdb/include/rocksdb/types.h +1 -0
  72. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/write_batch_with_index.h +1 -1
  73. package/deps/rocksdb/rocksdb/include/rocksdb/version.h +1 -1
  74. package/deps/rocksdb/rocksdb/include/rocksdb/write_batch.h +0 -3
  75. package/deps/rocksdb/rocksdb/microbench/ribbon_bench.cc +8 -6
  76. package/deps/rocksdb/rocksdb/monitoring/statistics.cc +3 -1
  77. package/deps/rocksdb/rocksdb/options/options_helper.cc +4 -2
  78. package/deps/rocksdb/rocksdb/options/options_test.cc +1 -11
  79. package/deps/rocksdb/rocksdb/port/port_posix.h +7 -0
  80. package/deps/rocksdb/rocksdb/port/win/port_win.h +11 -3
  81. package/deps/rocksdb/rocksdb/src.mk +6 -2
  82. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_builder.cc +4 -33
  83. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_iterator.h +3 -3
  84. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader.cc +38 -118
  85. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader.h +6 -8
  86. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader_sync_and_async.h +10 -13
  87. package/deps/rocksdb/rocksdb/table/block_based/block_like_traits.h +4 -9
  88. package/deps/rocksdb/rocksdb/table/block_based/block_type.h +0 -1
  89. package/deps/rocksdb/rocksdb/table/block_based/filter_block.h +10 -28
  90. package/deps/rocksdb/rocksdb/table/block_based/filter_block_reader_common.cc +2 -3
  91. package/deps/rocksdb/rocksdb/table/block_based/filter_policy.cc +0 -91
  92. package/deps/rocksdb/rocksdb/table/block_based/filter_policy_internal.h +2 -30
  93. package/deps/rocksdb/rocksdb/table/block_based/full_filter_block.cc +6 -27
  94. package/deps/rocksdb/rocksdb/table/block_based/full_filter_block.h +11 -13
  95. package/deps/rocksdb/rocksdb/table/block_based/full_filter_block_test.cc +28 -40
  96. package/deps/rocksdb/rocksdb/table/block_based/mock_block_based_table.h +0 -1
  97. package/deps/rocksdb/rocksdb/table/block_based/partitioned_filter_block.cc +22 -43
  98. package/deps/rocksdb/rocksdb/table/block_based/partitioned_filter_block.h +11 -22
  99. package/deps/rocksdb/rocksdb/table/block_based/partitioned_filter_block_test.cc +24 -25
  100. package/deps/rocksdb/rocksdb/table/block_fetcher.cc +0 -1
  101. package/deps/rocksdb/rocksdb/table/get_context.h +0 -1
  102. package/deps/rocksdb/rocksdb/table/table_test.cc +3 -18
  103. package/deps/rocksdb/rocksdb/tools/db_bench_tool.cc +3 -16
  104. package/deps/rocksdb/rocksdb/tools/ldb_cmd.cc +3 -3
  105. package/deps/rocksdb/rocksdb/tools/ldb_cmd_test.cc +1 -1
  106. package/deps/rocksdb/rocksdb/util/bloom_test.cc +0 -201
  107. package/deps/rocksdb/rocksdb/util/distributed_mutex.h +48 -0
  108. package/deps/rocksdb/rocksdb/util/filter_bench.cc +5 -11
  109. package/deps/rocksdb/rocksdb/utilities/backup/backup_engine.cc +3 -0
  110. package/deps/rocksdb/rocksdb/utilities/cache_dump_load_impl.cc +7 -21
  111. package/deps/rocksdb/rocksdb/utilities/cache_dump_load_impl.h +1 -1
  112. package/deps/rocksdb/rocksdb/utilities/checkpoint/checkpoint_test.cc +45 -0
  113. package/deps/rocksdb/rocksdb/utilities/transactions/pessimistic_transaction_db.h +21 -14
  114. package/deps/rocksdb/rocksdb/utilities/transactions/transaction_base.cc +10 -1
  115. package/deps/rocksdb/rocksdb/utilities/transactions/write_prepared_txn.cc +3 -1
  116. package/deps/rocksdb/rocksdb/utilities/transactions/write_prepared_txn_db.cc +9 -0
  117. package/deps/rocksdb/rocksdb/utilities/transactions/write_unprepared_txn.cc +3 -2
  118. package/deps/rocksdb/rocksdb/utilities/transactions/write_unprepared_txn_db.cc +3 -1
  119. package/deps/rocksdb/rocksdb/utilities/write_batch_with_index/write_batch_with_index.cc +5 -4
  120. package/deps/rocksdb/rocksdb.gyp +1 -1
  121. package/index.js +36 -14
  122. package/package-lock.json +2 -2
  123. package/package.json +1 -1
  124. package/prebuilds/darwin-arm64/node.napi.node +0 -0
  125. package/prebuilds/linux-x64/node.napi.node +0 -0
  126. package/deps/rocksdb/rocksdb/table/block_based/block_based_filter_block.cc +0 -358
  127. package/deps/rocksdb/rocksdb/table/block_based/block_based_filter_block.h +0 -127
  128. package/deps/rocksdb/rocksdb/table/block_based/block_based_filter_block_test.cc +0 -219
@@ -5,6 +5,7 @@
5
5
 
6
6
  #include <array>
7
7
  #include <sstream>
8
+ #include <string>
8
9
 
9
10
  #include "db/blob/blob_index.h"
10
11
  #include "db/blob/blob_log_format.h"
@@ -48,6 +49,177 @@ TEST_F(DBBlobBasicTest, GetBlob) {
48
49
  .IsIncomplete());
49
50
  }
50
51
 
52
+ TEST_F(DBBlobBasicTest, GetBlobFromCache) {
53
+ Options options = GetDefaultOptions();
54
+
55
+ LRUCacheOptions co;
56
+ co.capacity = 2048;
57
+ co.num_shard_bits = 2;
58
+ co.metadata_charge_policy = kDontChargeCacheMetadata;
59
+ auto backing_cache = NewLRUCache(co);
60
+
61
+ options.enable_blob_files = true;
62
+ options.blob_cache = backing_cache;
63
+
64
+ BlockBasedTableOptions block_based_options;
65
+ block_based_options.no_block_cache = false;
66
+ block_based_options.block_cache = backing_cache;
67
+ block_based_options.cache_index_and_filter_blocks = true;
68
+ options.table_factory.reset(NewBlockBasedTableFactory(block_based_options));
69
+
70
+ Reopen(options);
71
+
72
+ constexpr char key[] = "key";
73
+ constexpr char blob_value[] = "blob_value";
74
+
75
+ ASSERT_OK(Put(key, blob_value));
76
+
77
+ ASSERT_OK(Flush());
78
+
79
+ ReadOptions read_options;
80
+
81
+ read_options.fill_cache = false;
82
+
83
+ {
84
+ PinnableSlice result;
85
+
86
+ read_options.read_tier = kReadAllTier;
87
+ ASSERT_OK(db_->Get(read_options, db_->DefaultColumnFamily(), key, &result));
88
+ ASSERT_EQ(result, blob_value);
89
+
90
+ result.Reset();
91
+ read_options.read_tier = kBlockCacheTier;
92
+
93
+ // Try again with no I/O allowed. Since we didn't re-fill the cache, the
94
+ // blob itself can only be read from the blob file, so the read should
95
+ // return Incomplete.
96
+ ASSERT_TRUE(db_->Get(read_options, db_->DefaultColumnFamily(), key, &result)
97
+ .IsIncomplete());
98
+ ASSERT_TRUE(result.empty());
99
+ }
100
+
101
+ read_options.fill_cache = true;
102
+
103
+ {
104
+ PinnableSlice result;
105
+
106
+ read_options.read_tier = kReadAllTier;
107
+ ASSERT_OK(db_->Get(read_options, db_->DefaultColumnFamily(), key, &result));
108
+ ASSERT_EQ(result, blob_value);
109
+
110
+ result.Reset();
111
+ read_options.read_tier = kBlockCacheTier;
112
+
113
+ // Try again with no I/O allowed. The table and the necessary blocks/blobs
114
+ // should already be in their respective caches.
115
+ ASSERT_OK(db_->Get(read_options, db_->DefaultColumnFamily(), key, &result));
116
+ ASSERT_EQ(result, blob_value);
117
+ }
118
+ }
119
+
120
+ TEST_F(DBBlobBasicTest, IterateBlobsFromCache) {
121
+ Options options = GetDefaultOptions();
122
+
123
+ LRUCacheOptions co;
124
+ co.capacity = 2048;
125
+ co.num_shard_bits = 2;
126
+ co.metadata_charge_policy = kDontChargeCacheMetadata;
127
+ auto backing_cache = NewLRUCache(co);
128
+
129
+ options.enable_blob_files = true;
130
+ options.blob_cache = backing_cache;
131
+
132
+ BlockBasedTableOptions block_based_options;
133
+ block_based_options.no_block_cache = false;
134
+ block_based_options.block_cache = backing_cache;
135
+ block_based_options.cache_index_and_filter_blocks = true;
136
+ options.table_factory.reset(NewBlockBasedTableFactory(block_based_options));
137
+
138
+ Reopen(options);
139
+
140
+ int num_blobs = 5;
141
+ std::vector<std::string> keys;
142
+ std::vector<std::string> blobs;
143
+
144
+ for (int i = 0; i < num_blobs; ++i) {
145
+ keys.push_back("key" + std::to_string(i));
146
+ blobs.push_back("blob" + std::to_string(i));
147
+ ASSERT_OK(Put(keys[i], blobs[i]));
148
+ }
149
+ ASSERT_OK(Flush());
150
+
151
+ ReadOptions read_options;
152
+
153
+ {
154
+ read_options.fill_cache = false;
155
+ read_options.read_tier = kReadAllTier;
156
+
157
+ std::unique_ptr<Iterator> iter(db_->NewIterator(read_options));
158
+ ASSERT_OK(iter->status());
159
+
160
+ int i = 0;
161
+ for (iter->SeekToFirst(); iter->Valid(); iter->Next()) {
162
+ ASSERT_OK(iter->status());
163
+ ASSERT_EQ(iter->key().ToString(), keys[i]);
164
+ ASSERT_EQ(iter->value().ToString(), blobs[i]);
165
+ ++i;
166
+ }
167
+ ASSERT_EQ(i, num_blobs);
168
+ }
169
+
170
+ {
171
+ read_options.fill_cache = false;
172
+ read_options.read_tier = kBlockCacheTier;
173
+
174
+ std::unique_ptr<Iterator> iter(db_->NewIterator(read_options));
175
+ ASSERT_OK(iter->status());
176
+
177
+ // Try again with no I/O allowed. Since we didn't re-fill the cache,
178
+ // the blob itself can only be read from the blob file, so iter->Valid()
179
+ // should be false.
180
+ iter->SeekToFirst();
181
+ ASSERT_NOK(iter->status());
182
+ ASSERT_FALSE(iter->Valid());
183
+ }
184
+
185
+ {
186
+ read_options.fill_cache = true;
187
+ read_options.read_tier = kReadAllTier;
188
+
189
+ std::unique_ptr<Iterator> iter(db_->NewIterator(read_options));
190
+ ASSERT_OK(iter->status());
191
+
192
+ // Read blobs from the file and refill the cache.
193
+ int i = 0;
194
+ for (iter->SeekToFirst(); iter->Valid(); iter->Next()) {
195
+ ASSERT_OK(iter->status());
196
+ ASSERT_EQ(iter->key().ToString(), keys[i]);
197
+ ASSERT_EQ(iter->value().ToString(), blobs[i]);
198
+ ++i;
199
+ }
200
+ ASSERT_EQ(i, num_blobs);
201
+ }
202
+
203
+ {
204
+ read_options.fill_cache = false;
205
+ read_options.read_tier = kBlockCacheTier;
206
+
207
+ std::unique_ptr<Iterator> iter(db_->NewIterator(read_options));
208
+ ASSERT_OK(iter->status());
209
+
210
+ // Try again with no I/O allowed. The table and the necessary blocks/blobs
211
+ // should already be in their respective caches.
212
+ int i = 0;
213
+ for (iter->SeekToFirst(); iter->Valid(); iter->Next()) {
214
+ ASSERT_OK(iter->status());
215
+ ASSERT_EQ(iter->key().ToString(), keys[i]);
216
+ ASSERT_EQ(iter->value().ToString(), blobs[i]);
217
+ ++i;
218
+ }
219
+ ASSERT_EQ(i, num_blobs);
220
+ }
221
+ }
222
+
51
223
  TEST_F(DBBlobBasicTest, MultiGetBlobs) {
52
224
  constexpr size_t min_blob_size = 6;
53
225
 
@@ -17,6 +17,7 @@
17
17
  #include <vector>
18
18
 
19
19
  #include "db/blob/blob_file_cache.h"
20
+ #include "db/blob/blob_source.h"
20
21
  #include "db/compaction/compaction_picker.h"
21
22
  #include "db/compaction/compaction_picker_fifo.h"
22
23
  #include "db/compaction/compaction_picker_level.h"
@@ -516,7 +517,7 @@ ColumnFamilyData::ColumnFamilyData(
516
517
  const ColumnFamilyOptions& cf_options, const ImmutableDBOptions& db_options,
517
518
  const FileOptions* file_options, ColumnFamilySet* column_family_set,
518
519
  BlockCacheTracer* const block_cache_tracer,
519
- const std::shared_ptr<IOTracer>& io_tracer,
520
+ const std::shared_ptr<IOTracer>& io_tracer, const std::string& db_id,
520
521
  const std::string& db_session_id)
521
522
  : id_(id),
522
523
  name_(name),
@@ -580,6 +581,8 @@ ColumnFamilyData::ColumnFamilyData(
580
581
  blob_file_cache_.reset(
581
582
  new BlobFileCache(_table_cache, ioptions(), soptions(), id_,
582
583
  internal_stats_->GetBlobFileReadHist(), io_tracer));
584
+ blob_source_.reset(new BlobSource(ioptions(), db_id, db_session_id,
585
+ blob_file_cache_.get()));
583
586
 
584
587
  if (ioptions_.compaction_style == kCompactionStyleLevel) {
585
588
  compaction_picker_.reset(
@@ -1504,13 +1507,14 @@ ColumnFamilySet::ColumnFamilySet(const std::string& dbname,
1504
1507
  WriteController* _write_controller,
1505
1508
  BlockCacheTracer* const block_cache_tracer,
1506
1509
  const std::shared_ptr<IOTracer>& io_tracer,
1510
+ const std::string& db_id,
1507
1511
  const std::string& db_session_id)
1508
1512
  : max_column_family_(0),
1509
1513
  file_options_(file_options),
1510
1514
  dummy_cfd_(new ColumnFamilyData(
1511
1515
  ColumnFamilyData::kDummyColumnFamilyDataId, "", nullptr, nullptr,
1512
1516
  nullptr, ColumnFamilyOptions(), *db_options, &file_options_, nullptr,
1513
- block_cache_tracer, io_tracer, db_session_id)),
1517
+ block_cache_tracer, io_tracer, db_id, db_session_id)),
1514
1518
  default_cfd_cache_(nullptr),
1515
1519
  db_name_(dbname),
1516
1520
  db_options_(db_options),
@@ -1519,6 +1523,7 @@ ColumnFamilySet::ColumnFamilySet(const std::string& dbname,
1519
1523
  write_controller_(_write_controller),
1520
1524
  block_cache_tracer_(block_cache_tracer),
1521
1525
  io_tracer_(io_tracer),
1526
+ db_id_(db_id),
1522
1527
  db_session_id_(db_session_id) {
1523
1528
  // initialize linked list
1524
1529
  dummy_cfd_->prev_ = dummy_cfd_;
@@ -1586,7 +1591,7 @@ ColumnFamilyData* ColumnFamilySet::CreateColumnFamily(
1586
1591
  ColumnFamilyData* new_cfd = new ColumnFamilyData(
1587
1592
  id, name, dummy_versions, table_cache_, write_buffer_manager_, options,
1588
1593
  *db_options_, &file_options_, this, block_cache_tracer_, io_tracer_,
1589
- db_session_id_);
1594
+ db_id_, db_session_id_);
1590
1595
  column_families_.insert({name, id});
1591
1596
  column_family_data_.insert({id, new_cfd});
1592
1597
  max_column_family_ = std::max(max_column_family_, id);
@@ -47,6 +47,7 @@ class InstrumentedMutex;
47
47
  class InstrumentedMutexLock;
48
48
  struct SuperVersionContext;
49
49
  class BlobFileCache;
50
+ class BlobSource;
50
51
 
51
52
  extern const double kIncSlowdownRatio;
52
53
  // This file contains a list of data structures for managing column family
@@ -376,7 +377,7 @@ class ColumnFamilyData {
376
377
  SequenceNumber earliest_seq);
377
378
 
378
379
  TableCache* table_cache() const { return table_cache_.get(); }
379
- BlobFileCache* blob_file_cache() const { return blob_file_cache_.get(); }
380
+ BlobSource* blob_source() const { return blob_source_.get(); }
380
381
 
381
382
  // See documentation in compaction_picker.h
382
383
  // REQUIRES: DB mutex held
@@ -539,7 +540,7 @@ class ColumnFamilyData {
539
540
  ColumnFamilySet* column_family_set,
540
541
  BlockCacheTracer* const block_cache_tracer,
541
542
  const std::shared_ptr<IOTracer>& io_tracer,
542
- const std::string& db_session_id);
543
+ const std::string& db_id, const std::string& db_session_id);
543
544
 
544
545
  std::vector<std::string> GetDbPaths() const;
545
546
 
@@ -563,6 +564,7 @@ class ColumnFamilyData {
563
564
 
564
565
  std::unique_ptr<TableCache> table_cache_;
565
566
  std::unique_ptr<BlobFileCache> blob_file_cache_;
567
+ std::unique_ptr<BlobSource> blob_source_;
566
568
 
567
569
  std::unique_ptr<InternalStats> internal_stats_;
568
570
 
@@ -673,7 +675,7 @@ class ColumnFamilySet {
673
675
  WriteController* _write_controller,
674
676
  BlockCacheTracer* const block_cache_tracer,
675
677
  const std::shared_ptr<IOTracer>& io_tracer,
676
- const std::string& db_session_id);
678
+ const std::string& db_id, const std::string& db_session_id);
677
679
  ~ColumnFamilySet();
678
680
 
679
681
  ColumnFamilyData* GetDefault() const;
@@ -735,6 +737,7 @@ class ColumnFamilySet {
735
737
  WriteController* write_controller_;
736
738
  BlockCacheTracer* const block_cache_tracer_;
737
739
  std::shared_ptr<IOTracer> io_tracer_;
740
+ const std::string& db_id_;
738
741
  std::string db_session_id_;
739
742
  };
740
743
 
@@ -2160,6 +2160,16 @@ Status CompactionJob::InstallCompactionResults(
2160
2160
  stats.GetBytes());
2161
2161
  }
2162
2162
 
2163
+ if (compaction->compaction_reason() == CompactionReason::kLevelMaxLevelSize &&
2164
+ compaction->immutable_options()->compaction_pri == kRoundRobin) {
2165
+ int start_level = compaction->start_level();
2166
+ if (start_level > 0) {
2167
+ auto vstorage = compaction->input_version()->storage_info();
2168
+ edit->AddCompactCursor(start_level,
2169
+ vstorage->GetNextCompactCursor(start_level));
2170
+ }
2171
+ }
2172
+
2163
2173
  return versions_->LogAndApply(compaction->column_family_data(),
2164
2174
  mutable_cf_options, edit, db_mutex_,
2165
2175
  db_directory_);
@@ -208,11 +208,11 @@ class CompactionJobTestBase : public testing::Test {
208
208
  mutable_db_options_(),
209
209
  table_cache_(NewLRUCache(50000, 16)),
210
210
  write_buffer_manager_(db_options_.db_write_buffer_size),
211
- versions_(new VersionSet(dbname_, &db_options_, env_options_,
212
- table_cache_.get(), &write_buffer_manager_,
213
- &write_controller_,
214
- /*block_cache_tracer=*/nullptr,
215
- /*io_tracer=*/nullptr, /*db_session_id*/ "")),
211
+ versions_(new VersionSet(
212
+ dbname_, &db_options_, env_options_, table_cache_.get(),
213
+ &write_buffer_manager_, &write_controller_,
214
+ /*block_cache_tracer=*/nullptr,
215
+ /*io_tracer=*/nullptr, /*db_id*/ "", /*db_session_id*/ "")),
216
216
  shutting_down_(false),
217
217
  mock_table_factory_(new mock::MockTableFactory()),
218
218
  error_handler_(nullptr, db_options_, &mutex_),
@@ -444,7 +444,7 @@ class CompactionJobTestBase : public testing::Test {
444
444
  new VersionSet(dbname_, &db_options_, env_options_, table_cache_.get(),
445
445
  &write_buffer_manager_, &write_controller_,
446
446
  /*block_cache_tracer=*/nullptr, /*io_tracer=*/nullptr,
447
- /*db_session_id*/ ""));
447
+ /*db_id*/ "", /*db_session_id*/ ""));
448
448
  compaction_job_stats_.Reset();
449
449
  ASSERT_OK(SetIdentityFile(env_, dbname_));
450
450
 
@@ -448,6 +448,14 @@ bool LevelCompactionBuilder::PickFileToCompact() {
448
448
  // do not pick a file to compact if it is being compacted
449
449
  // from n-1 level.
450
450
  if (f->being_compacted) {
451
+ if (ioptions_.compaction_pri == kRoundRobin) {
452
+ // TODO(zichen): this file may be involved in one compaction from
453
+ // an upper level, cannot advance the cursor for round-robin policy.
454
+ // Currently, we do not pick any file to compact in this case. We
455
+ // should fix this later to ensure a compaction is picked but the
456
+ // cursor shall not be advanced.
457
+ return false;
458
+ }
451
459
  continue;
452
460
  }
453
461
 
@@ -460,6 +468,13 @@ bool LevelCompactionBuilder::PickFileToCompact() {
460
468
  // A locked (pending compaction) input-level file was pulled in due to
461
469
  // user-key overlap.
462
470
  start_level_inputs_.clear();
471
+
472
+ // To ensure every file is selected in a round-robin manner, we cannot
473
+ // skip the current file. So we return false and wait for the next time
474
+ // we can pick this file to compact
475
+ if (ioptions_.compaction_pri == kRoundRobin) {
476
+ return false;
477
+ }
463
478
  continue;
464
479
  }
465
480
 
@@ -479,6 +494,10 @@ bool LevelCompactionBuilder::PickFileToCompact() {
479
494
  !compaction_picker_->ExpandInputsToCleanCut(cf_name_, vstorage_,
480
495
  &output_level_inputs)) {
481
496
  start_level_inputs_.clear();
497
+ // The same reason as above to ensure the round-robin compaction
498
+ if (ioptions_.compaction_pri == kRoundRobin) {
499
+ return false;
500
+ }
482
501
  continue;
483
502
  }
484
503
  base_index_ = index;
@@ -486,8 +505,9 @@ bool LevelCompactionBuilder::PickFileToCompact() {
486
505
  }
487
506
 
488
507
  // store where to start the iteration in the next call to PickCompaction
489
- vstorage_->SetNextCompactionIndex(start_level_, cmp_idx);
490
-
508
+ if (ioptions_.compaction_pri != kRoundRobin) {
509
+ vstorage_->SetNextCompactionIndex(start_level_, cmp_idx);
510
+ }
491
511
  return start_level_inputs_.size() > 0;
492
512
  }
493
513
 
@@ -1311,6 +1311,44 @@ TEST_F(CompactionPickerTest, CompactionPriMinOverlapping4) {
1311
1311
  ASSERT_EQ(6U, compaction->input(0, 0)->fd.GetNumber());
1312
1312
  }
1313
1313
 
1314
+ TEST_F(CompactionPickerTest, CompactionPriRoundRobin) {
1315
+ std::vector<InternalKey> test_cursors = {InternalKey("249", 100, kTypeValue),
1316
+ InternalKey("600", 100, kTypeValue),
1317
+ InternalKey()};
1318
+ std::vector<uint32_t> selected_files = {8U, 6U, 6U};
1319
+
1320
+ ioptions_.compaction_pri = kRoundRobin;
1321
+ mutable_cf_options_.max_bytes_for_level_base = 10000000;
1322
+ mutable_cf_options_.max_bytes_for_level_multiplier = 10;
1323
+ for (size_t i = 0; i < test_cursors.size(); i++) {
1324
+ // start a brand new version in each test.
1325
+ NewVersionStorage(6, kCompactionStyleLevel);
1326
+ vstorage_->ResizeCompactCursors(6);
1327
+ // Set the cursor
1328
+ vstorage_->AddCursorForOneLevel(2, test_cursors[i]);
1329
+ Add(2, 6U, "150", "199", 50000000U); // Overlap with 26U, 27U
1330
+ Add(2, 7U, "200", "249", 50000000U); // File not overlapping
1331
+ Add(2, 8U, "300", "600", 50000000U); // Overlap with 28U, 29U
1332
+
1333
+ Add(3, 26U, "130", "165", 60000000U);
1334
+ Add(3, 27U, "166", "170", 60000000U);
1335
+ Add(3, 28U, "270", "340", 60000000U);
1336
+ Add(3, 29U, "401", "500", 60000000U);
1337
+ UpdateVersionStorageInfo();
1338
+ LevelCompactionPicker local_level_compaction_picker =
1339
+ LevelCompactionPicker(ioptions_, &icmp_);
1340
+ std::unique_ptr<Compaction> compaction(
1341
+ local_level_compaction_picker.PickCompaction(
1342
+ cf_name_, mutable_cf_options_, mutable_db_options_, vstorage_.get(),
1343
+ &log_buffer_));
1344
+ ASSERT_TRUE(compaction.get() != nullptr);
1345
+ ASSERT_EQ(1U, compaction->num_input_files(0));
1346
+ ASSERT_EQ(selected_files[i], compaction->input(0, 0)->fd.GetNumber());
1347
+ // release the version storage
1348
+ DeleteVersionStorage();
1349
+ }
1350
+ }
1351
+
1314
1352
  // This test exhibits the bug where we don't properly reset parent_index in
1315
1353
  // PickCompaction()
1316
1354
  TEST_F(CompactionPickerTest, ParentIndexResetBug) {
@@ -2221,6 +2221,7 @@ TEST_F(DBMultiGetAsyncIOTest, GetFromL0) {
2221
2221
  // No async IO in this case since we don't do parallel lookup in L0
2222
2222
  ASSERT_EQ(multiget_io_batch_size.count, 0);
2223
2223
  ASSERT_EQ(multiget_io_batch_size.max, 0);
2224
+ ASSERT_EQ(statistics()->getTickerCount(MULTIGET_COROUTINE_COUNT), 0);
2224
2225
  }
2225
2226
 
2226
2227
  TEST_F(DBMultiGetAsyncIOTest, GetFromL1) {
@@ -2257,6 +2258,7 @@ TEST_F(DBMultiGetAsyncIOTest, GetFromL1) {
2257
2258
  // A batch of 3 async IOs is expected, one for each overlapping file in L1
2258
2259
  ASSERT_EQ(multiget_io_batch_size.count, 1);
2259
2260
  ASSERT_EQ(multiget_io_batch_size.max, 3);
2261
+ ASSERT_EQ(statistics()->getTickerCount(MULTIGET_COROUTINE_COUNT), 3);
2260
2262
  }
2261
2263
 
2262
2264
  TEST_F(DBMultiGetAsyncIOTest, LastKeyInFile) {
@@ -2407,27 +2409,37 @@ TEST_F(DBBasicTest, MultiGetStats) {
2407
2409
  values.data(), s.data(), false);
2408
2410
 
2409
2411
  ASSERT_EQ(values.size(), kMultiGetBatchSize);
2410
- HistogramData hist_data_blocks;
2412
+ HistogramData hist_level;
2411
2413
  HistogramData hist_index_and_filter_blocks;
2412
2414
  HistogramData hist_sst;
2413
2415
 
2414
- options.statistics->histogramData(NUM_DATA_BLOCKS_READ_PER_LEVEL,
2415
- &hist_data_blocks);
2416
+ options.statistics->histogramData(NUM_LEVEL_READ_PER_MULTIGET, &hist_level);
2416
2417
  options.statistics->histogramData(NUM_INDEX_AND_FILTER_BLOCKS_READ_PER_LEVEL,
2417
2418
  &hist_index_and_filter_blocks);
2418
2419
  options.statistics->histogramData(NUM_SST_READ_PER_LEVEL, &hist_sst);
2419
2420
 
2420
2421
  // Maximum number of blocks read from a file system in a level.
2421
- ASSERT_EQ(hist_data_blocks.max, 32);
2422
+ ASSERT_EQ(hist_level.max, 1);
2422
2423
  ASSERT_GT(hist_index_and_filter_blocks.max, 0);
2423
2424
  // Maximum number of sst files read from file system in a level.
2424
2425
  ASSERT_EQ(hist_sst.max, 2);
2425
2426
 
2426
2427
  // Minimum number of blocks read in a level.
2427
- ASSERT_EQ(hist_data_blocks.min, 4);
2428
+ ASSERT_EQ(hist_level.min, 1);
2428
2429
  ASSERT_GT(hist_index_and_filter_blocks.min, 0);
2429
2430
  // Minimum number of sst files read in a level.
2430
2431
  ASSERT_EQ(hist_sst.min, 1);
2432
+
2433
+ for (PinnableSlice& value : values) {
2434
+ value.Reset();
2435
+ }
2436
+ for (Status& status : s) {
2437
+ status = Status::OK();
2438
+ }
2439
+ db_->MultiGet(read_opts, handles_[1], kMultiGetBatchSize, &keys[950],
2440
+ values.data(), s.data(), false);
2441
+ options.statistics->histogramData(NUM_LEVEL_READ_PER_MULTIGET, &hist_level);
2442
+ ASSERT_EQ(hist_level.max, 2);
2431
2443
  }
2432
2444
 
2433
2445
  // Test class for batched MultiGet with prefix extractor
@@ -670,7 +670,7 @@ class DBBlockCacheTest1 : public DBTestBase,
670
670
  };
671
671
 
672
672
  INSTANTIATE_TEST_CASE_P(DBBlockCacheTest1, DBBlockCacheTest1,
673
- ::testing::Values(1, 2, 3));
673
+ ::testing::Values(1, 2));
674
674
 
675
675
  TEST_P(DBBlockCacheTest1, WarmCacheWithBlocksDuringFlush) {
676
676
  Options options = CurrentOptions();
@@ -686,13 +686,10 @@ TEST_P(DBBlockCacheTest1, WarmCacheWithBlocksDuringFlush) {
686
686
  table_options.partition_filters = true;
687
687
  table_options.index_type =
688
688
  BlockBasedTableOptions::IndexType::kTwoLevelIndexSearch;
689
- table_options.filter_policy.reset(NewBloomFilterPolicy(10, false));
689
+ table_options.filter_policy.reset(NewBloomFilterPolicy(10));
690
690
  break;
691
- case 2: // block-based filter
692
- table_options.filter_policy.reset(NewBloomFilterPolicy(10, true));
693
- break;
694
- case 3: // full filter
695
- table_options.filter_policy.reset(NewBloomFilterPolicy(10, false));
691
+ case 2: // full filter
692
+ table_options.filter_policy.reset(NewBloomFilterPolicy(10));
696
693
  break;
697
694
  default:
698
695
  assert(false);