@nxtedition/rocksdb 7.1.10 → 7.1.13

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (51)
  1. package/binding.cc +50 -33
  2. package/deps/rocksdb/rocksdb/CMakeLists.txt +2 -1
  3. package/deps/rocksdb/rocksdb/TARGETS +2 -0
  4. package/deps/rocksdb/rocksdb/db/db_basic_test.cc +60 -17
  5. package/deps/rocksdb/rocksdb/db/db_impl/compacted_db_impl.cc +4 -4
  6. package/deps/rocksdb/rocksdb/db/db_impl/db_impl.cc +81 -37
  7. package/deps/rocksdb/rocksdb/db/db_impl/db_impl.h +6 -0
  8. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_readonly.cc +6 -6
  9. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_secondary.cc +10 -8
  10. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_write.cc +14 -9
  11. package/deps/rocksdb/rocksdb/db/db_memtable_test.cc +3 -3
  12. package/deps/rocksdb/rocksdb/db/db_write_buffer_manager_test.cc +69 -0
  13. package/deps/rocksdb/rocksdb/db/flush_job.cc +6 -6
  14. package/deps/rocksdb/rocksdb/db/memtable.cc +19 -7
  15. package/deps/rocksdb/rocksdb/db/memtable.h +8 -16
  16. package/deps/rocksdb/rocksdb/db/memtable_list.cc +27 -16
  17. package/deps/rocksdb/rocksdb/db/memtable_list.h +18 -11
  18. package/deps/rocksdb/rocksdb/db/memtable_list_test.cc +70 -55
  19. package/deps/rocksdb/rocksdb/db/table_cache.cc +9 -11
  20. package/deps/rocksdb/rocksdb/db/table_cache.h +2 -1
  21. package/deps/rocksdb/rocksdb/db/table_cache_sync_and_async.h +3 -3
  22. package/deps/rocksdb/rocksdb/db/version_set.cc +530 -257
  23. package/deps/rocksdb/rocksdb/db/version_set.h +32 -2
  24. package/deps/rocksdb/rocksdb/db/version_set_sync_and_async.h +2 -2
  25. package/deps/rocksdb/rocksdb/db/wide/db_wide_basic_test.cc +64 -12
  26. package/deps/rocksdb/rocksdb/db/wide/wide_columns.cc +18 -0
  27. package/deps/rocksdb/rocksdb/include/rocksdb/db.h +8 -0
  28. package/deps/rocksdb/rocksdb/include/rocksdb/options.h +13 -1
  29. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/stackable_db.h +7 -0
  30. package/deps/rocksdb/rocksdb/include/rocksdb/wide_columns.h +83 -0
  31. package/deps/rocksdb/rocksdb/options/options.cc +4 -2
  32. package/deps/rocksdb/rocksdb/src.mk +1 -0
  33. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader.cc +3 -10
  34. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader_test.cc +5 -4
  35. package/deps/rocksdb/rocksdb/table/block_based/block_like_traits.h +10 -28
  36. package/deps/rocksdb/rocksdb/table/block_based/data_block_hash_index_test.cc +4 -4
  37. package/deps/rocksdb/rocksdb/table/cuckoo/cuckoo_table_reader_test.cc +11 -9
  38. package/deps/rocksdb/rocksdb/table/get_context.cc +34 -22
  39. package/deps/rocksdb/rocksdb/table/get_context.h +6 -3
  40. package/deps/rocksdb/rocksdb/table/multiget_context.h +69 -5
  41. package/deps/rocksdb/rocksdb/table/table_reader_bench.cc +2 -2
  42. package/deps/rocksdb/rocksdb/table/table_test.cc +8 -8
  43. package/deps/rocksdb/rocksdb/tools/db_bench_tool.cc +23 -0
  44. package/deps/rocksdb/rocksdb/utilities/fault_injection_secondary_cache.cc +27 -7
  45. package/deps/rocksdb/rocksdb/utilities/fault_injection_secondary_cache.h +8 -4
  46. package/deps/rocksdb/rocksdb.gyp +1 -0
  47. package/index.js +19 -12
  48. package/package.json +1 -1
  49. package/prebuilds/darwin-arm64/node.napi.node +0 -0
  50. package/prebuilds/darwin-x64/node.napi.node +0 -0
  51. package/prebuilds/linux-x64/node.napi.node +0 -0
@@ -46,6 +46,10 @@
46
46
  #include "db/version_edit.h"
47
47
  #include "db/write_controller.h"
48
48
  #include "env/file_system_tracer.h"
49
+ #if USE_COROUTINES
50
+ #include "folly/experimental/coro/BlockingWait.h"
51
+ #include "folly/experimental/coro/Collect.h"
52
+ #endif
49
53
  #include "monitoring/instrumented_mutex.h"
50
54
  #include "options/db_options.h"
51
55
  #include "port/port.h"
@@ -54,6 +58,7 @@
54
58
  #include "table/get_context.h"
55
59
  #include "table/multiget_context.h"
56
60
  #include "trace_replay/block_cache_tracer.h"
61
+ #include "util/autovector.h"
57
62
  #include "util/coro_utils.h"
58
63
  #include "util/hash_containers.h"
59
64
 
@@ -76,6 +81,7 @@ class ColumnFamilySet;
76
81
  class MergeIteratorBuilder;
77
82
  class SystemClock;
78
83
  class ManifestTailer;
84
+ class FilePickerMultiGet;
79
85
 
80
86
  // VersionEdit is always supposed to be valid and it is used to point at
81
87
  // entries in Manifest. Ideally it should not be used as a container to
@@ -836,7 +842,8 @@ class Version {
836
842
  // REQUIRES: lock is not held
837
843
  // REQUIRES: pinned_iters_mgr != nullptr
838
844
  void Get(const ReadOptions&, const LookupKey& key, PinnableSlice* value,
839
- std::string* timestamp, Status* status, MergeContext* merge_context,
845
+ PinnableWideColumns* columns, std::string* timestamp, Status* status,
846
+ MergeContext* merge_context,
840
847
  SequenceNumber* max_covering_tombstone_seq,
841
848
  PinnedIteratorsManager* pinned_iters_mgr,
842
849
  bool* value_found = nullptr, bool* key_exists = nullptr,
@@ -990,11 +997,34 @@ class Version {
990
997
  DECLARE_SYNC_AND_ASYNC(
991
998
  /* ret_type */ Status, /* func_name */ MultiGetFromSST,
992
999
  const ReadOptions& read_options, MultiGetRange file_range,
993
- int hit_file_level, bool skip_filters, FdWithKeyRange* f,
1000
+ int hit_file_level, bool skip_filters, bool skip_range_deletions,
1001
+ FdWithKeyRange* f,
994
1002
  std::unordered_map<uint64_t, BlobReadContexts>& blob_ctxs,
995
1003
  Cache::Handle* table_handle, uint64_t& num_filter_read,
996
1004
  uint64_t& num_index_read, uint64_t& num_sst_read);
997
1005
 
1006
+ #ifdef USE_COROUTINES
1007
+ // MultiGet using async IO to read data blocks from SST files in parallel
1008
+ // within and across levels
1009
+ Status MultiGetAsync(
1010
+ const ReadOptions& options, MultiGetRange* range,
1011
+ std::unordered_map<uint64_t, BlobReadContexts>* blob_ctxs);
1012
+
1013
+ // A helper function to lookup a batch of keys in a single level. It will
1014
+ // queue coroutine tasks to mget_tasks. It may also split the input batch
1015
+ // by creating a new batch with keys definitely not in this level and
1016
+ // enqueuing it to to_process.
1017
+ Status ProcessBatch(const ReadOptions& read_options,
1018
+ FilePickerMultiGet* batch,
1019
+ std::vector<folly::coro::Task<Status>>& mget_tasks,
1020
+ std::unordered_map<uint64_t, BlobReadContexts>* blob_ctxs,
1021
+ autovector<FilePickerMultiGet, 4>& batches,
1022
+ std::deque<size_t>& waiting,
1023
+ std::deque<size_t>& to_process,
1024
+ unsigned int& num_tasks_queued, uint64_t& num_filter_read,
1025
+ uint64_t& num_index_read, uint64_t& num_sst_read);
1026
+ #endif
1027
+
998
1028
  ColumnFamilyData* cfd_; // ColumnFamilyData to which this Version belongs
999
1029
  Logger* info_log_;
1000
1030
  Statistics* db_statistics_;
@@ -14,7 +14,7 @@ namespace ROCKSDB_NAMESPACE {
14
14
  // Lookup a batch of keys in a single SST file
15
15
  DEFINE_SYNC_AND_ASYNC(Status, Version::MultiGetFromSST)
16
16
  (const ReadOptions& read_options, MultiGetRange file_range, int hit_file_level,
17
- bool skip_filters, FdWithKeyRange* f,
17
+ bool skip_filters, bool skip_range_deletions, FdWithKeyRange* f,
18
18
  std::unordered_map<uint64_t, BlobReadContexts>& blob_ctxs,
19
19
  Cache::Handle* table_handle, uint64_t& num_filter_read,
20
20
  uint64_t& num_index_read, uint64_t& num_sst_read) {
@@ -27,7 +27,7 @@ DEFINE_SYNC_AND_ASYNC(Status, Version::MultiGetFromSST)
27
27
  read_options, *internal_comparator(), *f->file_metadata, &file_range,
28
28
  mutable_cf_options_.prefix_extractor,
29
29
  cfd_->internal_stats()->GetFileReadHist(hit_file_level), skip_filters,
30
- hit_file_level, table_handle);
30
+ skip_range_deletions, hit_file_level, table_handle);
31
31
  // TODO: examine the behavior for corrupted key
32
32
  if (timer_enabled) {
33
33
  PERF_COUNTER_BY_LEVEL_ADD(get_from_table_nanos, timer.ElapsedNanos(),
@@ -22,10 +22,20 @@ class DBWideBasicTest : public DBTestBase {
22
22
  TEST_F(DBWideBasicTest, PutEntity) {
23
23
  Options options = GetDefaultOptions();
24
24
 
25
+ // Write a couple of wide-column entities and a plain old key-value, then read
26
+ // them back.
25
27
  constexpr char first_key[] = "first";
28
+ constexpr char first_value_of_default_column[] = "hello";
29
+ WideColumns first_columns{
30
+ {kDefaultWideColumnName, first_value_of_default_column},
31
+ {"attr_name1", "foo"},
32
+ {"attr_name2", "bar"}};
33
+
26
34
  constexpr char second_key[] = "second";
35
+ WideColumns second_columns{{"attr_one", "two"}, {"attr_three", "four"}};
27
36
 
28
- constexpr char first_value_of_default_column[] = "hello";
37
+ constexpr char third_key[] = "third";
38
+ constexpr char third_value[] = "baz";
29
39
 
30
40
  auto verify = [&]() {
31
41
  {
@@ -35,6 +45,13 @@ TEST_F(DBWideBasicTest, PutEntity) {
35
45
  ASSERT_EQ(result, first_value_of_default_column);
36
46
  }
37
47
 
48
+ {
49
+ PinnableWideColumns result;
50
+ ASSERT_OK(db_->GetEntity(ReadOptions(), db_->DefaultColumnFamily(),
51
+ first_key, &result));
52
+ ASSERT_EQ(result.columns(), first_columns);
53
+ }
54
+
38
55
  {
39
56
  PinnableSlice result;
40
57
  ASSERT_OK(db_->Get(ReadOptions(), db_->DefaultColumnFamily(), second_key,
@@ -43,9 +60,32 @@ TEST_F(DBWideBasicTest, PutEntity) {
43
60
  }
44
61
 
45
62
  {
46
- constexpr size_t num_keys = 2;
63
+ PinnableWideColumns result;
64
+ ASSERT_OK(db_->GetEntity(ReadOptions(), db_->DefaultColumnFamily(),
65
+ second_key, &result));
66
+ ASSERT_EQ(result.columns(), second_columns);
67
+ }
47
68
 
48
- std::array<Slice, num_keys> keys{{first_key, second_key}};
69
+ {
70
+ PinnableSlice result;
71
+ ASSERT_OK(db_->Get(ReadOptions(), db_->DefaultColumnFamily(), third_key,
72
+ &result));
73
+ ASSERT_EQ(result, third_value);
74
+ }
75
+
76
+ {
77
+ PinnableWideColumns result;
78
+ ASSERT_OK(db_->GetEntity(ReadOptions(), db_->DefaultColumnFamily(),
79
+ third_key, &result));
80
+
81
+ const WideColumns expected_columns{{kDefaultWideColumnName, third_value}};
82
+ ASSERT_EQ(result.columns(), expected_columns);
83
+ }
84
+
85
+ {
86
+ constexpr size_t num_keys = 3;
87
+
88
+ std::array<Slice, num_keys> keys{{first_key, second_key, third_key}};
49
89
  std::array<PinnableSlice, num_keys> values;
50
90
  std::array<Status, num_keys> statuses;
51
91
 
@@ -57,6 +97,9 @@ TEST_F(DBWideBasicTest, PutEntity) {
57
97
 
58
98
  ASSERT_OK(statuses[1]);
59
99
  ASSERT_TRUE(values[1].empty());
100
+
101
+ ASSERT_OK(statuses[2]);
102
+ ASSERT_EQ(values[2], third_value);
60
103
  }
61
104
 
62
105
  {
@@ -74,6 +117,12 @@ TEST_F(DBWideBasicTest, PutEntity) {
74
117
  ASSERT_EQ(iter->key(), second_key);
75
118
  ASSERT_TRUE(iter->value().empty());
76
119
 
120
+ iter->Next();
121
+ ASSERT_TRUE(iter->Valid());
122
+ ASSERT_OK(iter->status());
123
+ ASSERT_EQ(iter->key(), third_key);
124
+ ASSERT_EQ(iter->value(), third_value);
125
+
77
126
  iter->Next();
78
127
  ASSERT_FALSE(iter->Valid());
79
128
  ASSERT_OK(iter->status());
@@ -81,6 +130,12 @@ TEST_F(DBWideBasicTest, PutEntity) {
81
130
  iter->SeekToLast();
82
131
  ASSERT_TRUE(iter->Valid());
83
132
  ASSERT_OK(iter->status());
133
+ ASSERT_EQ(iter->key(), third_key);
134
+ ASSERT_EQ(iter->value(), third_value);
135
+
136
+ iter->Prev();
137
+ ASSERT_TRUE(iter->Valid());
138
+ ASSERT_OK(iter->status());
84
139
  ASSERT_EQ(iter->key(), second_key);
85
140
  ASSERT_TRUE(iter->value().empty());
86
141
 
@@ -96,23 +151,20 @@ TEST_F(DBWideBasicTest, PutEntity) {
96
151
  }
97
152
  };
98
153
 
99
- // Use the DB::PutEntity API
100
- WideColumns first_columns{
101
- {kDefaultWideColumnName, first_value_of_default_column},
102
- {"attr_name1", "foo"},
103
- {"attr_name2", "bar"}};
104
-
154
+ // Use the DB::PutEntity API to write the first entity
105
155
  ASSERT_OK(db_->PutEntity(WriteOptions(), db_->DefaultColumnFamily(),
106
156
  first_key, first_columns));
107
157
 
108
- // Use WriteBatch
109
- WideColumns second_columns{{"attr_one", "two"}, {"attr_three", "four"}};
110
-
158
+ // Use WriteBatch to write the second entity
111
159
  WriteBatch batch;
112
160
  ASSERT_OK(
113
161
  batch.PutEntity(db_->DefaultColumnFamily(), second_key, second_columns));
114
162
  ASSERT_OK(db_->Write(WriteOptions(), &batch));
115
163
 
164
+ // Use Put to write the plain key-value
165
+ ASSERT_OK(db_->Put(WriteOptions(), db_->DefaultColumnFamily(), third_key,
166
+ third_value));
167
+
116
168
  // Try reading from memtable
117
169
  verify();
118
170
 
@@ -0,0 +1,18 @@
1
+ // Copyright (c) Meta Platforms, Inc. and affiliates.
2
+ // This source code is licensed under both the GPLv2 (found in the
3
+ // COPYING file in the root directory) and Apache 2.0 License
4
+ // (found in the LICENSE.Apache file in the root directory).
5
+
6
+ #include "rocksdb/wide_columns.h"
7
+
8
+ #include "db/wide/wide_column_serialization.h"
9
+
10
+ namespace ROCKSDB_NAMESPACE {
11
+
12
+ Status PinnableWideColumns::CreateIndexForWideColumns() {
13
+ Slice value_copy = value_;
14
+
15
+ return WideColumnSerialization::Deserialize(value_copy, columns_);
16
+ }
17
+
18
+ } // namespace ROCKSDB_NAMESPACE
@@ -567,6 +567,14 @@ class DB {
567
567
  return Get(options, DefaultColumnFamily(), key, value, timestamp);
568
568
  }
569
569
 
570
+ // UNDER CONSTRUCTION -- DO NOT USE
571
+ virtual Status GetEntity(const ReadOptions& /* options */,
572
+ ColumnFamilyHandle* /* column_family */,
573
+ const Slice& /* key */,
574
+ PinnableWideColumns* /* columns */) {
575
+ return Status::NotSupported("GetEntity not supported");
576
+ }
577
+
570
578
  // Populates the `merge_operands` array with all the merge operands in the DB
571
579
  // for `key`. The `merge_operands` array will be populated in the order of
572
580
  // insertion. The number of entries populated in `merge_operands` will be
@@ -905,7 +905,8 @@ struct DBOptions {
905
905
  // can be passed into multiple DBs and it will track the sum of size of all
906
906
  // the DBs. If the total size of all live memtables of all the DBs exceeds
907
907
  // a limit, a flush will be triggered in the next DB to which the next write
908
- // is issued.
908
+ // is issued, as long as there is one or more column family not already
909
+ // flushing.
909
910
  //
910
911
  // If the object is only passed to one DB, the behavior is the same as
911
912
  // db_write_buffer_size. When write_buffer_manager is set, the value set will
@@ -1685,6 +1686,17 @@ struct ReadOptions {
1685
1686
  // Default: false
1686
1687
  bool async_io;
1687
1688
 
1689
+ // Experimental
1690
+ //
1691
+ // If async_io is set, then this flag controls whether we read SST files
1692
+ // in multiple levels asynchronously. Enabling this flag can help reduce
1693
+ // MultiGet latency by maximizing the number of SST files read in
1694
+ // parallel if the keys in the MultiGet batch are in different levels. It
1695
+ // comes at the expense of slightly higher CPU overhead.
1696
+ //
1697
+ // Default: false
1698
+ bool optimize_multiget_for_io;
1699
+
1688
1700
  ReadOptions();
1689
1701
  ReadOptions(bool cksum, bool cache);
1690
1702
  };
@@ -99,6 +99,13 @@ class StackableDB : public DB {
99
99
  return db_->Get(options, column_family, key, value);
100
100
  }
101
101
 
102
+ using DB::GetEntity;
103
+ Status GetEntity(const ReadOptions& options,
104
+ ColumnFamilyHandle* column_family, const Slice& key,
105
+ PinnableWideColumns* columns) override {
106
+ return db_->GetEntity(options, column_family, key, columns);
107
+ }
108
+
102
109
  using DB::GetMergeOperands;
103
110
  virtual Status GetMergeOperands(
104
111
  const ReadOptions& options, ColumnFamilyHandle* column_family,
@@ -11,6 +11,7 @@
11
11
 
12
12
  #include "rocksdb/rocksdb_namespace.h"
13
13
  #include "rocksdb/slice.h"
14
+ #include "rocksdb/status.h"
14
15
 
15
16
  namespace ROCKSDB_NAMESPACE {
16
17
 
@@ -69,8 +70,90 @@ inline bool operator!=(const WideColumn& lhs, const WideColumn& rhs) {
69
70
  return !(lhs == rhs);
70
71
  }
71
72
 
73
+ // A collection of wide columns.
72
74
  using WideColumns = std::vector<WideColumn>;
73
75
 
76
+ // The anonymous default wide column (an empty Slice).
74
77
  extern const Slice kDefaultWideColumnName;
75
78
 
79
+ // A self-contained collection of wide columns. Used for the results of
80
+ // wide-column queries.
81
+ class PinnableWideColumns {
82
+ public:
83
+ const WideColumns& columns() const { return columns_; }
84
+ size_t serialized_size() const { return value_.size(); }
85
+
86
+ void SetPlainValue(const Slice& value);
87
+ void SetPlainValue(const Slice& value, Cleanable* cleanable);
88
+
89
+ Status SetWideColumnValue(const Slice& value);
90
+ Status SetWideColumnValue(const Slice& value, Cleanable* cleanable);
91
+
92
+ void Reset();
93
+
94
+ private:
95
+ void CopyValue(const Slice& value);
96
+ void PinOrCopyValue(const Slice& value, Cleanable* cleanable);
97
+ void CreateIndexForPlainValue();
98
+ Status CreateIndexForWideColumns();
99
+
100
+ PinnableSlice value_;
101
+ WideColumns columns_;
102
+ };
103
+
104
+ inline void PinnableWideColumns::CopyValue(const Slice& value) {
105
+ value_.PinSelf(value);
106
+ }
107
+
108
+ inline void PinnableWideColumns::PinOrCopyValue(const Slice& value,
109
+ Cleanable* cleanable) {
110
+ if (!cleanable) {
111
+ CopyValue(value);
112
+ return;
113
+ }
114
+
115
+ value_.PinSlice(value, cleanable);
116
+ }
117
+
118
+ inline void PinnableWideColumns::CreateIndexForPlainValue() {
119
+ columns_ = WideColumns{{kDefaultWideColumnName, value_}};
120
+ }
121
+
122
+ inline void PinnableWideColumns::SetPlainValue(const Slice& value) {
123
+ CopyValue(value);
124
+ CreateIndexForPlainValue();
125
+ }
126
+
127
+ inline void PinnableWideColumns::SetPlainValue(const Slice& value,
128
+ Cleanable* cleanable) {
129
+ PinOrCopyValue(value, cleanable);
130
+ CreateIndexForPlainValue();
131
+ }
132
+
133
+ inline Status PinnableWideColumns::SetWideColumnValue(const Slice& value) {
134
+ CopyValue(value);
135
+ return CreateIndexForWideColumns();
136
+ }
137
+
138
+ inline Status PinnableWideColumns::SetWideColumnValue(const Slice& value,
139
+ Cleanable* cleanable) {
140
+ PinOrCopyValue(value, cleanable);
141
+ return CreateIndexForWideColumns();
142
+ }
143
+
144
+ inline void PinnableWideColumns::Reset() {
145
+ value_.Reset();
146
+ columns_.clear();
147
+ }
148
+
149
+ inline bool operator==(const PinnableWideColumns& lhs,
150
+ const PinnableWideColumns& rhs) {
151
+ return lhs.columns() == rhs.columns();
152
+ }
153
+
154
+ inline bool operator!=(const PinnableWideColumns& lhs,
155
+ const PinnableWideColumns& rhs) {
156
+ return !(lhs == rhs);
157
+ }
158
+
76
159
  } // namespace ROCKSDB_NAMESPACE
@@ -696,7 +696,8 @@ ReadOptions::ReadOptions()
696
696
  io_timeout(std::chrono::microseconds::zero()),
697
697
  value_size_soft_limit(std::numeric_limits<uint64_t>::max()),
698
698
  adaptive_readahead(false),
699
- async_io(false) {}
699
+ async_io(false),
700
+ optimize_multiget_for_io(false) {}
700
701
 
701
702
  ReadOptions::ReadOptions(bool cksum, bool cache)
702
703
  : snapshot(nullptr),
@@ -721,6 +722,7 @@ ReadOptions::ReadOptions(bool cksum, bool cache)
721
722
  io_timeout(std::chrono::microseconds::zero()),
722
723
  value_size_soft_limit(std::numeric_limits<uint64_t>::max()),
723
724
  adaptive_readahead(false),
724
- async_io(false) {}
725
+ async_io(false),
726
+ optimize_multiget_for_io(false) {}
725
727
 
726
728
  } // namespace ROCKSDB_NAMESPACE
@@ -90,6 +90,7 @@ LIB_SOURCES = \
90
90
  db/wal_edit.cc \
91
91
  db/wal_manager.cc \
92
92
  db/wide/wide_column_serialization.cc \
93
+ db/wide/wide_columns.cc \
93
94
  db/write_batch.cc \
94
95
  db/write_batch_base.cc \
95
96
  db/write_controller.cc \
@@ -1251,12 +1251,8 @@ Status BlockBasedTable::GetDataBlockFromCache(
1251
1251
  Statistics* statistics = rep_->ioptions.statistics.get();
1252
1252
  bool using_zstd = rep_->blocks_definitely_zstd_compressed;
1253
1253
  const FilterPolicy* filter_policy = rep_->filter_policy;
1254
- CacheCreateCallback<TBlocklike> callback(read_amp_bytes_per_bit, statistics,
1255
- using_zstd, filter_policy);
1256
- // avoid dynamic memory allocation by using the reference (std::ref) of the
1257
- // callback. Otherwise, binding a functor to std::function will allocate extra
1258
- // memory from heap.
1259
- Cache::CreateCallback create_cb(std::ref(callback));
1254
+ Cache::CreateCallback create_cb = GetCreateCallback<TBlocklike>(
1255
+ read_amp_bytes_per_bit, statistics, using_zstd, filter_policy);
1260
1256
 
1261
1257
  // Lookup uncompressed cache first
1262
1258
  if (block_cache != nullptr) {
@@ -1286,11 +1282,8 @@ Status BlockBasedTable::GetDataBlockFromCache(
1286
1282
  BlockContents contents;
1287
1283
  if (rep_->ioptions.lowest_used_cache_tier ==
1288
1284
  CacheTier::kNonVolatileBlockTier) {
1289
- CacheCreateCallback<BlockContents> special_callback(
1285
+ Cache::CreateCallback create_cb_special = GetCreateCallback<BlockContents>(
1290
1286
  read_amp_bytes_per_bit, statistics, using_zstd, filter_policy);
1291
- // avoid dynamic memory allocation by using the reference (std::ref) of the
1292
- // callback. Make sure the callback is only used within this code block.
1293
- Cache::CreateCallback create_cb_special(std::ref(special_callback));
1294
1287
  block_cache_compressed_handle = block_cache_compressed->Lookup(
1295
1288
  cache_key,
1296
1289
  BlocklikeTraits<BlockContents>::GetCacheItemHelper(block_type),
@@ -248,10 +248,11 @@ TEST_P(BlockBasedTableReaderTest, MultiGet) {
248
248
  autovector<KeyContext, MultiGetContext::MAX_BATCH_SIZE> key_context;
249
249
  autovector<KeyContext*, MultiGetContext::MAX_BATCH_SIZE> sorted_keys;
250
250
  for (size_t i = 0; i < keys.size(); ++i) {
251
- get_context.emplace_back(
252
- BytewiseComparator(), nullptr, nullptr, nullptr, GetContext::kNotFound,
253
- keys[i], &values[i], nullptr, nullptr, nullptr, true /* do_merge */,
254
- nullptr, nullptr, nullptr, nullptr, nullptr, nullptr);
251
+ get_context.emplace_back(BytewiseComparator(), nullptr, nullptr, nullptr,
252
+ GetContext::kNotFound, keys[i], &values[i],
253
+ nullptr, nullptr, nullptr, nullptr,
254
+ true /* do_merge */, nullptr, nullptr, nullptr,
255
+ nullptr, nullptr, nullptr);
255
256
  key_context.emplace_back(nullptr, keys[i], &values[i], nullptr,
256
257
  &statuses.back());
257
258
  key_context.back().get_context = &get_context.back();
@@ -21,42 +21,24 @@ template <typename T, CacheEntryRole R>
21
21
  Cache::CacheItemHelper* GetCacheItemHelperForRole();
22
22
 
23
23
  template <typename TBlocklike>
24
- class CacheCreateCallback {
25
- public:
26
- CacheCreateCallback() = delete;
27
- CacheCreateCallback(const CacheCreateCallback&) = delete;
28
- CacheCreateCallback(CacheCreateCallback&&) = delete;
29
- CacheCreateCallback& operator=(const CacheCreateCallback&) = delete;
30
- CacheCreateCallback& operator=(CacheCreateCallback&&) = delete;
31
-
32
- explicit CacheCreateCallback(size_t read_amp_bytes_per_bit,
33
- Statistics* statistics, bool using_zstd,
34
- const FilterPolicy* filter_policy)
35
- : read_amp_bytes_per_bit_(read_amp_bytes_per_bit),
36
- statistics_(statistics),
37
- using_zstd_(using_zstd),
38
- filter_policy_(filter_policy) {}
39
-
40
- Status operator()(const void* buf, size_t size, void** out_obj,
41
- size_t* charge) {
24
+ Cache::CreateCallback GetCreateCallback(size_t read_amp_bytes_per_bit,
25
+ Statistics* statistics, bool using_zstd,
26
+ const FilterPolicy* filter_policy) {
27
+ return [read_amp_bytes_per_bit, statistics, using_zstd, filter_policy](
28
+ const void* buf, size_t size, void** out_obj,
29
+ size_t* charge) -> Status {
42
30
  assert(buf != nullptr);
43
31
  std::unique_ptr<char[]> buf_data(new char[size]());
44
32
  memcpy(buf_data.get(), buf, size);
45
33
  BlockContents bc = BlockContents(std::move(buf_data), size);
46
34
  TBlocklike* ucd_ptr = BlocklikeTraits<TBlocklike>::Create(
47
- std::move(bc), read_amp_bytes_per_bit_, statistics_, using_zstd_,
48
- filter_policy_);
35
+ std::move(bc), read_amp_bytes_per_bit, statistics, using_zstd,
36
+ filter_policy);
49
37
  *out_obj = reinterpret_cast<void*>(ucd_ptr);
50
38
  *charge = size;
51
39
  return Status::OK();
52
- }
53
-
54
- private:
55
- const size_t read_amp_bytes_per_bit_;
56
- Statistics* statistics_;
57
- const bool using_zstd_;
58
- const FilterPolicy* filter_policy_;
59
- };
40
+ };
41
+ }
60
42
 
61
43
  template <>
62
44
  class BlocklikeTraits<BlockContents> {
@@ -625,7 +625,7 @@ TEST(DataBlockHashIndex, BlockBoundary) {
625
625
  InternalKey seek_ikey(seek_ukey, 60, kTypeValue);
626
626
  GetContext get_context(options.comparator, nullptr, nullptr, nullptr,
627
627
  GetContext::kNotFound, seek_ukey, &value, nullptr,
628
- nullptr, true, nullptr, nullptr);
628
+ nullptr, nullptr, true, nullptr, nullptr);
629
629
 
630
630
  TestBoundary(ik1, v1, ik2, v2, seek_ikey, get_context, options);
631
631
  ASSERT_EQ(get_context.State(), GetContext::kFound);
@@ -650,7 +650,7 @@ TEST(DataBlockHashIndex, BlockBoundary) {
650
650
  InternalKey seek_ikey(seek_ukey, 60, kTypeValue);
651
651
  GetContext get_context(options.comparator, nullptr, nullptr, nullptr,
652
652
  GetContext::kNotFound, seek_ukey, &value, nullptr,
653
- nullptr, true, nullptr, nullptr);
653
+ nullptr, nullptr, true, nullptr, nullptr);
654
654
 
655
655
  TestBoundary(ik1, v1, ik2, v2, seek_ikey, get_context, options);
656
656
  ASSERT_EQ(get_context.State(), GetContext::kFound);
@@ -675,7 +675,7 @@ TEST(DataBlockHashIndex, BlockBoundary) {
675
675
  InternalKey seek_ikey(seek_ukey, 120, kTypeValue);
676
676
  GetContext get_context(options.comparator, nullptr, nullptr, nullptr,
677
677
  GetContext::kNotFound, seek_ukey, &value, nullptr,
678
- nullptr, true, nullptr, nullptr);
678
+ nullptr, nullptr, true, nullptr, nullptr);
679
679
 
680
680
  TestBoundary(ik1, v1, ik2, v2, seek_ikey, get_context, options);
681
681
  ASSERT_EQ(get_context.State(), GetContext::kFound);
@@ -700,7 +700,7 @@ TEST(DataBlockHashIndex, BlockBoundary) {
700
700
  InternalKey seek_ikey(seek_ukey, 5, kTypeValue);
701
701
  GetContext get_context(options.comparator, nullptr, nullptr, nullptr,
702
702
  GetContext::kNotFound, seek_ukey, &value, nullptr,
703
- nullptr, true, nullptr, nullptr);
703
+ nullptr, nullptr, true, nullptr, nullptr);
704
704
 
705
705
  TestBoundary(ik1, v1, ik2, v2, seek_ikey, get_context, options);
706
706
  ASSERT_EQ(get_context.State(), GetContext::kNotFound);
@@ -119,7 +119,8 @@ class CuckooReaderTest : public testing::Test {
119
119
  PinnableSlice value;
120
120
  GetContext get_context(ucomp, nullptr, nullptr, nullptr,
121
121
  GetContext::kNotFound, Slice(user_keys[i]), &value,
122
- nullptr, nullptr, true, nullptr, nullptr);
122
+ nullptr, nullptr, nullptr, nullptr, true, nullptr,
123
+ nullptr);
123
124
  ASSERT_OK(
124
125
  reader.Get(ReadOptions(), Slice(keys[i]), &get_context, nullptr));
125
126
  ASSERT_STREQ(values[i].c_str(), value.data());
@@ -341,8 +342,8 @@ TEST_F(CuckooReaderTest, WhenKeyNotFound) {
341
342
  AppendInternalKey(&not_found_key, ikey);
342
343
  PinnableSlice value;
343
344
  GetContext get_context(ucmp, nullptr, nullptr, nullptr, GetContext::kNotFound,
344
- Slice(not_found_key), &value, nullptr, nullptr, true,
345
- nullptr, nullptr);
345
+ Slice(not_found_key), &value, nullptr, nullptr,
346
+ nullptr, nullptr, true, nullptr, nullptr);
346
347
  ASSERT_OK(
347
348
  reader.Get(ReadOptions(), Slice(not_found_key), &get_context, nullptr));
348
349
  ASSERT_TRUE(value.empty());
@@ -356,7 +357,8 @@ TEST_F(CuckooReaderTest, WhenKeyNotFound) {
356
357
  value.Reset();
357
358
  GetContext get_context2(ucmp, nullptr, nullptr, nullptr,
358
359
  GetContext::kNotFound, Slice(not_found_key2), &value,
359
- nullptr, nullptr, true, nullptr, nullptr);
360
+ nullptr, nullptr, nullptr, nullptr, true, nullptr,
361
+ nullptr);
360
362
  ASSERT_OK(
361
363
  reader.Get(ReadOptions(), Slice(not_found_key2), &get_context2, nullptr));
362
364
  ASSERT_TRUE(value.empty());
@@ -370,9 +372,9 @@ TEST_F(CuckooReaderTest, WhenKeyNotFound) {
370
372
  AddHashLookups(ExtractUserKey(unused_key).ToString(),
371
373
  kNumHashFunc, kNumHashFunc);
372
374
  value.Reset();
373
- GetContext get_context3(ucmp, nullptr, nullptr, nullptr,
374
- GetContext::kNotFound, Slice(unused_key), &value,
375
- nullptr, nullptr, true, nullptr, nullptr);
375
+ GetContext get_context3(
376
+ ucmp, nullptr, nullptr, nullptr, GetContext::kNotFound, Slice(unused_key),
377
+ &value, nullptr, nullptr, nullptr, nullptr, true, nullptr, nullptr);
376
378
  ASSERT_OK(
377
379
  reader.Get(ReadOptions(), Slice(unused_key), &get_context3, nullptr));
378
380
  ASSERT_TRUE(value.empty());
@@ -447,7 +449,7 @@ void WriteFile(const std::vector<std::string>& keys,
447
449
  // Assume only the fast path is triggered
448
450
  GetContext get_context(nullptr, nullptr, nullptr, nullptr,
449
451
  GetContext::kNotFound, Slice(), &value, nullptr,
450
- nullptr, true, nullptr, nullptr);
452
+ nullptr, nullptr, true, nullptr, nullptr);
451
453
  for (uint64_t i = 0; i < num; ++i) {
452
454
  value.Reset();
453
455
  value.clear();
@@ -496,7 +498,7 @@ void ReadKeys(uint64_t num, uint32_t batch_size) {
496
498
  // Assume only the fast path is triggered
497
499
  GetContext get_context(nullptr, nullptr, nullptr, nullptr,
498
500
  GetContext::kNotFound, Slice(), &value, nullptr,
499
- nullptr, true, nullptr, nullptr);
501
+ nullptr, nullptr, true, nullptr, nullptr);
500
502
  uint64_t start_time = env->NowMicros();
501
503
  if (batch_size > 0) {
502
504
  for (uint64_t i = 0; i < num; i += batch_size) {