@nxtedition/rocksdb 7.1.4 → 7.1.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (185):
  1. package/binding.cc +32 -14
  2. package/deps/rocksdb/iostats.patch +19 -0
  3. package/deps/rocksdb/rocksdb/CMakeLists.txt +15 -1
  4. package/deps/rocksdb/rocksdb/cache/cache.cc +4 -0
  5. package/deps/rocksdb/rocksdb/cache/cache_bench_tool.cc +6 -8
  6. package/deps/rocksdb/rocksdb/cache/cache_key.cc +184 -164
  7. package/deps/rocksdb/rocksdb/cache/cache_key.h +38 -29
  8. package/deps/rocksdb/rocksdb/cache/cache_reservation_manager_test.cc +4 -4
  9. package/deps/rocksdb/rocksdb/cache/cache_test.cc +93 -58
  10. package/deps/rocksdb/rocksdb/cache/clock_cache.cc +92 -42
  11. package/deps/rocksdb/rocksdb/cache/clock_cache.h +57 -32
  12. package/deps/rocksdb/rocksdb/cache/compressed_secondary_cache.cc +114 -37
  13. package/deps/rocksdb/rocksdb/cache/compressed_secondary_cache.h +34 -2
  14. package/deps/rocksdb/rocksdb/cache/compressed_secondary_cache_test.cc +187 -38
  15. package/deps/rocksdb/rocksdb/cache/fast_lru_cache.cc +3 -1
  16. package/deps/rocksdb/rocksdb/cache/lru_cache.cc +88 -19
  17. package/deps/rocksdb/rocksdb/cache/lru_cache.h +48 -8
  18. package/deps/rocksdb/rocksdb/cache/lru_cache_test.cc +481 -224
  19. package/deps/rocksdb/rocksdb/crash_test.mk +15 -1
  20. package/deps/rocksdb/rocksdb/db/arena_wrapped_db_iter.cc +2 -2
  21. package/deps/rocksdb/rocksdb/db/blob/blob_file_builder.cc +3 -7
  22. package/deps/rocksdb/rocksdb/db/blob/blob_index.h +1 -1
  23. package/deps/rocksdb/rocksdb/db/blob/blob_log_format.cc +3 -5
  24. package/deps/rocksdb/rocksdb/db/blob/blob_log_writer.cc +25 -19
  25. package/deps/rocksdb/rocksdb/db/blob/blob_source.cc +4 -5
  26. package/deps/rocksdb/rocksdb/db/blob/blob_source.h +2 -3
  27. package/deps/rocksdb/rocksdb/db/blob/blob_source_test.cc +12 -4
  28. package/deps/rocksdb/rocksdb/db/blob/db_blob_basic_test.cc +149 -0
  29. package/deps/rocksdb/rocksdb/db/blob/db_blob_compaction_test.cc +105 -0
  30. package/deps/rocksdb/rocksdb/db/column_family.cc +2 -15
  31. package/deps/rocksdb/rocksdb/db/column_family_test.cc +17 -4
  32. package/deps/rocksdb/rocksdb/db/compact_files_test.cc +8 -8
  33. package/deps/rocksdb/rocksdb/db/compaction/compaction.cc +0 -7
  34. package/deps/rocksdb/rocksdb/db/compaction/compaction.h +5 -0
  35. package/deps/rocksdb/rocksdb/db/compaction/compaction_iterator.cc +56 -53
  36. package/deps/rocksdb/rocksdb/db/compaction/compaction_iterator.h +33 -11
  37. package/deps/rocksdb/rocksdb/db/compaction/compaction_job.cc +45 -11
  38. package/deps/rocksdb/rocksdb/db/compaction/compaction_job_test.cc +1 -2
  39. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_test.cc +143 -2
  40. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_universal.cc +43 -18
  41. package/deps/rocksdb/rocksdb/db/compaction/tiered_compaction_test.cc +48 -65
  42. package/deps/rocksdb/rocksdb/db/corruption_test.cc +1 -0
  43. package/deps/rocksdb/rocksdb/db/db_basic_test.cc +73 -4
  44. package/deps/rocksdb/rocksdb/db/db_block_cache_test.cc +239 -190
  45. package/deps/rocksdb/rocksdb/db/db_compaction_test.cc +71 -2
  46. package/deps/rocksdb/rocksdb/db/db_impl/db_impl.cc +144 -33
  47. package/deps/rocksdb/rocksdb/db/db_impl/db_impl.h +18 -35
  48. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_compaction_flush.cc +11 -5
  49. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_experimental.cc +7 -7
  50. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_open.cc +15 -8
  51. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_readonly.cc +2 -1
  52. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_secondary.cc +3 -1
  53. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_write.cc +11 -0
  54. package/deps/rocksdb/rocksdb/db/db_iter.cc +69 -11
  55. package/deps/rocksdb/rocksdb/db/db_iter.h +16 -0
  56. package/deps/rocksdb/rocksdb/db/db_kv_checksum_test.cc +239 -23
  57. package/deps/rocksdb/rocksdb/db/db_memtable_test.cc +2 -1
  58. package/deps/rocksdb/rocksdb/db/db_merge_operand_test.cc +42 -0
  59. package/deps/rocksdb/rocksdb/db/db_test.cc +61 -28
  60. package/deps/rocksdb/rocksdb/db/db_test2.cc +24 -9
  61. package/deps/rocksdb/rocksdb/db/db_wal_test.cc +17 -0
  62. package/deps/rocksdb/rocksdb/db/db_with_timestamp_compaction_test.cc +61 -0
  63. package/deps/rocksdb/rocksdb/db/db_write_test.cc +130 -0
  64. package/deps/rocksdb/rocksdb/db/event_helpers.cc +2 -1
  65. package/deps/rocksdb/rocksdb/db/experimental.cc +7 -8
  66. package/deps/rocksdb/rocksdb/db/external_sst_file_ingestion_job.cc +1 -2
  67. package/deps/rocksdb/rocksdb/db/flush_job.cc +11 -7
  68. package/deps/rocksdb/rocksdb/db/flush_job_test.cc +7 -1
  69. package/deps/rocksdb/rocksdb/db/forward_iterator.cc +4 -2
  70. package/deps/rocksdb/rocksdb/db/import_column_family_job.cc +7 -1
  71. package/deps/rocksdb/rocksdb/db/import_column_family_job.h +6 -0
  72. package/deps/rocksdb/rocksdb/db/import_column_family_test.cc +6 -0
  73. package/deps/rocksdb/rocksdb/db/kv_checksum.h +8 -4
  74. package/deps/rocksdb/rocksdb/db/log_reader.cc +48 -11
  75. package/deps/rocksdb/rocksdb/db/log_reader.h +8 -2
  76. package/deps/rocksdb/rocksdb/db/log_test.cc +10 -1
  77. package/deps/rocksdb/rocksdb/db/log_writer.cc +7 -1
  78. package/deps/rocksdb/rocksdb/db/manual_compaction_test.cc +4 -4
  79. package/deps/rocksdb/rocksdb/db/memtable.cc +222 -47
  80. package/deps/rocksdb/rocksdb/db/memtable.h +70 -14
  81. package/deps/rocksdb/rocksdb/db/memtable_list.cc +14 -8
  82. package/deps/rocksdb/rocksdb/db/memtable_list_test.cc +30 -10
  83. package/deps/rocksdb/rocksdb/db/perf_context_test.cc +5 -5
  84. package/deps/rocksdb/rocksdb/db/pinned_iterators_manager.h +5 -0
  85. package/deps/rocksdb/rocksdb/db/repair.cc +2 -3
  86. package/deps/rocksdb/rocksdb/db/seqno_time_test.cc +3 -7
  87. package/deps/rocksdb/rocksdb/db/table_cache.cc +72 -0
  88. package/deps/rocksdb/rocksdb/db/table_cache.h +19 -1
  89. package/deps/rocksdb/rocksdb/db/table_cache_sync_and_async.h +10 -15
  90. package/deps/rocksdb/rocksdb/db/table_properties_collector_test.cc +2 -2
  91. package/deps/rocksdb/rocksdb/db/version_builder_test.cc +35 -64
  92. package/deps/rocksdb/rocksdb/db/version_edit.cc +3 -32
  93. package/deps/rocksdb/rocksdb/db/version_edit.h +2 -12
  94. package/deps/rocksdb/rocksdb/db/version_edit_test.cc +10 -23
  95. package/deps/rocksdb/rocksdb/db/version_set.cc +71 -28
  96. package/deps/rocksdb/rocksdb/db/version_set.h +3 -3
  97. package/deps/rocksdb/rocksdb/db/version_set_sync_and_async.h +7 -7
  98. package/deps/rocksdb/rocksdb/db/version_set_test.cc +17 -15
  99. package/deps/rocksdb/rocksdb/db/wal_manager.cc +0 -4
  100. package/deps/rocksdb/rocksdb/db/wal_manager_test.cc +2 -1
  101. package/deps/rocksdb/rocksdb/db/wide/db_wide_basic_test.cc +137 -42
  102. package/deps/rocksdb/rocksdb/db/wide/wide_column_serialization.cc +21 -0
  103. package/deps/rocksdb/rocksdb/db/wide/wide_column_serialization.h +1 -0
  104. package/deps/rocksdb/rocksdb/db/write_batch_test.cc +2 -1
  105. package/deps/rocksdb/rocksdb/db/write_callback_test.cc +4 -4
  106. package/deps/rocksdb/rocksdb/db/write_thread.cc +51 -46
  107. package/deps/rocksdb/rocksdb/db/write_thread.h +0 -4
  108. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_common.h +5 -0
  109. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_gflags.cc +12 -0
  110. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_test_base.cc +8 -0
  111. package/deps/rocksdb/rocksdb/env/env_posix.cc +1 -1
  112. package/deps/rocksdb/rocksdb/env/env_test.cc +38 -8
  113. package/deps/rocksdb/rocksdb/env/file_system.cc +20 -0
  114. package/deps/rocksdb/rocksdb/env/fs_posix.cc +2 -46
  115. package/deps/rocksdb/rocksdb/env/io_posix.cc +1 -0
  116. package/deps/rocksdb/rocksdb/file/writable_file_writer.cc +110 -5
  117. package/deps/rocksdb/rocksdb/file/writable_file_writer.h +7 -0
  118. package/deps/rocksdb/rocksdb/include/rocksdb/advanced_options.h +29 -1
  119. package/deps/rocksdb/rocksdb/include/rocksdb/cache.h +31 -6
  120. package/deps/rocksdb/rocksdb/include/rocksdb/db.h +4 -0
  121. package/deps/rocksdb/rocksdb/include/rocksdb/file_system.h +1 -1
  122. package/deps/rocksdb/rocksdb/include/rocksdb/iostats_context.h +7 -0
  123. package/deps/rocksdb/rocksdb/include/rocksdb/options.h +10 -3
  124. package/deps/rocksdb/rocksdb/include/rocksdb/slice.h +3 -1
  125. package/deps/rocksdb/rocksdb/include/rocksdb/status.h +1 -1
  126. package/deps/rocksdb/rocksdb/include/rocksdb/wide_columns.h +2 -0
  127. package/deps/rocksdb/rocksdb/logging/auto_roll_logger.cc +12 -0
  128. package/deps/rocksdb/rocksdb/logging/auto_roll_logger_test.cc +9 -13
  129. package/deps/rocksdb/rocksdb/logging/env_logger.h +39 -13
  130. package/deps/rocksdb/rocksdb/memory/memory_allocator_test.cc +1 -1
  131. package/deps/rocksdb/rocksdb/memtable/inlineskiplist.h +1 -1
  132. package/deps/rocksdb/rocksdb/memtable/write_buffer_manager_test.cc +1 -1
  133. package/deps/rocksdb/rocksdb/microbench/db_basic_bench.cc +6 -0
  134. package/deps/rocksdb/rocksdb/monitoring/iostats_context_imp.h +4 -1
  135. package/deps/rocksdb/rocksdb/options/cf_options.cc +10 -3
  136. package/deps/rocksdb/rocksdb/options/cf_options.h +10 -5
  137. package/deps/rocksdb/rocksdb/options/options_helper.cc +4 -1
  138. package/deps/rocksdb/rocksdb/options/options_settable_test.cc +3 -1
  139. package/deps/rocksdb/rocksdb/options/options_test.cc +4 -2
  140. package/deps/rocksdb/rocksdb/port/util_logger.h +1 -3
  141. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_builder.cc +2 -6
  142. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_factory.cc +1 -0
  143. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader.cc +52 -12
  144. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader.h +5 -7
  145. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader_sync_and_async.h +9 -1
  146. package/deps/rocksdb/rocksdb/table/block_based/block_like_traits.h +28 -10
  147. package/deps/rocksdb/rocksdb/table/block_based/data_block_hash_index_test.cc +1 -1
  148. package/deps/rocksdb/rocksdb/table/block_based/partitioned_filter_block.cc +5 -2
  149. package/deps/rocksdb/rocksdb/table/block_based/partitioned_filter_block.h +1 -0
  150. package/deps/rocksdb/rocksdb/table/get_context.cc +16 -6
  151. package/deps/rocksdb/rocksdb/table/table_reader.h +9 -0
  152. package/deps/rocksdb/rocksdb/table/table_test.cc +2 -1
  153. package/deps/rocksdb/rocksdb/table/unique_id.cc +22 -24
  154. package/deps/rocksdb/rocksdb/table/unique_id_impl.h +2 -1
  155. package/deps/rocksdb/rocksdb/tools/block_cache_analyzer/block_cache_trace_analyzer_plot.py +7 -0
  156. package/deps/rocksdb/rocksdb/tools/db_bench_tool.cc +41 -4
  157. package/deps/rocksdb/rocksdb/tools/db_sanity_test.cc +5 -2
  158. package/deps/rocksdb/rocksdb/tools/ldb_cmd.cc +7 -8
  159. package/deps/rocksdb/rocksdb/tools/ldb_cmd_test.cc +6 -6
  160. package/deps/rocksdb/rocksdb/tools/reduce_levels_test.cc +1 -1
  161. package/deps/rocksdb/rocksdb/util/async_file_reader.cc +2 -1
  162. package/deps/rocksdb/rocksdb/util/async_file_reader.h +3 -3
  163. package/deps/rocksdb/rocksdb/util/coro_utils.h +2 -1
  164. package/deps/rocksdb/rocksdb/util/file_reader_writer_test.cc +2 -0
  165. package/deps/rocksdb/rocksdb/util/hash_test.cc +67 -0
  166. package/deps/rocksdb/rocksdb/util/math.h +41 -0
  167. package/deps/rocksdb/rocksdb/util/math128.h +6 -0
  168. package/deps/rocksdb/rocksdb/util/single_thread_executor.h +2 -1
  169. package/deps/rocksdb/rocksdb/util/stderr_logger.h +13 -0
  170. package/deps/rocksdb/rocksdb/utilities/backup/backup_engine_test.cc +55 -46
  171. package/deps/rocksdb/rocksdb/utilities/cache_dump_load_impl.cc +3 -6
  172. package/deps/rocksdb/rocksdb/utilities/cassandra/cassandra_functional_test.cc +2 -1
  173. package/deps/rocksdb/rocksdb/utilities/counted_fs.cc +10 -0
  174. package/deps/rocksdb/rocksdb/utilities/transactions/lock/point/point_lock_manager_test.h +5 -0
  175. package/deps/rocksdb/rocksdb/utilities/transactions/lock/range/range_lock_manager.h +6 -0
  176. package/deps/rocksdb/rocksdb/utilities/transactions/lock/range/range_locking_test.cc +2 -2
  177. package/deps/rocksdb/rocksdb/utilities/transactions/optimistic_transaction_test.cc +2 -2
  178. package/deps/rocksdb/rocksdb/utilities/ttl/ttl_test.cc +2 -2
  179. package/deps/rocksdb/rocksdb/utilities/write_batch_with_index/write_batch_with_index_test.cc +2 -2
  180. package/index.js +17 -8
  181. package/package.json +1 -1
  182. package/prebuilds/darwin-arm64/node.napi.node +0 -0
  183. package/prebuilds/darwin-x64/node.napi.node +0 -0
  184. package/prebuilds/linux-x64/node.napi.node +0 -0
  185. package/deps/rocksdb/rocksdb/logging/posix_logger.h +0 -179
@@ -17,6 +17,7 @@
17
17
  #include "db/merge_context.h"
18
18
  #include "db/merge_helper.h"
19
19
  #include "db/pinned_iterators_manager.h"
20
+ #include "db/wide/wide_column_serialization.h"
20
21
  #include "file/filename.h"
21
22
  #include "logging/logging.h"
22
23
  #include "memory/arena.h"
@@ -71,9 +72,11 @@ DBIter::DBIter(Env* _env, const ReadOptions& read_options,
71
72
  read_options.total_order_seek ||
72
73
  read_options.auto_prefix_mode),
73
74
  read_tier_(read_options.read_tier),
75
+ fill_cache_(read_options.fill_cache),
74
76
  verify_checksums_(read_options.verify_checksums),
75
77
  expose_blob_index_(expose_blob_index),
76
78
  is_blob_(false),
79
+ is_wide_(false),
77
80
  arena_mode_(arena_mode),
78
81
  range_del_agg_(&ioptions.internal_comparator, s),
79
82
  db_impl_(db_impl),
@@ -131,6 +134,8 @@ void DBIter::Next() {
131
134
  PERF_CPU_TIMER_GUARD(iter_next_cpu_nanos, clock_);
132
135
  // Release temporarily pinned blocks from last operation
133
136
  ReleaseTempPinnedData();
137
+ ResetBlobValue();
138
+ ResetWideColumnValue();
134
139
  local_stats_.skip_count_ += num_internal_keys_skipped_;
135
140
  local_stats_.skip_count_--;
136
141
  num_internal_keys_skipped_ = 0;
@@ -173,6 +178,9 @@ void DBIter::Next() {
173
178
  bool DBIter::SetBlobValueIfNeeded(const Slice& user_key,
174
179
  const Slice& blob_index) {
175
180
  assert(!is_blob_);
181
+ assert(blob_value_.empty());
182
+ assert(!is_wide_);
183
+ assert(value_of_default_column_.empty());
176
184
 
177
185
  if (expose_blob_index_) { // Stacked BlobDB implementation
178
186
  is_blob_ = true;
@@ -189,6 +197,7 @@ bool DBIter::SetBlobValueIfNeeded(const Slice& user_key,
189
197
  // avoid having to copy options back and forth.
190
198
  ReadOptions read_options;
191
199
  read_options.read_tier = read_tier_;
200
+ read_options.fill_cache = fill_cache_;
192
201
  read_options.verify_checksums = verify_checksums_;
193
202
 
194
203
  constexpr FilePrefetchBuffer* prefetch_buffer = nullptr;
@@ -207,13 +216,25 @@ bool DBIter::SetBlobValueIfNeeded(const Slice& user_key,
207
216
  return true;
208
217
  }
209
218
 
210
- bool DBIter::SetWideColumnValueIfNeeded(const Slice& /* wide_columns_slice */) {
219
+ bool DBIter::SetWideColumnValueIfNeeded(const Slice& wide_columns_slice) {
211
220
  assert(!is_blob_);
221
+ assert(blob_value_.empty());
222
+ assert(!is_wide_);
223
+ assert(value_of_default_column_.empty());
212
224
 
213
- // TODO: support wide-column entities
214
- status_ = Status::NotSupported("Encountered unexpected wide-column entity");
215
- valid_ = false;
216
- return false;
225
+ Slice wide_columns_copy = wide_columns_slice;
226
+
227
+ const Status s = WideColumnSerialization::GetValueOfDefaultColumn(
228
+ wide_columns_copy, value_of_default_column_);
229
+
230
+ if (!s.ok()) {
231
+ status_ = s;
232
+ valid_ = false;
233
+ return false;
234
+ }
235
+
236
+ is_wide_ = true;
237
+ return true;
217
238
  }
218
239
 
219
240
  // PRE: saved_key_ has the current user key if skipping_saved_key
@@ -262,7 +283,10 @@ bool DBIter::FindNextUserEntryInternal(bool skipping_saved_key,
262
283
  // to one.
263
284
  bool reseek_done = false;
264
285
 
265
- is_blob_ = false;
286
+ assert(!is_blob_);
287
+ assert(blob_value_.empty());
288
+ assert(!is_wide_);
289
+ assert(value_of_default_column_.empty());
266
290
 
267
291
  do {
268
292
  // Will update is_key_seqnum_zero_ as soon as we parsed the current key
@@ -590,7 +614,11 @@ bool DBIter::MergeValuesNewToOld() {
590
614
  if (!s.ok()) {
591
615
  return false;
592
616
  }
593
- is_blob_ = false;
617
+
618
+ ResetBlobValue();
619
+ assert(!is_wide_);
620
+ assert(value_of_default_column_.empty());
621
+
594
622
  // iter_ is positioned after put
595
623
  iter_.Next();
596
624
  if (!iter_.status().ok()) {
@@ -636,6 +664,8 @@ void DBIter::Prev() {
636
664
 
637
665
  PERF_CPU_TIMER_GUARD(iter_prev_cpu_nanos, clock_);
638
666
  ReleaseTempPinnedData();
667
+ ResetBlobValue();
668
+ ResetWideColumnValue();
639
669
  ResetInternalKeysSkippedCounter();
640
670
  bool ok = true;
641
671
  if (direction_ == kForward) {
@@ -965,7 +995,12 @@ bool DBIter::FindValueForCurrentKey() {
965
995
 
966
996
  Status s;
967
997
  s.PermitUncheckedError();
968
- is_blob_ = false;
998
+
999
+ assert(!is_blob_);
1000
+ assert(blob_value_.empty());
1001
+ assert(!is_wide_);
1002
+ assert(value_of_default_column_.empty());
1003
+
969
1004
  switch (last_key_entry_type) {
970
1005
  case kTypeDeletion:
971
1006
  case kTypeDeletionWithTimestamp:
@@ -1004,7 +1039,11 @@ bool DBIter::FindValueForCurrentKey() {
1004
1039
  if (!s.ok()) {
1005
1040
  return false;
1006
1041
  }
1007
- is_blob_ = false;
1042
+
1043
+ ResetBlobValue();
1044
+ assert(!is_wide_);
1045
+ assert(value_of_default_column_.empty());
1046
+
1008
1047
  return true;
1009
1048
  } else if (last_not_merge_type == kTypeWideColumnEntity) {
1010
1049
  // TODO: support wide-column entities
@@ -1079,7 +1118,12 @@ bool DBIter::FindValueForCurrentKeyUsingSeek() {
1079
1118
  // In case read_callback presents, the value we seek to may not be visible.
1080
1119
  // Find the next value that's visible.
1081
1120
  ParsedInternalKey ikey;
1082
- is_blob_ = false;
1121
+
1122
+ assert(!is_blob_);
1123
+ assert(blob_value_.empty());
1124
+ assert(!is_wide_);
1125
+ assert(value_of_default_column_.empty());
1126
+
1083
1127
  while (true) {
1084
1128
  if (!iter_.Valid()) {
1085
1129
  valid_ = false;
@@ -1214,7 +1258,11 @@ bool DBIter::FindValueForCurrentKeyUsingSeek() {
1214
1258
  if (!s.ok()) {
1215
1259
  return false;
1216
1260
  }
1217
- is_blob_ = false;
1261
+
1262
+ ResetBlobValue();
1263
+ assert(!is_wide_);
1264
+ assert(value_of_default_column_.empty());
1265
+
1218
1266
  return true;
1219
1267
  } else if (ikey.type == kTypeWideColumnEntity) {
1220
1268
  // TODO: support wide-column entities
@@ -1439,6 +1487,8 @@ void DBIter::Seek(const Slice& target) {
1439
1487
 
1440
1488
  status_ = Status::OK();
1441
1489
  ReleaseTempPinnedData();
1490
+ ResetBlobValue();
1491
+ ResetWideColumnValue();
1442
1492
  ResetInternalKeysSkippedCounter();
1443
1493
 
1444
1494
  // Seek the inner iterator based on the target key.
@@ -1515,6 +1565,8 @@ void DBIter::SeekForPrev(const Slice& target) {
1515
1565
 
1516
1566
  status_ = Status::OK();
1517
1567
  ReleaseTempPinnedData();
1568
+ ResetBlobValue();
1569
+ ResetWideColumnValue();
1518
1570
  ResetInternalKeysSkippedCounter();
1519
1571
 
1520
1572
  // Seek the inner iterator based on the target key.
@@ -1572,6 +1624,8 @@ void DBIter::SeekToFirst() {
1572
1624
  status_ = Status::OK();
1573
1625
  direction_ = kForward;
1574
1626
  ReleaseTempPinnedData();
1627
+ ResetBlobValue();
1628
+ ResetWideColumnValue();
1575
1629
  ResetInternalKeysSkippedCounter();
1576
1630
  ClearSavedValue();
1577
1631
  is_key_seqnum_zero_ = false;
@@ -1619,6 +1673,8 @@ void DBIter::SeekToLast() {
1619
1673
  *iterate_upper_bound_, /*a_has_ts=*/false, k,
1620
1674
  /*b_has_ts=*/false)) {
1621
1675
  ReleaseTempPinnedData();
1676
+ ResetBlobValue();
1677
+ ResetWideColumnValue();
1622
1678
  PrevInternal(nullptr);
1623
1679
 
1624
1680
  k = key();
@@ -1638,6 +1694,8 @@ void DBIter::SeekToLast() {
1638
1694
  status_ = Status::OK();
1639
1695
  direction_ = kReverse;
1640
1696
  ReleaseTempPinnedData();
1697
+ ResetBlobValue();
1698
+ ResetWideColumnValue();
1641
1699
  ResetInternalKeysSkippedCounter();
1642
1700
  ClearSavedValue();
1643
1701
  is_key_seqnum_zero_ = false;
@@ -160,9 +160,12 @@ class DBIter final : public Iterator {
160
160
  }
161
161
  Slice value() const override {
162
162
  assert(valid_);
163
+ assert(!is_blob_ || !is_wide_);
163
164
 
164
165
  if (!expose_blob_index_ && is_blob_) {
165
166
  return blob_value_;
167
+ } else if (is_wide_) {
168
+ return value_of_default_column_;
166
169
  } else if (current_entry_is_merged_) {
167
170
  // If pinned_value_ is set then the result of merge operator is one of
168
171
  // the merge operands and we should return it.
@@ -300,8 +303,18 @@ class DBIter final : public Iterator {
300
303
  // index when using the integrated BlobDB implementation.
301
304
  bool SetBlobValueIfNeeded(const Slice& user_key, const Slice& blob_index);
302
305
 
306
+ void ResetBlobValue() {
307
+ is_blob_ = false;
308
+ blob_value_.Reset();
309
+ }
310
+
303
311
  bool SetWideColumnValueIfNeeded(const Slice& wide_columns_slice);
304
312
 
313
+ void ResetWideColumnValue() {
314
+ is_wide_ = false;
315
+ value_of_default_column_.clear();
316
+ }
317
+
305
318
  Status Merge(const Slice* val, const Slice& user_key);
306
319
 
307
320
  const SliceTransform* prefix_extractor_;
@@ -326,6 +339,7 @@ class DBIter final : public Iterator {
326
339
  Slice pinned_value_;
327
340
  // for prefix seek mode to support prev()
328
341
  PinnableSlice blob_value_;
342
+ Slice value_of_default_column_;
329
343
  Statistics* statistics_;
330
344
  uint64_t max_skip_;
331
345
  uint64_t max_skippable_internal_keys_;
@@ -356,11 +370,13 @@ class DBIter final : public Iterator {
356
370
  // prefix_extractor_ must be non-NULL if the value is false.
357
371
  const bool expect_total_order_inner_iter_;
358
372
  ReadTier read_tier_;
373
+ bool fill_cache_;
359
374
  bool verify_checksums_;
360
375
  // Whether the iterator is allowed to expose blob references. Set to true when
361
376
  // the stacked BlobDB implementation is used, false otherwise.
362
377
  bool expose_blob_index_;
363
378
  bool is_blob_;
379
+ bool is_wide_;
364
380
  bool arena_mode_;
365
381
  // List of operands for merge operator.
366
382
  MergeContext merge_context_;
@@ -13,9 +13,9 @@ enum class WriteBatchOpType {
13
13
  kPut = 0,
14
14
  kDelete,
15
15
  kSingleDelete,
16
- kDeleteRange,
17
16
  kMerge,
18
17
  kPutEntity,
18
+ kDeleteRange,
19
19
  kNum,
20
20
  };
21
21
 
@@ -26,11 +26,14 @@ WriteBatchOpType operator+(WriteBatchOpType lhs, const int rhs) {
26
26
  }
27
27
 
28
28
  enum class WriteMode {
29
+ // `Write()` a `WriteBatch` constructed with `protection_bytes_per_key = 0`
30
+ // and `WriteOptions::protection_bytes_per_key = 0`
31
+ kWriteUnprotectedBatch = 0,
29
32
  // `Write()` a `WriteBatch` constructed with `protection_bytes_per_key > 0`.
30
- kWriteProtectedBatch = 0,
33
+ kWriteProtectedBatch,
31
34
  // `Write()` a `WriteBatch` constructed with `protection_bytes_per_key == 0`.
32
35
  // Protection is enabled via `WriteOptions::protection_bytes_per_key > 0`.
33
- kWriteUnprotectedBatch,
36
+ kWriteOptionProtectedBatch,
34
37
  // TODO(ajkr): add a mode that uses `Write()` wrappers, e.g., `Put()`.
35
38
  kNum,
36
39
  };
@@ -89,19 +92,30 @@ class DbKvChecksumTestBase : public DBTestBase {
89
92
  }
90
93
  };
91
94
 
92
- class DbKvChecksumTest : public DbKvChecksumTestBase,
93
- public ::testing::WithParamInterface<
94
- std::tuple<WriteBatchOpType, char, WriteMode>> {
95
+ class DbKvChecksumTest
96
+ : public DbKvChecksumTestBase,
97
+ public ::testing::WithParamInterface<
98
+ std::tuple<WriteBatchOpType, char, WriteMode,
99
+ uint32_t /* memtable_protection_bytes_per_key */>> {
95
100
  public:
96
101
  DbKvChecksumTest()
97
102
  : DbKvChecksumTestBase("db_kv_checksum_test", /*env_do_fsync=*/false) {
98
103
  op_type_ = std::get<0>(GetParam());
99
104
  corrupt_byte_addend_ = std::get<1>(GetParam());
100
105
  write_mode_ = std::get<2>(GetParam());
106
+ memtable_protection_bytes_per_key_ = std::get<3>(GetParam());
101
107
  }
102
108
 
103
109
  Status ExecuteWrite(ColumnFamilyHandle* cf_handle) {
104
110
  switch (write_mode_) {
111
+ case WriteMode::kWriteUnprotectedBatch: {
112
+ auto batch_and_status =
113
+ GetWriteBatch(GetCFHandleToUse(cf_handle, op_type_),
114
+ 0 /* protection_bytes_per_key */, op_type_);
115
+ assert(batch_and_status.second.ok());
116
+ // Default write option has protection_bytes_per_key = 0
117
+ return db_->Write(WriteOptions(), &batch_and_status.first);
118
+ }
105
119
  case WriteMode::kWriteProtectedBatch: {
106
120
  auto batch_and_status =
107
121
  GetWriteBatch(GetCFHandleToUse(cf_handle, op_type_),
@@ -109,7 +123,7 @@ class DbKvChecksumTest : public DbKvChecksumTestBase,
109
123
  assert(batch_and_status.second.ok());
110
124
  return db_->Write(WriteOptions(), &batch_and_status.first);
111
125
  }
112
- case WriteMode::kWriteUnprotectedBatch: {
126
+ case WriteMode::kWriteOptionProtectedBatch: {
113
127
  auto batch_and_status =
114
128
  GetWriteBatch(GetCFHandleToUse(cf_handle, op_type_),
115
129
  0 /* protection_bytes_per_key */, op_type_);
@@ -131,8 +145,6 @@ class DbKvChecksumTest : public DbKvChecksumTestBase,
131
145
  // We learn the entry size on the first attempt
132
146
  entry_len_ = encoded.size();
133
147
  }
134
- // All entries should be the same size
135
- assert(entry_len_ == encoded.size());
136
148
  char* buf = const_cast<char*>(encoded.data());
137
149
  buf[corrupt_byte_offset_] += corrupt_byte_addend_;
138
150
  ++corrupt_byte_offset_;
@@ -144,6 +156,7 @@ class DbKvChecksumTest : public DbKvChecksumTestBase,
144
156
  WriteBatchOpType op_type_;
145
157
  char corrupt_byte_addend_;
146
158
  WriteMode write_mode_;
159
+ uint32_t memtable_protection_bytes_per_key_;
147
160
  size_t corrupt_byte_offset_ = 0;
148
161
  size_t entry_len_ = std::numeric_limits<size_t>::max();
149
162
  };
@@ -169,29 +182,36 @@ std::string GetOpTypeString(const WriteBatchOpType& op_type) {
169
182
  return "";
170
183
  }
171
184
 
185
+ std::string GetWriteModeString(const WriteMode& mode) {
186
+ switch (mode) {
187
+ case WriteMode::kWriteUnprotectedBatch:
188
+ return "WriteUnprotectedBatch";
189
+ case WriteMode::kWriteProtectedBatch:
190
+ return "WriteProtectedBatch";
191
+ case WriteMode::kWriteOptionProtectedBatch:
192
+ return "kWriteOptionProtectedBatch";
193
+ case WriteMode::kNum:
194
+ assert(false);
195
+ }
196
+ return "";
197
+ }
198
+
172
199
  INSTANTIATE_TEST_CASE_P(
173
200
  DbKvChecksumTest, DbKvChecksumTest,
174
201
  ::testing::Combine(::testing::Range(static_cast<WriteBatchOpType>(0),
175
202
  WriteBatchOpType::kNum),
176
203
  ::testing::Values(2, 103, 251),
177
- ::testing::Range(static_cast<WriteMode>(0),
178
- WriteMode::kNum)),
204
+ ::testing::Range(WriteMode::kWriteProtectedBatch,
205
+ WriteMode::kNum),
206
+ ::testing::Values(0)),
179
207
  [](const testing::TestParamInfo<
180
- std::tuple<WriteBatchOpType, char, WriteMode>>& args) {
208
+ std::tuple<WriteBatchOpType, char, WriteMode, uint32_t>>& args) {
181
209
  std::ostringstream oss;
182
210
  oss << GetOpTypeString(std::get<0>(args.param)) << "Add"
183
211
  << static_cast<int>(
184
- static_cast<unsigned char>(std::get<1>(args.param)));
185
- switch (std::get<2>(args.param)) {
186
- case WriteMode::kWriteProtectedBatch:
187
- oss << "WriteProtectedBatch";
188
- break;
189
- case WriteMode::kWriteUnprotectedBatch:
190
- oss << "WriteUnprotectedBatch";
191
- break;
192
- case WriteMode::kNum:
193
- assert(false);
194
- }
212
+ static_cast<unsigned char>(std::get<1>(args.param)))
213
+ << GetWriteModeString(std::get<2>(args.param))
214
+ << static_cast<uint32_t>(std::get<3>(args.param));
195
215
  return oss.str();
196
216
  });
197
217
 
@@ -660,6 +680,202 @@ TEST_F(DbKVChecksumWALToWriteBatchTest, WriteBatchChecksumHandoff) {
660
680
  SyncPoint::GetInstance()->DisableProcessing();
661
681
  };
662
682
 
683
+ // TODO (cbi): add DeleteRange coverage once it is implemented
684
+ class DbMemtableKVChecksumTest : public DbKvChecksumTest {
685
+ public:
686
+ DbMemtableKVChecksumTest() : DbKvChecksumTest() {}
687
+
688
+ protected:
689
+ // Indices in the memtable entry that we will not corrupt.
690
+ // For memtable entry format, see comments in MemTable::Add().
691
+ // We do not corrupt key length and value length fields in this test
692
+ // case since it causes segfault and ASAN will complain.
693
+ // For this test case, key and value are all of length 3, so
694
+ // key length field is at index 0 and value length field is at index 12.
695
+ const std::set<size_t> index_not_to_corrupt{0, 12};
696
+
697
+ void SkipNotToCorruptEntry() {
698
+ if (index_not_to_corrupt.find(corrupt_byte_offset_) !=
699
+ index_not_to_corrupt.end()) {
700
+ corrupt_byte_offset_++;
701
+ }
702
+ }
703
+ };
704
+
705
+ INSTANTIATE_TEST_CASE_P(
706
+ DbMemtableKVChecksumTest, DbMemtableKVChecksumTest,
707
+ ::testing::Combine(::testing::Range(static_cast<WriteBatchOpType>(0),
708
+ WriteBatchOpType::kDeleteRange),
709
+ ::testing::Values(2, 103, 251),
710
+ ::testing::Range(static_cast<WriteMode>(0),
711
+ WriteMode::kWriteOptionProtectedBatch),
712
+ // skip 1 byte checksum as it makes test flaky
713
+ ::testing::Values(2, 4, 8)),
714
+ [](const testing::TestParamInfo<
715
+ std::tuple<WriteBatchOpType, char, WriteMode, uint32_t>>& args) {
716
+ std::ostringstream oss;
717
+ oss << GetOpTypeString(std::get<0>(args.param)) << "Add"
718
+ << static_cast<int>(
719
+ static_cast<unsigned char>(std::get<1>(args.param)))
720
+ << GetWriteModeString(std::get<2>(args.param))
721
+ << static_cast<uint32_t>(std::get<3>(args.param));
722
+ return oss.str();
723
+ });
724
+
725
+ TEST_P(DbMemtableKVChecksumTest, GetWithCorruptAfterMemtableInsert) {
726
+ // Record memtable entry size.
727
+ // Not corrupting memtable entry here since it will segfault
728
+ // or fail some asserts inside memtablerep implementation
729
+ // e.g., when key_len is corrupted.
730
+ SyncPoint::GetInstance()->SetCallBack(
731
+ "MemTable::Add:BeforeReturn:Encoded", [&](void* arg) {
732
+ Slice encoded = *static_cast<Slice*>(arg);
733
+ entry_len_ = encoded.size();
734
+ });
735
+
736
+ SyncPoint::GetInstance()->SetCallBack(
737
+ "Memtable::SaveValue:Begin:entry", [&](void* entry) {
738
+ char* buf = *static_cast<char**>(entry);
739
+ buf[corrupt_byte_offset_] += corrupt_byte_addend_;
740
+ ++corrupt_byte_offset_;
741
+ });
742
+ SyncPoint::GetInstance()->EnableProcessing();
743
+ Options options = CurrentOptions();
744
+ options.memtable_protection_bytes_per_key =
745
+ memtable_protection_bytes_per_key_;
746
+ if (op_type_ == WriteBatchOpType::kMerge) {
747
+ options.merge_operator = MergeOperators::CreateStringAppendOperator();
748
+ }
749
+
750
+ SkipNotToCorruptEntry();
751
+ while (MoreBytesToCorrupt()) {
752
+ Reopen(options);
753
+ ASSERT_OK(ExecuteWrite(nullptr));
754
+ std::string val;
755
+ ASSERT_TRUE(db_->Get(ReadOptions(), "key", &val).IsCorruption());
756
+ Destroy(options);
757
+ SkipNotToCorruptEntry();
758
+ }
759
+ }
760
+
761
+ TEST_P(DbMemtableKVChecksumTest,
762
+ GetWithColumnFamilyCorruptAfterMemtableInsert) {
763
+ // Record memtable entry size.
764
+ // Not corrupting memtable entry here since it will segfault
765
+ // or fail some asserts inside memtablerep implementation
766
+ // e.g., when key_len is corrupted.
767
+ SyncPoint::GetInstance()->SetCallBack(
768
+ "MemTable::Add:BeforeReturn:Encoded", [&](void* arg) {
769
+ Slice encoded = *static_cast<Slice*>(arg);
770
+ entry_len_ = encoded.size();
771
+ });
772
+
773
+ SyncPoint::GetInstance()->SetCallBack(
774
+ "Memtable::SaveValue:Begin:entry", [&](void* entry) {
775
+ char* buf = *static_cast<char**>(entry);
776
+ buf[corrupt_byte_offset_] += corrupt_byte_addend_;
777
+ ++corrupt_byte_offset_;
778
+ });
779
+ SyncPoint::GetInstance()->EnableProcessing();
780
+ Options options = CurrentOptions();
781
+ options.memtable_protection_bytes_per_key =
782
+ memtable_protection_bytes_per_key_;
783
+ if (op_type_ == WriteBatchOpType::kMerge) {
784
+ options.merge_operator = MergeOperators::CreateStringAppendOperator();
785
+ }
786
+
787
+ SkipNotToCorruptEntry();
788
+ while (MoreBytesToCorrupt()) {
789
+ Reopen(options);
790
+ CreateAndReopenWithCF({"pikachu"}, options);
791
+ ASSERT_OK(ExecuteWrite(handles_[1]));
792
+ std::string val;
793
+ ASSERT_TRUE(
794
+ db_->Get(ReadOptions(), handles_[1], "key", &val).IsCorruption());
795
+ Destroy(options);
796
+ SkipNotToCorruptEntry();
797
+ }
798
+ }
799
+
800
+ TEST_P(DbMemtableKVChecksumTest, IteratorWithCorruptAfterMemtableInsert) {
801
+ SyncPoint::GetInstance()->SetCallBack(
802
+ "MemTable::Add:BeforeReturn:Encoded",
803
+ std::bind(&DbKvChecksumTest::CorruptNextByteCallBack, this,
804
+ std::placeholders::_1));
805
+ SyncPoint::GetInstance()->EnableProcessing();
806
+ Options options = CurrentOptions();
807
+ options.memtable_protection_bytes_per_key =
808
+ memtable_protection_bytes_per_key_;
809
+ if (op_type_ == WriteBatchOpType::kMerge) {
810
+ options.merge_operator = MergeOperators::CreateStringAppendOperator();
811
+ }
812
+
813
+ SkipNotToCorruptEntry();
814
+ while (MoreBytesToCorrupt()) {
815
+ Reopen(options);
816
+ ASSERT_OK(ExecuteWrite(nullptr));
817
+ Iterator* it = db_->NewIterator(ReadOptions());
818
+ it->SeekToFirst();
819
+ ASSERT_FALSE(it->Valid());
820
+ ASSERT_TRUE(it->status().IsCorruption());
821
+ delete it;
822
+ Destroy(options);
823
+ SkipNotToCorruptEntry();
824
+ }
825
+ }
826
+
827
+ TEST_P(DbMemtableKVChecksumTest,
828
+ IteratorWithColumnFamilyCorruptAfterMemtableInsert) {
829
+ SyncPoint::GetInstance()->SetCallBack(
830
+ "MemTable::Add:BeforeReturn:Encoded",
831
+ std::bind(&DbKvChecksumTest::CorruptNextByteCallBack, this,
832
+ std::placeholders::_1));
833
+ SyncPoint::GetInstance()->EnableProcessing();
834
+ Options options = CurrentOptions();
835
+ options.memtable_protection_bytes_per_key =
836
+ memtable_protection_bytes_per_key_;
837
+ if (op_type_ == WriteBatchOpType::kMerge) {
838
+ options.merge_operator = MergeOperators::CreateStringAppendOperator();
839
+ }
840
+
841
+ SkipNotToCorruptEntry();
842
+ while (MoreBytesToCorrupt()) {
843
+ Reopen(options);
844
+ CreateAndReopenWithCF({"pikachu"}, options);
845
+ ASSERT_OK(ExecuteWrite(handles_[1]));
846
+ Iterator* it = db_->NewIterator(ReadOptions(), handles_[1]);
847
+ it->SeekToFirst();
848
+ ASSERT_FALSE(it->Valid());
849
+ ASSERT_TRUE(it->status().IsCorruption());
850
+ delete it;
851
+ Destroy(options);
852
+ SkipNotToCorruptEntry();
853
+ }
854
+ }
855
+
856
+ TEST_P(DbMemtableKVChecksumTest, FlushWithCorruptAfterMemtableInsert) {
857
+ SyncPoint::GetInstance()->SetCallBack(
858
+ "MemTable::Add:BeforeReturn:Encoded",
859
+ std::bind(&DbKvChecksumTest::CorruptNextByteCallBack, this,
860
+ std::placeholders::_1));
861
+ SyncPoint::GetInstance()->EnableProcessing();
862
+ Options options = CurrentOptions();
863
+ options.memtable_protection_bytes_per_key =
864
+ memtable_protection_bytes_per_key_;
865
+ if (op_type_ == WriteBatchOpType::kMerge) {
866
+ options.merge_operator = MergeOperators::CreateStringAppendOperator();
867
+ }
868
+
869
+ SkipNotToCorruptEntry();
870
+ // Not corruping each byte like other tests since Flush() is relatively slow.
871
+ Reopen(options);
872
+ ASSERT_OK(ExecuteWrite(nullptr));
873
+ ASSERT_TRUE(Flush().IsCorruption());
874
+ // DB enters read-only state when flush reads corrupted data
875
+ ASSERT_TRUE(dbfull()->TEST_GetBGError().IsCorruption());
876
+ Destroy(options);
877
+ }
878
+
663
879
  } // namespace ROCKSDB_NAMESPACE
664
880
 
665
881
  int main(int argc, char** argv) {
@@ -263,7 +263,8 @@ TEST_F(DBMemTableTest, ConcurrentMergeWrite) {
263
263
  SequenceNumber max_covering_tombstone_seq = 0;
264
264
  LookupKey lkey("key", kMaxSequenceNumber);
265
265
  bool res = mem->Get(lkey, &value, /*timestamp=*/nullptr, &status,
266
- &merge_context, &max_covering_tombstone_seq, roptions);
266
+ &merge_context, &max_covering_tombstone_seq, roptions,
267
+ false /* immutable_memtable */);
267
268
  ASSERT_OK(status);
268
269
  ASSERT_TRUE(res);
269
270
  uint64_t ivalue = DecodeFixed64(Slice(value).data());
@@ -397,6 +397,48 @@ TEST_F(DBMergeOperandTest, BlobDBGetMergeOperandsBasic) {
397
397
  ASSERT_EQ(values[3], "ed");
398
398
  }
399
399
 
400
+ TEST_F(DBMergeOperandTest, GetMergeOperandsLargeResultOptimization) {
401
+ // These constants are chosen to trigger the large result optimization
402
+ // (pinning a bundle of `DBImpl` resources).
403
+ const int kNumOperands = 1024;
404
+ const int kOperandLen = 1024;
405
+
406
+ Options options;
407
+ options.create_if_missing = true;
408
+ options.merge_operator = MergeOperators::CreateStringAppendOperator();
409
+ DestroyAndReopen(options);
410
+
411
+ Random rnd(301);
412
+ std::vector<std::string> expected_merge_operands;
413
+ expected_merge_operands.reserve(kNumOperands);
414
+ for (int i = 0; i < kNumOperands; ++i) {
415
+ expected_merge_operands.emplace_back(rnd.RandomString(kOperandLen));
416
+ ASSERT_OK(Merge("key", expected_merge_operands.back()));
417
+ }
418
+
419
+ std::vector<PinnableSlice> merge_operands(kNumOperands);
420
+ GetMergeOperandsOptions merge_operands_info;
421
+ merge_operands_info.expected_max_number_of_operands = kNumOperands;
422
+ int num_merge_operands = 0;
423
+ ASSERT_OK(db_->GetMergeOperands(ReadOptions(), db_->DefaultColumnFamily(),
424
+ "key", merge_operands.data(),
425
+ &merge_operands_info, &num_merge_operands));
426
+ ASSERT_EQ(num_merge_operands, kNumOperands);
427
+
428
+ // Ensures the large result optimization was used.
429
+ for (int i = 0; i < kNumOperands; ++i) {
430
+ ASSERT_TRUE(merge_operands[i].IsPinned());
431
+ }
432
+
433
+ // Add a Flush() to change the `SuperVersion` to challenge the resource
434
+ // pinning.
435
+ ASSERT_OK(Flush());
436
+
437
+ for (int i = 0; i < kNumOperands; ++i) {
438
+ ASSERT_EQ(expected_merge_operands[i], merge_operands[i]);
439
+ }
440
+ }
441
+
400
442
  } // namespace ROCKSDB_NAMESPACE
401
443
 
402
444
  int main(int argc, char** argv) {