@nxtedition/rocksdb 13.1.5 → 13.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (237)
  1. package/binding.cc +37 -12
  2. package/deps/rocksdb/rocksdb/{TARGETS → BUCK} +27 -0
  3. package/deps/rocksdb/rocksdb/CMakeLists.txt +3 -1
  4. package/deps/rocksdb/rocksdb/Makefile +2 -2
  5. package/deps/rocksdb/rocksdb/cache/cache.cc +3 -1
  6. package/deps/rocksdb/rocksdb/db/arena_wrapped_db_iter.h +2 -0
  7. package/deps/rocksdb/rocksdb/db/attribute_group_iterator_impl.h +34 -9
  8. package/deps/rocksdb/rocksdb/db/blob/blob_source.cc +7 -6
  9. package/deps/rocksdb/rocksdb/db/blob/blob_source.h +5 -1
  10. package/deps/rocksdb/rocksdb/db/blob/blob_source_test.cc +22 -14
  11. package/deps/rocksdb/rocksdb/db/blob/db_blob_basic_test.cc +149 -0
  12. package/deps/rocksdb/rocksdb/db/builder.cc +13 -24
  13. package/deps/rocksdb/rocksdb/db/coalescing_iterator.h +35 -10
  14. package/deps/rocksdb/rocksdb/db/column_family.cc +21 -10
  15. package/deps/rocksdb/rocksdb/db/column_family.h +15 -8
  16. package/deps/rocksdb/rocksdb/db/column_family_test.cc +98 -7
  17. package/deps/rocksdb/rocksdb/db/compaction/compaction.cc +126 -16
  18. package/deps/rocksdb/rocksdb/db/compaction/compaction.h +51 -5
  19. package/deps/rocksdb/rocksdb/db/compaction/compaction_iterator.cc +2 -2
  20. package/deps/rocksdb/rocksdb/db/compaction/compaction_iterator.h +2 -8
  21. package/deps/rocksdb/rocksdb/db/compaction/compaction_iterator_test.cc +24 -0
  22. package/deps/rocksdb/rocksdb/db/compaction/compaction_job.cc +52 -22
  23. package/deps/rocksdb/rocksdb/db/compaction/compaction_job.h +9 -7
  24. package/deps/rocksdb/rocksdb/db/compaction/compaction_job_test.cc +36 -9
  25. package/deps/rocksdb/rocksdb/db/compaction/compaction_outputs.h +6 -0
  26. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker.cc +30 -17
  27. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker.h +26 -23
  28. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_fifo.cc +43 -33
  29. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_fifo.h +6 -5
  30. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_level.cc +19 -9
  31. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_level.h +6 -5
  32. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_test.cc +632 -411
  33. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_universal.cc +171 -51
  34. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_universal.h +7 -5
  35. package/deps/rocksdb/rocksdb/db/compaction/compaction_service_job.cc +37 -10
  36. package/deps/rocksdb/rocksdb/db/compaction/compaction_service_test.cc +51 -11
  37. package/deps/rocksdb/rocksdb/db/compaction/subcompaction_state.cc +10 -3
  38. package/deps/rocksdb/rocksdb/db/compaction/tiered_compaction_test.cc +350 -154
  39. package/deps/rocksdb/rocksdb/db/convenience.cc +1 -1
  40. package/deps/rocksdb/rocksdb/db/db_block_cache_test.cc +62 -27
  41. package/deps/rocksdb/rocksdb/db/db_bloom_filter_test.cc +68 -1
  42. package/deps/rocksdb/rocksdb/db/db_compaction_test.cc +91 -0
  43. package/deps/rocksdb/rocksdb/db/db_impl/db_impl.cc +134 -70
  44. package/deps/rocksdb/rocksdb/db/db_impl/db_impl.h +71 -23
  45. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_compaction_flush.cc +43 -16
  46. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_debug.cc +47 -33
  47. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_files.cc +27 -19
  48. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_open.cc +38 -25
  49. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_readonly.cc +3 -3
  50. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_secondary.cc +7 -4
  51. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_write.cc +258 -42
  52. package/deps/rocksdb/rocksdb/db/db_io_failure_test.cc +161 -9
  53. package/deps/rocksdb/rocksdb/db/db_iter.cc +118 -86
  54. package/deps/rocksdb/rocksdb/db/db_iter.h +44 -17
  55. package/deps/rocksdb/rocksdb/db/db_options_test.cc +27 -6
  56. package/deps/rocksdb/rocksdb/db/db_test.cc +48 -16
  57. package/deps/rocksdb/rocksdb/db/db_test2.cc +60 -15
  58. package/deps/rocksdb/rocksdb/db/db_test_util.cc +97 -44
  59. package/deps/rocksdb/rocksdb/db/db_test_util.h +7 -1
  60. package/deps/rocksdb/rocksdb/db/dbformat.cc +15 -5
  61. package/deps/rocksdb/rocksdb/db/dbformat.h +137 -55
  62. package/deps/rocksdb/rocksdb/db/event_helpers.cc +1 -0
  63. package/deps/rocksdb/rocksdb/db/experimental.cc +54 -0
  64. package/deps/rocksdb/rocksdb/db/external_sst_file_basic_test.cc +663 -8
  65. package/deps/rocksdb/rocksdb/db/external_sst_file_ingestion_job.cc +152 -91
  66. package/deps/rocksdb/rocksdb/db/external_sst_file_ingestion_job.h +134 -11
  67. package/deps/rocksdb/rocksdb/db/external_sst_file_test.cc +55 -9
  68. package/deps/rocksdb/rocksdb/db/flush_job.cc +52 -29
  69. package/deps/rocksdb/rocksdb/db/flush_job.h +5 -3
  70. package/deps/rocksdb/rocksdb/db/flush_job_test.cc +18 -12
  71. package/deps/rocksdb/rocksdb/db/forward_iterator.cc +23 -29
  72. package/deps/rocksdb/rocksdb/db/import_column_family_job.cc +3 -2
  73. package/deps/rocksdb/rocksdb/db/import_column_family_test.cc +2 -0
  74. package/deps/rocksdb/rocksdb/db/internal_stats.cc +9 -6
  75. package/deps/rocksdb/rocksdb/db/internal_stats.h +54 -0
  76. package/deps/rocksdb/rocksdb/db/job_context.h +1 -1
  77. package/deps/rocksdb/rocksdb/db/log_reader.cc +6 -7
  78. package/deps/rocksdb/rocksdb/db/manifest_ops.cc +47 -0
  79. package/deps/rocksdb/rocksdb/db/manifest_ops.h +20 -0
  80. package/deps/rocksdb/rocksdb/db/memtable.cc +165 -64
  81. package/deps/rocksdb/rocksdb/db/memtable.h +422 -243
  82. package/deps/rocksdb/rocksdb/db/memtable_list.cc +99 -68
  83. package/deps/rocksdb/rocksdb/db/memtable_list.h +63 -38
  84. package/deps/rocksdb/rocksdb/db/memtable_list_test.cc +28 -25
  85. package/deps/rocksdb/rocksdb/db/multi_cf_iterator_impl.h +118 -60
  86. package/deps/rocksdb/rocksdb/db/multi_cf_iterator_test.cc +344 -89
  87. package/deps/rocksdb/rocksdb/db/range_tombstone_fragmenter.h +2 -3
  88. package/deps/rocksdb/rocksdb/db/repair.cc +15 -14
  89. package/deps/rocksdb/rocksdb/db/repair_test.cc +0 -13
  90. package/deps/rocksdb/rocksdb/db/snapshot_checker.h +7 -0
  91. package/deps/rocksdb/rocksdb/db/table_cache.cc +62 -65
  92. package/deps/rocksdb/rocksdb/db/table_cache.h +70 -76
  93. package/deps/rocksdb/rocksdb/db/table_cache_sync_and_async.h +5 -6
  94. package/deps/rocksdb/rocksdb/db/table_properties_collector_test.cc +1 -1
  95. package/deps/rocksdb/rocksdb/db/transaction_log_impl.cc +8 -7
  96. package/deps/rocksdb/rocksdb/db/version_builder.cc +17 -19
  97. package/deps/rocksdb/rocksdb/db/version_builder.h +13 -12
  98. package/deps/rocksdb/rocksdb/db/version_edit.h +30 -0
  99. package/deps/rocksdb/rocksdb/db/version_edit_handler.cc +3 -5
  100. package/deps/rocksdb/rocksdb/db/version_set.cc +89 -129
  101. package/deps/rocksdb/rocksdb/db/version_set.h +12 -4
  102. package/deps/rocksdb/rocksdb/db/version_set_sync_and_async.h +1 -2
  103. package/deps/rocksdb/rocksdb/db/version_set_test.cc +12 -8
  104. package/deps/rocksdb/rocksdb/db/wide/wide_column_serialization.cc +0 -15
  105. package/deps/rocksdb/rocksdb/db/wide/wide_column_serialization.h +0 -2
  106. package/deps/rocksdb/rocksdb/db/wide/wide_column_serialization_test.cc +9 -7
  107. package/deps/rocksdb/rocksdb/db/wide/wide_columns_helper.cc +0 -8
  108. package/deps/rocksdb/rocksdb/db/wide/wide_columns_helper.h +28 -2
  109. package/deps/rocksdb/rocksdb/db/write_batch.cc +32 -10
  110. package/deps/rocksdb/rocksdb/db/write_batch_internal.h +9 -0
  111. package/deps/rocksdb/rocksdb/db/write_batch_test.cc +2 -1
  112. package/deps/rocksdb/rocksdb/db/write_thread.cc +3 -1
  113. package/deps/rocksdb/rocksdb/db/write_thread.h +6 -2
  114. package/deps/rocksdb/rocksdb/db_stress_tool/batched_ops_stress.cc +15 -0
  115. package/deps/rocksdb/rocksdb/db_stress_tool/cf_consistency_stress.cc +7 -0
  116. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_common.h +4 -0
  117. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_gflags.cc +18 -2
  118. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_test_base.cc +100 -22
  119. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_test_base.h +15 -4
  120. package/deps/rocksdb/rocksdb/db_stress_tool/multi_ops_txns_stress.cc +34 -8
  121. package/deps/rocksdb/rocksdb/db_stress_tool/no_batched_ops_stress.cc +223 -78
  122. package/deps/rocksdb/rocksdb/env/file_system.cc +6 -1
  123. package/deps/rocksdb/rocksdb/env/fs_posix.cc +53 -0
  124. package/deps/rocksdb/rocksdb/env/io_posix.cc +63 -17
  125. package/deps/rocksdb/rocksdb/env/io_posix.h +30 -1
  126. package/deps/rocksdb/rocksdb/file/file_prefetch_buffer.cc +132 -48
  127. package/deps/rocksdb/rocksdb/file/file_prefetch_buffer.h +92 -24
  128. package/deps/rocksdb/rocksdb/file/prefetch_test.cc +727 -109
  129. package/deps/rocksdb/rocksdb/file/random_access_file_reader.cc +3 -4
  130. package/deps/rocksdb/rocksdb/file/random_access_file_reader.h +1 -1
  131. package/deps/rocksdb/rocksdb/file/writable_file_writer.cc +8 -0
  132. package/deps/rocksdb/rocksdb/include/rocksdb/attribute_groups.h +20 -1
  133. package/deps/rocksdb/rocksdb/include/rocksdb/compaction_job_stats.h +9 -0
  134. package/deps/rocksdb/rocksdb/include/rocksdb/configurable.h +9 -5
  135. package/deps/rocksdb/rocksdb/include/rocksdb/convenience.h +2 -0
  136. package/deps/rocksdb/rocksdb/include/rocksdb/db.h +10 -2
  137. package/deps/rocksdb/rocksdb/include/rocksdb/env.h +1 -0
  138. package/deps/rocksdb/rocksdb/include/rocksdb/experimental.h +7 -0
  139. package/deps/rocksdb/rocksdb/include/rocksdb/file_system.h +34 -37
  140. package/deps/rocksdb/rocksdb/include/rocksdb/iterator_base.h +21 -0
  141. package/deps/rocksdb/rocksdb/include/rocksdb/options.h +56 -28
  142. package/deps/rocksdb/rocksdb/include/rocksdb/sst_file_writer.h +3 -0
  143. package/deps/rocksdb/rocksdb/include/rocksdb/table.h +36 -28
  144. package/deps/rocksdb/rocksdb/include/rocksdb/table_properties.h +11 -0
  145. package/deps/rocksdb/rocksdb/include/rocksdb/thread_status.h +1 -0
  146. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/options_type.h +84 -60
  147. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/secondary_index.h +102 -0
  148. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/table_properties_collectors.h +89 -2
  149. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/transaction.h +32 -0
  150. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/transaction_db.h +30 -1
  151. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/write_batch_with_index.h +23 -2
  152. package/deps/rocksdb/rocksdb/include/rocksdb/version.h +2 -2
  153. package/deps/rocksdb/rocksdb/include/rocksdb/write_batch.h +2 -0
  154. package/deps/rocksdb/rocksdb/memtable/inlineskiplist.h +79 -21
  155. package/deps/rocksdb/rocksdb/memtable/skiplist.h +41 -18
  156. package/deps/rocksdb/rocksdb/memtable/skiplistrep.cc +1 -5
  157. package/deps/rocksdb/rocksdb/memtable/wbwi_memtable.cc +169 -0
  158. package/deps/rocksdb/rocksdb/memtable/wbwi_memtable.h +400 -0
  159. package/deps/rocksdb/rocksdb/monitoring/thread_status_util_debug.cc +2 -0
  160. package/deps/rocksdb/rocksdb/options/cf_options.cc +137 -82
  161. package/deps/rocksdb/rocksdb/options/cf_options.h +18 -6
  162. package/deps/rocksdb/rocksdb/options/configurable.cc +31 -17
  163. package/deps/rocksdb/rocksdb/options/configurable_helper.h +7 -6
  164. package/deps/rocksdb/rocksdb/options/options_helper.cc +10 -8
  165. package/deps/rocksdb/rocksdb/options/options_parser.cc +74 -54
  166. package/deps/rocksdb/rocksdb/options/options_settable_test.cc +89 -0
  167. package/deps/rocksdb/rocksdb/options/options_test.cc +112 -26
  168. package/deps/rocksdb/rocksdb/port/port.h +5 -9
  169. package/deps/rocksdb/rocksdb/src.mk +8 -0
  170. package/deps/rocksdb/rocksdb/table/adaptive/adaptive_table_factory.h +4 -0
  171. package/deps/rocksdb/rocksdb/table/block_based/block.h +1 -7
  172. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_builder.cc +2 -0
  173. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_factory.cc +62 -80
  174. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_factory.h +13 -3
  175. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_iterator.cc +16 -5
  176. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_iterator.h +38 -7
  177. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader.cc +12 -4
  178. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader.h +4 -1
  179. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader_sync_and_async.h +4 -1
  180. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader_test.cc +204 -1
  181. package/deps/rocksdb/rocksdb/table/block_based/data_block_hash_index_test.cc +3 -3
  182. package/deps/rocksdb/rocksdb/table/block_fetcher_test.cc +2 -1
  183. package/deps/rocksdb/rocksdb/table/cuckoo/cuckoo_table_factory.h +4 -0
  184. package/deps/rocksdb/rocksdb/table/format.cc +3 -3
  185. package/deps/rocksdb/rocksdb/table/meta_blocks.cc +4 -1
  186. package/deps/rocksdb/rocksdb/table/mock_table.cc +0 -50
  187. package/deps/rocksdb/rocksdb/table/mock_table.h +53 -0
  188. package/deps/rocksdb/rocksdb/table/plain/plain_table_factory.h +4 -0
  189. package/deps/rocksdb/rocksdb/table/sst_file_dumper.cc +1 -1
  190. package/deps/rocksdb/rocksdb/table/sst_file_writer.cc +10 -5
  191. package/deps/rocksdb/rocksdb/table/table_builder.h +3 -1
  192. package/deps/rocksdb/rocksdb/table/table_properties.cc +181 -0
  193. package/deps/rocksdb/rocksdb/table/table_reader_bench.cc +5 -5
  194. package/deps/rocksdb/rocksdb/table/table_test.cc +71 -64
  195. package/deps/rocksdb/rocksdb/tools/block_cache_analyzer/block_cache_pysim.py +45 -45
  196. package/deps/rocksdb/rocksdb/tools/block_cache_analyzer/block_cache_pysim_test.py +35 -35
  197. package/deps/rocksdb/rocksdb/tools/block_cache_analyzer/block_cache_trace_analyzer_plot.py +43 -43
  198. package/deps/rocksdb/rocksdb/tools/db_bench_tool.cc +41 -4
  199. package/deps/rocksdb/rocksdb/tools/ldb_cmd.cc +1 -0
  200. package/deps/rocksdb/rocksdb/tools/sst_dump_test.cc +1 -1
  201. package/deps/rocksdb/rocksdb/unreleased_history/add.sh +13 -0
  202. package/deps/rocksdb/rocksdb/util/aligned_buffer.h +24 -5
  203. package/deps/rocksdb/rocksdb/util/compaction_job_stats_impl.cc +7 -0
  204. package/deps/rocksdb/rocksdb/util/file_checksum_helper.cc +0 -52
  205. package/deps/rocksdb/rocksdb/util/file_checksum_helper.h +1 -10
  206. package/deps/rocksdb/rocksdb/util/file_reader_writer_test.cc +92 -0
  207. package/deps/rocksdb/rocksdb/util/thread_operation.h +1 -0
  208. package/deps/rocksdb/rocksdb/util/udt_util.cc +50 -4
  209. package/deps/rocksdb/rocksdb/util/udt_util.h +24 -11
  210. package/deps/rocksdb/rocksdb/util/udt_util_test.cc +26 -13
  211. package/deps/rocksdb/rocksdb/utilities/memory/memory_test.cc +1 -16
  212. package/deps/rocksdb/rocksdb/utilities/options/options_util_test.cc +2 -0
  213. package/deps/rocksdb/rocksdb/utilities/secondary_index/faiss_ivf_index.cc +214 -0
  214. package/deps/rocksdb/rocksdb/utilities/secondary_index/faiss_ivf_index.h +60 -0
  215. package/deps/rocksdb/rocksdb/utilities/secondary_index/faiss_ivf_index_test.cc +124 -0
  216. package/deps/rocksdb/rocksdb/utilities/secondary_index/secondary_index_mixin.h +441 -0
  217. package/deps/rocksdb/rocksdb/utilities/table_properties_collectors/compact_for_tiering_collector.cc +34 -3
  218. package/deps/rocksdb/rocksdb/utilities/table_properties_collectors/compact_for_tiering_collector.h +7 -2
  219. package/deps/rocksdb/rocksdb/utilities/transactions/optimistic_transaction_test.cc +437 -0
  220. package/deps/rocksdb/rocksdb/utilities/transactions/pessimistic_transaction.cc +34 -11
  221. package/deps/rocksdb/rocksdb/utilities/transactions/pessimistic_transaction.h +14 -7
  222. package/deps/rocksdb/rocksdb/utilities/transactions/pessimistic_transaction_db.cc +7 -1
  223. package/deps/rocksdb/rocksdb/utilities/transactions/snapshot_checker.cc +17 -0
  224. package/deps/rocksdb/rocksdb/utilities/transactions/transaction_base.cc +69 -0
  225. package/deps/rocksdb/rocksdb/utilities/transactions/transaction_base.h +20 -0
  226. package/deps/rocksdb/rocksdb/utilities/transactions/transaction_test.cc +1290 -0
  227. package/deps/rocksdb/rocksdb/utilities/transactions/write_committed_transaction_ts_test.cc +324 -0
  228. package/deps/rocksdb/rocksdb/utilities/transactions/write_prepared_txn.cc +18 -1
  229. package/deps/rocksdb/rocksdb/utilities/transactions/write_prepared_txn.h +8 -1
  230. package/deps/rocksdb/rocksdb/utilities/write_batch_with_index/write_batch_with_index.cc +57 -12
  231. package/deps/rocksdb/rocksdb/utilities/write_batch_with_index/write_batch_with_index_internal.cc +32 -3
  232. package/deps/rocksdb/rocksdb/utilities/write_batch_with_index/write_batch_with_index_internal.h +33 -2
  233. package/deps/rocksdb/rocksdb/utilities/write_batch_with_index/write_batch_with_index_test.cc +721 -9
  234. package/deps/rocksdb/rocksdb.gyp +2 -0
  235. package/package.json +1 -1
  236. package/prebuilds/darwin-arm64/@nxtedition+rocksdb.node +0 -0
  237. package/prebuilds/linux-x64/@nxtedition+rocksdb.node +0 -0
@@ -31,13 +31,17 @@ class InternalKeyComparator;
31
31
  class Mutex;
32
32
  class VersionSet;
33
33
 
34
- void MemTableListVersion::AddMemTable(MemTable* m) {
34
+ void MemTableListVersion::AddMemTable(ReadOnlyMemTable* m) {
35
+ if (!memlist_.empty()) {
36
+ // ID can be equal for MemPurge
37
+ assert(m->GetID() >= memlist_.front()->GetID());
38
+ }
35
39
  memlist_.push_front(m);
36
40
  *parent_memtable_list_memory_usage_ += m->ApproximateMemoryUsage();
37
41
  }
38
42
 
39
- void MemTableListVersion::UnrefMemTable(autovector<MemTable*>* to_delete,
40
- MemTable* m) {
43
+ void MemTableListVersion::UnrefMemTable(
44
+ autovector<ReadOnlyMemTable*>* to_delete, ReadOnlyMemTable* m) {
41
45
  if (m->Unref()) {
42
46
  to_delete->push_back(m);
43
47
  assert(*parent_memtable_list_memory_usage_ >= m->ApproximateMemoryUsage());
@@ -74,7 +78,7 @@ MemTableListVersion::MemTableListVersion(
74
78
  void MemTableListVersion::Ref() { ++refs_; }
75
79
 
76
80
  // called by superversion::clean()
77
- void MemTableListVersion::Unref(autovector<MemTable*>* to_delete) {
81
+ void MemTableListVersion::Unref(autovector<ReadOnlyMemTable*>* to_delete) {
78
82
  assert(refs_ >= 1);
79
83
  --refs_;
80
84
  if (refs_ == 0) {
@@ -92,14 +96,12 @@ void MemTableListVersion::Unref(autovector<MemTable*>* to_delete) {
92
96
  }
93
97
 
94
98
  int MemTableList::NumNotFlushed() const {
95
- int size = static_cast<int>(current_->memlist_.size());
99
+ int size = current_->NumNotFlushed();
96
100
  assert(num_flush_not_started_ <= size);
97
101
  return size;
98
102
  }
99
103
 
100
- int MemTableList::NumFlushed() const {
101
- return static_cast<int>(current_->memlist_history_.size());
102
- }
104
+ int MemTableList::NumFlushed() const { return current_->NumFlushed(); }
103
105
 
104
106
  // Search all the memtables starting from the most recent one.
105
107
  // Return the most recent value found, if any.
@@ -131,7 +133,7 @@ void MemTableListVersion::MultiGet(const ReadOptions& read_options,
131
133
  bool MemTableListVersion::GetMergeOperands(
132
134
  const LookupKey& key, Status* s, MergeContext* merge_context,
133
135
  SequenceNumber* max_covering_tombstone_seq, const ReadOptions& read_opts) {
134
- for (MemTable* memtable : memlist_) {
136
+ for (ReadOnlyMemTable* memtable : memlist_) {
135
137
  bool done = memtable->Get(
136
138
  key, /*value=*/nullptr, /*columns=*/nullptr, /*timestamp=*/nullptr, s,
137
139
  merge_context, max_covering_tombstone_seq, read_opts,
@@ -154,11 +156,11 @@ bool MemTableListVersion::GetFromHistory(
154
156
  }
155
157
 
156
158
  bool MemTableListVersion::GetFromList(
157
- std::list<MemTable*>* list, const LookupKey& key, std::string* value,
158
- PinnableWideColumns* columns, std::string* timestamp, Status* s,
159
- MergeContext* merge_context, SequenceNumber* max_covering_tombstone_seq,
160
- SequenceNumber* seq, const ReadOptions& read_opts, ReadCallback* callback,
161
- bool* is_blob_index) {
159
+ std::list<ReadOnlyMemTable*>* list, const LookupKey& key,
160
+ std::string* value, PinnableWideColumns* columns, std::string* timestamp,
161
+ Status* s, MergeContext* merge_context,
162
+ SequenceNumber* max_covering_tombstone_seq, SequenceNumber* seq,
163
+ const ReadOptions& read_opts, ReadCallback* callback, bool* is_blob_index) {
162
164
  *seq = kMaxSequenceNumber;
163
165
 
164
166
  for (auto& memtable : *list) {
@@ -218,7 +220,8 @@ void MemTableListVersion::AddIterators(
218
220
  std::vector<InternalIterator*>* iterator_list, Arena* arena) {
219
221
  for (auto& m : memlist_) {
220
222
  iterator_list->push_back(m->NewIterator(options, seqno_to_time_mapping,
221
- arena, prefix_extractor));
223
+ arena, prefix_extractor,
224
+ /*for_flush=*/false));
222
225
  }
223
226
  }
224
227
 
@@ -230,7 +233,8 @@ void MemTableListVersion::AddIterators(
230
233
  for (auto& m : memlist_) {
231
234
  auto mem_iter =
232
235
  m->NewIterator(options, seqno_to_time_mapping,
233
- merge_iter_builder->GetArena(), prefix_extractor);
236
+ merge_iter_builder->GetArena(), prefix_extractor,
237
+ /*for_flush=*/false);
234
238
  if (!add_range_tombstone_iter || options.ignore_range_deletions) {
235
239
  merge_iter_builder->AddIterator(mem_iter);
236
240
  } else {
@@ -259,14 +263,14 @@ void MemTableListVersion::AddIterators(
259
263
  uint64_t MemTableListVersion::GetTotalNumEntries() const {
260
264
  uint64_t total_num = 0;
261
265
  for (auto& m : memlist_) {
262
- total_num += m->num_entries();
266
+ total_num += m->NumEntries();
263
267
  }
264
268
  return total_num;
265
269
  }
266
270
 
267
- MemTable::MemTableStats MemTableListVersion::ApproximateStats(
268
- const Slice& start_ikey, const Slice& end_ikey) {
269
- MemTable::MemTableStats total_stats = {0, 0};
271
+ ReadOnlyMemTable::MemTableStats MemTableListVersion::ApproximateStats(
272
+ const Slice& start_ikey, const Slice& end_ikey) const {
273
+ ReadOnlyMemTable::MemTableStats total_stats = {0, 0};
270
274
  for (auto& m : memlist_) {
271
275
  auto mStats = m->ApproximateStats(start_ikey, end_ikey);
272
276
  total_stats.size += mStats.size;
@@ -278,7 +282,7 @@ MemTable::MemTableStats MemTableListVersion::ApproximateStats(
278
282
  uint64_t MemTableListVersion::GetTotalNumDeletes() const {
279
283
  uint64_t total_num = 0;
280
284
  for (auto& m : memlist_) {
281
- total_num += m->num_deletes();
285
+ total_num += m->NumDeletion();
282
286
  }
283
287
  return total_num;
284
288
  }
@@ -304,7 +308,8 @@ SequenceNumber MemTableListVersion::GetFirstSequenceNumber() const {
304
308
  }
305
309
 
306
310
  // caller is responsible for referencing m
307
- void MemTableListVersion::Add(MemTable* m, autovector<MemTable*>* to_delete) {
311
+ void MemTableListVersion::Add(ReadOnlyMemTable* m,
312
+ autovector<ReadOnlyMemTable*>* to_delete) {
308
313
  assert(refs_ == 1); // only when refs_ == 1 is MemTableListVersion mutable
309
314
  AddMemTable(m);
310
315
  // m->MemoryAllocatedBytes() is added in MemoryAllocatedBytesExcludingLast
@@ -312,8 +317,8 @@ void MemTableListVersion::Add(MemTable* m, autovector<MemTable*>* to_delete) {
312
317
  }
313
318
 
314
319
  // Removes m from list of memtables not flushed. Caller should NOT Unref m.
315
- void MemTableListVersion::Remove(MemTable* m,
316
- autovector<MemTable*>* to_delete) {
320
+ void MemTableListVersion::Remove(ReadOnlyMemTable* m,
321
+ autovector<ReadOnlyMemTable*>* to_delete) {
317
322
  assert(refs_ == 1); // only when refs_ == 1 is MemTableListVersion mutable
318
323
  memlist_.remove(m);
319
324
 
@@ -359,12 +364,16 @@ bool MemTableListVersion::MemtableLimitExceeded(size_t usage) {
359
364
  }
360
365
  }
361
366
 
367
+ bool MemTableListVersion::HistoryShouldBeTrimmed(size_t usage) {
368
+ return MemtableLimitExceeded(usage) && !memlist_history_.empty();
369
+ }
370
+
362
371
  // Make sure we don't use up too much space in history
363
- bool MemTableListVersion::TrimHistory(autovector<MemTable*>* to_delete,
372
+ bool MemTableListVersion::TrimHistory(autovector<ReadOnlyMemTable*>* to_delete,
364
373
  size_t usage) {
365
374
  bool ret = false;
366
- while (MemtableLimitExceeded(usage) && !memlist_history_.empty()) {
367
- MemTable* x = memlist_history_.back();
375
+ while (HistoryShouldBeTrimmed(usage)) {
376
+ ReadOnlyMemTable* x = memlist_history_.back();
368
377
  memlist_history_.pop_back();
369
378
 
370
379
  UnrefMemTable(to_delete, x);
@@ -394,7 +403,7 @@ bool MemTableList::IsFlushPendingOrRunning() const {
394
403
 
395
404
  // Returns the memtables that need to be flushed.
396
405
  void MemTableList::PickMemtablesToFlush(uint64_t max_memtable_id,
397
- autovector<MemTable*>* ret,
406
+ autovector<ReadOnlyMemTable*>* ret,
398
407
  uint64_t* max_next_log_number) {
399
408
  AutoThreadOperationStageUpdater stage_updater(
400
409
  ThreadStatus::STAGE_PICK_MEMTABLES_TO_FLUSH);
@@ -407,8 +416,9 @@ void MemTableList::PickMemtablesToFlush(uint64_t max_memtable_id,
407
416
  // ret is filled with memtables already sorted in increasing MemTable ID.
408
417
  // However, when the mempurge feature is activated, new memtables with older
409
418
  // IDs will be added to the memlist.
410
- for (auto it = memlist.rbegin(); it != memlist.rend(); ++it) {
411
- MemTable* m = *it;
419
+ auto it = memlist.rbegin();
420
+ for (; it != memlist.rend(); ++it) {
421
+ ReadOnlyMemTable* m = *it;
412
422
  if (!atomic_flush && m->atomic_flush_seqno_ != kMaxSequenceNumber) {
413
423
  atomic_flush = true;
414
424
  }
@@ -436,25 +446,32 @@ void MemTableList::PickMemtablesToFlush(uint64_t max_memtable_id,
436
446
  break;
437
447
  }
438
448
  }
449
+ if (!ret->empty() && it != memlist.rend()) {
450
+ // checks that the first memtable not picked to flush is not ingested wbwi.
451
+ // Ingested memtable should be flushed together with the memtable before it
452
+ // since they map to the same WAL and have the same NextLogNumber().
453
+ assert(strcmp((*it)->Name(), "WBWIMemTable") != 0);
454
+ }
439
455
  if (!atomic_flush || num_flush_not_started_ == 0) {
440
456
  flush_requested_ = false; // start-flush request is complete
441
457
  }
442
458
  }
443
459
 
444
- void MemTableList::RollbackMemtableFlush(const autovector<MemTable*>& mems,
445
- bool rollback_succeeding_memtables) {
460
+ void MemTableList::RollbackMemtableFlush(
461
+ const autovector<ReadOnlyMemTable*>& mems,
462
+ bool rollback_succeeding_memtables) {
446
463
  TEST_SYNC_POINT("RollbackMemtableFlush");
447
464
  AutoThreadOperationStageUpdater stage_updater(
448
465
  ThreadStatus::STAGE_MEMTABLE_ROLLBACK);
449
466
  #ifndef NDEBUG
450
- for (MemTable* m : mems) {
467
+ for (ReadOnlyMemTable* m : mems) {
451
468
  assert(m->flush_in_progress_);
452
469
  assert(m->file_number_ == 0);
453
470
  }
454
471
  #endif
455
472
 
456
473
  if (rollback_succeeding_memtables && !mems.empty()) {
457
- std::list<MemTable*>& memlist = current_->memlist_;
474
+ std::list<ReadOnlyMemTable*>& memlist = current_->memlist_;
458
475
  auto it = memlist.rbegin();
459
476
  for (; *it != mems[0] && it != memlist.rend(); ++it) {
460
477
  }
@@ -464,7 +481,7 @@ void MemTableList::RollbackMemtableFlush(const autovector<MemTable*>& mems,
464
481
  ++it;
465
482
  }
466
483
  while (it != memlist.rend()) {
467
- MemTable* m = *it;
484
+ ReadOnlyMemTable* m = *it;
468
485
  // Only rollback complete, not in-progress,
469
486
  // in_progress can be flushes that are still writing SSTs
470
487
  if (m->flush_completed_) {
@@ -480,7 +497,7 @@ void MemTableList::RollbackMemtableFlush(const autovector<MemTable*>& mems,
480
497
  }
481
498
  }
482
499
 
483
- for (MemTable* m : mems) {
500
+ for (ReadOnlyMemTable* m : mems) {
484
501
  if (m->flush_in_progress_) {
485
502
  assert(m->file_number_ == 0);
486
503
  m->file_number_ = 0;
@@ -499,10 +516,10 @@ void MemTableList::RollbackMemtableFlush(const autovector<MemTable*>& mems,
499
516
  // Status::OK letting a concurrent flush to do actual the recording..
500
517
  Status MemTableList::TryInstallMemtableFlushResults(
501
518
  ColumnFamilyData* cfd, const MutableCFOptions& mutable_cf_options,
502
- const autovector<MemTable*>& mems, LogsWithPrepTracker* prep_tracker,
503
- VersionSet* vset, InstrumentedMutex* mu, uint64_t file_number,
504
- autovector<MemTable*>* to_delete, FSDirectory* db_directory,
505
- LogBuffer* log_buffer,
519
+ const autovector<ReadOnlyMemTable*>& mems,
520
+ LogsWithPrepTracker* prep_tracker, VersionSet* vset, InstrumentedMutex* mu,
521
+ uint64_t file_number, autovector<ReadOnlyMemTable*>* to_delete,
522
+ FSDirectory* db_directory, LogBuffer* log_buffer,
506
523
  std::list<std::unique_ptr<FlushJobInfo>>* committed_flush_jobs_info,
507
524
  bool write_edits) {
508
525
  AutoThreadOperationStageUpdater stage_updater(
@@ -549,16 +566,16 @@ Status MemTableList::TryInstallMemtableFlushResults(
549
566
  // (in that order) that have finished flushing. Memtables
550
567
  // are always committed in the order that they were created.
551
568
  uint64_t batch_file_number = 0;
552
- size_t batch_count = 0;
553
569
  autovector<VersionEdit*> edit_list;
554
- autovector<MemTable*> memtables_to_flush;
570
+ autovector<ReadOnlyMemTable*> memtables_to_flush;
555
571
  // enumerate from the last (earliest) element to see how many batch finished
556
572
  for (auto it = memlist.rbegin(); it != memlist.rend(); ++it) {
557
- MemTable* m = *it;
573
+ ReadOnlyMemTable* m = *it;
558
574
  if (!m->flush_completed_) {
559
575
  break;
560
576
  }
561
577
  if (it == memlist.rbegin() || batch_file_number != m->file_number_) {
578
+ // Oldest memtable in a new batch.
562
579
  batch_file_number = m->file_number_;
563
580
  if (m->edit_.GetBlobFileAdditions().empty()) {
564
581
  ROCKS_LOG_BUFFER(log_buffer,
@@ -574,17 +591,17 @@ Status MemTableList::TryInstallMemtableFlushResults(
574
591
  }
575
592
 
576
593
  edit_list.push_back(&m->edit_);
577
- memtables_to_flush.push_back(m);
578
594
  std::unique_ptr<FlushJobInfo> info = m->ReleaseFlushJobInfo();
579
595
  if (info != nullptr) {
580
596
  committed_flush_jobs_info->push_back(std::move(info));
581
597
  }
582
598
  }
583
- batch_count++;
599
+ memtables_to_flush.push_back(m);
584
600
  }
585
601
 
602
+ size_t num_mem_to_flush = memtables_to_flush.size();
586
603
  // TODO(myabandeh): Not sure how batch_count could be 0 here.
587
- if (batch_count > 0) {
604
+ if (num_mem_to_flush > 0) {
588
605
  VersionEdit edit;
589
606
  #ifdef ROCKSDB_ASSERT_STATUS_CHECKED
590
607
  if (memtables_to_flush.size() == memlist.size()) {
@@ -608,9 +625,9 @@ Status MemTableList::TryInstallMemtableFlushResults(
608
625
  nullptr);
609
626
  edit_list.push_back(&edit);
610
627
 
611
- const auto manifest_write_cb = [this, cfd, batch_count, log_buffer,
628
+ const auto manifest_write_cb = [this, cfd, num_mem_to_flush, log_buffer,
612
629
  to_delete, mu](const Status& status) {
613
- RemoveMemTablesOrRestoreFlags(status, cfd, batch_count, log_buffer,
630
+ RemoveMemTablesOrRestoreFlags(status, cfd, num_mem_to_flush, log_buffer,
614
631
  to_delete, mu);
615
632
  };
616
633
  if (write_edits) {
@@ -623,7 +640,7 @@ Status MemTableList::TryInstallMemtableFlushResults(
623
640
  // If write_edit is false (e.g: successful mempurge),
624
641
  // then remove old memtables, wake up manifest write queue threads,
625
642
  // and don't commit anything to the manifest file.
626
- RemoveMemTablesOrRestoreFlags(s, cfd, batch_count, log_buffer,
643
+ RemoveMemTablesOrRestoreFlags(s, cfd, num_mem_to_flush, log_buffer,
627
644
  to_delete, mu);
628
645
  // Note: cfd->SetLogNumber is only called when a VersionEdit
629
646
  // is written to MANIFEST. When mempurge is successful, we skip
@@ -642,7 +659,8 @@ Status MemTableList::TryInstallMemtableFlushResults(
642
659
  }
643
660
 
644
661
  // New memtables are inserted at the front of the list.
645
- void MemTableList::Add(MemTable* m, autovector<MemTable*>* to_delete) {
662
+ void MemTableList::Add(ReadOnlyMemTable* m,
663
+ autovector<ReadOnlyMemTable*>* to_delete) {
646
664
  assert(static_cast<int>(current_->memlist_.size()) >= num_flush_not_started_);
647
665
  InstallNewVersion();
648
666
  // this method is used to move mutable memtable into an immutable list.
@@ -660,9 +678,18 @@ void MemTableList::Add(MemTable* m, autovector<MemTable*>* to_delete) {
660
678
  ResetTrimHistoryNeeded();
661
679
  }
662
680
 
663
- bool MemTableList::TrimHistory(autovector<MemTable*>* to_delete, size_t usage) {
681
+ bool MemTableList::TrimHistory(autovector<ReadOnlyMemTable*>* to_delete,
682
+ size_t usage) {
683
+ // Check if history trim is needed first, so that we can avoid installing a
684
+ // new MemTableListVersion without installing a SuperVersion (installed based
685
+ // on return value of this function).
686
+ if (!current_->HistoryShouldBeTrimmed(usage)) {
687
+ ResetTrimHistoryNeeded();
688
+ return false;
689
+ }
664
690
  InstallNewVersion();
665
691
  bool ret = current_->TrimHistory(to_delete, usage);
692
+ assert(ret);
666
693
  UpdateCachedValuesFromMemTableListVersion();
667
694
  ResetTrimHistoryNeeded();
668
695
  return ret;
@@ -714,14 +741,15 @@ void MemTableList::InstallNewVersion() {
714
741
  // somebody else holds the current version, we need to create new one
715
742
  MemTableListVersion* version = current_;
716
743
  current_ = new MemTableListVersion(&current_memory_usage_, *version);
744
+ current_->SetID(++last_memtable_list_version_id_);
717
745
  current_->Ref();
718
746
  version->Unref();
719
747
  }
720
748
  }
721
749
 
722
750
  void MemTableList::RemoveMemTablesOrRestoreFlags(
723
- const Status& s, ColumnFamilyData* cfd, size_t batch_count,
724
- LogBuffer* log_buffer, autovector<MemTable*>* to_delete,
751
+ const Status& s, ColumnFamilyData* cfd, size_t num_mem_to_flush,
752
+ LogBuffer* log_buffer, autovector<ReadOnlyMemTable*>* to_delete,
725
753
  InstrumentedMutex* mu) {
726
754
  assert(mu);
727
755
  mu->AssertHeld();
@@ -749,8 +777,11 @@ void MemTableList::RemoveMemTablesOrRestoreFlags(
749
777
  // read full data as long as column family handle is not deleted, even if
750
778
  // the column family is dropped.
751
779
  if (s.ok() && !cfd->IsDropped()) { // commit new state
752
- while (batch_count-- > 0) {
753
- MemTable* m = current_->memlist_.back();
780
+ while (num_mem_to_flush-- > 0) {
781
+ ReadOnlyMemTable* m = current_->memlist_.back();
782
+ // TODO: The logging can be redundant when we flush multiple memtables
783
+ // into one SST file. We should only check the edit_ of the oldest
784
+ // memtable in the group in that case.
754
785
  if (m->edit_.GetBlobFileAdditions().empty()) {
755
786
  ROCKS_LOG_BUFFER(log_buffer,
756
787
  "[%s] Level-0 commit flush result of table #%" PRIu64
@@ -772,8 +803,8 @@ void MemTableList::RemoveMemTablesOrRestoreFlags(
772
803
  ++mem_id;
773
804
  }
774
805
  } else {
775
- for (auto it = current_->memlist_.rbegin(); batch_count-- > 0; ++it) {
776
- MemTable* m = *it;
806
+ for (auto it = current_->memlist_.rbegin(); num_mem_to_flush-- > 0; ++it) {
807
+ ReadOnlyMemTable* m = *it;
777
808
  // commit failed. setup state so that we can flush again.
778
809
  if (m->edit_.GetBlobFileAdditions().empty()) {
779
810
  ROCKS_LOG_BUFFER(log_buffer,
@@ -801,7 +832,7 @@ void MemTableList::RemoveMemTablesOrRestoreFlags(
801
832
  }
802
833
 
803
834
  uint64_t MemTableList::PrecomputeMinLogContainingPrepSection(
804
- const std::unordered_set<MemTable*>* memtables_to_flush) {
835
+ const std::unordered_set<ReadOnlyMemTable*>* memtables_to_flush) const {
805
836
  uint64_t min_log = 0;
806
837
 
807
838
  for (auto& m : current_->memlist_) {
@@ -824,12 +855,12 @@ Status InstallMemtableAtomicFlushResults(
824
855
  const autovector<MemTableList*>* imm_lists,
825
856
  const autovector<ColumnFamilyData*>& cfds,
826
857
  const autovector<const MutableCFOptions*>& mutable_cf_options_list,
827
- const autovector<const autovector<MemTable*>*>& mems_list, VersionSet* vset,
828
- LogsWithPrepTracker* prep_tracker, InstrumentedMutex* mu,
858
+ const autovector<const autovector<ReadOnlyMemTable*>*>& mems_list,
859
+ VersionSet* vset, LogsWithPrepTracker* prep_tracker, InstrumentedMutex* mu,
829
860
  const autovector<FileMetaData*>& file_metas,
830
861
  const autovector<std::list<std::unique_ptr<FlushJobInfo>>*>&
831
862
  committed_flush_jobs_info,
832
- autovector<MemTable*>* to_delete, FSDirectory* db_directory,
863
+ autovector<ReadOnlyMemTable*>* to_delete, FSDirectory* db_directory,
833
864
  LogBuffer* log_buffer) {
834
865
  AutoThreadOperationStageUpdater stage_updater(
835
866
  ThreadStatus::STAGE_MEMTABLE_INSTALL_FLUSH_RESULTS);
@@ -993,14 +1024,14 @@ Status InstallMemtableAtomicFlushResults(
993
1024
  return s;
994
1025
  }
995
1026
 
996
- void MemTableList::RemoveOldMemTables(uint64_t log_number,
997
- autovector<MemTable*>* to_delete) {
1027
+ void MemTableList::RemoveOldMemTables(
1028
+ uint64_t log_number, autovector<ReadOnlyMemTable*>* to_delete) {
998
1029
  assert(to_delete != nullptr);
999
1030
  InstallNewVersion();
1000
1031
  auto& memlist = current_->memlist_;
1001
- autovector<MemTable*> old_memtables;
1032
+ autovector<ReadOnlyMemTable*> old_memtables;
1002
1033
  for (auto it = memlist.rbegin(); it != memlist.rend(); ++it) {
1003
- MemTable* mem = *it;
1034
+ ReadOnlyMemTable* mem = *it;
1004
1035
  if (mem->GetNextLogNumber() > log_number) {
1005
1036
  break;
1006
1037
  }
@@ -1008,7 +1039,7 @@ void MemTableList::RemoveOldMemTables(uint64_t log_number,
1008
1039
  }
1009
1040
 
1010
1041
  for (auto it = old_memtables.begin(); it != old_memtables.end(); ++it) {
1011
- MemTable* mem = *it;
1042
+ ReadOnlyMemTable* mem = *it;
1012
1043
  current_->Remove(mem, to_delete);
1013
1044
  --num_flush_not_started_;
1014
1045
  if (0 == num_flush_not_started_) {
@@ -1031,9 +1062,9 @@ VersionEdit MemTableList::GetEditForDroppingCurrentVersion(
1031
1062
 
1032
1063
  uint64_t max_next_log_number = 0;
1033
1064
  autovector<VersionEdit*> edit_list;
1034
- autovector<MemTable*> memtables_to_drop;
1065
+ autovector<ReadOnlyMemTable*> memtables_to_drop;
1035
1066
  for (auto it = memlist.rbegin(); it != memlist.rend(); ++it) {
1036
- MemTable* m = *it;
1067
+ ReadOnlyMemTable* m = *it;
1037
1068
  memtables_to_drop.push_back(m);
1038
1069
  max_next_log_number = std::max(m->GetNextLogNumber(), max_next_log_number);
1039
1070
  }