@nxtedition/rocksdb 13.1.4 → 13.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (237)
  1. package/binding.cc +43 -16
  2. package/deps/rocksdb/rocksdb/{TARGETS → BUCK} +27 -0
  3. package/deps/rocksdb/rocksdb/CMakeLists.txt +3 -1
  4. package/deps/rocksdb/rocksdb/Makefile +2 -2
  5. package/deps/rocksdb/rocksdb/cache/cache.cc +3 -1
  6. package/deps/rocksdb/rocksdb/db/arena_wrapped_db_iter.h +2 -0
  7. package/deps/rocksdb/rocksdb/db/attribute_group_iterator_impl.h +34 -9
  8. package/deps/rocksdb/rocksdb/db/blob/blob_source.cc +7 -6
  9. package/deps/rocksdb/rocksdb/db/blob/blob_source.h +5 -1
  10. package/deps/rocksdb/rocksdb/db/blob/blob_source_test.cc +22 -14
  11. package/deps/rocksdb/rocksdb/db/blob/db_blob_basic_test.cc +149 -0
  12. package/deps/rocksdb/rocksdb/db/builder.cc +13 -24
  13. package/deps/rocksdb/rocksdb/db/coalescing_iterator.h +35 -10
  14. package/deps/rocksdb/rocksdb/db/column_family.cc +21 -10
  15. package/deps/rocksdb/rocksdb/db/column_family.h +15 -8
  16. package/deps/rocksdb/rocksdb/db/column_family_test.cc +98 -7
  17. package/deps/rocksdb/rocksdb/db/compaction/compaction.cc +126 -16
  18. package/deps/rocksdb/rocksdb/db/compaction/compaction.h +51 -5
  19. package/deps/rocksdb/rocksdb/db/compaction/compaction_iterator.cc +2 -2
  20. package/deps/rocksdb/rocksdb/db/compaction/compaction_iterator.h +2 -8
  21. package/deps/rocksdb/rocksdb/db/compaction/compaction_iterator_test.cc +24 -0
  22. package/deps/rocksdb/rocksdb/db/compaction/compaction_job.cc +52 -22
  23. package/deps/rocksdb/rocksdb/db/compaction/compaction_job.h +9 -7
  24. package/deps/rocksdb/rocksdb/db/compaction/compaction_job_test.cc +36 -9
  25. package/deps/rocksdb/rocksdb/db/compaction/compaction_outputs.h +6 -0
  26. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker.cc +30 -17
  27. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker.h +26 -23
  28. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_fifo.cc +43 -33
  29. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_fifo.h +6 -5
  30. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_level.cc +19 -9
  31. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_level.h +6 -5
  32. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_test.cc +632 -411
  33. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_universal.cc +171 -51
  34. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_universal.h +7 -5
  35. package/deps/rocksdb/rocksdb/db/compaction/compaction_service_job.cc +37 -10
  36. package/deps/rocksdb/rocksdb/db/compaction/compaction_service_test.cc +51 -11
  37. package/deps/rocksdb/rocksdb/db/compaction/subcompaction_state.cc +10 -3
  38. package/deps/rocksdb/rocksdb/db/compaction/tiered_compaction_test.cc +350 -154
  39. package/deps/rocksdb/rocksdb/db/convenience.cc +1 -1
  40. package/deps/rocksdb/rocksdb/db/db_block_cache_test.cc +62 -27
  41. package/deps/rocksdb/rocksdb/db/db_bloom_filter_test.cc +68 -1
  42. package/deps/rocksdb/rocksdb/db/db_compaction_test.cc +91 -0
  43. package/deps/rocksdb/rocksdb/db/db_impl/db_impl.cc +134 -70
  44. package/deps/rocksdb/rocksdb/db/db_impl/db_impl.h +71 -23
  45. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_compaction_flush.cc +43 -16
  46. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_debug.cc +47 -33
  47. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_files.cc +27 -19
  48. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_open.cc +38 -25
  49. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_readonly.cc +3 -3
  50. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_secondary.cc +7 -4
  51. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_write.cc +258 -42
  52. package/deps/rocksdb/rocksdb/db/db_io_failure_test.cc +161 -9
  53. package/deps/rocksdb/rocksdb/db/db_iter.cc +118 -86
  54. package/deps/rocksdb/rocksdb/db/db_iter.h +44 -17
  55. package/deps/rocksdb/rocksdb/db/db_options_test.cc +27 -6
  56. package/deps/rocksdb/rocksdb/db/db_test.cc +48 -16
  57. package/deps/rocksdb/rocksdb/db/db_test2.cc +60 -15
  58. package/deps/rocksdb/rocksdb/db/db_test_util.cc +97 -44
  59. package/deps/rocksdb/rocksdb/db/db_test_util.h +7 -1
  60. package/deps/rocksdb/rocksdb/db/dbformat.cc +15 -5
  61. package/deps/rocksdb/rocksdb/db/dbformat.h +137 -55
  62. package/deps/rocksdb/rocksdb/db/event_helpers.cc +1 -0
  63. package/deps/rocksdb/rocksdb/db/experimental.cc +54 -0
  64. package/deps/rocksdb/rocksdb/db/external_sst_file_basic_test.cc +663 -8
  65. package/deps/rocksdb/rocksdb/db/external_sst_file_ingestion_job.cc +152 -91
  66. package/deps/rocksdb/rocksdb/db/external_sst_file_ingestion_job.h +134 -11
  67. package/deps/rocksdb/rocksdb/db/external_sst_file_test.cc +55 -9
  68. package/deps/rocksdb/rocksdb/db/flush_job.cc +52 -29
  69. package/deps/rocksdb/rocksdb/db/flush_job.h +5 -3
  70. package/deps/rocksdb/rocksdb/db/flush_job_test.cc +18 -12
  71. package/deps/rocksdb/rocksdb/db/forward_iterator.cc +23 -29
  72. package/deps/rocksdb/rocksdb/db/import_column_family_job.cc +3 -2
  73. package/deps/rocksdb/rocksdb/db/import_column_family_test.cc +2 -0
  74. package/deps/rocksdb/rocksdb/db/internal_stats.cc +9 -6
  75. package/deps/rocksdb/rocksdb/db/internal_stats.h +54 -0
  76. package/deps/rocksdb/rocksdb/db/job_context.h +1 -1
  77. package/deps/rocksdb/rocksdb/db/log_reader.cc +6 -7
  78. package/deps/rocksdb/rocksdb/db/manifest_ops.cc +47 -0
  79. package/deps/rocksdb/rocksdb/db/manifest_ops.h +20 -0
  80. package/deps/rocksdb/rocksdb/db/memtable.cc +165 -64
  81. package/deps/rocksdb/rocksdb/db/memtable.h +422 -243
  82. package/deps/rocksdb/rocksdb/db/memtable_list.cc +99 -68
  83. package/deps/rocksdb/rocksdb/db/memtable_list.h +63 -38
  84. package/deps/rocksdb/rocksdb/db/memtable_list_test.cc +28 -25
  85. package/deps/rocksdb/rocksdb/db/multi_cf_iterator_impl.h +118 -60
  86. package/deps/rocksdb/rocksdb/db/multi_cf_iterator_test.cc +344 -89
  87. package/deps/rocksdb/rocksdb/db/range_tombstone_fragmenter.h +2 -3
  88. package/deps/rocksdb/rocksdb/db/repair.cc +15 -14
  89. package/deps/rocksdb/rocksdb/db/repair_test.cc +0 -13
  90. package/deps/rocksdb/rocksdb/db/snapshot_checker.h +7 -0
  91. package/deps/rocksdb/rocksdb/db/table_cache.cc +62 -65
  92. package/deps/rocksdb/rocksdb/db/table_cache.h +70 -76
  93. package/deps/rocksdb/rocksdb/db/table_cache_sync_and_async.h +5 -6
  94. package/deps/rocksdb/rocksdb/db/table_properties_collector_test.cc +1 -1
  95. package/deps/rocksdb/rocksdb/db/transaction_log_impl.cc +8 -7
  96. package/deps/rocksdb/rocksdb/db/version_builder.cc +17 -19
  97. package/deps/rocksdb/rocksdb/db/version_builder.h +13 -12
  98. package/deps/rocksdb/rocksdb/db/version_edit.h +30 -0
  99. package/deps/rocksdb/rocksdb/db/version_edit_handler.cc +3 -5
  100. package/deps/rocksdb/rocksdb/db/version_set.cc +89 -129
  101. package/deps/rocksdb/rocksdb/db/version_set.h +12 -4
  102. package/deps/rocksdb/rocksdb/db/version_set_sync_and_async.h +1 -2
  103. package/deps/rocksdb/rocksdb/db/version_set_test.cc +12 -8
  104. package/deps/rocksdb/rocksdb/db/wide/wide_column_serialization.cc +0 -15
  105. package/deps/rocksdb/rocksdb/db/wide/wide_column_serialization.h +0 -2
  106. package/deps/rocksdb/rocksdb/db/wide/wide_column_serialization_test.cc +9 -7
  107. package/deps/rocksdb/rocksdb/db/wide/wide_columns_helper.cc +0 -8
  108. package/deps/rocksdb/rocksdb/db/wide/wide_columns_helper.h +28 -2
  109. package/deps/rocksdb/rocksdb/db/write_batch.cc +32 -10
  110. package/deps/rocksdb/rocksdb/db/write_batch_internal.h +9 -0
  111. package/deps/rocksdb/rocksdb/db/write_batch_test.cc +2 -1
  112. package/deps/rocksdb/rocksdb/db/write_thread.cc +3 -1
  113. package/deps/rocksdb/rocksdb/db/write_thread.h +6 -2
  114. package/deps/rocksdb/rocksdb/db_stress_tool/batched_ops_stress.cc +15 -0
  115. package/deps/rocksdb/rocksdb/db_stress_tool/cf_consistency_stress.cc +7 -0
  116. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_common.h +4 -0
  117. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_gflags.cc +18 -2
  118. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_test_base.cc +100 -22
  119. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_test_base.h +15 -4
  120. package/deps/rocksdb/rocksdb/db_stress_tool/multi_ops_txns_stress.cc +34 -8
  121. package/deps/rocksdb/rocksdb/db_stress_tool/no_batched_ops_stress.cc +223 -78
  122. package/deps/rocksdb/rocksdb/env/file_system.cc +6 -1
  123. package/deps/rocksdb/rocksdb/env/fs_posix.cc +53 -0
  124. package/deps/rocksdb/rocksdb/env/io_posix.cc +63 -17
  125. package/deps/rocksdb/rocksdb/env/io_posix.h +30 -1
  126. package/deps/rocksdb/rocksdb/file/file_prefetch_buffer.cc +132 -48
  127. package/deps/rocksdb/rocksdb/file/file_prefetch_buffer.h +92 -24
  128. package/deps/rocksdb/rocksdb/file/prefetch_test.cc +727 -109
  129. package/deps/rocksdb/rocksdb/file/random_access_file_reader.cc +3 -4
  130. package/deps/rocksdb/rocksdb/file/random_access_file_reader.h +1 -1
  131. package/deps/rocksdb/rocksdb/file/writable_file_writer.cc +8 -0
  132. package/deps/rocksdb/rocksdb/include/rocksdb/attribute_groups.h +20 -1
  133. package/deps/rocksdb/rocksdb/include/rocksdb/compaction_job_stats.h +9 -0
  134. package/deps/rocksdb/rocksdb/include/rocksdb/configurable.h +9 -5
  135. package/deps/rocksdb/rocksdb/include/rocksdb/convenience.h +2 -0
  136. package/deps/rocksdb/rocksdb/include/rocksdb/db.h +10 -2
  137. package/deps/rocksdb/rocksdb/include/rocksdb/env.h +1 -0
  138. package/deps/rocksdb/rocksdb/include/rocksdb/experimental.h +7 -0
  139. package/deps/rocksdb/rocksdb/include/rocksdb/file_system.h +34 -37
  140. package/deps/rocksdb/rocksdb/include/rocksdb/iterator_base.h +21 -0
  141. package/deps/rocksdb/rocksdb/include/rocksdb/options.h +56 -28
  142. package/deps/rocksdb/rocksdb/include/rocksdb/sst_file_writer.h +3 -0
  143. package/deps/rocksdb/rocksdb/include/rocksdb/table.h +36 -28
  144. package/deps/rocksdb/rocksdb/include/rocksdb/table_properties.h +11 -0
  145. package/deps/rocksdb/rocksdb/include/rocksdb/thread_status.h +1 -0
  146. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/options_type.h +84 -60
  147. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/secondary_index.h +102 -0
  148. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/table_properties_collectors.h +89 -2
  149. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/transaction.h +32 -0
  150. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/transaction_db.h +30 -1
  151. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/write_batch_with_index.h +23 -2
  152. package/deps/rocksdb/rocksdb/include/rocksdb/version.h +2 -2
  153. package/deps/rocksdb/rocksdb/include/rocksdb/write_batch.h +2 -0
  154. package/deps/rocksdb/rocksdb/memtable/inlineskiplist.h +79 -21
  155. package/deps/rocksdb/rocksdb/memtable/skiplist.h +41 -18
  156. package/deps/rocksdb/rocksdb/memtable/skiplistrep.cc +1 -5
  157. package/deps/rocksdb/rocksdb/memtable/wbwi_memtable.cc +169 -0
  158. package/deps/rocksdb/rocksdb/memtable/wbwi_memtable.h +400 -0
  159. package/deps/rocksdb/rocksdb/monitoring/thread_status_util_debug.cc +2 -0
  160. package/deps/rocksdb/rocksdb/options/cf_options.cc +137 -82
  161. package/deps/rocksdb/rocksdb/options/cf_options.h +18 -6
  162. package/deps/rocksdb/rocksdb/options/configurable.cc +31 -17
  163. package/deps/rocksdb/rocksdb/options/configurable_helper.h +7 -6
  164. package/deps/rocksdb/rocksdb/options/options_helper.cc +10 -8
  165. package/deps/rocksdb/rocksdb/options/options_parser.cc +74 -54
  166. package/deps/rocksdb/rocksdb/options/options_settable_test.cc +89 -0
  167. package/deps/rocksdb/rocksdb/options/options_test.cc +112 -26
  168. package/deps/rocksdb/rocksdb/port/port.h +5 -9
  169. package/deps/rocksdb/rocksdb/src.mk +8 -0
  170. package/deps/rocksdb/rocksdb/table/adaptive/adaptive_table_factory.h +4 -0
  171. package/deps/rocksdb/rocksdb/table/block_based/block.h +1 -7
  172. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_builder.cc +2 -0
  173. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_factory.cc +62 -80
  174. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_factory.h +13 -3
  175. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_iterator.cc +16 -5
  176. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_iterator.h +38 -7
  177. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader.cc +12 -4
  178. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader.h +4 -1
  179. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader_sync_and_async.h +4 -1
  180. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader_test.cc +204 -1
  181. package/deps/rocksdb/rocksdb/table/block_based/data_block_hash_index_test.cc +3 -3
  182. package/deps/rocksdb/rocksdb/table/block_fetcher_test.cc +2 -1
  183. package/deps/rocksdb/rocksdb/table/cuckoo/cuckoo_table_factory.h +4 -0
  184. package/deps/rocksdb/rocksdb/table/format.cc +3 -3
  185. package/deps/rocksdb/rocksdb/table/meta_blocks.cc +4 -1
  186. package/deps/rocksdb/rocksdb/table/mock_table.cc +0 -50
  187. package/deps/rocksdb/rocksdb/table/mock_table.h +53 -0
  188. package/deps/rocksdb/rocksdb/table/plain/plain_table_factory.h +4 -0
  189. package/deps/rocksdb/rocksdb/table/sst_file_dumper.cc +1 -1
  190. package/deps/rocksdb/rocksdb/table/sst_file_writer.cc +10 -5
  191. package/deps/rocksdb/rocksdb/table/table_builder.h +3 -1
  192. package/deps/rocksdb/rocksdb/table/table_properties.cc +181 -0
  193. package/deps/rocksdb/rocksdb/table/table_reader_bench.cc +5 -5
  194. package/deps/rocksdb/rocksdb/table/table_test.cc +71 -64
  195. package/deps/rocksdb/rocksdb/tools/block_cache_analyzer/block_cache_pysim.py +45 -45
  196. package/deps/rocksdb/rocksdb/tools/block_cache_analyzer/block_cache_pysim_test.py +35 -35
  197. package/deps/rocksdb/rocksdb/tools/block_cache_analyzer/block_cache_trace_analyzer_plot.py +43 -43
  198. package/deps/rocksdb/rocksdb/tools/db_bench_tool.cc +41 -4
  199. package/deps/rocksdb/rocksdb/tools/ldb_cmd.cc +1 -0
  200. package/deps/rocksdb/rocksdb/tools/sst_dump_test.cc +1 -1
  201. package/deps/rocksdb/rocksdb/unreleased_history/add.sh +13 -0
  202. package/deps/rocksdb/rocksdb/util/aligned_buffer.h +24 -5
  203. package/deps/rocksdb/rocksdb/util/compaction_job_stats_impl.cc +7 -0
  204. package/deps/rocksdb/rocksdb/util/file_checksum_helper.cc +0 -52
  205. package/deps/rocksdb/rocksdb/util/file_checksum_helper.h +1 -10
  206. package/deps/rocksdb/rocksdb/util/file_reader_writer_test.cc +92 -0
  207. package/deps/rocksdb/rocksdb/util/thread_operation.h +1 -0
  208. package/deps/rocksdb/rocksdb/util/udt_util.cc +50 -4
  209. package/deps/rocksdb/rocksdb/util/udt_util.h +24 -11
  210. package/deps/rocksdb/rocksdb/util/udt_util_test.cc +26 -13
  211. package/deps/rocksdb/rocksdb/utilities/memory/memory_test.cc +1 -16
  212. package/deps/rocksdb/rocksdb/utilities/options/options_util_test.cc +2 -0
  213. package/deps/rocksdb/rocksdb/utilities/secondary_index/faiss_ivf_index.cc +214 -0
  214. package/deps/rocksdb/rocksdb/utilities/secondary_index/faiss_ivf_index.h +60 -0
  215. package/deps/rocksdb/rocksdb/utilities/secondary_index/faiss_ivf_index_test.cc +124 -0
  216. package/deps/rocksdb/rocksdb/utilities/secondary_index/secondary_index_mixin.h +441 -0
  217. package/deps/rocksdb/rocksdb/utilities/table_properties_collectors/compact_for_tiering_collector.cc +34 -3
  218. package/deps/rocksdb/rocksdb/utilities/table_properties_collectors/compact_for_tiering_collector.h +7 -2
  219. package/deps/rocksdb/rocksdb/utilities/transactions/optimistic_transaction_test.cc +437 -0
  220. package/deps/rocksdb/rocksdb/utilities/transactions/pessimistic_transaction.cc +34 -11
  221. package/deps/rocksdb/rocksdb/utilities/transactions/pessimistic_transaction.h +14 -7
  222. package/deps/rocksdb/rocksdb/utilities/transactions/pessimistic_transaction_db.cc +7 -1
  223. package/deps/rocksdb/rocksdb/utilities/transactions/snapshot_checker.cc +17 -0
  224. package/deps/rocksdb/rocksdb/utilities/transactions/transaction_base.cc +69 -0
  225. package/deps/rocksdb/rocksdb/utilities/transactions/transaction_base.h +20 -0
  226. package/deps/rocksdb/rocksdb/utilities/transactions/transaction_test.cc +1290 -0
  227. package/deps/rocksdb/rocksdb/utilities/transactions/write_committed_transaction_ts_test.cc +324 -0
  228. package/deps/rocksdb/rocksdb/utilities/transactions/write_prepared_txn.cc +18 -1
  229. package/deps/rocksdb/rocksdb/utilities/transactions/write_prepared_txn.h +8 -1
  230. package/deps/rocksdb/rocksdb/utilities/write_batch_with_index/write_batch_with_index.cc +57 -12
  231. package/deps/rocksdb/rocksdb/utilities/write_batch_with_index/write_batch_with_index_internal.cc +32 -3
  232. package/deps/rocksdb/rocksdb/utilities/write_batch_with_index/write_batch_with_index_internal.h +33 -2
  233. package/deps/rocksdb/rocksdb/utilities/write_batch_with_index/write_batch_with_index_test.cc +721 -9
  234. package/deps/rocksdb/rocksdb.gyp +2 -0
  235. package/package.json +1 -1
  236. package/prebuilds/darwin-arm64/@nxtedition+rocksdb.node +0 -0
  237. package/prebuilds/linux-x64/@nxtedition+rocksdb.node +0 -0
@@ -3,6 +3,8 @@
3
3
  // COPYING file in the root directory) and Apache 2.0 License
4
4
  // (found in the LICENSE.Apache file in the root directory).
5
5
 
6
+ #include <table/block_based/block_based_table_factory.h>
7
+
6
8
  #include <functional>
7
9
  #include <memory>
8
10
 
@@ -150,7 +152,7 @@ class ExternalSSTFileTest
150
152
  bool verify_checksums_before_ingest = true, bool ingest_behind = false,
151
153
  bool sort_data = false,
152
154
  std::map<std::string, std::string>* true_data = nullptr,
153
- ColumnFamilyHandle* cfh = nullptr) {
155
+ ColumnFamilyHandle* cfh = nullptr, bool fill_cache = false) {
154
156
  // Generate a file id if not provided
155
157
  if (file_id == -1) {
156
158
  file_id = last_file_id_ + 1;
@@ -194,6 +196,7 @@ class ExternalSSTFileTest
194
196
  ifo.write_global_seqno = allow_global_seqno ? write_global_seqno : false;
195
197
  ifo.verify_checksums_before_ingest = verify_checksums_before_ingest;
196
198
  ifo.ingest_behind = ingest_behind;
199
+ ifo.fill_cache = fill_cache;
197
200
  if (cfh) {
198
201
  s = db_->IngestExternalFile(cfh, {file_path}, ifo);
199
202
  } else {
@@ -267,15 +270,15 @@ class ExternalSSTFileTest
267
270
  bool verify_checksums_before_ingest = true, bool ingest_behind = false,
268
271
  bool sort_data = false,
269
272
  std::map<std::string, std::string>* true_data = nullptr,
270
- ColumnFamilyHandle* cfh = nullptr) {
273
+ ColumnFamilyHandle* cfh = nullptr, bool fill_cache = false) {
271
274
  std::vector<std::pair<std::string, std::string>> file_data;
272
275
  for (auto& k : keys) {
273
276
  file_data.emplace_back(Key(k), Key(k) + std::to_string(file_id));
274
277
  }
275
- return GenerateAndAddExternalFile(options, file_data, file_id,
276
- allow_global_seqno, write_global_seqno,
277
- verify_checksums_before_ingest,
278
- ingest_behind, sort_data, true_data, cfh);
278
+ return GenerateAndAddExternalFile(
279
+ options, file_data, file_id, allow_global_seqno, write_global_seqno,
280
+ verify_checksums_before_ingest, ingest_behind, sort_data, true_data,
281
+ cfh, fill_cache);
279
282
  }
280
283
 
281
284
  Status DeprecatedAddFile(const std::vector<std::string>& files,
@@ -314,6 +317,49 @@ TEST_F(ExternalSSTFileTest, ComparatorMismatch) {
314
317
  ASSERT_NOK(DeprecatedAddFile({file}));
315
318
  }
316
319
 
320
+ TEST_F(ExternalSSTFileTest, NoBlockCache) {
321
+ LRUCacheOptions co;
322
+ co.capacity = 32 << 20;
323
+ std::shared_ptr<Cache> cache = NewLRUCache(co);
324
+ BlockBasedTableOptions table_options;
325
+ table_options.block_cache = cache;
326
+ table_options.filter_policy.reset(NewBloomFilterPolicy(10));
327
+ table_options.cache_index_and_filter_blocks = true;
328
+ Options options = CurrentOptions();
329
+ options.table_factory.reset(NewBlockBasedTableFactory(table_options));
330
+ Reopen(options);
331
+
332
+ size_t usage_before_ingestion = cache->GetUsage();
333
+ std::map<std::string, std::string> true_data;
334
+ // Ingest with fill_cache = true
335
+ ASSERT_OK(GenerateAndAddExternalFile(options, {1, 2}, -1, false, false, true,
336
+ false, false, &true_data, nullptr,
337
+ /*fill_cache=*/true));
338
+ ASSERT_EQ(FilesPerLevel(), "0,0,0,0,0,0,1");
339
+ EXPECT_GT(cache->GetUsage(), usage_before_ingestion);
340
+
341
+ TablePropertiesCollection tp;
342
+ ASSERT_OK(db_->GetPropertiesOfAllTables(&tp));
343
+ for (const auto& entry : tp) {
344
+ EXPECT_GT(entry.second->index_size, 0);
345
+ EXPECT_GT(entry.second->filter_size, 0);
346
+ }
347
+
348
+ usage_before_ingestion = cache->GetUsage();
349
+ // Ingest with fill_cache = false
350
+ ASSERT_OK(GenerateAndAddExternalFile(options, {3, 4}, -1, false, false, true,
351
+ false, false, &true_data, nullptr,
352
+ /*fill_cache=*/false));
353
+ EXPECT_EQ(usage_before_ingestion, cache->GetUsage());
354
+
355
+ tp.clear();
356
+ ASSERT_OK(db_->GetPropertiesOfAllTables(&tp));
357
+ for (const auto& entry : tp) {
358
+ EXPECT_GT(entry.second->index_size, 0);
359
+ EXPECT_GT(entry.second->filter_size, 0);
360
+ }
361
+ }
362
+
317
363
  TEST_F(ExternalSSTFileTest, Basic) {
318
364
  do {
319
365
  Options options = CurrentOptions();
@@ -1941,9 +1987,9 @@ TEST_P(ExternalSSTFileTest, IngestFileWithGlobalSeqnoAssignedUniversal) {
1941
1987
  options, file_data, -1, true, write_global_seqno,
1942
1988
  verify_checksums_before_ingest, false, false, &true_data));
1943
1989
 
1944
- // This file overlap with files in L4, we will ingest it into the last
1945
- // non-overlapping and non-empty level, in this case, it's L0.
1946
- ASSERT_EQ("3,0,0,0,3", FilesPerLevel());
1990
+ // This file overlap with files in L4, we will ingest it into the closest
1991
+ // non-overlapping level, in this case, it's L3.
1992
+ ASSERT_EQ("2,0,0,1,3", FilesPerLevel());
1947
1993
 
1948
1994
  size_t kcnt = 0;
1949
1995
  VerifyDBFromMap(true_data, &kcnt, false);
@@ -157,7 +157,7 @@ void FlushJob::ReportStartedFlush() {
157
157
  IOSTATS_RESET(bytes_written);
158
158
  }
159
159
 
160
- void FlushJob::ReportFlushInputSize(const autovector<MemTable*>& mems) {
160
+ void FlushJob::ReportFlushInputSize(const autovector<ReadOnlyMemTable*>& mems) {
161
161
  uint64_t input_size = 0;
162
162
  for (auto* mem : mems) {
163
163
  input_size += mem->ApproximateMemoryUsage();
@@ -204,7 +204,7 @@ void FlushJob::PickMemTable() {
204
204
  // entries mems are (implicitly) sorted in ascending order by their created
205
205
  // time. We will use the first memtable's `edit` to keep the meta info for
206
206
  // this flush.
207
- MemTable* m = mems_[0];
207
+ ReadOnlyMemTable* m = mems_[0];
208
208
  edit_ = m->GetEdits();
209
209
  edit_->SetPrevLogNumber(0);
210
210
  // SetLogNumber(log_num) indicates logs with number smaller than log_num
@@ -420,9 +420,10 @@ Status FlushJob::MemPurge() {
420
420
  std::vector<InternalIterator*> memtables;
421
421
  std::vector<std::unique_ptr<FragmentedRangeTombstoneIterator>>
422
422
  range_del_iters;
423
- for (MemTable* m : mems_) {
423
+ for (ReadOnlyMemTable* m : mems_) {
424
424
  memtables.push_back(m->NewIterator(ro, /*seqno_to_time_mapping=*/nullptr,
425
- &arena, /*prefix_extractor=*/nullptr));
425
+ &arena, /*prefix_extractor=*/nullptr,
426
+ /*for_flush=*/true));
426
427
  auto* range_del_iter = m->NewRangeTombstoneIterator(
427
428
  ro, kMaxSequenceNumber, true /* immutable_memtable */);
428
429
  if (range_del_iter != nullptr) {
@@ -624,10 +625,13 @@ Status FlushJob::MemPurge() {
624
625
  // Construct fragmented memtable range tombstones without mutex
625
626
  new_mem->ConstructFragmentedRangeTombstones();
626
627
  db_mutex_->Lock();
627
- uint64_t new_mem_id = mems_[0]->GetID();
628
+ // Take the newest id, so that memtables in MemtableList don't have
629
+ // out-of-order memtable ids.
630
+ uint64_t new_mem_id = mems_.back()->GetID();
628
631
 
629
632
  new_mem->SetID(new_mem_id);
630
- new_mem->SetNextLogNumber(mems_[0]->GetNextLogNumber());
633
+ // Take the latest memtable's next log number.
634
+ new_mem->SetNextLogNumber(mems_.back()->GetNextLogNumber());
631
635
 
632
636
  // This addition will not trigger another flush, because
633
637
  // we do not call EnqueuePendingFlush().
@@ -713,11 +717,11 @@ bool FlushJob::MemPurgeDecider(double threshold) {
713
717
 
714
718
  // Iterate over each memtable of the set.
715
719
  for (auto mem_iter = std::begin(mems_); mem_iter != std::end(mems_);
716
- mem_iter++) {
717
- MemTable* mt = *mem_iter;
720
+ ++mem_iter) {
721
+ ReadOnlyMemTable* mt = *mem_iter;
718
722
 
719
723
  // Else sample from the table.
720
- uint64_t nentries = mt->num_entries();
724
+ uint64_t nentries = mt->NumEntries();
721
725
  // Corrected Cochran formula for small populations
722
726
  // (converges to n0 for large populations).
723
727
  uint64_t target_sample_size =
@@ -858,6 +862,12 @@ Status FlushJob::WriteLevel0Table() {
858
862
  meta_.temperature = mutable_cf_options_.default_write_temperature;
859
863
  file_options_.temperature = meta_.temperature;
860
864
 
865
+ const auto* ucmp = cfd_->internal_comparator().user_comparator();
866
+ assert(ucmp);
867
+ const size_t ts_sz = ucmp->timestamp_size();
868
+ const bool logical_strip_timestamp =
869
+ ts_sz > 0 && !cfd_->ioptions()->persist_user_defined_timestamps;
870
+
861
871
  std::vector<BlobFileAddition> blob_file_additions;
862
872
 
863
873
  {
@@ -888,23 +898,35 @@ Status FlushJob::WriteLevel0Table() {
888
898
  TEST_SYNC_POINT_CALLBACK("FlushJob::WriteLevel0Table:num_memtables",
889
899
  &mems_size);
890
900
  assert(job_context_);
891
- for (MemTable* m : mems_) {
892
- ROCKS_LOG_INFO(
893
- db_options_.info_log,
894
- "[%s] [JOB %d] Flushing memtable with next log file: %" PRIu64 "\n",
895
- cfd_->GetName().c_str(), job_context_->job_id, m->GetNextLogNumber());
896
- memtables.push_back(m->NewIterator(ro, /*seqno_to_time_mapping=*/nullptr,
897
- &arena, /*prefix_extractor=*/nullptr));
898
- auto* range_del_iter = m->NewRangeTombstoneIterator(
899
- ro, kMaxSequenceNumber, true /* immutable_memtable */);
901
+ for (ReadOnlyMemTable* m : mems_) {
902
+ ROCKS_LOG_INFO(db_options_.info_log,
903
+ "[%s] [JOB %d] Flushing memtable id %" PRIu64
904
+ " with next log file: %" PRIu64 "\n",
905
+ cfd_->GetName().c_str(), job_context_->job_id, m->GetID(),
906
+ m->GetNextLogNumber());
907
+ if (logical_strip_timestamp) {
908
+ memtables.push_back(m->NewTimestampStrippingIterator(
909
+ ro, /*seqno_to_time_mapping=*/nullptr, &arena,
910
+ /*prefix_extractor=*/nullptr, ts_sz));
911
+ } else {
912
+ memtables.push_back(
913
+ m->NewIterator(ro, /*seqno_to_time_mapping=*/nullptr, &arena,
914
+ /*prefix_extractor=*/nullptr, /*for_flush=*/true));
915
+ }
916
+ auto* range_del_iter =
917
+ logical_strip_timestamp
918
+ ? m->NewTimestampStrippingRangeTombstoneIterator(
919
+ ro, kMaxSequenceNumber, ts_sz)
920
+ : m->NewRangeTombstoneIterator(ro, kMaxSequenceNumber,
921
+ true /* immutable_memtable */);
900
922
  if (range_del_iter != nullptr) {
901
923
  range_del_iters.emplace_back(range_del_iter);
902
924
  }
903
- total_num_entries += m->num_entries();
904
- total_num_deletes += m->num_deletes();
905
- total_data_size += m->get_data_size();
925
+ total_num_entries += m->NumEntries();
926
+ total_num_deletes += m->NumDeletion();
927
+ total_data_size += m->GetDataSize();
906
928
  total_memory_usage += m->ApproximateMemoryUsage();
907
- total_num_range_deletes += m->num_range_deletes();
929
+ total_num_range_deletes += m->NumRangeDeletion();
908
930
  }
909
931
 
910
932
  // TODO(cbi): when memtable is flushed due to number of range deletions
@@ -970,9 +992,10 @@ Status FlushJob::WriteLevel0Table() {
970
992
  cfd_->internal_comparator(), cfd_->internal_tbl_prop_coll_factories(),
971
993
  output_compression_, mutable_cf_options_.compression_opts,
972
994
  cfd_->GetID(), cfd_->GetName(), 0 /* level */,
973
- false /* is_bottommost */, TableFileCreationReason::kFlush,
974
- oldest_key_time, current_time, db_id_, db_session_id_,
975
- 0 /* target_file_size */, meta_.fd.GetNumber(),
995
+ current_time /* newest_key_time */, false /* is_bottommost */,
996
+ TableFileCreationReason::kFlush, oldest_key_time, current_time,
997
+ db_id_, db_session_id_, 0 /* target_file_size */,
998
+ meta_.fd.GetNumber(),
976
999
  preclude_last_level_min_seqno_ == kMaxSequenceNumber
977
1000
  ? preclude_last_level_min_seqno_
978
1001
  : std::min(earliest_snapshot_, preclude_last_level_min_seqno_));
@@ -1154,7 +1177,7 @@ void FlushJob::GetEffectiveCutoffUDTForPickedMemTables() {
1154
1177
  return;
1155
1178
  }
1156
1179
  // Find the newest user-defined timestamps from all the flushed memtables.
1157
- for (MemTable* m : mems_) {
1180
+ for (const ReadOnlyMemTable* m : mems_) {
1158
1181
  Slice table_newest_udt = m->GetNewestUDT();
1159
1182
  // Empty memtables can be legitimately created and flushed, for example
1160
1183
  // by error recovery flush attempts.
@@ -1172,7 +1195,7 @@ void FlushJob::GetEffectiveCutoffUDTForPickedMemTables() {
1172
1195
  }
1173
1196
 
1174
1197
  void FlushJob::GetPrecludeLastLevelMinSeqno() {
1175
- if (cfd_->ioptions()->preclude_last_level_data_seconds == 0) {
1198
+ if (mutable_cf_options_.preclude_last_level_data_seconds == 0) {
1176
1199
  return;
1177
1200
  }
1178
1201
  int64_t current_time = 0;
@@ -1185,8 +1208,8 @@ void FlushJob::GetPrecludeLastLevelMinSeqno() {
1185
1208
  SequenceNumber preserve_time_min_seqno;
1186
1209
  seqno_to_time_mapping_->GetCurrentTieringCutoffSeqnos(
1187
1210
  static_cast<uint64_t>(current_time),
1188
- cfd_->ioptions()->preserve_internal_time_seconds,
1189
- cfd_->ioptions()->preclude_last_level_data_seconds,
1211
+ mutable_cf_options_.preserve_internal_time_seconds,
1212
+ mutable_cf_options_.preclude_last_level_data_seconds,
1190
1213
  &preserve_time_min_seqno, &preclude_last_level_min_seqno_);
1191
1214
  }
1192
1215
  }
@@ -91,7 +91,7 @@ class FlushJob {
91
91
  bool* skipped_since_bg_error = nullptr,
92
92
  ErrorHandler* error_handler = nullptr);
93
93
  void Cancel();
94
- const autovector<MemTable*>& GetMemTables() const { return mems_; }
94
+ const autovector<ReadOnlyMemTable*>& GetMemTables() const { return mems_; }
95
95
 
96
96
  std::list<std::unique_ptr<FlushJobInfo>>* GetCommittedFlushJobsInfo() {
97
97
  return &committed_flush_jobs_info_;
@@ -101,7 +101,7 @@ class FlushJob {
101
101
  friend class FlushJobTest_GetRateLimiterPriorityForWrite_Test;
102
102
 
103
103
  void ReportStartedFlush();
104
- void ReportFlushInputSize(const autovector<MemTable*>& mems);
104
+ static void ReportFlushInputSize(const autovector<ReadOnlyMemTable*>& mems);
105
105
  void RecordFlushIOStats();
106
106
  Status WriteLevel0Table();
107
107
 
@@ -205,7 +205,9 @@ class FlushJob {
205
205
 
206
206
  // Variables below are set by PickMemTable():
207
207
  FileMetaData meta_;
208
- autovector<MemTable*> mems_;
208
+ // Memtables to be flushed by this job.
209
+ // Ordered by increasing memtable id, i.e., oldest memtable first.
210
+ autovector<ReadOnlyMemTable*> mems_;
209
211
  VersionEdit* edit_;
210
212
  Version* base_;
211
213
  bool pick_memtable_called;
@@ -264,7 +264,7 @@ TEST_F(FlushJobTest, NonEmpty) {
264
264
  }
265
265
  mock::SortKVVector(&inserted_keys);
266
266
 
267
- autovector<MemTable*> to_delete;
267
+ autovector<ReadOnlyMemTable*> to_delete;
268
268
  new_mem->ConstructFragmentedRangeTombstones();
269
269
  cfd->imm()->Add(new_mem, &to_delete);
270
270
  for (auto& m : to_delete) {
@@ -325,7 +325,7 @@ TEST_F(FlushJobTest, FlushMemTablesSingleColumnFamily) {
325
325
  }
326
326
  }
327
327
 
328
- autovector<MemTable*> to_delete;
328
+ autovector<ReadOnlyMemTable*> to_delete;
329
329
  for (auto mem : new_mems) {
330
330
  mem->ConstructFragmentedRangeTombstones();
331
331
  cfd->imm()->Add(mem, &to_delete);
@@ -380,7 +380,7 @@ TEST_F(FlushJobTest, FlushMemtablesMultipleColumnFamilies) {
380
380
  std::vector<uint64_t> memtable_ids;
381
381
  std::vector<SequenceNumber> smallest_seqs;
382
382
  std::vector<SequenceNumber> largest_seqs;
383
- autovector<MemTable*> to_delete;
383
+ autovector<ReadOnlyMemTable*> to_delete;
384
384
  SequenceNumber curr_seqno = 0;
385
385
  size_t k = 0;
386
386
  for (auto cfd : all_cfds) {
@@ -439,7 +439,7 @@ TEST_F(FlushJobTest, FlushMemtablesMultipleColumnFamilies) {
439
439
  for (auto& meta : file_metas) {
440
440
  file_meta_ptrs.push_back(&meta);
441
441
  }
442
- autovector<const autovector<MemTable*>*> mems_list;
442
+ autovector<const autovector<ReadOnlyMemTable*>*> mems_list;
443
443
  for (size_t i = 0; i != all_cfds.size(); ++i) {
444
444
  const auto& mems = flush_jobs[i]->GetMemTables();
445
445
  mems_list.push_back(&mems);
@@ -528,7 +528,7 @@ TEST_F(FlushJobTest, Snapshots) {
528
528
  }
529
529
  mock::SortKVVector(&inserted_keys);
530
530
 
531
- autovector<MemTable*> to_delete;
531
+ autovector<ReadOnlyMemTable*> to_delete;
532
532
  new_mem->ConstructFragmentedRangeTombstones();
533
533
  cfd->imm()->Add(new_mem, &to_delete);
534
534
  for (auto& m : to_delete) {
@@ -582,7 +582,7 @@ TEST_F(FlushJobTest, GetRateLimiterPriorityForWrite) {
582
582
  }
583
583
  }
584
584
 
585
- autovector<MemTable*> to_delete;
585
+ autovector<ReadOnlyMemTable*> to_delete;
586
586
  for (auto mem : new_mems) {
587
587
  mem->ConstructFragmentedRangeTombstones();
588
588
  cfd->imm()->Add(mem, &to_delete);
@@ -654,7 +654,7 @@ TEST_F(FlushJobTest, ReplaceTimedPutWriteTimeWithPreferredSeqno) {
654
654
  InternalKey largest_internal_key("foo", SequenceNumber(18), kTypeValue);
655
655
  inserted_entries.push_back(
656
656
  {largest_internal_key.Encode().ToString(), "fval"});
657
- autovector<MemTable*> to_delete;
657
+ autovector<ReadOnlyMemTable*> to_delete;
658
658
  new_mem->ConstructFragmentedRangeTombstones();
659
659
  cfd->imm()->Add(new_mem, &to_delete);
660
660
  for (auto& m : to_delete) {
@@ -744,7 +744,7 @@ class FlushJobTimestampTest
744
744
 
745
745
  TEST_P(FlushJobTimestampTest, AllKeysExpired) {
746
746
  ColumnFamilyData* cfd = versions_->GetColumnFamilySet()->GetDefault();
747
- autovector<MemTable*> to_delete;
747
+ autovector<ReadOnlyMemTable*> to_delete;
748
748
 
749
749
  {
750
750
  MemTable* new_mem = cfd->ConstructNewMemtable(
@@ -810,7 +810,7 @@ TEST_P(FlushJobTimestampTest, AllKeysExpired) {
810
810
 
811
811
  TEST_P(FlushJobTimestampTest, NoKeyExpired) {
812
812
  ColumnFamilyData* cfd = versions_->GetColumnFamilySet()->GetDefault();
813
- autovector<MemTable*> to_delete;
813
+ autovector<ReadOnlyMemTable*> to_delete;
814
814
 
815
815
  {
816
816
  MemTable* new_mem = cfd->ConstructNewMemtable(
@@ -874,9 +874,15 @@ TEST_P(FlushJobTimestampTest, NoKeyExpired) {
874
874
  expected_full_history_ts_low = full_history_ts_low;
875
875
  }
876
876
  InternalKey smallest(smallest_key, curr_seq_ - 1, ValueType::kTypeValue);
877
- InternalKey largest(largest_key, kStartSeq, ValueType::kTypeValue);
878
- CheckFileMetaData(cfd, smallest, largest, &fmeta);
879
- CheckFullHistoryTsLow(cfd, expected_full_history_ts_low);
877
+ if (!persist_udt_) {
878
+ InternalKey largest(largest_key, curr_seq_ - 1, ValueType::kTypeValue);
879
+ CheckFileMetaData(cfd, smallest, largest, &fmeta);
880
+ CheckFullHistoryTsLow(cfd, expected_full_history_ts_low);
881
+ } else {
882
+ InternalKey largest(largest_key, kStartSeq, ValueType::kTypeValue);
883
+ CheckFileMetaData(cfd, smallest, largest, &fmeta);
884
+ CheckFullHistoryTsLow(cfd, expected_full_history_ts_low);
885
+ }
880
886
  }
881
887
  job_context.Clean();
882
888
  ASSERT_TRUE(to_delete.empty());
@@ -32,11 +32,11 @@ namespace ROCKSDB_NAMESPACE {
32
32
  // iter.Next()
33
33
  class ForwardLevelIterator : public InternalIterator {
34
34
  public:
35
- ForwardLevelIterator(
36
- const ColumnFamilyData* const cfd, const ReadOptions& read_options,
37
- const std::vector<FileMetaData*>& files,
38
- const std::shared_ptr<const SliceTransform>& prefix_extractor,
39
- bool allow_unprepared_value, uint8_t block_protection_bytes_per_key)
35
+ ForwardLevelIterator(const ColumnFamilyData* const cfd,
36
+ const ReadOptions& read_options,
37
+ const std::vector<FileMetaData*>& files,
38
+ const MutableCFOptions& mutable_cf_options,
39
+ bool allow_unprepared_value)
40
40
  : cfd_(cfd),
41
41
  read_options_(read_options),
42
42
  files_(files),
@@ -44,9 +44,8 @@ class ForwardLevelIterator : public InternalIterator {
44
44
  file_index_(std::numeric_limits<uint32_t>::max()),
45
45
  file_iter_(nullptr),
46
46
  pinned_iters_mgr_(nullptr),
47
- prefix_extractor_(prefix_extractor),
48
- allow_unprepared_value_(allow_unprepared_value),
49
- block_protection_bytes_per_key_(block_protection_bytes_per_key) {
47
+ mutable_cf_options_(mutable_cf_options),
48
+ allow_unprepared_value_(allow_unprepared_value) {
50
49
  status_.PermitUncheckedError(); // Allow uninitialized status through
51
50
  }
52
51
 
@@ -83,13 +82,12 @@ class ForwardLevelIterator : public InternalIterator {
83
82
  read_options_, *(cfd_->soptions()), cfd_->internal_comparator(),
84
83
  *files_[file_index_],
85
84
  read_options_.ignore_range_deletions ? nullptr : &range_del_agg,
86
- prefix_extractor_, /*table_reader_ptr=*/nullptr,
85
+ mutable_cf_options_, /*table_reader_ptr=*/nullptr,
87
86
  /*file_read_hist=*/nullptr, TableReaderCaller::kUserIterator,
88
87
  /*arena=*/nullptr, /*skip_filters=*/false, /*level=*/-1,
89
88
  /*max_file_size_for_l0_meta_pin=*/0,
90
89
  /*smallest_compaction_key=*/nullptr,
91
- /*largest_compaction_key=*/nullptr, allow_unprepared_value_,
92
- block_protection_bytes_per_key_);
90
+ /*largest_compaction_key=*/nullptr, allow_unprepared_value_);
93
91
  file_iter_->SetPinnedItersMgr(pinned_iters_mgr_);
94
92
  valid_ = false;
95
93
  if (!range_del_agg.IsEmpty()) {
@@ -214,10 +212,9 @@ class ForwardLevelIterator : public InternalIterator {
214
212
  Status status_;
215
213
  InternalIterator* file_iter_;
216
214
  PinnedIteratorsManager* pinned_iters_mgr_;
217
- // Kept alive by ForwardIterator::sv_->mutable_cf_options
218
- const std::shared_ptr<const SliceTransform>& prefix_extractor_;
215
+ const MutableCFOptions& mutable_cf_options_;
216
+
219
217
  const bool allow_unprepared_value_;
220
- const uint8_t block_protection_bytes_per_key_;
221
218
  };
222
219
 
223
220
  ForwardIterator::ForwardIterator(DBImpl* db, const ReadOptions& read_options,
@@ -717,7 +714,8 @@ void ForwardIterator::RebuildIterators(bool refresh_sv) {
717
714
  sv_->GetSeqnoToTimeMapping();
718
715
  mutable_iter_ =
719
716
  sv_->mem->NewIterator(read_options_, seqno_to_time_mapping, &arena_,
720
- sv_->mutable_cf_options.prefix_extractor.get());
717
+ sv_->mutable_cf_options.prefix_extractor.get(),
718
+ /*for_flush=*/false);
721
719
  sv_->imm->AddIterators(read_options_, seqno_to_time_mapping,
722
720
  sv_->mutable_cf_options.prefix_extractor.get(),
723
721
  &imm_iters_, &arena_);
@@ -750,14 +748,13 @@ void ForwardIterator::RebuildIterators(bool refresh_sv) {
750
748
  l0_iters_.push_back(cfd_->table_cache()->NewIterator(
751
749
  read_options_, *cfd_->soptions(), cfd_->internal_comparator(), *l0,
752
750
  read_options_.ignore_range_deletions ? nullptr : &range_del_agg,
753
- sv_->mutable_cf_options.prefix_extractor,
751
+ sv_->mutable_cf_options,
754
752
  /*table_reader_ptr=*/nullptr, /*file_read_hist=*/nullptr,
755
753
  TableReaderCaller::kUserIterator, /*arena=*/nullptr,
756
754
  /*skip_filters=*/false, /*level=*/-1,
757
755
  MaxFileSizeForL0MetaPin(sv_->mutable_cf_options),
758
756
  /*smallest_compaction_key=*/nullptr,
759
- /*largest_compaction_key=*/nullptr, allow_unprepared_value_,
760
- sv_->mutable_cf_options.block_protection_bytes_per_key));
757
+ /*largest_compaction_key=*/nullptr, allow_unprepared_value_));
761
758
  }
762
759
  BuildLevelIterators(vstorage, sv_);
763
760
  current_ = nullptr;
@@ -788,7 +785,8 @@ void ForwardIterator::RenewIterators() {
788
785
  svnew->GetSeqnoToTimeMapping();
789
786
  mutable_iter_ =
790
787
  svnew->mem->NewIterator(read_options_, seqno_to_time_mapping, &arena_,
791
- svnew->mutable_cf_options.prefix_extractor.get());
788
+ svnew->mutable_cf_options.prefix_extractor.get(),
789
+ /*for_flush=*/false);
792
790
  svnew->imm->AddIterators(read_options_, seqno_to_time_mapping,
793
791
  svnew->mutable_cf_options.prefix_extractor.get(),
794
792
  &imm_iters_, &arena_);
@@ -838,14 +836,13 @@ void ForwardIterator::RenewIterators() {
838
836
  read_options_, *cfd_->soptions(), cfd_->internal_comparator(),
839
837
  *l0_files_new[inew],
840
838
  read_options_.ignore_range_deletions ? nullptr : &range_del_agg,
841
- svnew->mutable_cf_options.prefix_extractor,
839
+ svnew->mutable_cf_options,
842
840
  /*table_reader_ptr=*/nullptr, /*file_read_hist=*/nullptr,
843
841
  TableReaderCaller::kUserIterator, /*arena=*/nullptr,
844
842
  /*skip_filters=*/false, /*level=*/-1,
845
843
  MaxFileSizeForL0MetaPin(svnew->mutable_cf_options),
846
844
  /*smallest_compaction_key=*/nullptr,
847
- /*largest_compaction_key=*/nullptr, allow_unprepared_value_,
848
- svnew->mutable_cf_options.block_protection_bytes_per_key));
845
+ /*largest_compaction_key=*/nullptr, allow_unprepared_value_));
849
846
  }
850
847
 
851
848
  for (auto* f : l0_iters_) {
@@ -888,9 +885,8 @@ void ForwardIterator::BuildLevelIterators(const VersionStorageInfo* vstorage,
888
885
  }
889
886
  } else {
890
887
  level_iters_.push_back(new ForwardLevelIterator(
891
- cfd_, read_options_, level_files,
892
- sv->mutable_cf_options.prefix_extractor, allow_unprepared_value_,
893
- sv->mutable_cf_options.block_protection_bytes_per_key));
888
+ cfd_, read_options_, level_files, sv->mutable_cf_options,
889
+ allow_unprepared_value_));
894
890
  }
895
891
  }
896
892
  }
@@ -905,15 +901,13 @@ void ForwardIterator::ResetIncompleteIterators() {
905
901
  DeleteIterator(l0_iters_[i]);
906
902
  l0_iters_[i] = cfd_->table_cache()->NewIterator(
907
903
  read_options_, *cfd_->soptions(), cfd_->internal_comparator(),
908
- *l0_files[i], /*range_del_agg=*/nullptr,
909
- sv_->mutable_cf_options.prefix_extractor,
904
+ *l0_files[i], /*range_del_agg=*/nullptr, sv_->mutable_cf_options,
910
905
  /*table_reader_ptr=*/nullptr, /*file_read_hist=*/nullptr,
911
906
  TableReaderCaller::kUserIterator, /*arena=*/nullptr,
912
907
  /*skip_filters=*/false, /*level=*/-1,
913
908
  MaxFileSizeForL0MetaPin(sv_->mutable_cf_options),
914
909
  /*smallest_compaction_key=*/nullptr,
915
- /*largest_compaction_key=*/nullptr, allow_unprepared_value_,
916
- sv_->mutable_cf_options.block_protection_bytes_per_key);
910
+ /*largest_compaction_key=*/nullptr, allow_unprepared_value_);
917
911
  l0_iters_[i]->SetPinnedItersMgr(pinned_iters_mgr_);
918
912
  }
919
913
 
@@ -329,7 +329,7 @@ Status ImportColumnFamilyJob::GetIngestedFileInfo(
329
329
  // TODO(yuzhangyu): User-defined timestamps doesn't support importing column
330
330
  // family. Pass in the correct `user_defined_timestamps_persisted` flag for
331
331
  // creating `TableReaderOptions` when the support is there.
332
- status = cfd_->ioptions()->table_factory->NewTableReader(
332
+ status = sv->mutable_cf_options.table_factory->NewTableReader(
333
333
  TableReaderOptions(
334
334
  *cfd_->ioptions(), sv->mutable_cf_options.prefix_extractor,
335
335
  env_options_, cfd_->internal_comparator(),
@@ -371,7 +371,8 @@ Status ImportColumnFamilyJob::GetIngestedFileInfo(
371
371
  if (iter->Valid()) {
372
372
  file_to_import->smallest_internal_key.DecodeFrom(iter->key());
373
373
  Slice largest;
374
- if (strcmp(cfd_->ioptions()->table_factory->Name(), "PlainTable") == 0) {
374
+ if (strcmp(sv->mutable_cf_options.table_factory->Name(), "PlainTable") ==
375
+ 0) {
375
376
  // PlainTable iterator does not support SeekToLast().
376
377
  largest = iter->key();
377
378
  for (; iter->Valid(); iter->Next()) {
@@ -951,6 +951,8 @@ TEST_F(ImportColumnFamilyTest, AssignEpochNumberToMultipleCF) {
951
951
  Options options = CurrentOptions();
952
952
  options.level_compaction_dynamic_level_bytes = true;
953
953
  options.max_background_jobs = 8;
954
+ // Always allow parallel compaction
955
+ options.soft_pending_compaction_bytes_limit = 10;
954
956
  env_->SetBackgroundThreads(2, Env::LOW);
955
957
  env_->SetBackgroundThreads(0, Env::BOTTOM);
956
958
  CreateAndReopenWithCF({"CF1", "CF2"}, options);
@@ -1301,7 +1301,7 @@ bool InternalStats::HandleNumEntriesActiveMemTable(uint64_t* value,
1301
1301
  DBImpl* /*db*/,
1302
1302
  Version* /*version*/) {
1303
1303
  // Current number of entries in the active memtable
1304
- *value = cfd_->mem()->num_entries();
1304
+ *value = cfd_->mem()->NumEntries();
1305
1305
  return true;
1306
1306
  }
1307
1307
 
@@ -1317,7 +1317,7 @@ bool InternalStats::HandleNumDeletesActiveMemTable(uint64_t* value,
1317
1317
  DBImpl* /*db*/,
1318
1318
  Version* /*version*/) {
1319
1319
  // Current number of deletion entries in the active memtable
1320
- *value = cfd_->mem()->num_deletes();
1320
+ *value = cfd_->mem()->NumDeletion();
1321
1321
  return true;
1322
1322
  }
1323
1323
 
@@ -1334,11 +1334,11 @@ bool InternalStats::HandleEstimateNumKeys(uint64_t* value, DBImpl* /*db*/,
1334
1334
  // Estimate number of entries in the column family:
1335
1335
  // Use estimated entries in tables + total entries in memtables.
1336
1336
  const auto* vstorage = cfd_->current()->storage_info();
1337
- uint64_t estimate_keys = cfd_->mem()->num_entries() +
1337
+ uint64_t estimate_keys = cfd_->mem()->NumEntries() +
1338
1338
  cfd_->imm()->current()->GetTotalNumEntries() +
1339
1339
  vstorage->GetEstimatedActiveKeys();
1340
1340
  uint64_t estimate_deletes =
1341
- cfd_->mem()->num_deletes() + cfd_->imm()->current()->GetTotalNumDeletes();
1341
+ cfd_->mem()->NumDeletion() + cfd_->imm()->current()->GetTotalNumDeletes();
1342
1342
  *value = estimate_keys > estimate_deletes * 2
1343
1343
  ? estimate_keys - (estimate_deletes * 2)
1344
1344
  : 0;
@@ -1495,8 +1495,10 @@ bool InternalStats::HandleEstimateOldestKeyTime(uint64_t* value, DBImpl* /*db*/,
1495
1495
  }
1496
1496
 
1497
1497
  Cache* InternalStats::GetBlockCacheForStats() {
1498
- auto* table_factory = cfd_->ioptions()->table_factory.get();
1498
+ // NOTE: called in startup before GetCurrentMutableCFOptions() is ready
1499
+ auto* table_factory = cfd_->GetLatestMutableCFOptions()->table_factory.get();
1499
1500
  assert(table_factory != nullptr);
1501
+ // FIXME: need a shared_ptr if/when block_cache is going to be mutable
1500
1502
  return table_factory->GetOptions<Cache>(TableFactory::kBlockCacheOpts());
1501
1503
  }
1502
1504
 
@@ -2161,7 +2163,8 @@ class BlockCachePropertyAggregator : public IntPropertyAggregator {
2161
2163
  virtual ~BlockCachePropertyAggregator() override = default;
2162
2164
 
2163
2165
  void Add(ColumnFamilyData* cfd, uint64_t value) override {
2164
- auto* table_factory = cfd->ioptions()->table_factory.get();
2166
+ auto* table_factory =
2167
+ cfd->GetCurrentMutableCFOptions()->table_factory.get();
2165
2168
  assert(table_factory != nullptr);
2166
2169
  Cache* cache =
2167
2170
  table_factory->GetOptions<Cache>(TableFactory::kBlockCacheOpts());