@nxtedition/rocksdb 8.2.0 → 8.2.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (321) hide show
  1. package/binding.cc +3 -3
  2. package/deps/rocksdb/rocksdb/CMakeLists.txt +16 -52
  3. package/deps/rocksdb/rocksdb/Makefile +10 -5
  4. package/deps/rocksdb/rocksdb/TARGETS +8 -345
  5. package/deps/rocksdb/rocksdb/cache/cache_test.cc +92 -0
  6. package/deps/rocksdb/rocksdb/cache/clock_cache.cc +32 -32
  7. package/deps/rocksdb/rocksdb/cache/clock_cache.h +12 -9
  8. package/deps/rocksdb/rocksdb/cache/compressed_secondary_cache.cc +6 -43
  9. package/deps/rocksdb/rocksdb/cache/compressed_secondary_cache.h +3 -13
  10. package/deps/rocksdb/rocksdb/cache/compressed_secondary_cache_test.cc +8 -5
  11. package/deps/rocksdb/rocksdb/cache/lru_cache.cc +21 -47
  12. package/deps/rocksdb/rocksdb/cache/lru_cache.h +3 -8
  13. package/deps/rocksdb/rocksdb/cache/lru_cache_test.cc +2 -1
  14. package/deps/rocksdb/rocksdb/cache/secondary_cache_adapter.cc +1 -2
  15. package/deps/rocksdb/rocksdb/cache/sharded_cache.cc +44 -7
  16. package/deps/rocksdb/rocksdb/cache/sharded_cache.h +13 -14
  17. package/deps/rocksdb/rocksdb/db/blob/blob_contents.h +1 -1
  18. package/deps/rocksdb/rocksdb/db/blob/blob_file_builder.cc +1 -0
  19. package/deps/rocksdb/rocksdb/db/blob/blob_file_cache.cc +2 -2
  20. package/deps/rocksdb/rocksdb/db/blob/blob_file_cache.h +2 -1
  21. package/deps/rocksdb/rocksdb/db/blob/blob_file_cache_test.cc +17 -8
  22. package/deps/rocksdb/rocksdb/db/blob/blob_file_reader.cc +40 -21
  23. package/deps/rocksdb/rocksdb/db/blob/blob_file_reader.h +5 -1
  24. package/deps/rocksdb/rocksdb/db/blob/blob_file_reader_test.cc +41 -42
  25. package/deps/rocksdb/rocksdb/db/blob/blob_log_sequential_reader.cc +1 -1
  26. package/deps/rocksdb/rocksdb/db/blob/blob_log_writer.cc +1 -1
  27. package/deps/rocksdb/rocksdb/db/blob/blob_source.cc +5 -4
  28. package/deps/rocksdb/rocksdb/db/blob/blob_source.h +2 -2
  29. package/deps/rocksdb/rocksdb/db/blob/blob_source_test.cc +5 -3
  30. package/deps/rocksdb/rocksdb/db/builder.cc +7 -6
  31. package/deps/rocksdb/rocksdb/db/builder.h +2 -2
  32. package/deps/rocksdb/rocksdb/db/c.cc +76 -5
  33. package/deps/rocksdb/rocksdb/db/c_test.c +141 -0
  34. package/deps/rocksdb/rocksdb/db/column_family.cc +32 -0
  35. package/deps/rocksdb/rocksdb/db/compact_files_test.cc +3 -2
  36. package/deps/rocksdb/rocksdb/db/compaction/compaction.cc +5 -0
  37. package/deps/rocksdb/rocksdb/db/compaction/compaction.h +8 -5
  38. package/deps/rocksdb/rocksdb/db/compaction/compaction_iterator.cc +12 -10
  39. package/deps/rocksdb/rocksdb/db/compaction/compaction_job.cc +21 -17
  40. package/deps/rocksdb/rocksdb/db/compaction/compaction_job_stats_test.cc +2 -2
  41. package/deps/rocksdb/rocksdb/db/compaction/compaction_job_test.cc +8 -7
  42. package/deps/rocksdb/rocksdb/db/compaction/compaction_outputs.cc +3 -1
  43. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker.cc +1 -1
  44. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_fifo.cc +77 -50
  45. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_fifo.h +4 -5
  46. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_level.cc +55 -8
  47. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_test.cc +142 -56
  48. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_universal.cc +1 -1
  49. package/deps/rocksdb/rocksdb/db/compaction/compaction_service_test.cc +1 -2
  50. package/deps/rocksdb/rocksdb/db/compaction/tiered_compaction_test.cc +21 -20
  51. package/deps/rocksdb/rocksdb/db/convenience.cc +8 -6
  52. package/deps/rocksdb/rocksdb/db/corruption_test.cc +5 -4
  53. package/deps/rocksdb/rocksdb/db/db_block_cache_test.cc +6 -3
  54. package/deps/rocksdb/rocksdb/db/db_bloom_filter_test.cc +260 -220
  55. package/deps/rocksdb/rocksdb/db/db_clip_test.cc +142 -0
  56. package/deps/rocksdb/rocksdb/db/db_compaction_filter_test.cc +1 -1
  57. package/deps/rocksdb/rocksdb/db/db_compaction_test.cc +333 -27
  58. package/deps/rocksdb/rocksdb/db/db_impl/compacted_db_impl.cc +5 -0
  59. package/deps/rocksdb/rocksdb/db/db_impl/compacted_db_impl.h +7 -0
  60. package/deps/rocksdb/rocksdb/db/db_impl/db_impl.cc +189 -27
  61. package/deps/rocksdb/rocksdb/db/db_impl/db_impl.h +23 -10
  62. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_compaction_flush.cc +134 -90
  63. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_debug.cc +2 -2
  64. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_experimental.cc +5 -3
  65. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_files.cc +5 -1
  66. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_open.cc +124 -16
  67. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_readonly.cc +10 -0
  68. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_readonly.h +7 -0
  69. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_secondary.cc +15 -0
  70. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_write.cc +11 -5
  71. package/deps/rocksdb/rocksdb/db/db_iter.cc +7 -8
  72. package/deps/rocksdb/rocksdb/db/db_iterator_test.cc +54 -3
  73. package/deps/rocksdb/rocksdb/db/db_merge_operator_test.cc +42 -0
  74. package/deps/rocksdb/rocksdb/db/db_options_test.cc +116 -1
  75. package/deps/rocksdb/rocksdb/db/db_properties_test.cc +3 -2
  76. package/deps/rocksdb/rocksdb/db/db_rate_limiter_test.cc +3 -2
  77. package/deps/rocksdb/rocksdb/db/db_sst_test.cc +9 -8
  78. package/deps/rocksdb/rocksdb/db/db_statistics_test.cc +142 -63
  79. package/deps/rocksdb/rocksdb/db/db_test.cc +28 -7
  80. package/deps/rocksdb/rocksdb/db/db_test2.cc +71 -131
  81. package/deps/rocksdb/rocksdb/db/db_test_util.cc +18 -0
  82. package/deps/rocksdb/rocksdb/db/db_test_util.h +6 -0
  83. package/deps/rocksdb/rocksdb/db/db_universal_compaction_test.cc +10 -10
  84. package/deps/rocksdb/rocksdb/db/db_wal_test.cc +25 -0
  85. package/deps/rocksdb/rocksdb/db/db_with_timestamp_basic_test.cc +88 -0
  86. package/deps/rocksdb/rocksdb/db/db_write_buffer_manager_test.cc +67 -0
  87. package/deps/rocksdb/rocksdb/db/db_write_test.cc +5 -0
  88. package/deps/rocksdb/rocksdb/db/error_handler_fs_test.cc +4 -4
  89. package/deps/rocksdb/rocksdb/db/experimental.cc +4 -2
  90. package/deps/rocksdb/rocksdb/db/external_sst_file_basic_test.cc +86 -1
  91. package/deps/rocksdb/rocksdb/db/external_sst_file_ingestion_job.cc +15 -2
  92. package/deps/rocksdb/rocksdb/db/external_sst_file_test.cc +1 -2
  93. package/deps/rocksdb/rocksdb/db/flush_job.cc +21 -14
  94. package/deps/rocksdb/rocksdb/db/forward_iterator.cc +14 -7
  95. package/deps/rocksdb/rocksdb/db/import_column_family_job.cc +31 -8
  96. package/deps/rocksdb/rocksdb/db/import_column_family_test.cc +21 -19
  97. package/deps/rocksdb/rocksdb/db/internal_stats.cc +42 -12
  98. package/deps/rocksdb/rocksdb/db/internal_stats.h +1 -0
  99. package/deps/rocksdb/rocksdb/db/kv_checksum.h +92 -6
  100. package/deps/rocksdb/rocksdb/db/listener_test.cc +2 -2
  101. package/deps/rocksdb/rocksdb/db/log_format.h +8 -4
  102. package/deps/rocksdb/rocksdb/db/log_reader.cc +129 -51
  103. package/deps/rocksdb/rocksdb/db/log_reader.h +16 -0
  104. package/deps/rocksdb/rocksdb/db/log_test.cc +125 -4
  105. package/deps/rocksdb/rocksdb/db/log_writer.cc +32 -2
  106. package/deps/rocksdb/rocksdb/db/log_writer.h +16 -0
  107. package/deps/rocksdb/rocksdb/db/memtable.cc +17 -46
  108. package/deps/rocksdb/rocksdb/db/memtable.h +1 -1
  109. package/deps/rocksdb/rocksdb/db/memtable_list.cc +8 -4
  110. package/deps/rocksdb/rocksdb/db/merge_helper.cc +1 -1
  111. package/deps/rocksdb/rocksdb/db/perf_context_test.cc +2 -1
  112. package/deps/rocksdb/rocksdb/db/plain_table_db_test.cc +5 -4
  113. package/deps/rocksdb/rocksdb/db/repair.cc +38 -11
  114. package/deps/rocksdb/rocksdb/db/seqno_time_test.cc +3 -3
  115. package/deps/rocksdb/rocksdb/db/table_cache.cc +68 -51
  116. package/deps/rocksdb/rocksdb/db/table_cache.h +20 -10
  117. package/deps/rocksdb/rocksdb/db/table_cache_sync_and_async.h +2 -1
  118. package/deps/rocksdb/rocksdb/db/table_properties_collector_test.cc +6 -3
  119. package/deps/rocksdb/rocksdb/db/version_builder.cc +9 -5
  120. package/deps/rocksdb/rocksdb/db/version_builder.h +2 -1
  121. package/deps/rocksdb/rocksdb/db/version_builder_test.cc +140 -120
  122. package/deps/rocksdb/rocksdb/db/version_edit.cc +14 -0
  123. package/deps/rocksdb/rocksdb/db/version_edit.h +12 -4
  124. package/deps/rocksdb/rocksdb/db/version_edit_handler.cc +21 -13
  125. package/deps/rocksdb/rocksdb/db/version_edit_handler.h +26 -16
  126. package/deps/rocksdb/rocksdb/db/version_edit_test.cc +9 -9
  127. package/deps/rocksdb/rocksdb/db/version_set.cc +292 -96
  128. package/deps/rocksdb/rocksdb/db/version_set.h +53 -28
  129. package/deps/rocksdb/rocksdb/db/version_set_sync_and_async.h +1 -0
  130. package/deps/rocksdb/rocksdb/db/version_set_test.cc +62 -22
  131. package/deps/rocksdb/rocksdb/db/version_util.h +5 -4
  132. package/deps/rocksdb/rocksdb/db/write_batch.cc +3 -1
  133. package/deps/rocksdb/rocksdb/db_stress_tool/CMakeLists.txt +1 -0
  134. package/deps/rocksdb/rocksdb/db_stress_tool/batched_ops_stress.cc +119 -27
  135. package/deps/rocksdb/rocksdb/db_stress_tool/cf_consistency_stress.cc +123 -0
  136. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_common.h +4 -0
  137. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_driver.cc +7 -2
  138. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_env_wrapper.h +34 -0
  139. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_gflags.cc +13 -0
  140. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_shared_state.h +43 -33
  141. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_test_base.cc +29 -17
  142. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_test_base.h +5 -0
  143. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_tool.cc +6 -1
  144. package/deps/rocksdb/rocksdb/db_stress_tool/expected_state.cc +85 -50
  145. package/deps/rocksdb/rocksdb/db_stress_tool/expected_state.h +96 -54
  146. package/deps/rocksdb/rocksdb/db_stress_tool/expected_value.cc +122 -0
  147. package/deps/rocksdb/rocksdb/db_stress_tool/expected_value.h +206 -0
  148. package/deps/rocksdb/rocksdb/db_stress_tool/multi_ops_txns_stress.cc +9 -1
  149. package/deps/rocksdb/rocksdb/db_stress_tool/multi_ops_txns_stress.h +9 -3
  150. package/deps/rocksdb/rocksdb/db_stress_tool/no_batched_ops_stress.cc +322 -92
  151. package/deps/rocksdb/rocksdb/env/env_posix.cc +12 -8
  152. package/deps/rocksdb/rocksdb/env/env_test.cc +31 -0
  153. package/deps/rocksdb/rocksdb/env/mock_env.cc +1 -1
  154. package/deps/rocksdb/rocksdb/env/unique_id_gen.h +14 -0
  155. package/deps/rocksdb/rocksdb/file/file_prefetch_buffer.cc +1 -1
  156. package/deps/rocksdb/rocksdb/file/file_prefetch_buffer.h +5 -1
  157. package/deps/rocksdb/rocksdb/file/file_util.cc +3 -3
  158. package/deps/rocksdb/rocksdb/file/file_util.h +2 -0
  159. package/deps/rocksdb/rocksdb/file/prefetch_test.cc +89 -0
  160. package/deps/rocksdb/rocksdb/file/random_access_file_reader.cc +22 -7
  161. package/deps/rocksdb/rocksdb/file/random_access_file_reader.h +3 -2
  162. package/deps/rocksdb/rocksdb/file/readahead_raf.cc +1 -1
  163. package/deps/rocksdb/rocksdb/file/sequence_file_reader.cc +1 -1
  164. package/deps/rocksdb/rocksdb/file/writable_file_writer.cc +1 -1
  165. package/deps/rocksdb/rocksdb/include/rocksdb/advanced_cache.h +3 -0
  166. package/deps/rocksdb/rocksdb/include/rocksdb/advanced_options.h +154 -74
  167. package/deps/rocksdb/rocksdb/include/rocksdb/c.h +27 -7
  168. package/deps/rocksdb/rocksdb/include/rocksdb/cache.h +107 -28
  169. package/deps/rocksdb/rocksdb/include/rocksdb/db.h +19 -0
  170. package/deps/rocksdb/rocksdb/include/rocksdb/env.h +8 -0
  171. package/deps/rocksdb/rocksdb/include/rocksdb/file_system.h +2 -0
  172. package/deps/rocksdb/rocksdb/include/rocksdb/memory_allocator.h +7 -1
  173. package/deps/rocksdb/rocksdb/include/rocksdb/options.h +137 -152
  174. package/deps/rocksdb/rocksdb/include/rocksdb/perf_context.h +61 -26
  175. package/deps/rocksdb/rocksdb/include/rocksdb/secondary_cache.h +30 -26
  176. package/deps/rocksdb/rocksdb/include/rocksdb/sst_file_writer.h +33 -16
  177. package/deps/rocksdb/rocksdb/include/rocksdb/statistics.h +87 -8
  178. package/deps/rocksdb/rocksdb/include/rocksdb/table.h +1 -1
  179. package/deps/rocksdb/rocksdb/include/rocksdb/table_properties.h +5 -0
  180. package/deps/rocksdb/rocksdb/include/rocksdb/thread_status.h +1 -0
  181. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/options_util.h +1 -0
  182. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/stackable_db.h +7 -0
  183. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/transaction.h +0 -1
  184. package/deps/rocksdb/rocksdb/include/rocksdb/version.h +2 -2
  185. package/deps/rocksdb/rocksdb/include/rocksdb/write_buffer_manager.h +9 -2
  186. package/deps/rocksdb/rocksdb/logging/env_logger.h +2 -0
  187. package/deps/rocksdb/rocksdb/memory/jemalloc_nodump_allocator.cc +78 -42
  188. package/deps/rocksdb/rocksdb/memory/jemalloc_nodump_allocator.h +14 -9
  189. package/deps/rocksdb/rocksdb/memtable/inlineskiplist_test.cc +1 -0
  190. package/deps/rocksdb/rocksdb/memtable/skiplist_test.cc +1 -0
  191. package/deps/rocksdb/rocksdb/memtable/write_buffer_manager.cc +4 -9
  192. package/deps/rocksdb/rocksdb/microbench/db_basic_bench.cc +19 -11
  193. package/deps/rocksdb/rocksdb/monitoring/instrumented_mutex.h +1 -1
  194. package/deps/rocksdb/rocksdb/monitoring/perf_context.cc +211 -555
  195. package/deps/rocksdb/rocksdb/monitoring/perf_step_timer.h +1 -1
  196. package/deps/rocksdb/rocksdb/monitoring/statistics.cc +36 -2
  197. package/deps/rocksdb/rocksdb/monitoring/thread_status_updater.cc +17 -7
  198. package/deps/rocksdb/rocksdb/monitoring/thread_status_updater.h +10 -7
  199. package/deps/rocksdb/rocksdb/monitoring/thread_status_util.cc +19 -18
  200. package/deps/rocksdb/rocksdb/monitoring/thread_status_util.h +10 -2
  201. package/deps/rocksdb/rocksdb/monitoring/thread_status_util_debug.cc +14 -0
  202. package/deps/rocksdb/rocksdb/options/cf_options.cc +35 -2
  203. package/deps/rocksdb/rocksdb/options/cf_options.h +5 -0
  204. package/deps/rocksdb/rocksdb/options/customizable_test.cc +1 -1
  205. package/deps/rocksdb/rocksdb/options/options.cc +12 -53
  206. package/deps/rocksdb/rocksdb/options/options_helper.cc +4 -0
  207. package/deps/rocksdb/rocksdb/options/options_parser.cc +11 -0
  208. package/deps/rocksdb/rocksdb/options/options_settable_test.cc +32 -4
  209. package/deps/rocksdb/rocksdb/options/options_test.cc +89 -5
  210. package/deps/rocksdb/rocksdb/port/lang.h +27 -0
  211. package/deps/rocksdb/rocksdb/port/stack_trace.cc +67 -24
  212. package/deps/rocksdb/rocksdb/src.mk +2 -0
  213. package/deps/rocksdb/rocksdb/table/block_based/binary_search_index_reader.cc +2 -3
  214. package/deps/rocksdb/rocksdb/table/block_based/block.cc +195 -35
  215. package/deps/rocksdb/rocksdb/table/block_based/block.h +197 -24
  216. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_builder.cc +71 -51
  217. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_builder.h +7 -1
  218. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_factory.cc +4 -6
  219. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_factory.h +3 -0
  220. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_iterator.cc +43 -2
  221. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_iterator.h +36 -6
  222. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader.cc +266 -166
  223. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader.h +44 -14
  224. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader_impl.h +1 -1
  225. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader_sync_and_async.h +63 -56
  226. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader_test.cc +8 -2
  227. package/deps/rocksdb/rocksdb/table/block_based/block_builder.h +4 -2
  228. package/deps/rocksdb/rocksdb/table/block_based/block_cache.cc +10 -0
  229. package/deps/rocksdb/rocksdb/table/block_based/block_cache.h +14 -2
  230. package/deps/rocksdb/rocksdb/table/block_based/block_test.cc +918 -2
  231. package/deps/rocksdb/rocksdb/table/block_based/data_block_hash_index_test.cc +3 -2
  232. package/deps/rocksdb/rocksdb/table/block_based/filter_block.h +10 -9
  233. package/deps/rocksdb/rocksdb/table/block_based/filter_block_reader_common.cc +6 -8
  234. package/deps/rocksdb/rocksdb/table/block_based/filter_block_reader_common.h +2 -2
  235. package/deps/rocksdb/rocksdb/table/block_based/flush_block_policy.cc +1 -1
  236. package/deps/rocksdb/rocksdb/table/block_based/full_filter_block.cc +18 -23
  237. package/deps/rocksdb/rocksdb/table/block_based/full_filter_block.h +8 -8
  238. package/deps/rocksdb/rocksdb/table/block_based/full_filter_block_test.cc +16 -32
  239. package/deps/rocksdb/rocksdb/table/block_based/hash_index_reader.cc +7 -8
  240. package/deps/rocksdb/rocksdb/table/block_based/index_reader_common.cc +4 -5
  241. package/deps/rocksdb/rocksdb/table/block_based/index_reader_common.h +3 -3
  242. package/deps/rocksdb/rocksdb/table/block_based/partitioned_filter_block.cc +46 -53
  243. package/deps/rocksdb/rocksdb/table/block_based/partitioned_filter_block.h +12 -12
  244. package/deps/rocksdb/rocksdb/table/block_based/partitioned_filter_block_test.cc +7 -9
  245. package/deps/rocksdb/rocksdb/table/block_based/partitioned_index_reader.cc +26 -23
  246. package/deps/rocksdb/rocksdb/table/block_based/partitioned_index_reader.h +2 -1
  247. package/deps/rocksdb/rocksdb/table/block_based/reader_common.h +3 -0
  248. package/deps/rocksdb/rocksdb/table/block_based/uncompression_dict_reader.cc +4 -2
  249. package/deps/rocksdb/rocksdb/table/block_based/uncompression_dict_reader.h +3 -2
  250. package/deps/rocksdb/rocksdb/table/block_fetcher.cc +7 -1
  251. package/deps/rocksdb/rocksdb/table/block_fetcher.h +1 -1
  252. package/deps/rocksdb/rocksdb/table/block_fetcher_test.cc +2 -1
  253. package/deps/rocksdb/rocksdb/table/cuckoo/cuckoo_table_builder_test.cc +3 -2
  254. package/deps/rocksdb/rocksdb/table/cuckoo/cuckoo_table_reader.cc +5 -2
  255. package/deps/rocksdb/rocksdb/table/cuckoo/cuckoo_table_reader.h +4 -2
  256. package/deps/rocksdb/rocksdb/table/format.cc +4 -4
  257. package/deps/rocksdb/rocksdb/table/format.h +1 -1
  258. package/deps/rocksdb/rocksdb/table/get_context.cc +1 -1
  259. package/deps/rocksdb/rocksdb/table/meta_blocks.cc +33 -22
  260. package/deps/rocksdb/rocksdb/table/meta_blocks.h +4 -0
  261. package/deps/rocksdb/rocksdb/table/mock_table.cc +4 -2
  262. package/deps/rocksdb/rocksdb/table/persistent_cache_helper.h +1 -1
  263. package/deps/rocksdb/rocksdb/table/persistent_cache_options.h +1 -1
  264. package/deps/rocksdb/rocksdb/table/plain/plain_table_reader.cc +18 -10
  265. package/deps/rocksdb/rocksdb/table/plain/plain_table_reader.h +4 -3
  266. package/deps/rocksdb/rocksdb/table/sst_file_dumper.cc +10 -7
  267. package/deps/rocksdb/rocksdb/table/sst_file_reader.cc +4 -2
  268. package/deps/rocksdb/rocksdb/table/sst_file_writer.cc +11 -0
  269. package/deps/rocksdb/rocksdb/table/table_builder.h +14 -5
  270. package/deps/rocksdb/rocksdb/table/table_properties.cc +2 -0
  271. package/deps/rocksdb/rocksdb/table/table_reader.h +6 -3
  272. package/deps/rocksdb/rocksdb/table/table_reader_bench.cc +1 -1
  273. package/deps/rocksdb/rocksdb/table/table_test.cc +291 -34
  274. package/deps/rocksdb/rocksdb/test_util/secondary_cache_test_util.h +3 -1
  275. package/deps/rocksdb/rocksdb/test_util/testharness.h +5 -0
  276. package/deps/rocksdb/rocksdb/test_util/testutil.cc +2 -2
  277. package/deps/rocksdb/rocksdb/tools/db_bench_tool.cc +33 -17
  278. package/deps/rocksdb/rocksdb/tools/ldb_cmd.cc +3 -1
  279. package/deps/rocksdb/rocksdb/util/bloom_impl.h +2 -2
  280. package/deps/rocksdb/rocksdb/util/compression.h +1 -1
  281. package/deps/rocksdb/rocksdb/util/crc32c.cc +24 -83
  282. package/deps/rocksdb/rocksdb/util/crc32c_arm64.cc +7 -9
  283. package/deps/rocksdb/rocksdb/util/file_checksum_helper.cc +4 -1
  284. package/deps/rocksdb/rocksdb/util/filter_bench.cc +1 -1
  285. package/deps/rocksdb/rocksdb/util/gflags_compat.h +9 -10
  286. package/deps/rocksdb/rocksdb/util/math.h +12 -7
  287. package/deps/rocksdb/rocksdb/util/rate_limiter.cc +16 -18
  288. package/deps/rocksdb/rocksdb/util/rate_limiter_test.cc +46 -2
  289. package/deps/rocksdb/rocksdb/util/ribbon_test.cc +6 -6
  290. package/deps/rocksdb/rocksdb/util/slice_transform_test.cc +12 -7
  291. package/deps/rocksdb/rocksdb/util/stop_watch.h +31 -13
  292. package/deps/rocksdb/rocksdb/util/thread_list_test.cc +2 -0
  293. package/deps/rocksdb/rocksdb/util/thread_operation.h +2 -1
  294. package/deps/rocksdb/rocksdb/util/udt_util.h +77 -0
  295. package/deps/rocksdb/rocksdb/utilities/agg_merge/agg_merge.cc +2 -2
  296. package/deps/rocksdb/rocksdb/utilities/agg_merge/agg_merge_test.cc +1 -1
  297. package/deps/rocksdb/rocksdb/utilities/agg_merge/test_agg_merge.cc +1 -1
  298. package/deps/rocksdb/rocksdb/utilities/backup/backup_engine.cc +1 -1
  299. package/deps/rocksdb/rocksdb/utilities/backup/backup_engine_test.cc +1 -1
  300. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_compaction_filter.h +1 -1
  301. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_db_impl.cc +11 -1
  302. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_db_test.cc +34 -1
  303. package/deps/rocksdb/rocksdb/utilities/options/options_util_test.cc +15 -0
  304. package/deps/rocksdb/rocksdb/utilities/simulator_cache/sim_cache.cc +1 -1
  305. package/deps/rocksdb/rocksdb/utilities/transactions/pessimistic_transaction.cc +5 -1
  306. package/deps/rocksdb/rocksdb/utilities/transactions/transaction_base.cc +29 -1
  307. package/deps/rocksdb/rocksdb/utilities/transactions/transaction_test.cc +0 -1
  308. package/deps/rocksdb/rocksdb/utilities/transactions/write_prepared_transaction_test.cc +0 -1
  309. package/deps/rocksdb/rocksdb/utilities/transactions/write_prepared_txn.cc +6 -1
  310. package/deps/rocksdb/rocksdb/utilities/transactions/write_prepared_txn_db.cc +10 -0
  311. package/deps/rocksdb/rocksdb/utilities/transactions/write_unprepared_txn.cc +6 -1
  312. package/deps/rocksdb/rocksdb/utilities/transactions/write_unprepared_txn_db.cc +5 -0
  313. package/deps/rocksdb/rocksdb/utilities/ttl/db_ttl_impl.cc +5 -0
  314. package/package.json +1 -1
  315. package/prebuilds/darwin-arm64/node.napi.node +0 -0
  316. package/prebuilds/linux-x64/node.napi.node +0 -0
  317. /package/deps/rocksdb/rocksdb/memory/{memory_allocator.h → memory_allocator_impl.h} +0 -0
  318. /package/deps/rocksdb/rocksdb/monitoring/{statistics.h → statistics_impl.h} +0 -0
  319. /package/deps/rocksdb/rocksdb/table/block_based/{flush_block_policy.h → flush_block_policy_impl.h} +0 -0
  320. /package/deps/rocksdb/rocksdb/util/{rate_limiter.h → rate_limiter_impl.h} +0 -0
  321. /package/deps/rocksdb/rocksdb/utilities/agg_merge/{agg_merge.h → agg_merge_impl.h} +0 -0
@@ -3918,6 +3918,94 @@ TEST_F(DBBasicTestWithTimestamp, IterSeekToLastWithIterateUpperbound) {
3918
3918
  ASSERT_FALSE(iter->Valid());
3919
3919
  ASSERT_OK(iter->status());
3920
3920
  }
3921
+
3922
+ TEST_F(DBBasicTestWithTimestamp, TimestampFilterTableReadOnGet) {
3923
+ Options options = CurrentOptions();
3924
+ options.env = env_;
3925
+ options.create_if_missing = true;
3926
+ options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics();
3927
+ const size_t kTimestampSize = Timestamp(0, 0).size();
3928
+ TestComparator test_cmp(kTimestampSize);
3929
+ options.comparator = &test_cmp;
3930
+ BlockBasedTableOptions bbto;
3931
+ bbto.block_size = 100;
3932
+ options.table_factory.reset(NewBlockBasedTableFactory(bbto));
3933
+ DestroyAndReopen(options);
3934
+
3935
+ // Put
3936
+ // Create two SST files
3937
+ // file1: key => [1, 3], timestamp => [10, 20]
3938
+ // file2: key => [2, 4], timestamp => [30, 40]
3939
+ {
3940
+ WriteOptions write_opts;
3941
+ std::string write_ts = Timestamp(10, 0);
3942
+ ASSERT_OK(db_->Put(write_opts, Key1(1), write_ts, "value1"));
3943
+ write_ts = Timestamp(20, 0);
3944
+ ASSERT_OK(db_->Put(write_opts, Key1(3), write_ts, "value3"));
3945
+ ASSERT_OK(Flush());
3946
+
3947
+ write_ts = Timestamp(30, 0);
3948
+ ASSERT_OK(db_->Put(write_opts, Key1(2), write_ts, "value2"));
3949
+ write_ts = Timestamp(40, 0);
3950
+ ASSERT_OK(db_->Put(write_opts, Key1(4), write_ts, "value4"));
3951
+ ASSERT_OK(Flush());
3952
+ }
3953
+
3954
+ // Get with timestamp
3955
+ {
3956
+ auto prev_checked_events = options.statistics->getTickerCount(
3957
+ Tickers::TIMESTAMP_FILTER_TABLE_CHECKED);
3958
+ auto prev_filtered_events = options.statistics->getTickerCount(
3959
+ Tickers::TIMESTAMP_FILTER_TABLE_FILTERED);
3960
+
3961
+ // key=3 (ts=20) does not exist at timestamp=1
3962
+ std::string read_ts_str = Timestamp(1, 0);
3963
+ Slice read_ts_slice = Slice(read_ts_str);
3964
+ ReadOptions read_opts;
3965
+ read_opts.timestamp = &read_ts_slice;
3966
+ std::string value_from_get = "";
3967
+ std::string timestamp_from_get = "";
3968
+ auto status =
3969
+ db_->Get(read_opts, Key1(3), &value_from_get, &timestamp_from_get);
3970
+ ASSERT_TRUE(status.IsNotFound());
3971
+ ASSERT_EQ(value_from_get, std::string(""));
3972
+ ASSERT_EQ(timestamp_from_get, std::string(""));
3973
+
3974
+ // key=3 is in the key ranges for both files, so both files will be queried.
3975
+ // The table read was skipped because the timestamp is out of the table
3976
+ // range, i.e., 1 < [10,20], [30,40].
3977
+ // The tickers increase by 2 due to 2 files.
3978
+ ASSERT_EQ(prev_checked_events + 2,
3979
+ options.statistics->getTickerCount(
3980
+ Tickers::TIMESTAMP_FILTER_TABLE_CHECKED));
3981
+ ASSERT_EQ(prev_filtered_events + 2,
3982
+ options.statistics->getTickerCount(
3983
+ Tickers::TIMESTAMP_FILTER_TABLE_FILTERED));
3984
+
3985
+ // key=3 (ts=20) exists at timestamp = 25
3986
+ read_ts_str = Timestamp(25, 0);
3987
+ read_ts_slice = Slice(read_ts_str);
3988
+ read_opts.timestamp = &read_ts_slice;
3989
+ ASSERT_OK(
3990
+ db_->Get(read_opts, Key1(3), &value_from_get, &timestamp_from_get));
3991
+ ASSERT_EQ("value3", value_from_get);
3992
+ ASSERT_EQ(Timestamp(20, 0), timestamp_from_get);
3993
+
3994
+ // file1 was not skipped, because the timestamp is in range, [10,20] < 25.
3995
+ // file2 was skipped, because the timestamp is not in range, 25 < [30,40].
3996
+ // So the checked ticker increases by 2 due to 2 files;
3997
+ // filtered ticker increases by 1 because file2 was skipped
3998
+ ASSERT_EQ(prev_checked_events + 4,
3999
+ options.statistics->getTickerCount(
4000
+ Tickers::TIMESTAMP_FILTER_TABLE_CHECKED));
4001
+ ASSERT_EQ(prev_filtered_events + 3,
4002
+ options.statistics->getTickerCount(
4003
+ Tickers::TIMESTAMP_FILTER_TABLE_FILTERED));
4004
+ }
4005
+
4006
+ Close();
4007
+ }
4008
+
3921
4009
  } // namespace ROCKSDB_NAMESPACE
3922
4010
 
3923
4011
  int main(int argc, char** argv) {
@@ -846,6 +846,73 @@ TEST_P(DBWriteBufferManagerTest, StopSwitchingMemTablesOnceFlushing) {
846
846
  delete shared_wbm_db;
847
847
  }
848
848
 
849
+ TEST_F(DBWriteBufferManagerTest, RuntimeChangeableAllowStall) {
850
+ constexpr int kBigValue = 10000;
851
+
852
+ Options options = CurrentOptions();
853
+ options.write_buffer_manager.reset(
854
+ new WriteBufferManager(1, nullptr /* cache */, true /* allow_stall */));
855
+ DestroyAndReopen(options);
856
+
857
+ // Pause flush thread so that
858
+ // (a) the only way to exit the write stall below is to change the `allow_stall`
859
+ // (b) the write stall is "stable" without being interfered with by flushes so that
860
+ // we can check it without flakiness
861
+ std::unique_ptr<test::SleepingBackgroundTask> sleeping_task(
862
+ new test::SleepingBackgroundTask());
863
+ env_->SetBackgroundThreads(1, Env::HIGH);
864
+ env_->Schedule(&test::SleepingBackgroundTask::DoSleepTask,
865
+ sleeping_task.get(), Env::Priority::HIGH);
866
+ sleeping_task->WaitUntilSleeping();
867
+
868
+ // Test 1: test setting `allow_stall` from true to false
869
+ //
870
+ // Assert existence of a write stall
871
+ WriteOptions wo_no_slowdown;
872
+ wo_no_slowdown.no_slowdown = true;
873
+ Status s = Put(Key(0), DummyString(kBigValue), wo_no_slowdown);
874
+ ASSERT_TRUE(s.IsIncomplete());
875
+ ASSERT_TRUE(s.ToString().find("Write stall") != std::string::npos);
876
+
877
+ ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency(
878
+ {{"WBMStallInterface::BlockDB",
879
+ "DBWriteBufferManagerTest::RuntimeChangeableThreadSafeParameters::"
880
+ "ChangeParameter"}});
881
+ ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();
882
+
883
+ // Test `SetAllowStall()`
884
+ port::Thread thread1([&] { ASSERT_OK(Put(Key(0), DummyString(kBigValue))); });
885
+ port::Thread thread2([&] {
886
+ TEST_SYNC_POINT(
887
+ "DBWriteBufferManagerTest::RuntimeChangeableThreadSafeParameters::"
888
+ "ChangeParameter");
889
+ options.write_buffer_manager->SetAllowStall(false);
890
+ });
891
+
892
+ // Verify `allow_stall` is successfully set to false in thread2.
893
+ // Otherwise, thread1's write will be stalled and this test will hang
894
+ // forever.
895
+ thread1.join();
896
+ thread2.join();
897
+
898
+ ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->ClearAllCallBacks();
899
+ ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing();
900
+
901
+ // Test 2: test setting `allow_stall` from false to true
902
+ //
903
+ // Assert no write stall
904
+ ASSERT_OK(Put(Key(0), DummyString(kBigValue), wo_no_slowdown));
905
+
906
+ // Test `SetAllowStall()`
907
+ options.write_buffer_manager->SetAllowStall(true);
908
+
909
+ // Verify `allow_stall` is successfully set to true.
910
+ // Otherwise the following write will not be stalled and therefore succeed.
911
+ s = Put(Key(0), DummyString(kBigValue), wo_no_slowdown);
912
+ ASSERT_TRUE(s.IsIncomplete());
913
+ ASSERT_TRUE(s.ToString().find("Write stall") != std::string::npos);
914
+ sleeping_task->WakeUp();
915
+ }
849
916
 
850
917
  INSTANTIATE_TEST_CASE_P(DBWriteBufferManagerTest, DBWriteBufferManagerTest,
851
918
  testing::Bool());
@@ -636,6 +636,11 @@ TEST_P(DBWriteTest, LockWALInEffect) {
636
636
  ASSERT_TRUE(dbfull()->WALBufferIsEmpty());
637
637
  ASSERT_OK(db_->UnlockWAL());
638
638
 
639
+ // The above `TEST_SwitchWAL()` triggered a flush. That flush needs to finish
640
+ // before we make the filesystem inactive, otherwise the flush might hit an
641
+ // unrecoverable error (e.g., failed MANIFEST update).
642
+ ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable(nullptr));
643
+
639
644
  // Fail the WAL flush if applicable
640
645
  fault_fs->SetFilesystemActive(false);
641
646
  Status s = Put("key2", "value");
@@ -1578,7 +1578,7 @@ TEST_F(DBErrorHandlingFSTest, MultiDBCompactionError) {
1578
1578
  }
1579
1579
 
1580
1580
  for (auto i = 0; i < kNumDbInstances; ++i) {
1581
- Status s = static_cast<DBImpl*>(db[i])->TEST_WaitForCompact(true);
1581
+ Status s = static_cast<DBImpl*>(db[i])->TEST_WaitForCompact();
1582
1582
  ASSERT_EQ(s.severity(), Status::Severity::kSoftError);
1583
1583
  fault_fs[i]->SetFilesystemActive(true);
1584
1584
  }
@@ -1587,7 +1587,7 @@ TEST_F(DBErrorHandlingFSTest, MultiDBCompactionError) {
1587
1587
  for (auto i = 0; i < kNumDbInstances; ++i) {
1588
1588
  std::string prop;
1589
1589
  ASSERT_EQ(listener[i]->WaitForRecovery(5000000), true);
1590
- ASSERT_OK(static_cast<DBImpl*>(db[i])->TEST_WaitForCompact(true));
1590
+ ASSERT_OK(static_cast<DBImpl*>(db[i])->TEST_WaitForCompact());
1591
1591
  EXPECT_TRUE(db[i]->GetProperty(
1592
1592
  "rocksdb.num-files-at-level" + std::to_string(0), &prop));
1593
1593
  EXPECT_EQ(atoi(prop.c_str()), 0);
@@ -1701,7 +1701,7 @@ TEST_F(DBErrorHandlingFSTest, MultiDBVariousErrors) {
1701
1701
  }
1702
1702
 
1703
1703
  for (auto i = 0; i < kNumDbInstances; ++i) {
1704
- Status s = static_cast<DBImpl*>(db[i])->TEST_WaitForCompact(true);
1704
+ Status s = static_cast<DBImpl*>(db[i])->TEST_WaitForCompact();
1705
1705
  switch (i) {
1706
1706
  case 0:
1707
1707
  ASSERT_EQ(s.severity(), Status::Severity::kSoftError);
@@ -1723,7 +1723,7 @@ TEST_F(DBErrorHandlingFSTest, MultiDBVariousErrors) {
1723
1723
  ASSERT_EQ(listener[i]->WaitForRecovery(5000000), true);
1724
1724
  }
1725
1725
  if (i == 1) {
1726
- ASSERT_OK(static_cast<DBImpl*>(db[i])->TEST_WaitForCompact(true));
1726
+ ASSERT_OK(static_cast<DBImpl*>(db[i])->TEST_WaitForCompact());
1727
1727
  }
1728
1728
  EXPECT_TRUE(db[i]->GetProperty(
1729
1729
  "rocksdb.num-files-at-level" + std::to_string(0), &prop));
@@ -38,6 +38,8 @@ Status UpdateManifestForFilesState(
38
38
  const DBOptions& db_opts, const std::string& db_name,
39
39
  const std::vector<ColumnFamilyDescriptor>& column_families,
40
40
  const UpdateManifestForFilesStateOptions& opts) {
41
+ // TODO: plumb Env::IOActivity
42
+ const ReadOptions read_options;
41
43
  OfflineManifestWriter w(db_opts, db_name);
42
44
  Status s = w.Recover(column_families);
43
45
 
@@ -100,7 +102,7 @@ Status UpdateManifestForFilesState(
100
102
  lf->oldest_blob_file_number, lf->oldest_ancester_time,
101
103
  lf->file_creation_time, lf->epoch_number, lf->file_checksum,
102
104
  lf->file_checksum_func_name, lf->unique_id,
103
- lf->compensated_range_deletion_size);
105
+ lf->compensated_range_deletion_size, lf->tail_size);
104
106
  }
105
107
  }
106
108
  } else {
@@ -114,7 +116,7 @@ Status UpdateManifestForFilesState(
114
116
  std::unique_ptr<FSDirectory> db_dir;
115
117
  s = fs->NewDirectory(db_name, IOOptions(), &db_dir, nullptr);
116
118
  if (s.ok()) {
117
- s = w.LogAndApply(cfd, &edit, db_dir.get());
119
+ s = w.LogAndApply(read_options, cfd, &edit, db_dir.get());
118
120
  }
119
121
  if (s.ok()) {
120
122
  ++cfs_updated;
@@ -102,7 +102,8 @@ class ExternalSSTFileBasicTest
102
102
  // all point operators, even though sst_file_writer.DeleteRange
103
103
  // must be called before other sst_file_writer methods. This is
104
104
  // because point writes take precedence over range deletions
105
- // in the same ingested sst.
105
+ // in the same ingested sst. This precedence is part of
106
+ // `SstFileWriter::DeleteRange()`'s API contract.
106
107
  std::string start_key = Key(range_deletions[i].first);
107
108
  std::string end_key = Key(range_deletions[i].second);
108
109
  s = sst_file_writer.DeleteRange(start_key, end_key);
@@ -1418,6 +1419,7 @@ TEST_P(ExternalSSTFileBasicTest, IngestionWithRangeDeletions) {
1418
1419
  ASSERT_EQ(4, NumTableFilesAtLevel(0));
1419
1420
  ASSERT_EQ(1, NumTableFilesAtLevel(kNumLevels - 2));
1420
1421
  ASSERT_EQ(2, NumTableFilesAtLevel(options.num_levels - 1));
1422
+ VerifyDBFromMap(true_data);
1421
1423
  }
1422
1424
 
1423
1425
  TEST_F(ExternalSSTFileBasicTest, AdjacentRangeDeletionTombstones) {
@@ -1462,6 +1464,89 @@ TEST_F(ExternalSSTFileBasicTest, AdjacentRangeDeletionTombstones) {
1462
1464
  DestroyAndRecreateExternalSSTFilesDir();
1463
1465
  }
1464
1466
 
1467
+ TEST_F(ExternalSSTFileBasicTest, UnorderedRangeDeletions) {
1468
+ int kNumLevels = 7;
1469
+ Options options = CurrentOptions();
1470
+ options.disable_auto_compactions = true;
1471
+ options.num_levels = kNumLevels;
1472
+ Reopen(options);
1473
+
1474
+ std::map<std::string, std::string> true_data;
1475
+ int file_id = 1;
1476
+
1477
+ // prevent range deletions from being dropped due to becoming obsolete.
1478
+ const Snapshot* snapshot = db_->GetSnapshot();
1479
+
1480
+ // Range del [0, 50) in memtable
1481
+ ASSERT_OK(db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), Key(0),
1482
+ Key(50)));
1483
+
1484
+ // Out of order range del overlaps memtable, so flush is required before file
1485
+ // is ingested into L0
1486
+ ASSERT_OK(GenerateAndAddExternalFile(
1487
+ options, {60, 90}, {ValueType::kTypeValue, ValueType::kTypeValue},
1488
+ {{65, 70}, {45, 50}}, file_id++, true /* write_global_seqno */,
1489
+ true /* verify_checksums_before_ingest */, &true_data));
1490
+ ASSERT_EQ(2, true_data.size());
1491
+ ASSERT_EQ(2, NumTableFilesAtLevel(0));
1492
+ ASSERT_EQ(0, NumTableFilesAtLevel(kNumLevels - 1));
1493
+ VerifyDBFromMap(true_data);
1494
+
1495
+ // Compact to L6
1496
+ MoveFilesToLevel(kNumLevels - 1);
1497
+ ASSERT_EQ(0, NumTableFilesAtLevel(0));
1498
+ ASSERT_EQ(1, NumTableFilesAtLevel(kNumLevels - 1));
1499
+ VerifyDBFromMap(true_data);
1500
+
1501
+ // Ingest a file containing out of order range dels that cover nothing
1502
+ ASSERT_OK(GenerateAndAddExternalFile(
1503
+ options, {151, 175}, {ValueType::kTypeValue, ValueType::kTypeValue},
1504
+ {{160, 200}, {120, 180}}, file_id++, true /* write_global_seqno */,
1505
+ true /* verify_checksums_before_ingest */, &true_data));
1506
+ ASSERT_EQ(4, true_data.size());
1507
+ ASSERT_EQ(0, NumTableFilesAtLevel(0));
1508
+ ASSERT_EQ(2, NumTableFilesAtLevel(kNumLevels - 1));
1509
+ VerifyDBFromMap(true_data);
1510
+
1511
+ // Ingest a file containing out of order range dels that cover keys in L6
1512
+ ASSERT_OK(GenerateAndAddExternalFile(
1513
+ options, {}, {}, {{190, 200}, {170, 180}, {55, 65}}, file_id++,
1514
+ true /* write_global_seqno */, true /* verify_checksums_before_ingest */,
1515
+ &true_data));
1516
+ ASSERT_EQ(2, true_data.size());
1517
+ ASSERT_EQ(1, NumTableFilesAtLevel(kNumLevels - 2));
1518
+ ASSERT_EQ(2, NumTableFilesAtLevel(kNumLevels - 1));
1519
+ VerifyDBFromMap(true_data);
1520
+
1521
+ db_->ReleaseSnapshot(snapshot);
1522
+ }
1523
+
1524
+ TEST_F(ExternalSSTFileBasicTest, RangeDeletionEndComesBeforeStart) {
1525
+ Options options = CurrentOptions();
1526
+ SstFileWriter sst_file_writer(EnvOptions(), options);
1527
+
1528
+ // "file.sst"
1529
+ // Verify attempt to delete 300 => 200 fails.
1530
+ // Then, verify attempt to delete 300 => 300 succeeds but writes nothing.
1531
+ // Afterwards, verify attempt to delete 300 => 400 works normally.
1532
+ std::string file = sst_files_dir_ + "file.sst";
1533
+ ASSERT_OK(sst_file_writer.Open(file));
1534
+ ASSERT_TRUE(
1535
+ sst_file_writer.DeleteRange(Key(300), Key(200)).IsInvalidArgument());
1536
+ ASSERT_OK(sst_file_writer.DeleteRange(Key(300), Key(300)));
1537
+ ASSERT_OK(sst_file_writer.DeleteRange(Key(300), Key(400)));
1538
+ ExternalSstFileInfo file_info;
1539
+ Status s = sst_file_writer.Finish(&file_info);
1540
+ ASSERT_OK(s) << s.ToString();
1541
+ ASSERT_EQ(file_info.file_path, file);
1542
+ ASSERT_EQ(file_info.num_entries, 0);
1543
+ ASSERT_EQ(file_info.smallest_key, "");
1544
+ ASSERT_EQ(file_info.largest_key, "");
1545
+ ASSERT_EQ(file_info.num_range_del_entries, 1);
1546
+ ASSERT_EQ(file_info.smallest_range_del_key, Key(300));
1547
+ ASSERT_EQ(file_info.largest_range_del_key, Key(400));
1548
+ }
1549
+
1465
1550
  TEST_P(ExternalSSTFileBasicTest, IngestFileWithBadBlockChecksum) {
1466
1551
  bool change_checksum_called = false;
1467
1552
  const auto& change_checksum = [&](void* arg) {
@@ -464,6 +464,16 @@ Status ExternalSstFileIngestionJob::Run() {
464
464
  current_time = oldest_ancester_time =
465
465
  static_cast<uint64_t>(temp_current_time);
466
466
  }
467
+ uint64_t tail_size = 0;
468
+ bool contain_no_data_blocks = f.table_properties.num_entries > 0 &&
469
+ (f.table_properties.num_entries ==
470
+ f.table_properties.num_range_deletions);
471
+ if (f.table_properties.tail_start_offset > 0 || contain_no_data_blocks) {
472
+ uint64_t file_size = f.fd.GetFileSize();
473
+ assert(f.table_properties.tail_start_offset <= file_size);
474
+ tail_size = file_size - f.table_properties.tail_start_offset;
475
+ }
476
+
467
477
  FileMetaData f_metadata(
468
478
  f.fd.GetNumber(), f.fd.GetPathId(), f.fd.GetFileSize(),
469
479
  f.smallest_internal_key, f.largest_internal_key, f.assigned_seqno,
@@ -472,7 +482,7 @@ Status ExternalSstFileIngestionJob::Run() {
472
482
  ingestion_options_.ingest_behind
473
483
  ? kReservedEpochNumberForFileIngestedBehind
474
484
  : cfd_->NewEpochNumber(),
475
- f.file_checksum, f.file_checksum_func_name, f.unique_id, 0);
485
+ f.file_checksum, f.file_checksum_func_name, f.unique_id, 0, tail_size);
476
486
  f_metadata.temperature = f.file_temperature;
477
487
  edit_.AddFile(f.picked_level, f_metadata);
478
488
  }
@@ -678,6 +688,7 @@ Status ExternalSstFileIngestionJob::GetIngestedFileInfo(
678
688
  TableReaderOptions(
679
689
  *cfd_->ioptions(), sv->mutable_cf_options.prefix_extractor,
680
690
  env_options_, cfd_->internal_comparator(),
691
+ sv->mutable_cf_options.block_protection_bytes_per_key,
681
692
  /*skip_filters*/ false, /*immortal*/ false,
682
693
  /*force_direct_prefetch*/ false, /*level*/ -1,
683
694
  /*block_cache_tracer*/ nullptr,
@@ -692,6 +703,7 @@ Status ExternalSstFileIngestionJob::GetIngestedFileInfo(
692
703
  // If customized readahead size is needed, we can pass a user option
693
704
  // all the way to here. Right now we just rely on the default readahead
694
705
  // to keep things simple.
706
+ // TODO: plumb Env::IOActivity
695
707
  ReadOptions ro;
696
708
  ro.readahead_size = ingestion_options_.verify_checksums_readahead_size;
697
709
  status = table_reader->VerifyChecksum(
@@ -745,6 +757,7 @@ Status ExternalSstFileIngestionJob::GetIngestedFileInfo(
745
757
  file_to_ingest->num_range_deletions = props->num_range_deletions;
746
758
 
747
759
  ParsedInternalKey key;
760
+ // TODO: plumb Env::IOActivity
748
761
  ReadOptions ro;
749
762
  std::unique_ptr<InternalIterator> iter(table_reader->NewIterator(
750
763
  ro, sv->mutable_cf_options.prefix_extractor.get(), /*arena=*/nullptr,
@@ -855,6 +868,7 @@ Status ExternalSstFileIngestionJob::AssignLevelAndSeqnoForIngestedFile(
855
868
 
856
869
  bool overlap_with_db = false;
857
870
  Arena arena;
871
+ // TODO: plumb Env::IOActivity
858
872
  ReadOptions ro;
859
873
  ro.total_order_seek = true;
860
874
  int target_level = 0;
@@ -1088,4 +1102,3 @@ Status ExternalSstFileIngestionJob::SyncIngestedFile(TWritableFile* file) {
1088
1102
  }
1089
1103
 
1090
1104
  } // namespace ROCKSDB_NAMESPACE
1091
-
@@ -1289,7 +1289,7 @@ TEST_F(ExternalSSTFileTest, IngestNonExistingFile) {
1289
1289
  ASSERT_OK(Flush());
1290
1290
 
1291
1291
  ASSERT_OK(db_->CompactRange(CompactRangeOptions(), nullptr, nullptr));
1292
- ASSERT_OK(dbfull()->TEST_WaitForCompact(true));
1292
+ ASSERT_OK(dbfull()->TEST_WaitForCompact());
1293
1293
 
1294
1294
  // After full compaction, there should be only 1 file.
1295
1295
  std::vector<std::string> files;
@@ -2857,4 +2857,3 @@ int main(int argc, char** argv) {
2857
2857
  ::testing::InitGoogleTest(&argc, argv);
2858
2858
  return RUN_ALL_TESTS();
2859
2859
  }
2860
-
@@ -141,11 +141,12 @@ FlushJob::FlushJob(
141
141
  FlushJob::~FlushJob() { ThreadStatusUtil::ResetThreadStatus(); }
142
142
 
143
143
  void FlushJob::ReportStartedFlush() {
144
- ThreadStatusUtil::SetColumnFamily(cfd_, cfd_->ioptions()->env,
145
- db_options_.enable_thread_tracking);
144
+ ThreadStatusUtil::SetEnableTracking(db_options_.enable_thread_tracking);
145
+ ThreadStatusUtil::SetColumnFamily(cfd_);
146
146
  ThreadStatusUtil::SetThreadOperation(ThreadStatus::OP_FLUSH);
147
147
  ThreadStatusUtil::SetThreadOperationProperty(ThreadStatus::COMPACTION_JOB_ID,
148
148
  job_context_->job_id);
149
+
149
150
  IOSTATS_RESET(bytes_written);
150
151
  }
151
152
 
@@ -379,6 +380,7 @@ Status FlushJob::MemPurge() {
379
380
  // Create two iterators, one for the memtable data (contains
380
381
  // info from puts + deletes), and one for the memtable
381
382
  // Range Tombstones (from DeleteRanges).
383
+ // TODO: plumb Env::IOActivity
382
384
  ReadOptions ro;
383
385
  ro.total_order_seek = true;
384
386
  Arena arena;
@@ -669,6 +671,7 @@ bool FlushJob::MemPurgeDecider(double threshold) {
669
671
  // Cochran formula for determining sample size.
670
672
  // 95% confidence interval, 7% precision.
671
673
  // n0 = (1.96*1.96)*0.25/(0.07*0.07) = 196.0
674
+ // TODO: plumb Env::IOActivity
672
675
  double n0 = 196.0;
673
676
  ReadOptions ro;
674
677
  ro.total_order_seek = true;
@@ -841,6 +844,7 @@ Status FlushJob::WriteLevel0Table() {
841
844
  range_del_iters;
842
845
  ReadOptions ro;
843
846
  ro.total_order_seek = true;
847
+ ro.io_activity = Env::IOActivity::kFlush;
844
848
  Arena arena;
845
849
  uint64_t total_num_entries = 0, total_num_deletes = 0;
846
850
  uint64_t total_data_size = 0;
@@ -930,17 +934,19 @@ Status FlushJob::WriteLevel0Table() {
930
934
  meta_.fd.GetNumber());
931
935
  const SequenceNumber job_snapshot_seq =
932
936
  job_context_->GetJobSnapshotSequence();
933
- s = BuildTable(
934
- dbname_, versions_, db_options_, tboptions, file_options_,
935
- cfd_->table_cache(), iter.get(), std::move(range_del_iters), &meta_,
936
- &blob_file_additions, existing_snapshots_,
937
- earliest_write_conflict_snapshot_, job_snapshot_seq,
938
- snapshot_checker_, mutable_cf_options_.paranoid_file_checks,
939
- cfd_->internal_stats(), &io_s, io_tracer_,
940
- BlobFileCreationReason::kFlush, seqno_to_time_mapping_, event_logger_,
941
- job_context_->job_id, io_priority, &table_properties_, write_hint,
942
- full_history_ts_low, blob_callback_, base_, &num_input_entries,
943
- &memtable_payload_bytes, &memtable_garbage_bytes);
937
+ const ReadOptions read_options(Env::IOActivity::kFlush);
938
+ s = BuildTable(dbname_, versions_, db_options_, tboptions, file_options_,
939
+ read_options, cfd_->table_cache(), iter.get(),
940
+ std::move(range_del_iters), &meta_, &blob_file_additions,
941
+ existing_snapshots_, earliest_write_conflict_snapshot_,
942
+ job_snapshot_seq, snapshot_checker_,
943
+ mutable_cf_options_.paranoid_file_checks,
944
+ cfd_->internal_stats(), &io_s, io_tracer_,
945
+ BlobFileCreationReason::kFlush, seqno_to_time_mapping_,
946
+ event_logger_, job_context_->job_id, io_priority,
947
+ &table_properties_, write_hint, full_history_ts_low,
948
+ blob_callback_, base_, &num_input_entries,
949
+ &memtable_payload_bytes, &memtable_garbage_bytes);
944
950
  // TODO: Cleanup io_status in BuildTable and table builders
945
951
  assert(!s.ok() || io_s.ok());
946
952
  io_s.PermitUncheckedError();
@@ -1001,7 +1007,8 @@ Status FlushJob::WriteLevel0Table() {
1001
1007
  meta_.oldest_blob_file_number, meta_.oldest_ancester_time,
1002
1008
  meta_.file_creation_time, meta_.epoch_number,
1003
1009
  meta_.file_checksum, meta_.file_checksum_func_name,
1004
- meta_.unique_id, meta_.compensated_range_deletion_size);
1010
+ meta_.unique_id, meta_.compensated_range_deletion_size,
1011
+ meta_.tail_size);
1005
1012
  edit_->SetBlobFileAdditions(std::move(blob_file_additions));
1006
1013
  }
1007
1014
  // Piggyback FlushJobInfo on the first flushed memtable.
@@ -36,7 +36,7 @@ class ForwardLevelIterator : public InternalIterator {
36
36
  const ColumnFamilyData* const cfd, const ReadOptions& read_options,
37
37
  const std::vector<FileMetaData*>& files,
38
38
  const std::shared_ptr<const SliceTransform>& prefix_extractor,
39
- bool allow_unprepared_value)
39
+ bool allow_unprepared_value, uint8_t block_protection_bytes_per_key)
40
40
  : cfd_(cfd),
41
41
  read_options_(read_options),
42
42
  files_(files),
@@ -45,7 +45,8 @@ class ForwardLevelIterator : public InternalIterator {
45
45
  file_iter_(nullptr),
46
46
  pinned_iters_mgr_(nullptr),
47
47
  prefix_extractor_(prefix_extractor),
48
- allow_unprepared_value_(allow_unprepared_value) {
48
+ allow_unprepared_value_(allow_unprepared_value),
49
+ block_protection_bytes_per_key_(block_protection_bytes_per_key) {
49
50
  status_.PermitUncheckedError(); // Allow uninitialized status through
50
51
  }
51
52
 
@@ -87,7 +88,8 @@ class ForwardLevelIterator : public InternalIterator {
87
88
  /*arena=*/nullptr, /*skip_filters=*/false, /*level=*/-1,
88
89
  /*max_file_size_for_l0_meta_pin=*/0,
89
90
  /*smallest_compaction_key=*/nullptr,
90
- /*largest_compaction_key=*/nullptr, allow_unprepared_value_);
91
+ /*largest_compaction_key=*/nullptr, allow_unprepared_value_,
92
+ block_protection_bytes_per_key_);
91
93
  file_iter_->SetPinnedItersMgr(pinned_iters_mgr_);
92
94
  valid_ = false;
93
95
  if (!range_del_agg.IsEmpty()) {
@@ -211,6 +213,7 @@ class ForwardLevelIterator : public InternalIterator {
211
213
  // Kept alive by ForwardIterator::sv_->mutable_cf_options
212
214
  const std::shared_ptr<const SliceTransform>& prefix_extractor_;
213
215
  const bool allow_unprepared_value_;
216
+ const uint8_t block_protection_bytes_per_key_;
214
217
  };
215
218
 
216
219
  ForwardIterator::ForwardIterator(DBImpl* db, const ReadOptions& read_options,
@@ -738,7 +741,8 @@ void ForwardIterator::RebuildIterators(bool refresh_sv) {
738
741
  /*skip_filters=*/false, /*level=*/-1,
739
742
  MaxFileSizeForL0MetaPin(sv_->mutable_cf_options),
740
743
  /*smallest_compaction_key=*/nullptr,
741
- /*largest_compaction_key=*/nullptr, allow_unprepared_value_));
744
+ /*largest_compaction_key=*/nullptr, allow_unprepared_value_,
745
+ sv_->mutable_cf_options.block_protection_bytes_per_key));
742
746
  }
743
747
  BuildLevelIterators(vstorage, sv_);
744
748
  current_ = nullptr;
@@ -819,7 +823,8 @@ void ForwardIterator::RenewIterators() {
819
823
  /*skip_filters=*/false, /*level=*/-1,
820
824
  MaxFileSizeForL0MetaPin(svnew->mutable_cf_options),
821
825
  /*smallest_compaction_key=*/nullptr,
822
- /*largest_compaction_key=*/nullptr, allow_unprepared_value_));
826
+ /*largest_compaction_key=*/nullptr, allow_unprepared_value_,
827
+ svnew->mutable_cf_options.block_protection_bytes_per_key));
823
828
  }
824
829
 
825
830
  for (auto* f : l0_iters_) {
@@ -863,7 +868,8 @@ void ForwardIterator::BuildLevelIterators(const VersionStorageInfo* vstorage,
863
868
  } else {
864
869
  level_iters_.push_back(new ForwardLevelIterator(
865
870
  cfd_, read_options_, level_files,
866
- sv->mutable_cf_options.prefix_extractor, allow_unprepared_value_));
871
+ sv->mutable_cf_options.prefix_extractor, allow_unprepared_value_,
872
+ sv->mutable_cf_options.block_protection_bytes_per_key));
867
873
  }
868
874
  }
869
875
  }
@@ -885,7 +891,8 @@ void ForwardIterator::ResetIncompleteIterators() {
885
891
  /*skip_filters=*/false, /*level=*/-1,
886
892
  MaxFileSizeForL0MetaPin(sv_->mutable_cf_options),
887
893
  /*smallest_compaction_key=*/nullptr,
888
- /*largest_compaction_key=*/nullptr, allow_unprepared_value_);
894
+ /*largest_compaction_key=*/nullptr, allow_unprepared_value_,
895
+ sv_->mutable_cf_options.block_protection_bytes_per_key);
889
896
  l0_iters_[i]->SetPinnedItersMgr(pinned_iters_mgr_);
890
897
  }
891
898