@nxtedition/rocksdb 5.2.21 → 5.2.26

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (909)
  1. package/binding.cc +216 -252
  2. package/binding.gyp +78 -72
  3. package/deps/rocksdb/build_version.cc +70 -4
  4. package/deps/rocksdb/rocksdb/CMakeLists.txt +281 -149
  5. package/deps/rocksdb/rocksdb/Makefile +459 -469
  6. package/deps/rocksdb/rocksdb/README.md +4 -4
  7. package/deps/rocksdb/rocksdb/TARGETS +5244 -1500
  8. package/deps/rocksdb/rocksdb/cache/cache.cc +12 -3
  9. package/deps/rocksdb/rocksdb/cache/cache_bench.cc +7 -368
  10. package/deps/rocksdb/rocksdb/cache/cache_bench_tool.cc +924 -0
  11. package/deps/rocksdb/rocksdb/cache/cache_entry_roles.cc +128 -0
  12. package/deps/rocksdb/rocksdb/cache/cache_entry_roles.h +103 -0
  13. package/deps/rocksdb/rocksdb/cache/cache_entry_stats.h +183 -0
  14. package/deps/rocksdb/rocksdb/cache/cache_helpers.h +11 -0
  15. package/deps/rocksdb/rocksdb/cache/cache_key.cc +344 -0
  16. package/deps/rocksdb/rocksdb/cache/cache_key.h +132 -0
  17. package/deps/rocksdb/rocksdb/cache/cache_reservation_manager.cc +183 -0
  18. package/deps/rocksdb/rocksdb/cache/cache_reservation_manager.h +288 -0
  19. package/deps/rocksdb/rocksdb/cache/cache_reservation_manager_test.cc +468 -0
  20. package/deps/rocksdb/rocksdb/cache/cache_test.cc +85 -8
  21. package/deps/rocksdb/rocksdb/cache/clock_cache.cc +121 -51
  22. package/deps/rocksdb/rocksdb/cache/compressed_secondary_cache.cc +171 -0
  23. package/deps/rocksdb/rocksdb/cache/compressed_secondary_cache.h +86 -0
  24. package/deps/rocksdb/rocksdb/cache/compressed_secondary_cache_test.cc +607 -0
  25. package/deps/rocksdb/rocksdb/cache/lru_cache.cc +381 -154
  26. package/deps/rocksdb/rocksdb/cache/lru_cache.h +176 -33
  27. package/deps/rocksdb/rocksdb/cache/lru_cache_test.cc +1659 -3
  28. package/deps/rocksdb/rocksdb/cache/sharded_cache.cc +94 -23
  29. package/deps/rocksdb/rocksdb/cache/sharded_cache.h +49 -28
  30. package/deps/rocksdb/rocksdb/crash_test.mk +93 -0
  31. package/deps/rocksdb/rocksdb/db/arena_wrapped_db_iter.cc +54 -31
  32. package/deps/rocksdb/rocksdb/db/arena_wrapped_db_iter.h +10 -6
  33. package/deps/rocksdb/rocksdb/db/blob/blob_counting_iterator.h +146 -0
  34. package/deps/rocksdb/rocksdb/db/blob/blob_counting_iterator_test.cc +326 -0
  35. package/deps/rocksdb/rocksdb/db/blob/blob_fetcher.cc +34 -0
  36. package/deps/rocksdb/rocksdb/db/blob/blob_fetcher.h +37 -0
  37. package/deps/rocksdb/rocksdb/db/blob/blob_file_addition.cc +4 -2
  38. package/deps/rocksdb/rocksdb/db/blob/blob_file_addition_test.cc +8 -4
  39. package/deps/rocksdb/rocksdb/db/blob/blob_file_builder.cc +99 -40
  40. package/deps/rocksdb/rocksdb/db/blob/blob_file_builder.h +20 -8
  41. package/deps/rocksdb/rocksdb/db/blob/blob_file_builder_test.cc +95 -83
  42. package/deps/rocksdb/rocksdb/db/blob/blob_file_cache.cc +13 -10
  43. package/deps/rocksdb/rocksdb/db/blob/blob_file_cache.h +7 -4
  44. package/deps/rocksdb/rocksdb/db/blob/blob_file_cache_test.cc +37 -37
  45. package/deps/rocksdb/rocksdb/db/blob/blob_file_completion_callback.h +101 -0
  46. package/deps/rocksdb/rocksdb/db/blob/blob_file_meta.cc +8 -1
  47. package/deps/rocksdb/rocksdb/db/blob/blob_file_meta.h +6 -0
  48. package/deps/rocksdb/rocksdb/db/blob/blob_file_reader.cc +209 -44
  49. package/deps/rocksdb/rocksdb/db/blob/blob_file_reader.h +37 -11
  50. package/deps/rocksdb/rocksdb/db/blob/blob_file_reader_test.cc +382 -179
  51. package/deps/rocksdb/rocksdb/db/blob/blob_garbage_meter.cc +100 -0
  52. package/deps/rocksdb/rocksdb/db/blob/blob_garbage_meter.h +102 -0
  53. package/deps/rocksdb/rocksdb/db/blob/blob_garbage_meter_test.cc +196 -0
  54. package/deps/rocksdb/rocksdb/db/blob/blob_index.h +3 -0
  55. package/deps/rocksdb/rocksdb/db/blob/blob_log_format.h +2 -1
  56. package/deps/rocksdb/rocksdb/db/blob/blob_log_sequential_reader.cc +7 -5
  57. package/deps/rocksdb/rocksdb/db/blob/blob_log_sequential_reader.h +10 -3
  58. package/deps/rocksdb/rocksdb/db/blob/blob_log_writer.cc +12 -8
  59. package/deps/rocksdb/rocksdb/db/blob/blob_log_writer.h +5 -5
  60. package/deps/rocksdb/rocksdb/db/blob/db_blob_basic_test.cc +772 -9
  61. package/deps/rocksdb/rocksdb/db/blob/db_blob_compaction_test.cc +730 -0
  62. package/deps/rocksdb/rocksdb/db/blob/db_blob_corruption_test.cc +82 -0
  63. package/deps/rocksdb/rocksdb/db/blob/db_blob_index_test.cc +155 -17
  64. package/deps/rocksdb/rocksdb/db/blob/prefetch_buffer_collection.cc +21 -0
  65. package/deps/rocksdb/rocksdb/db/blob/prefetch_buffer_collection.h +38 -0
  66. package/deps/rocksdb/rocksdb/db/builder.cc +137 -89
  67. package/deps/rocksdb/rocksdb/db/builder.h +16 -37
  68. package/deps/rocksdb/rocksdb/db/c.cc +413 -208
  69. package/deps/rocksdb/rocksdb/db/c_test.c +227 -138
  70. package/deps/rocksdb/rocksdb/db/column_family.cc +118 -103
  71. package/deps/rocksdb/rocksdb/db/column_family.h +86 -44
  72. package/deps/rocksdb/rocksdb/db/column_family_test.cc +38 -24
  73. package/deps/rocksdb/rocksdb/db/compact_files_test.cc +81 -0
  74. package/deps/rocksdb/rocksdb/db/compaction/clipping_iterator.h +275 -0
  75. package/deps/rocksdb/rocksdb/db/compaction/clipping_iterator_test.cc +258 -0
  76. package/deps/rocksdb/rocksdb/db/compaction/compaction.cc +81 -28
  77. package/deps/rocksdb/rocksdb/db/compaction/compaction.h +43 -12
  78. package/deps/rocksdb/rocksdb/db/compaction/compaction_iteration_stats.h +12 -0
  79. package/deps/rocksdb/rocksdb/db/compaction/compaction_iterator.cc +406 -215
  80. package/deps/rocksdb/rocksdb/db/compaction/compaction_iterator.h +147 -50
  81. package/deps/rocksdb/rocksdb/db/compaction/compaction_iterator_test.cc +167 -61
  82. package/deps/rocksdb/rocksdb/db/compaction/compaction_job.cc +1321 -156
  83. package/deps/rocksdb/rocksdb/db/compaction/compaction_job.h +197 -28
  84. package/deps/rocksdb/rocksdb/db/compaction/compaction_job_stats_test.cc +2 -3
  85. package/deps/rocksdb/rocksdb/db/compaction/compaction_job_test.cc +246 -43
  86. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker.cc +65 -26
  87. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker.h +7 -7
  88. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_fifo.cc +122 -9
  89. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_fifo.h +8 -2
  90. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_level.cc +18 -6
  91. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_level.h +1 -1
  92. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_test.cc +536 -44
  93. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_universal.cc +311 -30
  94. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_universal.h +1 -1
  95. package/deps/rocksdb/rocksdb/db/compaction/compaction_service_test.cc +849 -0
  96. package/deps/rocksdb/rocksdb/db/compaction/file_pri.h +92 -0
  97. package/deps/rocksdb/rocksdb/db/compaction/sst_partitioner.cc +46 -0
  98. package/deps/rocksdb/rocksdb/db/comparator_db_test.cc +1 -1
  99. package/deps/rocksdb/rocksdb/db/convenience.cc +6 -3
  100. package/deps/rocksdb/rocksdb/db/corruption_test.cc +383 -28
  101. package/deps/rocksdb/rocksdb/db/cuckoo_table_db_test.cc +7 -2
  102. package/deps/rocksdb/rocksdb/db/db_basic_test.cc +154 -45
  103. package/deps/rocksdb/rocksdb/db/db_block_cache_test.cc +1095 -33
  104. package/deps/rocksdb/rocksdb/db/db_bloom_filter_test.cc +1249 -203
  105. package/deps/rocksdb/rocksdb/db/db_compaction_filter_test.cc +135 -9
  106. package/deps/rocksdb/rocksdb/db/db_compaction_test.cc +1348 -166
  107. package/deps/rocksdb/rocksdb/db/db_dynamic_level_test.cc +3 -5
  108. package/deps/rocksdb/rocksdb/db/db_encryption_test.cc +1 -1
  109. package/deps/rocksdb/rocksdb/db/db_filesnapshot.cc +312 -45
  110. package/deps/rocksdb/rocksdb/db/db_flush_test.cc +1734 -48
  111. package/deps/rocksdb/rocksdb/db/{compacted_db_impl.cc → db_impl/compacted_db_impl.cc} +24 -7
  112. package/deps/rocksdb/rocksdb/db/{compacted_db_impl.h → db_impl/compacted_db_impl.h} +1 -1
  113. package/deps/rocksdb/rocksdb/db/db_impl/db_impl.cc +644 -333
  114. package/deps/rocksdb/rocksdb/db/db_impl/db_impl.h +365 -92
  115. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_compaction_flush.cc +578 -210
  116. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_debug.cc +38 -16
  117. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_experimental.cc +17 -10
  118. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_files.cc +75 -74
  119. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_open.cc +450 -183
  120. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_readonly.cc +42 -9
  121. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_secondary.cc +232 -15
  122. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_secondary.h +42 -4
  123. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_write.cc +297 -100
  124. package/deps/rocksdb/rocksdb/db/db_info_dumper.cc +16 -15
  125. package/deps/rocksdb/rocksdb/db/db_inplace_update_test.cc +31 -1
  126. package/deps/rocksdb/rocksdb/db/db_io_failure_test.cc +6 -5
  127. package/deps/rocksdb/rocksdb/db/db_iter.cc +218 -153
  128. package/deps/rocksdb/rocksdb/db/db_iter.h +14 -12
  129. package/deps/rocksdb/rocksdb/db/db_iter_stress_test.cc +1 -1
  130. package/deps/rocksdb/rocksdb/db/db_iter_test.cc +84 -160
  131. package/deps/rocksdb/rocksdb/db/db_iterator_test.cc +47 -6
  132. package/deps/rocksdb/rocksdb/db/db_kv_checksum_test.cc +204 -0
  133. package/deps/rocksdb/rocksdb/db/db_log_iter_test.cc +21 -13
  134. package/deps/rocksdb/rocksdb/db/db_logical_block_size_cache_test.cc +17 -10
  135. package/deps/rocksdb/rocksdb/db/db_memtable_test.cc +38 -24
  136. package/deps/rocksdb/rocksdb/db/db_merge_operand_test.cc +184 -19
  137. package/deps/rocksdb/rocksdb/db/db_merge_operator_test.cc +1 -1
  138. package/deps/rocksdb/rocksdb/db/db_options_test.cc +183 -3
  139. package/deps/rocksdb/rocksdb/db/db_properties_test.cc +409 -9
  140. package/deps/rocksdb/rocksdb/db/db_range_del_test.cc +92 -23
  141. package/deps/rocksdb/rocksdb/db/db_rate_limiter_test.cc +446 -0
  142. package/deps/rocksdb/rocksdb/db/{db_impl/db_secondary_test.cc → db_secondary_test.cc} +363 -35
  143. package/deps/rocksdb/rocksdb/db/db_sst_test.cc +520 -15
  144. package/deps/rocksdb/rocksdb/db/db_statistics_test.cc +50 -1
  145. package/deps/rocksdb/rocksdb/db/db_table_properties_test.cc +139 -4
  146. package/deps/rocksdb/rocksdb/db/db_tailing_iter_test.cc +1 -1
  147. package/deps/rocksdb/rocksdb/db/db_test.cc +669 -359
  148. package/deps/rocksdb/rocksdb/db/db_test2.cc +2110 -304
  149. package/deps/rocksdb/rocksdb/db/db_test_util.cc +76 -43
  150. package/deps/rocksdb/rocksdb/db/db_test_util.h +231 -103
  151. package/deps/rocksdb/rocksdb/db/db_universal_compaction_test.cc +19 -11
  152. package/deps/rocksdb/rocksdb/db/db_wal_test.cc +490 -71
  153. package/deps/rocksdb/rocksdb/db/db_with_timestamp_basic_test.cc +980 -349
  154. package/deps/rocksdb/rocksdb/db/db_with_timestamp_compaction_test.cc +11 -12
  155. package/deps/rocksdb/rocksdb/db/db_write_buffer_manager_test.cc +793 -0
  156. package/deps/rocksdb/rocksdb/db/db_write_test.cc +2 -1
  157. package/deps/rocksdb/rocksdb/db/dbformat.cc +4 -12
  158. package/deps/rocksdb/rocksdb/db/dbformat.h +28 -18
  159. package/deps/rocksdb/rocksdb/db/dbformat_test.cc +3 -0
  160. package/deps/rocksdb/rocksdb/db/deletefile_test.cc +50 -15
  161. package/deps/rocksdb/rocksdb/db/error_handler.cc +127 -41
  162. package/deps/rocksdb/rocksdb/db/error_handler.h +12 -5
  163. package/deps/rocksdb/rocksdb/db/error_handler_fs_test.cc +524 -255
  164. package/deps/rocksdb/rocksdb/db/event_helpers.cc +136 -11
  165. package/deps/rocksdb/rocksdb/db/event_helpers.h +27 -2
  166. package/deps/rocksdb/rocksdb/db/experimental.cc +100 -0
  167. package/deps/rocksdb/rocksdb/db/external_sst_file_basic_test.cc +307 -4
  168. package/deps/rocksdb/rocksdb/db/external_sst_file_ingestion_job.cc +137 -60
  169. package/deps/rocksdb/rocksdb/db/external_sst_file_ingestion_job.h +12 -8
  170. package/deps/rocksdb/rocksdb/db/external_sst_file_test.cc +86 -55
  171. package/deps/rocksdb/rocksdb/db/fault_injection_test.cc +86 -5
  172. package/deps/rocksdb/rocksdb/db/filename_test.cc +63 -0
  173. package/deps/rocksdb/rocksdb/db/flush_job.cc +619 -64
  174. package/deps/rocksdb/rocksdb/db/flush_job.h +30 -7
  175. package/deps/rocksdb/rocksdb/db/flush_job_test.cc +33 -16
  176. package/deps/rocksdb/rocksdb/db/flush_scheduler.h +2 -1
  177. package/deps/rocksdb/rocksdb/db/forward_iterator.cc +18 -17
  178. package/deps/rocksdb/rocksdb/db/forward_iterator.h +5 -4
  179. package/deps/rocksdb/rocksdb/db/forward_iterator_bench.cc +0 -1
  180. package/deps/rocksdb/rocksdb/db/history_trimming_iterator.h +91 -0
  181. package/deps/rocksdb/rocksdb/db/import_column_family_job.cc +25 -14
  182. package/deps/rocksdb/rocksdb/db/import_column_family_job.h +6 -5
  183. package/deps/rocksdb/rocksdb/db/import_column_family_test.cc +1 -1
  184. package/deps/rocksdb/rocksdb/db/internal_stats.cc +471 -50
  185. package/deps/rocksdb/rocksdb/db/internal_stats.h +129 -25
  186. package/deps/rocksdb/rocksdb/db/job_context.h +22 -9
  187. package/deps/rocksdb/rocksdb/db/kv_checksum.h +394 -0
  188. package/deps/rocksdb/rocksdb/db/listener_test.cc +518 -41
  189. package/deps/rocksdb/rocksdb/db/log_format.h +4 -1
  190. package/deps/rocksdb/rocksdb/db/log_reader.cc +129 -6
  191. package/deps/rocksdb/rocksdb/db/log_reader.h +17 -1
  192. package/deps/rocksdb/rocksdb/db/log_test.cc +161 -11
  193. package/deps/rocksdb/rocksdb/db/log_writer.cc +92 -13
  194. package/deps/rocksdb/rocksdb/db/log_writer.h +18 -5
  195. package/deps/rocksdb/rocksdb/db/logs_with_prep_tracker.h +1 -1
  196. package/deps/rocksdb/rocksdb/db/lookup_key.h +0 -1
  197. package/deps/rocksdb/rocksdb/db/malloc_stats.cc +2 -2
  198. package/deps/rocksdb/rocksdb/db/manual_compaction_test.cc +21 -8
  199. package/deps/rocksdb/rocksdb/db/memtable.cc +144 -54
  200. package/deps/rocksdb/rocksdb/db/memtable.h +72 -15
  201. package/deps/rocksdb/rocksdb/db/memtable_list.cc +95 -47
  202. package/deps/rocksdb/rocksdb/db/memtable_list.h +33 -13
  203. package/deps/rocksdb/rocksdb/db/memtable_list_test.cc +61 -31
  204. package/deps/rocksdb/rocksdb/db/merge_context.h +20 -8
  205. package/deps/rocksdb/rocksdb/db/merge_helper.cc +54 -11
  206. package/deps/rocksdb/rocksdb/db/merge_helper.h +17 -6
  207. package/deps/rocksdb/rocksdb/db/merge_helper_test.cc +13 -7
  208. package/deps/rocksdb/rocksdb/db/merge_test.cc +40 -19
  209. package/deps/rocksdb/rocksdb/db/obsolete_files_test.cc +14 -25
  210. package/deps/rocksdb/rocksdb/db/output_validator.cc +3 -0
  211. package/deps/rocksdb/rocksdb/db/output_validator.h +5 -4
  212. package/deps/rocksdb/rocksdb/db/perf_context_test.cc +32 -28
  213. package/deps/rocksdb/rocksdb/db/periodic_work_scheduler.cc +43 -29
  214. package/deps/rocksdb/rocksdb/db/periodic_work_scheduler.h +9 -7
  215. package/deps/rocksdb/rocksdb/db/periodic_work_scheduler_test.cc +21 -16
  216. package/deps/rocksdb/rocksdb/db/pinned_iterators_manager.h +1 -1
  217. package/deps/rocksdb/rocksdb/db/plain_table_db_test.cc +29 -36
  218. package/deps/rocksdb/rocksdb/db/pre_release_callback.h +1 -2
  219. package/deps/rocksdb/rocksdb/db/prefix_test.cc +4 -4
  220. package/deps/rocksdb/rocksdb/db/range_del_aggregator.h +2 -2
  221. package/deps/rocksdb/rocksdb/db/range_del_aggregator_bench.cc +11 -11
  222. package/deps/rocksdb/rocksdb/db/range_del_aggregator_test.cc +3 -2
  223. package/deps/rocksdb/rocksdb/db/range_tombstone_fragmenter.cc +14 -8
  224. package/deps/rocksdb/rocksdb/db/range_tombstone_fragmenter.h +17 -0
  225. package/deps/rocksdb/rocksdb/db/range_tombstone_fragmenter_test.cc +4 -2
  226. package/deps/rocksdb/rocksdb/db/read_callback.h +1 -0
  227. package/deps/rocksdb/rocksdb/db/repair.cc +87 -58
  228. package/deps/rocksdb/rocksdb/db/repair_test.cc +35 -5
  229. package/deps/rocksdb/rocksdb/db/snapshot_impl.h +2 -1
  230. package/deps/rocksdb/rocksdb/db/table_cache.cc +95 -69
  231. package/deps/rocksdb/rocksdb/db/table_cache.h +63 -53
  232. package/deps/rocksdb/rocksdb/db/table_properties_collector.cc +4 -4
  233. package/deps/rocksdb/rocksdb/db/table_properties_collector.h +78 -10
  234. package/deps/rocksdb/rocksdb/db/table_properties_collector_test.cc +28 -33
  235. package/deps/rocksdb/rocksdb/db/transaction_log_impl.cc +30 -51
  236. package/deps/rocksdb/rocksdb/db/transaction_log_impl.h +12 -8
  237. package/deps/rocksdb/rocksdb/db/version_builder.cc +564 -341
  238. package/deps/rocksdb/rocksdb/db/version_builder.h +8 -8
  239. package/deps/rocksdb/rocksdb/db/version_builder_test.cc +327 -155
  240. package/deps/rocksdb/rocksdb/db/version_edit.cc +89 -27
  241. package/deps/rocksdb/rocksdb/db/version_edit.h +42 -17
  242. package/deps/rocksdb/rocksdb/db/version_edit_handler.cc +324 -43
  243. package/deps/rocksdb/rocksdb/db/version_edit_handler.h +79 -22
  244. package/deps/rocksdb/rocksdb/db/version_edit_test.cc +165 -20
  245. package/deps/rocksdb/rocksdb/db/version_set.cc +935 -1034
  246. package/deps/rocksdb/rocksdb/db/version_set.h +183 -122
  247. package/deps/rocksdb/rocksdb/db/version_set_test.cc +556 -138
  248. package/deps/rocksdb/rocksdb/db/version_util.h +68 -0
  249. package/deps/rocksdb/rocksdb/db/wal_manager.cc +23 -21
  250. package/deps/rocksdb/rocksdb/db/wal_manager.h +5 -2
  251. package/deps/rocksdb/rocksdb/db/wal_manager_test.cc +30 -27
  252. package/deps/rocksdb/rocksdb/db/write_batch.cc +704 -209
  253. package/deps/rocksdb/rocksdb/db/write_batch_internal.h +135 -2
  254. package/deps/rocksdb/rocksdb/db/write_batch_test.cc +209 -5
  255. package/deps/rocksdb/rocksdb/db/write_callback_test.cc +2 -0
  256. package/deps/rocksdb/rocksdb/db/write_controller.cc +47 -54
  257. package/deps/rocksdb/rocksdb/db/write_controller.h +12 -9
  258. package/deps/rocksdb/rocksdb/db/write_controller_test.cc +215 -103
  259. package/deps/rocksdb/rocksdb/db/write_thread.cc +11 -0
  260. package/deps/rocksdb/rocksdb/db/write_thread.h +14 -8
  261. package/deps/rocksdb/rocksdb/db_stress_tool/CMakeLists.txt +7 -4
  262. package/deps/rocksdb/rocksdb/db_stress_tool/batched_ops_stress.cc +10 -3
  263. package/deps/rocksdb/rocksdb/db_stress_tool/cf_consistency_stress.cc +6 -0
  264. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress.cc +1 -1
  265. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_common.cc +19 -2
  266. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_common.h +78 -25
  267. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_compaction_filter.h +13 -2
  268. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_driver.cc +29 -12
  269. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_env_wrapper.h +5 -1
  270. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_gflags.cc +199 -32
  271. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_listener.cc +188 -0
  272. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_listener.h +59 -10
  273. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_shared_state.h +77 -109
  274. package/deps/rocksdb/rocksdb/{third-party/folly/folly/synchronization/WaitOptions.cpp → db_stress_tool/db_stress_stat.cc} +9 -4
  275. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_stat.h +7 -6
  276. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_table_properties_collector.h +1 -0
  277. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_test_base.cc +699 -143
  278. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_test_base.h +20 -2
  279. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_tool.cc +49 -39
  280. package/deps/rocksdb/rocksdb/db_stress_tool/expected_state.cc +631 -0
  281. package/deps/rocksdb/rocksdb/db_stress_tool/expected_state.h +287 -0
  282. package/deps/rocksdb/rocksdb/db_stress_tool/multi_ops_txns_stress.cc +1565 -0
  283. package/deps/rocksdb/rocksdb/db_stress_tool/multi_ops_txns_stress.h +374 -0
  284. package/deps/rocksdb/rocksdb/db_stress_tool/no_batched_ops_stress.cc +149 -18
  285. package/deps/rocksdb/rocksdb/env/composite_env.cc +464 -0
  286. package/deps/rocksdb/rocksdb/env/composite_env_wrapper.h +98 -646
  287. package/deps/rocksdb/rocksdb/env/emulated_clock.h +114 -0
  288. package/deps/rocksdb/rocksdb/env/env.cc +632 -42
  289. package/deps/rocksdb/rocksdb/env/env_basic_test.cc +84 -36
  290. package/deps/rocksdb/rocksdb/env/env_chroot.cc +88 -286
  291. package/deps/rocksdb/rocksdb/env/env_chroot.h +34 -1
  292. package/deps/rocksdb/rocksdb/env/env_encryption.cc +469 -277
  293. package/deps/rocksdb/rocksdb/env/env_encryption_ctr.h +9 -30
  294. package/deps/rocksdb/rocksdb/env/env_posix.cc +110 -119
  295. package/deps/rocksdb/rocksdb/env/env_test.cc +1128 -39
  296. package/deps/rocksdb/rocksdb/env/file_system.cc +147 -8
  297. package/deps/rocksdb/rocksdb/env/file_system_tracer.cc +207 -136
  298. package/deps/rocksdb/rocksdb/env/file_system_tracer.h +86 -54
  299. package/deps/rocksdb/rocksdb/env/fs_posix.cc +192 -64
  300. package/deps/rocksdb/rocksdb/env/fs_readonly.h +107 -0
  301. package/deps/rocksdb/rocksdb/env/fs_remap.cc +339 -0
  302. package/deps/rocksdb/rocksdb/env/fs_remap.h +139 -0
  303. package/deps/rocksdb/rocksdb/env/io_posix.cc +245 -41
  304. package/deps/rocksdb/rocksdb/env/io_posix.h +66 -1
  305. package/deps/rocksdb/rocksdb/env/mock_env.cc +147 -149
  306. package/deps/rocksdb/rocksdb/env/mock_env.h +113 -11
  307. package/deps/rocksdb/rocksdb/env/mock_env_test.cc +2 -4
  308. package/deps/rocksdb/rocksdb/env/unique_id_gen.cc +164 -0
  309. package/deps/rocksdb/rocksdb/env/unique_id_gen.h +71 -0
  310. package/deps/rocksdb/rocksdb/file/delete_scheduler.cc +9 -5
  311. package/deps/rocksdb/rocksdb/file/delete_scheduler.h +6 -4
  312. package/deps/rocksdb/rocksdb/file/delete_scheduler_test.cc +19 -12
  313. package/deps/rocksdb/rocksdb/file/file_prefetch_buffer.cc +459 -70
  314. package/deps/rocksdb/rocksdb/file/file_prefetch_buffer.h +205 -28
  315. package/deps/rocksdb/rocksdb/file/file_util.cc +39 -28
  316. package/deps/rocksdb/rocksdb/file/file_util.h +18 -27
  317. package/deps/rocksdb/rocksdb/file/filename.cc +59 -22
  318. package/deps/rocksdb/rocksdb/file/filename.h +13 -8
  319. package/deps/rocksdb/rocksdb/file/line_file_reader.cc +68 -0
  320. package/deps/rocksdb/rocksdb/file/line_file_reader.h +59 -0
  321. package/deps/rocksdb/rocksdb/file/prefetch_test.cc +1130 -6
  322. package/deps/rocksdb/rocksdb/file/random_access_file_reader.cc +220 -36
  323. package/deps/rocksdb/rocksdb/file/random_access_file_reader.h +69 -17
  324. package/deps/rocksdb/rocksdb/file/random_access_file_reader_test.cc +13 -12
  325. package/deps/rocksdb/rocksdb/file/read_write_util.cc +3 -38
  326. package/deps/rocksdb/rocksdb/file/read_write_util.h +0 -4
  327. package/deps/rocksdb/rocksdb/file/readahead_file_info.h +33 -0
  328. package/deps/rocksdb/rocksdb/file/sequence_file_reader.cc +57 -9
  329. package/deps/rocksdb/rocksdb/file/sequence_file_reader.h +58 -6
  330. package/deps/rocksdb/rocksdb/file/sst_file_manager_impl.cc +29 -54
  331. package/deps/rocksdb/rocksdb/file/sst_file_manager_impl.h +22 -29
  332. package/deps/rocksdb/rocksdb/file/writable_file_writer.cc +424 -50
  333. package/deps/rocksdb/rocksdb/file/writable_file_writer.h +66 -19
  334. package/deps/rocksdb/rocksdb/include/rocksdb/advanced_options.h +157 -66
  335. package/deps/rocksdb/rocksdb/include/rocksdb/c.h +224 -121
  336. package/deps/rocksdb/rocksdb/include/rocksdb/cache.h +333 -30
  337. package/deps/rocksdb/rocksdb/include/rocksdb/cache_bench_tool.h +14 -0
  338. package/deps/rocksdb/rocksdb/include/rocksdb/cleanable.h +1 -1
  339. package/deps/rocksdb/rocksdb/include/rocksdb/compaction_filter.h +90 -50
  340. package/deps/rocksdb/rocksdb/include/rocksdb/compaction_job_stats.h +13 -5
  341. package/deps/rocksdb/rocksdb/include/rocksdb/comparator.h +20 -4
  342. package/deps/rocksdb/rocksdb/include/rocksdb/concurrent_task_limiter.h +8 -3
  343. package/deps/rocksdb/rocksdb/include/rocksdb/configurable.h +53 -12
  344. package/deps/rocksdb/rocksdb/include/rocksdb/convenience.h +31 -6
  345. package/deps/rocksdb/rocksdb/include/rocksdb/customizable.h +102 -7
  346. package/deps/rocksdb/rocksdb/include/rocksdb/data_structure.h +51 -0
  347. package/deps/rocksdb/rocksdb/include/rocksdb/db.h +370 -262
  348. package/deps/rocksdb/rocksdb/include/rocksdb/env.h +286 -87
  349. package/deps/rocksdb/rocksdb/include/rocksdb/env_encryption.h +124 -64
  350. package/deps/rocksdb/rocksdb/include/rocksdb/experimental.h +27 -0
  351. package/deps/rocksdb/rocksdb/include/rocksdb/file_checksum.h +21 -4
  352. package/deps/rocksdb/rocksdb/include/rocksdb/file_system.h +384 -41
  353. package/deps/rocksdb/rocksdb/include/rocksdb/filter_policy.h +111 -143
  354. package/deps/rocksdb/rocksdb/include/rocksdb/flush_block_policy.h +20 -6
  355. package/deps/rocksdb/rocksdb/include/rocksdb/functor_wrapper.h +56 -0
  356. package/deps/rocksdb/rocksdb/include/rocksdb/io_status.h +15 -33
  357. package/deps/rocksdb/rocksdb/include/rocksdb/iostats_context.h +37 -1
  358. package/deps/rocksdb/rocksdb/include/rocksdb/iterator.h +1 -3
  359. package/deps/rocksdb/rocksdb/include/rocksdb/listener.h +314 -26
  360. package/deps/rocksdb/rocksdb/include/rocksdb/memory_allocator.h +11 -7
  361. package/deps/rocksdb/rocksdb/include/rocksdb/memtablerep.h +50 -15
  362. package/deps/rocksdb/rocksdb/include/rocksdb/merge_operator.h +10 -3
  363. package/deps/rocksdb/rocksdb/include/rocksdb/metadata.h +186 -96
  364. package/deps/rocksdb/rocksdb/include/rocksdb/options.h +373 -103
  365. package/deps/rocksdb/rocksdb/include/rocksdb/perf_context.h +13 -3
  366. package/deps/rocksdb/rocksdb/include/rocksdb/persistent_cache.h +2 -2
  367. package/deps/rocksdb/rocksdb/include/rocksdb/rate_limiter.h +37 -7
  368. package/deps/rocksdb/rocksdb/include/rocksdb/rocksdb_namespace.h +6 -0
  369. package/deps/rocksdb/rocksdb/include/rocksdb/secondary_cache.h +87 -0
  370. package/deps/rocksdb/rocksdb/include/rocksdb/slice.h +5 -12
  371. package/deps/rocksdb/rocksdb/include/rocksdb/slice_transform.h +59 -30
  372. package/deps/rocksdb/rocksdb/include/rocksdb/sst_file_manager.h +11 -11
  373. package/deps/rocksdb/rocksdb/include/rocksdb/sst_file_writer.h +22 -0
  374. package/deps/rocksdb/rocksdb/include/rocksdb/sst_partitioner.h +17 -10
  375. package/deps/rocksdb/rocksdb/include/rocksdb/statistics.h +121 -41
  376. package/deps/rocksdb/rocksdb/include/rocksdb/stats_history.h +1 -0
  377. package/deps/rocksdb/rocksdb/include/rocksdb/status.h +114 -136
  378. package/deps/rocksdb/rocksdb/include/rocksdb/system_clock.h +116 -0
  379. package/deps/rocksdb/rocksdb/include/rocksdb/table.h +160 -18
  380. package/deps/rocksdb/rocksdb/include/rocksdb/table_properties.h +57 -15
  381. package/deps/rocksdb/rocksdb/include/rocksdb/thread_status.h +3 -1
  382. package/deps/rocksdb/rocksdb/include/rocksdb/trace_reader_writer.h +10 -6
  383. package/deps/rocksdb/rocksdb/include/rocksdb/trace_record.h +247 -0
  384. package/deps/rocksdb/rocksdb/include/rocksdb/trace_record_result.h +187 -0
  385. package/deps/rocksdb/rocksdb/include/rocksdb/transaction_log.h +1 -1
  386. package/deps/rocksdb/rocksdb/include/rocksdb/types.h +14 -24
  387. package/deps/rocksdb/rocksdb/include/rocksdb/unique_id.h +46 -0
  388. package/deps/rocksdb/rocksdb/include/rocksdb/universal_compaction.h +14 -4
  389. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/agg_merge.h +138 -0
  390. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/backup_engine.h +631 -0
  391. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/cache_dump_load.h +142 -0
  392. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/checkpoint.h +12 -9
  393. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/customizable_util.h +368 -0
  394. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/ldb_cmd.h +24 -0
  395. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/ldb_cmd_execute_result.h +4 -0
  396. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/object_registry.h +418 -63
  397. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/options_type.h +143 -73
  398. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/options_util.h +2 -2
  399. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/replayer.h +87 -0
  400. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/sim_cache.h +2 -2
  401. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/stackable_db.h +43 -5
  402. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/table_properties_collectors.h +18 -23
  403. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/transaction.h +26 -0
  404. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/transaction_db.h +32 -6
  405. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/transaction_db_mutex.h +1 -2
  406. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/write_batch_with_index.h +20 -1
  407. package/deps/rocksdb/rocksdb/include/rocksdb/version.h +30 -3
  408. package/deps/rocksdb/rocksdb/include/rocksdb/wal_filter.h +11 -2
  409. package/deps/rocksdb/rocksdb/include/rocksdb/write_batch.h +89 -11
  410. package/deps/rocksdb/rocksdb/include/rocksdb/write_batch_base.h +11 -0
  411. package/deps/rocksdb/rocksdb/include/rocksdb/write_buffer_manager.h +108 -38
  412. package/deps/rocksdb/rocksdb/logging/auto_roll_logger.cc +40 -23
  413. package/deps/rocksdb/rocksdb/logging/auto_roll_logger.h +12 -5
  414. package/deps/rocksdb/rocksdb/logging/auto_roll_logger_test.cc +100 -49
  415. package/deps/rocksdb/rocksdb/logging/env_logger.h +7 -5
  416. package/deps/rocksdb/rocksdb/logging/env_logger_test.cc +0 -1
  417. package/deps/rocksdb/rocksdb/logging/posix_logger.h +3 -9
  418. package/deps/rocksdb/rocksdb/memory/arena.cc +3 -1
  419. package/deps/rocksdb/rocksdb/memory/arena.h +1 -1
  420. package/deps/rocksdb/rocksdb/memory/jemalloc_nodump_allocator.cc +171 -106
  421. package/deps/rocksdb/rocksdb/memory/jemalloc_nodump_allocator.h +31 -15
  422. package/deps/rocksdb/rocksdb/memory/memkind_kmem_allocator.cc +15 -4
  423. package/deps/rocksdb/rocksdb/memory/memkind_kmem_allocator.h +24 -8
  424. package/deps/rocksdb/rocksdb/memory/memory_allocator.cc +91 -0
  425. package/deps/rocksdb/rocksdb/memory/memory_allocator_test.cc +239 -0
  426. package/deps/rocksdb/rocksdb/memory/memory_usage.h +14 -1
  427. package/deps/rocksdb/rocksdb/memtable/hash_linklist_rep.cc +72 -9
  428. package/deps/rocksdb/rocksdb/memtable/hash_skiplist_rep.cc +52 -6
  429. package/deps/rocksdb/rocksdb/memtable/inlineskiplist.h +53 -0
  430. package/deps/rocksdb/rocksdb/memtable/inlineskiplist_test.cc +5 -5
  431. package/deps/rocksdb/rocksdb/memtable/memtablerep_bench.cc +17 -5
  432. package/deps/rocksdb/rocksdb/memtable/skiplist_test.cc +1 -1
  433. package/deps/rocksdb/rocksdb/memtable/skiplistrep.cc +87 -0
  434. package/deps/rocksdb/rocksdb/memtable/vectorrep.cc +20 -10
  435. package/deps/rocksdb/rocksdb/memtable/write_buffer_manager.cc +148 -94
  436. package/deps/rocksdb/rocksdb/memtable/write_buffer_manager_test.cc +160 -62
  437. package/deps/rocksdb/rocksdb/microbench/CMakeLists.txt +17 -0
  438. package/deps/rocksdb/rocksdb/microbench/README.md +60 -0
  439. package/deps/rocksdb/rocksdb/microbench/db_basic_bench.cc +1360 -0
  440. package/deps/rocksdb/rocksdb/microbench/ribbon_bench.cc +153 -0
  441. package/deps/rocksdb/rocksdb/monitoring/histogram.cc +8 -15
  442. package/deps/rocksdb/rocksdb/monitoring/histogram.h +0 -1
  443. package/deps/rocksdb/rocksdb/monitoring/histogram_test.cc +18 -16
  444. package/deps/rocksdb/rocksdb/monitoring/histogram_windowing.cc +9 -7
  445. package/deps/rocksdb/rocksdb/monitoring/histogram_windowing.h +5 -3
  446. package/deps/rocksdb/rocksdb/monitoring/instrumented_mutex.cc +7 -5
  447. package/deps/rocksdb/rocksdb/monitoring/instrumented_mutex.h +37 -12
  448. package/deps/rocksdb/rocksdb/monitoring/iostats_context.cc +26 -6
  449. package/deps/rocksdb/rocksdb/monitoring/iostats_context_imp.h +6 -10
  450. package/deps/rocksdb/rocksdb/monitoring/perf_context.cc +14 -13
  451. package/deps/rocksdb/rocksdb/monitoring/perf_context_imp.h +19 -20
  452. package/deps/rocksdb/rocksdb/monitoring/perf_step_timer.h +18 -18
  453. package/deps/rocksdb/rocksdb/monitoring/statistics.cc +84 -2
  454. package/deps/rocksdb/rocksdb/monitoring/statistics.h +6 -0
  455. package/deps/rocksdb/rocksdb/monitoring/statistics_test.cc +47 -2
  456. package/deps/rocksdb/rocksdb/monitoring/stats_history_test.cc +67 -54
  457. package/deps/rocksdb/rocksdb/monitoring/thread_status_updater.cc +4 -1
  458. package/deps/rocksdb/rocksdb/monitoring/thread_status_util.cc +2 -1
  459. package/deps/rocksdb/rocksdb/monitoring/thread_status_util_debug.cc +2 -2
  460. package/deps/rocksdb/rocksdb/options/cf_options.cc +280 -212
  461. package/deps/rocksdb/rocksdb/options/cf_options.h +51 -57
  462. package/deps/rocksdb/rocksdb/options/configurable.cc +242 -138
  463. package/deps/rocksdb/rocksdb/options/configurable_helper.h +4 -68
  464. package/deps/rocksdb/rocksdb/options/configurable_test.cc +144 -21
  465. package/deps/rocksdb/rocksdb/options/configurable_test.h +2 -3
  466. package/deps/rocksdb/rocksdb/options/customizable.cc +67 -7
  467. package/deps/rocksdb/rocksdb/options/customizable_test.cc +1773 -151
  468. package/deps/rocksdb/rocksdb/options/db_options.cc +275 -47
  469. package/deps/rocksdb/rocksdb/options/db_options.h +36 -7
  470. package/deps/rocksdb/rocksdb/options/options.cc +49 -17
  471. package/deps/rocksdb/rocksdb/options/options_helper.cc +369 -352
  472. package/deps/rocksdb/rocksdb/options/options_helper.h +23 -23
  473. package/deps/rocksdb/rocksdb/options/options_parser.cc +18 -13
  474. package/deps/rocksdb/rocksdb/options/options_settable_test.cc +67 -54
  475. package/deps/rocksdb/rocksdb/options/options_test.cc +1162 -187
  476. package/deps/rocksdb/rocksdb/plugin/README.md +43 -0
  477. package/deps/rocksdb/rocksdb/port/jemalloc_helper.h +1 -1
  478. package/deps/rocksdb/rocksdb/port/lang.h +52 -0
  479. package/deps/rocksdb/rocksdb/port/port_example.h +1 -1
  480. package/deps/rocksdb/rocksdb/port/port_posix.cc +31 -2
  481. package/deps/rocksdb/rocksdb/port/port_posix.h +20 -2
  482. package/deps/rocksdb/rocksdb/port/stack_trace.cc +20 -4
  483. package/deps/rocksdb/rocksdb/port/sys_time.h +2 -2
  484. package/deps/rocksdb/rocksdb/port/win/env_default.cc +7 -7
  485. package/deps/rocksdb/rocksdb/port/win/env_win.cc +44 -74
  486. package/deps/rocksdb/rocksdb/port/win/env_win.h +25 -23
  487. package/deps/rocksdb/rocksdb/port/win/io_win.cc +32 -34
  488. package/deps/rocksdb/rocksdb/port/win/io_win.h +12 -6
  489. package/deps/rocksdb/rocksdb/port/win/port_win.cc +55 -35
  490. package/deps/rocksdb/rocksdb/port/win/port_win.h +22 -5
  491. package/deps/rocksdb/rocksdb/port/win/win_logger.cc +3 -3
  492. package/deps/rocksdb/rocksdb/port/win/win_logger.h +3 -5
  493. package/deps/rocksdb/rocksdb/port/win/win_thread.cc +7 -1
  494. package/deps/rocksdb/rocksdb/port/win/win_thread.h +12 -17
  495. package/deps/rocksdb/rocksdb/python.mk +9 -0
  496. package/deps/rocksdb/rocksdb/src.mk +82 -34
  497. package/deps/rocksdb/rocksdb/table/adaptive/adaptive_table_factory.cc +3 -4
  498. package/deps/rocksdb/rocksdb/table/adaptive/adaptive_table_factory.h +1 -1
  499. package/deps/rocksdb/rocksdb/table/block_based/block.cc +158 -80
  500. package/deps/rocksdb/rocksdb/table/block_based/block.h +64 -36
  501. package/deps/rocksdb/rocksdb/table/block_based/block_based_filter_block.cc +23 -14
  502. package/deps/rocksdb/rocksdb/table/block_based/block_based_filter_block.h +13 -5
  503. package/deps/rocksdb/rocksdb/table/block_based/block_based_filter_block_test.cc +3 -218
  504. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_builder.cc +603 -328
  505. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_builder.h +28 -22
  506. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_factory.cc +220 -82
  507. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_factory.h +8 -2
  508. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_iterator.cc +3 -4
  509. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_iterator.h +28 -4
  510. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader.cc +598 -492
  511. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader.h +151 -96
  512. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader_impl.h +31 -58
  513. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader_test.cc +330 -92
  514. package/deps/rocksdb/rocksdb/table/block_based/block_builder.cc +50 -19
  515. package/deps/rocksdb/rocksdb/table/block_based/block_builder.h +23 -0
  516. package/deps/rocksdb/rocksdb/table/block_based/block_like_traits.h +226 -0
  517. package/deps/rocksdb/rocksdb/table/block_based/block_prefetcher.cc +56 -22
  518. package/deps/rocksdb/rocksdb/table/block_based/block_prefetcher.h +42 -4
  519. package/deps/rocksdb/rocksdb/table/block_based/block_test.cc +5 -2
  520. package/deps/rocksdb/rocksdb/table/block_based/block_type.h +2 -0
  521. package/deps/rocksdb/rocksdb/table/block_based/cachable_entry.h +34 -20
  522. package/deps/rocksdb/rocksdb/table/block_based/data_block_hash_index_test.cc +9 -10
  523. package/deps/rocksdb/rocksdb/table/block_based/filter_block.h +26 -3
  524. package/deps/rocksdb/rocksdb/table/block_based/filter_block_reader_common.cc +2 -1
  525. package/deps/rocksdb/rocksdb/table/block_based/filter_policy.cc +844 -202
  526. package/deps/rocksdb/rocksdb/table/block_based/filter_policy_internal.h +281 -81
  527. package/deps/rocksdb/rocksdb/table/block_based/flush_block_policy.cc +62 -2
  528. package/deps/rocksdb/rocksdb/table/block_based/flush_block_policy.h +2 -3
  529. package/deps/rocksdb/rocksdb/table/block_based/full_filter_block.cc +28 -7
  530. package/deps/rocksdb/rocksdb/table/block_based/full_filter_block.h +22 -6
  531. package/deps/rocksdb/rocksdb/table/block_based/full_filter_block_test.cc +28 -26
  532. package/deps/rocksdb/rocksdb/table/block_based/hash_index_reader.cc +1 -1
  533. package/deps/rocksdb/rocksdb/table/block_based/index_builder.cc +1 -2
  534. package/deps/rocksdb/rocksdb/table/block_based/index_reader_common.cc +2 -1
  535. package/deps/rocksdb/rocksdb/table/block_based/mock_block_based_table.h +11 -4
  536. package/deps/rocksdb/rocksdb/table/block_based/parsed_full_filter_block.cc +2 -1
  537. package/deps/rocksdb/rocksdb/table/block_based/parsed_full_filter_block.h +2 -0
  538. package/deps/rocksdb/rocksdb/table/block_based/partitioned_filter_block.cc +68 -26
  539. package/deps/rocksdb/rocksdb/table/block_based/partitioned_filter_block.h +44 -9
  540. package/deps/rocksdb/rocksdb/table/block_based/partitioned_filter_block_test.cc +12 -10
  541. package/deps/rocksdb/rocksdb/table/block_based/partitioned_index_iterator.cc +3 -4
  542. package/deps/rocksdb/rocksdb/table/block_based/partitioned_index_iterator.h +23 -4
  543. package/deps/rocksdb/rocksdb/table/block_based/partitioned_index_reader.cc +44 -19
  544. package/deps/rocksdb/rocksdb/table/block_based/partitioned_index_reader.h +5 -1
  545. package/deps/rocksdb/rocksdb/table/block_based/reader_common.cc +16 -28
  546. package/deps/rocksdb/rocksdb/table/block_based/uncompression_dict_reader.cc +7 -4
  547. package/deps/rocksdb/rocksdb/table/block_based/uncompression_dict_reader.h +2 -2
  548. package/deps/rocksdb/rocksdb/table/block_fetcher.cc +77 -57
  549. package/deps/rocksdb/rocksdb/table/block_fetcher.h +23 -12
  550. package/deps/rocksdb/rocksdb/table/block_fetcher_test.cc +43 -56
  551. package/deps/rocksdb/rocksdb/table/cuckoo/cuckoo_table_builder.cc +8 -8
  552. package/deps/rocksdb/rocksdb/table/cuckoo/cuckoo_table_builder.h +2 -1
  553. package/deps/rocksdb/rocksdb/table/cuckoo/cuckoo_table_builder_test.cc +52 -70
  554. package/deps/rocksdb/rocksdb/table/cuckoo/cuckoo_table_factory.cc +5 -8
  555. package/deps/rocksdb/rocksdb/table/cuckoo/cuckoo_table_factory.h +1 -1
  556. package/deps/rocksdb/rocksdb/table/cuckoo/cuckoo_table_reader.cc +17 -11
  557. package/deps/rocksdb/rocksdb/table/cuckoo/cuckoo_table_reader.h +2 -3
  558. package/deps/rocksdb/rocksdb/table/cuckoo/cuckoo_table_reader_test.cc +42 -51
  559. package/deps/rocksdb/rocksdb/table/format.cc +258 -104
  560. package/deps/rocksdb/rocksdb/table/format.h +120 -109
  561. package/deps/rocksdb/rocksdb/table/get_context.cc +97 -65
  562. package/deps/rocksdb/rocksdb/table/get_context.h +19 -12
  563. package/deps/rocksdb/rocksdb/table/internal_iterator.h +14 -0
  564. package/deps/rocksdb/rocksdb/table/iterator_wrapper.h +8 -0
  565. package/deps/rocksdb/rocksdb/table/merger_test.cc +3 -2
  566. package/deps/rocksdb/rocksdb/table/merging_iterator.cc +11 -21
  567. package/deps/rocksdb/rocksdb/table/merging_iterator.h +3 -3
  568. package/deps/rocksdb/rocksdb/table/meta_blocks.cc +176 -171
  569. package/deps/rocksdb/rocksdb/table/meta_blocks.h +47 -33
  570. package/deps/rocksdb/rocksdb/table/mock_table.cc +7 -9
  571. package/deps/rocksdb/rocksdb/table/mock_table.h +3 -2
  572. package/deps/rocksdb/rocksdb/table/multiget_context.h +15 -8
  573. package/deps/rocksdb/rocksdb/table/persistent_cache_helper.cc +22 -29
  574. package/deps/rocksdb/rocksdb/table/persistent_cache_options.h +6 -3
  575. package/deps/rocksdb/rocksdb/table/plain/plain_table_bloom.h +5 -8
  576. package/deps/rocksdb/rocksdb/table/plain/plain_table_builder.cc +29 -26
  577. package/deps/rocksdb/rocksdb/table/plain/plain_table_builder.h +12 -16
  578. package/deps/rocksdb/rocksdb/table/plain/plain_table_factory.cc +145 -69
  579. package/deps/rocksdb/rocksdb/table/plain/plain_table_factory.h +1 -1
  580. package/deps/rocksdb/rocksdb/table/plain/plain_table_index.cc +7 -6
  581. package/deps/rocksdb/rocksdb/table/plain/plain_table_index.h +3 -4
  582. package/deps/rocksdb/rocksdb/table/plain/plain_table_key_coding.cc +3 -1
  583. package/deps/rocksdb/rocksdb/table/plain/plain_table_key_coding.h +1 -1
  584. package/deps/rocksdb/rocksdb/table/plain/plain_table_reader.cc +13 -18
  585. package/deps/rocksdb/rocksdb/table/plain/plain_table_reader.h +4 -9
  586. package/deps/rocksdb/rocksdb/table/sst_file_dumper.cc +55 -37
  587. package/deps/rocksdb/rocksdb/table/sst_file_dumper.h +10 -5
  588. package/deps/rocksdb/rocksdb/table/sst_file_reader.cc +11 -8
  589. package/deps/rocksdb/rocksdb/table/sst_file_reader_test.cc +222 -16
  590. package/deps/rocksdb/rocksdb/table/sst_file_writer.cc +106 -58
  591. package/deps/rocksdb/rocksdb/table/sst_file_writer_collectors.h +6 -5
  592. package/deps/rocksdb/rocksdb/table/table_builder.h +68 -44
  593. package/deps/rocksdb/rocksdb/table/table_factory.cc +37 -10
  594. package/deps/rocksdb/rocksdb/table/table_properties.cc +109 -54
  595. package/deps/rocksdb/rocksdb/table/table_properties_internal.h +4 -20
  596. package/deps/rocksdb/rocksdb/table/table_reader_bench.cc +33 -32
  597. package/deps/rocksdb/rocksdb/table/table_reader_caller.h +2 -0
  598. package/deps/rocksdb/rocksdb/table/table_test.cc +989 -326
  599. package/deps/rocksdb/rocksdb/table/two_level_iterator.cc +4 -0
  600. package/deps/rocksdb/rocksdb/table/unique_id.cc +166 -0
  601. package/deps/rocksdb/rocksdb/table/unique_id_impl.h +59 -0
  602. package/deps/rocksdb/rocksdb/test_util/mock_time_env.cc +1 -1
  603. package/deps/rocksdb/rocksdb/test_util/mock_time_env.h +13 -10
  604. package/deps/rocksdb/rocksdb/test_util/sync_point.cc +1 -2
  605. package/deps/rocksdb/rocksdb/test_util/sync_point.h +35 -16
  606. package/deps/rocksdb/rocksdb/test_util/sync_point_impl.cc +32 -10
  607. package/deps/rocksdb/rocksdb/test_util/sync_point_impl.h +31 -4
  608. package/deps/rocksdb/rocksdb/test_util/testharness.cc +53 -1
  609. package/deps/rocksdb/rocksdb/test_util/testharness.h +67 -3
  610. package/deps/rocksdb/rocksdb/test_util/testutil.cc +236 -66
  611. package/deps/rocksdb/rocksdb/test_util/testutil.h +63 -100
  612. package/deps/rocksdb/rocksdb/test_util/transaction_test_util.cc +12 -1
  613. package/deps/rocksdb/rocksdb/tools/blob_dump.cc +2 -2
  614. package/deps/rocksdb/rocksdb/tools/block_cache_analyzer/block_cache_trace_analyzer.cc +6 -3
  615. package/deps/rocksdb/rocksdb/tools/block_cache_analyzer/block_cache_trace_analyzer.h +1 -0
  616. package/deps/rocksdb/rocksdb/tools/block_cache_analyzer/block_cache_trace_analyzer_test.cc +9 -3
  617. package/deps/rocksdb/rocksdb/tools/db_bench.cc +1 -1
  618. package/deps/rocksdb/rocksdb/tools/db_bench_tool.cc +1420 -611
  619. package/deps/rocksdb/rocksdb/tools/db_bench_tool_test.cc +11 -8
  620. package/deps/rocksdb/rocksdb/tools/db_repl_stress.cc +11 -1
  621. package/deps/rocksdb/rocksdb/tools/io_tracer_parser_test.cc +4 -2
  622. package/deps/rocksdb/rocksdb/tools/io_tracer_parser_tool.cc +46 -22
  623. package/deps/rocksdb/rocksdb/tools/ldb_cmd.cc +655 -179
  624. package/deps/rocksdb/rocksdb/tools/ldb_cmd_impl.h +58 -6
  625. package/deps/rocksdb/rocksdb/tools/ldb_cmd_test.cc +472 -29
  626. package/deps/rocksdb/rocksdb/tools/ldb_tool.cc +23 -2
  627. package/deps/rocksdb/rocksdb/tools/reduce_levels_test.cc +2 -2
  628. package/deps/rocksdb/rocksdb/tools/simulated_hybrid_file_system.cc +246 -0
  629. package/deps/rocksdb/rocksdb/tools/simulated_hybrid_file_system.h +126 -0
  630. package/deps/rocksdb/rocksdb/tools/sst_dump_test.cc +83 -29
  631. package/deps/rocksdb/rocksdb/tools/sst_dump_tool.cc +38 -17
  632. package/deps/rocksdb/rocksdb/tools/trace_analyzer_test.cc +191 -55
  633. package/deps/rocksdb/rocksdb/tools/trace_analyzer_tool.cc +219 -296
  634. package/deps/rocksdb/rocksdb/tools/trace_analyzer_tool.h +87 -53
  635. package/deps/rocksdb/rocksdb/tools/write_stress.cc +8 -7
  636. package/deps/rocksdb/rocksdb/trace_replay/block_cache_tracer.cc +6 -5
  637. package/deps/rocksdb/rocksdb/trace_replay/block_cache_tracer.h +5 -4
  638. package/deps/rocksdb/rocksdb/trace_replay/block_cache_tracer_test.cc +14 -9
  639. package/deps/rocksdb/rocksdb/trace_replay/io_tracer.cc +134 -60
  640. package/deps/rocksdb/rocksdb/trace_replay/io_tracer.h +49 -38
  641. package/deps/rocksdb/rocksdb/trace_replay/io_tracer_test.cc +152 -15
  642. package/deps/rocksdb/rocksdb/trace_replay/trace_record.cc +206 -0
  643. package/deps/rocksdb/rocksdb/trace_replay/trace_record_handler.cc +190 -0
  644. package/deps/rocksdb/rocksdb/trace_replay/trace_record_handler.h +46 -0
  645. package/deps/rocksdb/rocksdb/trace_replay/trace_record_result.cc +146 -0
  646. package/deps/rocksdb/rocksdb/trace_replay/trace_replay.cc +475 -344
  647. package/deps/rocksdb/rocksdb/trace_replay/trace_replay.h +83 -95
  648. package/deps/rocksdb/rocksdb/util/autovector.h +38 -18
  649. package/deps/rocksdb/rocksdb/util/autovector_test.cc +1 -1
  650. package/deps/rocksdb/rocksdb/util/bloom_impl.h +4 -0
  651. package/deps/rocksdb/rocksdb/util/bloom_test.cc +276 -94
  652. package/deps/rocksdb/rocksdb/util/build_version.cc.in +81 -4
  653. package/deps/rocksdb/rocksdb/util/cast_util.h +22 -0
  654. package/deps/rocksdb/rocksdb/util/channel.h +2 -0
  655. package/deps/rocksdb/rocksdb/util/coding.h +1 -33
  656. package/deps/rocksdb/rocksdb/util/compaction_job_stats_impl.cc +8 -0
  657. package/deps/rocksdb/rocksdb/util/comparator.cc +163 -3
  658. package/deps/rocksdb/rocksdb/util/compression.cc +122 -0
  659. package/deps/rocksdb/rocksdb/util/compression.h +212 -7
  660. package/deps/rocksdb/rocksdb/util/compression_context_cache.cc +1 -3
  661. package/deps/rocksdb/rocksdb/util/crc32c.cc +165 -2
  662. package/deps/rocksdb/rocksdb/util/crc32c.h +6 -0
  663. package/deps/rocksdb/rocksdb/util/crc32c_arm64.cc +14 -0
  664. package/deps/rocksdb/rocksdb/util/crc32c_ppc.h +3 -0
  665. package/deps/rocksdb/rocksdb/util/crc32c_test.cc +47 -0
  666. package/deps/rocksdb/rocksdb/util/defer.h +30 -1
  667. package/deps/rocksdb/rocksdb/util/defer_test.cc +11 -0
  668. package/deps/rocksdb/rocksdb/util/duplicate_detector.h +3 -1
  669. package/deps/rocksdb/rocksdb/util/dynamic_bloom.h +3 -3
  670. package/deps/rocksdb/rocksdb/util/dynamic_bloom_test.cc +5 -4
  671. package/deps/rocksdb/rocksdb/util/fastrange.h +2 -0
  672. package/deps/rocksdb/rocksdb/util/file_checksum_helper.cc +36 -0
  673. package/deps/rocksdb/rocksdb/util/file_checksum_helper.h +3 -1
  674. package/deps/rocksdb/rocksdb/util/file_reader_writer_test.cc +512 -52
  675. package/deps/rocksdb/rocksdb/util/filter_bench.cc +65 -10
  676. package/deps/rocksdb/rocksdb/util/gflags_compat.h +6 -1
  677. package/deps/rocksdb/rocksdb/util/hash.cc +121 -3
  678. package/deps/rocksdb/rocksdb/util/hash.h +31 -1
  679. package/deps/rocksdb/rocksdb/util/hash128.h +26 -0
  680. package/deps/rocksdb/rocksdb/util/hash_containers.h +51 -0
  681. package/deps/rocksdb/rocksdb/util/hash_test.cc +194 -2
  682. package/deps/rocksdb/rocksdb/util/heap.h +6 -1
  683. package/deps/rocksdb/rocksdb/util/kv_map.h +1 -1
  684. package/deps/rocksdb/rocksdb/util/log_write_bench.cc +8 -6
  685. package/deps/rocksdb/rocksdb/util/math.h +74 -7
  686. package/deps/rocksdb/rocksdb/util/math128.h +13 -1
  687. package/deps/rocksdb/rocksdb/util/murmurhash.h +3 -3
  688. package/deps/rocksdb/rocksdb/util/random.cc +9 -0
  689. package/deps/rocksdb/rocksdb/util/random.h +6 -0
  690. package/deps/rocksdb/rocksdb/util/rate_limiter.cc +298 -144
  691. package/deps/rocksdb/rocksdb/util/rate_limiter.h +68 -19
  692. package/deps/rocksdb/rocksdb/util/rate_limiter_test.cc +335 -23
  693. package/deps/rocksdb/rocksdb/util/repeatable_thread.h +10 -12
  694. package/deps/rocksdb/rocksdb/util/repeatable_thread_test.cc +18 -15
  695. package/deps/rocksdb/rocksdb/util/ribbon_alg.h +98 -74
  696. package/deps/rocksdb/rocksdb/util/ribbon_config.cc +506 -0
  697. package/deps/rocksdb/rocksdb/util/ribbon_config.h +182 -0
  698. package/deps/rocksdb/rocksdb/util/ribbon_impl.h +154 -79
  699. package/deps/rocksdb/rocksdb/util/ribbon_test.cc +742 -365
  700. package/deps/rocksdb/rocksdb/util/set_comparator.h +2 -0
  701. package/deps/rocksdb/rocksdb/util/slice.cc +198 -35
  702. package/deps/rocksdb/rocksdb/util/slice_test.cc +30 -1
  703. package/deps/rocksdb/rocksdb/util/status.cc +32 -29
  704. package/deps/rocksdb/rocksdb/util/stop_watch.h +18 -18
  705. package/deps/rocksdb/rocksdb/util/string_util.cc +85 -6
  706. package/deps/rocksdb/rocksdb/util/string_util.h +47 -2
  707. package/deps/rocksdb/rocksdb/util/thread_guard.h +41 -0
  708. package/deps/rocksdb/rocksdb/util/thread_local.h +2 -2
  709. package/deps/rocksdb/rocksdb/util/thread_local_test.cc +22 -24
  710. package/deps/rocksdb/rocksdb/util/threadpool_imp.cc +7 -6
  711. package/deps/rocksdb/rocksdb/util/timer.h +55 -46
  712. package/deps/rocksdb/rocksdb/util/timer_test.cc +50 -48
  713. package/deps/rocksdb/rocksdb/util/user_comparator_wrapper.h +4 -0
  714. package/deps/rocksdb/rocksdb/util/vector_iterator.h +31 -15
  715. package/deps/rocksdb/rocksdb/util/work_queue.h +2 -0
  716. package/deps/rocksdb/rocksdb/util/xxhash.cc +35 -1144
  717. package/deps/rocksdb/rocksdb/util/xxhash.h +5117 -373
  718. package/deps/rocksdb/rocksdb/util/xxph3.h +1762 -0
  719. package/deps/rocksdb/rocksdb/utilities/agg_merge/agg_merge.cc +238 -0
  720. package/deps/rocksdb/rocksdb/utilities/agg_merge/agg_merge.h +49 -0
  721. package/deps/rocksdb/rocksdb/utilities/agg_merge/agg_merge_test.cc +134 -0
  722. package/deps/rocksdb/rocksdb/utilities/agg_merge/test_agg_merge.cc +104 -0
  723. package/deps/rocksdb/rocksdb/utilities/agg_merge/test_agg_merge.h +47 -0
  724. package/deps/rocksdb/rocksdb/utilities/backup/backup_engine.cc +3164 -0
  725. package/deps/rocksdb/rocksdb/utilities/backup/backup_engine_impl.h +29 -0
  726. package/deps/rocksdb/rocksdb/utilities/{backupable/backupable_db_test.cc → backup/backup_engine_test.cc} +1679 -485
  727. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_compaction_filter.cc +6 -4
  728. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_compaction_filter.h +14 -9
  729. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_db.cc +2 -0
  730. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_db.h +1 -0
  731. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_db_gc_stats.h +4 -0
  732. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_db_impl.cc +37 -27
  733. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_db_impl.h +8 -4
  734. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_db_impl_filesnapshot.cc +1 -1
  735. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_db_iterator.h +13 -10
  736. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_db_listener.h +5 -0
  737. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_db_test.cc +44 -25
  738. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_dump_tool.cc +3 -4
  739. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_file.cc +27 -19
  740. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_file.h +4 -2
  741. package/deps/rocksdb/rocksdb/utilities/cache_dump_load.cc +69 -0
  742. package/deps/rocksdb/rocksdb/utilities/cache_dump_load_impl.cc +489 -0
  743. package/deps/rocksdb/rocksdb/utilities/cache_dump_load_impl.h +366 -0
  744. package/deps/rocksdb/rocksdb/utilities/cassandra/cassandra_compaction_filter.cc +67 -4
  745. package/deps/rocksdb/rocksdb/utilities/cassandra/cassandra_compaction_filter.h +21 -6
  746. package/deps/rocksdb/rocksdb/utilities/cassandra/cassandra_functional_test.cc +107 -7
  747. package/deps/rocksdb/rocksdb/utilities/cassandra/cassandra_options.h +43 -0
  748. package/deps/rocksdb/rocksdb/utilities/cassandra/format.h +1 -1
  749. package/deps/rocksdb/rocksdb/utilities/cassandra/merge_operator.cc +24 -8
  750. package/deps/rocksdb/rocksdb/utilities/cassandra/merge_operator.h +7 -7
  751. package/deps/rocksdb/rocksdb/utilities/cassandra/serialize.h +5 -0
  752. package/deps/rocksdb/rocksdb/utilities/checkpoint/checkpoint_impl.cc +99 -218
  753. package/deps/rocksdb/rocksdb/utilities/checkpoint/checkpoint_impl.h +8 -24
  754. package/deps/rocksdb/rocksdb/utilities/checkpoint/checkpoint_test.cc +114 -1
  755. package/deps/rocksdb/rocksdb/utilities/compaction_filters/layered_compaction_filter_base.h +6 -2
  756. package/deps/rocksdb/rocksdb/utilities/compaction_filters/remove_emptyvalue_compactionfilter.cc +0 -4
  757. package/deps/rocksdb/rocksdb/utilities/compaction_filters/remove_emptyvalue_compactionfilter.h +7 -6
  758. package/deps/rocksdb/rocksdb/utilities/compaction_filters.cc +56 -0
  759. package/deps/rocksdb/rocksdb/utilities/convenience/info_log_finder.cc +2 -2
  760. package/deps/rocksdb/rocksdb/utilities/counted_fs.cc +355 -0
  761. package/deps/rocksdb/rocksdb/utilities/counted_fs.h +152 -0
  762. package/deps/rocksdb/rocksdb/utilities/env_mirror.cc +13 -0
  763. package/deps/rocksdb/rocksdb/utilities/env_timed.cc +164 -122
  764. package/deps/rocksdb/rocksdb/utilities/env_timed.h +97 -0
  765. package/deps/rocksdb/rocksdb/utilities/fault_injection_env.cc +75 -17
  766. package/deps/rocksdb/rocksdb/utilities/fault_injection_env.h +19 -3
  767. package/deps/rocksdb/rocksdb/utilities/fault_injection_fs.cc +539 -126
  768. package/deps/rocksdb/rocksdb/utilities/fault_injection_fs.h +162 -17
  769. package/deps/rocksdb/rocksdb/utilities/fault_injection_secondary_cache.cc +110 -0
  770. package/deps/rocksdb/rocksdb/utilities/fault_injection_secondary_cache.h +94 -0
  771. package/deps/rocksdb/rocksdb/utilities/memory/memory_test.cc +5 -2
  772. package/deps/rocksdb/rocksdb/utilities/memory_allocators.h +104 -0
  773. package/deps/rocksdb/rocksdb/utilities/merge_operators/bytesxor.h +5 -3
  774. package/deps/rocksdb/rocksdb/utilities/merge_operators/max.cc +4 -1
  775. package/deps/rocksdb/rocksdb/utilities/merge_operators/put.cc +11 -3
  776. package/deps/rocksdb/rocksdb/utilities/merge_operators/sortlist.cc +0 -2
  777. package/deps/rocksdb/rocksdb/utilities/merge_operators/sortlist.h +5 -1
  778. package/deps/rocksdb/rocksdb/utilities/merge_operators/string_append/stringappend.cc +29 -10
  779. package/deps/rocksdb/rocksdb/utilities/merge_operators/string_append/stringappend.h +6 -3
  780. package/deps/rocksdb/rocksdb/utilities/merge_operators/string_append/stringappend2.cc +29 -14
  781. package/deps/rocksdb/rocksdb/utilities/merge_operators/string_append/stringappend2.h +6 -3
  782. package/deps/rocksdb/rocksdb/utilities/merge_operators/string_append/stringappend_test.cc +71 -18
  783. package/deps/rocksdb/rocksdb/utilities/merge_operators/uint64add.cc +15 -9
  784. package/deps/rocksdb/rocksdb/utilities/merge_operators.cc +120 -0
  785. package/deps/rocksdb/rocksdb/utilities/merge_operators.h +3 -23
  786. package/deps/rocksdb/rocksdb/utilities/object_registry.cc +267 -42
  787. package/deps/rocksdb/rocksdb/utilities/object_registry_test.cc +702 -76
  788. package/deps/rocksdb/rocksdb/utilities/option_change_migration/option_change_migration.cc +1 -1
  789. package/deps/rocksdb/rocksdb/utilities/option_change_migration/option_change_migration_test.cc +26 -5
  790. package/deps/rocksdb/rocksdb/utilities/options/options_util.cc +1 -1
  791. package/deps/rocksdb/rocksdb/utilities/options/options_util_test.cc +124 -1
  792. package/deps/rocksdb/rocksdb/utilities/persistent_cache/block_cache_tier.cc +2 -3
  793. package/deps/rocksdb/rocksdb/utilities/persistent_cache/block_cache_tier.h +8 -9
  794. package/deps/rocksdb/rocksdb/utilities/persistent_cache/block_cache_tier_file.cc +15 -13
  795. package/deps/rocksdb/rocksdb/utilities/persistent_cache/block_cache_tier_file.h +1 -1
  796. package/deps/rocksdb/rocksdb/utilities/persistent_cache/block_cache_tier_metadata.h +4 -4
  797. package/deps/rocksdb/rocksdb/utilities/persistent_cache/hash_table_evictable.h +2 -2
  798. package/deps/rocksdb/rocksdb/utilities/persistent_cache/persistent_cache_bench.cc +8 -9
  799. package/deps/rocksdb/rocksdb/utilities/persistent_cache/persistent_cache_test.cc +1 -1
  800. package/deps/rocksdb/rocksdb/utilities/persistent_cache/persistent_cache_tier.h +6 -3
  801. package/deps/rocksdb/rocksdb/utilities/persistent_cache/volatile_tier_impl.h +2 -2
  802. package/deps/rocksdb/rocksdb/utilities/simulator_cache/cache_simulator.cc +3 -0
  803. package/deps/rocksdb/rocksdb/utilities/simulator_cache/cache_simulator_test.cc +2 -0
  804. package/deps/rocksdb/rocksdb/utilities/simulator_cache/sim_cache.cc +43 -35
  805. package/deps/rocksdb/rocksdb/utilities/simulator_cache/sim_cache_test.cc +20 -18
  806. package/deps/rocksdb/rocksdb/utilities/table_properties_collectors/compact_on_deletion_collector.cc +107 -2
  807. package/deps/rocksdb/rocksdb/utilities/trace/file_trace_reader_writer.cc +23 -15
  808. package/deps/rocksdb/rocksdb/utilities/trace/file_trace_reader_writer.h +2 -2
  809. package/deps/rocksdb/rocksdb/utilities/trace/replayer_impl.cc +316 -0
  810. package/deps/rocksdb/rocksdb/utilities/trace/replayer_impl.h +86 -0
  811. package/deps/rocksdb/rocksdb/utilities/transactions/lock/point/point_lock_manager.cc +4 -5
  812. package/deps/rocksdb/rocksdb/utilities/transactions/lock/point/point_lock_manager.h +4 -3
  813. package/deps/rocksdb/rocksdb/utilities/transactions/lock/point/point_lock_manager_test.h +1 -1
  814. package/deps/rocksdb/rocksdb/utilities/transactions/lock/range/range_locking_test.cc +119 -3
  815. package/deps/rocksdb/rocksdb/utilities/transactions/lock/range/range_tree/lib/locktree/locktree.cc +20 -3
  816. package/deps/rocksdb/rocksdb/utilities/transactions/lock/range/range_tree/lib/locktree/locktree.h +20 -0
  817. package/deps/rocksdb/rocksdb/utilities/transactions/lock/range/range_tree/lib/portability/toku_external_pthread.h +3 -2
  818. package/deps/rocksdb/rocksdb/utilities/transactions/lock/range/range_tree/lib/portability/toku_time.h +4 -0
  819. package/deps/rocksdb/rocksdb/utilities/transactions/lock/range/range_tree/range_tree_lock_manager.cc +38 -14
  820. package/deps/rocksdb/rocksdb/utilities/transactions/lock/range/range_tree/range_tree_lock_manager.h +17 -10
  821. package/deps/rocksdb/rocksdb/utilities/transactions/optimistic_transaction_db_impl.h +1 -0
  822. package/deps/rocksdb/rocksdb/utilities/transactions/optimistic_transaction_test.cc +1 -2
  823. package/deps/rocksdb/rocksdb/utilities/transactions/pessimistic_transaction.cc +423 -34
  824. package/deps/rocksdb/rocksdb/utilities/transactions/pessimistic_transaction.h +82 -2
  825. package/deps/rocksdb/rocksdb/utilities/transactions/pessimistic_transaction_db.cc +72 -40
  826. package/deps/rocksdb/rocksdb/utilities/transactions/pessimistic_transaction_db.h +32 -1
  827. package/deps/rocksdb/rocksdb/utilities/transactions/transaction_base.cc +13 -5
  828. package/deps/rocksdb/rocksdb/utilities/transactions/transaction_base.h +7 -3
  829. package/deps/rocksdb/rocksdb/utilities/transactions/transaction_test.cc +207 -43
  830. package/deps/rocksdb/rocksdb/utilities/transactions/transaction_test.h +50 -7
  831. package/deps/rocksdb/rocksdb/utilities/transactions/transaction_util.cc +28 -10
  832. package/deps/rocksdb/rocksdb/utilities/transactions/transaction_util.h +11 -6
  833. package/deps/rocksdb/rocksdb/utilities/transactions/write_committed_transaction_ts_test.cc +516 -0
  834. package/deps/rocksdb/rocksdb/utilities/transactions/write_prepared_transaction_test.cc +506 -15
  835. package/deps/rocksdb/rocksdb/utilities/transactions/write_prepared_txn.cc +27 -13
  836. package/deps/rocksdb/rocksdb/utilities/transactions/write_prepared_txn_db.cc +14 -14
  837. package/deps/rocksdb/rocksdb/utilities/transactions/write_prepared_txn_db.h +3 -0
  838. package/deps/rocksdb/rocksdb/utilities/transactions/write_unprepared_transaction_test.cc +2 -2
  839. package/deps/rocksdb/rocksdb/utilities/transactions/write_unprepared_txn.cc +14 -5
  840. package/deps/rocksdb/rocksdb/utilities/ttl/db_ttl_impl.cc +305 -27
  841. package/deps/rocksdb/rocksdb/utilities/ttl/db_ttl_impl.h +55 -159
  842. package/deps/rocksdb/rocksdb/utilities/ttl/ttl_test.cc +209 -2
  843. package/deps/rocksdb/rocksdb/utilities/wal_filter.cc +23 -0
  844. package/deps/rocksdb/rocksdb/utilities/write_batch_with_index/write_batch_with_index.cc +157 -88
  845. package/deps/rocksdb/rocksdb/utilities/write_batch_with_index/write_batch_with_index_internal.cc +501 -114
  846. package/deps/rocksdb/rocksdb/utilities/write_batch_with_index/write_batch_with_index_internal.h +91 -316
  847. package/deps/rocksdb/rocksdb/utilities/write_batch_with_index/write_batch_with_index_test.cc +1212 -672
  848. package/deps/rocksdb/rocksdb.gyp +425 -446
  849. package/package.json +8 -8
  850. package/prebuilds/darwin-arm64/node.napi.node +0 -0
  851. package/prebuilds/darwin-x86/node.napi.node +0 -0
  852. package/prebuilds/{darwin-x64+arm64 → linux-x64}/node.napi.node +0 -0
  853. package/deps/rocksdb/rocksdb/env/env_hdfs.cc +0 -648
  854. package/deps/rocksdb/rocksdb/hdfs/README +0 -23
  855. package/deps/rocksdb/rocksdb/hdfs/env_hdfs.h +0 -386
  856. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/backupable_db.h +0 -535
  857. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/env_librados.h +0 -175
  858. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/utility_db.h +0 -34
  859. package/deps/rocksdb/rocksdb/memory/memkind_kmem_allocator_test.cc +0 -102
  860. package/deps/rocksdb/rocksdb/memtable/hash_linklist_rep.h +0 -49
  861. package/deps/rocksdb/rocksdb/memtable/hash_skiplist_rep.h +0 -44
  862. package/deps/rocksdb/rocksdb/options/customizable_helper.h +0 -216
  863. package/deps/rocksdb/rocksdb/third-party/folly/folly/CPortability.h +0 -27
  864. package/deps/rocksdb/rocksdb/third-party/folly/folly/ConstexprMath.h +0 -45
  865. package/deps/rocksdb/rocksdb/third-party/folly/folly/Indestructible.h +0 -166
  866. package/deps/rocksdb/rocksdb/third-party/folly/folly/Optional.h +0 -570
  867. package/deps/rocksdb/rocksdb/third-party/folly/folly/Portability.h +0 -92
  868. package/deps/rocksdb/rocksdb/third-party/folly/folly/ScopeGuard.h +0 -54
  869. package/deps/rocksdb/rocksdb/third-party/folly/folly/Traits.h +0 -152
  870. package/deps/rocksdb/rocksdb/third-party/folly/folly/Unit.h +0 -59
  871. package/deps/rocksdb/rocksdb/third-party/folly/folly/Utility.h +0 -141
  872. package/deps/rocksdb/rocksdb/third-party/folly/folly/chrono/Hardware.h +0 -33
  873. package/deps/rocksdb/rocksdb/third-party/folly/folly/container/Array.h +0 -74
  874. package/deps/rocksdb/rocksdb/third-party/folly/folly/detail/Futex-inl.h +0 -117
  875. package/deps/rocksdb/rocksdb/third-party/folly/folly/detail/Futex.cpp +0 -263
  876. package/deps/rocksdb/rocksdb/third-party/folly/folly/detail/Futex.h +0 -96
  877. package/deps/rocksdb/rocksdb/third-party/folly/folly/functional/Invoke.h +0 -40
  878. package/deps/rocksdb/rocksdb/third-party/folly/folly/hash/Hash.h +0 -29
  879. package/deps/rocksdb/rocksdb/third-party/folly/folly/lang/Align.h +0 -144
  880. package/deps/rocksdb/rocksdb/third-party/folly/folly/lang/Bits.h +0 -30
  881. package/deps/rocksdb/rocksdb/third-party/folly/folly/lang/Launder.h +0 -51
  882. package/deps/rocksdb/rocksdb/third-party/folly/folly/portability/Asm.h +0 -28
  883. package/deps/rocksdb/rocksdb/third-party/folly/folly/portability/SysSyscall.h +0 -10
  884. package/deps/rocksdb/rocksdb/third-party/folly/folly/portability/SysTypes.h +0 -26
  885. package/deps/rocksdb/rocksdb/third-party/folly/folly/synchronization/AtomicNotification-inl.h +0 -138
  886. package/deps/rocksdb/rocksdb/third-party/folly/folly/synchronization/AtomicNotification.cpp +0 -23
  887. package/deps/rocksdb/rocksdb/third-party/folly/folly/synchronization/AtomicNotification.h +0 -57
  888. package/deps/rocksdb/rocksdb/third-party/folly/folly/synchronization/AtomicUtil-inl.h +0 -260
  889. package/deps/rocksdb/rocksdb/third-party/folly/folly/synchronization/AtomicUtil.h +0 -52
  890. package/deps/rocksdb/rocksdb/third-party/folly/folly/synchronization/Baton.h +0 -328
  891. package/deps/rocksdb/rocksdb/third-party/folly/folly/synchronization/DistributedMutex-inl.h +0 -1703
  892. package/deps/rocksdb/rocksdb/third-party/folly/folly/synchronization/DistributedMutex.cpp +0 -16
  893. package/deps/rocksdb/rocksdb/third-party/folly/folly/synchronization/DistributedMutex.h +0 -304
  894. package/deps/rocksdb/rocksdb/third-party/folly/folly/synchronization/DistributedMutexSpecializations.h +0 -39
  895. package/deps/rocksdb/rocksdb/third-party/folly/folly/synchronization/ParkingLot.cpp +0 -26
  896. package/deps/rocksdb/rocksdb/third-party/folly/folly/synchronization/ParkingLot.h +0 -318
  897. package/deps/rocksdb/rocksdb/third-party/folly/folly/synchronization/WaitOptions.h +0 -57
  898. package/deps/rocksdb/rocksdb/third-party/folly/folly/synchronization/detail/InlineFunctionRef.h +0 -219
  899. package/deps/rocksdb/rocksdb/third-party/folly/folly/synchronization/detail/ProxyLockable-inl.h +0 -207
  900. package/deps/rocksdb/rocksdb/third-party/folly/folly/synchronization/detail/ProxyLockable.h +0 -164
  901. package/deps/rocksdb/rocksdb/third-party/folly/folly/synchronization/detail/Sleeper.h +0 -57
  902. package/deps/rocksdb/rocksdb/third-party/folly/folly/synchronization/detail/Spin.h +0 -77
  903. package/deps/rocksdb/rocksdb/third-party/folly/folly/synchronization/test/DistributedMutexTest.cpp +0 -1145
  904. package/deps/rocksdb/rocksdb/util/build_version.h +0 -15
  905. package/deps/rocksdb/rocksdb/util/xxh3p.h +0 -1392
  906. package/deps/rocksdb/rocksdb/utilities/backupable/backupable_db.cc +0 -2354
  907. package/deps/rocksdb/rocksdb/utilities/env_librados.cc +0 -1497
  908. package/deps/rocksdb/rocksdb/utilities/env_librados_test.cc +0 -1146
  909. package/prebuilds/linux-x64/node.napi.glibc.node +0 -0
@@ -53,6 +53,7 @@
53
53
  #include "db/version_set.h"
54
54
  #include "db/write_batch_internal.h"
55
55
  #include "db/write_callback.h"
56
+ #include "env/unique_id_gen.h"
56
57
  #include "file/file_util.h"
57
58
  #include "file/filename.h"
58
59
  #include "file/random_access_file_reader.h"
@@ -60,9 +61,8 @@
60
61
  #include "logging/auto_roll_logger.h"
61
62
  #include "logging/log_buffer.h"
62
63
  #include "logging/logging.h"
63
- #include "memtable/hash_linklist_rep.h"
64
- #include "memtable/hash_skiplist_rep.h"
65
64
  #include "monitoring/in_memory_stats_history.h"
65
+ #include "monitoring/instrumented_mutex.h"
66
66
  #include "monitoring/iostats_context_imp.h"
67
67
  #include "monitoring/perf_context_imp.h"
68
68
  #include "monitoring/persistent_stats_history.h"
@@ -82,6 +82,7 @@
82
82
  #include "rocksdb/stats_history.h"
83
83
  #include "rocksdb/status.h"
84
84
  #include "rocksdb/table.h"
85
+ #include "rocksdb/version.h"
85
86
  #include "rocksdb/write_buffer_manager.h"
86
87
  #include "table/block_based/block.h"
87
88
  #include "table/block_based/block_based_table_factory.h"
@@ -91,16 +92,20 @@
91
92
  #include "table/sst_file_dumper.h"
92
93
  #include "table/table_builder.h"
93
94
  #include "table/two_level_iterator.h"
95
+ #include "table/unique_id_impl.h"
94
96
  #include "test_util/sync_point.h"
97
+ #include "trace_replay/trace_replay.h"
95
98
  #include "util/autovector.h"
96
- #include "util/build_version.h"
97
99
  #include "util/cast_util.h"
98
100
  #include "util/coding.h"
99
101
  #include "util/compression.h"
100
102
  #include "util/crc32c.h"
103
+ #include "util/defer.h"
104
+ #include "util/hash_containers.h"
101
105
  #include "util/mutexlock.h"
102
106
  #include "util/stop_watch.h"
103
107
  #include "util/string_util.h"
108
+ #include "utilities/trace/replayer_impl.h"
104
109
 
105
110
  namespace ROCKSDB_NAMESPACE {
106
111
 
@@ -115,18 +120,16 @@ CompressionType GetCompressionFlush(
115
120
  // Compressing memtable flushes might not help unless the sequential load
116
121
  // optimization is used for leveled compaction. Otherwise the CPU and
117
122
  // latency overhead is not offset by saving much space.
118
- if (ioptions.compaction_style == kCompactionStyleUniversal) {
119
- if (mutable_cf_options.compaction_options_universal
120
- .compression_size_percent < 0) {
121
- return mutable_cf_options.compression;
122
- } else {
123
- return kNoCompression;
124
- }
125
- } else if (!ioptions.compression_per_level.empty()) {
126
- // For leveled compress when min_level_to_compress != 0.
127
- return ioptions.compression_per_level[0];
128
- } else {
123
+ if (ioptions.compaction_style == kCompactionStyleUniversal &&
124
+ mutable_cf_options.compaction_options_universal
125
+ .compression_size_percent >= 0) {
126
+ return kNoCompression;
127
+ }
128
+ if (mutable_cf_options.compression_per_level.empty()) {
129
129
  return mutable_cf_options.compression;
130
+ } else {
131
+ // For leveled compress when min_level_to_compress != 0.
132
+ return mutable_cf_options.compression_per_level[0];
130
133
  }
131
134
  }
132
135
 
@@ -146,27 +149,31 @@ void DumpSupportInfo(Logger* logger) {
146
149
  } // namespace
147
150
 
148
151
  DBImpl::DBImpl(const DBOptions& options, const std::string& dbname,
149
- const bool seq_per_batch, const bool batch_per_txn)
152
+ const bool seq_per_batch, const bool batch_per_txn,
153
+ bool read_only)
150
154
  : dbname_(dbname),
151
155
  own_info_log_(options.info_log == nullptr),
152
- initial_db_options_(SanitizeOptions(dbname, options)),
156
+ initial_db_options_(SanitizeOptions(dbname, options, read_only)),
153
157
  env_(initial_db_options_.env),
154
158
  io_tracer_(std::make_shared<IOTracer>()),
155
159
  immutable_db_options_(initial_db_options_),
156
160
  fs_(immutable_db_options_.fs, io_tracer_),
157
161
  mutable_db_options_(initial_db_options_),
158
- stats_(immutable_db_options_.statistics.get()),
159
- mutex_(stats_, env_, DB_MUTEX_WAIT_MICROS,
162
+ stats_(immutable_db_options_.stats),
163
+ mutex_(stats_, immutable_db_options_.clock, DB_MUTEX_WAIT_MICROS,
160
164
  immutable_db_options_.use_adaptive_mutex),
161
165
  default_cf_handle_(nullptr),
166
+ error_handler_(this, immutable_db_options_, &mutex_),
167
+ event_logger_(immutable_db_options_.info_log.get()),
162
168
  max_total_in_memory_state_(0),
163
169
  file_options_(BuildDBOptions(immutable_db_options_, mutable_db_options_)),
164
170
  file_options_for_compaction_(fs_->OptimizeForCompactionTableWrite(
165
171
  file_options_, immutable_db_options_)),
166
172
  seq_per_batch_(seq_per_batch),
167
173
  batch_per_txn_(batch_per_txn),
168
- db_lock_(nullptr),
174
+ next_job_id_(1),
169
175
  shutting_down_(false),
176
+ db_lock_(nullptr),
170
177
  manual_compaction_paused_(false),
171
178
  bg_cv_(&mutex_),
172
179
  logfile_number_(0),
@@ -191,9 +198,8 @@ DBImpl::DBImpl(const DBOptions& options, const std::string& dbname,
191
198
  bg_purge_scheduled_(0),
192
199
  disable_delete_obsolete_files_(0),
193
200
  pending_purge_obsolete_files_(0),
194
- delete_obsolete_files_last_run_(env_->NowMicros()),
201
+ delete_obsolete_files_last_run_(immutable_db_options_.clock->NowMicros()),
195
202
  last_stats_dump_time_microsec_(0),
196
- next_job_id_(1),
197
203
  has_unpersisted_data_(false),
198
204
  unable_to_release_oldest_log_(false),
199
205
  num_running_ingest_file_(0),
@@ -201,7 +207,6 @@ DBImpl::DBImpl(const DBOptions& options, const std::string& dbname,
201
207
  wal_manager_(immutable_db_options_, file_options_, io_tracer_,
202
208
  seq_per_batch),
203
209
  #endif // ROCKSDB_LITE
204
- event_logger_(immutable_db_options_.info_log.get()),
205
210
  bg_work_paused_(0),
206
211
  bg_compaction_paused_(0),
207
212
  refitting_level_(false),
@@ -228,10 +233,11 @@ DBImpl::DBImpl(const DBOptions& options, const std::string& dbname,
228
233
  use_custom_gc_(seq_per_batch),
229
234
  shutdown_initiated_(false),
230
235
  own_sfm_(options.sst_file_manager == nullptr),
231
- preserve_deletes_(options.preserve_deletes),
232
236
  closed_(false),
233
- error_handler_(this, immutable_db_options_, &mutex_),
234
- atomic_flush_install_cv_(&mutex_) {
237
+ atomic_flush_install_cv_(&mutex_),
238
+ blob_callback_(immutable_db_options_.sst_file_manager.get(), &mutex_,
239
+ &error_handler_, &event_logger_,
240
+ immutable_db_options_.listeners, dbname_) {
235
241
  // !batch_per_txn_ implies seq_per_batch_ because it is only unset for
236
242
  // WriteUnprepared, which should use seq_per_batch_.
237
243
  assert(batch_per_txn_ || seq_per_batch_);
@@ -248,25 +254,25 @@ DBImpl::DBImpl(const DBOptions& options, const std::string& dbname,
248
254
  co.num_shard_bits = immutable_db_options_.table_cache_numshardbits;
249
255
  co.metadata_charge_policy = kDontChargeCacheMetadata;
250
256
  table_cache_ = NewLRUCache(co);
257
+ SetDbSessionId();
258
+ assert(!db_session_id_.empty());
251
259
 
252
260
  versions_.reset(new VersionSet(dbname_, &immutable_db_options_, file_options_,
253
261
  table_cache_.get(), write_buffer_manager_,
254
262
  &write_controller_, &block_cache_tracer_,
255
- io_tracer_));
263
+ io_tracer_, db_session_id_));
256
264
  column_family_memtables_.reset(
257
265
  new ColumnFamilyMemTablesImpl(versions_->GetColumnFamilySet()));
258
266
 
259
267
  DumpRocksDBBuildVersion(immutable_db_options_.info_log.get());
260
- SetDbSessionId();
261
268
  DumpDBFileSummary(immutable_db_options_, dbname_, db_session_id_);
262
269
  immutable_db_options_.Dump(immutable_db_options_.info_log.get());
263
270
  mutable_db_options_.Dump(immutable_db_options_.info_log.get());
264
271
  DumpSupportInfo(immutable_db_options_.info_log.get());
265
272
 
266
- // always open the DB with 0 here, which means if preserve_deletes_==true
267
- // we won't drop any deletion markers until SetPreserveDeletesSequenceNumber()
268
- // is called by client and this seqnum is advanced.
269
- preserve_deletes_seqnum_.store(0);
273
+ if (write_buffer_manager_) {
274
+ wbm_stall_.reset(new WBMStallInterface());
275
+ }
270
276
  }
271
277
 
272
278
  Status DBImpl::Resume() {
@@ -370,15 +376,12 @@ Status DBImpl::ResumeImpl(DBRecoverContext context) {
370
376
  s = AtomicFlushMemTables(cfds, flush_opts, context.flush_reason);
371
377
  mutex_.Lock();
372
378
  } else {
373
- for (auto cfd : *versions_->GetColumnFamilySet()) {
379
+ for (auto cfd : versions_->GetRefedColumnFamilySet()) {
374
380
  if (cfd->IsDropped()) {
375
381
  continue;
376
382
  }
377
- cfd->Ref();
378
- mutex_.Unlock();
383
+ InstrumentedMutexUnlock u(&mutex_);
379
384
  s = FlushMemTable(cfd, flush_opts, context.flush_reason);
380
- mutex_.Lock();
381
- cfd->UnrefAndTryDelete();
382
385
  if (!s.ok()) {
383
386
  break;
384
387
  }
@@ -393,14 +396,6 @@ Status DBImpl::ResumeImpl(DBRecoverContext context) {
393
396
 
394
397
  JobContext job_context(0);
395
398
  FindObsoleteFiles(&job_context, true);
396
- if (s.ok()) {
397
- s = error_handler_.ClearBGError();
398
- } else {
399
- // NOTE: this is needed to pass ASSERT_STATUS_CHECKED
400
- // in the DBSSTTest.DBWithMaxSpaceAllowedRandomized test.
401
- // See https://github.com/facebook/rocksdb/pull/7715#issuecomment-754947952
402
- error_handler_.GetRecoveryError().PermitUncheckedError();
403
- }
404
399
  mutex_.Unlock();
405
400
 
406
401
  job_context.manifest_file_number = 1;
@@ -421,11 +416,31 @@ Status DBImpl::ResumeImpl(DBRecoverContext context) {
421
416
  immutable_db_options_.info_log,
422
417
  "DB resume requested but could not enable file deletions [%s]",
423
418
  s.ToString().c_str());
419
+ assert(false);
424
420
  }
425
421
  }
426
- ROCKS_LOG_INFO(immutable_db_options_.info_log, "Successfully resumed DB");
427
422
  }
423
+
428
424
  mutex_.Lock();
425
+ if (s.ok()) {
426
+ // This will notify and unblock threads waiting for error recovery to
427
+ // finish. Those previouly waiting threads can now proceed, which may
428
+ // include closing the db.
429
+ s = error_handler_.ClearBGError();
430
+ } else {
431
+ // NOTE: this is needed to pass ASSERT_STATUS_CHECKED
432
+ // in the DBSSTTest.DBWithMaxSpaceAllowedRandomized test.
433
+ // See https://github.com/facebook/rocksdb/pull/7715#issuecomment-754947952
434
+ error_handler_.GetRecoveryError().PermitUncheckedError();
435
+ }
436
+
437
+ if (s.ok()) {
438
+ ROCKS_LOG_INFO(immutable_db_options_.info_log, "Successfully resumed DB");
439
+ } else {
440
+ ROCKS_LOG_INFO(immutable_db_options_.info_log, "Failed to resume DB [%s]",
441
+ s.ToString().c_str());
442
+ }
443
+
429
444
  // Check for shutdown again before scheduling further compactions,
430
445
  // since we released and re-acquired the lock above
431
446
  if (shutdown_initiated_) {
@@ -478,18 +493,14 @@ void DBImpl::CancelAllBackgroundWork(bool wait) {
478
493
  s.PermitUncheckedError(); //**TODO: What to do on error?
479
494
  mutex_.Lock();
480
495
  } else {
481
- for (auto cfd : *versions_->GetColumnFamilySet()) {
496
+ for (auto cfd : versions_->GetRefedColumnFamilySet()) {
482
497
  if (!cfd->IsDropped() && cfd->initialized() && !cfd->mem()->IsEmpty()) {
483
- cfd->Ref();
484
- mutex_.Unlock();
498
+ InstrumentedMutexUnlock u(&mutex_);
485
499
  Status s = FlushMemTable(cfd, FlushOptions(), FlushReason::kShutDown);
486
500
  s.PermitUncheckedError(); //**TODO: What to do on error?
487
- mutex_.Lock();
488
- cfd->UnrefAndTryDelete();
489
501
  }
490
502
  }
491
503
  }
492
- versions_->GetColumnFamilySet()->FreeDeadColumnFamilies();
493
504
  }
494
505
 
495
506
  shutting_down_.store(true, std::memory_order_release);
@@ -511,19 +522,29 @@ Status DBImpl::CloseHelper() {
511
522
  }
512
523
  mutex_.Unlock();
513
524
 
525
+ // Below check is added as recovery_error_ is not checked and it causes crash
526
+ // in DBSSTTest.DBWithMaxSpaceAllowedWithBlobFiles when space limit is
527
+ // reached.
528
+ error_handler_.GetRecoveryError().PermitUncheckedError();
529
+
514
530
  // CancelAllBackgroundWork called with false means we just set the shutdown
515
531
  // marker. After this we do a variant of the waiting and unschedule work
516
532
  // (to consider: moving all the waiting into CancelAllBackgroundWork(true))
517
533
  CancelAllBackgroundWork(false);
518
- int bottom_compactions_unscheduled =
519
- env_->UnSchedule(this, Env::Priority::BOTTOM);
520
- int compactions_unscheduled = env_->UnSchedule(this, Env::Priority::LOW);
521
- int flushes_unscheduled = env_->UnSchedule(this, Env::Priority::HIGH);
522
- Status ret = Status::OK();
534
+
535
+ // Cancel manual compaction if there's any
536
+ if (HasPendingManualCompaction()) {
537
+ DisableManualCompaction();
538
+ }
523
539
  mutex_.Lock();
524
- bg_bottom_compaction_scheduled_ -= bottom_compactions_unscheduled;
525
- bg_compaction_scheduled_ -= compactions_unscheduled;
526
- bg_flush_scheduled_ -= flushes_unscheduled;
540
+ // Unschedule all tasks for this DB
541
+ for (uint8_t i = 0; i < static_cast<uint8_t>(TaskType::kCount); i++) {
542
+ env_->UnSchedule(GetTaskTag(i), Env::Priority::BOTTOM);
543
+ env_->UnSchedule(GetTaskTag(i), Env::Priority::LOW);
544
+ env_->UnSchedule(GetTaskTag(i), Env::Priority::HIGH);
545
+ }
546
+
547
+ Status ret = Status::OK();
527
548
 
528
549
  // Wait for background work to finish
529
550
  while (bg_bottom_compaction_scheduled_ || bg_compaction_scheduled_ ||
@@ -539,12 +560,45 @@ Status DBImpl::CloseHelper() {
539
560
  flush_scheduler_.Clear();
540
561
  trim_history_scheduler_.Clear();
541
562
 
563
+ // For now, simply trigger a manual flush at close time
564
+ // on all the column families.
565
+ // TODO(bjlemaire): Check if this is needed. Also, in the
566
+ // future we can contemplate doing a more fine-grained
567
+ // flushing by first checking if there is a need for
568
+ // flushing (but need to implement something
569
+ // else than imm()->IsFlushPending() because the output
570
+ // memtables added to imm() don't trigger flushes).
571
+ if (immutable_db_options_.experimental_mempurge_threshold > 0.0) {
572
+ Status flush_ret;
573
+ mutex_.Unlock();
574
+ for (ColumnFamilyData* cf : *versions_->GetColumnFamilySet()) {
575
+ if (immutable_db_options_.atomic_flush) {
576
+ flush_ret = AtomicFlushMemTables({cf}, FlushOptions(),
577
+ FlushReason::kManualFlush);
578
+ if (!flush_ret.ok()) {
579
+ ROCKS_LOG_INFO(
580
+ immutable_db_options_.info_log,
581
+ "Atomic flush memtables failed upon closing (mempurge).");
582
+ }
583
+ } else {
584
+ flush_ret =
585
+ FlushMemTable(cf, FlushOptions(), FlushReason::kManualFlush);
586
+ if (!flush_ret.ok()) {
587
+ ROCKS_LOG_INFO(immutable_db_options_.info_log,
588
+ "Flush memtables failed upon closing (mempurge).");
589
+ }
590
+ }
591
+ }
592
+ mutex_.Lock();
593
+ }
594
+
542
595
  while (!flush_queue_.empty()) {
543
596
  const FlushRequest& flush_req = PopFirstFromFlushQueue();
544
597
  for (const auto& iter : flush_req) {
545
598
  iter.first->UnrefAndTryDelete();
546
599
  }
547
600
  }
601
+
548
602
  while (!compaction_queue_.empty()) {
549
603
  auto cfd = PopFirstFromCompactionQueue();
550
604
  cfd->UnrefAndTryDelete();
@@ -597,7 +651,7 @@ Status DBImpl::CloseHelper() {
597
651
  ROCKS_LOG_WARN(
598
652
  immutable_db_options_.info_log,
599
653
  "Unable to Sync WAL file %s with error -- %s",
600
- LogFileName(immutable_db_options_.wal_dir, log_number).c_str(),
654
+ LogFileName(immutable_db_options_.GetWalDir(), log_number).c_str(),
601
655
  s.ToString().c_str());
602
656
  // Retain the first error
603
657
  if (ret.ok()) {
@@ -656,6 +710,10 @@ Status DBImpl::CloseHelper() {
656
710
  }
657
711
  }
658
712
 
713
+ if (write_buffer_manager_ && wbm_stall_) {
714
+ write_buffer_manager_->RemoveDBFromQueue(wbm_stall_.get());
715
+ }
716
+
659
717
  if (ret.IsAborted()) {
660
718
  // Reserve IsAborted() error for those where users didn't release
661
719
  // certain resource and they can release them and come back and
@@ -668,9 +726,11 @@ Status DBImpl::CloseHelper() {
668
726
  Status DBImpl::CloseImpl() { return CloseHelper(); }
669
727
 
670
728
  DBImpl::~DBImpl() {
729
+ InstrumentedMutexLock closing_lock_guard(&closing_mutex_);
671
730
  if (!closed_) {
672
731
  closed_ = true;
673
- CloseHelper().PermitUncheckedError();
732
+ closing_status_ = CloseHelper();
733
+ closing_status_.PermitUncheckedError();
674
734
  }
675
735
  }
676
736
 
@@ -685,23 +745,24 @@ void DBImpl::MaybeIgnoreError(Status* s) const {
685
745
  }
686
746
 
687
747
  const Status DBImpl::CreateArchivalDirectory() {
688
- if (immutable_db_options_.wal_ttl_seconds > 0 ||
689
- immutable_db_options_.wal_size_limit_mb > 0) {
690
- std::string archivalPath = ArchivalDirectory(immutable_db_options_.wal_dir);
748
+ if (immutable_db_options_.WAL_ttl_seconds > 0 ||
749
+ immutable_db_options_.WAL_size_limit_MB > 0) {
750
+ std::string archivalPath =
751
+ ArchivalDirectory(immutable_db_options_.GetWalDir());
691
752
  return env_->CreateDirIfMissing(archivalPath);
692
753
  }
693
754
  return Status::OK();
694
755
  }
695
756
 
696
757
  void DBImpl::PrintStatistics() {
697
- auto dbstats = immutable_db_options_.statistics.get();
758
+ auto dbstats = immutable_db_options_.stats;
698
759
  if (dbstats) {
699
760
  ROCKS_LOG_INFO(immutable_db_options_.info_log, "STATISTICS:\n %s",
700
761
  dbstats->ToString().c_str());
701
762
  }
702
763
  }
703
764
 
704
- void DBImpl::StartPeriodicWorkScheduler() {
765
+ Status DBImpl::StartPeriodicWorkScheduler() {
705
766
  #ifndef ROCKSDB_LITE
706
767
 
707
768
  #ifndef NDEBUG
@@ -711,7 +772,7 @@ void DBImpl::StartPeriodicWorkScheduler() {
711
772
  "DBImpl::StartPeriodicWorkScheduler:DisableScheduler",
712
773
  &disable_scheduler);
713
774
  if (disable_scheduler) {
714
- return;
775
+ return Status::OK();
715
776
  }
716
777
  #endif // !NDEBUG
717
778
 
@@ -722,10 +783,11 @@ void DBImpl::StartPeriodicWorkScheduler() {
722
783
  &periodic_work_scheduler_);
723
784
  }
724
785
 
725
- periodic_work_scheduler_->Register(
786
+ return periodic_work_scheduler_->Register(
726
787
  this, mutable_db_options_.stats_dump_period_sec,
727
788
  mutable_db_options_.stats_persist_period_sec);
728
789
  #endif // !ROCKSDB_LITE
790
+ return Status::OK();
729
791
  }
730
792
 
731
793
  // estimate the total size of stats_history_
@@ -736,8 +798,7 @@ size_t DBImpl::EstimateInMemoryStatsHistorySize() const {
736
798
  size_t size_per_slice =
737
799
  sizeof(uint64_t) + sizeof(std::map<std::string, uint64_t>);
738
800
  // non-empty map, stats_history_.begin() guaranteed to exist
739
- std::map<std::string, uint64_t> sample_slice(stats_history_.begin()->second);
740
- for (const auto& pairs : sample_slice) {
801
+ for (const auto& pairs : stats_history_.begin()->second) {
741
802
  size_per_slice +=
742
803
  pairs.first.capacity() + sizeof(pairs.first) + sizeof(pairs.second);
743
804
  }
@@ -752,9 +813,10 @@ void DBImpl::PersistStats() {
752
813
  return;
753
814
  }
754
815
  TEST_SYNC_POINT("DBImpl::PersistStats:StartRunning");
755
- uint64_t now_seconds = env_->NowMicros() / kMicrosInSecond;
816
+ uint64_t now_seconds =
817
+ immutable_db_options_.clock->NowMicros() / kMicrosInSecond;
756
818
 
757
- Statistics* statistics = immutable_db_options_.statistics.get();
819
+ Statistics* statistics = immutable_db_options_.stats;
758
820
  if (!statistics) {
759
821
  return;
760
822
  }
@@ -893,32 +955,50 @@ Status DBImpl::GetStatsHistory(
893
955
  void DBImpl::DumpStats() {
894
956
  TEST_SYNC_POINT("DBImpl::DumpStats:1");
895
957
  #ifndef ROCKSDB_LITE
896
- const DBPropertyInfo* cf_property_info =
897
- GetPropertyInfo(DB::Properties::kCFStats);
898
- assert(cf_property_info != nullptr);
899
- const DBPropertyInfo* db_property_info =
900
- GetPropertyInfo(DB::Properties::kDBStats);
901
- assert(db_property_info != nullptr);
902
-
903
958
  std::string stats;
904
959
  if (shutdown_initiated_) {
905
960
  return;
906
961
  }
962
+
907
963
  TEST_SYNC_POINT("DBImpl::DumpStats:StartRunning");
908
964
  {
909
965
  InstrumentedMutexLock l(&mutex_);
910
- default_cf_internal_stats_->GetStringProperty(
911
- *db_property_info, DB::Properties::kDBStats, &stats);
966
+ for (auto cfd : versions_->GetRefedColumnFamilySet()) {
967
+ if (cfd->initialized()) {
968
+ // Release DB mutex for gathering cache entry stats. Pass over all
969
+ // column families for this first so that other stats are dumped
970
+ // near-atomically.
971
+ InstrumentedMutexUnlock u(&mutex_);
972
+ cfd->internal_stats()->CollectCacheEntryStats(/*foreground=*/false);
973
+ }
974
+ }
975
+
976
+ const std::string* property = &DB::Properties::kDBStats;
977
+ const DBPropertyInfo* property_info = GetPropertyInfo(*property);
978
+ assert(property_info != nullptr);
979
+ assert(!property_info->need_out_of_mutex);
980
+ default_cf_internal_stats_->GetStringProperty(*property_info, *property,
981
+ &stats);
982
+
983
+ property = &DB::Properties::kCFStatsNoFileHistogram;
984
+ property_info = GetPropertyInfo(*property);
985
+ assert(property_info != nullptr);
986
+ assert(!property_info->need_out_of_mutex);
912
987
  for (auto cfd : *versions_->GetColumnFamilySet()) {
913
988
  if (cfd->initialized()) {
914
- cfd->internal_stats()->GetStringProperty(
915
- *cf_property_info, DB::Properties::kCFStatsNoFileHistogram, &stats);
989
+ cfd->internal_stats()->GetStringProperty(*property_info, *property,
990
+ &stats);
916
991
  }
917
992
  }
993
+
994
+ property = &DB::Properties::kCFFileHistogram;
995
+ property_info = GetPropertyInfo(*property);
996
+ assert(property_info != nullptr);
997
+ assert(!property_info->need_out_of_mutex);
918
998
  for (auto cfd : *versions_->GetColumnFamilySet()) {
919
999
  if (cfd->initialized()) {
920
- cfd->internal_stats()->GetStringProperty(
921
- *cf_property_info, DB::Properties::kCFFileHistogram, &stats);
1000
+ cfd->internal_stats()->GetStringProperty(*property_info, *property,
1001
+ &stats);
922
1002
  }
923
1003
  }
924
1004
  }
@@ -1002,7 +1082,6 @@ Status DBImpl::SetOptions(
1002
1082
  MutableCFOptions new_options;
1003
1083
  Status s;
1004
1084
  Status persist_options_status;
1005
- persist_options_status.PermitUncheckedError(); // Allow uninitialized access
1006
1085
  SuperVersionContext sv_context(/* create_superversion */ true);
1007
1086
  {
1008
1087
  auto db_options = GetDBOptions();
@@ -1038,9 +1117,11 @@ Status DBImpl::SetOptions(
1038
1117
  "[%s] SetOptions() succeeded", cfd->GetName().c_str());
1039
1118
  new_options.Dump(immutable_db_options_.info_log.get());
1040
1119
  if (!persist_options_status.ok()) {
1120
+ // NOTE: WriteOptionsFile already logs on failure
1041
1121
  s = persist_options_status;
1042
1122
  }
1043
1123
  } else {
1124
+ persist_options_status.PermitUncheckedError(); // less important
1044
1125
  ROCKS_LOG_WARN(immutable_db_options_.info_log, "[%s] SetOptions() failed",
1045
1126
  cfd->GetName().c_str());
1046
1127
  }
@@ -1070,9 +1151,19 @@ Status DBImpl::SetDBOptions(
1070
1151
  InstrumentedMutexLock l(&mutex_);
1071
1152
  s = GetMutableDBOptionsFromStrings(mutable_db_options_, options_map,
1072
1153
  &new_options);
1154
+
1073
1155
  if (new_options.bytes_per_sync == 0) {
1074
1156
  new_options.bytes_per_sync = 1024 * 1024;
1075
1157
  }
1158
+
1159
+ if (MutableDBOptionsAreEqual(mutable_db_options_, new_options)) {
1160
+ ROCKS_LOG_INFO(immutable_db_options_.info_log,
1161
+ "SetDBOptions(), input option value is not changed, "
1162
+ "skipping updating.");
1163
+ persist_options_status.PermitUncheckedError();
1164
+ return s;
1165
+ }
1166
+
1076
1167
  DBOptions new_db_options =
1077
1168
  BuildDBOptions(immutable_db_options_, new_options);
1078
1169
  if (s.ok()) {
@@ -1126,7 +1217,7 @@ Status DBImpl::SetDBOptions(
1126
1217
  mutable_db_options_.stats_persist_period_sec) {
1127
1218
  mutex_.Unlock();
1128
1219
  periodic_work_scheduler_->Unregister(this);
1129
- periodic_work_scheduler_->Register(
1220
+ s = periodic_work_scheduler_->Register(
1130
1221
  this, new_options.stats_dump_period_sec,
1131
1222
  new_options.stats_persist_period_sec);
1132
1223
  mutex_.Lock();
@@ -1301,7 +1392,9 @@ Status DBImpl::SyncWAL() {
1301
1392
  IOStatusCheck(io_s);
1302
1393
  }
1303
1394
  if (status.ok() && need_log_dir_sync) {
1304
- status = directories_.GetWalDir()->Fsync(IOOptions(), nullptr);
1395
+ status = directories_.GetWalDir()->FsyncWithDirOptions(
1396
+ IOOptions(), nullptr,
1397
+ DirFsyncOptions(DirFsyncOptions::FsyncReason::kNewFileSynced));
1305
1398
  }
1306
1399
  TEST_SYNC_POINT("DBWALTest::SyncWALNotWaitWrite:2");
1307
1400
 
@@ -1397,13 +1490,28 @@ void DBImpl::SetLastPublishedSequence(SequenceNumber seq) {
1397
1490
  versions_->SetLastPublishedSequence(seq);
1398
1491
  }
1399
1492
 
1400
- bool DBImpl::SetPreserveDeletesSequenceNumber(SequenceNumber seqnum) {
1401
- if (seqnum > preserve_deletes_seqnum_.load()) {
1402
- preserve_deletes_seqnum_.store(seqnum);
1403
- return true;
1493
+ Status DBImpl::GetFullHistoryTsLow(ColumnFamilyHandle* column_family,
1494
+ std::string* ts_low) {
1495
+ if (ts_low == nullptr) {
1496
+ return Status::InvalidArgument("ts_low is nullptr");
1497
+ }
1498
+ ColumnFamilyData* cfd = nullptr;
1499
+ if (column_family == nullptr) {
1500
+ cfd = default_cf_handle_->cfd();
1404
1501
  } else {
1405
- return false;
1502
+ auto cfh = static_cast_with_check<ColumnFamilyHandleImpl>(column_family);
1503
+ assert(cfh != nullptr);
1504
+ cfd = cfh->cfd();
1505
+ }
1506
+ assert(cfd != nullptr && cfd->user_comparator() != nullptr);
1507
+ if (cfd->user_comparator()->timestamp_size() == 0) {
1508
+ return Status::InvalidArgument(
1509
+ "Timestamp is not enabled in this column family");
1406
1510
  }
1511
+ InstrumentedMutexLock l(&mutex_);
1512
+ *ts_low = cfd->GetFullHistoryTsLow();
1513
+ assert(cfd->user_comparator()->timestamp_size() == ts_low->size());
1514
+ return Status::OK();
1407
1515
  }
1408
1516
 
1409
1517
  InternalIterator* DBImpl::NewInternalIterator(const ReadOptions& read_options,
@@ -1456,6 +1564,8 @@ void DBImpl::BackgroundCallPurge() {
1456
1564
  mutex_.Lock();
1457
1565
  }
1458
1566
 
1567
+ assert(bg_purge_scheduled_ > 0);
1568
+
1459
1569
  // Can't use iterator to go over purge_files_ because inside the loop we're
1460
1570
  // unlocking the mutex that protects purge_files_.
1461
1571
  while (!purge_files_.empty()) {
@@ -1523,17 +1633,7 @@ static void CleanupIteratorState(void* arg1, void* /*arg2*/) {
1523
1633
  delete state->super_version;
1524
1634
  }
1525
1635
  if (job_context.HaveSomethingToDelete()) {
1526
- if (state->background_purge) {
1527
- // PurgeObsoleteFiles here does not delete files. Instead, it adds the
1528
- // files to be deleted to a job queue, and deletes it in a separate
1529
- // background thread.
1530
- state->db->PurgeObsoleteFiles(job_context, true /* schedule only */);
1531
- state->mu->Lock();
1532
- state->db->SchedulePurge();
1533
- state->mu->Unlock();
1534
- } else {
1535
- state->db->PurgeObsoleteFiles(job_context);
1536
- }
1636
+ state->db->PurgeObsoleteFiles(job_context, state->background_purge);
1537
1637
  }
1538
1638
  job_context.Clean();
1539
1639
  }
@@ -1639,22 +1739,24 @@ Status DBImpl::GetImpl(const ReadOptions& read_options, const Slice& key,
1639
1739
  get_impl_options.merge_operands != nullptr);
1640
1740
 
1641
1741
  assert(get_impl_options.column_family);
1642
- const Comparator* ucmp = get_impl_options.column_family->GetComparator();
1643
- assert(ucmp);
1644
- size_t ts_sz = ucmp->timestamp_size();
1645
- GetWithTimestampReadCallback read_cb(0); // Will call Refresh
1646
1742
 
1647
- #ifndef NDEBUG
1648
- if (ts_sz > 0) {
1649
- assert(read_options.timestamp);
1650
- assert(read_options.timestamp->size() == ts_sz);
1743
+ if (read_options.timestamp) {
1744
+ const Status s = FailIfTsSizesMismatch(get_impl_options.column_family,
1745
+ *(read_options.timestamp));
1746
+ if (!s.ok()) {
1747
+ return s;
1748
+ }
1651
1749
  } else {
1652
- assert(!read_options.timestamp);
1750
+ const Status s = FailIfCfHasTs(get_impl_options.column_family);
1751
+ if (!s.ok()) {
1752
+ return s;
1753
+ }
1653
1754
  }
1654
- #endif // NDEBUG
1655
1755
 
1656
- PERF_CPU_TIMER_GUARD(get_cpu_nanos, env_);
1657
- StopWatch sw(env_, stats_, DB_GET);
1756
+ GetWithTimestampReadCallback read_cb(0); // Will call Refresh
1757
+
1758
+ PERF_CPU_TIMER_GUARD(get_cpu_nanos, immutable_db_options_.clock);
1759
+ StopWatch sw(immutable_db_options_.clock, stats_, DB_GET);
1658
1760
  PERF_TIMER_GUARD(get_snapshot_time);
1659
1761
 
1660
1762
  auto cfh = static_cast_with_check<ColumnFamilyHandleImpl>(
@@ -1718,7 +1820,11 @@ Status DBImpl::GetImpl(const ReadOptions& read_options, const Slice& key,
1718
1820
  }
1719
1821
  // If timestamp is used, we use read callback to ensure <key,t,s> is returned
1720
1822
  // only if t <= read_opts.timestamp and s <= snapshot.
1721
- if (ts_sz > 0) {
1823
+ // HACK: temporarily overwrite input struct field but restore
1824
+ SaveAndRestore<ReadCallback*> restore_callback(&get_impl_options.callback);
1825
+ const Comparator* ucmp = get_impl_options.column_family->GetComparator();
1826
+ assert(ucmp);
1827
+ if (ucmp->timestamp_size() > 0) {
1722
1828
  assert(!get_impl_options
1723
1829
  .callback); // timestamp with callback is not supported
1724
1830
  read_cb.Refresh(snapshot);
@@ -1741,7 +1847,8 @@ Status DBImpl::GetImpl(const ReadOptions& read_options, const Slice& key,
1741
1847
  bool skip_memtable = (read_options.read_tier == kPersistedTier &&
1742
1848
  has_unpersisted_data_.load(std::memory_order_relaxed));
1743
1849
  bool done = false;
1744
- std::string* timestamp = ts_sz > 0 ? get_impl_options.timestamp : nullptr;
1850
+ std::string* timestamp =
1851
+ ucmp->timestamp_size() > 0 ? get_impl_options.timestamp : nullptr;
1745
1852
  if (!skip_memtable) {
1746
1853
  // Get value associated with key
1747
1854
  if (get_impl_options.get_value) {
@@ -1783,11 +1890,14 @@ Status DBImpl::GetImpl(const ReadOptions& read_options, const Slice& key,
1783
1890
  return s;
1784
1891
  }
1785
1892
  }
1893
+ TEST_SYNC_POINT("DBImpl::GetImpl:PostMemTableGet:0");
1894
+ TEST_SYNC_POINT("DBImpl::GetImpl:PostMemTableGet:1");
1895
+ PinnedIteratorsManager pinned_iters_mgr;
1786
1896
  if (!done) {
1787
1897
  PERF_TIMER_GUARD(get_from_output_files_time);
1788
1898
  sv->current->Get(
1789
1899
  read_options, lkey, get_impl_options.value, timestamp, &s,
1790
- &merge_context, &max_covering_tombstone_seq,
1900
+ &merge_context, &max_covering_tombstone_seq, &pinned_iters_mgr,
1791
1901
  get_impl_options.get_value ? get_impl_options.value_found : nullptr,
1792
1902
  nullptr, nullptr,
1793
1903
  get_impl_options.get_value ? get_impl_options.callback : nullptr,
@@ -1799,8 +1909,6 @@ Status DBImpl::GetImpl(const ReadOptions& read_options, const Slice& key,
1799
1909
  {
1800
1910
  PERF_TIMER_GUARD(get_post_process_time);
1801
1911
 
1802
- ReturnAndCleanupSuperVersion(cfd, sv);
1803
-
1804
1912
  RecordTick(stats_, NUMBER_KEYS_READ);
1805
1913
  size_t size = 0;
1806
1914
  if (s.ok()) {
@@ -1827,6 +1935,8 @@ Status DBImpl::GetImpl(const ReadOptions& read_options, const Slice& key,
1827
1935
  PERF_COUNTER_ADD(get_read_bytes, size);
1828
1936
  }
1829
1937
  RecordInHistogram(stats_, BYTES_PER_READ, size);
1938
+
1939
+ ReturnAndCleanupSuperVersion(cfd, sv);
1830
1940
  }
1831
1941
  return s;
1832
1942
  }
@@ -1844,27 +1954,54 @@ std::vector<Status> DBImpl::MultiGet(
1844
1954
  const std::vector<ColumnFamilyHandle*>& column_family,
1845
1955
  const std::vector<Slice>& keys, std::vector<std::string>* values,
1846
1956
  std::vector<std::string>* timestamps) {
1847
- PERF_CPU_TIMER_GUARD(get_cpu_nanos, env_);
1848
- StopWatch sw(env_, stats_, DB_MULTIGET);
1957
+ PERF_CPU_TIMER_GUARD(get_cpu_nanos, immutable_db_options_.clock);
1958
+ StopWatch sw(immutable_db_options_.clock, stats_, DB_MULTIGET);
1849
1959
  PERF_TIMER_GUARD(get_snapshot_time);
1850
1960
 
1851
- #ifndef NDEBUG
1852
- for (const auto* cfh : column_family) {
1853
- assert(cfh);
1854
- const Comparator* const ucmp = cfh->GetComparator();
1855
- assert(ucmp);
1856
- if (ucmp->timestamp_size() > 0) {
1857
- assert(read_options.timestamp);
1858
- assert(ucmp->timestamp_size() == read_options.timestamp->size());
1961
+ size_t num_keys = keys.size();
1962
+ assert(column_family.size() == num_keys);
1963
+ std::vector<Status> stat_list(num_keys);
1964
+
1965
+ bool should_fail = false;
1966
+ for (size_t i = 0; i < num_keys; ++i) {
1967
+ assert(column_family[i]);
1968
+ if (read_options.timestamp) {
1969
+ stat_list[i] =
1970
+ FailIfTsSizesMismatch(column_family[i], *(read_options.timestamp));
1971
+ if (!stat_list[i].ok()) {
1972
+ should_fail = true;
1973
+ }
1859
1974
  } else {
1860
- assert(!read_options.timestamp);
1975
+ stat_list[i] = FailIfCfHasTs(column_family[i]);
1976
+ if (!stat_list[i].ok()) {
1977
+ should_fail = true;
1978
+ }
1979
+ }
1980
+ }
1981
+
1982
+ if (should_fail) {
1983
+ for (auto& s : stat_list) {
1984
+ if (s.ok()) {
1985
+ s = Status::Incomplete(
1986
+ "DB not queried due to invalid argument(s) in the same MultiGet");
1987
+ }
1988
+ }
1989
+ return stat_list;
1990
+ }
1991
+
1992
+ if (tracer_) {
1993
+ // TODO: This mutex should be removed later, to improve performance when
1994
+ // tracing is enabled.
1995
+ InstrumentedMutexLock lock(&trace_mutex_);
1996
+ if (tracer_) {
1997
+ // TODO: maybe handle the tracing status?
1998
+ tracer_->MultiGet(column_family, keys).PermitUncheckedError();
1861
1999
  }
1862
2000
  }
1863
- #endif // NDEBUG
1864
2001
 
1865
2002
  SequenceNumber consistent_seqnum;
1866
2003
 
1867
- std::unordered_map<uint32_t, MultiGetColumnFamilyData> multiget_cf_data(
2004
+ UnorderedMap<uint32_t, MultiGetColumnFamilyData> multiget_cf_data(
1868
2005
  column_family.size());
1869
2006
  for (auto cf : column_family) {
1870
2007
  auto cfh = static_cast_with_check<ColumnFamilyHandleImpl>(cf);
@@ -1876,13 +2013,13 @@ std::vector<Status> DBImpl::MultiGet(
1876
2013
  }
1877
2014
 
1878
2015
  std::function<MultiGetColumnFamilyData*(
1879
- std::unordered_map<uint32_t, MultiGetColumnFamilyData>::iterator&)>
2016
+ UnorderedMap<uint32_t, MultiGetColumnFamilyData>::iterator&)>
1880
2017
  iter_deref_lambda =
1881
- [](std::unordered_map<uint32_t, MultiGetColumnFamilyData>::iterator&
2018
+ [](UnorderedMap<uint32_t, MultiGetColumnFamilyData>::iterator&
1882
2019
  cf_iter) { return &cf_iter->second; };
1883
2020
 
1884
2021
  bool unref_only =
1885
- MultiCFSnapshot<std::unordered_map<uint32_t, MultiGetColumnFamilyData>>(
2022
+ MultiCFSnapshot<UnorderedMap<uint32_t, MultiGetColumnFamilyData>>(
1886
2023
  read_options, nullptr, iter_deref_lambda, &multiget_cf_data,
1887
2024
  &consistent_seqnum);
1888
2025
 
@@ -1893,8 +2030,6 @@ std::vector<Status> DBImpl::MultiGet(
1893
2030
  MergeContext merge_context;
1894
2031
 
1895
2032
  // Note: this always resizes the values array
1896
- size_t num_keys = keys.size();
1897
- std::vector<Status> stat_list(num_keys);
1898
2033
  values->resize(num_keys);
1899
2034
  if (timestamps) {
1900
2035
  timestamps->resize(num_keys);
@@ -1954,11 +2089,13 @@ std::vector<Status> DBImpl::MultiGet(
1954
2089
  if (!done) {
1955
2090
  PinnableSlice pinnable_val;
1956
2091
  PERF_TIMER_GUARD(get_from_output_files_time);
1957
- super_version->current->Get(
1958
- read_options, lkey, &pinnable_val, timestamp, &s, &merge_context,
1959
- &max_covering_tombstone_seq, /*value_found=*/nullptr,
1960
- /*key_exists=*/nullptr,
1961
- /*seq=*/nullptr, read_callback);
2092
+ PinnedIteratorsManager pinned_iters_mgr;
2093
+ super_version->current->Get(read_options, lkey, &pinnable_val, timestamp,
2094
+ &s, &merge_context,
2095
+ &max_covering_tombstone_seq,
2096
+ &pinned_iters_mgr, /*value_found=*/nullptr,
2097
+ /*key_exists=*/nullptr,
2098
+ /*seq=*/nullptr, read_callback);
1962
2099
  value->assign(pinnable_val.data(), pinnable_val.size());
1963
2100
  RecordTick(stats_, MEMTABLE_MISS);
1964
2101
  }
@@ -1974,9 +2111,8 @@ std::vector<Status> DBImpl::MultiGet(
1974
2111
  break;
1975
2112
  }
1976
2113
  }
1977
-
1978
2114
  if (read_options.deadline.count() &&
1979
- env_->NowMicros() >
2115
+ immutable_db_options_.clock->NowMicros() >
1980
2116
  static_cast<uint64_t>(read_options.deadline.count())) {
1981
2117
  break;
1982
2118
  }
@@ -1985,8 +2121,8 @@ std::vector<Status> DBImpl::MultiGet(
1985
2121
  if (keys_read < num_keys) {
1986
2122
  // The only reason to break out of the loop is when the deadline is
1987
2123
  // exceeded
1988
- assert(env_->NowMicros() >
1989
- static_cast<uint64_t>(read_options.deadline.count()));
2124
+ assert(immutable_db_options_.clock->NowMicros() >
2125
+ static_cast<uint64_t>(read_options.deadline.count()));
1990
2126
  for (++keys_read; keys_read < num_keys; ++keys_read) {
1991
2127
  stat_list[keys_read] = Status::TimedOut();
1992
2128
  }
@@ -2066,7 +2202,7 @@ bool DBImpl::MultiCFSnapshot(
2066
2202
  // consecutive retries, it means the write rate is very high. In that case
2067
2203
  // its probably ok to take the mutex on the 3rd try so we can succeed for
2068
2204
  // sure
2069
- static const int num_retries = 3;
2205
+ constexpr int num_retries = 3;
2070
2206
  for (int i = 0; i < num_retries; ++i) {
2071
2207
  last_try = (i == num_retries - 1);
2072
2208
  bool retry = false;
@@ -2096,8 +2232,9 @@ bool DBImpl::MultiCFSnapshot(
2096
2232
  *snapshot = versions_->LastPublishedSequence();
2097
2233
  }
2098
2234
  } else {
2099
- *snapshot = reinterpret_cast<const SnapshotImpl*>(read_options.snapshot)
2100
- ->number_;
2235
+ *snapshot =
2236
+ static_cast_with_check<const SnapshotImpl>(read_options.snapshot)
2237
+ ->number_;
2101
2238
  }
2102
2239
  for (auto cf_iter = cf_list->begin(); cf_iter != cf_list->end();
2103
2240
  ++cf_iter) {
@@ -2159,20 +2296,41 @@ void DBImpl::MultiGet(const ReadOptions& read_options, const size_t num_keys,
2159
2296
  return;
2160
2297
  }
2161
2298
 
2162
- #ifndef NDEBUG
2299
+ bool should_fail = false;
2163
2300
  for (size_t i = 0; i < num_keys; ++i) {
2164
2301
  ColumnFamilyHandle* cfh = column_families[i];
2165
2302
  assert(cfh);
2166
- const Comparator* const ucmp = cfh->GetComparator();
2167
- assert(ucmp);
2168
- if (ucmp->timestamp_size() > 0) {
2169
- assert(read_options.timestamp);
2170
- assert(read_options.timestamp->size() == ucmp->timestamp_size());
2303
+ if (read_options.timestamp) {
2304
+ statuses[i] = FailIfTsSizesMismatch(cfh, *(read_options.timestamp));
2305
+ if (!statuses[i].ok()) {
2306
+ should_fail = true;
2307
+ }
2171
2308
  } else {
2172
- assert(!read_options.timestamp);
2309
+ statuses[i] = FailIfCfHasTs(cfh);
2310
+ if (!statuses[i].ok()) {
2311
+ should_fail = true;
2312
+ }
2313
+ }
2314
+ }
2315
+ if (should_fail) {
2316
+ for (size_t i = 0; i < num_keys; ++i) {
2317
+ if (statuses[i].ok()) {
2318
+ statuses[i] = Status::Incomplete(
2319
+ "DB not queried due to invalid argument(s) in the same MultiGet");
2320
+ }
2321
+ }
2322
+ return;
2323
+ }
2324
+
2325
+ if (tracer_) {
2326
+ // TODO: This mutex should be removed later, to improve performance when
2327
+ // tracing is enabled.
2328
+ InstrumentedMutexLock lock(&trace_mutex_);
2329
+ if (tracer_) {
2330
+ // TODO: maybe handle the tracing status?
2331
+ tracer_->MultiGet(num_keys, column_families, keys).PermitUncheckedError();
2173
2332
  }
2174
2333
  }
2175
- #endif // NDEBUG
2176
2334
 
2177
2335
  autovector<KeyContext, MultiGetContext::MAX_BATCH_SIZE> key_context;
2178
2336
  autovector<KeyContext*, MultiGetContext::MAX_BATCH_SIZE> sorted_keys;
@@ -2191,20 +2349,18 @@ void DBImpl::MultiGet(const ReadOptions& read_options, const size_t num_keys,
2191
2349
  multiget_cf_data;
2192
2350
  size_t cf_start = 0;
2193
2351
  ColumnFamilyHandle* cf = sorted_keys[0]->column_family;
2352
+
2194
2353
  for (size_t i = 0; i < num_keys; ++i) {
2195
2354
  KeyContext* key_ctx = sorted_keys[i];
2196
2355
  if (key_ctx->column_family != cf) {
2197
- multiget_cf_data.emplace_back(
2198
- MultiGetColumnFamilyData(cf, cf_start, i - cf_start, nullptr));
2356
+ multiget_cf_data.emplace_back(cf, cf_start, i - cf_start, nullptr);
2199
2357
  cf_start = i;
2200
2358
  cf = key_ctx->column_family;
2201
2359
  }
2202
2360
  }
2203
- {
2204
- // multiget_cf_data.emplace_back(
2205
- // MultiGetColumnFamilyData(cf, cf_start, num_keys - cf_start, nullptr));
2206
- multiget_cf_data.emplace_back(cf, cf_start, num_keys - cf_start, nullptr);
2207
- }
2361
+
2362
+ multiget_cf_data.emplace_back(cf, cf_start, num_keys - cf_start, nullptr);
2363
+
2208
2364
  std::function<MultiGetColumnFamilyData*(
2209
2365
  autovector<MultiGetColumnFamilyData,
2210
2366
  MultiGetContext::MAX_BATCH_SIZE>::iterator&)>
@@ -2264,7 +2420,7 @@ struct CompareKeyContext {
2264
2420
  static_cast<ColumnFamilyHandleImpl*>(lhs->column_family);
2265
2421
  uint32_t cfd_id1 = cfh->cfd()->GetID();
2266
2422
  const Comparator* comparator = cfh->cfd()->user_comparator();
2267
- cfh = static_cast<ColumnFamilyHandleImpl*>(lhs->column_family);
2423
+ cfh = static_cast<ColumnFamilyHandleImpl*>(rhs->column_family);
2268
2424
  uint32_t cfd_id2 = cfh->cfd()->GetID();
2269
2425
 
2270
2426
  if (cfd_id1 < cfd_id2) {
@@ -2288,39 +2444,16 @@ struct CompareKeyContext {
2288
2444
  void DBImpl::PrepareMultiGetKeys(
2289
2445
  size_t num_keys, bool sorted_input,
2290
2446
  autovector<KeyContext*, MultiGetContext::MAX_BATCH_SIZE>* sorted_keys) {
2291
- #ifndef NDEBUG
2292
2447
  if (sorted_input) {
2293
- for (size_t index = 0; index < sorted_keys->size(); ++index) {
2294
- if (index > 0) {
2295
- KeyContext* lhs = (*sorted_keys)[index - 1];
2296
- KeyContext* rhs = (*sorted_keys)[index];
2297
- ColumnFamilyHandleImpl* cfh =
2298
- static_cast_with_check<ColumnFamilyHandleImpl>(lhs->column_family);
2299
- uint32_t cfd_id1 = cfh->cfd()->GetID();
2300
- const Comparator* comparator = cfh->cfd()->user_comparator();
2301
- cfh =
2302
- static_cast_with_check<ColumnFamilyHandleImpl>(lhs->column_family);
2303
- uint32_t cfd_id2 = cfh->cfd()->GetID();
2304
-
2305
- assert(cfd_id1 <= cfd_id2);
2306
- if (cfd_id1 < cfd_id2) {
2307
- continue;
2308
- }
2309
-
2310
- // Both keys are from the same column family
2311
- int cmp = comparator->CompareWithoutTimestamp(
2312
- *(lhs->key), /*a_has_ts=*/false, *(rhs->key), /*b_has_ts=*/false);
2313
- assert(cmp <= 0);
2314
- }
2315
- index++;
2316
- }
2317
- }
2448
+ #ifndef NDEBUG
2449
+ assert(std::is_sorted(sorted_keys->begin(), sorted_keys->end(),
2450
+ CompareKeyContext()));
2318
2451
  #endif
2319
- if (!sorted_input) {
2320
- CompareKeyContext sort_comparator;
2321
- std::sort(sorted_keys->begin(), sorted_keys->begin() + num_keys,
2322
- sort_comparator);
2452
+ return;
2323
2453
  }
2454
+
2455
+ std::sort(sorted_keys->begin(), sorted_keys->begin() + num_keys,
2456
+ CompareKeyContext());
2324
2457
  }
2325
2458
 
2326
2459
  void DBImpl::MultiGet(const ReadOptions& read_options,
@@ -2336,6 +2469,15 @@ void DBImpl::MultiGet(const ReadOptions& read_options,
2336
2469
  const Slice* keys, PinnableSlice* values,
2337
2470
  std::string* timestamps, Status* statuses,
2338
2471
  const bool sorted_input) {
2472
+ if (tracer_) {
2473
+ // TODO: This mutex should be removed later, to improve performance when
2474
+ // tracing is enabled.
2475
+ InstrumentedMutexLock lock(&trace_mutex_);
2476
+ if (tracer_) {
2477
+ // TODO: maybe handle the tracing status?
2478
+ tracer_->MultiGet(num_keys, column_family, keys).PermitUncheckedError();
2479
+ }
2480
+ }
2339
2481
  autovector<KeyContext, MultiGetContext::MAX_BATCH_SIZE> key_context;
2340
2482
  autovector<KeyContext*, MultiGetContext::MAX_BATCH_SIZE> sorted_keys;
2341
2483
  sorted_keys.resize(num_keys);
@@ -2425,8 +2567,8 @@ Status DBImpl::MultiGetImpl(
2425
2567
  autovector<KeyContext*, MultiGetContext::MAX_BATCH_SIZE>* sorted_keys,
2426
2568
  SuperVersion* super_version, SequenceNumber snapshot,
2427
2569
  ReadCallback* callback) {
2428
- PERF_CPU_TIMER_GUARD(get_cpu_nanos, env_);
2429
- StopWatch sw(env_, stats_, DB_MULTIGET);
2570
+ PERF_CPU_TIMER_GUARD(get_cpu_nanos, immutable_db_options_.clock);
2571
+ StopWatch sw(immutable_db_options_.clock, stats_, DB_MULTIGET);
2430
2572
 
2431
2573
  // For each of the given keys, apply the entire "get" process as follows:
2432
2574
  // First look in the memtable, then in the immutable memtable (if any).
@@ -2437,7 +2579,7 @@ Status DBImpl::MultiGetImpl(
2437
2579
  uint64_t curr_value_size = 0;
2438
2580
  while (keys_left) {
2439
2581
  if (read_options.deadline.count() &&
2440
- env_->NowMicros() >
2582
+ immutable_db_options_.clock->NowMicros() >
2441
2583
  static_cast<uint64_t>(read_options.deadline.count())) {
2442
2584
  s = Status::TimedOut();
2443
2585
  break;
@@ -2805,15 +2947,22 @@ Iterator* DBImpl::NewIterator(const ReadOptions& read_options,
2805
2947
  return NewErrorIterator(Status::NotSupported(
2806
2948
  "ReadTier::kPersistedData is not yet supported in iterators."));
2807
2949
  }
2808
- // if iterator wants internal keys, we can only proceed if
2809
- // we can guarantee the deletes haven't been processed yet
2810
- if (immutable_db_options_.preserve_deletes &&
2811
- read_options.iter_start_seqnum > 0 &&
2812
- read_options.iter_start_seqnum < preserve_deletes_seqnum_.load()) {
2813
- return NewErrorIterator(Status::InvalidArgument(
2814
- "Iterator requested internal keys which are too old and are not"
2815
- " guaranteed to be preserved, try larger iter_start_seqnum opt."));
2950
+
2951
+ assert(column_family);
2952
+
2953
+ if (read_options.timestamp) {
2954
+ const Status s =
2955
+ FailIfTsSizesMismatch(column_family, *(read_options.timestamp));
2956
+ if (!s.ok()) {
2957
+ return NewErrorIterator(s);
2958
+ }
2959
+ } else {
2960
+ const Status s = FailIfCfHasTs(column_family);
2961
+ if (!s.ok()) {
2962
+ return NewErrorIterator(s);
2963
+ }
2816
2964
  }
2965
+
2817
2966
  auto cfh = static_cast_with_check<ColumnFamilyHandleImpl>(column_family);
2818
2967
  ColumnFamilyData* cfd = cfh->cfd();
2819
2968
  assert(cfd != nullptr);
@@ -2940,6 +3089,25 @@ Status DBImpl::NewIterators(
2940
3089
  return Status::NotSupported(
2941
3090
  "ReadTier::kPersistedData is not yet supported in iterators.");
2942
3091
  }
3092
+
3093
+ if (read_options.timestamp) {
3094
+ for (auto* cf : column_families) {
3095
+ assert(cf);
3096
+ const Status s = FailIfTsSizesMismatch(cf, *(read_options.timestamp));
3097
+ if (!s.ok()) {
3098
+ return s;
3099
+ }
3100
+ }
3101
+ } else {
3102
+ for (auto* cf : column_families) {
3103
+ assert(cf);
3104
+ const Status s = FailIfCfHasTs(cf);
3105
+ if (!s.ok()) {
3106
+ return s;
3107
+ }
3108
+ }
3109
+ }
3110
+
2943
3111
  ReadCallback* read_callback = nullptr; // No read callback provided.
2944
3112
  iterators->clear();
2945
3113
  iterators->reserve(column_families.size());
@@ -2990,7 +3158,8 @@ const Snapshot* DBImpl::GetSnapshotForWriteConflictBoundary() {
2990
3158
  SnapshotImpl* DBImpl::GetSnapshotImpl(bool is_write_conflict_boundary,
2991
3159
  bool lock) {
2992
3160
  int64_t unix_time = 0;
2993
- env_->GetCurrentTime(&unix_time).PermitUncheckedError(); // Ignore error
3161
+ immutable_db_options_.clock->GetCurrentTime(&unix_time)
3162
+ .PermitUncheckedError(); // Ignore error
2994
3163
  SnapshotImpl* s = new SnapshotImpl;
2995
3164
 
2996
3165
  if (lock) {
@@ -3016,7 +3185,7 @@ SnapshotImpl* DBImpl::GetSnapshotImpl(bool is_write_conflict_boundary,
3016
3185
  }
3017
3186
 
3018
3187
  namespace {
3019
- typedef autovector<ColumnFamilyData*, 2> CfdList;
3188
+ using CfdList = autovector<ColumnFamilyData*, 2>;
3020
3189
  bool CfdListContains(const CfdList& list, ColumnFamilyData* cfd) {
3021
3190
  for (const ColumnFamilyData* t : list) {
3022
3191
  if (t == cfd) {
@@ -3028,6 +3197,12 @@ bool CfdListContains(const CfdList& list, ColumnFamilyData* cfd) {
3028
3197
  } // namespace
3029
3198
 
3030
3199
  void DBImpl::ReleaseSnapshot(const Snapshot* s) {
3200
+ if (s == nullptr) {
3201
+ // DBImpl::GetSnapshot() can return nullptr when snapshot
3202
+ // not supported by specifying the condition:
3203
+ // inplace_update_support enabled.
3204
+ return;
3205
+ }
3031
3206
  const SnapshotImpl* casted_s = reinterpret_cast<const SnapshotImpl*>(s);
3032
3207
  {
3033
3208
  InstrumentedMutexLock l(&mutex_);
@@ -3135,12 +3310,17 @@ FileSystem* DBImpl::GetFileSystem() const {
3135
3310
  return immutable_db_options_.fs.get();
3136
3311
  }
3137
3312
 
3313
+ SystemClock* DBImpl::GetSystemClock() const {
3314
+ return immutable_db_options_.clock;
3315
+ }
3316
+
3138
3317
  #ifndef ROCKSDB_LITE
3139
3318
 
3140
- Status DBImpl::StartIOTrace(Env* env, const TraceOptions& trace_options,
3319
+ Status DBImpl::StartIOTrace(const TraceOptions& trace_options,
3141
3320
  std::unique_ptr<TraceWriter>&& trace_writer) {
3142
3321
  assert(trace_writer != nullptr);
3143
- return io_tracer_->StartIOTrace(env, trace_options, std::move(trace_writer));
3322
+ return io_tracer_->StartIOTrace(GetSystemClock(), trace_options,
3323
+ std::move(trace_writer));
3144
3324
  }
3145
3325
 
3146
3326
  Status DBImpl::EndIOTrace() {
@@ -3179,16 +3359,21 @@ bool DBImpl::GetProperty(ColumnFamilyHandle* column_family,
3179
3359
  }
3180
3360
  return ret_value;
3181
3361
  } else if (property_info->handle_string) {
3182
- InstrumentedMutexLock l(&mutex_);
3183
- return cfd->internal_stats()->GetStringProperty(*property_info, property,
3184
- value);
3362
+ if (property_info->need_out_of_mutex) {
3363
+ return cfd->internal_stats()->GetStringProperty(*property_info, property,
3364
+ value);
3365
+ } else {
3366
+ InstrumentedMutexLock l(&mutex_);
3367
+ return cfd->internal_stats()->GetStringProperty(*property_info, property,
3368
+ value);
3369
+ }
3185
3370
  } else if (property_info->handle_string_dbimpl) {
3186
- std::string tmp_value;
3187
- bool ret_value = (this->*(property_info->handle_string_dbimpl))(&tmp_value);
3188
- if (ret_value) {
3189
- *value = tmp_value;
3371
+ if (property_info->need_out_of_mutex) {
3372
+ return (this->*(property_info->handle_string_dbimpl))(value);
3373
+ } else {
3374
+ InstrumentedMutexLock l(&mutex_);
3375
+ return (this->*(property_info->handle_string_dbimpl))(value);
3190
3376
  }
3191
- return ret_value;
3192
3377
  }
3193
3378
  // Shouldn't reach here since exactly one of handle_string and handle_int
3194
3379
  // should be non-nullptr.
@@ -3206,9 +3391,14 @@ bool DBImpl::GetMapProperty(ColumnFamilyHandle* column_family,
3206
3391
  if (property_info == nullptr) {
3207
3392
  return false;
3208
3393
  } else if (property_info->handle_map) {
3209
- InstrumentedMutexLock l(&mutex_);
3210
- return cfd->internal_stats()->GetMapProperty(*property_info, property,
3211
- value);
3394
+ if (property_info->need_out_of_mutex) {
3395
+ return cfd->internal_stats()->GetMapProperty(*property_info, property,
3396
+ value);
3397
+ } else {
3398
+ InstrumentedMutexLock l(&mutex_);
3399
+ return cfd->internal_stats()->GetMapProperty(*property_info, property,
3400
+ value);
3401
+ }
3212
3402
  }
3213
3403
  // If we reach this point it means that handle_map is not provided for the
3214
3404
  // requested property
@@ -3259,7 +3449,7 @@ bool DBImpl::GetIntPropertyInternal(ColumnFamilyData* cfd,
3259
3449
 
3260
3450
  bool DBImpl::GetPropertyHandleOptionsStatistics(std::string* value) {
3261
3451
  assert(value != nullptr);
3262
- Statistics* statistics = immutable_db_options_.statistics.get();
3452
+ Statistics* statistics = immutable_db_options_.stats;
3263
3453
  if (!statistics) {
3264
3454
  return false;
3265
3455
  }
@@ -3292,15 +3482,13 @@ bool DBImpl::GetAggregatedIntProperty(const Slice& property,
3292
3482
  // Needs mutex to protect the list of column families.
3293
3483
  InstrumentedMutexLock l(&mutex_);
3294
3484
  uint64_t value;
3295
- for (auto* cfd : *versions_->GetColumnFamilySet()) {
3485
+ for (auto* cfd : versions_->GetRefedColumnFamilySet()) {
3296
3486
  if (!cfd->initialized()) {
3297
3487
  continue;
3298
3488
  }
3299
- cfd->Ref();
3300
3489
  ret = GetIntPropertyInternal(cfd, *property_info, true, &value);
3301
3490
  // GetIntPropertyInternal may release db mutex and re-acquire it.
3302
3491
  mutex_.AssertHeld();
3303
- cfd->UnrefAndTryDelete();
3304
3492
  if (ret) {
3305
3493
  sum += value;
3306
3494
  } else {
@@ -3422,7 +3610,7 @@ void DBImpl::GetApproximateMemTableStats(ColumnFamilyHandle* column_family,
3422
3610
  Status DBImpl::GetApproximateSizes(const SizeApproximationOptions& options,
3423
3611
  ColumnFamilyHandle* column_family,
3424
3612
  const Range* range, int n, uint64_t* sizes) {
3425
- if (!options.include_memtabtles && !options.include_files) {
3613
+ if (!options.include_memtables && !options.include_files) {
3426
3614
  return Status::InvalidArgument("Invalid options");
3427
3615
  }
3428
3616
 
@@ -3460,7 +3648,7 @@ Status DBImpl::GetApproximateSizes(const SizeApproximationOptions& options,
3460
3648
  options, v, k1.Encode(), k2.Encode(), /*start_level=*/0,
3461
3649
  /*end_level=*/-1, TableReaderCaller::kUserApproximateSize);
3462
3650
  }
3463
- if (options.include_memtabtles) {
3651
+ if (options.include_memtables) {
3464
3652
  sizes[i] += sv->mem->ApproximateStats(k1.Encode(), k2.Encode()).size;
3465
3653
  sizes[i] += sv->imm->ApproximateStats(k1.Encode(), k2.Encode()).size;
3466
3654
  }
@@ -3494,6 +3682,11 @@ Status DBImpl::GetUpdatesSince(
3494
3682
  SequenceNumber seq, std::unique_ptr<TransactionLogIterator>* iter,
3495
3683
  const TransactionLogIterator::ReadOptions& read_options) {
3496
3684
  RecordTick(stats_, GET_UPDATES_SINCE_CALLS);
3685
+ if (seq_per_batch_) {
3686
+ return Status::NotSupported(
3687
+ "This API is not yet compatible with write-prepared/write-unprepared "
3688
+ "transactions");
3689
+ }
3497
3690
  if (seq > versions_->LastSequence()) {
3498
3691
  return Status::NotFound("Requested sequence not yet written in the db");
3499
3692
  }
@@ -3657,6 +3850,8 @@ Status DBImpl::DeleteFilesInRanges(ColumnFamilyHandle* column_family,
3657
3850
  deleted_files.insert(level_file);
3658
3851
  level_file->being_compacted = true;
3659
3852
  }
3853
+ vstorage->ComputeCompactionScore(*cfd->ioptions(),
3854
+ *cfd->GetLatestMutableCFOptions());
3660
3855
  }
3661
3856
  }
3662
3857
  if (edit.GetDeletedFiles().empty()) {
@@ -3720,6 +3915,17 @@ void DBImpl::GetColumnFamilyMetaData(ColumnFamilyHandle* column_family,
3720
3915
  ReturnAndCleanupSuperVersion(cfd, sv);
3721
3916
  }
3722
3917
 
3918
+ void DBImpl::GetAllColumnFamilyMetaData(
3919
+ std::vector<ColumnFamilyMetaData>* metadata) {
3920
+ InstrumentedMutexLock l(&mutex_);
3921
+ for (auto cfd : *(versions_->GetColumnFamilySet())) {
3922
+ {
3923
+ metadata->emplace_back();
3924
+ cfd->current()->GetColumnFamilyMetaData(&metadata->back());
3925
+ }
3926
+ }
3927
+ }
3928
+
3723
3929
  #endif // ROCKSDB_LITE
3724
3930
 
3725
3931
  Status DBImpl::CheckConsistency() {
@@ -3811,7 +4017,8 @@ Status DBImpl::GetDbIdentityFromIdentityFile(std::string* identity) const {
3811
4017
  return s;
3812
4018
  }
3813
4019
 
3814
- // If last character is '\n' remove it from identity
4020
+ // If last character is '\n' remove it from identity. (Old implementations
4021
+ // of Env::GenerateUniqueId() would include a trailing '\n'.)
3815
4022
  if (identity->size() > 0 && identity->back() == '\n') {
3816
4023
  identity->pop_back();
3817
4024
  }
@@ -3823,29 +4030,32 @@ Status DBImpl::GetDbSessionId(std::string& session_id) const {
3823
4030
  return Status::OK();
3824
4031
  }
3825
4032
 
3826
- void DBImpl::SetDbSessionId() {
3827
- // GenerateUniqueId() generates an identifier that has a negligible
3828
- // probability of being duplicated, ~128 bits of entropy
3829
- std::string uuid = env_->GenerateUniqueId();
3830
-
3831
- // Hash and reformat that down to a more compact format, 20 characters
3832
- // in base-36 ([0-9A-Z]), which is ~103 bits of entropy, which is enough
3833
- // to expect no collisions across a billion servers each opening DBs
3834
- // a million times (~2^50). Benefits vs. raw unique id:
3835
- // * Save ~ dozen bytes per SST file
3836
- // * Shorter shared backup file names (some platforms have low limits)
3837
- // * Visually distinct from DB id format
3838
- uint64_t a = NPHash64(uuid.data(), uuid.size(), 1234U);
3839
- uint64_t b = NPHash64(uuid.data(), uuid.size(), 5678U);
3840
- db_session_id_.resize(20);
3841
- static const char* const base36 = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ";
3842
- size_t i = 0;
3843
- for (; i < 10U; ++i, a /= 36U) {
3844
- db_session_id_[i] = base36[a % 36];
3845
- }
3846
- for (; i < 20U; ++i, b /= 36U) {
3847
- db_session_id_[i] = base36[b % 36];
4033
+ namespace {
4034
+ SemiStructuredUniqueIdGen* DbSessionIdGen() {
4035
+ static SemiStructuredUniqueIdGen gen;
4036
+ return &gen;
4037
+ }
4038
+ } // namespace
4039
+
4040
+ void DBImpl::TEST_ResetDbSessionIdGen() { DbSessionIdGen()->Reset(); }
4041
+
4042
+ std::string DBImpl::GenerateDbSessionId(Env*) {
4043
+ // See SemiStructuredUniqueIdGen for its desirable properties.
4044
+ auto gen = DbSessionIdGen();
4045
+
4046
+ uint64_t lo, hi;
4047
+ gen->GenerateNext(&hi, &lo);
4048
+ if (lo == 0) {
4049
+ // Avoid emitting session ID with lo==0, so that SST unique
4050
+ // IDs can be more easily ensured non-zero
4051
+ gen->GenerateNext(&hi, &lo);
4052
+ assert(lo != 0);
3848
4053
  }
4054
+ return EncodeSessionId(hi, lo);
4055
+ }
4056
+
4057
+ void DBImpl::SetDbSessionId() {
4058
+ db_session_id_ = GenerateDbSessionId(env_);
3849
4059
  TEST_SYNC_POINT_CALLBACK("DBImpl::SetDbSessionId", &db_session_id_);
3850
4060
  }
3851
4061
 
@@ -3879,6 +4089,10 @@ Status DB::DropColumnFamilies(
3879
4089
  }
3880
4090
 
3881
4091
  Status DB::DestroyColumnFamilyHandle(ColumnFamilyHandle* column_family) {
4092
+ if (DefaultColumnFamily() == column_family) {
4093
+ return Status::InvalidArgument(
4094
+ "Cannot destroy the handle returned by DefaultColumnFamily()");
4095
+ }
3882
4096
  delete column_family;
3883
4097
  return Status::OK();
3884
4098
  }
@@ -3886,19 +4100,20 @@ Status DB::DestroyColumnFamilyHandle(ColumnFamilyHandle* column_family) {
3886
4100
  DB::~DB() {}
3887
4101
 
3888
4102
  Status DBImpl::Close() {
3889
- if (!closed_) {
3890
- {
3891
- InstrumentedMutexLock l(&mutex_);
3892
- // If there is unreleased snapshot, fail the close call
3893
- if (!snapshots_.empty()) {
3894
- return Status::Aborted("Cannot close DB with unreleased snapshot.");
3895
- }
4103
+ InstrumentedMutexLock closing_lock_guard(&closing_mutex_);
4104
+ if (closed_) {
4105
+ return closing_status_;
4106
+ }
4107
+ {
4108
+ InstrumentedMutexLock l(&mutex_);
4109
+ // If there is unreleased snapshot, fail the close call
4110
+ if (!snapshots_.empty()) {
4111
+ return Status::Aborted("Cannot close DB with unreleased snapshot.");
3896
4112
  }
3897
-
3898
- closed_ = true;
3899
- return CloseImpl();
3900
4113
  }
3901
- return Status::OK();
4114
+ closing_status_ = CloseImpl();
4115
+ closed_ = true;
4116
+ return closing_status_;
3902
4117
  }
3903
4118
 
3904
4119
  Status DB::ListColumnFamilies(const DBOptions& db_options,
@@ -3915,7 +4130,7 @@ Status DestroyDB(const std::string& dbname, const Options& options,
3915
4130
  ImmutableDBOptions soptions(SanitizeOptions(dbname, options));
3916
4131
  Env* env = soptions.env;
3917
4132
  std::vector<std::string> filenames;
3918
- bool wal_in_db_path = IsWalDirSameAsDBPath(&soptions);
4133
+ bool wal_in_db_path = soptions.IsWalDirSameAsDBPath();
3919
4134
 
3920
4135
  // Reset the logger because it holds a handle to the
3921
4136
  // log file and prevents cleanup and directory removal
@@ -3937,9 +4152,12 @@ Status DestroyDB(const std::string& dbname, const Options& options,
3937
4152
  std::string path_to_delete = dbname + "/" + fname;
3938
4153
  if (type == kMetaDatabase) {
3939
4154
  del = DestroyDB(path_to_delete, options);
3940
- } else if (type == kTableFile || type == kWalFile) {
3941
- del = DeleteDBFile(&soptions, path_to_delete, dbname,
3942
- /*force_bg=*/false, /*force_fg=*/!wal_in_db_path);
4155
+ } else if (type == kTableFile || type == kWalFile ||
4156
+ type == kBlobFile) {
4157
+ del = DeleteDBFile(
4158
+ &soptions, path_to_delete, dbname,
4159
+ /*force_bg=*/false,
4160
+ /*force_fg=*/(type == kWalFile) ? !wal_in_db_path : false);
3943
4161
  } else {
3944
4162
  del = env->DeleteFile(path_to_delete);
3945
4163
  }
@@ -3962,9 +4180,10 @@ Status DestroyDB(const std::string& dbname, const Options& options,
3962
4180
  if (env->GetChildren(path, &filenames).ok()) {
3963
4181
  for (const auto& fname : filenames) {
3964
4182
  if (ParseFileName(fname, &number, &type) &&
3965
- type == kTableFile) { // Lock file will be deleted at end
3966
- std::string table_path = path + "/" + fname;
3967
- Status del = DeleteDBFile(&soptions, table_path, dbname,
4183
+ (type == kTableFile ||
4184
+ type == kBlobFile)) { // Lock file will be deleted at end
4185
+ std::string file_path = path + "/" + fname;
4186
+ Status del = DeleteDBFile(&soptions, file_path, dbname,
3968
4187
  /*force_bg=*/false, /*force_fg=*/false);
3969
4188
  if (!del.ok() && result.ok()) {
3970
4189
  result = del;
@@ -3979,7 +4198,7 @@ Status DestroyDB(const std::string& dbname, const Options& options,
3979
4198
  std::vector<std::string> walDirFiles;
3980
4199
  std::string archivedir = ArchivalDirectory(dbname);
3981
4200
  bool wal_dir_exists = false;
3982
- if (dbname != soptions.wal_dir) {
4201
+ if (!soptions.IsWalDirSameAsDBPath(dbname)) {
3983
4202
  wal_dir_exists = env->GetChildren(soptions.wal_dir, &walDirFiles).ok();
3984
4203
  archivedir = ArchivalDirectory(soptions.wal_dir);
3985
4204
  }
@@ -4069,6 +4288,8 @@ Status DBImpl::WriteOptionsFile(bool need_mutex_lock,
4069
4288
 
4070
4289
  TEST_SYNC_POINT("DBImpl::WriteOptionsFile:1");
4071
4290
  TEST_SYNC_POINT("DBImpl::WriteOptionsFile:2");
4291
+ TEST_SYNC_POINT_CALLBACK("DBImpl::WriteOptionsFile:PersistOptions",
4292
+ &db_options);
4072
4293
 
4073
4294
  std::string file_name =
4074
4295
  TempOptionsFileName(GetName(), versions_->NewFileNumber());
@@ -4158,11 +4379,24 @@ Status DBImpl::RenameTempFileToOptionsFile(const std::string& file_name) {
4158
4379
  uint64_t options_file_number = versions_->NewFileNumber();
4159
4380
  std::string options_file_name =
4160
4381
  OptionsFileName(GetName(), options_file_number);
4161
- // Retry if the file name happen to conflict with an existing one.
4162
- s = GetEnv()->RenameFile(file_name, options_file_name);
4382
+ uint64_t options_file_size = 0;
4383
+ s = GetEnv()->GetFileSize(file_name, &options_file_size);
4384
+ if (s.ok()) {
4385
+ // Retry if the file name happen to conflict with an existing one.
4386
+ s = GetEnv()->RenameFile(file_name, options_file_name);
4387
+ std::unique_ptr<FSDirectory> dir_obj;
4388
+ if (s.ok()) {
4389
+ s = fs_->NewDirectory(GetName(), IOOptions(), &dir_obj, nullptr);
4390
+ }
4391
+ if (s.ok()) {
4392
+ s = dir_obj->FsyncWithDirOptions(IOOptions(), nullptr,
4393
+ DirFsyncOptions(options_file_name));
4394
+ }
4395
+ }
4163
4396
  if (s.ok()) {
4164
4397
  InstrumentedMutexLock l(&mutex_);
4165
4398
  versions_->options_file_number_ = options_file_number;
4399
+ versions_->options_file_size_ = options_file_size;
4166
4400
  }
4167
4401
 
4168
4402
  if (0 == disable_delete_obsolete_files_) {
@@ -4208,16 +4442,17 @@ void DBImpl::EraseThreadStatusDbInfo() const {}
4208
4442
  //
4209
4443
  // A global method that can dump out the build version
4210
4444
  void DumpRocksDBBuildVersion(Logger* log) {
4211
- #if !defined(IOS_CROSS_COMPILE)
4212
- // if we compile with Xcode, we don't run build_detect_version, so we don't
4213
- // generate util/build_version.cc
4214
- ROCKS_LOG_HEADER(log, "RocksDB version: %d.%d.%d\n", ROCKSDB_MAJOR,
4215
- ROCKSDB_MINOR, ROCKSDB_PATCH);
4216
- ROCKS_LOG_HEADER(log, "Git sha %s", rocksdb_build_git_sha);
4217
- ROCKS_LOG_HEADER(log, "Compile date %s", rocksdb_build_compile_date);
4218
- #else
4219
- (void)log; // ignore "-Wunused-parameter"
4220
- #endif
4445
+ ROCKS_LOG_HEADER(log, "RocksDB version: %s\n",
4446
+ GetRocksVersionAsString().c_str());
4447
+ const auto& props = GetRocksBuildProperties();
4448
+ const auto& sha = props.find("rocksdb_build_git_sha");
4449
+ if (sha != props.end()) {
4450
+ ROCKS_LOG_HEADER(log, "Git sha %s", sha->second.c_str());
4451
+ }
4452
+ const auto date = props.find("rocksdb_build_date");
4453
+ if (date != props.end()) {
4454
+ ROCKS_LOG_HEADER(log, "Compile date %s", date->second.c_str());
4455
+ }
4221
4456
  }
4222
4457
 
4223
4458
  #ifndef ROCKSDB_LITE
@@ -4234,28 +4469,39 @@ SequenceNumber DBImpl::GetEarliestMemTableSequenceNumber(SuperVersion* sv,
4234
4469
 
4235
4470
  return earliest_seq;
4236
4471
  }
4237
- #endif // ROCKSDB_LITE
4238
4472
 
4239
- #ifndef ROCKSDB_LITE
4240
- Status DBImpl::GetLatestSequenceForKey(SuperVersion* sv, const Slice& key,
4241
- bool cache_only,
4242
- SequenceNumber lower_bound_seq,
4243
- SequenceNumber* seq,
4244
- bool* found_record_for_key,
4245
- bool* is_blob_index) {
4473
+ Status DBImpl::GetLatestSequenceForKey(
4474
+ SuperVersion* sv, const Slice& key, bool cache_only,
4475
+ SequenceNumber lower_bound_seq, SequenceNumber* seq, std::string* timestamp,
4476
+ bool* found_record_for_key, bool* is_blob_index) {
4246
4477
  Status s;
4247
4478
  MergeContext merge_context;
4248
4479
  SequenceNumber max_covering_tombstone_seq = 0;
4249
4480
 
4250
4481
  ReadOptions read_options;
4251
4482
  SequenceNumber current_seq = versions_->LastSequence();
4252
- LookupKey lkey(key, current_seq);
4483
+
4484
+ ColumnFamilyData* cfd = sv->cfd;
4485
+ assert(cfd);
4486
+ const Comparator* const ucmp = cfd->user_comparator();
4487
+ assert(ucmp);
4488
+ size_t ts_sz = ucmp->timestamp_size();
4489
+ std::string ts_buf;
4490
+ if (ts_sz > 0) {
4491
+ assert(timestamp);
4492
+ ts_buf.assign(ts_sz, '\xff');
4493
+ } else {
4494
+ assert(!timestamp);
4495
+ }
4496
+ Slice ts(ts_buf);
4497
+
4498
+ LookupKey lkey(key, current_seq, ts_sz == 0 ? nullptr : &ts);
4253
4499
 
4254
4500
  *seq = kMaxSequenceNumber;
4255
4501
  *found_record_for_key = false;
4256
4502
 
4257
4503
  // Check if there is a record for this key in the latest memtable
4258
- sv->mem->Get(lkey, nullptr, nullptr, &s, &merge_context,
4504
+ sv->mem->Get(lkey, /*value=*/nullptr, timestamp, &s, &merge_context,
4259
4505
  &max_covering_tombstone_seq, seq, read_options,
4260
4506
  nullptr /*read_callback*/, is_blob_index);
4261
4507
 
@@ -4267,6 +4513,10 @@ Status DBImpl::GetLatestSequenceForKey(SuperVersion* sv, const Slice& key,
4267
4513
 
4268
4514
  return s;
4269
4515
  }
4516
+ assert(!ts_sz ||
4517
+ (*seq != kMaxSequenceNumber &&
4518
+ *timestamp != std::string(ts_sz, '\xff')) ||
4519
+ (*seq == kMaxSequenceNumber && timestamp->empty()));
4270
4520
 
4271
4521
  if (*seq != kMaxSequenceNumber) {
4272
4522
  // Found a sequence number, no need to check immutable memtables
@@ -4282,7 +4532,7 @@ Status DBImpl::GetLatestSequenceForKey(SuperVersion* sv, const Slice& key,
4282
4532
  }
4283
4533
 
4284
4534
  // Check if there is a record for this key in the immutable memtables
4285
- sv->imm->Get(lkey, nullptr, nullptr, &s, &merge_context,
4535
+ sv->imm->Get(lkey, /*value=*/nullptr, timestamp, &s, &merge_context,
4286
4536
  &max_covering_tombstone_seq, seq, read_options,
4287
4537
  nullptr /*read_callback*/, is_blob_index);
4288
4538
 
@@ -4295,6 +4545,11 @@ Status DBImpl::GetLatestSequenceForKey(SuperVersion* sv, const Slice& key,
4295
4545
  return s;
4296
4546
  }
4297
4547
 
4548
+ assert(!ts_sz ||
4549
+ (*seq != kMaxSequenceNumber &&
4550
+ *timestamp != std::string(ts_sz, '\xff')) ||
4551
+ (*seq == kMaxSequenceNumber && timestamp->empty()));
4552
+
4298
4553
  if (*seq != kMaxSequenceNumber) {
4299
4554
  // Found a sequence number, no need to check memtable history
4300
4555
  *found_record_for_key = true;
@@ -4309,9 +4564,9 @@ Status DBImpl::GetLatestSequenceForKey(SuperVersion* sv, const Slice& key,
4309
4564
  }
4310
4565
 
4311
4566
  // Check if there is a record for this key in the immutable memtables
4312
- sv->imm->GetFromHistory(lkey, nullptr, nullptr, &s, &merge_context,
4313
- &max_covering_tombstone_seq, seq, read_options,
4314
- is_blob_index);
4567
+ sv->imm->GetFromHistory(lkey, /*value=*/nullptr, timestamp, &s,
4568
+ &merge_context, &max_covering_tombstone_seq, seq,
4569
+ read_options, is_blob_index);
4315
4570
 
4316
4571
  if (!(s.ok() || s.IsNotFound() || s.IsMergeInProgress())) {
4317
4572
  // unexpected error reading memtable.
@@ -4323,8 +4578,13 @@ Status DBImpl::GetLatestSequenceForKey(SuperVersion* sv, const Slice& key,
4323
4578
  return s;
4324
4579
  }
4325
4580
 
4581
+ assert(!ts_sz ||
4582
+ (*seq != kMaxSequenceNumber &&
4583
+ *timestamp != std::string(ts_sz, '\xff')) ||
4584
+ (*seq == kMaxSequenceNumber && timestamp->empty()));
4326
4585
  if (*seq != kMaxSequenceNumber) {
4327
4586
  // Found a sequence number, no need to check SST files
4587
+ assert(0 == ts_sz || *timestamp != std::string(ts_sz, '\xff'));
4328
4588
  *found_record_for_key = true;
4329
4589
  return Status::OK();
4330
4590
  }
@@ -4337,8 +4597,10 @@ Status DBImpl::GetLatestSequenceForKey(SuperVersion* sv, const Slice& key,
4337
4597
  // SST files if cache_only=true?
4338
4598
  if (!cache_only) {
4339
4599
  // Check tables
4340
- sv->current->Get(read_options, lkey, nullptr, nullptr, &s, &merge_context,
4341
- &max_covering_tombstone_seq, nullptr /* value_found */,
4600
+ PinnedIteratorsManager pinned_iters_mgr;
4601
+ sv->current->Get(read_options, lkey, /*value=*/nullptr, timestamp, &s,
4602
+ &merge_context, &max_covering_tombstone_seq,
4603
+ &pinned_iters_mgr, nullptr /* value_found */,
4342
4604
  found_record_for_key, seq, nullptr /*read_callback*/,
4343
4605
  is_blob_index);
4344
4606
 
@@ -4419,9 +4681,9 @@ Status DBImpl::IngestExternalFiles(
4419
4681
  std::vector<ExternalSstFileIngestionJob> ingestion_jobs;
4420
4682
  for (const auto& arg : args) {
4421
4683
  auto* cfd = static_cast<ColumnFamilyHandleImpl*>(arg.column_family)->cfd();
4422
- ingestion_jobs.emplace_back(
4423
- env_, versions_.get(), cfd, immutable_db_options_, file_options_,
4424
- &snapshots_, arg.options, &directories_, &event_logger_, io_tracer_);
4684
+ ingestion_jobs.emplace_back(versions_.get(), cfd, immutable_db_options_,
4685
+ file_options_, &snapshots_, arg.options,
4686
+ &directories_, &event_logger_, io_tracer_);
4425
4687
  }
4426
4688
 
4427
4689
  // TODO(yanqin) maybe make jobs run in parallel
@@ -4433,7 +4695,8 @@ Status DBImpl::IngestExternalFiles(
4433
4695
  SuperVersion* super_version = cfd->GetReferencedSuperVersion(this);
4434
4696
  Status es = ingestion_jobs[i].Prepare(
4435
4697
  args[i].external_files, args[i].files_checksums,
4436
- args[i].files_checksum_func_names, start_file_number, super_version);
4698
+ args[i].files_checksum_func_names, args[i].file_temperature,
4699
+ start_file_number, super_version);
4437
4700
  // capture first error only
4438
4701
  if (!es.ok() && status.ok()) {
4439
4702
  status = es;
@@ -4448,7 +4711,8 @@ Status DBImpl::IngestExternalFiles(
4448
4711
  SuperVersion* super_version = cfd->GetReferencedSuperVersion(this);
4449
4712
  Status es = ingestion_jobs[0].Prepare(
4450
4713
  args[0].external_files, args[0].files_checksums,
4451
- args[0].files_checksum_func_names, next_file_number, super_version);
4714
+ args[0].files_checksum_func_names, args[0].file_temperature,
4715
+ next_file_number, super_version);
4452
4716
  if (!es.ok()) {
4453
4717
  status = es;
4454
4718
  }
@@ -4556,14 +4820,11 @@ Status DBImpl::IngestExternalFiles(
4556
4820
  if (status.ok()) {
4557
4821
  int consumed_seqno_count =
4558
4822
  ingestion_jobs[0].ConsumedSequenceNumbersCount();
4559
- #ifndef NDEBUG
4560
4823
  for (size_t i = 1; i != num_cfs; ++i) {
4561
- assert(!!consumed_seqno_count ==
4562
- !!ingestion_jobs[i].ConsumedSequenceNumbersCount());
4563
- consumed_seqno_count +=
4564
- ingestion_jobs[i].ConsumedSequenceNumbersCount();
4824
+ consumed_seqno_count =
4825
+ std::max(consumed_seqno_count,
4826
+ ingestion_jobs[i].ConsumedSequenceNumbersCount());
4565
4827
  }
4566
- #endif
4567
4828
  if (consumed_seqno_count > 0) {
4568
4829
  const SequenceNumber last_seqno = versions_->LastSequence();
4569
4830
  versions_->SetLastAllocatedSequence(last_seqno + consumed_seqno_count);
@@ -4688,9 +4949,9 @@ Status DBImpl::CreateColumnFamilyWithImport(
4688
4949
  // Import sst files from metadata.
4689
4950
  auto cfh = static_cast_with_check<ColumnFamilyHandleImpl>(*handle);
4690
4951
  auto cfd = cfh->cfd();
4691
- ImportColumnFamilyJob import_job(env_, versions_.get(), cfd,
4692
- immutable_db_options_, file_options_,
4693
- import_options, metadata.files, io_tracer_);
4952
+ ImportColumnFamilyJob import_job(versions_.get(), cfd, immutable_db_options_,
4953
+ file_options_, import_options,
4954
+ metadata.files, io_tracer_);
4694
4955
 
4695
4956
  SuperVersionContext dummy_sv_ctx(/* create_superversion */ true);
4696
4957
  VersionEdit dummy_edit;
@@ -4805,6 +5066,11 @@ Status DBImpl::VerifyChecksum(const ReadOptions& read_options) {
4805
5066
 
4806
5067
  Status DBImpl::VerifyChecksumInternal(const ReadOptions& read_options,
4807
5068
  bool use_file_checksum) {
5069
+ // `bytes_read` stat is enabled based on compile-time support and cannot
5070
+ // be dynamically toggled. So we do not need to worry about `PerfLevel`
5071
+ // here, unlike many other `IOStatsContext` / `PerfContext` stats.
5072
+ uint64_t prev_bytes_read = IOSTATS(bytes_read);
5073
+
4808
5074
  Status s;
4809
5075
 
4810
5076
  if (use_file_checksum) {
@@ -4818,6 +5084,7 @@ Status DBImpl::VerifyChecksumInternal(const ReadOptions& read_options,
4818
5084
  }
4819
5085
  }
4820
5086
 
5087
+ // TODO: simplify using GetRefedColumnFamilySet?
4821
5088
  std::vector<ColumnFamilyData*> cfd_list;
4822
5089
  {
4823
5090
  InstrumentedMutexLock l(&mutex_);
@@ -4852,17 +5119,44 @@ Status DBImpl::VerifyChecksumInternal(const ReadOptions& read_options,
4852
5119
  std::string fname = TableFileName(cfd->ioptions()->cf_paths,
4853
5120
  fd.GetNumber(), fd.GetPathId());
4854
5121
  if (use_file_checksum) {
4855
- s = VerifySstFileChecksum(*fmeta, fname, read_options);
5122
+ s = VerifyFullFileChecksum(fmeta->file_checksum,
5123
+ fmeta->file_checksum_func_name, fname,
5124
+ read_options);
4856
5125
  } else {
4857
5126
  s = ROCKSDB_NAMESPACE::VerifySstFileChecksum(opts, file_options_,
4858
5127
  read_options, fname);
4859
5128
  }
5129
+ RecordTick(stats_, VERIFY_CHECKSUM_READ_BYTES,
5130
+ IOSTATS(bytes_read) - prev_bytes_read);
5131
+ prev_bytes_read = IOSTATS(bytes_read);
5132
+ }
5133
+ }
5134
+
5135
+ if (s.ok() && use_file_checksum) {
5136
+ const auto& blob_files = vstorage->GetBlobFiles();
5137
+ for (const auto& meta : blob_files) {
5138
+ assert(meta);
5139
+
5140
+ const uint64_t blob_file_number = meta->GetBlobFileNumber();
5141
+
5142
+ const std::string blob_file_name = BlobFileName(
5143
+ cfd->ioptions()->cf_paths.front().path, blob_file_number);
5144
+ s = VerifyFullFileChecksum(meta->GetChecksumValue(),
5145
+ meta->GetChecksumMethod(), blob_file_name,
5146
+ read_options);
5147
+ RecordTick(stats_, VERIFY_CHECKSUM_READ_BYTES,
5148
+ IOSTATS(bytes_read) - prev_bytes_read);
5149
+ prev_bytes_read = IOSTATS(bytes_read);
5150
+ if (!s.ok()) {
5151
+ break;
5152
+ }
4860
5153
  }
4861
5154
  }
4862
5155
  if (!s.ok()) {
4863
5156
  break;
4864
5157
  }
4865
5158
  }
5159
+
4866
5160
  bool defer_purge =
4867
5161
  immutable_db_options().avoid_unnecessary_blocking_io;
4868
5162
  {
@@ -4884,32 +5178,37 @@ Status DBImpl::VerifyChecksumInternal(const ReadOptions& read_options,
4884
5178
  cfd->UnrefAndTryDelete();
4885
5179
  }
4886
5180
  }
5181
+ RecordTick(stats_, VERIFY_CHECKSUM_READ_BYTES,
5182
+ IOSTATS(bytes_read) - prev_bytes_read);
4887
5183
  return s;
4888
5184
  }
4889
5185
 
4890
- Status DBImpl::VerifySstFileChecksum(const FileMetaData& fmeta,
4891
- const std::string& fname,
4892
- const ReadOptions& read_options) {
5186
+ Status DBImpl::VerifyFullFileChecksum(const std::string& file_checksum_expected,
5187
+ const std::string& func_name_expected,
5188
+ const std::string& fname,
5189
+ const ReadOptions& read_options) {
4893
5190
  Status s;
4894
- if (fmeta.file_checksum == kUnknownFileChecksum) {
5191
+ if (file_checksum_expected == kUnknownFileChecksum) {
4895
5192
  return s;
4896
5193
  }
4897
5194
  std::string file_checksum;
4898
5195
  std::string func_name;
4899
5196
  s = ROCKSDB_NAMESPACE::GenerateOneFileChecksum(
4900
5197
  fs_.get(), fname, immutable_db_options_.file_checksum_gen_factory.get(),
4901
- fmeta.file_checksum_func_name, &file_checksum, &func_name,
5198
+ func_name_expected, &file_checksum, &func_name,
4902
5199
  read_options.readahead_size, immutable_db_options_.allow_mmap_reads,
4903
- io_tracer_, immutable_db_options_.rate_limiter.get());
5200
+ io_tracer_, immutable_db_options_.rate_limiter.get(),
5201
+ read_options.rate_limiter_priority);
4904
5202
  if (s.ok()) {
4905
- assert(fmeta.file_checksum_func_name == func_name);
4906
- if (file_checksum != fmeta.file_checksum) {
5203
+ assert(func_name_expected == func_name);
5204
+ if (file_checksum != file_checksum_expected) {
4907
5205
  std::ostringstream oss;
4908
5206
  oss << fname << " file checksum mismatch, ";
4909
- oss << "expecting " << Slice(fmeta.file_checksum).ToString(/*hex=*/true);
5207
+ oss << "expecting "
5208
+ << Slice(file_checksum_expected).ToString(/*hex=*/true);
4910
5209
  oss << ", but actual " << Slice(file_checksum).ToString(/*hex=*/true);
4911
5210
  s = Status::Corruption(oss.str());
4912
- TEST_SYNC_POINT_CALLBACK("DBImpl::VerifySstFileChecksum:mismatch", &s);
5211
+ TEST_SYNC_POINT_CALLBACK("DBImpl::VerifyFullFileChecksum:mismatch", &s);
4913
5212
  }
4914
5213
  }
4915
5214
  return s;
@@ -4944,7 +5243,8 @@ void DBImpl::WaitForIngestFile() {
4944
5243
  Status DBImpl::StartTrace(const TraceOptions& trace_options,
4945
5244
  std::unique_ptr<TraceWriter>&& trace_writer) {
4946
5245
  InstrumentedMutexLock lock(&trace_mutex_);
4947
- tracer_.reset(new Tracer(env_, trace_options, std::move(trace_writer)));
5246
+ tracer_.reset(new Tracer(immutable_db_options_.clock, trace_options,
5247
+ std::move(trace_writer)));
4948
5248
  return Status::OK();
4949
5249
  }
4950
5250
 
@@ -4955,16 +5255,24 @@ Status DBImpl::EndTrace() {
4955
5255
  s = tracer_->Close();
4956
5256
  tracer_.reset();
4957
5257
  } else {
4958
- return Status::IOError("No trace file to close");
5258
+ s = Status::IOError("No trace file to close");
4959
5259
  }
4960
5260
  return s;
4961
5261
  }
4962
5262
 
5263
+ Status DBImpl::NewDefaultReplayer(
5264
+ const std::vector<ColumnFamilyHandle*>& handles,
5265
+ std::unique_ptr<TraceReader>&& reader,
5266
+ std::unique_ptr<Replayer>* replayer) {
5267
+ replayer->reset(new ReplayerImpl(this, handles, std::move(reader)));
5268
+ return Status::OK();
5269
+ }
5270
+
4963
5271
  Status DBImpl::StartBlockCacheTrace(
4964
5272
  const TraceOptions& trace_options,
4965
5273
  std::unique_ptr<TraceWriter>&& trace_writer) {
4966
- return block_cache_tracer_.StartTrace(env_, trace_options,
4967
- std::move(trace_writer));
5274
+ return block_cache_tracer_.StartTrace(immutable_db_options_.clock,
5275
+ trace_options, std::move(trace_writer));
4968
5276
  }
4969
5277
 
4970
5278
  Status DBImpl::EndBlockCacheTrace() {
@@ -4972,24 +5280,27 @@ Status DBImpl::EndBlockCacheTrace() {
4972
5280
  return Status::OK();
4973
5281
  }
4974
5282
 
4975
- Status DBImpl::TraceIteratorSeek(const uint32_t& cf_id, const Slice& key) {
5283
+ Status DBImpl::TraceIteratorSeek(const uint32_t& cf_id, const Slice& key,
5284
+ const Slice& lower_bound,
5285
+ const Slice upper_bound) {
4976
5286
  Status s;
4977
5287
  if (tracer_) {
4978
5288
  InstrumentedMutexLock lock(&trace_mutex_);
4979
5289
  if (tracer_) {
4980
- s = tracer_->IteratorSeek(cf_id, key);
5290
+ s = tracer_->IteratorSeek(cf_id, key, lower_bound, upper_bound);
4981
5291
  }
4982
5292
  }
4983
5293
  return s;
4984
5294
  }
4985
5295
 
4986
- Status DBImpl::TraceIteratorSeekForPrev(const uint32_t& cf_id,
4987
- const Slice& key) {
5296
+ Status DBImpl::TraceIteratorSeekForPrev(const uint32_t& cf_id, const Slice& key,
5297
+ const Slice& lower_bound,
5298
+ const Slice upper_bound) {
4988
5299
  Status s;
4989
5300
  if (tracer_) {
4990
5301
  InstrumentedMutexLock lock(&trace_mutex_);
4991
5302
  if (tracer_) {
4992
- s = tracer_->IteratorSeekForPrev(cf_id, key);
5303
+ s = tracer_->IteratorSeekForPrev(cf_id, key, lower_bound, upper_bound);
4993
5304
  }
4994
5305
  }
4995
5306
  return s;