@nxtedition/rocksdb 5.2.21 → 5.2.28

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (923)
  1. package/binding.cc +510 -967
  2. package/binding.gyp +78 -72
  3. package/chained-batch.js +1 -2
  4. package/deps/rocksdb/build_version.cc +70 -4
  5. package/deps/rocksdb/rocksdb/CMakeLists.txt +281 -149
  6. package/deps/rocksdb/rocksdb/Makefile +459 -469
  7. package/deps/rocksdb/rocksdb/TARGETS +5244 -1500
  8. package/deps/rocksdb/rocksdb/cache/cache.cc +12 -3
  9. package/deps/rocksdb/rocksdb/cache/cache_bench.cc +7 -368
  10. package/deps/rocksdb/rocksdb/cache/cache_bench_tool.cc +924 -0
  11. package/deps/rocksdb/rocksdb/cache/cache_entry_roles.cc +128 -0
  12. package/deps/rocksdb/rocksdb/cache/cache_entry_roles.h +103 -0
  13. package/deps/rocksdb/rocksdb/cache/cache_entry_stats.h +183 -0
  14. package/deps/rocksdb/rocksdb/cache/cache_helpers.h +11 -0
  15. package/deps/rocksdb/rocksdb/cache/cache_key.cc +344 -0
  16. package/deps/rocksdb/rocksdb/cache/cache_key.h +132 -0
  17. package/deps/rocksdb/rocksdb/cache/cache_reservation_manager.cc +183 -0
  18. package/deps/rocksdb/rocksdb/cache/cache_reservation_manager.h +288 -0
  19. package/deps/rocksdb/rocksdb/cache/cache_reservation_manager_test.cc +468 -0
  20. package/deps/rocksdb/rocksdb/cache/cache_test.cc +85 -8
  21. package/deps/rocksdb/rocksdb/cache/clock_cache.cc +121 -51
  22. package/deps/rocksdb/rocksdb/cache/compressed_secondary_cache.cc +171 -0
  23. package/deps/rocksdb/rocksdb/cache/compressed_secondary_cache.h +86 -0
  24. package/deps/rocksdb/rocksdb/cache/compressed_secondary_cache_test.cc +607 -0
  25. package/deps/rocksdb/rocksdb/cache/lru_cache.cc +381 -154
  26. package/deps/rocksdb/rocksdb/cache/lru_cache.h +176 -33
  27. package/deps/rocksdb/rocksdb/cache/lru_cache_test.cc +1659 -3
  28. package/deps/rocksdb/rocksdb/cache/sharded_cache.cc +94 -23
  29. package/deps/rocksdb/rocksdb/cache/sharded_cache.h +49 -28
  30. package/deps/rocksdb/rocksdb/cmake/modules/CxxFlags.cmake +7 -0
  31. package/deps/rocksdb/rocksdb/cmake/modules/FindJeMalloc.cmake +29 -0
  32. package/deps/rocksdb/rocksdb/cmake/modules/FindNUMA.cmake +29 -0
  33. package/deps/rocksdb/rocksdb/cmake/modules/FindSnappy.cmake +29 -0
  34. package/deps/rocksdb/rocksdb/cmake/modules/FindTBB.cmake +33 -0
  35. package/deps/rocksdb/rocksdb/cmake/modules/Findgflags.cmake +29 -0
  36. package/deps/rocksdb/rocksdb/cmake/modules/Findlz4.cmake +29 -0
  37. package/deps/rocksdb/rocksdb/cmake/modules/Finduring.cmake +26 -0
  38. package/deps/rocksdb/rocksdb/cmake/modules/Findzstd.cmake +29 -0
  39. package/deps/rocksdb/rocksdb/cmake/modules/ReadVersion.cmake +10 -0
  40. package/deps/rocksdb/rocksdb/crash_test.mk +93 -0
  41. package/deps/rocksdb/rocksdb/db/arena_wrapped_db_iter.cc +54 -31
  42. package/deps/rocksdb/rocksdb/db/arena_wrapped_db_iter.h +10 -6
  43. package/deps/rocksdb/rocksdb/db/blob/blob_counting_iterator.h +146 -0
  44. package/deps/rocksdb/rocksdb/db/blob/blob_counting_iterator_test.cc +326 -0
  45. package/deps/rocksdb/rocksdb/db/blob/blob_fetcher.cc +34 -0
  46. package/deps/rocksdb/rocksdb/db/blob/blob_fetcher.h +37 -0
  47. package/deps/rocksdb/rocksdb/db/blob/blob_file_addition.cc +4 -2
  48. package/deps/rocksdb/rocksdb/db/blob/blob_file_addition_test.cc +8 -4
  49. package/deps/rocksdb/rocksdb/db/blob/blob_file_builder.cc +99 -40
  50. package/deps/rocksdb/rocksdb/db/blob/blob_file_builder.h +20 -8
  51. package/deps/rocksdb/rocksdb/db/blob/blob_file_builder_test.cc +95 -83
  52. package/deps/rocksdb/rocksdb/db/blob/blob_file_cache.cc +13 -10
  53. package/deps/rocksdb/rocksdb/db/blob/blob_file_cache.h +7 -4
  54. package/deps/rocksdb/rocksdb/db/blob/blob_file_cache_test.cc +37 -37
  55. package/deps/rocksdb/rocksdb/db/blob/blob_file_completion_callback.h +101 -0
  56. package/deps/rocksdb/rocksdb/db/blob/blob_file_meta.cc +8 -1
  57. package/deps/rocksdb/rocksdb/db/blob/blob_file_meta.h +6 -0
  58. package/deps/rocksdb/rocksdb/db/blob/blob_file_reader.cc +209 -44
  59. package/deps/rocksdb/rocksdb/db/blob/blob_file_reader.h +37 -11
  60. package/deps/rocksdb/rocksdb/db/blob/blob_file_reader_test.cc +382 -179
  61. package/deps/rocksdb/rocksdb/db/blob/blob_garbage_meter.cc +100 -0
  62. package/deps/rocksdb/rocksdb/db/blob/blob_garbage_meter.h +102 -0
  63. package/deps/rocksdb/rocksdb/db/blob/blob_garbage_meter_test.cc +196 -0
  64. package/deps/rocksdb/rocksdb/db/blob/blob_index.h +3 -0
  65. package/deps/rocksdb/rocksdb/db/blob/blob_log_format.h +2 -1
  66. package/deps/rocksdb/rocksdb/db/blob/blob_log_sequential_reader.cc +7 -5
  67. package/deps/rocksdb/rocksdb/db/blob/blob_log_sequential_reader.h +10 -3
  68. package/deps/rocksdb/rocksdb/db/blob/blob_log_writer.cc +12 -8
  69. package/deps/rocksdb/rocksdb/db/blob/blob_log_writer.h +5 -5
  70. package/deps/rocksdb/rocksdb/db/blob/db_blob_basic_test.cc +772 -9
  71. package/deps/rocksdb/rocksdb/db/blob/db_blob_compaction_test.cc +730 -0
  72. package/deps/rocksdb/rocksdb/db/blob/db_blob_corruption_test.cc +82 -0
  73. package/deps/rocksdb/rocksdb/db/blob/db_blob_index_test.cc +155 -17
  74. package/deps/rocksdb/rocksdb/db/blob/prefetch_buffer_collection.cc +21 -0
  75. package/deps/rocksdb/rocksdb/db/blob/prefetch_buffer_collection.h +38 -0
  76. package/deps/rocksdb/rocksdb/db/builder.cc +137 -89
  77. package/deps/rocksdb/rocksdb/db/builder.h +16 -37
  78. package/deps/rocksdb/rocksdb/db/c.cc +413 -208
  79. package/deps/rocksdb/rocksdb/db/c_test.c +227 -138
  80. package/deps/rocksdb/rocksdb/db/column_family.cc +118 -103
  81. package/deps/rocksdb/rocksdb/db/column_family.h +86 -44
  82. package/deps/rocksdb/rocksdb/db/column_family_test.cc +38 -24
  83. package/deps/rocksdb/rocksdb/db/compact_files_test.cc +81 -0
  84. package/deps/rocksdb/rocksdb/db/compaction/clipping_iterator.h +275 -0
  85. package/deps/rocksdb/rocksdb/db/compaction/clipping_iterator_test.cc +258 -0
  86. package/deps/rocksdb/rocksdb/db/compaction/compaction.cc +81 -28
  87. package/deps/rocksdb/rocksdb/db/compaction/compaction.h +43 -12
  88. package/deps/rocksdb/rocksdb/db/compaction/compaction_iteration_stats.h +12 -0
  89. package/deps/rocksdb/rocksdb/db/compaction/compaction_iterator.cc +406 -215
  90. package/deps/rocksdb/rocksdb/db/compaction/compaction_iterator.h +147 -50
  91. package/deps/rocksdb/rocksdb/db/compaction/compaction_iterator_test.cc +167 -61
  92. package/deps/rocksdb/rocksdb/db/compaction/compaction_job.cc +1321 -156
  93. package/deps/rocksdb/rocksdb/db/compaction/compaction_job.h +197 -28
  94. package/deps/rocksdb/rocksdb/db/compaction/compaction_job_stats_test.cc +2 -3
  95. package/deps/rocksdb/rocksdb/db/compaction/compaction_job_test.cc +246 -43
  96. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker.cc +65 -26
  97. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker.h +7 -7
  98. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_fifo.cc +122 -9
  99. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_fifo.h +8 -2
  100. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_level.cc +18 -6
  101. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_level.h +1 -1
  102. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_test.cc +536 -44
  103. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_universal.cc +311 -30
  104. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_universal.h +1 -1
  105. package/deps/rocksdb/rocksdb/db/compaction/compaction_service_test.cc +849 -0
  106. package/deps/rocksdb/rocksdb/db/compaction/file_pri.h +92 -0
  107. package/deps/rocksdb/rocksdb/db/compaction/sst_partitioner.cc +46 -0
  108. package/deps/rocksdb/rocksdb/db/comparator_db_test.cc +1 -1
  109. package/deps/rocksdb/rocksdb/db/convenience.cc +6 -3
  110. package/deps/rocksdb/rocksdb/db/corruption_test.cc +383 -28
  111. package/deps/rocksdb/rocksdb/db/cuckoo_table_db_test.cc +7 -2
  112. package/deps/rocksdb/rocksdb/db/db_basic_test.cc +154 -45
  113. package/deps/rocksdb/rocksdb/db/db_block_cache_test.cc +1095 -33
  114. package/deps/rocksdb/rocksdb/db/db_bloom_filter_test.cc +1249 -203
  115. package/deps/rocksdb/rocksdb/db/db_compaction_filter_test.cc +135 -9
  116. package/deps/rocksdb/rocksdb/db/db_compaction_test.cc +1348 -166
  117. package/deps/rocksdb/rocksdb/db/db_dynamic_level_test.cc +3 -5
  118. package/deps/rocksdb/rocksdb/db/db_encryption_test.cc +1 -1
  119. package/deps/rocksdb/rocksdb/db/db_filesnapshot.cc +312 -45
  120. package/deps/rocksdb/rocksdb/db/db_flush_test.cc +1734 -48
  121. package/deps/rocksdb/rocksdb/db/{compacted_db_impl.cc → db_impl/compacted_db_impl.cc} +24 -7
  122. package/deps/rocksdb/rocksdb/db/{compacted_db_impl.h → db_impl/compacted_db_impl.h} +1 -1
  123. package/deps/rocksdb/rocksdb/db/db_impl/db_impl.cc +644 -333
  124. package/deps/rocksdb/rocksdb/db/db_impl/db_impl.h +365 -92
  125. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_compaction_flush.cc +578 -210
  126. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_debug.cc +38 -16
  127. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_experimental.cc +17 -10
  128. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_files.cc +75 -74
  129. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_open.cc +450 -183
  130. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_readonly.cc +42 -9
  131. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_secondary.cc +232 -15
  132. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_secondary.h +42 -4
  133. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_write.cc +297 -100
  134. package/deps/rocksdb/rocksdb/db/db_info_dumper.cc +16 -15
  135. package/deps/rocksdb/rocksdb/db/db_inplace_update_test.cc +31 -1
  136. package/deps/rocksdb/rocksdb/db/db_io_failure_test.cc +6 -5
  137. package/deps/rocksdb/rocksdb/db/db_iter.cc +218 -153
  138. package/deps/rocksdb/rocksdb/db/db_iter.h +14 -12
  139. package/deps/rocksdb/rocksdb/db/db_iter_stress_test.cc +1 -1
  140. package/deps/rocksdb/rocksdb/db/db_iter_test.cc +84 -160
  141. package/deps/rocksdb/rocksdb/db/db_iterator_test.cc +47 -6
  142. package/deps/rocksdb/rocksdb/db/db_kv_checksum_test.cc +204 -0
  143. package/deps/rocksdb/rocksdb/db/db_log_iter_test.cc +21 -13
  144. package/deps/rocksdb/rocksdb/db/db_logical_block_size_cache_test.cc +17 -10
  145. package/deps/rocksdb/rocksdb/db/db_memtable_test.cc +38 -24
  146. package/deps/rocksdb/rocksdb/db/db_merge_operand_test.cc +184 -19
  147. package/deps/rocksdb/rocksdb/db/db_merge_operator_test.cc +1 -1
  148. package/deps/rocksdb/rocksdb/db/db_options_test.cc +183 -3
  149. package/deps/rocksdb/rocksdb/db/db_properties_test.cc +409 -9
  150. package/deps/rocksdb/rocksdb/db/db_range_del_test.cc +92 -23
  151. package/deps/rocksdb/rocksdb/db/db_rate_limiter_test.cc +446 -0
  152. package/deps/rocksdb/rocksdb/db/{db_impl/db_secondary_test.cc → db_secondary_test.cc} +363 -35
  153. package/deps/rocksdb/rocksdb/db/db_sst_test.cc +520 -15
  154. package/deps/rocksdb/rocksdb/db/db_statistics_test.cc +50 -1
  155. package/deps/rocksdb/rocksdb/db/db_table_properties_test.cc +139 -4
  156. package/deps/rocksdb/rocksdb/db/db_tailing_iter_test.cc +1 -1
  157. package/deps/rocksdb/rocksdb/db/db_test.cc +669 -359
  158. package/deps/rocksdb/rocksdb/db/db_test2.cc +2110 -304
  159. package/deps/rocksdb/rocksdb/db/db_test_util.cc +76 -43
  160. package/deps/rocksdb/rocksdb/db/db_test_util.h +231 -103
  161. package/deps/rocksdb/rocksdb/db/db_universal_compaction_test.cc +19 -11
  162. package/deps/rocksdb/rocksdb/db/db_wal_test.cc +490 -71
  163. package/deps/rocksdb/rocksdb/db/db_with_timestamp_basic_test.cc +980 -349
  164. package/deps/rocksdb/rocksdb/db/db_with_timestamp_compaction_test.cc +11 -12
  165. package/deps/rocksdb/rocksdb/db/db_write_buffer_manager_test.cc +793 -0
  166. package/deps/rocksdb/rocksdb/db/db_write_test.cc +2 -1
  167. package/deps/rocksdb/rocksdb/db/dbformat.cc +4 -12
  168. package/deps/rocksdb/rocksdb/db/dbformat.h +28 -18
  169. package/deps/rocksdb/rocksdb/db/dbformat_test.cc +3 -0
  170. package/deps/rocksdb/rocksdb/db/deletefile_test.cc +50 -15
  171. package/deps/rocksdb/rocksdb/db/error_handler.cc +127 -41
  172. package/deps/rocksdb/rocksdb/db/error_handler.h +12 -5
  173. package/deps/rocksdb/rocksdb/db/error_handler_fs_test.cc +524 -255
  174. package/deps/rocksdb/rocksdb/db/event_helpers.cc +136 -11
  175. package/deps/rocksdb/rocksdb/db/event_helpers.h +27 -2
  176. package/deps/rocksdb/rocksdb/db/experimental.cc +100 -0
  177. package/deps/rocksdb/rocksdb/db/external_sst_file_basic_test.cc +307 -4
  178. package/deps/rocksdb/rocksdb/db/external_sst_file_ingestion_job.cc +137 -60
  179. package/deps/rocksdb/rocksdb/db/external_sst_file_ingestion_job.h +12 -8
  180. package/deps/rocksdb/rocksdb/db/external_sst_file_test.cc +86 -55
  181. package/deps/rocksdb/rocksdb/db/fault_injection_test.cc +86 -5
  182. package/deps/rocksdb/rocksdb/db/filename_test.cc +63 -0
  183. package/deps/rocksdb/rocksdb/db/flush_job.cc +619 -64
  184. package/deps/rocksdb/rocksdb/db/flush_job.h +30 -7
  185. package/deps/rocksdb/rocksdb/db/flush_job_test.cc +33 -16
  186. package/deps/rocksdb/rocksdb/db/flush_scheduler.h +2 -1
  187. package/deps/rocksdb/rocksdb/db/forward_iterator.cc +18 -17
  188. package/deps/rocksdb/rocksdb/db/forward_iterator.h +5 -4
  189. package/deps/rocksdb/rocksdb/db/forward_iterator_bench.cc +0 -1
  190. package/deps/rocksdb/rocksdb/db/history_trimming_iterator.h +91 -0
  191. package/deps/rocksdb/rocksdb/db/import_column_family_job.cc +25 -14
  192. package/deps/rocksdb/rocksdb/db/import_column_family_job.h +6 -5
  193. package/deps/rocksdb/rocksdb/db/import_column_family_test.cc +1 -1
  194. package/deps/rocksdb/rocksdb/db/internal_stats.cc +471 -50
  195. package/deps/rocksdb/rocksdb/db/internal_stats.h +129 -25
  196. package/deps/rocksdb/rocksdb/db/job_context.h +22 -9
  197. package/deps/rocksdb/rocksdb/db/kv_checksum.h +394 -0
  198. package/deps/rocksdb/rocksdb/db/listener_test.cc +518 -41
  199. package/deps/rocksdb/rocksdb/db/log_format.h +4 -1
  200. package/deps/rocksdb/rocksdb/db/log_reader.cc +129 -6
  201. package/deps/rocksdb/rocksdb/db/log_reader.h +17 -1
  202. package/deps/rocksdb/rocksdb/db/log_test.cc +161 -11
  203. package/deps/rocksdb/rocksdb/db/log_writer.cc +92 -13
  204. package/deps/rocksdb/rocksdb/db/log_writer.h +18 -5
  205. package/deps/rocksdb/rocksdb/db/logs_with_prep_tracker.h +1 -1
  206. package/deps/rocksdb/rocksdb/db/lookup_key.h +0 -1
  207. package/deps/rocksdb/rocksdb/db/malloc_stats.cc +2 -2
  208. package/deps/rocksdb/rocksdb/db/manual_compaction_test.cc +21 -8
  209. package/deps/rocksdb/rocksdb/db/memtable.cc +144 -54
  210. package/deps/rocksdb/rocksdb/db/memtable.h +72 -15
  211. package/deps/rocksdb/rocksdb/db/memtable_list.cc +95 -47
  212. package/deps/rocksdb/rocksdb/db/memtable_list.h +33 -13
  213. package/deps/rocksdb/rocksdb/db/memtable_list_test.cc +61 -31
  214. package/deps/rocksdb/rocksdb/db/merge_context.h +20 -8
  215. package/deps/rocksdb/rocksdb/db/merge_helper.cc +54 -11
  216. package/deps/rocksdb/rocksdb/db/merge_helper.h +17 -6
  217. package/deps/rocksdb/rocksdb/db/merge_helper_test.cc +13 -7
  218. package/deps/rocksdb/rocksdb/db/merge_test.cc +40 -19
  219. package/deps/rocksdb/rocksdb/db/obsolete_files_test.cc +14 -25
  220. package/deps/rocksdb/rocksdb/db/output_validator.cc +3 -0
  221. package/deps/rocksdb/rocksdb/db/output_validator.h +5 -4
  222. package/deps/rocksdb/rocksdb/db/perf_context_test.cc +32 -28
  223. package/deps/rocksdb/rocksdb/db/periodic_work_scheduler.cc +43 -29
  224. package/deps/rocksdb/rocksdb/db/periodic_work_scheduler.h +9 -7
  225. package/deps/rocksdb/rocksdb/db/periodic_work_scheduler_test.cc +21 -16
  226. package/deps/rocksdb/rocksdb/db/pinned_iterators_manager.h +1 -1
  227. package/deps/rocksdb/rocksdb/db/plain_table_db_test.cc +29 -36
  228. package/deps/rocksdb/rocksdb/db/pre_release_callback.h +1 -2
  229. package/deps/rocksdb/rocksdb/db/prefix_test.cc +4 -4
  230. package/deps/rocksdb/rocksdb/db/range_del_aggregator.h +2 -2
  231. package/deps/rocksdb/rocksdb/db/range_del_aggregator_bench.cc +11 -11
  232. package/deps/rocksdb/rocksdb/db/range_del_aggregator_test.cc +3 -2
  233. package/deps/rocksdb/rocksdb/db/range_tombstone_fragmenter.cc +14 -8
  234. package/deps/rocksdb/rocksdb/db/range_tombstone_fragmenter.h +17 -0
  235. package/deps/rocksdb/rocksdb/db/range_tombstone_fragmenter_test.cc +4 -2
  236. package/deps/rocksdb/rocksdb/db/read_callback.h +1 -0
  237. package/deps/rocksdb/rocksdb/db/repair.cc +87 -58
  238. package/deps/rocksdb/rocksdb/db/repair_test.cc +35 -5
  239. package/deps/rocksdb/rocksdb/db/snapshot_impl.h +2 -1
  240. package/deps/rocksdb/rocksdb/db/table_cache.cc +95 -69
  241. package/deps/rocksdb/rocksdb/db/table_cache.h +63 -53
  242. package/deps/rocksdb/rocksdb/db/table_properties_collector.cc +4 -4
  243. package/deps/rocksdb/rocksdb/db/table_properties_collector.h +78 -10
  244. package/deps/rocksdb/rocksdb/db/table_properties_collector_test.cc +28 -33
  245. package/deps/rocksdb/rocksdb/db/transaction_log_impl.cc +30 -51
  246. package/deps/rocksdb/rocksdb/db/transaction_log_impl.h +12 -8
  247. package/deps/rocksdb/rocksdb/db/version_builder.cc +564 -341
  248. package/deps/rocksdb/rocksdb/db/version_builder.h +8 -8
  249. package/deps/rocksdb/rocksdb/db/version_builder_test.cc +327 -155
  250. package/deps/rocksdb/rocksdb/db/version_edit.cc +89 -27
  251. package/deps/rocksdb/rocksdb/db/version_edit.h +42 -17
  252. package/deps/rocksdb/rocksdb/db/version_edit_handler.cc +324 -43
  253. package/deps/rocksdb/rocksdb/db/version_edit_handler.h +79 -22
  254. package/deps/rocksdb/rocksdb/db/version_edit_test.cc +165 -20
  255. package/deps/rocksdb/rocksdb/db/version_set.cc +935 -1034
  256. package/deps/rocksdb/rocksdb/db/version_set.h +183 -122
  257. package/deps/rocksdb/rocksdb/db/version_set_test.cc +556 -138
  258. package/deps/rocksdb/rocksdb/db/version_util.h +68 -0
  259. package/deps/rocksdb/rocksdb/db/wal_manager.cc +23 -21
  260. package/deps/rocksdb/rocksdb/db/wal_manager.h +5 -2
  261. package/deps/rocksdb/rocksdb/db/wal_manager_test.cc +30 -27
  262. package/deps/rocksdb/rocksdb/db/write_batch.cc +704 -209
  263. package/deps/rocksdb/rocksdb/db/write_batch_internal.h +135 -2
  264. package/deps/rocksdb/rocksdb/db/write_batch_test.cc +209 -5
  265. package/deps/rocksdb/rocksdb/db/write_callback_test.cc +2 -0
  266. package/deps/rocksdb/rocksdb/db/write_controller.cc +47 -54
  267. package/deps/rocksdb/rocksdb/db/write_controller.h +12 -9
  268. package/deps/rocksdb/rocksdb/db/write_controller_test.cc +215 -103
  269. package/deps/rocksdb/rocksdb/db/write_thread.cc +11 -0
  270. package/deps/rocksdb/rocksdb/db/write_thread.h +14 -8
  271. package/deps/rocksdb/rocksdb/db_stress_tool/CMakeLists.txt +7 -4
  272. package/deps/rocksdb/rocksdb/db_stress_tool/batched_ops_stress.cc +10 -3
  273. package/deps/rocksdb/rocksdb/db_stress_tool/cf_consistency_stress.cc +6 -0
  274. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress.cc +1 -1
  275. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_common.cc +19 -2
  276. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_common.h +78 -25
  277. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_compaction_filter.h +13 -2
  278. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_driver.cc +29 -12
  279. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_env_wrapper.h +5 -1
  280. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_gflags.cc +199 -32
  281. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_listener.cc +188 -0
  282. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_listener.h +59 -10
  283. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_shared_state.h +77 -109
  284. package/deps/rocksdb/rocksdb/{third-party/folly/folly/synchronization/WaitOptions.cpp → db_stress_tool/db_stress_stat.cc} +9 -4
  285. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_stat.h +7 -6
  286. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_table_properties_collector.h +1 -0
  287. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_test_base.cc +699 -143
  288. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_test_base.h +20 -2
  289. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_tool.cc +49 -39
  290. package/deps/rocksdb/rocksdb/db_stress_tool/expected_state.cc +631 -0
  291. package/deps/rocksdb/rocksdb/db_stress_tool/expected_state.h +287 -0
  292. package/deps/rocksdb/rocksdb/db_stress_tool/multi_ops_txns_stress.cc +1565 -0
  293. package/deps/rocksdb/rocksdb/db_stress_tool/multi_ops_txns_stress.h +374 -0
  294. package/deps/rocksdb/rocksdb/db_stress_tool/no_batched_ops_stress.cc +149 -18
  295. package/deps/rocksdb/rocksdb/env/composite_env.cc +464 -0
  296. package/deps/rocksdb/rocksdb/env/composite_env_wrapper.h +98 -646
  297. package/deps/rocksdb/rocksdb/env/emulated_clock.h +114 -0
  298. package/deps/rocksdb/rocksdb/env/env.cc +632 -42
  299. package/deps/rocksdb/rocksdb/env/env_basic_test.cc +84 -36
  300. package/deps/rocksdb/rocksdb/env/env_chroot.cc +88 -286
  301. package/deps/rocksdb/rocksdb/env/env_chroot.h +34 -1
  302. package/deps/rocksdb/rocksdb/env/env_encryption.cc +469 -277
  303. package/deps/rocksdb/rocksdb/env/env_encryption_ctr.h +9 -30
  304. package/deps/rocksdb/rocksdb/env/env_posix.cc +110 -119
  305. package/deps/rocksdb/rocksdb/env/env_test.cc +1128 -39
  306. package/deps/rocksdb/rocksdb/env/file_system.cc +147 -8
  307. package/deps/rocksdb/rocksdb/env/file_system_tracer.cc +207 -136
  308. package/deps/rocksdb/rocksdb/env/file_system_tracer.h +86 -54
  309. package/deps/rocksdb/rocksdb/env/fs_posix.cc +192 -64
  310. package/deps/rocksdb/rocksdb/env/fs_readonly.h +107 -0
  311. package/deps/rocksdb/rocksdb/env/fs_remap.cc +339 -0
  312. package/deps/rocksdb/rocksdb/env/fs_remap.h +139 -0
  313. package/deps/rocksdb/rocksdb/env/io_posix.cc +245 -41
  314. package/deps/rocksdb/rocksdb/env/io_posix.h +66 -1
  315. package/deps/rocksdb/rocksdb/env/mock_env.cc +147 -149
  316. package/deps/rocksdb/rocksdb/env/mock_env.h +113 -11
  317. package/deps/rocksdb/rocksdb/env/mock_env_test.cc +2 -4
  318. package/deps/rocksdb/rocksdb/env/unique_id_gen.cc +164 -0
  319. package/deps/rocksdb/rocksdb/env/unique_id_gen.h +71 -0
  320. package/deps/rocksdb/rocksdb/file/delete_scheduler.cc +9 -5
  321. package/deps/rocksdb/rocksdb/file/delete_scheduler.h +6 -4
  322. package/deps/rocksdb/rocksdb/file/delete_scheduler_test.cc +19 -12
  323. package/deps/rocksdb/rocksdb/file/file_prefetch_buffer.cc +459 -70
  324. package/deps/rocksdb/rocksdb/file/file_prefetch_buffer.h +205 -28
  325. package/deps/rocksdb/rocksdb/file/file_util.cc +39 -28
  326. package/deps/rocksdb/rocksdb/file/file_util.h +18 -27
  327. package/deps/rocksdb/rocksdb/file/filename.cc +59 -22
  328. package/deps/rocksdb/rocksdb/file/filename.h +13 -8
  329. package/deps/rocksdb/rocksdb/file/line_file_reader.cc +68 -0
  330. package/deps/rocksdb/rocksdb/file/line_file_reader.h +59 -0
  331. package/deps/rocksdb/rocksdb/file/prefetch_test.cc +1130 -6
  332. package/deps/rocksdb/rocksdb/file/random_access_file_reader.cc +220 -36
  333. package/deps/rocksdb/rocksdb/file/random_access_file_reader.h +69 -17
  334. package/deps/rocksdb/rocksdb/file/random_access_file_reader_test.cc +13 -12
  335. package/deps/rocksdb/rocksdb/file/read_write_util.cc +3 -38
  336. package/deps/rocksdb/rocksdb/file/read_write_util.h +0 -4
  337. package/deps/rocksdb/rocksdb/file/readahead_file_info.h +33 -0
  338. package/deps/rocksdb/rocksdb/file/sequence_file_reader.cc +57 -9
  339. package/deps/rocksdb/rocksdb/file/sequence_file_reader.h +58 -6
  340. package/deps/rocksdb/rocksdb/file/sst_file_manager_impl.cc +29 -54
  341. package/deps/rocksdb/rocksdb/file/sst_file_manager_impl.h +22 -29
  342. package/deps/rocksdb/rocksdb/file/writable_file_writer.cc +424 -50
  343. package/deps/rocksdb/rocksdb/file/writable_file_writer.h +66 -19
  344. package/deps/rocksdb/rocksdb/include/rocksdb/advanced_options.h +157 -66
  345. package/deps/rocksdb/rocksdb/include/rocksdb/c.h +224 -121
  346. package/deps/rocksdb/rocksdb/include/rocksdb/cache.h +333 -30
  347. package/deps/rocksdb/rocksdb/include/rocksdb/cache_bench_tool.h +14 -0
  348. package/deps/rocksdb/rocksdb/include/rocksdb/cleanable.h +1 -1
  349. package/deps/rocksdb/rocksdb/include/rocksdb/compaction_filter.h +90 -50
  350. package/deps/rocksdb/rocksdb/include/rocksdb/compaction_job_stats.h +13 -5
  351. package/deps/rocksdb/rocksdb/include/rocksdb/comparator.h +20 -4
  352. package/deps/rocksdb/rocksdb/include/rocksdb/concurrent_task_limiter.h +8 -3
  353. package/deps/rocksdb/rocksdb/include/rocksdb/configurable.h +53 -12
  354. package/deps/rocksdb/rocksdb/include/rocksdb/convenience.h +31 -6
  355. package/deps/rocksdb/rocksdb/include/rocksdb/customizable.h +102 -7
  356. package/deps/rocksdb/rocksdb/include/rocksdb/data_structure.h +51 -0
  357. package/deps/rocksdb/rocksdb/include/rocksdb/db.h +370 -262
  358. package/deps/rocksdb/rocksdb/include/rocksdb/env.h +286 -87
  359. package/deps/rocksdb/rocksdb/include/rocksdb/env_encryption.h +124 -64
  360. package/deps/rocksdb/rocksdb/include/rocksdb/experimental.h +27 -0
  361. package/deps/rocksdb/rocksdb/include/rocksdb/file_checksum.h +21 -4
  362. package/deps/rocksdb/rocksdb/include/rocksdb/file_system.h +384 -41
  363. package/deps/rocksdb/rocksdb/include/rocksdb/filter_policy.h +111 -143
  364. package/deps/rocksdb/rocksdb/include/rocksdb/flush_block_policy.h +20 -6
  365. package/deps/rocksdb/rocksdb/include/rocksdb/functor_wrapper.h +56 -0
  366. package/deps/rocksdb/rocksdb/include/rocksdb/io_status.h +15 -33
  367. package/deps/rocksdb/rocksdb/include/rocksdb/iostats_context.h +37 -1
  368. package/deps/rocksdb/rocksdb/include/rocksdb/iterator.h +1 -3
  369. package/deps/rocksdb/rocksdb/include/rocksdb/listener.h +314 -26
  370. package/deps/rocksdb/rocksdb/include/rocksdb/memory_allocator.h +11 -7
  371. package/deps/rocksdb/rocksdb/include/rocksdb/memtablerep.h +50 -15
  372. package/deps/rocksdb/rocksdb/include/rocksdb/merge_operator.h +10 -3
  373. package/deps/rocksdb/rocksdb/include/rocksdb/metadata.h +186 -96
  374. package/deps/rocksdb/rocksdb/include/rocksdb/options.h +373 -103
  375. package/deps/rocksdb/rocksdb/include/rocksdb/perf_context.h +13 -3
  376. package/deps/rocksdb/rocksdb/include/rocksdb/persistent_cache.h +2 -2
  377. package/deps/rocksdb/rocksdb/include/rocksdb/rate_limiter.h +37 -7
  378. package/deps/rocksdb/rocksdb/include/rocksdb/rocksdb_namespace.h +6 -0
  379. package/deps/rocksdb/rocksdb/include/rocksdb/secondary_cache.h +87 -0
  380. package/deps/rocksdb/rocksdb/include/rocksdb/slice.h +5 -12
  381. package/deps/rocksdb/rocksdb/include/rocksdb/slice_transform.h +59 -30
  382. package/deps/rocksdb/rocksdb/include/rocksdb/sst_file_manager.h +11 -11
  383. package/deps/rocksdb/rocksdb/include/rocksdb/sst_file_writer.h +22 -0
  384. package/deps/rocksdb/rocksdb/include/rocksdb/sst_partitioner.h +17 -10
  385. package/deps/rocksdb/rocksdb/include/rocksdb/statistics.h +121 -41
  386. package/deps/rocksdb/rocksdb/include/rocksdb/stats_history.h +1 -0
  387. package/deps/rocksdb/rocksdb/include/rocksdb/status.h +114 -136
  388. package/deps/rocksdb/rocksdb/include/rocksdb/system_clock.h +116 -0
  389. package/deps/rocksdb/rocksdb/include/rocksdb/table.h +160 -18
  390. package/deps/rocksdb/rocksdb/include/rocksdb/table_properties.h +57 -15
  391. package/deps/rocksdb/rocksdb/include/rocksdb/thread_status.h +3 -1
  392. package/deps/rocksdb/rocksdb/include/rocksdb/trace_reader_writer.h +10 -6
  393. package/deps/rocksdb/rocksdb/include/rocksdb/trace_record.h +247 -0
  394. package/deps/rocksdb/rocksdb/include/rocksdb/trace_record_result.h +187 -0
  395. package/deps/rocksdb/rocksdb/include/rocksdb/transaction_log.h +1 -1
  396. package/deps/rocksdb/rocksdb/include/rocksdb/types.h +14 -24
  397. package/deps/rocksdb/rocksdb/include/rocksdb/unique_id.h +46 -0
  398. package/deps/rocksdb/rocksdb/include/rocksdb/universal_compaction.h +14 -4
  399. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/agg_merge.h +138 -0
  400. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/backup_engine.h +631 -0
  401. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/cache_dump_load.h +142 -0
  402. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/checkpoint.h +12 -9
  403. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/customizable_util.h +368 -0
  404. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/ldb_cmd.h +24 -0
  405. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/ldb_cmd_execute_result.h +4 -0
  406. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/object_registry.h +418 -63
  407. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/options_type.h +143 -73
  408. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/options_util.h +2 -2
  409. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/replayer.h +87 -0
  410. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/sim_cache.h +2 -2
  411. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/stackable_db.h +43 -5
  412. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/table_properties_collectors.h +18 -23
  413. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/transaction.h +26 -0
  414. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/transaction_db.h +32 -6
  415. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/transaction_db_mutex.h +1 -2
  416. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/write_batch_with_index.h +20 -1
  417. package/deps/rocksdb/rocksdb/include/rocksdb/version.h +30 -3
  418. package/deps/rocksdb/rocksdb/include/rocksdb/wal_filter.h +11 -2
  419. package/deps/rocksdb/rocksdb/include/rocksdb/write_batch.h +89 -11
  420. package/deps/rocksdb/rocksdb/include/rocksdb/write_batch_base.h +11 -0
  421. package/deps/rocksdb/rocksdb/include/rocksdb/write_buffer_manager.h +108 -38
  422. package/deps/rocksdb/rocksdb/logging/auto_roll_logger.cc +40 -23
  423. package/deps/rocksdb/rocksdb/logging/auto_roll_logger.h +12 -5
  424. package/deps/rocksdb/rocksdb/logging/auto_roll_logger_test.cc +100 -49
  425. package/deps/rocksdb/rocksdb/logging/env_logger.h +7 -5
  426. package/deps/rocksdb/rocksdb/logging/env_logger_test.cc +0 -1
  427. package/deps/rocksdb/rocksdb/logging/posix_logger.h +3 -9
  428. package/deps/rocksdb/rocksdb/memory/arena.cc +3 -1
  429. package/deps/rocksdb/rocksdb/memory/arena.h +1 -1
  430. package/deps/rocksdb/rocksdb/memory/jemalloc_nodump_allocator.cc +171 -106
  431. package/deps/rocksdb/rocksdb/memory/jemalloc_nodump_allocator.h +31 -15
  432. package/deps/rocksdb/rocksdb/memory/memkind_kmem_allocator.cc +15 -4
  433. package/deps/rocksdb/rocksdb/memory/memkind_kmem_allocator.h +24 -8
  434. package/deps/rocksdb/rocksdb/memory/memory_allocator.cc +91 -0
  435. package/deps/rocksdb/rocksdb/memory/memory_allocator_test.cc +239 -0
  436. package/deps/rocksdb/rocksdb/memory/memory_usage.h +14 -1
  437. package/deps/rocksdb/rocksdb/memtable/hash_linklist_rep.cc +72 -9
  438. package/deps/rocksdb/rocksdb/memtable/hash_skiplist_rep.cc +52 -6
  439. package/deps/rocksdb/rocksdb/memtable/inlineskiplist.h +53 -0
  440. package/deps/rocksdb/rocksdb/memtable/inlineskiplist_test.cc +5 -5
  441. package/deps/rocksdb/rocksdb/memtable/memtablerep_bench.cc +17 -5
  442. package/deps/rocksdb/rocksdb/memtable/skiplist_test.cc +1 -1
  443. package/deps/rocksdb/rocksdb/memtable/skiplistrep.cc +87 -0
  444. package/deps/rocksdb/rocksdb/memtable/vectorrep.cc +20 -10
  445. package/deps/rocksdb/rocksdb/memtable/write_buffer_manager.cc +148 -94
  446. package/deps/rocksdb/rocksdb/memtable/write_buffer_manager_test.cc +160 -62
  447. package/deps/rocksdb/rocksdb/microbench/CMakeLists.txt +17 -0
  448. package/deps/rocksdb/rocksdb/microbench/db_basic_bench.cc +1360 -0
  449. package/deps/rocksdb/rocksdb/microbench/ribbon_bench.cc +153 -0
  450. package/deps/rocksdb/rocksdb/monitoring/histogram.cc +8 -15
  451. package/deps/rocksdb/rocksdb/monitoring/histogram.h +0 -1
  452. package/deps/rocksdb/rocksdb/monitoring/histogram_test.cc +18 -16
  453. package/deps/rocksdb/rocksdb/monitoring/histogram_windowing.cc +9 -7
  454. package/deps/rocksdb/rocksdb/monitoring/histogram_windowing.h +5 -3
  455. package/deps/rocksdb/rocksdb/monitoring/instrumented_mutex.cc +7 -5
  456. package/deps/rocksdb/rocksdb/monitoring/instrumented_mutex.h +37 -12
  457. package/deps/rocksdb/rocksdb/monitoring/iostats_context.cc +26 -6
  458. package/deps/rocksdb/rocksdb/monitoring/iostats_context_imp.h +6 -10
  459. package/deps/rocksdb/rocksdb/monitoring/perf_context.cc +14 -13
  460. package/deps/rocksdb/rocksdb/monitoring/perf_context_imp.h +19 -20
  461. package/deps/rocksdb/rocksdb/monitoring/perf_step_timer.h +18 -18
  462. package/deps/rocksdb/rocksdb/monitoring/statistics.cc +84 -2
  463. package/deps/rocksdb/rocksdb/monitoring/statistics.h +6 -0
  464. package/deps/rocksdb/rocksdb/monitoring/statistics_test.cc +47 -2
  465. package/deps/rocksdb/rocksdb/monitoring/stats_history_test.cc +67 -54
  466. package/deps/rocksdb/rocksdb/monitoring/thread_status_updater.cc +4 -1
  467. package/deps/rocksdb/rocksdb/monitoring/thread_status_util.cc +2 -1
  468. package/deps/rocksdb/rocksdb/monitoring/thread_status_util_debug.cc +2 -2
  469. package/deps/rocksdb/rocksdb/options/cf_options.cc +280 -212
  470. package/deps/rocksdb/rocksdb/options/cf_options.h +51 -57
  471. package/deps/rocksdb/rocksdb/options/configurable.cc +242 -138
  472. package/deps/rocksdb/rocksdb/options/configurable_helper.h +4 -68
  473. package/deps/rocksdb/rocksdb/options/configurable_test.cc +144 -21
  474. package/deps/rocksdb/rocksdb/options/configurable_test.h +2 -3
  475. package/deps/rocksdb/rocksdb/options/customizable.cc +67 -7
  476. package/deps/rocksdb/rocksdb/options/customizable_test.cc +1773 -151
  477. package/deps/rocksdb/rocksdb/options/db_options.cc +275 -47
  478. package/deps/rocksdb/rocksdb/options/db_options.h +36 -7
  479. package/deps/rocksdb/rocksdb/options/options.cc +49 -17
  480. package/deps/rocksdb/rocksdb/options/options_helper.cc +369 -352
  481. package/deps/rocksdb/rocksdb/options/options_helper.h +23 -23
  482. package/deps/rocksdb/rocksdb/options/options_parser.cc +18 -13
  483. package/deps/rocksdb/rocksdb/options/options_settable_test.cc +67 -54
  484. package/deps/rocksdb/rocksdb/options/options_test.cc +1162 -187
  485. package/deps/rocksdb/rocksdb/port/jemalloc_helper.h +1 -1
  486. package/deps/rocksdb/rocksdb/port/lang.h +52 -0
  487. package/deps/rocksdb/rocksdb/port/port_example.h +1 -1
  488. package/deps/rocksdb/rocksdb/port/port_posix.cc +31 -2
  489. package/deps/rocksdb/rocksdb/port/port_posix.h +20 -2
  490. package/deps/rocksdb/rocksdb/port/stack_trace.cc +20 -4
  491. package/deps/rocksdb/rocksdb/port/sys_time.h +2 -2
  492. package/deps/rocksdb/rocksdb/port/win/env_default.cc +7 -7
  493. package/deps/rocksdb/rocksdb/port/win/env_win.cc +44 -74
  494. package/deps/rocksdb/rocksdb/port/win/env_win.h +25 -23
  495. package/deps/rocksdb/rocksdb/port/win/io_win.cc +32 -34
  496. package/deps/rocksdb/rocksdb/port/win/io_win.h +12 -6
  497. package/deps/rocksdb/rocksdb/port/win/port_win.cc +55 -35
  498. package/deps/rocksdb/rocksdb/port/win/port_win.h +22 -5
  499. package/deps/rocksdb/rocksdb/port/win/win_logger.cc +3 -3
  500. package/deps/rocksdb/rocksdb/port/win/win_logger.h +3 -5
  501. package/deps/rocksdb/rocksdb/port/win/win_thread.cc +7 -1
  502. package/deps/rocksdb/rocksdb/port/win/win_thread.h +12 -17
  503. package/deps/rocksdb/rocksdb/python.mk +9 -0
  504. package/deps/rocksdb/rocksdb/src.mk +82 -34
  505. package/deps/rocksdb/rocksdb/table/adaptive/adaptive_table_factory.cc +3 -4
  506. package/deps/rocksdb/rocksdb/table/adaptive/adaptive_table_factory.h +1 -1
  507. package/deps/rocksdb/rocksdb/table/block_based/block.cc +158 -80
  508. package/deps/rocksdb/rocksdb/table/block_based/block.h +64 -36
  509. package/deps/rocksdb/rocksdb/table/block_based/block_based_filter_block.cc +23 -14
  510. package/deps/rocksdb/rocksdb/table/block_based/block_based_filter_block.h +13 -5
  511. package/deps/rocksdb/rocksdb/table/block_based/block_based_filter_block_test.cc +3 -218
  512. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_builder.cc +603 -328
  513. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_builder.h +28 -22
  514. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_factory.cc +220 -82
  515. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_factory.h +8 -2
  516. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_iterator.cc +3 -4
  517. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_iterator.h +28 -4
  518. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader.cc +598 -492
  519. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader.h +151 -96
  520. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader_impl.h +31 -58
  521. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader_test.cc +330 -92
  522. package/deps/rocksdb/rocksdb/table/block_based/block_builder.cc +50 -19
  523. package/deps/rocksdb/rocksdb/table/block_based/block_builder.h +23 -0
  524. package/deps/rocksdb/rocksdb/table/block_based/block_like_traits.h +226 -0
  525. package/deps/rocksdb/rocksdb/table/block_based/block_prefetcher.cc +56 -22
  526. package/deps/rocksdb/rocksdb/table/block_based/block_prefetcher.h +42 -4
  527. package/deps/rocksdb/rocksdb/table/block_based/block_test.cc +5 -2
  528. package/deps/rocksdb/rocksdb/table/block_based/block_type.h +2 -0
  529. package/deps/rocksdb/rocksdb/table/block_based/cachable_entry.h +34 -20
  530. package/deps/rocksdb/rocksdb/table/block_based/data_block_hash_index_test.cc +9 -10
  531. package/deps/rocksdb/rocksdb/table/block_based/filter_block.h +26 -3
  532. package/deps/rocksdb/rocksdb/table/block_based/filter_block_reader_common.cc +2 -1
  533. package/deps/rocksdb/rocksdb/table/block_based/filter_policy.cc +844 -202
  534. package/deps/rocksdb/rocksdb/table/block_based/filter_policy_internal.h +281 -81
  535. package/deps/rocksdb/rocksdb/table/block_based/flush_block_policy.cc +62 -2
  536. package/deps/rocksdb/rocksdb/table/block_based/flush_block_policy.h +2 -3
  537. package/deps/rocksdb/rocksdb/table/block_based/full_filter_block.cc +28 -7
  538. package/deps/rocksdb/rocksdb/table/block_based/full_filter_block.h +22 -6
  539. package/deps/rocksdb/rocksdb/table/block_based/full_filter_block_test.cc +28 -26
  540. package/deps/rocksdb/rocksdb/table/block_based/hash_index_reader.cc +1 -1
  541. package/deps/rocksdb/rocksdb/table/block_based/index_builder.cc +1 -2
  542. package/deps/rocksdb/rocksdb/table/block_based/index_reader_common.cc +2 -1
  543. package/deps/rocksdb/rocksdb/table/block_based/mock_block_based_table.h +11 -4
  544. package/deps/rocksdb/rocksdb/table/block_based/parsed_full_filter_block.cc +2 -1
  545. package/deps/rocksdb/rocksdb/table/block_based/parsed_full_filter_block.h +2 -0
  546. package/deps/rocksdb/rocksdb/table/block_based/partitioned_filter_block.cc +68 -26
  547. package/deps/rocksdb/rocksdb/table/block_based/partitioned_filter_block.h +44 -9
  548. package/deps/rocksdb/rocksdb/table/block_based/partitioned_filter_block_test.cc +12 -10
  549. package/deps/rocksdb/rocksdb/table/block_based/partitioned_index_iterator.cc +3 -4
  550. package/deps/rocksdb/rocksdb/table/block_based/partitioned_index_iterator.h +23 -4
  551. package/deps/rocksdb/rocksdb/table/block_based/partitioned_index_reader.cc +44 -19
  552. package/deps/rocksdb/rocksdb/table/block_based/partitioned_index_reader.h +5 -1
  553. package/deps/rocksdb/rocksdb/table/block_based/reader_common.cc +16 -28
  554. package/deps/rocksdb/rocksdb/table/block_based/uncompression_dict_reader.cc +7 -4
  555. package/deps/rocksdb/rocksdb/table/block_based/uncompression_dict_reader.h +2 -2
  556. package/deps/rocksdb/rocksdb/table/block_fetcher.cc +77 -57
  557. package/deps/rocksdb/rocksdb/table/block_fetcher.h +23 -12
  558. package/deps/rocksdb/rocksdb/table/block_fetcher_test.cc +43 -56
  559. package/deps/rocksdb/rocksdb/table/cuckoo/cuckoo_table_builder.cc +8 -8
  560. package/deps/rocksdb/rocksdb/table/cuckoo/cuckoo_table_builder.h +2 -1
  561. package/deps/rocksdb/rocksdb/table/cuckoo/cuckoo_table_builder_test.cc +52 -70
  562. package/deps/rocksdb/rocksdb/table/cuckoo/cuckoo_table_factory.cc +5 -8
  563. package/deps/rocksdb/rocksdb/table/cuckoo/cuckoo_table_factory.h +1 -1
  564. package/deps/rocksdb/rocksdb/table/cuckoo/cuckoo_table_reader.cc +17 -11
  565. package/deps/rocksdb/rocksdb/table/cuckoo/cuckoo_table_reader.h +2 -3
  566. package/deps/rocksdb/rocksdb/table/cuckoo/cuckoo_table_reader_test.cc +42 -51
  567. package/deps/rocksdb/rocksdb/table/format.cc +258 -104
  568. package/deps/rocksdb/rocksdb/table/format.h +120 -109
  569. package/deps/rocksdb/rocksdb/table/get_context.cc +97 -65
  570. package/deps/rocksdb/rocksdb/table/get_context.h +19 -12
  571. package/deps/rocksdb/rocksdb/table/internal_iterator.h +14 -0
  572. package/deps/rocksdb/rocksdb/table/iterator_wrapper.h +8 -0
  573. package/deps/rocksdb/rocksdb/table/merger_test.cc +3 -2
  574. package/deps/rocksdb/rocksdb/table/merging_iterator.cc +11 -21
  575. package/deps/rocksdb/rocksdb/table/merging_iterator.h +3 -3
  576. package/deps/rocksdb/rocksdb/table/meta_blocks.cc +176 -171
  577. package/deps/rocksdb/rocksdb/table/meta_blocks.h +47 -33
  578. package/deps/rocksdb/rocksdb/table/mock_table.cc +7 -9
  579. package/deps/rocksdb/rocksdb/table/mock_table.h +3 -2
  580. package/deps/rocksdb/rocksdb/table/multiget_context.h +15 -8
  581. package/deps/rocksdb/rocksdb/table/persistent_cache_helper.cc +22 -29
  582. package/deps/rocksdb/rocksdb/table/persistent_cache_options.h +6 -3
  583. package/deps/rocksdb/rocksdb/table/plain/plain_table_bloom.h +5 -8
  584. package/deps/rocksdb/rocksdb/table/plain/plain_table_builder.cc +29 -26
  585. package/deps/rocksdb/rocksdb/table/plain/plain_table_builder.h +12 -16
  586. package/deps/rocksdb/rocksdb/table/plain/plain_table_factory.cc +145 -69
  587. package/deps/rocksdb/rocksdb/table/plain/plain_table_factory.h +1 -1
  588. package/deps/rocksdb/rocksdb/table/plain/plain_table_index.cc +7 -6
  589. package/deps/rocksdb/rocksdb/table/plain/plain_table_index.h +3 -4
  590. package/deps/rocksdb/rocksdb/table/plain/plain_table_key_coding.cc +3 -1
  591. package/deps/rocksdb/rocksdb/table/plain/plain_table_key_coding.h +1 -1
  592. package/deps/rocksdb/rocksdb/table/plain/plain_table_reader.cc +13 -18
  593. package/deps/rocksdb/rocksdb/table/plain/plain_table_reader.h +4 -9
  594. package/deps/rocksdb/rocksdb/table/sst_file_dumper.cc +55 -37
  595. package/deps/rocksdb/rocksdb/table/sst_file_dumper.h +10 -5
  596. package/deps/rocksdb/rocksdb/table/sst_file_reader.cc +11 -8
  597. package/deps/rocksdb/rocksdb/table/sst_file_reader_test.cc +222 -16
  598. package/deps/rocksdb/rocksdb/table/sst_file_writer.cc +106 -58
  599. package/deps/rocksdb/rocksdb/table/sst_file_writer_collectors.h +6 -5
  600. package/deps/rocksdb/rocksdb/table/table_builder.h +68 -44
  601. package/deps/rocksdb/rocksdb/table/table_factory.cc +37 -10
  602. package/deps/rocksdb/rocksdb/table/table_properties.cc +109 -54
  603. package/deps/rocksdb/rocksdb/table/table_properties_internal.h +4 -20
  604. package/deps/rocksdb/rocksdb/table/table_reader_bench.cc +33 -32
  605. package/deps/rocksdb/rocksdb/table/table_reader_caller.h +2 -0
  606. package/deps/rocksdb/rocksdb/table/table_test.cc +989 -326
  607. package/deps/rocksdb/rocksdb/table/two_level_iterator.cc +4 -0
  608. package/deps/rocksdb/rocksdb/table/unique_id.cc +166 -0
  609. package/deps/rocksdb/rocksdb/table/unique_id_impl.h +59 -0
  610. package/deps/rocksdb/rocksdb/test_util/mock_time_env.cc +1 -1
  611. package/deps/rocksdb/rocksdb/test_util/mock_time_env.h +13 -10
  612. package/deps/rocksdb/rocksdb/test_util/sync_point.cc +1 -2
  613. package/deps/rocksdb/rocksdb/test_util/sync_point.h +35 -16
  614. package/deps/rocksdb/rocksdb/test_util/sync_point_impl.cc +32 -10
  615. package/deps/rocksdb/rocksdb/test_util/sync_point_impl.h +31 -4
  616. package/deps/rocksdb/rocksdb/test_util/testharness.cc +53 -1
  617. package/deps/rocksdb/rocksdb/test_util/testharness.h +67 -3
  618. package/deps/rocksdb/rocksdb/test_util/testutil.cc +236 -66
  619. package/deps/rocksdb/rocksdb/test_util/testutil.h +63 -100
  620. package/deps/rocksdb/rocksdb/test_util/transaction_test_util.cc +12 -1
  621. package/deps/rocksdb/rocksdb/tools/blob_dump.cc +2 -2
  622. package/deps/rocksdb/rocksdb/tools/block_cache_analyzer/block_cache_trace_analyzer.cc +6 -3
  623. package/deps/rocksdb/rocksdb/tools/block_cache_analyzer/block_cache_trace_analyzer.h +1 -0
  624. package/deps/rocksdb/rocksdb/tools/block_cache_analyzer/block_cache_trace_analyzer_test.cc +9 -3
  625. package/deps/rocksdb/rocksdb/tools/db_bench.cc +1 -1
  626. package/deps/rocksdb/rocksdb/tools/db_bench_tool.cc +1420 -611
  627. package/deps/rocksdb/rocksdb/tools/db_bench_tool_test.cc +11 -8
  628. package/deps/rocksdb/rocksdb/tools/db_repl_stress.cc +11 -1
  629. package/deps/rocksdb/rocksdb/tools/io_tracer_parser_test.cc +4 -2
  630. package/deps/rocksdb/rocksdb/tools/io_tracer_parser_tool.cc +46 -22
  631. package/deps/rocksdb/rocksdb/tools/ldb_cmd.cc +655 -179
  632. package/deps/rocksdb/rocksdb/tools/ldb_cmd_impl.h +58 -6
  633. package/deps/rocksdb/rocksdb/tools/ldb_cmd_test.cc +472 -29
  634. package/deps/rocksdb/rocksdb/tools/ldb_tool.cc +23 -2
  635. package/deps/rocksdb/rocksdb/tools/reduce_levels_test.cc +2 -2
  636. package/deps/rocksdb/rocksdb/tools/simulated_hybrid_file_system.cc +246 -0
  637. package/deps/rocksdb/rocksdb/tools/simulated_hybrid_file_system.h +126 -0
  638. package/deps/rocksdb/rocksdb/tools/sst_dump_test.cc +83 -29
  639. package/deps/rocksdb/rocksdb/tools/sst_dump_tool.cc +38 -17
  640. package/deps/rocksdb/rocksdb/tools/trace_analyzer_test.cc +191 -55
  641. package/deps/rocksdb/rocksdb/tools/trace_analyzer_tool.cc +219 -296
  642. package/deps/rocksdb/rocksdb/tools/trace_analyzer_tool.h +87 -53
  643. package/deps/rocksdb/rocksdb/tools/write_stress.cc +8 -7
  644. package/deps/rocksdb/rocksdb/trace_replay/block_cache_tracer.cc +6 -5
  645. package/deps/rocksdb/rocksdb/trace_replay/block_cache_tracer.h +5 -4
  646. package/deps/rocksdb/rocksdb/trace_replay/block_cache_tracer_test.cc +14 -9
  647. package/deps/rocksdb/rocksdb/trace_replay/io_tracer.cc +134 -60
  648. package/deps/rocksdb/rocksdb/trace_replay/io_tracer.h +49 -38
  649. package/deps/rocksdb/rocksdb/trace_replay/io_tracer_test.cc +152 -15
  650. package/deps/rocksdb/rocksdb/trace_replay/trace_record.cc +206 -0
  651. package/deps/rocksdb/rocksdb/trace_replay/trace_record_handler.cc +190 -0
  652. package/deps/rocksdb/rocksdb/trace_replay/trace_record_handler.h +46 -0
  653. package/deps/rocksdb/rocksdb/trace_replay/trace_record_result.cc +146 -0
  654. package/deps/rocksdb/rocksdb/trace_replay/trace_replay.cc +475 -344
  655. package/deps/rocksdb/rocksdb/trace_replay/trace_replay.h +83 -95
  656. package/deps/rocksdb/rocksdb/util/autovector.h +38 -18
  657. package/deps/rocksdb/rocksdb/util/autovector_test.cc +1 -1
  658. package/deps/rocksdb/rocksdb/util/bloom_impl.h +4 -0
  659. package/deps/rocksdb/rocksdb/util/bloom_test.cc +276 -94
  660. package/deps/rocksdb/rocksdb/util/build_version.cc.in +81 -4
  661. package/deps/rocksdb/rocksdb/util/cast_util.h +22 -0
  662. package/deps/rocksdb/rocksdb/util/channel.h +2 -0
  663. package/deps/rocksdb/rocksdb/util/coding.h +1 -33
  664. package/deps/rocksdb/rocksdb/util/compaction_job_stats_impl.cc +8 -0
  665. package/deps/rocksdb/rocksdb/util/comparator.cc +163 -3
  666. package/deps/rocksdb/rocksdb/util/compression.cc +122 -0
  667. package/deps/rocksdb/rocksdb/util/compression.h +212 -7
  668. package/deps/rocksdb/rocksdb/util/compression_context_cache.cc +1 -3
  669. package/deps/rocksdb/rocksdb/util/crc32c.cc +165 -2
  670. package/deps/rocksdb/rocksdb/util/crc32c.h +6 -0
  671. package/deps/rocksdb/rocksdb/util/crc32c_arm64.cc +14 -0
  672. package/deps/rocksdb/rocksdb/util/crc32c_ppc.h +3 -0
  673. package/deps/rocksdb/rocksdb/util/crc32c_test.cc +47 -0
  674. package/deps/rocksdb/rocksdb/util/defer.h +30 -1
  675. package/deps/rocksdb/rocksdb/util/defer_test.cc +11 -0
  676. package/deps/rocksdb/rocksdb/util/duplicate_detector.h +3 -1
  677. package/deps/rocksdb/rocksdb/util/dynamic_bloom.h +3 -3
  678. package/deps/rocksdb/rocksdb/util/dynamic_bloom_test.cc +5 -4
  679. package/deps/rocksdb/rocksdb/util/fastrange.h +2 -0
  680. package/deps/rocksdb/rocksdb/util/file_checksum_helper.cc +36 -0
  681. package/deps/rocksdb/rocksdb/util/file_checksum_helper.h +3 -1
  682. package/deps/rocksdb/rocksdb/util/file_reader_writer_test.cc +512 -52
  683. package/deps/rocksdb/rocksdb/util/filter_bench.cc +65 -10
  684. package/deps/rocksdb/rocksdb/util/gflags_compat.h +6 -1
  685. package/deps/rocksdb/rocksdb/util/hash.cc +121 -3
  686. package/deps/rocksdb/rocksdb/util/hash.h +31 -1
  687. package/deps/rocksdb/rocksdb/util/hash128.h +26 -0
  688. package/deps/rocksdb/rocksdb/util/hash_containers.h +51 -0
  689. package/deps/rocksdb/rocksdb/util/hash_test.cc +194 -2
  690. package/deps/rocksdb/rocksdb/util/heap.h +6 -1
  691. package/deps/rocksdb/rocksdb/util/kv_map.h +1 -1
  692. package/deps/rocksdb/rocksdb/util/log_write_bench.cc +8 -6
  693. package/deps/rocksdb/rocksdb/util/math.h +74 -7
  694. package/deps/rocksdb/rocksdb/util/math128.h +13 -1
  695. package/deps/rocksdb/rocksdb/util/murmurhash.h +3 -3
  696. package/deps/rocksdb/rocksdb/util/random.cc +9 -0
  697. package/deps/rocksdb/rocksdb/util/random.h +6 -0
  698. package/deps/rocksdb/rocksdb/util/rate_limiter.cc +298 -144
  699. package/deps/rocksdb/rocksdb/util/rate_limiter.h +68 -19
  700. package/deps/rocksdb/rocksdb/util/rate_limiter_test.cc +335 -23
  701. package/deps/rocksdb/rocksdb/util/repeatable_thread.h +10 -12
  702. package/deps/rocksdb/rocksdb/util/repeatable_thread_test.cc +18 -15
  703. package/deps/rocksdb/rocksdb/util/ribbon_alg.h +98 -74
  704. package/deps/rocksdb/rocksdb/util/ribbon_config.cc +506 -0
  705. package/deps/rocksdb/rocksdb/util/ribbon_config.h +182 -0
  706. package/deps/rocksdb/rocksdb/util/ribbon_impl.h +154 -79
  707. package/deps/rocksdb/rocksdb/util/ribbon_test.cc +742 -365
  708. package/deps/rocksdb/rocksdb/util/set_comparator.h +2 -0
  709. package/deps/rocksdb/rocksdb/util/slice.cc +198 -35
  710. package/deps/rocksdb/rocksdb/util/slice_test.cc +30 -1
  711. package/deps/rocksdb/rocksdb/util/status.cc +32 -29
  712. package/deps/rocksdb/rocksdb/util/stop_watch.h +18 -18
  713. package/deps/rocksdb/rocksdb/util/string_util.cc +85 -6
  714. package/deps/rocksdb/rocksdb/util/string_util.h +47 -2
  715. package/deps/rocksdb/rocksdb/util/thread_guard.h +41 -0
  716. package/deps/rocksdb/rocksdb/util/thread_local.h +2 -2
  717. package/deps/rocksdb/rocksdb/util/thread_local_test.cc +22 -24
  718. package/deps/rocksdb/rocksdb/util/threadpool_imp.cc +7 -6
  719. package/deps/rocksdb/rocksdb/util/timer.h +55 -46
  720. package/deps/rocksdb/rocksdb/util/timer_test.cc +50 -48
  721. package/deps/rocksdb/rocksdb/util/user_comparator_wrapper.h +4 -0
  722. package/deps/rocksdb/rocksdb/util/vector_iterator.h +31 -15
  723. package/deps/rocksdb/rocksdb/util/work_queue.h +2 -0
  724. package/deps/rocksdb/rocksdb/util/xxhash.cc +35 -1144
  725. package/deps/rocksdb/rocksdb/util/xxhash.h +5117 -373
  726. package/deps/rocksdb/rocksdb/util/xxph3.h +1762 -0
  727. package/deps/rocksdb/rocksdb/utilities/agg_merge/agg_merge.cc +238 -0
  728. package/deps/rocksdb/rocksdb/utilities/agg_merge/agg_merge.h +49 -0
  729. package/deps/rocksdb/rocksdb/utilities/agg_merge/agg_merge_test.cc +134 -0
  730. package/deps/rocksdb/rocksdb/utilities/agg_merge/test_agg_merge.cc +104 -0
  731. package/deps/rocksdb/rocksdb/utilities/agg_merge/test_agg_merge.h +47 -0
  732. package/deps/rocksdb/rocksdb/utilities/backup/backup_engine.cc +3164 -0
  733. package/deps/rocksdb/rocksdb/utilities/backup/backup_engine_impl.h +29 -0
  734. package/deps/rocksdb/rocksdb/utilities/{backupable/backupable_db_test.cc → backup/backup_engine_test.cc} +1679 -485
  735. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_compaction_filter.cc +6 -4
  736. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_compaction_filter.h +14 -9
  737. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_db.cc +2 -0
  738. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_db.h +1 -0
  739. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_db_gc_stats.h +4 -0
  740. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_db_impl.cc +37 -27
  741. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_db_impl.h +8 -4
  742. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_db_impl_filesnapshot.cc +1 -1
  743. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_db_iterator.h +13 -10
  744. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_db_listener.h +5 -0
  745. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_db_test.cc +44 -25
  746. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_dump_tool.cc +3 -4
  747. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_file.cc +27 -19
  748. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_file.h +4 -2
  749. package/deps/rocksdb/rocksdb/utilities/cache_dump_load.cc +69 -0
  750. package/deps/rocksdb/rocksdb/utilities/cache_dump_load_impl.cc +489 -0
  751. package/deps/rocksdb/rocksdb/utilities/cache_dump_load_impl.h +366 -0
  752. package/deps/rocksdb/rocksdb/utilities/cassandra/cassandra_compaction_filter.cc +67 -4
  753. package/deps/rocksdb/rocksdb/utilities/cassandra/cassandra_compaction_filter.h +21 -6
  754. package/deps/rocksdb/rocksdb/utilities/cassandra/cassandra_functional_test.cc +107 -7
  755. package/deps/rocksdb/rocksdb/utilities/cassandra/cassandra_options.h +43 -0
  756. package/deps/rocksdb/rocksdb/utilities/cassandra/format.h +1 -1
  757. package/deps/rocksdb/rocksdb/utilities/cassandra/merge_operator.cc +24 -8
  758. package/deps/rocksdb/rocksdb/utilities/cassandra/merge_operator.h +7 -7
  759. package/deps/rocksdb/rocksdb/utilities/cassandra/serialize.h +5 -0
  760. package/deps/rocksdb/rocksdb/utilities/checkpoint/checkpoint_impl.cc +99 -218
  761. package/deps/rocksdb/rocksdb/utilities/checkpoint/checkpoint_impl.h +8 -24
  762. package/deps/rocksdb/rocksdb/utilities/checkpoint/checkpoint_test.cc +114 -1
  763. package/deps/rocksdb/rocksdb/utilities/compaction_filters/layered_compaction_filter_base.h +6 -2
  764. package/deps/rocksdb/rocksdb/utilities/compaction_filters/remove_emptyvalue_compactionfilter.cc +0 -4
  765. package/deps/rocksdb/rocksdb/utilities/compaction_filters/remove_emptyvalue_compactionfilter.h +7 -6
  766. package/deps/rocksdb/rocksdb/utilities/compaction_filters.cc +56 -0
  767. package/deps/rocksdb/rocksdb/utilities/convenience/info_log_finder.cc +2 -2
  768. package/deps/rocksdb/rocksdb/utilities/counted_fs.cc +355 -0
  769. package/deps/rocksdb/rocksdb/utilities/counted_fs.h +152 -0
  770. package/deps/rocksdb/rocksdb/utilities/env_mirror.cc +13 -0
  771. package/deps/rocksdb/rocksdb/utilities/env_timed.cc +164 -122
  772. package/deps/rocksdb/rocksdb/utilities/env_timed.h +97 -0
  773. package/deps/rocksdb/rocksdb/utilities/fault_injection_env.cc +75 -17
  774. package/deps/rocksdb/rocksdb/utilities/fault_injection_env.h +19 -3
  775. package/deps/rocksdb/rocksdb/utilities/fault_injection_fs.cc +539 -126
  776. package/deps/rocksdb/rocksdb/utilities/fault_injection_fs.h +162 -17
  777. package/deps/rocksdb/rocksdb/utilities/fault_injection_secondary_cache.cc +110 -0
  778. package/deps/rocksdb/rocksdb/utilities/fault_injection_secondary_cache.h +94 -0
  779. package/deps/rocksdb/rocksdb/utilities/memory/memory_test.cc +5 -2
  780. package/deps/rocksdb/rocksdb/utilities/memory_allocators.h +104 -0
  781. package/deps/rocksdb/rocksdb/utilities/merge_operators/bytesxor.h +5 -3
  782. package/deps/rocksdb/rocksdb/utilities/merge_operators/max.cc +4 -1
  783. package/deps/rocksdb/rocksdb/utilities/merge_operators/put.cc +11 -3
  784. package/deps/rocksdb/rocksdb/utilities/merge_operators/sortlist.cc +0 -2
  785. package/deps/rocksdb/rocksdb/utilities/merge_operators/sortlist.h +5 -1
  786. package/deps/rocksdb/rocksdb/utilities/merge_operators/string_append/stringappend.cc +29 -10
  787. package/deps/rocksdb/rocksdb/utilities/merge_operators/string_append/stringappend.h +6 -3
  788. package/deps/rocksdb/rocksdb/utilities/merge_operators/string_append/stringappend2.cc +29 -14
  789. package/deps/rocksdb/rocksdb/utilities/merge_operators/string_append/stringappend2.h +6 -3
  790. package/deps/rocksdb/rocksdb/utilities/merge_operators/string_append/stringappend_test.cc +71 -18
  791. package/deps/rocksdb/rocksdb/utilities/merge_operators/uint64add.cc +15 -9
  792. package/deps/rocksdb/rocksdb/utilities/merge_operators.cc +120 -0
  793. package/deps/rocksdb/rocksdb/utilities/merge_operators.h +3 -23
  794. package/deps/rocksdb/rocksdb/utilities/object_registry.cc +267 -42
  795. package/deps/rocksdb/rocksdb/utilities/object_registry_test.cc +702 -76
  796. package/deps/rocksdb/rocksdb/utilities/option_change_migration/option_change_migration.cc +1 -1
  797. package/deps/rocksdb/rocksdb/utilities/option_change_migration/option_change_migration_test.cc +26 -5
  798. package/deps/rocksdb/rocksdb/utilities/options/options_util.cc +1 -1
  799. package/deps/rocksdb/rocksdb/utilities/options/options_util_test.cc +124 -1
  800. package/deps/rocksdb/rocksdb/utilities/persistent_cache/block_cache_tier.cc +2 -3
  801. package/deps/rocksdb/rocksdb/utilities/persistent_cache/block_cache_tier.h +8 -9
  802. package/deps/rocksdb/rocksdb/utilities/persistent_cache/block_cache_tier_file.cc +15 -13
  803. package/deps/rocksdb/rocksdb/utilities/persistent_cache/block_cache_tier_file.h +1 -1
  804. package/deps/rocksdb/rocksdb/utilities/persistent_cache/block_cache_tier_metadata.h +4 -4
  805. package/deps/rocksdb/rocksdb/utilities/persistent_cache/hash_table_evictable.h +2 -2
  806. package/deps/rocksdb/rocksdb/utilities/persistent_cache/persistent_cache_bench.cc +8 -9
  807. package/deps/rocksdb/rocksdb/utilities/persistent_cache/persistent_cache_test.cc +1 -1
  808. package/deps/rocksdb/rocksdb/utilities/persistent_cache/persistent_cache_tier.h +6 -3
  809. package/deps/rocksdb/rocksdb/utilities/persistent_cache/volatile_tier_impl.h +2 -2
  810. package/deps/rocksdb/rocksdb/utilities/simulator_cache/cache_simulator.cc +3 -0
  811. package/deps/rocksdb/rocksdb/utilities/simulator_cache/cache_simulator_test.cc +2 -0
  812. package/deps/rocksdb/rocksdb/utilities/simulator_cache/sim_cache.cc +43 -35
  813. package/deps/rocksdb/rocksdb/utilities/simulator_cache/sim_cache_test.cc +20 -18
  814. package/deps/rocksdb/rocksdb/utilities/table_properties_collectors/compact_on_deletion_collector.cc +107 -2
  815. package/deps/rocksdb/rocksdb/utilities/trace/file_trace_reader_writer.cc +23 -15
  816. package/deps/rocksdb/rocksdb/utilities/trace/file_trace_reader_writer.h +2 -2
  817. package/deps/rocksdb/rocksdb/utilities/trace/replayer_impl.cc +316 -0
  818. package/deps/rocksdb/rocksdb/utilities/trace/replayer_impl.h +86 -0
  819. package/deps/rocksdb/rocksdb/utilities/transactions/lock/point/point_lock_manager.cc +4 -5
  820. package/deps/rocksdb/rocksdb/utilities/transactions/lock/point/point_lock_manager.h +4 -3
  821. package/deps/rocksdb/rocksdb/utilities/transactions/lock/point/point_lock_manager_test.h +1 -1
  822. package/deps/rocksdb/rocksdb/utilities/transactions/lock/range/range_locking_test.cc +119 -3
  823. package/deps/rocksdb/rocksdb/utilities/transactions/lock/range/range_tree/lib/locktree/locktree.cc +20 -3
  824. package/deps/rocksdb/rocksdb/utilities/transactions/lock/range/range_tree/lib/locktree/locktree.h +20 -0
  825. package/deps/rocksdb/rocksdb/utilities/transactions/lock/range/range_tree/lib/portability/toku_external_pthread.h +3 -2
  826. package/deps/rocksdb/rocksdb/utilities/transactions/lock/range/range_tree/lib/portability/toku_time.h +4 -0
  827. package/deps/rocksdb/rocksdb/utilities/transactions/lock/range/range_tree/range_tree_lock_manager.cc +38 -14
  828. package/deps/rocksdb/rocksdb/utilities/transactions/lock/range/range_tree/range_tree_lock_manager.h +17 -10
  829. package/deps/rocksdb/rocksdb/utilities/transactions/optimistic_transaction_db_impl.h +1 -0
  830. package/deps/rocksdb/rocksdb/utilities/transactions/optimistic_transaction_test.cc +1 -2
  831. package/deps/rocksdb/rocksdb/utilities/transactions/pessimistic_transaction.cc +423 -34
  832. package/deps/rocksdb/rocksdb/utilities/transactions/pessimistic_transaction.h +82 -2
  833. package/deps/rocksdb/rocksdb/utilities/transactions/pessimistic_transaction_db.cc +72 -40
  834. package/deps/rocksdb/rocksdb/utilities/transactions/pessimistic_transaction_db.h +32 -1
  835. package/deps/rocksdb/rocksdb/utilities/transactions/transaction_base.cc +13 -5
  836. package/deps/rocksdb/rocksdb/utilities/transactions/transaction_base.h +7 -3
  837. package/deps/rocksdb/rocksdb/utilities/transactions/transaction_test.cc +207 -43
  838. package/deps/rocksdb/rocksdb/utilities/transactions/transaction_test.h +50 -7
  839. package/deps/rocksdb/rocksdb/utilities/transactions/transaction_util.cc +28 -10
  840. package/deps/rocksdb/rocksdb/utilities/transactions/transaction_util.h +11 -6
  841. package/deps/rocksdb/rocksdb/utilities/transactions/write_committed_transaction_ts_test.cc +516 -0
  842. package/deps/rocksdb/rocksdb/utilities/transactions/write_prepared_transaction_test.cc +506 -15
  843. package/deps/rocksdb/rocksdb/utilities/transactions/write_prepared_txn.cc +27 -13
  844. package/deps/rocksdb/rocksdb/utilities/transactions/write_prepared_txn_db.cc +14 -14
  845. package/deps/rocksdb/rocksdb/utilities/transactions/write_prepared_txn_db.h +3 -0
  846. package/deps/rocksdb/rocksdb/utilities/transactions/write_unprepared_transaction_test.cc +2 -2
  847. package/deps/rocksdb/rocksdb/utilities/transactions/write_unprepared_txn.cc +14 -5
  848. package/deps/rocksdb/rocksdb/utilities/ttl/db_ttl_impl.cc +305 -27
  849. package/deps/rocksdb/rocksdb/utilities/ttl/db_ttl_impl.h +55 -159
  850. package/deps/rocksdb/rocksdb/utilities/ttl/ttl_test.cc +209 -2
  851. package/deps/rocksdb/rocksdb/utilities/wal_filter.cc +23 -0
  852. package/deps/rocksdb/rocksdb/utilities/write_batch_with_index/write_batch_with_index.cc +157 -88
  853. package/deps/rocksdb/rocksdb/utilities/write_batch_with_index/write_batch_with_index_internal.cc +501 -114
  854. package/deps/rocksdb/rocksdb/utilities/write_batch_with_index/write_batch_with_index_internal.h +91 -316
  855. package/deps/rocksdb/rocksdb/utilities/write_batch_with_index/write_batch_with_index_test.cc +1212 -672
  856. package/deps/rocksdb/rocksdb.gyp +425 -446
  857. package/index.js +5 -87
  858. package/package-lock.json +23687 -0
  859. package/package.json +8 -9
  860. package/prebuilds/darwin-arm64/node.napi.node +0 -0
  861. package/prebuilds/darwin-x64/node.napi.node +0 -0
  862. package/prebuilds/{darwin-x64+arm64 → linux-x64}/node.napi.node +0 -0
  863. package/deps/rocksdb/rocksdb/README.md +0 -32
  864. package/deps/rocksdb/rocksdb/env/env_hdfs.cc +0 -648
  865. package/deps/rocksdb/rocksdb/hdfs/README +0 -23
  866. package/deps/rocksdb/rocksdb/hdfs/env_hdfs.h +0 -386
  867. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/backupable_db.h +0 -535
  868. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/env_librados.h +0 -175
  869. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/utility_db.h +0 -34
  870. package/deps/rocksdb/rocksdb/memory/memkind_kmem_allocator_test.cc +0 -102
  871. package/deps/rocksdb/rocksdb/memtable/hash_linklist_rep.h +0 -49
  872. package/deps/rocksdb/rocksdb/memtable/hash_skiplist_rep.h +0 -44
  873. package/deps/rocksdb/rocksdb/options/customizable_helper.h +0 -216
  874. package/deps/rocksdb/rocksdb/port/README +0 -10
  875. package/deps/rocksdb/rocksdb/third-party/folly/folly/CPortability.h +0 -27
  876. package/deps/rocksdb/rocksdb/third-party/folly/folly/ConstexprMath.h +0 -45
  877. package/deps/rocksdb/rocksdb/third-party/folly/folly/Indestructible.h +0 -166
  878. package/deps/rocksdb/rocksdb/third-party/folly/folly/Optional.h +0 -570
  879. package/deps/rocksdb/rocksdb/third-party/folly/folly/Portability.h +0 -92
  880. package/deps/rocksdb/rocksdb/third-party/folly/folly/ScopeGuard.h +0 -54
  881. package/deps/rocksdb/rocksdb/third-party/folly/folly/Traits.h +0 -152
  882. package/deps/rocksdb/rocksdb/third-party/folly/folly/Unit.h +0 -59
  883. package/deps/rocksdb/rocksdb/third-party/folly/folly/Utility.h +0 -141
  884. package/deps/rocksdb/rocksdb/third-party/folly/folly/chrono/Hardware.h +0 -33
  885. package/deps/rocksdb/rocksdb/third-party/folly/folly/container/Array.h +0 -74
  886. package/deps/rocksdb/rocksdb/third-party/folly/folly/detail/Futex-inl.h +0 -117
  887. package/deps/rocksdb/rocksdb/third-party/folly/folly/detail/Futex.cpp +0 -263
  888. package/deps/rocksdb/rocksdb/third-party/folly/folly/detail/Futex.h +0 -96
  889. package/deps/rocksdb/rocksdb/third-party/folly/folly/functional/Invoke.h +0 -40
  890. package/deps/rocksdb/rocksdb/third-party/folly/folly/hash/Hash.h +0 -29
  891. package/deps/rocksdb/rocksdb/third-party/folly/folly/lang/Align.h +0 -144
  892. package/deps/rocksdb/rocksdb/third-party/folly/folly/lang/Bits.h +0 -30
  893. package/deps/rocksdb/rocksdb/third-party/folly/folly/lang/Launder.h +0 -51
  894. package/deps/rocksdb/rocksdb/third-party/folly/folly/portability/Asm.h +0 -28
  895. package/deps/rocksdb/rocksdb/third-party/folly/folly/portability/SysSyscall.h +0 -10
  896. package/deps/rocksdb/rocksdb/third-party/folly/folly/portability/SysTypes.h +0 -26
  897. package/deps/rocksdb/rocksdb/third-party/folly/folly/synchronization/AtomicNotification-inl.h +0 -138
  898. package/deps/rocksdb/rocksdb/third-party/folly/folly/synchronization/AtomicNotification.cpp +0 -23
  899. package/deps/rocksdb/rocksdb/third-party/folly/folly/synchronization/AtomicNotification.h +0 -57
  900. package/deps/rocksdb/rocksdb/third-party/folly/folly/synchronization/AtomicUtil-inl.h +0 -260
  901. package/deps/rocksdb/rocksdb/third-party/folly/folly/synchronization/AtomicUtil.h +0 -52
  902. package/deps/rocksdb/rocksdb/third-party/folly/folly/synchronization/Baton.h +0 -328
  903. package/deps/rocksdb/rocksdb/third-party/folly/folly/synchronization/DistributedMutex-inl.h +0 -1703
  904. package/deps/rocksdb/rocksdb/third-party/folly/folly/synchronization/DistributedMutex.cpp +0 -16
  905. package/deps/rocksdb/rocksdb/third-party/folly/folly/synchronization/DistributedMutex.h +0 -304
  906. package/deps/rocksdb/rocksdb/third-party/folly/folly/synchronization/DistributedMutexSpecializations.h +0 -39
  907. package/deps/rocksdb/rocksdb/third-party/folly/folly/synchronization/ParkingLot.cpp +0 -26
  908. package/deps/rocksdb/rocksdb/third-party/folly/folly/synchronization/ParkingLot.h +0 -318
  909. package/deps/rocksdb/rocksdb/third-party/folly/folly/synchronization/WaitOptions.h +0 -57
  910. package/deps/rocksdb/rocksdb/third-party/folly/folly/synchronization/detail/InlineFunctionRef.h +0 -219
  911. package/deps/rocksdb/rocksdb/third-party/folly/folly/synchronization/detail/ProxyLockable-inl.h +0 -207
  912. package/deps/rocksdb/rocksdb/third-party/folly/folly/synchronization/detail/ProxyLockable.h +0 -164
  913. package/deps/rocksdb/rocksdb/third-party/folly/folly/synchronization/detail/Sleeper.h +0 -57
  914. package/deps/rocksdb/rocksdb/third-party/folly/folly/synchronization/detail/Spin.h +0 -77
  915. package/deps/rocksdb/rocksdb/third-party/folly/folly/synchronization/test/DistributedMutexTest.cpp +0 -1145
  916. package/deps/rocksdb/rocksdb/util/build_version.h +0 -15
  917. package/deps/rocksdb/rocksdb/util/xxh3p.h +0 -1392
  918. package/deps/rocksdb/rocksdb/utilities/backupable/backupable_db.cc +0 -2354
  919. package/deps/rocksdb/rocksdb/utilities/env_librados.cc +0 -1497
  920. package/deps/rocksdb/rocksdb/utilities/env_librados_test.cc +0 -1146
  921. package/deps/rocksdb/rocksdb/utilities/transactions/lock/range/range_tree/lib/README +0 -13
  922. package/deps/snappy/snappy-1.1.7/README.md +0 -149
  923. package/prebuilds/linux-x64/node.napi.glibc.node +0 -0
@@ -7,12 +7,14 @@
7
7
  // Use of this source code is governed by a BSD-style license that can be
8
8
  // found in the LICENSE file. See the AUTHORS file for names of contributors.
9
9
  #include <cinttypes>
10
+ #include <deque>
10
11
 
11
12
  #include "db/builder.h"
12
13
  #include "db/db_impl/db_impl.h"
13
14
  #include "db/error_handler.h"
14
15
  #include "db/event_helpers.h"
15
16
  #include "file/sst_file_manager_impl.h"
17
+ #include "logging/logging.h"
16
18
  #include "monitoring/iostats_context_imp.h"
17
19
  #include "monitoring/perf_context_imp.h"
18
20
  #include "monitoring/thread_status_updater.h"
@@ -101,6 +103,8 @@ IOStatus DBImpl::SyncClosedLogs(JobContext* job_context) {
101
103
  if (!logs_to_sync.empty()) {
102
104
  mutex_.Unlock();
103
105
 
106
+ assert(job_context);
107
+
104
108
  for (log::Writer* log : logs_to_sync) {
105
109
  ROCKS_LOG_INFO(immutable_db_options_.info_log,
106
110
  "[JOB %d] Syncing log #%" PRIu64, job_context->job_id,
@@ -118,9 +122,13 @@ IOStatus DBImpl::SyncClosedLogs(JobContext* job_context) {
118
122
  }
119
123
  }
120
124
  if (io_s.ok()) {
121
- io_s = directories_.GetWalDir()->Fsync(IOOptions(), nullptr);
125
+ io_s = directories_.GetWalDir()->FsyncWithDirOptions(
126
+ IOOptions(), nullptr,
127
+ DirFsyncOptions(DirFsyncOptions::FsyncReason::kNewFileSynced));
122
128
  }
123
129
 
130
+ TEST_SYNC_POINT_CALLBACK("DBImpl::SyncClosedLogs:BeforeReLock",
131
+ /*arg=*/nullptr);
124
132
  mutex_.Lock();
125
133
 
126
134
  // "number <= current_log_number - 1" is equivalent to
@@ -131,16 +139,11 @@ IOStatus DBImpl::SyncClosedLogs(JobContext* job_context) {
131
139
  MarkLogsNotSynced(current_log_number - 1);
132
140
  }
133
141
  if (!io_s.ok()) {
134
- if (total_log_size_ > 0) {
135
- error_handler_.SetBGError(io_s, BackgroundErrorReason::kFlush);
136
- } else {
137
- // If the WAL is empty, we use different error reason
138
- error_handler_.SetBGError(io_s, BackgroundErrorReason::kFlushNoWAL);
139
- }
140
142
  TEST_SYNC_POINT("DBImpl::SyncClosedLogs:Failed");
141
143
  return io_s;
142
144
  }
143
145
  }
146
+ TEST_SYNC_POINT("DBImpl::SyncClosedLogs:end");
144
147
  return io_s;
145
148
  }
146
149
 
@@ -154,50 +157,97 @@ Status DBImpl::FlushMemTableToOutputFile(
154
157
  Env::Priority thread_pri) {
155
158
  mutex_.AssertHeld();
156
159
  assert(cfd);
160
+ assert(cfd->imm());
157
161
  assert(cfd->imm()->NumNotFlushed() != 0);
158
162
  assert(cfd->imm()->IsFlushPending());
163
+ assert(versions_);
164
+ assert(versions_->GetColumnFamilySet());
165
+ // If there are more than one column families, we need to make sure that
166
+ // all the log files except the most recent one are synced. Otherwise if
167
+ // the host crashes after flushing and before WAL is persistent, the
168
+ // flushed SST may contain data from write batches whose updates to
169
+ // other (unflushed) column families are missing.
170
+ const bool needs_to_sync_closed_wals =
171
+ logfile_number_ > 0 &&
172
+ versions_->GetColumnFamilySet()->NumberOfColumnFamilies() > 1;
173
+
174
+ // If needs_to_sync_closed_wals is true, we need to record the current
175
+ // maximum memtable ID of this column family so that a later PickMemtables()
176
+ // call will not pick memtables whose IDs are higher. This is due to the fact
177
+ // that SyncClosedLogs() may release the db mutex, and memtable switch can
178
+ // happen for this column family in the meantime. The newly created memtables
179
+ // have their data backed by unsynced WALs, thus they cannot be included in
180
+ // this flush job.
181
+ // Another reason why we must record the current maximum memtable ID of this
182
+ // column family: SyncClosedLogs() may release db mutex, thus it's possible
183
+ // for application to continue to insert into memtables increasing db's
184
+ // sequence number. The application may take a snapshot, but this snapshot is
185
+ // not included in `snapshot_seqs` which will be passed to flush job because
186
+ // `snapshot_seqs` has already been computed before this function starts.
187
+ // Recording the max memtable ID ensures that the flush job does not flush
188
+ // a memtable without knowing such snapshot(s).
189
+ uint64_t max_memtable_id = needs_to_sync_closed_wals
190
+ ? cfd->imm()->GetLatestMemTableID()
191
+ : port::kMaxUint64;
192
+
193
+ // If needs_to_sync_closed_wals is false, then the flush job will pick ALL
194
+ // existing memtables of the column family when PickMemTable() is called
195
+ // later. Although we won't call SyncClosedLogs() in this case, we may still
196
+ // call the callbacks of the listeners, i.e. NotifyOnFlushBegin() which also
197
+ // releases and re-acquires the db mutex. In the meantime, the application
198
+ // can still insert into the memtables and increase the db's sequence number.
199
+ // The application can take a snapshot, hoping that the latest visible state
200
+ // to this snapshot is preserved. This is hard to guarantee since db mutex
201
+ // not held. This newly-created snapshot is not included in `snapshot_seqs`
202
+ // and the flush job is unaware of its presence. Consequently, the flush job
203
+ // may drop certain keys when generating the L0, causing incorrect data to be
204
+ // returned for snapshot read using this snapshot.
205
+ // To address this, we make sure NotifyOnFlushBegin() executes after memtable
206
+ // picking so that no new snapshot can be taken between the two functions.
159
207
 
160
208
  FlushJob flush_job(
161
- dbname_, cfd, immutable_db_options_, mutable_cf_options,
162
- port::kMaxUint64 /* memtable_id */, file_options_for_compaction_,
163
- versions_.get(), &mutex_, &shutting_down_, snapshot_seqs,
164
- earliest_write_conflict_snapshot, snapshot_checker, job_context,
165
- log_buffer, directories_.GetDbDir(), GetDataDir(cfd, 0U),
209
+ dbname_, cfd, immutable_db_options_, mutable_cf_options, max_memtable_id,
210
+ file_options_for_compaction_, versions_.get(), &mutex_, &shutting_down_,
211
+ snapshot_seqs, earliest_write_conflict_snapshot, snapshot_checker,
212
+ job_context, log_buffer, directories_.GetDbDir(), GetDataDir(cfd, 0U),
166
213
  GetCompressionFlush(*cfd->ioptions(), mutable_cf_options), stats_,
167
214
  &event_logger_, mutable_cf_options.report_bg_io_stats,
168
215
  true /* sync_output_directory */, true /* write_manifest */, thread_pri,
169
- io_tracer_, db_id_, db_session_id_, cfd->GetFullHistoryTsLow());
216
+ io_tracer_, db_id_, db_session_id_, cfd->GetFullHistoryTsLow(),
217
+ &blob_callback_);
170
218
  FileMetaData file_meta;
171
219
 
172
- TEST_SYNC_POINT("DBImpl::FlushMemTableToOutputFile:BeforePickMemtables");
173
- flush_job.PickMemTable();
174
- TEST_SYNC_POINT("DBImpl::FlushMemTableToOutputFile:AfterPickMemtables");
175
-
176
- #ifndef ROCKSDB_LITE
177
- // may temporarily unlock and lock the mutex.
178
- NotifyOnFlushBegin(cfd, &file_meta, mutable_cf_options, job_context->job_id);
179
- #endif // ROCKSDB_LITE
180
-
181
220
  Status s;
182
- IOStatus io_s = IOStatus::OK();
183
- if (logfile_number_ > 0 &&
184
- versions_->GetColumnFamilySet()->NumberOfColumnFamilies() > 1) {
185
- // If there are more than one column families, we need to make sure that
186
- // all the log files except the most recent one are synced. Otherwise if
187
- // the host crashes after flushing and before WAL is persistent, the
188
- // flushed SST may contain data from write batches whose updates to
189
- // other column families are missing.
221
+ bool need_cancel = false;
222
+ IOStatus log_io_s = IOStatus::OK();
223
+ if (needs_to_sync_closed_wals) {
190
224
  // SyncClosedLogs() may unlock and re-lock the db_mutex.
191
- io_s = SyncClosedLogs(job_context);
192
- s = io_s;
193
- if (!io_s.ok() && !io_s.IsShutdownInProgress() &&
194
- !io_s.IsColumnFamilyDropped()) {
195
- error_handler_.SetBGError(io_s, BackgroundErrorReason::kFlush);
225
+ log_io_s = SyncClosedLogs(job_context);
226
+ if (!log_io_s.ok() && !log_io_s.IsShutdownInProgress() &&
227
+ !log_io_s.IsColumnFamilyDropped()) {
228
+ error_handler_.SetBGError(log_io_s, BackgroundErrorReason::kFlush);
196
229
  }
197
230
  } else {
198
231
  TEST_SYNC_POINT("DBImpl::SyncClosedLogs:Skip");
199
232
  }
233
+ s = log_io_s;
234
+
235
+ // If the log sync failed, we do not need to pick memtable. Otherwise,
236
+ // num_flush_not_started_ needs to be rolled back.
237
+ TEST_SYNC_POINT("DBImpl::FlushMemTableToOutputFile:BeforePickMemtables");
238
+ if (s.ok()) {
239
+ flush_job.PickMemTable();
240
+ need_cancel = true;
241
+ }
242
+ TEST_SYNC_POINT_CALLBACK(
243
+ "DBImpl::FlushMemTableToOutputFile:AfterPickMemtables", &flush_job);
244
+
245
+ #ifndef ROCKSDB_LITE
246
+ // may temporarily unlock and lock the mutex.
247
+ NotifyOnFlushBegin(cfd, &file_meta, mutable_cf_options, job_context->job_id);
248
+ #endif // ROCKSDB_LITE
200
249
 
250
+ bool switched_to_mempurge = false;
201
251
  // Within flush_job.Run, rocksdb may call event listener to notify
202
252
  // file creation and deletion.
203
253
  //
@@ -205,12 +255,13 @@ Status DBImpl::FlushMemTableToOutputFile(
205
255
  // and EventListener callback will be called when the db_mutex
206
256
  // is unlocked by the current thread.
207
257
  if (s.ok()) {
208
- s = flush_job.Run(&logs_with_prep_tracker_, &file_meta);
209
- } else {
210
- flush_job.Cancel();
258
+ s = flush_job.Run(&logs_with_prep_tracker_, &file_meta,
259
+ &switched_to_mempurge);
260
+ need_cancel = false;
211
261
  }
212
- if (io_s.ok()) {
213
- io_s = flush_job.io_status();
262
+
263
+ if (!s.ok() && need_cancel) {
264
+ flush_job.Cancel();
214
265
  }
215
266
 
216
267
  if (s.ok()) {
@@ -235,47 +286,46 @@ Status DBImpl::FlushMemTableToOutputFile(
235
286
 
236
287
  const auto& blob_files = storage_info->GetBlobFiles();
237
288
  if (!blob_files.empty()) {
238
- ROCKS_LOG_BUFFER(log_buffer,
239
- "[%s] Blob file summary: head=%" PRIu64 ", tail=%" PRIu64
240
- "\n",
241
- column_family_name.c_str(), blob_files.begin()->first,
242
- blob_files.rbegin()->first);
289
+ assert(blob_files.front());
290
+ assert(blob_files.back());
291
+
292
+ ROCKS_LOG_BUFFER(
293
+ log_buffer,
294
+ "[%s] Blob file summary: head=%" PRIu64 ", tail=%" PRIu64 "\n",
295
+ column_family_name.c_str(), blob_files.front()->GetBlobFileNumber(),
296
+ blob_files.back()->GetBlobFileNumber());
243
297
  }
244
298
  }
245
299
 
246
300
  if (!s.ok() && !s.IsShutdownInProgress() && !s.IsColumnFamilyDropped()) {
247
- if (!io_s.ok() && !io_s.IsShutdownInProgress() &&
248
- !io_s.IsColumnFamilyDropped()) {
301
+ if (log_io_s.ok()) {
249
302
  // Error while writing to MANIFEST.
250
303
  // In fact, versions_->io_status() can also be the result of renaming
251
304
  // CURRENT file. With current code, it's just difficult to tell. So just
252
305
  // be pessimistic and try write to a new MANIFEST.
253
306
  // TODO: distinguish between MANIFEST write and CURRENT renaming
254
307
  if (!versions_->io_status().ok()) {
255
- if (total_log_size_ > 0) {
256
- // If the WAL is empty, we use different error reason
257
- error_handler_.SetBGError(io_s,
258
- BackgroundErrorReason::kManifestWrite);
259
- } else {
260
- error_handler_.SetBGError(io_s,
261
- BackgroundErrorReason::kManifestWriteNoWAL);
262
- }
263
- } else if (total_log_size_ > 0) {
264
- error_handler_.SetBGError(io_s, BackgroundErrorReason::kFlush);
308
+ // If WAL sync is successful (either WAL size is 0 or there is no IO
309
+ // error), all the Manifest writes will be mapped to soft errors.
310
+ // TODO: kManifestWriteNoWAL and kFlushNoWAL are misleading. Refactor is
311
+ // needed.
312
+ error_handler_.SetBGError(s,
313
+ BackgroundErrorReason::kManifestWriteNoWAL);
265
314
  } else {
266
- // If the WAL is empty, we use different error reason
267
- error_handler_.SetBGError(io_s, BackgroundErrorReason::kFlushNoWAL);
315
+ // If WAL sync is successful (either WAL size is 0 or there is no IO
316
+ // error), all the other SST file write errors will be set as
317
+ // kFlushNoWAL.
318
+ error_handler_.SetBGError(s, BackgroundErrorReason::kFlushNoWAL);
268
319
  }
269
320
  } else {
321
+ assert(s == log_io_s);
270
322
  Status new_bg_error = s;
271
323
  error_handler_.SetBGError(new_bg_error, BackgroundErrorReason::kFlush);
272
324
  }
273
- } else {
274
- // If we got here, then we decided not to care about the i_os status (either
275
- // from never needing it or ignoring the flush job status
276
- io_s.PermitUncheckedError();
277
325
  }
278
- if (s.ok()) {
326
+ // If flush ran smoothly and no mempurge happened
327
+ // install new SST file path.
328
+ if (s.ok() && (!switched_to_mempurge)) {
279
329
  #ifndef ROCKSDB_LITE
280
330
  // may temporarily unlock and lock the mutex.
281
331
  NotifyOnFlushCompleted(cfd, mutable_cf_options,
@@ -320,13 +370,14 @@ Status DBImpl::FlushMemTablesToOutputFiles(
320
370
  &earliest_write_conflict_snapshot, &snapshot_checker);
321
371
  const auto& bg_flush_arg = bg_flush_args[0];
322
372
  ColumnFamilyData* cfd = bg_flush_arg.cfd_;
323
- MutableCFOptions mutable_cf_options = *cfd->GetLatestMutableCFOptions();
373
+ // intentional infrequent copy for each flush
374
+ MutableCFOptions mutable_cf_options_copy = *cfd->GetLatestMutableCFOptions();
324
375
  SuperVersionContext* superversion_context =
325
376
  bg_flush_arg.superversion_context_;
326
377
  Status s = FlushMemTableToOutputFile(
327
- cfd, mutable_cf_options, made_progress, job_context, superversion_context,
328
- snapshot_seqs, earliest_write_conflict_snapshot, snapshot_checker,
329
- log_buffer, thread_pri);
378
+ cfd, mutable_cf_options_copy, made_progress, job_context,
379
+ superversion_context, snapshot_seqs, earliest_write_conflict_snapshot,
380
+ snapshot_checker, log_buffer, thread_pri);
330
381
  return s;
331
382
  }
332
383
 
@@ -353,6 +404,7 @@ Status DBImpl::AtomicFlushMemTablesToOutputFiles(
353
404
  for (const auto cfd : cfds) {
354
405
  assert(cfd->imm()->NumNotFlushed() != 0);
355
406
  assert(cfd->imm()->IsFlushPending());
407
+ assert(cfd->GetFlushReason() == cfds[0]->GetFlushReason());
356
408
  }
357
409
  #endif /* !NDEBUG */
358
410
 
@@ -400,13 +452,15 @@ Status DBImpl::AtomicFlushMemTablesToOutputFiles(
400
452
  stats_, &event_logger_, mutable_cf_options.report_bg_io_stats,
401
453
  false /* sync_output_directory */, false /* write_manifest */,
402
454
  thread_pri, io_tracer_, db_id_, db_session_id_,
403
- cfd->GetFullHistoryTsLow()));
404
- jobs.back()->PickMemTable();
455
+ cfd->GetFullHistoryTsLow(), &blob_callback_));
405
456
  }
406
457
 
407
458
  std::vector<FileMetaData> file_meta(num_cfs);
459
+ // Use of deque<bool> because vector<bool>
460
+ // is specialized and doesn't allow &v[i].
461
+ std::deque<bool> switched_to_mempurge(num_cfs, false);
408
462
  Status s;
409
- IOStatus io_s;
463
+ IOStatus log_io_s = IOStatus::OK();
410
464
  assert(num_cfs == static_cast<int>(jobs.size()));
411
465
 
412
466
  #ifndef ROCKSDB_LITE
@@ -421,27 +475,45 @@ Status DBImpl::AtomicFlushMemTablesToOutputFiles(
421
475
  if (logfile_number_ > 0) {
422
476
  // TODO (yanqin) investigate whether we should sync the closed logs for
423
477
  // single column family case.
424
- io_s = SyncClosedLogs(job_context);
425
- s = io_s;
478
+ log_io_s = SyncClosedLogs(job_context);
479
+ if (!log_io_s.ok() && !log_io_s.IsShutdownInProgress() &&
480
+ !log_io_s.IsColumnFamilyDropped()) {
481
+ if (total_log_size_ > 0) {
482
+ error_handler_.SetBGError(log_io_s, BackgroundErrorReason::kFlush);
483
+ } else {
484
+ // If the WAL is empty, we use different error reason
485
+ error_handler_.SetBGError(log_io_s, BackgroundErrorReason::kFlushNoWAL);
486
+ }
487
+ }
426
488
  }
489
+ s = log_io_s;
427
490
 
428
491
  // exec_status stores the execution status of flush_jobs as
429
492
  // <bool /* executed */, Status /* status code */>
430
493
  autovector<std::pair<bool, Status>> exec_status;
431
- autovector<IOStatus> io_status;
494
+ std::vector<bool> pick_status;
432
495
  for (int i = 0; i != num_cfs; ++i) {
433
496
  // Initially all jobs are not executed, with status OK.
434
497
  exec_status.emplace_back(false, Status::OK());
435
- io_status.emplace_back(IOStatus::OK());
498
+ pick_status.push_back(false);
499
+ }
500
+
501
+ if (s.ok()) {
502
+ for (int i = 0; i != num_cfs; ++i) {
503
+ jobs[i]->PickMemTable();
504
+ pick_status[i] = true;
505
+ }
436
506
  }
437
507
 
438
508
  if (s.ok()) {
509
+ assert(switched_to_mempurge.size() ==
510
+ static_cast<long unsigned int>(num_cfs));
439
511
  // TODO (yanqin): parallelize jobs with threads.
440
512
  for (int i = 1; i != num_cfs; ++i) {
441
513
  exec_status[i].second =
442
- jobs[i]->Run(&logs_with_prep_tracker_, &file_meta[i]);
514
+ jobs[i]->Run(&logs_with_prep_tracker_, &file_meta[i],
515
+ &(switched_to_mempurge.at(i)));
443
516
  exec_status[i].first = true;
444
- io_status[i] = jobs[i]->io_status();
445
517
  }
446
518
  if (num_cfs > 1) {
447
519
  TEST_SYNC_POINT(
@@ -451,10 +523,10 @@ Status DBImpl::AtomicFlushMemTablesToOutputFiles(
451
523
  }
452
524
  assert(exec_status.size() > 0);
453
525
  assert(!file_meta.empty());
454
- exec_status[0].second =
455
- jobs[0]->Run(&logs_with_prep_tracker_, &file_meta[0]);
526
+ exec_status[0].second = jobs[0]->Run(
527
+ &logs_with_prep_tracker_, file_meta.data() /* &file_meta[0] */,
528
+ switched_to_mempurge.empty() ? nullptr : &(switched_to_mempurge.at(0)));
456
529
  exec_status[0].first = true;
457
- io_status[0] = jobs[0]->io_status();
458
530
 
459
531
  Status error_status;
460
532
  for (const auto& e : exec_status) {
@@ -473,20 +545,6 @@ Status DBImpl::AtomicFlushMemTablesToOutputFiles(
473
545
  s = error_status.ok() ? s : error_status;
474
546
  }
475
547
 
476
- if (io_s.ok()) {
477
- IOStatus io_error = IOStatus::OK();
478
- for (int i = 0; i != static_cast<int>(io_status.size()); i++) {
479
- if (!io_status[i].ok() && !io_status[i].IsShutdownInProgress() &&
480
- !io_status[i].IsColumnFamilyDropped()) {
481
- io_error = io_status[i];
482
- }
483
- }
484
- io_s = io_error;
485
- if (s.ok() && !io_s.ok()) {
486
- s = io_s;
487
- }
488
- }
489
-
490
548
  if (s.IsColumnFamilyDropped()) {
491
549
  s = Status::OK();
492
550
  }
@@ -495,7 +553,9 @@ Status DBImpl::AtomicFlushMemTablesToOutputFiles(
495
553
  // Sync on all distinct output directories.
496
554
  for (auto dir : distinct_output_dirs) {
497
555
  if (dir != nullptr) {
498
- Status error_status = dir->Fsync(IOOptions(), nullptr);
556
+ Status error_status = dir->FsyncWithDirOptions(
557
+ IOOptions(), nullptr,
558
+ DirFsyncOptions(DirFsyncOptions::FsyncReason::kNewFileSynced));
499
559
  if (!error_status.ok()) {
500
560
  s = error_status;
501
561
  break;
@@ -508,12 +568,12 @@ Status DBImpl::AtomicFlushMemTablesToOutputFiles(
508
568
  // Have to cancel the flush jobs that have NOT executed because we need to
509
569
  // unref the versions.
510
570
  for (int i = 0; i != num_cfs; ++i) {
511
- if (!exec_status[i].first) {
571
+ if (pick_status[i] && !exec_status[i].first) {
512
572
  jobs[i]->Cancel();
513
573
  }
514
574
  }
515
575
  for (int i = 0; i != num_cfs; ++i) {
516
- if (exec_status[i].first && exec_status[i].second.ok()) {
576
+ if (exec_status[i].second.ok() && exec_status[i].first) {
517
577
  auto& mems = jobs[i]->GetMemTables();
518
578
  cfds[i]->imm()->RollbackMemtableFlush(mems,
519
579
  file_meta[i].fd.GetNumber());
@@ -522,7 +582,15 @@ Status DBImpl::AtomicFlushMemTablesToOutputFiles(
522
582
  }
523
583
 
524
584
  if (s.ok()) {
525
- auto wait_to_install_func = [&]() {
585
+ const auto wait_to_install_func =
586
+ [&]() -> std::pair<Status, bool /*continue to wait*/> {
587
+ if (!versions_->io_status().ok()) {
588
+ // Something went wrong elsewhere, we cannot count on waiting for our
589
+ // turn to write/sync to MANIFEST or CURRENT. Just return.
590
+ return std::make_pair(versions_->io_status(), false);
591
+ } else if (shutting_down_.load(std::memory_order_acquire)) {
592
+ return std::make_pair(Status::ShutdownInProgress(), false);
593
+ }
526
594
  bool ready = true;
527
595
  for (size_t i = 0; i != cfds.size(); ++i) {
528
596
  const auto& mems = jobs[i]->GetMemTables();
@@ -546,18 +614,46 @@ Status DBImpl::AtomicFlushMemTablesToOutputFiles(
546
614
  break;
547
615
  }
548
616
  }
549
- return ready;
617
+ return std::make_pair(Status::OK(), !ready);
550
618
  };
551
619
 
552
- bool resuming_from_bg_err = error_handler_.IsDBStopped();
553
- while ((!error_handler_.IsDBStopped() ||
554
- error_handler_.GetRecoveryError().ok()) &&
555
- !wait_to_install_func()) {
620
+ bool resuming_from_bg_err =
621
+ error_handler_.IsDBStopped() ||
622
+ (cfds[0]->GetFlushReason() == FlushReason::kErrorRecovery ||
623
+ cfds[0]->GetFlushReason() == FlushReason::kErrorRecoveryRetryFlush);
624
+ while ((!resuming_from_bg_err || error_handler_.GetRecoveryError().ok())) {
625
+ std::pair<Status, bool> res = wait_to_install_func();
626
+
627
+ TEST_SYNC_POINT_CALLBACK(
628
+ "DBImpl::AtomicFlushMemTablesToOutputFiles:WaitToCommit", &res);
629
+
630
+ if (!res.first.ok()) {
631
+ s = res.first;
632
+ break;
633
+ } else if (!res.second) {
634
+ break;
635
+ }
556
636
  atomic_flush_install_cv_.Wait();
637
+
638
+ resuming_from_bg_err =
639
+ error_handler_.IsDBStopped() ||
640
+ (cfds[0]->GetFlushReason() == FlushReason::kErrorRecovery ||
641
+ cfds[0]->GetFlushReason() == FlushReason::kErrorRecoveryRetryFlush);
557
642
  }
558
643
 
559
- s = resuming_from_bg_err ? error_handler_.GetRecoveryError()
560
- : error_handler_.GetBGError();
644
+ if (!resuming_from_bg_err) {
645
+ // If not resuming from bg err, then we determine future action based on
646
+ // whether we hit background error.
647
+ if (s.ok()) {
648
+ s = error_handler_.GetBGError();
649
+ }
650
+ } else if (s.ok()) {
651
+ // If resuming from bg err, we still rely on wait_to_install_func()'s
652
+ // result to determine future action. If wait_to_install_func() returns
653
+ // non-ok already, then we should not proceed to flush result
654
+ // installation.
655
+ s = error_handler_.GetRecoveryError();
656
+ }
561
657
  }
562
658
 
563
659
  if (s.ok()) {
@@ -565,6 +661,8 @@ Status DBImpl::AtomicFlushMemTablesToOutputFiles(
565
661
  autovector<const autovector<MemTable*>*> mems_list;
566
662
  autovector<const MutableCFOptions*> mutable_cf_options_list;
567
663
  autovector<FileMetaData*> tmp_file_meta;
664
+ autovector<std::list<std::unique_ptr<FlushJobInfo>>*>
665
+ committed_flush_jobs_info;
568
666
  for (int i = 0; i != num_cfs; ++i) {
569
667
  const auto& mems = jobs[i]->GetMemTables();
570
668
  if (!cfds[i]->IsDropped() && !mems.empty()) {
@@ -572,13 +670,18 @@ Status DBImpl::AtomicFlushMemTablesToOutputFiles(
572
670
  mems_list.emplace_back(&mems);
573
671
  mutable_cf_options_list.emplace_back(&all_mutable_cf_options[i]);
574
672
  tmp_file_meta.emplace_back(&file_meta[i]);
673
+ #ifndef ROCKSDB_LITE
674
+ committed_flush_jobs_info.emplace_back(
675
+ jobs[i]->GetCommittedFlushJobsInfo());
676
+ #endif //! ROCKSDB_LITE
575
677
  }
576
678
  }
577
679
 
578
680
  s = InstallMemtableAtomicFlushResults(
579
681
  nullptr /* imm_lists */, tmp_cfds, mutable_cf_options_list, mems_list,
580
682
  versions_.get(), &logs_with_prep_tracker_, &mutex_, tmp_file_meta,
581
- &job_context->memtables_to_free, directories_.GetDbDir(), log_buffer);
683
+ committed_flush_jobs_info, &job_context->memtables_to_free,
684
+ directories_.GetDbDir(), log_buffer);
582
685
  }
583
686
 
584
687
  if (s.ok()) {
@@ -609,11 +712,14 @@ Status DBImpl::AtomicFlushMemTablesToOutputFiles(
609
712
 
610
713
  const auto& blob_files = storage_info->GetBlobFiles();
611
714
  if (!blob_files.empty()) {
612
- ROCKS_LOG_BUFFER(log_buffer,
613
- "[%s] Blob file summary: head=%" PRIu64
614
- ", tail=%" PRIu64 "\n",
615
- column_family_name.c_str(), blob_files.begin()->first,
616
- blob_files.rbegin()->first);
715
+ assert(blob_files.front());
716
+ assert(blob_files.back());
717
+
718
+ ROCKS_LOG_BUFFER(
719
+ log_buffer,
720
+ "[%s] Blob file summary: head=%" PRIu64 ", tail=%" PRIu64 "\n",
721
+ column_family_name.c_str(), blob_files.front()->GetBlobFileNumber(),
722
+ blob_files.back()->GetBlobFileNumber());
617
723
  }
618
724
  }
619
725
  if (made_progress) {
@@ -624,6 +730,11 @@ Status DBImpl::AtomicFlushMemTablesToOutputFiles(
624
730
  immutable_db_options_.sst_file_manager.get());
625
731
  assert(all_mutable_cf_options.size() == static_cast<size_t>(num_cfs));
626
732
  for (int i = 0; s.ok() && i != num_cfs; ++i) {
733
+ // If mempurge happened instead of Flush,
734
+ // no NotifyOnFlushCompleted call (no SST file created).
735
+ if (switched_to_mempurge[i]) {
736
+ continue;
737
+ }
627
738
  if (cfds[i]->IsDropped()) {
628
739
  continue;
629
740
  }
@@ -651,28 +762,27 @@ Status DBImpl::AtomicFlushMemTablesToOutputFiles(
651
762
  // Need to undo atomic flush if something went wrong, i.e. s is not OK and
652
763
  // it is not because of CF drop.
653
764
  if (!s.ok() && !s.IsColumnFamilyDropped()) {
654
- if (!io_s.ok() && !io_s.IsColumnFamilyDropped()) {
765
+ if (log_io_s.ok()) {
655
766
  // Error while writing to MANIFEST.
656
767
  // In fact, versions_->io_status() can also be the result of renaming
657
768
  // CURRENT file. With current code, it's just difficult to tell. So just
658
769
  // be pessimistic and try write to a new MANIFEST.
659
770
  // TODO: distinguish between MANIFEST write and CURRENT renaming
660
771
  if (!versions_->io_status().ok()) {
661
- if (total_log_size_ > 0) {
662
- // If the WAL is empty, we use different error reason
663
- error_handler_.SetBGError(io_s,
664
- BackgroundErrorReason::kManifestWrite);
665
- } else {
666
- error_handler_.SetBGError(io_s,
667
- BackgroundErrorReason::kManifestWriteNoWAL);
668
- }
669
- } else if (total_log_size_ > 0) {
670
- error_handler_.SetBGError(io_s, BackgroundErrorReason::kFlush);
772
+ // If WAL sync is successful (either WAL size is 0 or there is no IO
773
+ // error), all the Manifest writes will be mapped to soft errors.
774
+ // TODO: kManifestWriteNoWAL and kFlushNoWAL are misleading. Refactor
775
+ // is needed.
776
+ error_handler_.SetBGError(s,
777
+ BackgroundErrorReason::kManifestWriteNoWAL);
671
778
  } else {
672
- // If the WAL is empty, we use different error reason
673
- error_handler_.SetBGError(io_s, BackgroundErrorReason::kFlushNoWAL);
779
+ // If WAL sync is successful (either WAL size is 0 or there is no IO
780
+ // error), all the other SST file write errors will be set as
781
+ // kFlushNoWAL.
782
+ error_handler_.SetBGError(s, BackgroundErrorReason::kFlushNoWAL);
674
783
  }
675
784
  } else {
785
+ assert(s == log_io_s);
676
786
  Status new_bg_error = s;
677
787
  error_handler_.SetBGError(new_bg_error, BackgroundErrorReason::kFlush);
678
788
  }
@@ -759,6 +869,8 @@ void DBImpl::NotifyOnFlushCompleted(
759
869
  for (auto listener : immutable_db_options_.listeners) {
760
870
  listener->OnFlushCompleted(this, *info);
761
871
  }
872
+ TEST_SYNC_POINT(
873
+ "DBImpl::NotifyOnFlushCompleted::PostAllOnFlushCompleted");
762
874
  }
763
875
  flush_jobs_info->clear();
764
876
  }
@@ -776,12 +888,20 @@ Status DBImpl::CompactRange(const CompactRangeOptions& options,
776
888
  ColumnFamilyHandle* column_family,
777
889
  const Slice* begin_without_ts,
778
890
  const Slice* end_without_ts) {
891
+ if (manual_compaction_paused_.load(std::memory_order_acquire) > 0) {
892
+ return Status::Incomplete(Status::SubCode::kManualCompactionPaused);
893
+ }
894
+
895
+ if (options.canceled && options.canceled->load(std::memory_order_acquire)) {
896
+ return Status::Incomplete(Status::SubCode::kManualCompactionPaused);
897
+ }
898
+
779
899
  const Comparator* const ucmp = column_family->GetComparator();
780
900
  assert(ucmp);
781
901
  size_t ts_sz = ucmp->timestamp_size();
782
902
  if (ts_sz == 0) {
783
903
  return CompactRangeInternal(options, column_family, begin_without_ts,
784
- end_without_ts);
904
+ end_without_ts, "" /*trim_ts*/);
785
905
  }
786
906
 
787
907
  std::string begin_str;
@@ -803,12 +923,54 @@ Status DBImpl::CompactRange(const CompactRangeOptions& options,
803
923
  Slice* end_with_ts = end_without_ts ? &end : nullptr;
804
924
 
805
925
  return CompactRangeInternal(options, column_family, begin_with_ts,
806
- end_with_ts);
926
+ end_with_ts, "" /*trim_ts*/);
927
+ }
928
+
929
+ Status DBImpl::IncreaseFullHistoryTsLow(ColumnFamilyHandle* column_family,
930
+ std::string ts_low) {
931
+ ColumnFamilyData* cfd = nullptr;
932
+ if (column_family == nullptr) {
933
+ cfd = default_cf_handle_->cfd();
934
+ } else {
935
+ auto cfh = static_cast_with_check<ColumnFamilyHandleImpl>(column_family);
936
+ assert(cfh != nullptr);
937
+ cfd = cfh->cfd();
938
+ }
939
+ assert(cfd != nullptr && cfd->user_comparator() != nullptr);
940
+ if (cfd->user_comparator()->timestamp_size() == 0) {
941
+ return Status::InvalidArgument(
942
+ "Timestamp is not enabled in this column family");
943
+ }
944
+ if (cfd->user_comparator()->timestamp_size() != ts_low.size()) {
945
+ return Status::InvalidArgument("ts_low size mismatch");
946
+ }
947
+ return IncreaseFullHistoryTsLowImpl(cfd, ts_low);
948
+ }
949
+
950
+ Status DBImpl::IncreaseFullHistoryTsLowImpl(ColumnFamilyData* cfd,
951
+ std::string ts_low) {
952
+ VersionEdit edit;
953
+ edit.SetColumnFamily(cfd->GetID());
954
+ edit.SetFullHistoryTsLow(ts_low);
955
+
956
+ InstrumentedMutexLock l(&mutex_);
957
+ std::string current_ts_low = cfd->GetFullHistoryTsLow();
958
+ const Comparator* ucmp = cfd->user_comparator();
959
+ assert(ucmp->timestamp_size() == ts_low.size() && !ts_low.empty());
960
+ if (!current_ts_low.empty() &&
961
+ ucmp->CompareTimestamp(ts_low, current_ts_low) < 0) {
962
+ return Status::InvalidArgument(
963
+ "Cannot decrease full_history_timestamp_low");
964
+ }
965
+
966
+ return versions_->LogAndApply(cfd, *cfd->GetLatestMutableCFOptions(), &edit,
967
+ &mutex_);
807
968
  }
808
969
 
809
970
  Status DBImpl::CompactRangeInternal(const CompactRangeOptions& options,
810
971
  ColumnFamilyHandle* column_family,
811
- const Slice* begin, const Slice* end) {
972
+ const Slice* begin, const Slice* end,
973
+ const std::string& trim_ts) {
812
974
  auto cfh = static_cast_with_check<ColumnFamilyHandleImpl>(column_family);
813
975
  auto cfd = cfh->cfd();
814
976
 
@@ -817,6 +979,22 @@ Status DBImpl::CompactRangeInternal(const CompactRangeOptions& options,
817
979
  }
818
980
 
819
981
  bool flush_needed = true;
982
+
983
+ // Update full_history_ts_low if it's set
984
+ if (options.full_history_ts_low != nullptr &&
985
+ !options.full_history_ts_low->empty()) {
986
+ std::string ts_low = options.full_history_ts_low->ToString();
987
+ if (begin != nullptr || end != nullptr) {
988
+ return Status::InvalidArgument(
989
+ "Cannot specify compaction range with full_history_ts_low");
990
+ }
991
+ Status s = IncreaseFullHistoryTsLowImpl(cfd, ts_low);
992
+ if (!s.ok()) {
993
+ LogFlush(immutable_db_options_.info_log);
994
+ return s;
995
+ }
996
+ }
997
+
820
998
  Status s;
821
999
  if (begin != nullptr && end != nullptr) {
822
1000
  // TODO(ajkr): We could also optimize away the flush in certain cases where
@@ -863,7 +1041,7 @@ Status DBImpl::CompactRangeInternal(const CompactRangeOptions& options,
863
1041
  }
864
1042
  s = RunManualCompaction(cfd, ColumnFamilyData::kCompactAllLevels,
865
1043
  final_output_level, options, begin, end, exclusive,
866
- false, port::kMaxUint64);
1044
+ false, port::kMaxUint64, trim_ts);
867
1045
  } else {
868
1046
  int first_overlapped_level = kInvalidLevel;
869
1047
  int max_overlapped_level = kInvalidLevel;
@@ -949,9 +1127,13 @@ Status DBImpl::CompactRangeInternal(const CompactRangeOptions& options,
949
1127
  disallow_trivial_move = true;
950
1128
  }
951
1129
  }
1130
+ // trim_ts need real compaction to remove latest record
1131
+ if (!trim_ts.empty()) {
1132
+ disallow_trivial_move = true;
1133
+ }
952
1134
  s = RunManualCompaction(cfd, level, output_level, options, begin, end,
953
1135
  exclusive, disallow_trivial_move,
954
- max_file_num_to_ignore);
1136
+ max_file_num_to_ignore, trim_ts);
955
1137
  if (!s.ok()) {
956
1138
  break;
957
1139
  }
@@ -987,6 +1169,8 @@ Status DBImpl::CompactRangeInternal(const CompactRangeOptions& options,
987
1169
  assert(temp_s.ok());
988
1170
  }
989
1171
  EnableManualCompaction();
1172
+ TEST_SYNC_POINT(
1173
+ "DBImpl::CompactRange:PostRefitLevel:ManualCompactionEnabled");
990
1174
  }
991
1175
  LogFlush(immutable_db_options_.info_log);
992
1176
 
@@ -1026,7 +1210,7 @@ Status DBImpl::CompactFiles(const CompactionOptions& compact_options,
1026
1210
  assert(cfd);
1027
1211
 
1028
1212
  Status s;
1029
- JobContext job_context(0, true);
1213
+ JobContext job_context(next_job_id_.fetch_add(1), true);
1030
1214
  LogBuffer log_buffer(InfoLogLevel::INFO_LEVEL,
1031
1215
  immutable_db_options_.info_log.get());
1032
1216
 
@@ -1177,18 +1361,19 @@ Status DBImpl::CompactFilesImpl(
1177
1361
  assert(is_snapshot_supported_ || snapshots_.empty());
1178
1362
  CompactionJobStats compaction_job_stats;
1179
1363
  CompactionJob compaction_job(
1180
- job_context->job_id, c.get(), immutable_db_options_,
1364
+ job_context->job_id, c.get(), immutable_db_options_, mutable_db_options_,
1181
1365
  file_options_for_compaction_, versions_.get(), &shutting_down_,
1182
- preserve_deletes_seqnum_.load(), log_buffer, directories_.GetDbDir(),
1366
+ log_buffer, directories_.GetDbDir(),
1183
1367
  GetDataDir(c->column_family_data(), c->output_path_id()),
1184
1368
  GetDataDir(c->column_family_data(), 0), stats_, &mutex_, &error_handler_,
1185
1369
  snapshot_seqs, earliest_write_conflict_snapshot, snapshot_checker,
1186
- table_cache_, &event_logger_,
1370
+ job_context, table_cache_, &event_logger_,
1187
1371
  c->mutable_cf_options()->paranoid_file_checks,
1188
1372
  c->mutable_cf_options()->report_bg_io_stats, dbname_,
1189
1373
  &compaction_job_stats, Env::Priority::USER, io_tracer_,
1190
- &manual_compaction_paused_, db_id_, db_session_id_,
1191
- c->column_family_data()->GetFullHistoryTsLow());
1374
+ &manual_compaction_paused_, nullptr, db_id_, db_session_id_,
1375
+ c->column_family_data()->GetFullHistoryTsLow(), c->trim_ts(),
1376
+ &blob_callback_);
1192
1377
 
1193
1378
  // Creating a compaction influences the compaction score because the score
1194
1379
  // takes running compactions into account (by skipping files that are already
@@ -1261,10 +1446,15 @@ Status DBImpl::CompactFilesImpl(
1261
1446
 
1262
1447
  if (output_file_names != nullptr) {
1263
1448
  for (const auto& newf : c->edit()->GetNewFiles()) {
1264
- (*output_file_names)
1265
- .push_back(TableFileName(c->immutable_cf_options()->cf_paths,
1266
- newf.second.fd.GetNumber(),
1267
- newf.second.fd.GetPathId()));
1449
+ output_file_names->push_back(TableFileName(
1450
+ c->immutable_options()->cf_paths, newf.second.fd.GetNumber(),
1451
+ newf.second.fd.GetPathId()));
1452
+ }
1453
+
1454
+ for (const auto& blob_file : c->edit()->GetBlobFileAdditions()) {
1455
+ output_file_names->push_back(
1456
+ BlobFileName(c->immutable_options()->cf_paths.front().path,
1457
+ blob_file.GetBlobFileNumber()));
1268
1458
  }
1269
1459
  }
1270
1460
 
@@ -1325,6 +1515,8 @@ void DBImpl::NotifyOnCompactionBegin(ColumnFamilyData* cfd, Compaction* c,
1325
1515
  manual_compaction_paused_.load(std::memory_order_acquire) > 0) {
1326
1516
  return;
1327
1517
  }
1518
+
1519
+ c->SetNotifyOnCompactionCompleted();
1328
1520
  Version* current = cfd->current();
1329
1521
  current->Ref();
1330
1522
  // release lock while notifying events
@@ -1360,10 +1552,11 @@ void DBImpl::NotifyOnCompactionCompleted(
1360
1552
  if (shutting_down_.load(std::memory_order_acquire)) {
1361
1553
  return;
1362
1554
  }
1363
- if (c->is_manual_compaction() &&
1364
- manual_compaction_paused_.load(std::memory_order_acquire) > 0) {
1555
+
1556
+ if (c->ShouldNotifyOnCompactionCompleted() == false) {
1365
1557
  return;
1366
1558
  }
1559
+
1367
1560
  Version* current = cfd->current();
1368
1561
  current->Ref();
1369
1562
  // release lock while notifying events
@@ -1452,12 +1645,12 @@ Status DBImpl::ReFitLevel(ColumnFamilyData* cfd, int level, int target_level) {
1452
1645
  edit.SetColumnFamily(cfd->GetID());
1453
1646
  for (const auto& f : vstorage->LevelFiles(level)) {
1454
1647
  edit.DeleteFile(level, f->fd.GetNumber());
1455
- edit.AddFile(to_level, f->fd.GetNumber(), f->fd.GetPathId(),
1456
- f->fd.GetFileSize(), f->smallest, f->largest,
1457
- f->fd.smallest_seqno, f->fd.largest_seqno,
1458
- f->marked_for_compaction, f->oldest_blob_file_number,
1459
- f->oldest_ancester_time, f->file_creation_time,
1460
- f->file_checksum, f->file_checksum_func_name);
1648
+ edit.AddFile(
1649
+ to_level, f->fd.GetNumber(), f->fd.GetPathId(), f->fd.GetFileSize(),
1650
+ f->smallest, f->largest, f->fd.smallest_seqno, f->fd.largest_seqno,
1651
+ f->marked_for_compaction, f->temperature, f->oldest_blob_file_number,
1652
+ f->oldest_ancester_time, f->file_creation_time, f->file_checksum,
1653
+ f->file_checksum_func_name, f->min_timestamp, f->max_timestamp);
1461
1654
  }
1462
1655
  ROCKS_LOG_DEBUG(immutable_db_options_.info_log,
1463
1656
  "[%s] Apply version edit:\n%s", cfd->GetName().c_str(),
@@ -1569,25 +1762,21 @@ Status DBImpl::RunManualCompaction(
1569
1762
  ColumnFamilyData* cfd, int input_level, int output_level,
1570
1763
  const CompactRangeOptions& compact_range_options, const Slice* begin,
1571
1764
  const Slice* end, bool exclusive, bool disallow_trivial_move,
1572
- uint64_t max_file_num_to_ignore) {
1765
+ uint64_t max_file_num_to_ignore, const std::string& trim_ts) {
1573
1766
  assert(input_level == ColumnFamilyData::kCompactAllLevels ||
1574
1767
  input_level >= 0);
1575
1768
 
1576
1769
  InternalKey begin_storage, end_storage;
1577
- CompactionArg* ca;
1770
+ CompactionArg* ca = nullptr;
1578
1771
 
1579
1772
  bool scheduled = false;
1773
+ bool unscheduled = false;
1774
+ Env::Priority thread_pool_priority = Env::Priority::TOTAL;
1580
1775
  bool manual_conflict = false;
1581
- ManualCompactionState manual;
1582
- manual.cfd = cfd;
1583
- manual.input_level = input_level;
1584
- manual.output_level = output_level;
1585
- manual.output_path_id = compact_range_options.target_path_id;
1586
- manual.done = false;
1587
- manual.in_progress = false;
1588
- manual.incomplete = false;
1589
- manual.exclusive = exclusive;
1590
- manual.disallow_trivial_move = disallow_trivial_move;
1776
+
1777
+ ManualCompactionState manual(
1778
+ cfd, input_level, output_level, compact_range_options.target_path_id,
1779
+ exclusive, disallow_trivial_move, compact_range_options.canceled);
1591
1780
  // For universal compaction, we enforce every manual compaction to compact
1592
1781
  // all files.
1593
1782
  if (begin == nullptr ||
@@ -1611,10 +1800,24 @@ Status DBImpl::RunManualCompaction(
1611
1800
  TEST_SYNC_POINT("DBImpl::RunManualCompaction:1");
1612
1801
  InstrumentedMutexLock l(&mutex_);
1613
1802
 
1803
+ if (manual_compaction_paused_ > 0) {
1804
+ // Does not make sense to `AddManualCompaction()` in this scenario since
1805
+ // `DisableManualCompaction()` just waited for the manual compaction queue
1806
+ // to drain. So return immediately.
1807
+ TEST_SYNC_POINT("DBImpl::RunManualCompaction:PausedAtStart");
1808
+ manual.status =
1809
+ Status::Incomplete(Status::SubCode::kManualCompactionPaused);
1810
+ manual.done = true;
1811
+ return manual.status;
1812
+ }
1813
+
1614
1814
  // When a manual compaction arrives, temporarily disable scheduling of
1615
1815
  // non-manual compactions and wait until the number of scheduled compaction
1616
- // jobs drops to zero. This is needed to ensure that this manual compaction
1617
- // can compact any range of keys/files.
1816
+ // jobs drops to zero. This used to be needed to ensure that this manual
1817
+ // compaction can compact any range of keys/files. Now it is optional
1818
+ // (see `CompactRangeOptions::exclusive_manual_compaction`). The use case for
1819
+ // `exclusive_manual_compaction=true` (the default) is unclear beyond not
1820
+ // trusting the new code.
1618
1821
  //
1619
1822
  // HasPendingManualCompaction() is true when at least one thread is inside
1620
1823
  // RunManualCompaction(), i.e. during that time no other compaction will
@@ -1628,8 +1831,20 @@ Status DBImpl::RunManualCompaction(
1628
1831
  AddManualCompaction(&manual);
1629
1832
  TEST_SYNC_POINT_CALLBACK("DBImpl::RunManualCompaction:NotScheduled", &mutex_);
1630
1833
  if (exclusive) {
1834
+ // Limitation: there's no way to wake up the below loop when user sets
1835
+ // `*manual.canceled`. So `CompactRangeOptions::exclusive_manual_compaction`
1836
+ // and `CompactRangeOptions::canceled` might not work well together.
1631
1837
  while (bg_bottom_compaction_scheduled_ > 0 ||
1632
1838
  bg_compaction_scheduled_ > 0) {
1839
+ if (manual_compaction_paused_ > 0 ||
1840
+ (manual.canceled != nullptr && *manual.canceled == true)) {
1841
+ // Pretend the error came from compaction so the below cleanup/error
1842
+ // handling code can process it.
1843
+ manual.done = true;
1844
+ manual.status =
1845
+ Status::Incomplete(Status::SubCode::kManualCompactionPaused);
1846
+ break;
1847
+ }
1633
1848
  TEST_SYNC_POINT("DBImpl::RunManualCompaction:WaitScheduled");
1634
1849
  ROCKS_LOG_INFO(
1635
1850
  immutable_db_options_.info_log,
@@ -1659,13 +1874,30 @@ Status DBImpl::RunManualCompaction(
1659
1874
  *manual.cfd->GetLatestMutableCFOptions(), mutable_db_options_,
1660
1875
  manual.input_level, manual.output_level, compact_range_options,
1661
1876
  manual.begin, manual.end, &manual.manual_end, &manual_conflict,
1662
- max_file_num_to_ignore)) == nullptr &&
1877
+ max_file_num_to_ignore, trim_ts)) == nullptr &&
1663
1878
  manual_conflict))) {
1664
1879
  // exclusive manual compactions should not see a conflict during
1665
1880
  // CompactRange
1666
1881
  assert(!exclusive || !manual_conflict);
1667
1882
  // Running either this or some other manual compaction
1668
1883
  bg_cv_.Wait();
1884
+ if (manual_compaction_paused_ > 0 && scheduled && !unscheduled) {
1885
+ assert(thread_pool_priority != Env::Priority::TOTAL);
1886
+ // unschedule all manual compactions
1887
+ auto unscheduled_task_num = env_->UnSchedule(
1888
+ GetTaskTag(TaskType::kManualCompaction), thread_pool_priority);
1889
+ if (unscheduled_task_num > 0) {
1890
+ ROCKS_LOG_INFO(
1891
+ immutable_db_options_.info_log,
1892
+ "[%s] Unscheduled %d number of manual compactions from the "
1893
+ "thread-pool",
1894
+ cfd->GetName().c_str(), unscheduled_task_num);
1895
+ // it may unschedule other manual compactions, notify others.
1896
+ bg_cv_.SignalAll();
1897
+ }
1898
+ unscheduled = true;
1899
+ TEST_SYNC_POINT("DBImpl::RunManualCompaction:Unscheduled");
1900
+ }
1669
1901
  if (scheduled && manual.incomplete == true) {
1670
1902
  assert(!manual.in_progress);
1671
1903
  scheduled = false;
@@ -1688,15 +1920,25 @@ Status DBImpl::RunManualCompaction(
1688
1920
  assert(false);
1689
1921
  }
1690
1922
  manual.incomplete = false;
1691
- bg_compaction_scheduled_++;
1692
- Env::Priority thread_pool_pri = Env::Priority::LOW;
1693
1923
  if (compaction->bottommost_level() &&
1694
1924
  env_->GetBackgroundThreads(Env::Priority::BOTTOM) > 0) {
1695
- thread_pool_pri = Env::Priority::BOTTOM;
1925
+ bg_bottom_compaction_scheduled_++;
1926
+ ca->compaction_pri_ = Env::Priority::BOTTOM;
1927
+ env_->Schedule(&DBImpl::BGWorkBottomCompaction, ca,
1928
+ Env::Priority::BOTTOM,
1929
+ GetTaskTag(TaskType::kManualCompaction),
1930
+ &DBImpl::UnscheduleCompactionCallback);
1931
+ thread_pool_priority = Env::Priority::BOTTOM;
1932
+ } else {
1933
+ bg_compaction_scheduled_++;
1934
+ ca->compaction_pri_ = Env::Priority::LOW;
1935
+ env_->Schedule(&DBImpl::BGWorkCompaction, ca, Env::Priority::LOW,
1936
+ GetTaskTag(TaskType::kManualCompaction),
1937
+ &DBImpl::UnscheduleCompactionCallback);
1938
+ thread_pool_priority = Env::Priority::LOW;
1696
1939
  }
1697
- env_->Schedule(&DBImpl::BGWorkCompaction, ca, thread_pool_pri, this,
1698
- &DBImpl::UnscheduleCompactionCallback);
1699
1940
  scheduled = true;
1941
+ TEST_SYNC_POINT("DBImpl::RunManualCompaction:Scheduled");
1700
1942
  }
1701
1943
  }
1702
1944
 
@@ -1704,6 +1946,13 @@ Status DBImpl::RunManualCompaction(
1704
1946
  assert(!manual.in_progress);
1705
1947
  assert(HasPendingManualCompaction());
1706
1948
  RemoveManualCompaction(&manual);
1949
+ // if the manual job is unscheduled, try schedule other jobs in case there's
1950
+ // any unscheduled compaction job which was blocked by exclusive manual
1951
+ // compaction.
1952
+ if (manual.status.IsIncomplete() &&
1953
+ manual.status.subcode() == Status::SubCode::kManualCompactionPaused) {
1954
+ MaybeScheduleFlushOrCompaction();
1955
+ }
1707
1956
  bg_cv_.SignalAll();
1708
1957
  return manual.status;
1709
1958
  }
@@ -2026,12 +2275,12 @@ Status DBImpl::WaitUntilFlushWouldNotStallWrites(ColumnFamilyData* cfd,
2026
2275
  // check whether one extra immutable memtable or an extra L0 file would
2027
2276
  // cause write stalling mode to be entered. It could still enter stall
2028
2277
  // mode due to pending compaction bytes, but that's less common
2029
- write_stall_condition =
2030
- ColumnFamilyData::GetWriteStallConditionAndCause(
2031
- cfd->imm()->NumNotFlushed() + 1,
2032
- vstorage->l0_delay_trigger_count() + 1,
2033
- vstorage->estimated_compaction_needed_bytes(), mutable_cf_options)
2034
- .first;
2278
+ write_stall_condition = ColumnFamilyData::GetWriteStallConditionAndCause(
2279
+ cfd->imm()->NumNotFlushed() + 1,
2280
+ vstorage->l0_delay_trigger_count() + 1,
2281
+ vstorage->estimated_compaction_needed_bytes(),
2282
+ mutable_cf_options, *cfd->ioptions())
2283
+ .first;
2035
2284
  } while (write_stall_condition != WriteStallCondition::kNormal);
2036
2285
  }
2037
2286
  return Status::OK();
@@ -2054,21 +2303,27 @@ Status DBImpl::WaitForFlushMemTables(
2054
2303
  int num = static_cast<int>(cfds.size());
2055
2304
  // Wait until the compaction completes
2056
2305
  InstrumentedMutexLock l(&mutex_);
2306
+ Status s;
2057
2307
  // If the caller is trying to resume from bg error, then
2058
2308
  // error_handler_.IsDBStopped() is true.
2059
2309
  while (resuming_from_bg_err || !error_handler_.IsDBStopped()) {
2060
2310
  if (shutting_down_.load(std::memory_order_acquire)) {
2061
- return Status::ShutdownInProgress();
2311
+ s = Status::ShutdownInProgress();
2312
+ return s;
2062
2313
  }
2063
2314
  // If an error has occurred during resumption, then no need to wait.
2315
+ // But flush operation may fail because of this error, so need to
2316
+ // return the status.
2064
2317
  if (!error_handler_.GetRecoveryError().ok()) {
2318
+ s = error_handler_.GetRecoveryError();
2065
2319
  break;
2066
2320
  }
2067
2321
  // If BGWorkStopped, which indicate that there is a BG error and
2068
2322
  // 1) soft error but requires no BG work, 2) no in auto_recovery_
2069
2323
  if (!resuming_from_bg_err && error_handler_.IsBGWorkStopped() &&
2070
2324
  error_handler_.GetBGError().severity() < Status::Severity::kHardError) {
2071
- return error_handler_.GetBGError();
2325
+ s = error_handler_.GetBGError();
2326
+ return s;
2072
2327
  }
2073
2328
 
2074
2329
  // Number of column families that have been dropped.
@@ -2086,7 +2341,8 @@ Status DBImpl::WaitForFlushMemTables(
2086
2341
  }
2087
2342
  }
2088
2343
  if (1 == num_dropped && 1 == num) {
2089
- return Status::ColumnFamilyDropped();
2344
+ s = Status::ColumnFamilyDropped();
2345
+ return s;
2090
2346
  }
2091
2347
  // Column families involved in this flush request have either been dropped
2092
2348
  // or finished flush. Then it's time to finish waiting.
@@ -2095,7 +2351,6 @@ Status DBImpl::WaitForFlushMemTables(
2095
2351
  }
2096
2352
  bg_cv_.Wait();
2097
2353
  }
2098
- Status s;
2099
2354
  // If not resuming from bg error, and an error has caused the DB to stop,
2100
2355
  // then report the bg error to caller.
2101
2356
  if (!resuming_from_bg_err && error_handler_.IsDBStopped()) {
@@ -2121,6 +2376,10 @@ Status DBImpl::EnableAutoCompaction(
2121
2376
  void DBImpl::DisableManualCompaction() {
2122
2377
  InstrumentedMutexLock l(&mutex_);
2123
2378
  manual_compaction_paused_.fetch_add(1, std::memory_order_release);
2379
+
2380
+ // Wake up manual compactions waiting to start.
2381
+ bg_cv_.SignalAll();
2382
+
2124
2383
  // Wait for any pending manual compactions to finish (typically through
2125
2384
  // failing with `Status::Incomplete`) prior to returning. This way we are
2126
2385
  // guaranteed no pending manual compaction will commit while manual
@@ -2206,10 +2465,12 @@ void DBImpl::MaybeScheduleFlushOrCompaction() {
2206
2465
  return;
2207
2466
  }
2208
2467
 
2209
- while (bg_compaction_scheduled_ < bg_job_limits.max_compactions &&
2468
+ while (bg_compaction_scheduled_ + bg_bottom_compaction_scheduled_ <
2469
+ bg_job_limits.max_compactions &&
2210
2470
  unscheduled_compactions_ > 0) {
2211
2471
  CompactionArg* ca = new CompactionArg;
2212
2472
  ca->db = this;
2473
+ ca->compaction_pri_ = Env::Priority::LOW;
2213
2474
  ca->prepicked_compaction = nullptr;
2214
2475
  bg_compaction_scheduled_++;
2215
2476
  unscheduled_compactions_--;
@@ -2322,6 +2583,17 @@ void DBImpl::SchedulePendingFlush(const FlushRequest& flush_req,
2322
2583
  assert(flush_req.size() == 1);
2323
2584
  ColumnFamilyData* cfd = flush_req[0].first;
2324
2585
  assert(cfd);
2586
+ // Note: SchedulePendingFlush is always preceded
2587
+ // with an imm()->FlushRequested() call. However,
2588
+ // we want to make this code snipper more resilient to
2589
+ // future changes. Therefore, we add the following if
2590
+ // statement - note that calling it twice (or more)
2591
+ // doesn't break anything.
2592
+ if (immutable_db_options_.experimental_mempurge_threshold > 0.0) {
2593
+ // If imm() contains silent memtables,
2594
+ // requesting a flush will mark the imm_needed as true.
2595
+ cfd->imm()->FlushRequested();
2596
+ }
2325
2597
  if (!cfd->queued_for_flush() && cfd->imm()->IsFlushPending()) {
2326
2598
  cfd->Ref();
2327
2599
  cfd->set_queued_for_flush(true);
@@ -2383,8 +2655,7 @@ void DBImpl::BGWorkBottomCompaction(void* arg) {
2383
2655
  IOSTATS_SET_THREAD_POOL_ID(Env::Priority::BOTTOM);
2384
2656
  TEST_SYNC_POINT("DBImpl::BGWorkBottomCompaction");
2385
2657
  auto* prepicked_compaction = ca.prepicked_compaction;
2386
- assert(prepicked_compaction && prepicked_compaction->compaction &&
2387
- !prepicked_compaction->manual_compaction_state);
2658
+ assert(prepicked_compaction && prepicked_compaction->compaction);
2388
2659
  ca.db->BackgroundCallCompaction(prepicked_compaction, Env::Priority::BOTTOM);
2389
2660
  delete prepicked_compaction;
2390
2661
  }
@@ -2397,10 +2668,27 @@ void DBImpl::BGWorkPurge(void* db) {
2397
2668
  }
2398
2669
 
2399
2670
  void DBImpl::UnscheduleCompactionCallback(void* arg) {
2400
- CompactionArg ca = *(reinterpret_cast<CompactionArg*>(arg));
2671
+ CompactionArg* ca_ptr = reinterpret_cast<CompactionArg*>(arg);
2672
+ Env::Priority compaction_pri = ca_ptr->compaction_pri_;
2673
+ if (Env::Priority::BOTTOM == compaction_pri) {
2674
+ // Decrement bg_bottom_compaction_scheduled_ if priority is BOTTOM
2675
+ ca_ptr->db->bg_bottom_compaction_scheduled_--;
2676
+ } else if (Env::Priority::LOW == compaction_pri) {
2677
+ // Decrement bg_compaction_scheduled_ if priority is LOW
2678
+ ca_ptr->db->bg_compaction_scheduled_--;
2679
+ }
2680
+ CompactionArg ca = *(ca_ptr);
2401
2681
  delete reinterpret_cast<CompactionArg*>(arg);
2402
2682
  if (ca.prepicked_compaction != nullptr) {
2683
+ // if it's a manual compaction, set status to ManualCompactionPaused
2684
+ if (ca.prepicked_compaction->manual_compaction_state) {
2685
+ ca.prepicked_compaction->manual_compaction_state->done = true;
2686
+ ca.prepicked_compaction->manual_compaction_state->status =
2687
+ Status::Incomplete(Status::SubCode::kManualCompactionPaused);
2688
+ }
2403
2689
  if (ca.prepicked_compaction->compaction != nullptr) {
2690
+ ca.prepicked_compaction->compaction->ReleaseCompactionFiles(
2691
+ Status::Incomplete(Status::SubCode::kManualCompactionPaused));
2404
2692
  delete ca.prepicked_compaction->compaction;
2405
2693
  }
2406
2694
  delete ca.prepicked_compaction;
@@ -2409,6 +2697,14 @@ void DBImpl::UnscheduleCompactionCallback(void* arg) {
2409
2697
  }
2410
2698
 
2411
2699
  void DBImpl::UnscheduleFlushCallback(void* arg) {
2700
+ // Decrement bg_flush_scheduled_ in flush callback
2701
+ reinterpret_cast<FlushThreadArg*>(arg)->db_->bg_flush_scheduled_--;
2702
+ Env::Priority flush_pri = reinterpret_cast<FlushThreadArg*>(arg)->thread_pri_;
2703
+ if (Env::Priority::LOW == flush_pri) {
2704
+ TEST_SYNC_POINT("DBImpl::UnscheduleLowFlushCallback");
2705
+ } else if (Env::Priority::HIGH == flush_pri) {
2706
+ TEST_SYNC_POINT("DBImpl::UnscheduleHighFlushCallback");
2707
+ }
2412
2708
  delete reinterpret_cast<FlushThreadArg*>(arg);
2413
2709
  TEST_SYNC_POINT("DBImpl::UnscheduleFlushCallback");
2414
2710
  }
@@ -2446,6 +2742,11 @@ Status DBImpl::BackgroundFlush(bool* made_progress, JobContext* job_context,
2446
2742
 
2447
2743
  for (const auto& iter : flush_req) {
2448
2744
  ColumnFamilyData* cfd = iter.first;
2745
+ if (immutable_db_options_.experimental_mempurge_threshold > 0.0) {
2746
+ // If imm() contains silent memtables,
2747
+ // requesting a flush will mark the imm_needed as true.
2748
+ cfd->imm()->FlushRequested();
2749
+ }
2449
2750
  if (cfd->IsDropped() || !cfd->imm()->IsFlushPending()) {
2450
2751
  // can't flush this CF, try next one
2451
2752
  column_families_not_to_flush.push_back(cfd);
@@ -2497,10 +2798,12 @@ void DBImpl::BackgroundCallFlush(Env::Priority thread_pri) {
2497
2798
  bool made_progress = false;
2498
2799
  JobContext job_context(next_job_id_.fetch_add(1), true);
2499
2800
 
2500
- TEST_SYNC_POINT("DBImpl::BackgroundCallFlush:start");
2801
+ TEST_SYNC_POINT_CALLBACK("DBImpl::BackgroundCallFlush:start", nullptr);
2501
2802
 
2502
2803
  LogBuffer log_buffer(InfoLogLevel::INFO_LEVEL,
2503
2804
  immutable_db_options_.info_log.get());
2805
+ TEST_SYNC_POINT("DBImpl::BackgroundCallFlush:Start:1");
2806
+ TEST_SYNC_POINT("DBImpl::BackgroundCallFlush:Start:2");
2504
2807
  {
2505
2808
  InstrumentedMutexLock l(&mutex_);
2506
2809
  assert(bg_flush_scheduled_);
@@ -2529,7 +2832,7 @@ void DBImpl::BackgroundCallFlush(Env::Priority thread_pri) {
2529
2832
  s.ToString().c_str(), error_cnt);
2530
2833
  log_buffer.FlushBufferToLog();
2531
2834
  LogFlush(immutable_db_options_.info_log);
2532
- env_->SleepForMicroseconds(1000000);
2835
+ immutable_db_options_.clock->SleepForMicroseconds(1000000);
2533
2836
  mutex_.Lock();
2534
2837
  }
2535
2838
 
@@ -2602,7 +2905,8 @@ void DBImpl::BackgroundCallCompaction(PrepickedCompaction* prepicked_compaction,
2602
2905
  if (s.IsBusy()) {
2603
2906
  bg_cv_.SignalAll(); // In case a waiter can proceed despite the error
2604
2907
  mutex_.Unlock();
2605
- env_->SleepForMicroseconds(10000); // prevent hot loop
2908
+ immutable_db_options_.clock->SleepForMicroseconds(
2909
+ 10000); // prevent hot loop
2606
2910
  mutex_.Lock();
2607
2911
  } else if (!s.ok() && !s.IsShutdownInProgress() &&
2608
2912
  !s.IsManualCompactionPaused() && !s.IsColumnFamilyDropped()) {
@@ -2620,9 +2924,10 @@ void DBImpl::BackgroundCallCompaction(PrepickedCompaction* prepicked_compaction,
2620
2924
  "Accumulated background error counts: %" PRIu64,
2621
2925
  s.ToString().c_str(), error_cnt);
2622
2926
  LogFlush(immutable_db_options_.info_log);
2623
- env_->SleepForMicroseconds(1000000);
2927
+ immutable_db_options_.clock->SleepForMicroseconds(1000000);
2624
2928
  mutex_.Lock();
2625
2929
  } else if (s.IsManualCompactionPaused()) {
2930
+ assert(prepicked_compaction);
2626
2931
  ManualCompactionState* m = prepicked_compaction->manual_compaction_state;
2627
2932
  assert(m);
2628
2933
  ROCKS_LOG_BUFFER(&log_buffer, "[%s] [JOB %d] Manual compaction paused",
@@ -2631,9 +2936,9 @@ void DBImpl::BackgroundCallCompaction(PrepickedCompaction* prepicked_compaction,
2631
2936
 
2632
2937
  ReleaseFileNumberFromPendingOutputs(pending_outputs_inserted_elem);
2633
2938
 
2634
- // If compaction failed, we want to delete all temporary files that we might
2635
- // have created (they might not be all recorded in job_context in case of a
2636
- // failure). Thus, we force full scan in FindObsoleteFiles()
2939
+ // If compaction failed, we want to delete all temporary files that we
2940
+ // might have created (they might not be all recorded in job_context in
2941
+ // case of a failure). Thus, we force full scan in FindObsoleteFiles()
2637
2942
  FindObsoleteFiles(&job_context, !s.ok() && !s.IsShutdownInProgress() &&
2638
2943
  !s.IsManualCompactionPaused() &&
2639
2944
  !s.IsColumnFamilyDropped() &&
@@ -2660,6 +2965,7 @@ void DBImpl::BackgroundCallCompaction(PrepickedCompaction* prepicked_compaction,
2660
2965
 
2661
2966
  assert(num_running_compactions_ > 0);
2662
2967
  num_running_compactions_--;
2968
+
2663
2969
  if (bg_thread_pri == Env::Priority::LOW) {
2664
2970
  bg_compaction_scheduled_--;
2665
2971
  } else {
@@ -2667,10 +2973,17 @@ void DBImpl::BackgroundCallCompaction(PrepickedCompaction* prepicked_compaction,
2667
2973
  bg_bottom_compaction_scheduled_--;
2668
2974
  }
2669
2975
 
2670
- versions_->GetColumnFamilySet()->FreeDeadColumnFamilies();
2671
-
2672
2976
  // See if there's more work to be done
2673
2977
  MaybeScheduleFlushOrCompaction();
2978
+
2979
+ if (prepicked_compaction != nullptr &&
2980
+ prepicked_compaction->task_token != nullptr) {
2981
+ // Releasing task tokens affects (and asserts on) the DB state, so
2982
+ // must be done before we potentially signal the DB close process to
2983
+ // proceed below.
2984
+ prepicked_compaction->task_token.reset();
2985
+ }
2986
+
2674
2987
  if (made_progress ||
2675
2988
  (bg_compaction_scheduled_ == 0 &&
2676
2989
  bg_bottom_compaction_scheduled_ == 0) ||
@@ -2723,6 +3036,9 @@ Status DBImpl::BackgroundCompaction(bool* made_progress,
2723
3036
  } else if (is_manual &&
2724
3037
  manual_compaction_paused_.load(std::memory_order_acquire) > 0) {
2725
3038
  status = Status::Incomplete(Status::SubCode::kManualCompactionPaused);
3039
+ } else if (is_manual && manual_compaction->canceled &&
3040
+ manual_compaction->canceled->load(std::memory_order_acquire)) {
3041
+ status = Status::Incomplete(Status::SubCode::kManualCompactionPaused);
2726
3042
  }
2727
3043
  } else {
2728
3044
  status = error_handler_.GetBGError();
@@ -2752,6 +3068,8 @@ Status DBImpl::BackgroundCompaction(bool* made_progress,
2752
3068
  manual_compaction->in_progress = true;
2753
3069
  }
2754
3070
 
3071
+ TEST_SYNC_POINT("DBImpl::BackgroundCompaction:InProgress");
3072
+
2755
3073
  std::unique_ptr<TaskLimiterToken> task_token;
2756
3074
 
2757
3075
  // InternalKey manual_end_storage;
@@ -2850,7 +3168,7 @@ Status DBImpl::BackgroundCompaction(bool* made_progress,
2850
3168
  c->column_family_data()
2851
3169
  ->current()
2852
3170
  ->storage_info()
2853
- ->ComputeCompactionScore(*(c->immutable_cf_options()),
3171
+ ->ComputeCompactionScore(*(c->immutable_options()),
2854
3172
  *(c->mutable_cf_options()));
2855
3173
  AddToCompactionQueue(cfd);
2856
3174
  ++unscheduled_compactions_;
@@ -2861,8 +3179,12 @@ Status DBImpl::BackgroundCompaction(bool* made_progress,
2861
3179
  status = Status::CompactionTooLarge();
2862
3180
  } else {
2863
3181
  // update statistics
2864
- RecordInHistogram(stats_, NUM_FILES_IN_SINGLE_COMPACTION,
2865
- c->inputs(0)->size());
3182
+ size_t num_files = 0;
3183
+ for (auto& each_level : *c->inputs()) {
3184
+ num_files += each_level.files.size();
3185
+ }
3186
+ RecordInHistogram(stats_, NUM_FILES_IN_SINGLE_COMPACTION, num_files);
3187
+
2866
3188
  // There are three things that can change compaction score:
2867
3189
  // 1) When flush or compaction finish. This case is covered by
2868
3190
  // InstallSuperVersionAndScheduleWork
@@ -2947,13 +3269,13 @@ Status DBImpl::BackgroundCompaction(bool* made_progress,
2947
3269
  for (size_t i = 0; i < c->num_input_files(l); i++) {
2948
3270
  FileMetaData* f = c->input(l, i);
2949
3271
  c->edit()->DeleteFile(c->level(l), f->fd.GetNumber());
2950
- c->edit()->AddFile(c->output_level(), f->fd.GetNumber(),
2951
- f->fd.GetPathId(), f->fd.GetFileSize(), f->smallest,
2952
- f->largest, f->fd.smallest_seqno,
2953
- f->fd.largest_seqno, f->marked_for_compaction,
2954
- f->oldest_blob_file_number, f->oldest_ancester_time,
2955
- f->file_creation_time, f->file_checksum,
2956
- f->file_checksum_func_name);
3272
+ c->edit()->AddFile(
3273
+ c->output_level(), f->fd.GetNumber(), f->fd.GetPathId(),
3274
+ f->fd.GetFileSize(), f->smallest, f->largest, f->fd.smallest_seqno,
3275
+ f->fd.largest_seqno, f->marked_for_compaction, f->temperature,
3276
+ f->oldest_blob_file_number, f->oldest_ancester_time,
3277
+ f->file_creation_time, f->file_checksum, f->file_checksum_func_name,
3278
+ f->min_timestamp, f->max_timestamp);
2957
3279
 
2958
3280
  ROCKS_LOG_BUFFER(
2959
3281
  log_buffer,
@@ -3010,6 +3332,7 @@ Status DBImpl::BackgroundCompaction(bool* made_progress,
3010
3332
  TEST_SYNC_POINT("DBImpl::BackgroundCompaction:ForwardToBottomPriPool");
3011
3333
  CompactionArg* ca = new CompactionArg;
3012
3334
  ca->db = this;
3335
+ ca->compaction_pri_ = Env::Priority::BOTTOM;
3013
3336
  ca->prepicked_compaction = new PrepickedCompaction;
3014
3337
  ca->prepicked_compaction->compaction = c.release();
3015
3338
  ca->prepicked_compaction->manual_compaction_state = nullptr;
@@ -3033,17 +3356,19 @@ Status DBImpl::BackgroundCompaction(bool* made_progress,
3033
3356
  assert(is_snapshot_supported_ || snapshots_.empty());
3034
3357
  CompactionJob compaction_job(
3035
3358
  job_context->job_id, c.get(), immutable_db_options_,
3036
- file_options_for_compaction_, versions_.get(), &shutting_down_,
3037
- preserve_deletes_seqnum_.load(), log_buffer, directories_.GetDbDir(),
3359
+ mutable_db_options_, file_options_for_compaction_, versions_.get(),
3360
+ &shutting_down_, log_buffer, directories_.GetDbDir(),
3038
3361
  GetDataDir(c->column_family_data(), c->output_path_id()),
3039
3362
  GetDataDir(c->column_family_data(), 0), stats_, &mutex_,
3040
3363
  &error_handler_, snapshot_seqs, earliest_write_conflict_snapshot,
3041
- snapshot_checker, table_cache_, &event_logger_,
3364
+ snapshot_checker, job_context, table_cache_, &event_logger_,
3042
3365
  c->mutable_cf_options()->paranoid_file_checks,
3043
3366
  c->mutable_cf_options()->report_bg_io_stats, dbname_,
3044
3367
  &compaction_job_stats, thread_pri, io_tracer_,
3045
- is_manual ? &manual_compaction_paused_ : nullptr, db_id_,
3046
- db_session_id_, c->column_family_data()->GetFullHistoryTsLow());
3368
+ is_manual ? &manual_compaction_paused_ : nullptr,
3369
+ is_manual ? manual_compaction->canceled : nullptr, db_id_,
3370
+ db_session_id_, c->column_family_data()->GetFullHistoryTsLow(),
3371
+ c->trim_ts(), &blob_callback_);
3047
3372
  compaction_job.Prepare();
3048
3373
 
3049
3374
  NotifyOnCompactionBegin(c->column_family_data(), c.get(), status,
@@ -3122,7 +3447,7 @@ Status DBImpl::BackgroundCompaction(bool* made_progress,
3122
3447
  c->column_family_data()
3123
3448
  ->current()
3124
3449
  ->storage_info()
3125
- ->ComputeCompactionScore(*(c->immutable_cf_options()),
3450
+ ->ComputeCompactionScore(*(c->immutable_options()),
3126
3451
  *(c->mutable_cf_options()));
3127
3452
  if (!cfd->queued_for_compaction()) {
3128
3453
  AddToCompactionQueue(cfd);
@@ -3178,6 +3503,7 @@ bool DBImpl::HasPendingManualCompaction() {
3178
3503
  }
3179
3504
 
3180
3505
  void DBImpl::AddManualCompaction(DBImpl::ManualCompactionState* m) {
3506
+ assert(manual_compaction_paused_ == 0);
3181
3507
  manual_compaction_dequeue_.push_back(m);
3182
3508
  }
3183
3509
 
@@ -3263,7 +3589,7 @@ bool DBImpl::MCOverlap(ManualCompactionState* m, ManualCompactionState* m1) {
3263
3589
  if (m->cfd != m1->cfd) {
3264
3590
  return false;
3265
3591
  }
3266
- return true;
3592
+ return false;
3267
3593
  }
3268
3594
 
3269
3595
  #ifndef ROCKSDB_LITE
@@ -3287,7 +3613,7 @@ void DBImpl::BuildCompactionJobInfo(
3287
3613
  for (const auto fmd : *c->inputs(i)) {
3288
3614
  const FileDescriptor& desc = fmd->fd;
3289
3615
  const uint64_t file_number = desc.GetNumber();
3290
- auto fn = TableFileName(c->immutable_cf_options()->cf_paths, file_number,
3616
+ auto fn = TableFileName(c->immutable_options()->cf_paths, file_number,
3291
3617
  desc.GetPathId());
3292
3618
  compaction_job_info->input_files.push_back(fn);
3293
3619
  compaction_job_info->input_file_infos.push_back(CompactionFileInfo{
@@ -3306,10 +3632,34 @@ void DBImpl::BuildCompactionJobInfo(
3306
3632
  const FileDescriptor& desc = meta.fd;
3307
3633
  const uint64_t file_number = desc.GetNumber();
3308
3634
  compaction_job_info->output_files.push_back(TableFileName(
3309
- c->immutable_cf_options()->cf_paths, file_number, desc.GetPathId()));
3635
+ c->immutable_options()->cf_paths, file_number, desc.GetPathId()));
3310
3636
  compaction_job_info->output_file_infos.push_back(CompactionFileInfo{
3311
3637
  newf.first, file_number, meta.oldest_blob_file_number});
3312
3638
  }
3639
+ compaction_job_info->blob_compression_type =
3640
+ c->mutable_cf_options()->blob_compression_type;
3641
+
3642
+ // Update BlobFilesInfo.
3643
+ for (const auto& blob_file : c->edit()->GetBlobFileAdditions()) {
3644
+ BlobFileAdditionInfo blob_file_addition_info(
3645
+ BlobFileName(c->immutable_options()->cf_paths.front().path,
3646
+ blob_file.GetBlobFileNumber()) /*blob_file_path*/,
3647
+ blob_file.GetBlobFileNumber(), blob_file.GetTotalBlobCount(),
3648
+ blob_file.GetTotalBlobBytes());
3649
+ compaction_job_info->blob_file_addition_infos.emplace_back(
3650
+ std::move(blob_file_addition_info));
3651
+ }
3652
+
3653
+ // Update BlobFilesGarbageInfo.
3654
+ for (const auto& blob_file : c->edit()->GetBlobFileGarbages()) {
3655
+ BlobFileGarbageInfo blob_file_garbage_info(
3656
+ BlobFileName(c->immutable_options()->cf_paths.front().path,
3657
+ blob_file.GetBlobFileNumber()) /*blob_file_path*/,
3658
+ blob_file.GetBlobFileNumber(), blob_file.GetGarbageBlobCount(),
3659
+ blob_file.GetGarbageBlobBytes());
3660
+ compaction_job_info->blob_file_garbage_infos.emplace_back(
3661
+ std::move(blob_file_garbage_info));
3662
+ }
3313
3663
  }
3314
3664
  #endif
3315
3665
 
@@ -3342,7 +3692,7 @@ void DBImpl::InstallSuperVersionAndScheduleWork(
3342
3692
  if (UNLIKELY(sv_context->new_superversion == nullptr)) {
3343
3693
  sv_context->NewSuperVersion();
3344
3694
  }
3345
- cfd->InstallSuperVersion(sv_context, &mutex_, mutable_cf_options);
3695
+ cfd->InstallSuperVersion(sv_context, mutable_cf_options);
3346
3696
 
3347
3697
  // There may be a small data race here. The snapshot tricking bottommost
3348
3698
  // compaction may already be released here. But assuming there will always be
@@ -3418,4 +3768,22 @@ void DBImpl::GetSnapshotContext(
3418
3768
  }
3419
3769
  *snapshot_seqs = snapshots_.GetAll(earliest_write_conflict_snapshot);
3420
3770
  }
3771
+
3772
+ Status DBImpl::WaitForCompact(bool wait_unscheduled) {
3773
+ // Wait until the compaction completes
3774
+
3775
+ // TODO: a bug here. This function actually does not necessarily
3776
+ // wait for compact. It actually waits for scheduled compaction
3777
+ // OR flush to finish.
3778
+
3779
+ InstrumentedMutexLock l(&mutex_);
3780
+ while ((bg_bottom_compaction_scheduled_ || bg_compaction_scheduled_ ||
3781
+ bg_flush_scheduled_ ||
3782
+ (wait_unscheduled && unscheduled_compactions_)) &&
3783
+ (error_handler_.GetBGError().ok())) {
3784
+ bg_cv_.Wait();
3785
+ }
3786
+ return error_handler_.GetBGError();
3787
+ }
3788
+
3421
3789
  } // namespace ROCKSDB_NAMESPACE