@nxtedition/rocksdb 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (1088) hide show
  1. package/CHANGELOG.md +294 -0
  2. package/LICENSE +21 -0
  3. package/README.md +102 -0
  4. package/UPGRADING.md +91 -0
  5. package/binding.cc +1276 -0
  6. package/binding.gyp +73 -0
  7. package/binding.js +1 -0
  8. package/chained-batch.js +44 -0
  9. package/deps/rocksdb/build_version.cc +4 -0
  10. package/deps/rocksdb/rocksdb/CMakeLists.txt +1356 -0
  11. package/deps/rocksdb/rocksdb/COPYING +339 -0
  12. package/deps/rocksdb/rocksdb/LICENSE.Apache +202 -0
  13. package/deps/rocksdb/rocksdb/LICENSE.leveldb +29 -0
  14. package/deps/rocksdb/rocksdb/Makefile +2521 -0
  15. package/deps/rocksdb/rocksdb/TARGETS +2100 -0
  16. package/deps/rocksdb/rocksdb/cache/cache.cc +63 -0
  17. package/deps/rocksdb/rocksdb/cache/cache_bench.cc +381 -0
  18. package/deps/rocksdb/rocksdb/cache/cache_helpers.h +114 -0
  19. package/deps/rocksdb/rocksdb/cache/cache_test.cc +775 -0
  20. package/deps/rocksdb/rocksdb/cache/clock_cache.cc +769 -0
  21. package/deps/rocksdb/rocksdb/cache/clock_cache.h +16 -0
  22. package/deps/rocksdb/rocksdb/cache/lru_cache.cc +574 -0
  23. package/deps/rocksdb/rocksdb/cache/lru_cache.h +339 -0
  24. package/deps/rocksdb/rocksdb/cache/lru_cache_test.cc +199 -0
  25. package/deps/rocksdb/rocksdb/cache/sharded_cache.cc +162 -0
  26. package/deps/rocksdb/rocksdb/cache/sharded_cache.h +111 -0
  27. package/deps/rocksdb/rocksdb/cmake/RocksDBConfig.cmake.in +54 -0
  28. package/deps/rocksdb/rocksdb/cmake/modules/CxxFlags.cmake +7 -0
  29. package/deps/rocksdb/rocksdb/cmake/modules/FindJeMalloc.cmake +29 -0
  30. package/deps/rocksdb/rocksdb/cmake/modules/FindNUMA.cmake +29 -0
  31. package/deps/rocksdb/rocksdb/cmake/modules/FindSnappy.cmake +29 -0
  32. package/deps/rocksdb/rocksdb/cmake/modules/FindTBB.cmake +33 -0
  33. package/deps/rocksdb/rocksdb/cmake/modules/Findgflags.cmake +29 -0
  34. package/deps/rocksdb/rocksdb/cmake/modules/Findlz4.cmake +29 -0
  35. package/deps/rocksdb/rocksdb/cmake/modules/Findzstd.cmake +29 -0
  36. package/deps/rocksdb/rocksdb/cmake/modules/ReadVersion.cmake +10 -0
  37. package/deps/rocksdb/rocksdb/db/arena_wrapped_db_iter.cc +108 -0
  38. package/deps/rocksdb/rocksdb/db/arena_wrapped_db_iter.h +115 -0
  39. package/deps/rocksdb/rocksdb/db/blob/blob_constants.h +16 -0
  40. package/deps/rocksdb/rocksdb/db/blob/blob_file_addition.cc +154 -0
  41. package/deps/rocksdb/rocksdb/db/blob/blob_file_addition.h +67 -0
  42. package/deps/rocksdb/rocksdb/db/blob/blob_file_addition_test.cc +206 -0
  43. package/deps/rocksdb/rocksdb/db/blob/blob_file_builder.cc +316 -0
  44. package/deps/rocksdb/rocksdb/db/blob/blob_file_builder.h +91 -0
  45. package/deps/rocksdb/rocksdb/db/blob/blob_file_builder_test.cc +660 -0
  46. package/deps/rocksdb/rocksdb/db/blob/blob_file_cache.cc +99 -0
  47. package/deps/rocksdb/rocksdb/db/blob/blob_file_cache.h +49 -0
  48. package/deps/rocksdb/rocksdb/db/blob/blob_file_cache_test.cc +268 -0
  49. package/deps/rocksdb/rocksdb/db/blob/blob_file_garbage.cc +134 -0
  50. package/deps/rocksdb/rocksdb/db/blob/blob_file_garbage.h +57 -0
  51. package/deps/rocksdb/rocksdb/db/blob/blob_file_garbage_test.cc +173 -0
  52. package/deps/rocksdb/rocksdb/db/blob/blob_file_meta.cc +55 -0
  53. package/deps/rocksdb/rocksdb/db/blob/blob_file_meta.h +164 -0
  54. package/deps/rocksdb/rocksdb/db/blob/blob_file_reader.cc +423 -0
  55. package/deps/rocksdb/rocksdb/db/blob/blob_file_reader.h +81 -0
  56. package/deps/rocksdb/rocksdb/db/blob/blob_file_reader_test.cc +771 -0
  57. package/deps/rocksdb/rocksdb/db/blob/blob_index.h +184 -0
  58. package/deps/rocksdb/rocksdb/db/blob/blob_log_format.cc +145 -0
  59. package/deps/rocksdb/rocksdb/db/blob/blob_log_format.h +148 -0
  60. package/deps/rocksdb/rocksdb/db/blob/blob_log_sequential_reader.cc +132 -0
  61. package/deps/rocksdb/rocksdb/db/blob/blob_log_sequential_reader.h +76 -0
  62. package/deps/rocksdb/rocksdb/db/blob/blob_log_writer.cc +168 -0
  63. package/deps/rocksdb/rocksdb/db/blob/blob_log_writer.h +83 -0
  64. package/deps/rocksdb/rocksdb/db/blob/db_blob_basic_test.cc +307 -0
  65. package/deps/rocksdb/rocksdb/db/blob/db_blob_index_test.cc +464 -0
  66. package/deps/rocksdb/rocksdb/db/builder.cc +358 -0
  67. package/deps/rocksdb/rocksdb/db/builder.h +95 -0
  68. package/deps/rocksdb/rocksdb/db/c.cc +5281 -0
  69. package/deps/rocksdb/rocksdb/db/c_test.c +2883 -0
  70. package/deps/rocksdb/rocksdb/db/column_family.cc +1602 -0
  71. package/deps/rocksdb/rocksdb/db/column_family.h +787 -0
  72. package/deps/rocksdb/rocksdb/db/column_family_test.cc +3427 -0
  73. package/deps/rocksdb/rocksdb/db/compact_files_test.cc +425 -0
  74. package/deps/rocksdb/rocksdb/db/compacted_db_impl.cc +169 -0
  75. package/deps/rocksdb/rocksdb/db/compacted_db_impl.h +118 -0
  76. package/deps/rocksdb/rocksdb/db/compaction/compaction.cc +591 -0
  77. package/deps/rocksdb/rocksdb/db/compaction/compaction.h +389 -0
  78. package/deps/rocksdb/rocksdb/db/compaction/compaction_iteration_stats.h +37 -0
  79. package/deps/rocksdb/rocksdb/db/compaction/compaction_iterator.cc +1023 -0
  80. package/deps/rocksdb/rocksdb/db/compaction/compaction_iterator.h +353 -0
  81. package/deps/rocksdb/rocksdb/db/compaction/compaction_iterator_test.cc +1254 -0
  82. package/deps/rocksdb/rocksdb/db/compaction/compaction_job.cc +1917 -0
  83. package/deps/rocksdb/rocksdb/db/compaction/compaction_job.h +208 -0
  84. package/deps/rocksdb/rocksdb/db/compaction/compaction_job_stats_test.cc +1037 -0
  85. package/deps/rocksdb/rocksdb/db/compaction/compaction_job_test.cc +1224 -0
  86. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker.cc +1135 -0
  87. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker.h +318 -0
  88. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_fifo.cc +255 -0
  89. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_fifo.h +57 -0
  90. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_level.cc +510 -0
  91. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_level.h +33 -0
  92. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_test.cc +2190 -0
  93. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_universal.cc +1103 -0
  94. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_universal.h +32 -0
  95. package/deps/rocksdb/rocksdb/db/compaction/sst_partitioner.cc +44 -0
  96. package/deps/rocksdb/rocksdb/db/comparator_db_test.cc +660 -0
  97. package/deps/rocksdb/rocksdb/db/convenience.cc +78 -0
  98. package/deps/rocksdb/rocksdb/db/corruption_test.cc +921 -0
  99. package/deps/rocksdb/rocksdb/db/cuckoo_table_db_test.cc +359 -0
  100. package/deps/rocksdb/rocksdb/db/db_basic_test.cc +3820 -0
  101. package/deps/rocksdb/rocksdb/db/db_block_cache_test.cc +1058 -0
  102. package/deps/rocksdb/rocksdb/db/db_bloom_filter_test.cc +2128 -0
  103. package/deps/rocksdb/rocksdb/db/db_compaction_filter_test.cc +851 -0
  104. package/deps/rocksdb/rocksdb/db/db_compaction_test.cc +6292 -0
  105. package/deps/rocksdb/rocksdb/db/db_dynamic_level_test.cc +509 -0
  106. package/deps/rocksdb/rocksdb/db/db_encryption_test.cc +130 -0
  107. package/deps/rocksdb/rocksdb/db/db_filesnapshot.cc +137 -0
  108. package/deps/rocksdb/rocksdb/db/db_flush_test.cc +1119 -0
  109. package/deps/rocksdb/rocksdb/db/db_impl/db_impl.cc +5057 -0
  110. package/deps/rocksdb/rocksdb/db/db_impl/db_impl.h +2274 -0
  111. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_compaction_flush.cc +3421 -0
  112. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_debug.cc +298 -0
  113. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_experimental.cc +151 -0
  114. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_files.cc +967 -0
  115. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_open.cc +1806 -0
  116. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_readonly.cc +270 -0
  117. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_readonly.h +146 -0
  118. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_secondary.cc +683 -0
  119. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_secondary.h +333 -0
  120. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_write.cc +2024 -0
  121. package/deps/rocksdb/rocksdb/db/db_impl/db_secondary_test.cc +932 -0
  122. package/deps/rocksdb/rocksdb/db/db_info_dumper.cc +137 -0
  123. package/deps/rocksdb/rocksdb/db/db_info_dumper.h +15 -0
  124. package/deps/rocksdb/rocksdb/db/db_inplace_update_test.cc +178 -0
  125. package/deps/rocksdb/rocksdb/db/db_io_failure_test.cc +592 -0
  126. package/deps/rocksdb/rocksdb/db/db_iter.cc +1493 -0
  127. package/deps/rocksdb/rocksdb/db/db_iter.h +390 -0
  128. package/deps/rocksdb/rocksdb/db/db_iter_stress_test.cc +657 -0
  129. package/deps/rocksdb/rocksdb/db/db_iter_test.cc +3268 -0
  130. package/deps/rocksdb/rocksdb/db/db_iterator_test.cc +3197 -0
  131. package/deps/rocksdb/rocksdb/db/db_log_iter_test.cc +299 -0
  132. package/deps/rocksdb/rocksdb/db/db_logical_block_size_cache_test.cc +513 -0
  133. package/deps/rocksdb/rocksdb/db/db_memtable_test.cc +329 -0
  134. package/deps/rocksdb/rocksdb/db/db_merge_operand_test.cc +241 -0
  135. package/deps/rocksdb/rocksdb/db/db_merge_operator_test.cc +671 -0
  136. package/deps/rocksdb/rocksdb/db/db_options_test.cc +1022 -0
  137. package/deps/rocksdb/rocksdb/db/db_properties_test.cc +1723 -0
  138. package/deps/rocksdb/rocksdb/db/db_range_del_test.cc +1694 -0
  139. package/deps/rocksdb/rocksdb/db/db_sst_test.cc +1261 -0
  140. package/deps/rocksdb/rocksdb/db/db_statistics_test.cc +164 -0
  141. package/deps/rocksdb/rocksdb/db/db_table_properties_test.cc +488 -0
  142. package/deps/rocksdb/rocksdb/db/db_tailing_iter_test.cc +567 -0
  143. package/deps/rocksdb/rocksdb/db/db_test.cc +6736 -0
  144. package/deps/rocksdb/rocksdb/db/db_test2.cc +5408 -0
  145. package/deps/rocksdb/rocksdb/db/db_test_util.cc +1633 -0
  146. package/deps/rocksdb/rocksdb/db/db_test_util.h +1194 -0
  147. package/deps/rocksdb/rocksdb/db/db_universal_compaction_test.cc +2235 -0
  148. package/deps/rocksdb/rocksdb/db/db_wal_test.cc +1780 -0
  149. package/deps/rocksdb/rocksdb/db/db_with_timestamp_basic_test.cc +2520 -0
  150. package/deps/rocksdb/rocksdb/db/db_with_timestamp_compaction_test.cc +119 -0
  151. package/deps/rocksdb/rocksdb/db/db_write_test.cc +465 -0
  152. package/deps/rocksdb/rocksdb/db/dbformat.cc +222 -0
  153. package/deps/rocksdb/rocksdb/db/dbformat.h +786 -0
  154. package/deps/rocksdb/rocksdb/db/dbformat_test.cc +206 -0
  155. package/deps/rocksdb/rocksdb/db/deletefile_test.cc +580 -0
  156. package/deps/rocksdb/rocksdb/db/error_handler.cc +726 -0
  157. package/deps/rocksdb/rocksdb/db/error_handler.h +117 -0
  158. package/deps/rocksdb/rocksdb/db/error_handler_fs_test.cc +2598 -0
  159. package/deps/rocksdb/rocksdb/db/event_helpers.cc +233 -0
  160. package/deps/rocksdb/rocksdb/db/event_helpers.h +57 -0
  161. package/deps/rocksdb/rocksdb/db/experimental.cc +50 -0
  162. package/deps/rocksdb/rocksdb/db/external_sst_file_basic_test.cc +1559 -0
  163. package/deps/rocksdb/rocksdb/db/external_sst_file_ingestion_job.cc +910 -0
  164. package/deps/rocksdb/rocksdb/db/external_sst_file_ingestion_job.h +195 -0
  165. package/deps/rocksdb/rocksdb/db/external_sst_file_test.cc +2936 -0
  166. package/deps/rocksdb/rocksdb/db/fault_injection_test.cc +556 -0
  167. package/deps/rocksdb/rocksdb/db/file_indexer.cc +216 -0
  168. package/deps/rocksdb/rocksdb/db/file_indexer.h +142 -0
  169. package/deps/rocksdb/rocksdb/db/file_indexer_test.cc +350 -0
  170. package/deps/rocksdb/rocksdb/db/filename_test.cc +179 -0
  171. package/deps/rocksdb/rocksdb/db/flush_job.cc +514 -0
  172. package/deps/rocksdb/rocksdb/db/flush_job.h +169 -0
  173. package/deps/rocksdb/rocksdb/db/flush_job_test.cc +647 -0
  174. package/deps/rocksdb/rocksdb/db/flush_scheduler.cc +86 -0
  175. package/deps/rocksdb/rocksdb/db/flush_scheduler.h +54 -0
  176. package/deps/rocksdb/rocksdb/db/forward_iterator.cc +1023 -0
  177. package/deps/rocksdb/rocksdb/db/forward_iterator.h +163 -0
  178. package/deps/rocksdb/rocksdb/db/forward_iterator_bench.cc +377 -0
  179. package/deps/rocksdb/rocksdb/db/import_column_family_job.cc +282 -0
  180. package/deps/rocksdb/rocksdb/db/import_column_family_job.h +75 -0
  181. package/deps/rocksdb/rocksdb/db/import_column_family_test.cc +632 -0
  182. package/deps/rocksdb/rocksdb/db/internal_stats.cc +1461 -0
  183. package/deps/rocksdb/rocksdb/db/internal_stats.h +712 -0
  184. package/deps/rocksdb/rocksdb/db/job_context.h +226 -0
  185. package/deps/rocksdb/rocksdb/db/listener_test.cc +1118 -0
  186. package/deps/rocksdb/rocksdb/db/log_format.h +48 -0
  187. package/deps/rocksdb/rocksdb/db/log_reader.cc +654 -0
  188. package/deps/rocksdb/rocksdb/db/log_reader.h +192 -0
  189. package/deps/rocksdb/rocksdb/db/log_test.cc +901 -0
  190. package/deps/rocksdb/rocksdb/db/log_writer.cc +164 -0
  191. package/deps/rocksdb/rocksdb/db/log_writer.h +115 -0
  192. package/deps/rocksdb/rocksdb/db/logs_with_prep_tracker.cc +67 -0
  193. package/deps/rocksdb/rocksdb/db/logs_with_prep_tracker.h +63 -0
  194. package/deps/rocksdb/rocksdb/db/lookup_key.h +66 -0
  195. package/deps/rocksdb/rocksdb/db/malloc_stats.cc +54 -0
  196. package/deps/rocksdb/rocksdb/db/malloc_stats.h +24 -0
  197. package/deps/rocksdb/rocksdb/db/manual_compaction_test.cc +296 -0
  198. package/deps/rocksdb/rocksdb/db/memtable.cc +1169 -0
  199. package/deps/rocksdb/rocksdb/db/memtable.h +554 -0
  200. package/deps/rocksdb/rocksdb/db/memtable_list.cc +888 -0
  201. package/deps/rocksdb/rocksdb/db/memtable_list.h +438 -0
  202. package/deps/rocksdb/rocksdb/db/memtable_list_test.cc +935 -0
  203. package/deps/rocksdb/rocksdb/db/merge_context.h +134 -0
  204. package/deps/rocksdb/rocksdb/db/merge_helper.cc +421 -0
  205. package/deps/rocksdb/rocksdb/db/merge_helper.h +197 -0
  206. package/deps/rocksdb/rocksdb/db/merge_helper_test.cc +290 -0
  207. package/deps/rocksdb/rocksdb/db/merge_operator.cc +86 -0
  208. package/deps/rocksdb/rocksdb/db/merge_test.cc +608 -0
  209. package/deps/rocksdb/rocksdb/db/obsolete_files_test.cc +338 -0
  210. package/deps/rocksdb/rocksdb/db/options_file_test.cc +119 -0
  211. package/deps/rocksdb/rocksdb/db/output_validator.cc +30 -0
  212. package/deps/rocksdb/rocksdb/db/output_validator.h +47 -0
  213. package/deps/rocksdb/rocksdb/db/perf_context_test.cc +993 -0
  214. package/deps/rocksdb/rocksdb/db/periodic_work_scheduler.cc +113 -0
  215. package/deps/rocksdb/rocksdb/db/periodic_work_scheduler.h +76 -0
  216. package/deps/rocksdb/rocksdb/db/periodic_work_scheduler_test.cc +231 -0
  217. package/deps/rocksdb/rocksdb/db/pinned_iterators_manager.h +87 -0
  218. package/deps/rocksdb/rocksdb/db/plain_table_db_test.cc +1374 -0
  219. package/deps/rocksdb/rocksdb/db/pre_release_callback.h +38 -0
  220. package/deps/rocksdb/rocksdb/db/prefix_test.cc +910 -0
  221. package/deps/rocksdb/rocksdb/db/range_del_aggregator.cc +489 -0
  222. package/deps/rocksdb/rocksdb/db/range_del_aggregator.h +446 -0
  223. package/deps/rocksdb/rocksdb/db/range_del_aggregator_bench.cc +260 -0
  224. package/deps/rocksdb/rocksdb/db/range_del_aggregator_test.cc +709 -0
  225. package/deps/rocksdb/rocksdb/db/range_tombstone_fragmenter.cc +439 -0
  226. package/deps/rocksdb/rocksdb/db/range_tombstone_fragmenter.h +256 -0
  227. package/deps/rocksdb/rocksdb/db/range_tombstone_fragmenter_test.cc +552 -0
  228. package/deps/rocksdb/rocksdb/db/read_callback.h +53 -0
  229. package/deps/rocksdb/rocksdb/db/repair.cc +722 -0
  230. package/deps/rocksdb/rocksdb/db/repair_test.cc +390 -0
  231. package/deps/rocksdb/rocksdb/db/snapshot_checker.h +61 -0
  232. package/deps/rocksdb/rocksdb/db/snapshot_impl.cc +26 -0
  233. package/deps/rocksdb/rocksdb/db/snapshot_impl.h +167 -0
  234. package/deps/rocksdb/rocksdb/db/table_cache.cc +704 -0
  235. package/deps/rocksdb/rocksdb/db/table_cache.h +233 -0
  236. package/deps/rocksdb/rocksdb/db/table_properties_collector.cc +75 -0
  237. package/deps/rocksdb/rocksdb/db/table_properties_collector.h +107 -0
  238. package/deps/rocksdb/rocksdb/db/table_properties_collector_test.cc +517 -0
  239. package/deps/rocksdb/rocksdb/db/transaction_log_impl.cc +318 -0
  240. package/deps/rocksdb/rocksdb/db/transaction_log_impl.h +128 -0
  241. package/deps/rocksdb/rocksdb/db/trim_history_scheduler.cc +54 -0
  242. package/deps/rocksdb/rocksdb/db/trim_history_scheduler.h +44 -0
  243. package/deps/rocksdb/rocksdb/db/version_builder.cc +1078 -0
  244. package/deps/rocksdb/rocksdb/db/version_builder.h +69 -0
  245. package/deps/rocksdb/rocksdb/db/version_builder_test.cc +1551 -0
  246. package/deps/rocksdb/rocksdb/db/version_edit.cc +955 -0
  247. package/deps/rocksdb/rocksdb/db/version_edit.h +609 -0
  248. package/deps/rocksdb/rocksdb/db/version_edit_handler.cc +699 -0
  249. package/deps/rocksdb/rocksdb/db/version_edit_handler.h +252 -0
  250. package/deps/rocksdb/rocksdb/db/version_edit_test.cc +597 -0
  251. package/deps/rocksdb/rocksdb/db/version_set.cc +6333 -0
  252. package/deps/rocksdb/rocksdb/db/version_set.h +1485 -0
  253. package/deps/rocksdb/rocksdb/db/version_set_test.cc +3035 -0
  254. package/deps/rocksdb/rocksdb/db/wal_edit.cc +204 -0
  255. package/deps/rocksdb/rocksdb/db/wal_edit.h +166 -0
  256. package/deps/rocksdb/rocksdb/db/wal_edit_test.cc +214 -0
  257. package/deps/rocksdb/rocksdb/db/wal_manager.cc +517 -0
  258. package/deps/rocksdb/rocksdb/db/wal_manager.h +119 -0
  259. package/deps/rocksdb/rocksdb/db/wal_manager_test.cc +340 -0
  260. package/deps/rocksdb/rocksdb/db/write_batch.cc +2174 -0
  261. package/deps/rocksdb/rocksdb/db/write_batch_base.cc +94 -0
  262. package/deps/rocksdb/rocksdb/db/write_batch_internal.h +250 -0
  263. package/deps/rocksdb/rocksdb/db/write_batch_test.cc +907 -0
  264. package/deps/rocksdb/rocksdb/db/write_callback.h +27 -0
  265. package/deps/rocksdb/rocksdb/db/write_callback_test.cc +457 -0
  266. package/deps/rocksdb/rocksdb/db/write_controller.cc +128 -0
  267. package/deps/rocksdb/rocksdb/db/write_controller.h +144 -0
  268. package/deps/rocksdb/rocksdb/db/write_controller_test.cc +135 -0
  269. package/deps/rocksdb/rocksdb/db/write_thread.cc +796 -0
  270. package/deps/rocksdb/rocksdb/db/write_thread.h +433 -0
  271. package/deps/rocksdb/rocksdb/db_stress_tool/CMakeLists.txt +14 -0
  272. package/deps/rocksdb/rocksdb/db_stress_tool/batched_ops_stress.cc +341 -0
  273. package/deps/rocksdb/rocksdb/db_stress_tool/cf_consistency_stress.cc +520 -0
  274. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress.cc +23 -0
  275. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_common.cc +337 -0
  276. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_common.h +554 -0
  277. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_compaction_filter.h +79 -0
  278. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_driver.cc +173 -0
  279. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_driver.h +17 -0
  280. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_env_wrapper.h +38 -0
  281. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_gflags.cc +763 -0
  282. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_listener.h +222 -0
  283. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_shared_state.cc +27 -0
  284. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_shared_state.h +428 -0
  285. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_stat.h +218 -0
  286. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_table_properties_collector.h +64 -0
  287. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_test_base.cc +2430 -0
  288. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_test_base.h +237 -0
  289. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_tool.cc +343 -0
  290. package/deps/rocksdb/rocksdb/db_stress_tool/no_batched_ops_stress.cc +800 -0
  291. package/deps/rocksdb/rocksdb/env/composite_env_wrapper.h +920 -0
  292. package/deps/rocksdb/rocksdb/env/env.cc +733 -0
  293. package/deps/rocksdb/rocksdb/env/env_basic_test.cc +352 -0
  294. package/deps/rocksdb/rocksdb/env/env_chroot.cc +346 -0
  295. package/deps/rocksdb/rocksdb/env/env_chroot.h +22 -0
  296. package/deps/rocksdb/rocksdb/env/env_encryption.cc +1148 -0
  297. package/deps/rocksdb/rocksdb/env/env_encryption_ctr.h +137 -0
  298. package/deps/rocksdb/rocksdb/env/env_hdfs.cc +648 -0
  299. package/deps/rocksdb/rocksdb/env/env_posix.cc +514 -0
  300. package/deps/rocksdb/rocksdb/env/env_test.cc +2230 -0
  301. package/deps/rocksdb/rocksdb/env/file_system.cc +132 -0
  302. package/deps/rocksdb/rocksdb/env/file_system_tracer.cc +448 -0
  303. package/deps/rocksdb/rocksdb/env/file_system_tracer.h +415 -0
  304. package/deps/rocksdb/rocksdb/env/fs_posix.cc +1086 -0
  305. package/deps/rocksdb/rocksdb/env/io_posix.cc +1499 -0
  306. package/deps/rocksdb/rocksdb/env/io_posix.h +402 -0
  307. package/deps/rocksdb/rocksdb/env/io_posix_test.cc +140 -0
  308. package/deps/rocksdb/rocksdb/env/mock_env.cc +1066 -0
  309. package/deps/rocksdb/rocksdb/env/mock_env.h +41 -0
  310. package/deps/rocksdb/rocksdb/env/mock_env_test.cc +85 -0
  311. package/deps/rocksdb/rocksdb/file/delete_scheduler.cc +402 -0
  312. package/deps/rocksdb/rocksdb/file/delete_scheduler.h +150 -0
  313. package/deps/rocksdb/rocksdb/file/delete_scheduler_test.cc +717 -0
  314. package/deps/rocksdb/rocksdb/file/file_prefetch_buffer.cc +156 -0
  315. package/deps/rocksdb/rocksdb/file/file_prefetch_buffer.h +99 -0
  316. package/deps/rocksdb/rocksdb/file/file_util.cc +268 -0
  317. package/deps/rocksdb/rocksdb/file/file_util.h +96 -0
  318. package/deps/rocksdb/rocksdb/file/filename.cc +473 -0
  319. package/deps/rocksdb/rocksdb/file/filename.h +182 -0
  320. package/deps/rocksdb/rocksdb/file/prefetch_test.cc +188 -0
  321. package/deps/rocksdb/rocksdb/file/random_access_file_reader.cc +315 -0
  322. package/deps/rocksdb/rocksdb/file/random_access_file_reader.h +142 -0
  323. package/deps/rocksdb/rocksdb/file/random_access_file_reader_test.cc +482 -0
  324. package/deps/rocksdb/rocksdb/file/read_write_util.cc +67 -0
  325. package/deps/rocksdb/rocksdb/file/read_write_util.h +34 -0
  326. package/deps/rocksdb/rocksdb/file/readahead_raf.cc +169 -0
  327. package/deps/rocksdb/rocksdb/file/readahead_raf.h +29 -0
  328. package/deps/rocksdb/rocksdb/file/sequence_file_reader.cc +237 -0
  329. package/deps/rocksdb/rocksdb/file/sequence_file_reader.h +63 -0
  330. package/deps/rocksdb/rocksdb/file/sst_file_manager_impl.cc +552 -0
  331. package/deps/rocksdb/rocksdb/file/sst_file_manager_impl.h +203 -0
  332. package/deps/rocksdb/rocksdb/file/writable_file_writer.cc +523 -0
  333. package/deps/rocksdb/rocksdb/file/writable_file_writer.h +251 -0
  334. package/deps/rocksdb/rocksdb/hdfs/env_hdfs.h +386 -0
  335. package/deps/rocksdb/rocksdb/include/rocksdb/advanced_options.h +839 -0
  336. package/deps/rocksdb/rocksdb/include/rocksdb/c.h +2218 -0
  337. package/deps/rocksdb/rocksdb/include/rocksdb/cache.h +294 -0
  338. package/deps/rocksdb/rocksdb/include/rocksdb/cleanable.h +71 -0
  339. package/deps/rocksdb/rocksdb/include/rocksdb/compaction_filter.h +214 -0
  340. package/deps/rocksdb/rocksdb/include/rocksdb/compaction_job_stats.h +98 -0
  341. package/deps/rocksdb/rocksdb/include/rocksdb/comparator.h +137 -0
  342. package/deps/rocksdb/rocksdb/include/rocksdb/compression_type.h +40 -0
  343. package/deps/rocksdb/rocksdb/include/rocksdb/concurrent_task_limiter.h +46 -0
  344. package/deps/rocksdb/rocksdb/include/rocksdb/configurable.h +359 -0
  345. package/deps/rocksdb/rocksdb/include/rocksdb/convenience.h +499 -0
  346. package/deps/rocksdb/rocksdb/include/rocksdb/customizable.h +138 -0
  347. package/deps/rocksdb/rocksdb/include/rocksdb/db.h +1697 -0
  348. package/deps/rocksdb/rocksdb/include/rocksdb/db_bench_tool.h +11 -0
  349. package/deps/rocksdb/rocksdb/include/rocksdb/db_dump_tool.h +45 -0
  350. package/deps/rocksdb/rocksdb/include/rocksdb/db_stress_tool.h +11 -0
  351. package/deps/rocksdb/rocksdb/include/rocksdb/env.h +1671 -0
  352. package/deps/rocksdb/rocksdb/include/rocksdb/env_encryption.h +405 -0
  353. package/deps/rocksdb/rocksdb/include/rocksdb/experimental.h +29 -0
  354. package/deps/rocksdb/rocksdb/include/rocksdb/file_checksum.h +129 -0
  355. package/deps/rocksdb/rocksdb/include/rocksdb/file_system.h +1472 -0
  356. package/deps/rocksdb/rocksdb/include/rocksdb/filter_policy.h +238 -0
  357. package/deps/rocksdb/rocksdb/include/rocksdb/flush_block_policy.h +61 -0
  358. package/deps/rocksdb/rocksdb/include/rocksdb/io_status.h +269 -0
  359. package/deps/rocksdb/rocksdb/include/rocksdb/iostats_context.h +56 -0
  360. package/deps/rocksdb/rocksdb/include/rocksdb/iterator.h +128 -0
  361. package/deps/rocksdb/rocksdb/include/rocksdb/ldb_tool.h +43 -0
  362. package/deps/rocksdb/rocksdb/include/rocksdb/listener.h +556 -0
  363. package/deps/rocksdb/rocksdb/include/rocksdb/memory_allocator.h +77 -0
  364. package/deps/rocksdb/rocksdb/include/rocksdb/memtablerep.h +385 -0
  365. package/deps/rocksdb/rocksdb/include/rocksdb/merge_operator.h +257 -0
  366. package/deps/rocksdb/rocksdb/include/rocksdb/metadata.h +155 -0
  367. package/deps/rocksdb/rocksdb/include/rocksdb/options.h +1702 -0
  368. package/deps/rocksdb/rocksdb/include/rocksdb/perf_context.h +237 -0
  369. package/deps/rocksdb/rocksdb/include/rocksdb/perf_level.h +35 -0
  370. package/deps/rocksdb/rocksdb/include/rocksdb/persistent_cache.h +73 -0
  371. package/deps/rocksdb/rocksdb/include/rocksdb/rate_limiter.h +139 -0
  372. package/deps/rocksdb/rocksdb/include/rocksdb/rocksdb_namespace.h +10 -0
  373. package/deps/rocksdb/rocksdb/include/rocksdb/slice.h +269 -0
  374. package/deps/rocksdb/rocksdb/include/rocksdb/slice_transform.h +103 -0
  375. package/deps/rocksdb/rocksdb/include/rocksdb/snapshot.h +48 -0
  376. package/deps/rocksdb/rocksdb/include/rocksdb/sst_dump_tool.h +19 -0
  377. package/deps/rocksdb/rocksdb/include/rocksdb/sst_file_manager.h +136 -0
  378. package/deps/rocksdb/rocksdb/include/rocksdb/sst_file_reader.h +47 -0
  379. package/deps/rocksdb/rocksdb/include/rocksdb/sst_file_writer.h +145 -0
  380. package/deps/rocksdb/rocksdb/include/rocksdb/sst_partitioner.h +135 -0
  381. package/deps/rocksdb/rocksdb/include/rocksdb/statistics.h +592 -0
  382. package/deps/rocksdb/rocksdb/include/rocksdb/stats_history.h +69 -0
  383. package/deps/rocksdb/rocksdb/include/rocksdb/status.h +608 -0
  384. package/deps/rocksdb/rocksdb/include/rocksdb/table.h +711 -0
  385. package/deps/rocksdb/rocksdb/include/rocksdb/table_properties.h +280 -0
  386. package/deps/rocksdb/rocksdb/include/rocksdb/thread_status.h +188 -0
  387. package/deps/rocksdb/rocksdb/include/rocksdb/threadpool.h +58 -0
  388. package/deps/rocksdb/rocksdb/include/rocksdb/trace_reader_writer.h +48 -0
  389. package/deps/rocksdb/rocksdb/include/rocksdb/transaction_log.h +121 -0
  390. package/deps/rocksdb/rocksdb/include/rocksdb/types.h +74 -0
  391. package/deps/rocksdb/rocksdb/include/rocksdb/universal_compaction.h +86 -0
  392. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/backupable_db.h +535 -0
  393. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/checkpoint.h +61 -0
  394. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/convenience.h +10 -0
  395. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/db_ttl.h +72 -0
  396. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/debug.h +49 -0
  397. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/env_librados.h +175 -0
  398. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/env_mirror.h +180 -0
  399. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/info_log_finder.h +19 -0
  400. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/ldb_cmd.h +288 -0
  401. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/ldb_cmd_execute_result.h +71 -0
  402. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/leveldb_options.h +145 -0
  403. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/lua/rocks_lua_custom_library.h +43 -0
  404. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/lua/rocks_lua_util.h +55 -0
  405. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/memory_util.h +50 -0
  406. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/object_registry.h +205 -0
  407. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/optimistic_transaction_db.h +100 -0
  408. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/option_change_migration.h +19 -0
  409. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/options_type.h +876 -0
  410. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/options_util.h +128 -0
  411. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/sim_cache.h +94 -0
  412. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/stackable_db.h +504 -0
  413. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/table_properties_collectors.h +95 -0
  414. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/transaction.h +626 -0
  415. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/transaction_db.h +432 -0
  416. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/transaction_db_mutex.h +92 -0
  417. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/utility_db.h +34 -0
  418. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/write_batch_with_index.h +279 -0
  419. package/deps/rocksdb/rocksdb/include/rocksdb/version.h +16 -0
  420. package/deps/rocksdb/rocksdb/include/rocksdb/wal_filter.h +102 -0
  421. package/deps/rocksdb/rocksdb/include/rocksdb/write_batch.h +377 -0
  422. package/deps/rocksdb/rocksdb/include/rocksdb/write_batch_base.h +127 -0
  423. package/deps/rocksdb/rocksdb/include/rocksdb/write_buffer_manager.h +106 -0
  424. package/deps/rocksdb/rocksdb/logging/auto_roll_logger.cc +300 -0
  425. package/deps/rocksdb/rocksdb/logging/auto_roll_logger.h +165 -0
  426. package/deps/rocksdb/rocksdb/logging/auto_roll_logger_test.cc +684 -0
  427. package/deps/rocksdb/rocksdb/logging/env_logger.h +165 -0
  428. package/deps/rocksdb/rocksdb/logging/env_logger_test.cc +162 -0
  429. package/deps/rocksdb/rocksdb/logging/event_logger.cc +70 -0
  430. package/deps/rocksdb/rocksdb/logging/event_logger.h +203 -0
  431. package/deps/rocksdb/rocksdb/logging/event_logger_test.cc +43 -0
  432. package/deps/rocksdb/rocksdb/logging/log_buffer.cc +92 -0
  433. package/deps/rocksdb/rocksdb/logging/log_buffer.h +56 -0
  434. package/deps/rocksdb/rocksdb/logging/logging.h +68 -0
  435. package/deps/rocksdb/rocksdb/logging/posix_logger.h +185 -0
  436. package/deps/rocksdb/rocksdb/memory/allocator.h +57 -0
  437. package/deps/rocksdb/rocksdb/memory/arena.cc +233 -0
  438. package/deps/rocksdb/rocksdb/memory/arena.h +141 -0
  439. package/deps/rocksdb/rocksdb/memory/arena_test.cc +204 -0
  440. package/deps/rocksdb/rocksdb/memory/concurrent_arena.cc +47 -0
  441. package/deps/rocksdb/rocksdb/memory/concurrent_arena.h +218 -0
  442. package/deps/rocksdb/rocksdb/memory/jemalloc_nodump_allocator.cc +206 -0
  443. package/deps/rocksdb/rocksdb/memory/jemalloc_nodump_allocator.h +78 -0
  444. package/deps/rocksdb/rocksdb/memory/memkind_kmem_allocator.cc +33 -0
  445. package/deps/rocksdb/rocksdb/memory/memkind_kmem_allocator.h +27 -0
  446. package/deps/rocksdb/rocksdb/memory/memkind_kmem_allocator_test.cc +102 -0
  447. package/deps/rocksdb/rocksdb/memory/memory_allocator.h +38 -0
  448. package/deps/rocksdb/rocksdb/memory/memory_usage.h +25 -0
  449. package/deps/rocksdb/rocksdb/memtable/alloc_tracker.cc +62 -0
  450. package/deps/rocksdb/rocksdb/memtable/hash_linklist_rep.cc +844 -0
  451. package/deps/rocksdb/rocksdb/memtable/hash_linklist_rep.h +49 -0
  452. package/deps/rocksdb/rocksdb/memtable/hash_skiplist_rep.cc +349 -0
  453. package/deps/rocksdb/rocksdb/memtable/hash_skiplist_rep.h +44 -0
  454. package/deps/rocksdb/rocksdb/memtable/inlineskiplist.h +997 -0
  455. package/deps/rocksdb/rocksdb/memtable/inlineskiplist_test.cc +663 -0
  456. package/deps/rocksdb/rocksdb/memtable/memtablerep_bench.cc +677 -0
  457. package/deps/rocksdb/rocksdb/memtable/skiplist.h +496 -0
  458. package/deps/rocksdb/rocksdb/memtable/skiplist_test.cc +388 -0
  459. package/deps/rocksdb/rocksdb/memtable/skiplistrep.cc +280 -0
  460. package/deps/rocksdb/rocksdb/memtable/stl_wrappers.h +33 -0
  461. package/deps/rocksdb/rocksdb/memtable/vectorrep.cc +301 -0
  462. package/deps/rocksdb/rocksdb/memtable/write_buffer_manager.cc +148 -0
  463. package/deps/rocksdb/rocksdb/memtable/write_buffer_manager_test.cc +203 -0
  464. package/deps/rocksdb/rocksdb/monitoring/file_read_sample.h +23 -0
  465. package/deps/rocksdb/rocksdb/monitoring/histogram.cc +287 -0
  466. package/deps/rocksdb/rocksdb/monitoring/histogram.h +149 -0
  467. package/deps/rocksdb/rocksdb/monitoring/histogram_test.cc +231 -0
  468. package/deps/rocksdb/rocksdb/monitoring/histogram_windowing.cc +200 -0
  469. package/deps/rocksdb/rocksdb/monitoring/histogram_windowing.h +84 -0
  470. package/deps/rocksdb/rocksdb/monitoring/in_memory_stats_history.cc +49 -0
  471. package/deps/rocksdb/rocksdb/monitoring/in_memory_stats_history.h +74 -0
  472. package/deps/rocksdb/rocksdb/monitoring/instrumented_mutex.cc +71 -0
  473. package/deps/rocksdb/rocksdb/monitoring/instrumented_mutex.h +98 -0
  474. package/deps/rocksdb/rocksdb/monitoring/iostats_context.cc +62 -0
  475. package/deps/rocksdb/rocksdb/monitoring/iostats_context_imp.h +60 -0
  476. package/deps/rocksdb/rocksdb/monitoring/iostats_context_test.cc +29 -0
  477. package/deps/rocksdb/rocksdb/monitoring/perf_context.cc +566 -0
  478. package/deps/rocksdb/rocksdb/monitoring/perf_context_imp.h +97 -0
  479. package/deps/rocksdb/rocksdb/monitoring/perf_level.cc +28 -0
  480. package/deps/rocksdb/rocksdb/monitoring/perf_level_imp.h +18 -0
  481. package/deps/rocksdb/rocksdb/monitoring/perf_step_timer.h +79 -0
  482. package/deps/rocksdb/rocksdb/monitoring/persistent_stats_history.cc +169 -0
  483. package/deps/rocksdb/rocksdb/monitoring/persistent_stats_history.h +83 -0
  484. package/deps/rocksdb/rocksdb/monitoring/statistics.cc +431 -0
  485. package/deps/rocksdb/rocksdb/monitoring/statistics.h +138 -0
  486. package/deps/rocksdb/rocksdb/monitoring/statistics_test.cc +47 -0
  487. package/deps/rocksdb/rocksdb/monitoring/stats_history_test.cc +652 -0
  488. package/deps/rocksdb/rocksdb/monitoring/thread_status_impl.cc +163 -0
  489. package/deps/rocksdb/rocksdb/monitoring/thread_status_updater.cc +314 -0
  490. package/deps/rocksdb/rocksdb/monitoring/thread_status_updater.h +233 -0
  491. package/deps/rocksdb/rocksdb/monitoring/thread_status_updater_debug.cc +43 -0
  492. package/deps/rocksdb/rocksdb/monitoring/thread_status_util.cc +206 -0
  493. package/deps/rocksdb/rocksdb/monitoring/thread_status_util.h +134 -0
  494. package/deps/rocksdb/rocksdb/monitoring/thread_status_util_debug.cc +32 -0
  495. package/deps/rocksdb/rocksdb/options/cf_options.cc +1026 -0
  496. package/deps/rocksdb/rocksdb/options/cf_options.h +308 -0
  497. package/deps/rocksdb/rocksdb/options/configurable.cc +681 -0
  498. package/deps/rocksdb/rocksdb/options/configurable_helper.h +251 -0
  499. package/deps/rocksdb/rocksdb/options/configurable_test.cc +757 -0
  500. package/deps/rocksdb/rocksdb/options/configurable_test.h +127 -0
  501. package/deps/rocksdb/rocksdb/options/customizable.cc +77 -0
  502. package/deps/rocksdb/rocksdb/options/customizable_helper.h +216 -0
  503. package/deps/rocksdb/rocksdb/options/customizable_test.cc +625 -0
  504. package/deps/rocksdb/rocksdb/options/db_options.cc +835 -0
  505. package/deps/rocksdb/rocksdb/options/db_options.h +126 -0
  506. package/deps/rocksdb/rocksdb/options/options.cc +664 -0
  507. package/deps/rocksdb/rocksdb/options/options_helper.cc +1391 -0
  508. package/deps/rocksdb/rocksdb/options/options_helper.h +118 -0
  509. package/deps/rocksdb/rocksdb/options/options_parser.cc +721 -0
  510. package/deps/rocksdb/rocksdb/options/options_parser.h +151 -0
  511. package/deps/rocksdb/rocksdb/options/options_settable_test.cc +583 -0
  512. package/deps/rocksdb/rocksdb/options/options_test.cc +3794 -0
  513. package/deps/rocksdb/rocksdb/port/jemalloc_helper.h +106 -0
  514. package/deps/rocksdb/rocksdb/port/lang.h +16 -0
  515. package/deps/rocksdb/rocksdb/port/likely.h +18 -0
  516. package/deps/rocksdb/rocksdb/port/malloc.h +17 -0
  517. package/deps/rocksdb/rocksdb/port/port.h +21 -0
  518. package/deps/rocksdb/rocksdb/port/port_dirent.h +44 -0
  519. package/deps/rocksdb/rocksdb/port/port_example.h +101 -0
  520. package/deps/rocksdb/rocksdb/port/port_posix.cc +266 -0
  521. package/deps/rocksdb/rocksdb/port/port_posix.h +223 -0
  522. package/deps/rocksdb/rocksdb/port/stack_trace.cc +179 -0
  523. package/deps/rocksdb/rocksdb/port/stack_trace.h +28 -0
  524. package/deps/rocksdb/rocksdb/port/sys_time.h +47 -0
  525. package/deps/rocksdb/rocksdb/port/util_logger.h +20 -0
  526. package/deps/rocksdb/rocksdb/port/win/env_default.cc +45 -0
  527. package/deps/rocksdb/rocksdb/port/win/env_win.cc +1449 -0
  528. package/deps/rocksdb/rocksdb/port/win/env_win.h +294 -0
  529. package/deps/rocksdb/rocksdb/port/win/io_win.cc +1084 -0
  530. package/deps/rocksdb/rocksdb/port/win/io_win.h +494 -0
  531. package/deps/rocksdb/rocksdb/port/win/port_win.cc +283 -0
  532. package/deps/rocksdb/rocksdb/port/win/port_win.h +411 -0
  533. package/deps/rocksdb/rocksdb/port/win/win_jemalloc.cc +79 -0
  534. package/deps/rocksdb/rocksdb/port/win/win_logger.cc +194 -0
  535. package/deps/rocksdb/rocksdb/port/win/win_logger.h +67 -0
  536. package/deps/rocksdb/rocksdb/port/win/win_thread.cc +183 -0
  537. package/deps/rocksdb/rocksdb/port/win/win_thread.h +122 -0
  538. package/deps/rocksdb/rocksdb/port/win/xpress_win.cc +221 -0
  539. package/deps/rocksdb/rocksdb/port/win/xpress_win.h +26 -0
  540. package/deps/rocksdb/rocksdb/port/xpress.h +17 -0
  541. package/deps/rocksdb/rocksdb/src.mk +631 -0
  542. package/deps/rocksdb/rocksdb/table/adaptive/adaptive_table_factory.cc +126 -0
  543. package/deps/rocksdb/rocksdb/table/adaptive/adaptive_table_factory.h +57 -0
  544. package/deps/rocksdb/rocksdb/table/block_based/binary_search_index_reader.cc +73 -0
  545. package/deps/rocksdb/rocksdb/table/block_based/binary_search_index_reader.h +48 -0
  546. package/deps/rocksdb/rocksdb/table/block_based/block.cc +1049 -0
  547. package/deps/rocksdb/rocksdb/table/block_based/block.h +720 -0
  548. package/deps/rocksdb/rocksdb/table/block_based/block_based_filter_block.cc +348 -0
  549. package/deps/rocksdb/rocksdb/table/block_based/block_based_filter_block.h +119 -0
  550. package/deps/rocksdb/rocksdb/table/block_based/block_based_filter_block_test.cc +434 -0
  551. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_builder.cc +1835 -0
  552. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_builder.h +193 -0
  553. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_factory.cc +839 -0
  554. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_factory.h +95 -0
  555. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_iterator.cc +383 -0
  556. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_iterator.h +251 -0
  557. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader.cc +3563 -0
  558. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader.h +681 -0
  559. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader_impl.h +190 -0
  560. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader_test.cc +347 -0
  561. package/deps/rocksdb/rocksdb/table/block_based/block_builder.cc +201 -0
  562. package/deps/rocksdb/rocksdb/table/block_based/block_builder.h +78 -0
  563. package/deps/rocksdb/rocksdb/table/block_based/block_prefetcher.cc +66 -0
  564. package/deps/rocksdb/rocksdb/table/block_based/block_prefetcher.h +32 -0
  565. package/deps/rocksdb/rocksdb/table/block_based/block_prefix_index.cc +232 -0
  566. package/deps/rocksdb/rocksdb/table/block_based/block_prefix_index.h +66 -0
  567. package/deps/rocksdb/rocksdb/table/block_based/block_test.cc +623 -0
  568. package/deps/rocksdb/rocksdb/table/block_based/block_type.h +30 -0
  569. package/deps/rocksdb/rocksdb/table/block_based/cachable_entry.h +220 -0
  570. package/deps/rocksdb/rocksdb/table/block_based/data_block_footer.cc +59 -0
  571. package/deps/rocksdb/rocksdb/table/block_based/data_block_footer.h +25 -0
  572. package/deps/rocksdb/rocksdb/table/block_based/data_block_hash_index.cc +93 -0
  573. package/deps/rocksdb/rocksdb/table/block_based/data_block_hash_index.h +136 -0
  574. package/deps/rocksdb/rocksdb/table/block_based/data_block_hash_index_test.cc +717 -0
  575. package/deps/rocksdb/rocksdb/table/block_based/filter_block.h +180 -0
  576. package/deps/rocksdb/rocksdb/table/block_based/filter_block_reader_common.cc +102 -0
  577. package/deps/rocksdb/rocksdb/table/block_based/filter_block_reader_common.h +55 -0
  578. package/deps/rocksdb/rocksdb/table/block_based/filter_policy.cc +1407 -0
  579. package/deps/rocksdb/rocksdb/table/block_based/filter_policy_internal.h +168 -0
  580. package/deps/rocksdb/rocksdb/table/block_based/flush_block_policy.cc +88 -0
  581. package/deps/rocksdb/rocksdb/table/block_based/flush_block_policy.h +41 -0
  582. package/deps/rocksdb/rocksdb/table/block_based/full_filter_block.cc +344 -0
  583. package/deps/rocksdb/rocksdb/table/block_based/full_filter_block.h +139 -0
  584. package/deps/rocksdb/rocksdb/table/block_based/full_filter_block_test.cc +333 -0
  585. package/deps/rocksdb/rocksdb/table/block_based/hash_index_reader.cc +147 -0
  586. package/deps/rocksdb/rocksdb/table/block_based/hash_index_reader.h +49 -0
  587. package/deps/rocksdb/rocksdb/table/block_based/index_builder.cc +248 -0
  588. package/deps/rocksdb/rocksdb/table/block_based/index_builder.h +444 -0
  589. package/deps/rocksdb/rocksdb/table/block_based/index_reader_common.cc +54 -0
  590. package/deps/rocksdb/rocksdb/table/block_based/index_reader_common.h +85 -0
  591. package/deps/rocksdb/rocksdb/table/block_based/mock_block_based_table.h +56 -0
  592. package/deps/rocksdb/rocksdb/table/block_based/parsed_full_filter_block.cc +22 -0
  593. package/deps/rocksdb/rocksdb/table/block_based/parsed_full_filter_block.h +40 -0
  594. package/deps/rocksdb/rocksdb/table/block_based/partitioned_filter_block.cc +521 -0
  595. package/deps/rocksdb/rocksdb/table/block_based/partitioned_filter_block.h +144 -0
  596. package/deps/rocksdb/rocksdb/table/block_based/partitioned_filter_block_test.cc +424 -0
  597. package/deps/rocksdb/rocksdb/table/block_based/partitioned_index_iterator.cc +163 -0
  598. package/deps/rocksdb/rocksdb/table/block_based/partitioned_index_iterator.h +142 -0
  599. package/deps/rocksdb/rocksdb/table/block_based/partitioned_index_reader.cc +186 -0
  600. package/deps/rocksdb/rocksdb/table/block_based/partitioned_index_reader.h +51 -0
  601. package/deps/rocksdb/rocksdb/table/block_based/reader_common.cc +64 -0
  602. package/deps/rocksdb/rocksdb/table/block_based/reader_common.h +38 -0
  603. package/deps/rocksdb/rocksdb/table/block_based/uncompression_dict_reader.cc +120 -0
  604. package/deps/rocksdb/rocksdb/table/block_based/uncompression_dict_reader.h +59 -0
  605. package/deps/rocksdb/rocksdb/table/block_fetcher.cc +324 -0
  606. package/deps/rocksdb/rocksdb/table/block_fetcher.h +129 -0
  607. package/deps/rocksdb/rocksdb/table/block_fetcher_test.cc +534 -0
  608. package/deps/rocksdb/rocksdb/table/cleanable_test.cc +277 -0
  609. package/deps/rocksdb/rocksdb/table/cuckoo/cuckoo_table_builder.cc +543 -0
  610. package/deps/rocksdb/rocksdb/table/cuckoo/cuckoo_table_builder.h +136 -0
  611. package/deps/rocksdb/rocksdb/table/cuckoo/cuckoo_table_builder_test.cc +663 -0
  612. package/deps/rocksdb/rocksdb/table/cuckoo/cuckoo_table_factory.cc +107 -0
  613. package/deps/rocksdb/rocksdb/table/cuckoo/cuckoo_table_factory.h +81 -0
  614. package/deps/rocksdb/rocksdb/table/cuckoo/cuckoo_table_reader.cc +404 -0
  615. package/deps/rocksdb/rocksdb/table/cuckoo/cuckoo_table_reader.h +101 -0
  616. package/deps/rocksdb/rocksdb/table/cuckoo/cuckoo_table_reader_test.cc +585 -0
  617. package/deps/rocksdb/rocksdb/table/format.cc +422 -0
  618. package/deps/rocksdb/rocksdb/table/format.h +348 -0
  619. package/deps/rocksdb/rocksdb/table/get_context.cc +408 -0
  620. package/deps/rocksdb/rocksdb/table/get_context.h +212 -0
  621. package/deps/rocksdb/rocksdb/table/internal_iterator.h +205 -0
  622. package/deps/rocksdb/rocksdb/table/iter_heap.h +42 -0
  623. package/deps/rocksdb/rocksdb/table/iterator.cc +210 -0
  624. package/deps/rocksdb/rocksdb/table/iterator_wrapper.h +180 -0
  625. package/deps/rocksdb/rocksdb/table/merger_test.cc +180 -0
  626. package/deps/rocksdb/rocksdb/table/merging_iterator.cc +481 -0
  627. package/deps/rocksdb/rocksdb/table/merging_iterator.h +64 -0
  628. package/deps/rocksdb/rocksdb/table/meta_blocks.cc +541 -0
  629. package/deps/rocksdb/rocksdb/table/meta_blocks.h +154 -0
  630. package/deps/rocksdb/rocksdb/table/mock_table.cc +328 -0
  631. package/deps/rocksdb/rocksdb/table/mock_table.h +89 -0
  632. package/deps/rocksdb/rocksdb/table/multiget_context.h +282 -0
  633. package/deps/rocksdb/rocksdb/table/persistent_cache_helper.cc +116 -0
  634. package/deps/rocksdb/rocksdb/table/persistent_cache_helper.h +44 -0
  635. package/deps/rocksdb/rocksdb/table/persistent_cache_options.h +34 -0
  636. package/deps/rocksdb/rocksdb/table/plain/plain_table_bloom.cc +78 -0
  637. package/deps/rocksdb/rocksdb/table/plain/plain_table_bloom.h +135 -0
  638. package/deps/rocksdb/rocksdb/table/plain/plain_table_builder.cc +332 -0
  639. package/deps/rocksdb/rocksdb/table/plain/plain_table_builder.h +153 -0
  640. package/deps/rocksdb/rocksdb/table/plain/plain_table_factory.cc +263 -0
  641. package/deps/rocksdb/rocksdb/table/plain/plain_table_factory.h +182 -0
  642. package/deps/rocksdb/rocksdb/table/plain/plain_table_index.cc +211 -0
  643. package/deps/rocksdb/rocksdb/table/plain/plain_table_index.h +249 -0
  644. package/deps/rocksdb/rocksdb/table/plain/plain_table_key_coding.cc +506 -0
  645. package/deps/rocksdb/rocksdb/table/plain/plain_table_key_coding.h +201 -0
  646. package/deps/rocksdb/rocksdb/table/plain/plain_table_reader.cc +781 -0
  647. package/deps/rocksdb/rocksdb/table/plain/plain_table_reader.h +247 -0
  648. package/deps/rocksdb/rocksdb/table/scoped_arena_iterator.h +61 -0
  649. package/deps/rocksdb/rocksdb/table/sst_file_dumper.cc +502 -0
  650. package/deps/rocksdb/rocksdb/table/sst_file_dumper.h +96 -0
  651. package/deps/rocksdb/rocksdb/table/sst_file_reader.cc +98 -0
  652. package/deps/rocksdb/rocksdb/table/sst_file_reader_test.cc +228 -0
  653. package/deps/rocksdb/rocksdb/table/sst_file_writer.cc +340 -0
  654. package/deps/rocksdb/rocksdb/table/sst_file_writer_collectors.h +94 -0
  655. package/deps/rocksdb/rocksdb/table/table_builder.h +203 -0
  656. package/deps/rocksdb/rocksdb/table/table_factory.cc +38 -0
  657. package/deps/rocksdb/rocksdb/table/table_properties.cc +300 -0
  658. package/deps/rocksdb/rocksdb/table/table_properties_internal.h +30 -0
  659. package/deps/rocksdb/rocksdb/table/table_reader.h +147 -0
  660. package/deps/rocksdb/rocksdb/table/table_reader_bench.cc +347 -0
  661. package/deps/rocksdb/rocksdb/table/table_reader_caller.h +39 -0
  662. package/deps/rocksdb/rocksdb/table/table_test.cc +4769 -0
  663. package/deps/rocksdb/rocksdb/table/two_level_iterator.cc +215 -0
  664. package/deps/rocksdb/rocksdb/table/two_level_iterator.h +43 -0
  665. package/deps/rocksdb/rocksdb/test_util/mock_time_env.cc +38 -0
  666. package/deps/rocksdb/rocksdb/test_util/mock_time_env.h +74 -0
  667. package/deps/rocksdb/rocksdb/test_util/sync_point.cc +93 -0
  668. package/deps/rocksdb/rocksdb/test_util/sync_point.h +161 -0
  669. package/deps/rocksdb/rocksdb/test_util/sync_point_impl.cc +129 -0
  670. package/deps/rocksdb/rocksdb/test_util/sync_point_impl.h +74 -0
  671. package/deps/rocksdb/rocksdb/test_util/testharness.cc +56 -0
  672. package/deps/rocksdb/rocksdb/test_util/testharness.h +53 -0
  673. package/deps/rocksdb/rocksdb/test_util/testutil.cc +566 -0
  674. package/deps/rocksdb/rocksdb/test_util/testutil.h +887 -0
  675. package/deps/rocksdb/rocksdb/test_util/testutil_test.cc +43 -0
  676. package/deps/rocksdb/rocksdb/test_util/transaction_test_util.cc +388 -0
  677. package/deps/rocksdb/rocksdb/test_util/transaction_test_util.h +132 -0
  678. package/deps/rocksdb/rocksdb/third-party/folly/folly/CPortability.h +27 -0
  679. package/deps/rocksdb/rocksdb/third-party/folly/folly/ConstexprMath.h +45 -0
  680. package/deps/rocksdb/rocksdb/third-party/folly/folly/Indestructible.h +166 -0
  681. package/deps/rocksdb/rocksdb/third-party/folly/folly/Optional.h +570 -0
  682. package/deps/rocksdb/rocksdb/third-party/folly/folly/Portability.h +92 -0
  683. package/deps/rocksdb/rocksdb/third-party/folly/folly/ScopeGuard.h +54 -0
  684. package/deps/rocksdb/rocksdb/third-party/folly/folly/Traits.h +152 -0
  685. package/deps/rocksdb/rocksdb/third-party/folly/folly/Unit.h +59 -0
  686. package/deps/rocksdb/rocksdb/third-party/folly/folly/Utility.h +141 -0
  687. package/deps/rocksdb/rocksdb/third-party/folly/folly/chrono/Hardware.h +33 -0
  688. package/deps/rocksdb/rocksdb/third-party/folly/folly/container/Array.h +74 -0
  689. package/deps/rocksdb/rocksdb/third-party/folly/folly/detail/Futex-inl.h +117 -0
  690. package/deps/rocksdb/rocksdb/third-party/folly/folly/detail/Futex.cpp +263 -0
  691. package/deps/rocksdb/rocksdb/third-party/folly/folly/detail/Futex.h +96 -0
  692. package/deps/rocksdb/rocksdb/third-party/folly/folly/functional/Invoke.h +40 -0
  693. package/deps/rocksdb/rocksdb/third-party/folly/folly/hash/Hash.h +29 -0
  694. package/deps/rocksdb/rocksdb/third-party/folly/folly/lang/Align.h +144 -0
  695. package/deps/rocksdb/rocksdb/third-party/folly/folly/lang/Bits.h +30 -0
  696. package/deps/rocksdb/rocksdb/third-party/folly/folly/lang/Launder.h +51 -0
  697. package/deps/rocksdb/rocksdb/third-party/folly/folly/portability/Asm.h +28 -0
  698. package/deps/rocksdb/rocksdb/third-party/folly/folly/portability/SysSyscall.h +10 -0
  699. package/deps/rocksdb/rocksdb/third-party/folly/folly/portability/SysTypes.h +26 -0
  700. package/deps/rocksdb/rocksdb/third-party/folly/folly/synchronization/AtomicNotification-inl.h +138 -0
  701. package/deps/rocksdb/rocksdb/third-party/folly/folly/synchronization/AtomicNotification.cpp +23 -0
  702. package/deps/rocksdb/rocksdb/third-party/folly/folly/synchronization/AtomicNotification.h +57 -0
  703. package/deps/rocksdb/rocksdb/third-party/folly/folly/synchronization/AtomicUtil-inl.h +260 -0
  704. package/deps/rocksdb/rocksdb/third-party/folly/folly/synchronization/AtomicUtil.h +52 -0
  705. package/deps/rocksdb/rocksdb/third-party/folly/folly/synchronization/Baton.h +328 -0
  706. package/deps/rocksdb/rocksdb/third-party/folly/folly/synchronization/DistributedMutex-inl.h +1703 -0
  707. package/deps/rocksdb/rocksdb/third-party/folly/folly/synchronization/DistributedMutex.cpp +16 -0
  708. package/deps/rocksdb/rocksdb/third-party/folly/folly/synchronization/DistributedMutex.h +304 -0
  709. package/deps/rocksdb/rocksdb/third-party/folly/folly/synchronization/DistributedMutexSpecializations.h +39 -0
  710. package/deps/rocksdb/rocksdb/third-party/folly/folly/synchronization/ParkingLot.cpp +26 -0
  711. package/deps/rocksdb/rocksdb/third-party/folly/folly/synchronization/ParkingLot.h +318 -0
  712. package/deps/rocksdb/rocksdb/third-party/folly/folly/synchronization/WaitOptions.cpp +12 -0
  713. package/deps/rocksdb/rocksdb/third-party/folly/folly/synchronization/WaitOptions.h +57 -0
  714. package/deps/rocksdb/rocksdb/third-party/folly/folly/synchronization/detail/InlineFunctionRef.h +219 -0
  715. package/deps/rocksdb/rocksdb/third-party/folly/folly/synchronization/detail/ProxyLockable-inl.h +207 -0
  716. package/deps/rocksdb/rocksdb/third-party/folly/folly/synchronization/detail/ProxyLockable.h +164 -0
  717. package/deps/rocksdb/rocksdb/third-party/folly/folly/synchronization/detail/Sleeper.h +57 -0
  718. package/deps/rocksdb/rocksdb/third-party/folly/folly/synchronization/detail/Spin.h +77 -0
  719. package/deps/rocksdb/rocksdb/third-party/gcc/ppc-asm.h +390 -0
  720. package/deps/rocksdb/rocksdb/thirdparty.inc +268 -0
  721. package/deps/rocksdb/rocksdb/tools/CMakeLists.txt +30 -0
  722. package/deps/rocksdb/rocksdb/tools/blob_dump.cc +110 -0
  723. package/deps/rocksdb/rocksdb/tools/block_cache_analyzer/__init__.py +2 -0
  724. package/deps/rocksdb/rocksdb/tools/block_cache_analyzer/block_cache_pysim.py +2000 -0
  725. package/deps/rocksdb/rocksdb/tools/block_cache_analyzer/block_cache_pysim.sh +156 -0
  726. package/deps/rocksdb/rocksdb/tools/block_cache_analyzer/block_cache_pysim_test.py +734 -0
  727. package/deps/rocksdb/rocksdb/tools/block_cache_analyzer/block_cache_trace_analyzer.cc +2307 -0
  728. package/deps/rocksdb/rocksdb/tools/block_cache_analyzer/block_cache_trace_analyzer.h +395 -0
  729. package/deps/rocksdb/rocksdb/tools/block_cache_analyzer/block_cache_trace_analyzer_plot.py +721 -0
  730. package/deps/rocksdb/rocksdb/tools/block_cache_analyzer/block_cache_trace_analyzer_test.cc +719 -0
  731. package/deps/rocksdb/rocksdb/tools/block_cache_analyzer/block_cache_trace_analyzer_tool.cc +25 -0
  732. package/deps/rocksdb/rocksdb/tools/db_bench.cc +21 -0
  733. package/deps/rocksdb/rocksdb/tools/db_bench_tool.cc +7416 -0
  734. package/deps/rocksdb/rocksdb/tools/db_bench_tool_test.cc +328 -0
  735. package/deps/rocksdb/rocksdb/tools/db_repl_stress.cc +130 -0
  736. package/deps/rocksdb/rocksdb/tools/db_sanity_test.cc +297 -0
  737. package/deps/rocksdb/rocksdb/tools/dump/db_dump_tool.cc +259 -0
  738. package/deps/rocksdb/rocksdb/tools/dump/rocksdb_dump.cc +63 -0
  739. package/deps/rocksdb/rocksdb/tools/dump/rocksdb_undump.cc +62 -0
  740. package/deps/rocksdb/rocksdb/tools/io_tracer_parser.cc +25 -0
  741. package/deps/rocksdb/rocksdb/tools/io_tracer_parser_test.cc +187 -0
  742. package/deps/rocksdb/rocksdb/tools/io_tracer_parser_tool.cc +120 -0
  743. package/deps/rocksdb/rocksdb/tools/io_tracer_parser_tool.h +40 -0
  744. package/deps/rocksdb/rocksdb/tools/ldb.cc +21 -0
  745. package/deps/rocksdb/rocksdb/tools/ldb_cmd.cc +3609 -0
  746. package/deps/rocksdb/rocksdb/tools/ldb_cmd_impl.h +665 -0
  747. package/deps/rocksdb/rocksdb/tools/ldb_cmd_test.cc +746 -0
  748. package/deps/rocksdb/rocksdb/tools/ldb_tool.cc +159 -0
  749. package/deps/rocksdb/rocksdb/tools/reduce_levels_test.cc +221 -0
  750. package/deps/rocksdb/rocksdb/tools/sst_dump.cc +20 -0
  751. package/deps/rocksdb/rocksdb/tools/sst_dump_test.cc +427 -0
  752. package/deps/rocksdb/rocksdb/tools/sst_dump_tool.cc +541 -0
  753. package/deps/rocksdb/rocksdb/tools/trace_analyzer.cc +25 -0
  754. package/deps/rocksdb/rocksdb/tools/trace_analyzer_test.cc +752 -0
  755. package/deps/rocksdb/rocksdb/tools/trace_analyzer_tool.cc +2001 -0
  756. package/deps/rocksdb/rocksdb/tools/trace_analyzer_tool.h +292 -0
  757. package/deps/rocksdb/rocksdb/tools/write_stress.cc +305 -0
  758. package/deps/rocksdb/rocksdb/trace_replay/block_cache_tracer.cc +496 -0
  759. package/deps/rocksdb/rocksdb/trace_replay/block_cache_tracer.h +294 -0
  760. package/deps/rocksdb/rocksdb/trace_replay/block_cache_tracer_test.cc +379 -0
  761. package/deps/rocksdb/rocksdb/trace_replay/io_tracer.cc +229 -0
  762. package/deps/rocksdb/rocksdb/trace_replay/io_tracer.h +174 -0
  763. package/deps/rocksdb/rocksdb/trace_replay/io_tracer_test.cc +215 -0
  764. package/deps/rocksdb/rocksdb/trace_replay/trace_replay.cc +491 -0
  765. package/deps/rocksdb/rocksdb/trace_replay/trace_replay.h +195 -0
  766. package/deps/rocksdb/rocksdb/util/aligned_buffer.h +255 -0
  767. package/deps/rocksdb/rocksdb/util/autovector.h +367 -0
  768. package/deps/rocksdb/rocksdb/util/autovector_test.cc +330 -0
  769. package/deps/rocksdb/rocksdb/util/bloom_impl.h +485 -0
  770. package/deps/rocksdb/rocksdb/util/bloom_test.cc +1191 -0
  771. package/deps/rocksdb/rocksdb/util/build_version.cc.in +5 -0
  772. package/deps/rocksdb/rocksdb/util/build_version.h +15 -0
  773. package/deps/rocksdb/rocksdb/util/cast_util.h +20 -0
  774. package/deps/rocksdb/rocksdb/util/channel.h +67 -0
  775. package/deps/rocksdb/rocksdb/util/coding.cc +89 -0
  776. package/deps/rocksdb/rocksdb/util/coding.h +419 -0
  777. package/deps/rocksdb/rocksdb/util/coding_lean.h +101 -0
  778. package/deps/rocksdb/rocksdb/util/coding_test.cc +217 -0
  779. package/deps/rocksdb/rocksdb/util/compaction_job_stats_impl.cc +92 -0
  780. package/deps/rocksdb/rocksdb/util/comparator.cc +219 -0
  781. package/deps/rocksdb/rocksdb/util/compression.h +1529 -0
  782. package/deps/rocksdb/rocksdb/util/compression_context_cache.cc +108 -0
  783. package/deps/rocksdb/rocksdb/util/compression_context_cache.h +47 -0
  784. package/deps/rocksdb/rocksdb/util/concurrent_task_limiter_impl.cc +67 -0
  785. package/deps/rocksdb/rocksdb/util/concurrent_task_limiter_impl.h +67 -0
  786. package/deps/rocksdb/rocksdb/util/core_local.h +83 -0
  787. package/deps/rocksdb/rocksdb/util/crc32c.cc +1283 -0
  788. package/deps/rocksdb/rocksdb/util/crc32c.h +51 -0
  789. package/deps/rocksdb/rocksdb/util/crc32c_arm64.cc +169 -0
  790. package/deps/rocksdb/rocksdb/util/crc32c_arm64.h +50 -0
  791. package/deps/rocksdb/rocksdb/util/crc32c_ppc.c +94 -0
  792. package/deps/rocksdb/rocksdb/util/crc32c_ppc.h +19 -0
  793. package/deps/rocksdb/rocksdb/util/crc32c_ppc_asm.S +756 -0
  794. package/deps/rocksdb/rocksdb/util/crc32c_ppc_constants.h +900 -0
  795. package/deps/rocksdb/rocksdb/util/crc32c_test.cc +180 -0
  796. package/deps/rocksdb/rocksdb/util/defer.h +52 -0
  797. package/deps/rocksdb/rocksdb/util/defer_test.cc +39 -0
  798. package/deps/rocksdb/rocksdb/util/duplicate_detector.h +68 -0
  799. package/deps/rocksdb/rocksdb/util/dynamic_bloom.cc +70 -0
  800. package/deps/rocksdb/rocksdb/util/dynamic_bloom.h +214 -0
  801. package/deps/rocksdb/rocksdb/util/dynamic_bloom_test.cc +323 -0
  802. package/deps/rocksdb/rocksdb/util/fastrange.h +112 -0
  803. package/deps/rocksdb/rocksdb/util/file_checksum_helper.cc +136 -0
  804. package/deps/rocksdb/rocksdb/util/file_checksum_helper.h +98 -0
  805. package/deps/rocksdb/rocksdb/util/file_reader_writer_test.cc +449 -0
  806. package/deps/rocksdb/rocksdb/util/filelock_test.cc +152 -0
  807. package/deps/rocksdb/rocksdb/util/filter_bench.cc +781 -0
  808. package/deps/rocksdb/rocksdb/util/gflags_compat.h +20 -0
  809. package/deps/rocksdb/rocksdb/util/hash.cc +83 -0
  810. package/deps/rocksdb/rocksdb/util/hash.h +107 -0
  811. package/deps/rocksdb/rocksdb/util/hash_map.h +67 -0
  812. package/deps/rocksdb/rocksdb/util/hash_test.cc +593 -0
  813. package/deps/rocksdb/rocksdb/util/heap.h +166 -0
  814. package/deps/rocksdb/rocksdb/util/heap_test.cc +139 -0
  815. package/deps/rocksdb/rocksdb/util/kv_map.h +33 -0
  816. package/deps/rocksdb/rocksdb/util/log_write_bench.cc +86 -0
  817. package/deps/rocksdb/rocksdb/util/math.h +186 -0
  818. package/deps/rocksdb/rocksdb/util/math128.h +298 -0
  819. package/deps/rocksdb/rocksdb/util/murmurhash.cc +191 -0
  820. package/deps/rocksdb/rocksdb/util/murmurhash.h +42 -0
  821. package/deps/rocksdb/rocksdb/util/mutexlock.h +186 -0
  822. package/deps/rocksdb/rocksdb/util/ppc-opcode.h +27 -0
  823. package/deps/rocksdb/rocksdb/util/random.cc +56 -0
  824. package/deps/rocksdb/rocksdb/util/random.h +186 -0
  825. package/deps/rocksdb/rocksdb/util/random_test.cc +105 -0
  826. package/deps/rocksdb/rocksdb/util/rate_limiter.cc +340 -0
  827. package/deps/rocksdb/rocksdb/util/rate_limiter.h +113 -0
  828. package/deps/rocksdb/rocksdb/util/rate_limiter_test.cc +251 -0
  829. package/deps/rocksdb/rocksdb/util/repeatable_thread.h +151 -0
  830. package/deps/rocksdb/rocksdb/util/repeatable_thread_test.cc +107 -0
  831. package/deps/rocksdb/rocksdb/util/ribbon_alg.h +1201 -0
  832. package/deps/rocksdb/rocksdb/util/ribbon_impl.h +1062 -0
  833. package/deps/rocksdb/rocksdb/util/ribbon_test.cc +931 -0
  834. package/deps/rocksdb/rocksdb/util/set_comparator.h +22 -0
  835. package/deps/rocksdb/rocksdb/util/slice.cc +243 -0
  836. package/deps/rocksdb/rocksdb/util/slice_test.cc +163 -0
  837. package/deps/rocksdb/rocksdb/util/slice_transform_test.cc +153 -0
  838. package/deps/rocksdb/rocksdb/util/status.cc +149 -0
  839. package/deps/rocksdb/rocksdb/util/stderr_logger.h +31 -0
  840. package/deps/rocksdb/rocksdb/util/stop_watch.h +118 -0
  841. package/deps/rocksdb/rocksdb/util/string_util.cc +422 -0
  842. package/deps/rocksdb/rocksdb/util/string_util.h +144 -0
  843. package/deps/rocksdb/rocksdb/util/thread_list_test.cc +354 -0
  844. package/deps/rocksdb/rocksdb/util/thread_local.cc +554 -0
  845. package/deps/rocksdb/rocksdb/util/thread_local.h +101 -0
  846. package/deps/rocksdb/rocksdb/util/thread_local_test.cc +583 -0
  847. package/deps/rocksdb/rocksdb/util/thread_operation.h +121 -0
  848. package/deps/rocksdb/rocksdb/util/threadpool_imp.cc +506 -0
  849. package/deps/rocksdb/rocksdb/util/threadpool_imp.h +112 -0
  850. package/deps/rocksdb/rocksdb/util/timer.h +331 -0
  851. package/deps/rocksdb/rocksdb/util/timer_queue.h +230 -0
  852. package/deps/rocksdb/rocksdb/util/timer_queue_test.cc +72 -0
  853. package/deps/rocksdb/rocksdb/util/timer_test.cc +399 -0
  854. package/deps/rocksdb/rocksdb/util/user_comparator_wrapper.h +80 -0
  855. package/deps/rocksdb/rocksdb/util/vector_iterator.h +101 -0
  856. package/deps/rocksdb/rocksdb/util/work_queue.h +148 -0
  857. package/deps/rocksdb/rocksdb/util/work_queue_test.cc +268 -0
  858. package/deps/rocksdb/rocksdb/util/xxh3p.h +1392 -0
  859. package/deps/rocksdb/rocksdb/util/xxhash.cc +1158 -0
  860. package/deps/rocksdb/rocksdb/util/xxhash.h +598 -0
  861. package/deps/rocksdb/rocksdb/utilities/backupable/backupable_db.cc +2354 -0
  862. package/deps/rocksdb/rocksdb/utilities/backupable/backupable_db_test.cc +2955 -0
  863. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_compaction_filter.cc +488 -0
  864. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_compaction_filter.h +199 -0
  865. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_db.cc +112 -0
  866. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_db.h +266 -0
  867. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_db_gc_stats.h +52 -0
  868. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_db_impl.cc +2167 -0
  869. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_db_impl.h +500 -0
  870. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_db_impl_filesnapshot.cc +113 -0
  871. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_db_iterator.h +147 -0
  872. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_db_listener.h +66 -0
  873. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_db_test.cc +2386 -0
  874. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_dump_tool.cc +281 -0
  875. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_dump_tool.h +58 -0
  876. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_file.cc +314 -0
  877. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_file.h +244 -0
  878. package/deps/rocksdb/rocksdb/utilities/cassandra/cassandra_compaction_filter.cc +47 -0
  879. package/deps/rocksdb/rocksdb/utilities/cassandra/cassandra_compaction_filter.h +42 -0
  880. package/deps/rocksdb/rocksdb/utilities/cassandra/cassandra_format_test.cc +375 -0
  881. package/deps/rocksdb/rocksdb/utilities/cassandra/cassandra_functional_test.cc +327 -0
  882. package/deps/rocksdb/rocksdb/utilities/cassandra/cassandra_row_merge_test.cc +114 -0
  883. package/deps/rocksdb/rocksdb/utilities/cassandra/cassandra_serialize_test.cc +187 -0
  884. package/deps/rocksdb/rocksdb/utilities/cassandra/format.cc +390 -0
  885. package/deps/rocksdb/rocksdb/utilities/cassandra/format.h +184 -0
  886. package/deps/rocksdb/rocksdb/utilities/cassandra/merge_operator.cc +67 -0
  887. package/deps/rocksdb/rocksdb/utilities/cassandra/merge_operator.h +44 -0
  888. package/deps/rocksdb/rocksdb/utilities/cassandra/serialize.h +75 -0
  889. package/deps/rocksdb/rocksdb/utilities/cassandra/test_utils.cc +72 -0
  890. package/deps/rocksdb/rocksdb/utilities/cassandra/test_utils.h +43 -0
  891. package/deps/rocksdb/rocksdb/utilities/checkpoint/checkpoint_impl.cc +588 -0
  892. package/deps/rocksdb/rocksdb/utilities/checkpoint/checkpoint_impl.h +82 -0
  893. package/deps/rocksdb/rocksdb/utilities/checkpoint/checkpoint_test.cc +821 -0
  894. package/deps/rocksdb/rocksdb/utilities/compaction_filters/layered_compaction_filter_base.h +37 -0
  895. package/deps/rocksdb/rocksdb/utilities/compaction_filters/remove_emptyvalue_compactionfilter.cc +29 -0
  896. package/deps/rocksdb/rocksdb/utilities/compaction_filters/remove_emptyvalue_compactionfilter.h +27 -0
  897. package/deps/rocksdb/rocksdb/utilities/convenience/info_log_finder.cc +25 -0
  898. package/deps/rocksdb/rocksdb/utilities/debug.cc +82 -0
  899. package/deps/rocksdb/rocksdb/utilities/env_librados.cc +1497 -0
  900. package/deps/rocksdb/rocksdb/utilities/env_librados_test.cc +1146 -0
  901. package/deps/rocksdb/rocksdb/utilities/env_mirror.cc +262 -0
  902. package/deps/rocksdb/rocksdb/utilities/env_mirror_test.cc +223 -0
  903. package/deps/rocksdb/rocksdb/utilities/env_timed.cc +145 -0
  904. package/deps/rocksdb/rocksdb/utilities/env_timed_test.cc +44 -0
  905. package/deps/rocksdb/rocksdb/utilities/fault_injection_env.cc +490 -0
  906. package/deps/rocksdb/rocksdb/utilities/fault_injection_env.h +242 -0
  907. package/deps/rocksdb/rocksdb/utilities/fault_injection_fs.cc +581 -0
  908. package/deps/rocksdb/rocksdb/utilities/fault_injection_fs.h +437 -0
  909. package/deps/rocksdb/rocksdb/utilities/leveldb_options/leveldb_options.cc +56 -0
  910. package/deps/rocksdb/rocksdb/utilities/memory/memory_test.cc +275 -0
  911. package/deps/rocksdb/rocksdb/utilities/memory/memory_util.cc +52 -0
  912. package/deps/rocksdb/rocksdb/utilities/merge_operators/bytesxor.cc +59 -0
  913. package/deps/rocksdb/rocksdb/utilities/merge_operators/bytesxor.h +39 -0
  914. package/deps/rocksdb/rocksdb/utilities/merge_operators/max.cc +77 -0
  915. package/deps/rocksdb/rocksdb/utilities/merge_operators/put.cc +83 -0
  916. package/deps/rocksdb/rocksdb/utilities/merge_operators/sortlist.cc +97 -0
  917. package/deps/rocksdb/rocksdb/utilities/merge_operators/sortlist.h +38 -0
  918. package/deps/rocksdb/rocksdb/utilities/merge_operators/string_append/stringappend.cc +59 -0
  919. package/deps/rocksdb/rocksdb/utilities/merge_operators/string_append/stringappend.h +31 -0
  920. package/deps/rocksdb/rocksdb/utilities/merge_operators/string_append/stringappend2.cc +117 -0
  921. package/deps/rocksdb/rocksdb/utilities/merge_operators/string_append/stringappend2.h +49 -0
  922. package/deps/rocksdb/rocksdb/utilities/merge_operators/string_append/stringappend_test.cc +598 -0
  923. package/deps/rocksdb/rocksdb/utilities/merge_operators/uint64add.cc +69 -0
  924. package/deps/rocksdb/rocksdb/utilities/merge_operators.h +55 -0
  925. package/deps/rocksdb/rocksdb/utilities/object_registry.cc +87 -0
  926. package/deps/rocksdb/rocksdb/utilities/object_registry_test.cc +174 -0
  927. package/deps/rocksdb/rocksdb/utilities/option_change_migration/option_change_migration.cc +168 -0
  928. package/deps/rocksdb/rocksdb/utilities/option_change_migration/option_change_migration_test.cc +431 -0
  929. package/deps/rocksdb/rocksdb/utilities/options/options_util.cc +159 -0
  930. package/deps/rocksdb/rocksdb/utilities/options/options_util_test.cc +655 -0
  931. package/deps/rocksdb/rocksdb/utilities/persistent_cache/block_cache_tier.cc +425 -0
  932. package/deps/rocksdb/rocksdb/utilities/persistent_cache/block_cache_tier.h +156 -0
  933. package/deps/rocksdb/rocksdb/utilities/persistent_cache/block_cache_tier_file.cc +609 -0
  934. package/deps/rocksdb/rocksdb/utilities/persistent_cache/block_cache_tier_file.h +296 -0
  935. package/deps/rocksdb/rocksdb/utilities/persistent_cache/block_cache_tier_file_buffer.h +127 -0
  936. package/deps/rocksdb/rocksdb/utilities/persistent_cache/block_cache_tier_metadata.cc +86 -0
  937. package/deps/rocksdb/rocksdb/utilities/persistent_cache/block_cache_tier_metadata.h +125 -0
  938. package/deps/rocksdb/rocksdb/utilities/persistent_cache/hash_table.h +238 -0
  939. package/deps/rocksdb/rocksdb/utilities/persistent_cache/hash_table_bench.cc +308 -0
  940. package/deps/rocksdb/rocksdb/utilities/persistent_cache/hash_table_evictable.h +168 -0
  941. package/deps/rocksdb/rocksdb/utilities/persistent_cache/hash_table_test.cc +160 -0
  942. package/deps/rocksdb/rocksdb/utilities/persistent_cache/lrulist.h +174 -0
  943. package/deps/rocksdb/rocksdb/utilities/persistent_cache/persistent_cache_bench.cc +360 -0
  944. package/deps/rocksdb/rocksdb/utilities/persistent_cache/persistent_cache_test.cc +456 -0
  945. package/deps/rocksdb/rocksdb/utilities/persistent_cache/persistent_cache_test.h +286 -0
  946. package/deps/rocksdb/rocksdb/utilities/persistent_cache/persistent_cache_tier.cc +167 -0
  947. package/deps/rocksdb/rocksdb/utilities/persistent_cache/persistent_cache_tier.h +339 -0
  948. package/deps/rocksdb/rocksdb/utilities/persistent_cache/persistent_cache_util.h +67 -0
  949. package/deps/rocksdb/rocksdb/utilities/persistent_cache/volatile_tier_impl.cc +140 -0
  950. package/deps/rocksdb/rocksdb/utilities/persistent_cache/volatile_tier_impl.h +142 -0
  951. package/deps/rocksdb/rocksdb/utilities/simulator_cache/cache_simulator.cc +285 -0
  952. package/deps/rocksdb/rocksdb/utilities/simulator_cache/cache_simulator.h +231 -0
  953. package/deps/rocksdb/rocksdb/utilities/simulator_cache/cache_simulator_test.cc +494 -0
  954. package/deps/rocksdb/rocksdb/utilities/simulator_cache/sim_cache.cc +356 -0
  955. package/deps/rocksdb/rocksdb/utilities/simulator_cache/sim_cache_test.cc +224 -0
  956. package/deps/rocksdb/rocksdb/utilities/table_properties_collectors/compact_on_deletion_collector.cc +122 -0
  957. package/deps/rocksdb/rocksdb/utilities/table_properties_collectors/compact_on_deletion_collector.h +72 -0
  958. package/deps/rocksdb/rocksdb/utilities/table_properties_collectors/compact_on_deletion_collector_test.cc +244 -0
  959. package/deps/rocksdb/rocksdb/utilities/trace/file_trace_reader_writer.cc +125 -0
  960. package/deps/rocksdb/rocksdb/utilities/trace/file_trace_reader_writer.h +48 -0
  961. package/deps/rocksdb/rocksdb/utilities/transactions/lock/lock_manager.cc +29 -0
  962. package/deps/rocksdb/rocksdb/utilities/transactions/lock/lock_manager.h +82 -0
  963. package/deps/rocksdb/rocksdb/utilities/transactions/lock/lock_tracker.h +209 -0
  964. package/deps/rocksdb/rocksdb/utilities/transactions/lock/point/point_lock_manager.cc +720 -0
  965. package/deps/rocksdb/rocksdb/utilities/transactions/lock/point/point_lock_manager.h +223 -0
  966. package/deps/rocksdb/rocksdb/utilities/transactions/lock/point/point_lock_manager_test.cc +181 -0
  967. package/deps/rocksdb/rocksdb/utilities/transactions/lock/point/point_lock_manager_test.h +319 -0
  968. package/deps/rocksdb/rocksdb/utilities/transactions/lock/point/point_lock_tracker.cc +270 -0
  969. package/deps/rocksdb/rocksdb/utilities/transactions/lock/point/point_lock_tracker.h +99 -0
  970. package/deps/rocksdb/rocksdb/utilities/transactions/lock/range/range_lock_manager.h +30 -0
  971. package/deps/rocksdb/rocksdb/utilities/transactions/lock/range/range_locking_test.cc +306 -0
  972. package/deps/rocksdb/rocksdb/utilities/transactions/lock/range/range_tree/lib/COPYING.AGPLv3 +661 -0
  973. package/deps/rocksdb/rocksdb/utilities/transactions/lock/range/range_tree/lib/COPYING.APACHEv2 +174 -0
  974. package/deps/rocksdb/rocksdb/utilities/transactions/lock/range/range_tree/lib/COPYING.GPLv2 +339 -0
  975. package/deps/rocksdb/rocksdb/utilities/transactions/lock/range/range_tree/lib/db.h +76 -0
  976. package/deps/rocksdb/rocksdb/utilities/transactions/lock/range/range_tree/lib/ft/comparator.h +138 -0
  977. package/deps/rocksdb/rocksdb/utilities/transactions/lock/range/range_tree/lib/ft/ft-status.h +102 -0
  978. package/deps/rocksdb/rocksdb/utilities/transactions/lock/range/range_tree/lib/locktree/concurrent_tree.cc +139 -0
  979. package/deps/rocksdb/rocksdb/utilities/transactions/lock/range/range_tree/lib/locktree/concurrent_tree.h +174 -0
  980. package/deps/rocksdb/rocksdb/utilities/transactions/lock/range/range_tree/lib/locktree/keyrange.cc +222 -0
  981. package/deps/rocksdb/rocksdb/utilities/transactions/lock/range/range_tree/lib/locktree/keyrange.h +141 -0
  982. package/deps/rocksdb/rocksdb/utilities/transactions/lock/range/range_tree/lib/locktree/lock_request.cc +525 -0
  983. package/deps/rocksdb/rocksdb/utilities/transactions/lock/range/range_tree/lib/locktree/lock_request.h +253 -0
  984. package/deps/rocksdb/rocksdb/utilities/transactions/lock/range/range_tree/lib/locktree/locktree.cc +1007 -0
  985. package/deps/rocksdb/rocksdb/utilities/transactions/lock/range/range_tree/lib/locktree/locktree.h +560 -0
  986. package/deps/rocksdb/rocksdb/utilities/transactions/lock/range/range_tree/lib/locktree/manager.cc +527 -0
  987. package/deps/rocksdb/rocksdb/utilities/transactions/lock/range/range_tree/lib/locktree/range_buffer.cc +265 -0
  988. package/deps/rocksdb/rocksdb/utilities/transactions/lock/range/range_tree/lib/locktree/range_buffer.h +178 -0
  989. package/deps/rocksdb/rocksdb/utilities/transactions/lock/range/range_tree/lib/locktree/treenode.cc +520 -0
  990. package/deps/rocksdb/rocksdb/utilities/transactions/lock/range/range_tree/lib/locktree/treenode.h +302 -0
  991. package/deps/rocksdb/rocksdb/utilities/transactions/lock/range/range_tree/lib/locktree/txnid_set.cc +120 -0
  992. package/deps/rocksdb/rocksdb/utilities/transactions/lock/range/range_tree/lib/locktree/txnid_set.h +92 -0
  993. package/deps/rocksdb/rocksdb/utilities/transactions/lock/range/range_tree/lib/locktree/wfg.cc +213 -0
  994. package/deps/rocksdb/rocksdb/utilities/transactions/lock/range/range_tree/lib/locktree/wfg.h +124 -0
  995. package/deps/rocksdb/rocksdb/utilities/transactions/lock/range/range_tree/lib/portability/memory.h +215 -0
  996. package/deps/rocksdb/rocksdb/utilities/transactions/lock/range/range_tree/lib/portability/toku_assert_subst.h +39 -0
  997. package/deps/rocksdb/rocksdb/utilities/transactions/lock/range/range_tree/lib/portability/toku_atomic.h +130 -0
  998. package/deps/rocksdb/rocksdb/utilities/transactions/lock/range/range_tree/lib/portability/toku_external_pthread.h +82 -0
  999. package/deps/rocksdb/rocksdb/utilities/transactions/lock/range/range_tree/lib/portability/toku_instrumentation.h +286 -0
  1000. package/deps/rocksdb/rocksdb/utilities/transactions/lock/range/range_tree/lib/portability/toku_portability.h +87 -0
  1001. package/deps/rocksdb/rocksdb/utilities/transactions/lock/range/range_tree/lib/portability/toku_pthread.h +520 -0
  1002. package/deps/rocksdb/rocksdb/utilities/transactions/lock/range/range_tree/lib/portability/toku_race_tools.h +179 -0
  1003. package/deps/rocksdb/rocksdb/utilities/transactions/lock/range/range_tree/lib/portability/toku_time.h +172 -0
  1004. package/deps/rocksdb/rocksdb/utilities/transactions/lock/range/range_tree/lib/portability/txn_subst.h +27 -0
  1005. package/deps/rocksdb/rocksdb/utilities/transactions/lock/range/range_tree/lib/standalone_port.cc +132 -0
  1006. package/deps/rocksdb/rocksdb/utilities/transactions/lock/range/range_tree/lib/util/dbt.cc +153 -0
  1007. package/deps/rocksdb/rocksdb/utilities/transactions/lock/range/range_tree/lib/util/dbt.h +98 -0
  1008. package/deps/rocksdb/rocksdb/utilities/transactions/lock/range/range_tree/lib/util/growable_array.h +144 -0
  1009. package/deps/rocksdb/rocksdb/utilities/transactions/lock/range/range_tree/lib/util/memarena.cc +201 -0
  1010. package/deps/rocksdb/rocksdb/utilities/transactions/lock/range/range_tree/lib/util/memarena.h +141 -0
  1011. package/deps/rocksdb/rocksdb/utilities/transactions/lock/range/range_tree/lib/util/omt.h +794 -0
  1012. package/deps/rocksdb/rocksdb/utilities/transactions/lock/range/range_tree/lib/util/omt_impl.h +1295 -0
  1013. package/deps/rocksdb/rocksdb/utilities/transactions/lock/range/range_tree/lib/util/partitioned_counter.h +165 -0
  1014. package/deps/rocksdb/rocksdb/utilities/transactions/lock/range/range_tree/lib/util/status.h +76 -0
  1015. package/deps/rocksdb/rocksdb/utilities/transactions/lock/range/range_tree/range_tree_lock_manager.cc +479 -0
  1016. package/deps/rocksdb/rocksdb/utilities/transactions/lock/range/range_tree/range_tree_lock_manager.h +130 -0
  1017. package/deps/rocksdb/rocksdb/utilities/transactions/lock/range/range_tree/range_tree_lock_tracker.cc +156 -0
  1018. package/deps/rocksdb/rocksdb/utilities/transactions/lock/range/range_tree/range_tree_lock_tracker.h +146 -0
  1019. package/deps/rocksdb/rocksdb/utilities/transactions/optimistic_transaction.cc +196 -0
  1020. package/deps/rocksdb/rocksdb/utilities/transactions/optimistic_transaction.h +101 -0
  1021. package/deps/rocksdb/rocksdb/utilities/transactions/optimistic_transaction_db_impl.cc +111 -0
  1022. package/deps/rocksdb/rocksdb/utilities/transactions/optimistic_transaction_db_impl.h +87 -0
  1023. package/deps/rocksdb/rocksdb/utilities/transactions/optimistic_transaction_test.cc +1418 -0
  1024. package/deps/rocksdb/rocksdb/utilities/transactions/pessimistic_transaction.cc +752 -0
  1025. package/deps/rocksdb/rocksdb/utilities/transactions/pessimistic_transaction.h +232 -0
  1026. package/deps/rocksdb/rocksdb/utilities/transactions/pessimistic_transaction_db.cc +628 -0
  1027. package/deps/rocksdb/rocksdb/utilities/transactions/pessimistic_transaction_db.h +228 -0
  1028. package/deps/rocksdb/rocksdb/utilities/transactions/snapshot_checker.cc +49 -0
  1029. package/deps/rocksdb/rocksdb/utilities/transactions/transaction_base.cc +678 -0
  1030. package/deps/rocksdb/rocksdb/utilities/transactions/transaction_base.h +373 -0
  1031. package/deps/rocksdb/rocksdb/utilities/transactions/transaction_db_mutex_impl.cc +135 -0
  1032. package/deps/rocksdb/rocksdb/utilities/transactions/transaction_db_mutex_impl.h +26 -0
  1033. package/deps/rocksdb/rocksdb/utilities/transactions/transaction_test.cc +6350 -0
  1034. package/deps/rocksdb/rocksdb/utilities/transactions/transaction_test.h +522 -0
  1035. package/deps/rocksdb/rocksdb/utilities/transactions/transaction_util.cc +188 -0
  1036. package/deps/rocksdb/rocksdb/utilities/transactions/transaction_util.h +80 -0
  1037. package/deps/rocksdb/rocksdb/utilities/transactions/write_prepared_transaction_test.cc +3531 -0
  1038. package/deps/rocksdb/rocksdb/utilities/transactions/write_prepared_txn.cc +483 -0
  1039. package/deps/rocksdb/rocksdb/utilities/transactions/write_prepared_txn.h +119 -0
  1040. package/deps/rocksdb/rocksdb/utilities/transactions/write_prepared_txn_db.cc +999 -0
  1041. package/deps/rocksdb/rocksdb/utilities/transactions/write_prepared_txn_db.h +1109 -0
  1042. package/deps/rocksdb/rocksdb/utilities/transactions/write_unprepared_transaction_test.cc +786 -0
  1043. package/deps/rocksdb/rocksdb/utilities/transactions/write_unprepared_txn.cc +1039 -0
  1044. package/deps/rocksdb/rocksdb/utilities/transactions/write_unprepared_txn.h +341 -0
  1045. package/deps/rocksdb/rocksdb/utilities/transactions/write_unprepared_txn_db.cc +470 -0
  1046. package/deps/rocksdb/rocksdb/utilities/transactions/write_unprepared_txn_db.h +108 -0
  1047. package/deps/rocksdb/rocksdb/utilities/ttl/db_ttl_impl.cc +332 -0
  1048. package/deps/rocksdb/rocksdb/utilities/ttl/db_ttl_impl.h +353 -0
  1049. package/deps/rocksdb/rocksdb/utilities/ttl/ttl_test.cc +703 -0
  1050. package/deps/rocksdb/rocksdb/utilities/util_merge_operators_test.cc +99 -0
  1051. package/deps/rocksdb/rocksdb/utilities/write_batch_with_index/write_batch_with_index.cc +617 -0
  1052. package/deps/rocksdb/rocksdb/utilities/write_batch_with_index/write_batch_with_index_internal.cc +345 -0
  1053. package/deps/rocksdb/rocksdb/utilities/write_batch_with_index/write_batch_with_index_internal.h +569 -0
  1054. package/deps/rocksdb/rocksdb/utilities/write_batch_with_index/write_batch_with_index_test.cc +1867 -0
  1055. package/deps/rocksdb/rocksdb.gyp +475 -0
  1056. package/deps/snappy/freebsd/config.h +135 -0
  1057. package/deps/snappy/freebsd/snappy-stubs-public.h +100 -0
  1058. package/deps/snappy/linux/config.h +135 -0
  1059. package/deps/snappy/linux/snappy-stubs-public.h +100 -0
  1060. package/deps/snappy/mac/config.h +137 -0
  1061. package/deps/snappy/mac/snappy-stubs-public.h +100 -0
  1062. package/deps/snappy/openbsd/config.h +135 -0
  1063. package/deps/snappy/openbsd/snappy-stubs-public.h +100 -0
  1064. package/deps/snappy/snappy-1.1.7/COPYING +54 -0
  1065. package/deps/snappy/snappy-1.1.7/cmake/SnappyConfig.cmake +1 -0
  1066. package/deps/snappy/snappy-1.1.7/cmake/config.h.in +62 -0
  1067. package/deps/snappy/snappy-1.1.7/snappy-c.cc +90 -0
  1068. package/deps/snappy/snappy-1.1.7/snappy-c.h +138 -0
  1069. package/deps/snappy/snappy-1.1.7/snappy-internal.h +224 -0
  1070. package/deps/snappy/snappy-1.1.7/snappy-sinksource.cc +104 -0
  1071. package/deps/snappy/snappy-1.1.7/snappy-sinksource.h +182 -0
  1072. package/deps/snappy/snappy-1.1.7/snappy-stubs-internal.cc +42 -0
  1073. package/deps/snappy/snappy-1.1.7/snappy-stubs-internal.h +561 -0
  1074. package/deps/snappy/snappy-1.1.7/snappy-stubs-public.h.in +94 -0
  1075. package/deps/snappy/snappy-1.1.7/snappy-test.cc +612 -0
  1076. package/deps/snappy/snappy-1.1.7/snappy-test.h +573 -0
  1077. package/deps/snappy/snappy-1.1.7/snappy.cc +1515 -0
  1078. package/deps/snappy/snappy-1.1.7/snappy.h +203 -0
  1079. package/deps/snappy/snappy-1.1.7/snappy_unittest.cc +1410 -0
  1080. package/deps/snappy/snappy.gyp +90 -0
  1081. package/deps/snappy/solaris/config.h +135 -0
  1082. package/deps/snappy/solaris/snappy-stubs-public.h +100 -0
  1083. package/deps/snappy/win32/config.h +29 -0
  1084. package/deps/snappy/win32/snappy-stubs-public.h +100 -0
  1085. package/iterator.js +55 -0
  1086. package/leveldown.js +113 -0
  1087. package/package-lock.json +23687 -0
  1088. package/package.json +70 -0
@@ -0,0 +1,3421 @@
1
+ // Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
2
+ // This source code is licensed under both the GPLv2 (found in the
3
+ // COPYING file in the root directory) and Apache 2.0 License
4
+ // (found in the LICENSE.Apache file in the root directory).
5
+ //
6
+ // Copyright (c) 2011 The LevelDB Authors. All rights reserved.
7
+ // Use of this source code is governed by a BSD-style license that can be
8
+ // found in the LICENSE file. See the AUTHORS file for names of contributors.
9
+ #include <cinttypes>
10
+
11
+ #include "db/builder.h"
12
+ #include "db/db_impl/db_impl.h"
13
+ #include "db/error_handler.h"
14
+ #include "db/event_helpers.h"
15
+ #include "file/sst_file_manager_impl.h"
16
+ #include "monitoring/iostats_context_imp.h"
17
+ #include "monitoring/perf_context_imp.h"
18
+ #include "monitoring/thread_status_updater.h"
19
+ #include "monitoring/thread_status_util.h"
20
+ #include "test_util/sync_point.h"
21
+ #include "util/cast_util.h"
22
+ #include "util/concurrent_task_limiter_impl.h"
23
+
24
+ namespace ROCKSDB_NAMESPACE {
25
+
26
+ bool DBImpl::EnoughRoomForCompaction(
27
+ ColumnFamilyData* cfd, const std::vector<CompactionInputFiles>& inputs,
28
+ bool* sfm_reserved_compact_space, LogBuffer* log_buffer) {
29
+ // Check if we have enough room to do the compaction
30
+ bool enough_room = true;
31
+ #ifndef ROCKSDB_LITE
32
+ auto sfm = static_cast<SstFileManagerImpl*>(
33
+ immutable_db_options_.sst_file_manager.get());
34
+ if (sfm) {
35
+ // Pass the current bg_error_ to SFM so it can decide what checks to
36
+ // perform. If this DB instance hasn't seen any error yet, the SFM can be
37
+ // optimistic and not do disk space checks
38
+ Status bg_error = error_handler_.GetBGError();
39
+ enough_room = sfm->EnoughRoomForCompaction(cfd, inputs, bg_error);
40
+ bg_error.PermitUncheckedError(); // bg_error is just a copy of the Status
41
+ // from the error_handler_
42
+ if (enough_room) {
43
+ *sfm_reserved_compact_space = true;
44
+ }
45
+ }
46
+ #else
47
+ (void)cfd;
48
+ (void)inputs;
49
+ (void)sfm_reserved_compact_space;
50
+ #endif // ROCKSDB_LITE
51
+ if (!enough_room) {
52
+ // Just in case tests want to change the value of enough_room
53
+ TEST_SYNC_POINT_CALLBACK(
54
+ "DBImpl::BackgroundCompaction():CancelledCompaction", &enough_room);
55
+ ROCKS_LOG_BUFFER(log_buffer,
56
+ "Cancelled compaction because not enough room");
57
+ RecordTick(stats_, COMPACTION_CANCELLED, 1);
58
+ }
59
+ return enough_room;
60
+ }
61
+
62
+ bool DBImpl::RequestCompactionToken(ColumnFamilyData* cfd, bool force,
63
+ std::unique_ptr<TaskLimiterToken>* token,
64
+ LogBuffer* log_buffer) {
65
+ assert(*token == nullptr);
66
+ auto limiter = static_cast<ConcurrentTaskLimiterImpl*>(
67
+ cfd->ioptions()->compaction_thread_limiter.get());
68
+ if (limiter == nullptr) {
69
+ return true;
70
+ }
71
+ *token = limiter->GetToken(force);
72
+ if (*token != nullptr) {
73
+ ROCKS_LOG_BUFFER(log_buffer,
74
+ "Thread limiter [%s] increase [%s] compaction task, "
75
+ "force: %s, tasks after: %d",
76
+ limiter->GetName().c_str(), cfd->GetName().c_str(),
77
+ force ? "true" : "false", limiter->GetOutstandingTask());
78
+ return true;
79
+ }
80
+ return false;
81
+ }
82
+
83
+ IOStatus DBImpl::SyncClosedLogs(JobContext* job_context) {
84
+ TEST_SYNC_POINT("DBImpl::SyncClosedLogs:Start");
85
+ mutex_.AssertHeld();
86
+ autovector<log::Writer*, 1> logs_to_sync;
87
+ uint64_t current_log_number = logfile_number_;
88
+ while (logs_.front().number < current_log_number &&
89
+ logs_.front().getting_synced) {
90
+ log_sync_cv_.Wait();
91
+ }
92
+ for (auto it = logs_.begin();
93
+ it != logs_.end() && it->number < current_log_number; ++it) {
94
+ auto& log = *it;
95
+ assert(!log.getting_synced);
96
+ log.getting_synced = true;
97
+ logs_to_sync.push_back(log.writer);
98
+ }
99
+
100
+ IOStatus io_s;
101
+ if (!logs_to_sync.empty()) {
102
+ mutex_.Unlock();
103
+
104
+ for (log::Writer* log : logs_to_sync) {
105
+ ROCKS_LOG_INFO(immutable_db_options_.info_log,
106
+ "[JOB %d] Syncing log #%" PRIu64, job_context->job_id,
107
+ log->get_log_number());
108
+ io_s = log->file()->Sync(immutable_db_options_.use_fsync);
109
+ if (!io_s.ok()) {
110
+ break;
111
+ }
112
+
113
+ if (immutable_db_options_.recycle_log_file_num > 0) {
114
+ io_s = log->Close();
115
+ if (!io_s.ok()) {
116
+ break;
117
+ }
118
+ }
119
+ }
120
+ if (io_s.ok()) {
121
+ io_s = directories_.GetWalDir()->Fsync(IOOptions(), nullptr);
122
+ }
123
+
124
+ mutex_.Lock();
125
+
126
+ // "number <= current_log_number - 1" is equivalent to
127
+ // "number < current_log_number".
128
+ if (io_s.ok()) {
129
+ io_s = status_to_io_status(MarkLogsSynced(current_log_number - 1, true));
130
+ } else {
131
+ MarkLogsNotSynced(current_log_number - 1);
132
+ }
133
+ if (!io_s.ok()) {
134
+ if (total_log_size_ > 0) {
135
+ error_handler_.SetBGError(io_s, BackgroundErrorReason::kFlush);
136
+ } else {
137
+ // If the WAL is empty, we use different error reason
138
+ error_handler_.SetBGError(io_s, BackgroundErrorReason::kFlushNoWAL);
139
+ }
140
+ TEST_SYNC_POINT("DBImpl::SyncClosedLogs:Failed");
141
+ return io_s;
142
+ }
143
+ }
144
+ return io_s;
145
+ }
146
+
147
+ Status DBImpl::FlushMemTableToOutputFile(
148
+ ColumnFamilyData* cfd, const MutableCFOptions& mutable_cf_options,
149
+ bool* made_progress, JobContext* job_context,
150
+ SuperVersionContext* superversion_context,
151
+ std::vector<SequenceNumber>& snapshot_seqs,
152
+ SequenceNumber earliest_write_conflict_snapshot,
153
+ SnapshotChecker* snapshot_checker, LogBuffer* log_buffer,
154
+ Env::Priority thread_pri) {
155
+ mutex_.AssertHeld();
156
+ assert(cfd);
157
+ assert(cfd->imm()->NumNotFlushed() != 0);
158
+ assert(cfd->imm()->IsFlushPending());
159
+
160
+ FlushJob flush_job(
161
+ dbname_, cfd, immutable_db_options_, mutable_cf_options,
162
+ port::kMaxUint64 /* memtable_id */, file_options_for_compaction_,
163
+ versions_.get(), &mutex_, &shutting_down_, snapshot_seqs,
164
+ earliest_write_conflict_snapshot, snapshot_checker, job_context,
165
+ log_buffer, directories_.GetDbDir(), GetDataDir(cfd, 0U),
166
+ GetCompressionFlush(*cfd->ioptions(), mutable_cf_options), stats_,
167
+ &event_logger_, mutable_cf_options.report_bg_io_stats,
168
+ true /* sync_output_directory */, true /* write_manifest */, thread_pri,
169
+ io_tracer_, db_id_, db_session_id_, cfd->GetFullHistoryTsLow());
170
+ FileMetaData file_meta;
171
+
172
+ TEST_SYNC_POINT("DBImpl::FlushMemTableToOutputFile:BeforePickMemtables");
173
+ flush_job.PickMemTable();
174
+ TEST_SYNC_POINT("DBImpl::FlushMemTableToOutputFile:AfterPickMemtables");
175
+
176
+ #ifndef ROCKSDB_LITE
177
+ // may temporarily unlock and lock the mutex.
178
+ NotifyOnFlushBegin(cfd, &file_meta, mutable_cf_options, job_context->job_id);
179
+ #endif // ROCKSDB_LITE
180
+
181
+ Status s;
182
+ IOStatus io_s = IOStatus::OK();
183
+ if (logfile_number_ > 0 &&
184
+ versions_->GetColumnFamilySet()->NumberOfColumnFamilies() > 1) {
185
+ // If there are more than one column families, we need to make sure that
186
+ // all the log files except the most recent one are synced. Otherwise if
187
+ // the host crashes after flushing and before WAL is persistent, the
188
+ // flushed SST may contain data from write batches whose updates to
189
+ // other column families are missing.
190
+ // SyncClosedLogs() may unlock and re-lock the db_mutex.
191
+ io_s = SyncClosedLogs(job_context);
192
+ s = io_s;
193
+ if (!io_s.ok() && !io_s.IsShutdownInProgress() &&
194
+ !io_s.IsColumnFamilyDropped()) {
195
+ error_handler_.SetBGError(io_s, BackgroundErrorReason::kFlush);
196
+ }
197
+ } else {
198
+ TEST_SYNC_POINT("DBImpl::SyncClosedLogs:Skip");
199
+ }
200
+
201
+ // Within flush_job.Run, rocksdb may call event listener to notify
202
+ // file creation and deletion.
203
+ //
204
+ // Note that flush_job.Run will unlock and lock the db_mutex,
205
+ // and EventListener callback will be called when the db_mutex
206
+ // is unlocked by the current thread.
207
+ if (s.ok()) {
208
+ s = flush_job.Run(&logs_with_prep_tracker_, &file_meta);
209
+ } else {
210
+ flush_job.Cancel();
211
+ }
212
+ if (io_s.ok()) {
213
+ io_s = flush_job.io_status();
214
+ }
215
+
216
+ if (s.ok()) {
217
+ InstallSuperVersionAndScheduleWork(cfd, superversion_context,
218
+ mutable_cf_options);
219
+ if (made_progress) {
220
+ *made_progress = true;
221
+ }
222
+
223
+ const std::string& column_family_name = cfd->GetName();
224
+
225
+ Version* const current = cfd->current();
226
+ assert(current);
227
+
228
+ const VersionStorageInfo* const storage_info = current->storage_info();
229
+ assert(storage_info);
230
+
231
+ VersionStorageInfo::LevelSummaryStorage tmp;
232
+ ROCKS_LOG_BUFFER(log_buffer, "[%s] Level summary: %s\n",
233
+ column_family_name.c_str(),
234
+ storage_info->LevelSummary(&tmp));
235
+
236
+ const auto& blob_files = storage_info->GetBlobFiles();
237
+ if (!blob_files.empty()) {
238
+ ROCKS_LOG_BUFFER(log_buffer,
239
+ "[%s] Blob file summary: head=%" PRIu64 ", tail=%" PRIu64
240
+ "\n",
241
+ column_family_name.c_str(), blob_files.begin()->first,
242
+ blob_files.rbegin()->first);
243
+ }
244
+ }
245
+
246
+ if (!s.ok() && !s.IsShutdownInProgress() && !s.IsColumnFamilyDropped()) {
247
+ if (!io_s.ok() && !io_s.IsShutdownInProgress() &&
248
+ !io_s.IsColumnFamilyDropped()) {
249
+ // Error while writing to MANIFEST.
250
+ // In fact, versions_->io_status() can also be the result of renaming
251
+ // CURRENT file. With current code, it's just difficult to tell. So just
252
+ // be pessimistic and try write to a new MANIFEST.
253
+ // TODO: distinguish between MANIFEST write and CURRENT renaming
254
+ if (!versions_->io_status().ok()) {
255
+ if (total_log_size_ > 0) {
256
+ // If the WAL is empty, we use different error reason
257
+ error_handler_.SetBGError(io_s,
258
+ BackgroundErrorReason::kManifestWrite);
259
+ } else {
260
+ error_handler_.SetBGError(io_s,
261
+ BackgroundErrorReason::kManifestWriteNoWAL);
262
+ }
263
+ } else if (total_log_size_ > 0) {
264
+ error_handler_.SetBGError(io_s, BackgroundErrorReason::kFlush);
265
+ } else {
266
+ // If the WAL is empty, we use different error reason
267
+ error_handler_.SetBGError(io_s, BackgroundErrorReason::kFlushNoWAL);
268
+ }
269
+ } else {
270
+ Status new_bg_error = s;
271
+ error_handler_.SetBGError(new_bg_error, BackgroundErrorReason::kFlush);
272
+ }
273
+ } else {
274
+ // If we got here, then we decided not to care about the i_os status (either
275
+ // from never needing it or ignoring the flush job status
276
+ io_s.PermitUncheckedError();
277
+ }
278
+ if (s.ok()) {
279
+ #ifndef ROCKSDB_LITE
280
+ // may temporarily unlock and lock the mutex.
281
+ NotifyOnFlushCompleted(cfd, mutable_cf_options,
282
+ flush_job.GetCommittedFlushJobsInfo());
283
+ auto sfm = static_cast<SstFileManagerImpl*>(
284
+ immutable_db_options_.sst_file_manager.get());
285
+ if (sfm) {
286
+ // Notify sst_file_manager that a new file was added
287
+ std::string file_path = MakeTableFileName(
288
+ cfd->ioptions()->cf_paths[0].path, file_meta.fd.GetNumber());
289
+ // TODO (PR7798). We should only add the file to the FileManager if it
290
+ // exists. Otherwise, some tests may fail. Ignore the error in the
291
+ // interim.
292
+ sfm->OnAddFile(file_path).PermitUncheckedError();
293
+ if (sfm->IsMaxAllowedSpaceReached()) {
294
+ Status new_bg_error =
295
+ Status::SpaceLimit("Max allowed space was reached");
296
+ TEST_SYNC_POINT_CALLBACK(
297
+ "DBImpl::FlushMemTableToOutputFile:MaxAllowedSpaceReached",
298
+ &new_bg_error);
299
+ error_handler_.SetBGError(new_bg_error, BackgroundErrorReason::kFlush);
300
+ }
301
+ }
302
+ #endif // ROCKSDB_LITE
303
+ }
304
+ TEST_SYNC_POINT("DBImpl::FlushMemTableToOutputFile:Finish");
305
+ return s;
306
+ }
307
+
308
+ Status DBImpl::FlushMemTablesToOutputFiles(
309
+ const autovector<BGFlushArg>& bg_flush_args, bool* made_progress,
310
+ JobContext* job_context, LogBuffer* log_buffer, Env::Priority thread_pri) {
311
+ if (immutable_db_options_.atomic_flush) {
312
+ return AtomicFlushMemTablesToOutputFiles(
313
+ bg_flush_args, made_progress, job_context, log_buffer, thread_pri);
314
+ }
315
+ assert(bg_flush_args.size() == 1);
316
+ std::vector<SequenceNumber> snapshot_seqs;
317
+ SequenceNumber earliest_write_conflict_snapshot;
318
+ SnapshotChecker* snapshot_checker;
319
+ GetSnapshotContext(job_context, &snapshot_seqs,
320
+ &earliest_write_conflict_snapshot, &snapshot_checker);
321
+ const auto& bg_flush_arg = bg_flush_args[0];
322
+ ColumnFamilyData* cfd = bg_flush_arg.cfd_;
323
+ MutableCFOptions mutable_cf_options = *cfd->GetLatestMutableCFOptions();
324
+ SuperVersionContext* superversion_context =
325
+ bg_flush_arg.superversion_context_;
326
+ Status s = FlushMemTableToOutputFile(
327
+ cfd, mutable_cf_options, made_progress, job_context, superversion_context,
328
+ snapshot_seqs, earliest_write_conflict_snapshot, snapshot_checker,
329
+ log_buffer, thread_pri);
330
+ return s;
331
+ }
332
+
333
+ /*
334
+ * Atomically flushes multiple column families.
335
+ *
336
+ * For each column family, all memtables with ID smaller than or equal to the
337
+ * ID specified in bg_flush_args will be flushed. Only after all column
338
+ * families finish flush will this function commit to MANIFEST. If any of the
339
+ * column families are not flushed successfully, this function does not have
340
+ * any side-effect on the state of the database.
341
+ */
342
+ Status DBImpl::AtomicFlushMemTablesToOutputFiles(
343
+ const autovector<BGFlushArg>& bg_flush_args, bool* made_progress,
344
+ JobContext* job_context, LogBuffer* log_buffer, Env::Priority thread_pri) {
345
+ mutex_.AssertHeld();
346
+
347
+ autovector<ColumnFamilyData*> cfds;
348
+ for (const auto& arg : bg_flush_args) {
349
+ cfds.emplace_back(arg.cfd_);
350
+ }
351
+
352
+ #ifndef NDEBUG
353
+ for (const auto cfd : cfds) {
354
+ assert(cfd->imm()->NumNotFlushed() != 0);
355
+ assert(cfd->imm()->IsFlushPending());
356
+ }
357
+ #endif /* !NDEBUG */
358
+
359
+ std::vector<SequenceNumber> snapshot_seqs;
360
+ SequenceNumber earliest_write_conflict_snapshot;
361
+ SnapshotChecker* snapshot_checker;
362
+ GetSnapshotContext(job_context, &snapshot_seqs,
363
+ &earliest_write_conflict_snapshot, &snapshot_checker);
364
+
365
+ autovector<FSDirectory*> distinct_output_dirs;
366
+ autovector<std::string> distinct_output_dir_paths;
367
+ std::vector<std::unique_ptr<FlushJob>> jobs;
368
+ std::vector<MutableCFOptions> all_mutable_cf_options;
369
+ int num_cfs = static_cast<int>(cfds.size());
370
+ all_mutable_cf_options.reserve(num_cfs);
371
+ for (int i = 0; i < num_cfs; ++i) {
372
+ auto cfd = cfds[i];
373
+ FSDirectory* data_dir = GetDataDir(cfd, 0U);
374
+ const std::string& curr_path = cfd->ioptions()->cf_paths[0].path;
375
+
376
+ // Add to distinct output directories if eligible. Use linear search. Since
377
+ // the number of elements in the vector is not large, performance should be
378
+ // tolerable.
379
+ bool found = false;
380
+ for (const auto& path : distinct_output_dir_paths) {
381
+ if (path == curr_path) {
382
+ found = true;
383
+ break;
384
+ }
385
+ }
386
+ if (!found) {
387
+ distinct_output_dir_paths.emplace_back(curr_path);
388
+ distinct_output_dirs.emplace_back(data_dir);
389
+ }
390
+
391
+ all_mutable_cf_options.emplace_back(*cfd->GetLatestMutableCFOptions());
392
+ const MutableCFOptions& mutable_cf_options = all_mutable_cf_options.back();
393
+ uint64_t max_memtable_id = bg_flush_args[i].max_memtable_id_;
394
+ jobs.emplace_back(new FlushJob(
395
+ dbname_, cfd, immutable_db_options_, mutable_cf_options,
396
+ max_memtable_id, file_options_for_compaction_, versions_.get(), &mutex_,
397
+ &shutting_down_, snapshot_seqs, earliest_write_conflict_snapshot,
398
+ snapshot_checker, job_context, log_buffer, directories_.GetDbDir(),
399
+ data_dir, GetCompressionFlush(*cfd->ioptions(), mutable_cf_options),
400
+ stats_, &event_logger_, mutable_cf_options.report_bg_io_stats,
401
+ false /* sync_output_directory */, false /* write_manifest */,
402
+ thread_pri, io_tracer_, db_id_, db_session_id_,
403
+ cfd->GetFullHistoryTsLow()));
404
+ jobs.back()->PickMemTable();
405
+ }
406
+
407
+ std::vector<FileMetaData> file_meta(num_cfs);
408
+ Status s;
409
+ IOStatus io_s;
410
+ assert(num_cfs == static_cast<int>(jobs.size()));
411
+
412
+ #ifndef ROCKSDB_LITE
413
+ for (int i = 0; i != num_cfs; ++i) {
414
+ const MutableCFOptions& mutable_cf_options = all_mutable_cf_options.at(i);
415
+ // may temporarily unlock and lock the mutex.
416
+ NotifyOnFlushBegin(cfds[i], &file_meta[i], mutable_cf_options,
417
+ job_context->job_id);
418
+ }
419
+ #endif /* !ROCKSDB_LITE */
420
+
421
+ if (logfile_number_ > 0) {
422
+ // TODO (yanqin) investigate whether we should sync the closed logs for
423
+ // single column family case.
424
+ io_s = SyncClosedLogs(job_context);
425
+ s = io_s;
426
+ }
427
+
428
+ // exec_status stores the execution status of flush_jobs as
429
+ // <bool /* executed */, Status /* status code */>
430
+ autovector<std::pair<bool, Status>> exec_status;
431
+ autovector<IOStatus> io_status;
432
+ for (int i = 0; i != num_cfs; ++i) {
433
+ // Initially all jobs are not executed, with status OK.
434
+ exec_status.emplace_back(false, Status::OK());
435
+ io_status.emplace_back(IOStatus::OK());
436
+ }
437
+
438
+ if (s.ok()) {
439
+ // TODO (yanqin): parallelize jobs with threads.
440
+ for (int i = 1; i != num_cfs; ++i) {
441
+ exec_status[i].second =
442
+ jobs[i]->Run(&logs_with_prep_tracker_, &file_meta[i]);
443
+ exec_status[i].first = true;
444
+ io_status[i] = jobs[i]->io_status();
445
+ }
446
+ if (num_cfs > 1) {
447
+ TEST_SYNC_POINT(
448
+ "DBImpl::AtomicFlushMemTablesToOutputFiles:SomeFlushJobsComplete:1");
449
+ TEST_SYNC_POINT(
450
+ "DBImpl::AtomicFlushMemTablesToOutputFiles:SomeFlushJobsComplete:2");
451
+ }
452
+ assert(exec_status.size() > 0);
453
+ assert(!file_meta.empty());
454
+ exec_status[0].second =
455
+ jobs[0]->Run(&logs_with_prep_tracker_, &file_meta[0]);
456
+ exec_status[0].first = true;
457
+ io_status[0] = jobs[0]->io_status();
458
+
459
+ Status error_status;
460
+ for (const auto& e : exec_status) {
461
+ if (!e.second.ok()) {
462
+ s = e.second;
463
+ if (!e.second.IsShutdownInProgress() &&
464
+ !e.second.IsColumnFamilyDropped()) {
465
+ // If a flush job did not return OK, and the CF is not dropped, and
466
+ // the DB is not shutting down, then we have to return this result to
467
+ // caller later.
468
+ error_status = e.second;
469
+ }
470
+ }
471
+ }
472
+
473
+ s = error_status.ok() ? s : error_status;
474
+ }
475
+
476
+ if (io_s.ok()) {
477
+ IOStatus io_error = IOStatus::OK();
478
+ for (int i = 0; i != static_cast<int>(io_status.size()); i++) {
479
+ if (!io_status[i].ok() && !io_status[i].IsShutdownInProgress() &&
480
+ !io_status[i].IsColumnFamilyDropped()) {
481
+ io_error = io_status[i];
482
+ }
483
+ }
484
+ io_s = io_error;
485
+ if (s.ok() && !io_s.ok()) {
486
+ s = io_s;
487
+ }
488
+ }
489
+
490
+ if (s.IsColumnFamilyDropped()) {
491
+ s = Status::OK();
492
+ }
493
+
494
+ if (s.ok() || s.IsShutdownInProgress()) {
495
+ // Sync on all distinct output directories.
496
+ for (auto dir : distinct_output_dirs) {
497
+ if (dir != nullptr) {
498
+ Status error_status = dir->Fsync(IOOptions(), nullptr);
499
+ if (!error_status.ok()) {
500
+ s = error_status;
501
+ break;
502
+ }
503
+ }
504
+ }
505
+ } else {
506
+ // Need to undo atomic flush if something went wrong, i.e. s is not OK and
507
+ // it is not because of CF drop.
508
+ // Have to cancel the flush jobs that have NOT executed because we need to
509
+ // unref the versions.
510
+ for (int i = 0; i != num_cfs; ++i) {
511
+ if (!exec_status[i].first) {
512
+ jobs[i]->Cancel();
513
+ }
514
+ }
515
+ for (int i = 0; i != num_cfs; ++i) {
516
+ if (exec_status[i].first && exec_status[i].second.ok()) {
517
+ auto& mems = jobs[i]->GetMemTables();
518
+ cfds[i]->imm()->RollbackMemtableFlush(mems,
519
+ file_meta[i].fd.GetNumber());
520
+ }
521
+ }
522
+ }
523
+
524
+ if (s.ok()) {
525
+ auto wait_to_install_func = [&]() {
526
+ bool ready = true;
527
+ for (size_t i = 0; i != cfds.size(); ++i) {
528
+ const auto& mems = jobs[i]->GetMemTables();
529
+ if (cfds[i]->IsDropped()) {
530
+ // If the column family is dropped, then do not wait.
531
+ continue;
532
+ } else if (!mems.empty() &&
533
+ cfds[i]->imm()->GetEarliestMemTableID() < mems[0]->GetID()) {
534
+ // If a flush job needs to install the flush result for mems and
535
+ // mems[0] is not the earliest memtable, it means another thread must
536
+ // be installing flush results for the same column family, then the
537
+ // current thread needs to wait.
538
+ ready = false;
539
+ break;
540
+ } else if (mems.empty() && cfds[i]->imm()->GetEarliestMemTableID() <=
541
+ bg_flush_args[i].max_memtable_id_) {
542
+ // If a flush job does not need to install flush results, then it has
543
+ // to wait until all memtables up to max_memtable_id_ (inclusive) are
544
+ // installed.
545
+ ready = false;
546
+ break;
547
+ }
548
+ }
549
+ return ready;
550
+ };
551
+
552
+ bool resuming_from_bg_err = error_handler_.IsDBStopped();
553
+ while ((!error_handler_.IsDBStopped() ||
554
+ error_handler_.GetRecoveryError().ok()) &&
555
+ !wait_to_install_func()) {
556
+ atomic_flush_install_cv_.Wait();
557
+ }
558
+
559
+ s = resuming_from_bg_err ? error_handler_.GetRecoveryError()
560
+ : error_handler_.GetBGError();
561
+ }
562
+
563
+ if (s.ok()) {
564
+ autovector<ColumnFamilyData*> tmp_cfds;
565
+ autovector<const autovector<MemTable*>*> mems_list;
566
+ autovector<const MutableCFOptions*> mutable_cf_options_list;
567
+ autovector<FileMetaData*> tmp_file_meta;
568
+ for (int i = 0; i != num_cfs; ++i) {
569
+ const auto& mems = jobs[i]->GetMemTables();
570
+ if (!cfds[i]->IsDropped() && !mems.empty()) {
571
+ tmp_cfds.emplace_back(cfds[i]);
572
+ mems_list.emplace_back(&mems);
573
+ mutable_cf_options_list.emplace_back(&all_mutable_cf_options[i]);
574
+ tmp_file_meta.emplace_back(&file_meta[i]);
575
+ }
576
+ }
577
+
578
+ s = InstallMemtableAtomicFlushResults(
579
+ nullptr /* imm_lists */, tmp_cfds, mutable_cf_options_list, mems_list,
580
+ versions_.get(), &logs_with_prep_tracker_, &mutex_, tmp_file_meta,
581
+ &job_context->memtables_to_free, directories_.GetDbDir(), log_buffer);
582
+ }
583
+
584
+ if (s.ok()) {
585
+ assert(num_cfs ==
586
+ static_cast<int>(job_context->superversion_contexts.size()));
587
+ for (int i = 0; i != num_cfs; ++i) {
588
+ assert(cfds[i]);
589
+
590
+ if (cfds[i]->IsDropped()) {
591
+ continue;
592
+ }
593
+ InstallSuperVersionAndScheduleWork(cfds[i],
594
+ &job_context->superversion_contexts[i],
595
+ all_mutable_cf_options[i]);
596
+
597
+ const std::string& column_family_name = cfds[i]->GetName();
598
+
599
+ Version* const current = cfds[i]->current();
600
+ assert(current);
601
+
602
+ const VersionStorageInfo* const storage_info = current->storage_info();
603
+ assert(storage_info);
604
+
605
+ VersionStorageInfo::LevelSummaryStorage tmp;
606
+ ROCKS_LOG_BUFFER(log_buffer, "[%s] Level summary: %s\n",
607
+ column_family_name.c_str(),
608
+ storage_info->LevelSummary(&tmp));
609
+
610
+ const auto& blob_files = storage_info->GetBlobFiles();
611
+ if (!blob_files.empty()) {
612
+ ROCKS_LOG_BUFFER(log_buffer,
613
+ "[%s] Blob file summary: head=%" PRIu64
614
+ ", tail=%" PRIu64 "\n",
615
+ column_family_name.c_str(), blob_files.begin()->first,
616
+ blob_files.rbegin()->first);
617
+ }
618
+ }
619
+ if (made_progress) {
620
+ *made_progress = true;
621
+ }
622
+ #ifndef ROCKSDB_LITE
623
+ auto sfm = static_cast<SstFileManagerImpl*>(
624
+ immutable_db_options_.sst_file_manager.get());
625
+ assert(all_mutable_cf_options.size() == static_cast<size_t>(num_cfs));
626
+ for (int i = 0; s.ok() && i != num_cfs; ++i) {
627
+ if (cfds[i]->IsDropped()) {
628
+ continue;
629
+ }
630
+ NotifyOnFlushCompleted(cfds[i], all_mutable_cf_options[i],
631
+ jobs[i]->GetCommittedFlushJobsInfo());
632
+ if (sfm) {
633
+ std::string file_path = MakeTableFileName(
634
+ cfds[i]->ioptions()->cf_paths[0].path, file_meta[i].fd.GetNumber());
635
+ // TODO (PR7798). We should only add the file to the FileManager if it
636
+ // exists. Otherwise, some tests may fail. Ignore the error in the
637
+ // interim.
638
+ sfm->OnAddFile(file_path).PermitUncheckedError();
639
+ if (sfm->IsMaxAllowedSpaceReached() &&
640
+ error_handler_.GetBGError().ok()) {
641
+ Status new_bg_error =
642
+ Status::SpaceLimit("Max allowed space was reached");
643
+ error_handler_.SetBGError(new_bg_error,
644
+ BackgroundErrorReason::kFlush);
645
+ }
646
+ }
647
+ }
648
+ #endif // ROCKSDB_LITE
649
+ }
650
+
651
+ // Need to undo atomic flush if something went wrong, i.e. s is not OK and
652
+ // it is not because of CF drop.
653
+ if (!s.ok() && !s.IsColumnFamilyDropped()) {
654
+ if (!io_s.ok() && !io_s.IsColumnFamilyDropped()) {
655
+ // Error while writing to MANIFEST.
656
+ // In fact, versions_->io_status() can also be the result of renaming
657
+ // CURRENT file. With current code, it's just difficult to tell. So just
658
+ // be pessimistic and try write to a new MANIFEST.
659
+ // TODO: distinguish between MANIFEST write and CURRENT renaming
660
+ if (!versions_->io_status().ok()) {
661
+ if (total_log_size_ > 0) {
662
+ // If the WAL is empty, we use different error reason
663
+ error_handler_.SetBGError(io_s,
664
+ BackgroundErrorReason::kManifestWrite);
665
+ } else {
666
+ error_handler_.SetBGError(io_s,
667
+ BackgroundErrorReason::kManifestWriteNoWAL);
668
+ }
669
+ } else if (total_log_size_ > 0) {
670
+ error_handler_.SetBGError(io_s, BackgroundErrorReason::kFlush);
671
+ } else {
672
+ // If the WAL is empty, we use different error reason
673
+ error_handler_.SetBGError(io_s, BackgroundErrorReason::kFlushNoWAL);
674
+ }
675
+ } else {
676
+ Status new_bg_error = s;
677
+ error_handler_.SetBGError(new_bg_error, BackgroundErrorReason::kFlush);
678
+ }
679
+ }
680
+
681
+ return s;
682
+ }
683
+
684
+ void DBImpl::NotifyOnFlushBegin(ColumnFamilyData* cfd, FileMetaData* file_meta,
685
+ const MutableCFOptions& mutable_cf_options,
686
+ int job_id) {
687
+ #ifndef ROCKSDB_LITE
688
+ if (immutable_db_options_.listeners.size() == 0U) {
689
+ return;
690
+ }
691
+ mutex_.AssertHeld();
692
+ if (shutting_down_.load(std::memory_order_acquire)) {
693
+ return;
694
+ }
695
+ bool triggered_writes_slowdown =
696
+ (cfd->current()->storage_info()->NumLevelFiles(0) >=
697
+ mutable_cf_options.level0_slowdown_writes_trigger);
698
+ bool triggered_writes_stop =
699
+ (cfd->current()->storage_info()->NumLevelFiles(0) >=
700
+ mutable_cf_options.level0_stop_writes_trigger);
701
+ // release lock while notifying events
702
+ mutex_.Unlock();
703
+ {
704
+ FlushJobInfo info{};
705
+ info.cf_id = cfd->GetID();
706
+ info.cf_name = cfd->GetName();
707
+ // TODO(yhchiang): make db_paths dynamic in case flush does not
708
+ // go to L0 in the future.
709
+ const uint64_t file_number = file_meta->fd.GetNumber();
710
+ info.file_path =
711
+ MakeTableFileName(cfd->ioptions()->cf_paths[0].path, file_number);
712
+ info.file_number = file_number;
713
+ info.thread_id = env_->GetThreadID();
714
+ info.job_id = job_id;
715
+ info.triggered_writes_slowdown = triggered_writes_slowdown;
716
+ info.triggered_writes_stop = triggered_writes_stop;
717
+ info.smallest_seqno = file_meta->fd.smallest_seqno;
718
+ info.largest_seqno = file_meta->fd.largest_seqno;
719
+ info.flush_reason = cfd->GetFlushReason();
720
+ for (auto listener : immutable_db_options_.listeners) {
721
+ listener->OnFlushBegin(this, info);
722
+ }
723
+ }
724
+ mutex_.Lock();
725
+ // no need to signal bg_cv_ as it will be signaled at the end of the
726
+ // flush process.
727
+ #else
728
+ (void)cfd;
729
+ (void)file_meta;
730
+ (void)mutable_cf_options;
731
+ (void)job_id;
732
+ #endif // ROCKSDB_LITE
733
+ }
734
+
735
+ void DBImpl::NotifyOnFlushCompleted(
736
+ ColumnFamilyData* cfd, const MutableCFOptions& mutable_cf_options,
737
+ std::list<std::unique_ptr<FlushJobInfo>>* flush_jobs_info) {
738
+ #ifndef ROCKSDB_LITE
739
+ assert(flush_jobs_info != nullptr);
740
+ if (immutable_db_options_.listeners.size() == 0U) {
741
+ return;
742
+ }
743
+ mutex_.AssertHeld();
744
+ if (shutting_down_.load(std::memory_order_acquire)) {
745
+ return;
746
+ }
747
+ bool triggered_writes_slowdown =
748
+ (cfd->current()->storage_info()->NumLevelFiles(0) >=
749
+ mutable_cf_options.level0_slowdown_writes_trigger);
750
+ bool triggered_writes_stop =
751
+ (cfd->current()->storage_info()->NumLevelFiles(0) >=
752
+ mutable_cf_options.level0_stop_writes_trigger);
753
+ // release lock while notifying events
754
+ mutex_.Unlock();
755
+ {
756
+ for (auto& info : *flush_jobs_info) {
757
+ info->triggered_writes_slowdown = triggered_writes_slowdown;
758
+ info->triggered_writes_stop = triggered_writes_stop;
759
+ for (auto listener : immutable_db_options_.listeners) {
760
+ listener->OnFlushCompleted(this, *info);
761
+ }
762
+ }
763
+ flush_jobs_info->clear();
764
+ }
765
+ mutex_.Lock();
766
+ // no need to signal bg_cv_ as it will be signaled at the end of the
767
+ // flush process.
768
+ #else
769
+ (void)cfd;
770
+ (void)mutable_cf_options;
771
+ (void)flush_jobs_info;
772
+ #endif // ROCKSDB_LITE
773
+ }
774
+
775
+ Status DBImpl::CompactRange(const CompactRangeOptions& options,
776
+ ColumnFamilyHandle* column_family,
777
+ const Slice* begin_without_ts,
778
+ const Slice* end_without_ts) {
779
+ const Comparator* const ucmp = column_family->GetComparator();
780
+ assert(ucmp);
781
+ size_t ts_sz = ucmp->timestamp_size();
782
+ if (ts_sz == 0) {
783
+ return CompactRangeInternal(options, column_family, begin_without_ts,
784
+ end_without_ts);
785
+ }
786
+
787
+ std::string begin_str;
788
+ std::string end_str;
789
+
790
+ // CompactRange compact all keys: [begin, end] inclusively. Add maximum
791
+ // timestamp to include all `begin` keys, and add minimal timestamp to include
792
+ // all `end` keys.
793
+ if (begin_without_ts != nullptr) {
794
+ AppendKeyWithMaxTimestamp(&begin_str, *begin_without_ts, ts_sz);
795
+ }
796
+ if (end_without_ts != nullptr) {
797
+ AppendKeyWithMinTimestamp(&end_str, *end_without_ts, ts_sz);
798
+ }
799
+ Slice begin(begin_str);
800
+ Slice end(end_str);
801
+
802
+ Slice* begin_with_ts = begin_without_ts ? &begin : nullptr;
803
+ Slice* end_with_ts = end_without_ts ? &end : nullptr;
804
+
805
+ return CompactRangeInternal(options, column_family, begin_with_ts,
806
+ end_with_ts);
807
+ }
808
+
809
+ Status DBImpl::CompactRangeInternal(const CompactRangeOptions& options,
810
+ ColumnFamilyHandle* column_family,
811
+ const Slice* begin, const Slice* end) {
812
+ auto cfh = static_cast_with_check<ColumnFamilyHandleImpl>(column_family);
813
+ auto cfd = cfh->cfd();
814
+
815
+ if (options.target_path_id >= cfd->ioptions()->cf_paths.size()) {
816
+ return Status::InvalidArgument("Invalid target path ID");
817
+ }
818
+
819
+ bool flush_needed = true;
820
+ Status s;
821
+ if (begin != nullptr && end != nullptr) {
822
+ // TODO(ajkr): We could also optimize away the flush in certain cases where
823
+ // one/both sides of the interval are unbounded. But it requires more
824
+ // changes to RangesOverlapWithMemtables.
825
+ Range range(*begin, *end);
826
+ SuperVersion* super_version = cfd->GetReferencedSuperVersion(this);
827
+ s = cfd->RangesOverlapWithMemtables(
828
+ {range}, super_version, immutable_db_options_.allow_data_in_errors,
829
+ &flush_needed);
830
+ CleanupSuperVersion(super_version);
831
+ }
832
+
833
+ if (s.ok() && flush_needed) {
834
+ FlushOptions fo;
835
+ fo.allow_write_stall = options.allow_write_stall;
836
+ if (immutable_db_options_.atomic_flush) {
837
+ autovector<ColumnFamilyData*> cfds;
838
+ mutex_.Lock();
839
+ SelectColumnFamiliesForAtomicFlush(&cfds);
840
+ mutex_.Unlock();
841
+ s = AtomicFlushMemTables(cfds, fo, FlushReason::kManualCompaction,
842
+ false /* writes_stopped */);
843
+ } else {
844
+ s = FlushMemTable(cfd, fo, FlushReason::kManualCompaction,
845
+ false /* writes_stopped*/);
846
+ }
847
+ if (!s.ok()) {
848
+ LogFlush(immutable_db_options_.info_log);
849
+ return s;
850
+ }
851
+ }
852
+
853
+ constexpr int kInvalidLevel = -1;
854
+ int final_output_level = kInvalidLevel;
855
+ bool exclusive = options.exclusive_manual_compaction;
856
+ if (cfd->ioptions()->compaction_style == kCompactionStyleUniversal &&
857
+ cfd->NumberLevels() > 1) {
858
+ // Always compact all files together.
859
+ final_output_level = cfd->NumberLevels() - 1;
860
+ // if bottom most level is reserved
861
+ if (immutable_db_options_.allow_ingest_behind) {
862
+ final_output_level--;
863
+ }
864
+ s = RunManualCompaction(cfd, ColumnFamilyData::kCompactAllLevels,
865
+ final_output_level, options, begin, end, exclusive,
866
+ false, port::kMaxUint64);
867
+ } else {
868
+ int first_overlapped_level = kInvalidLevel;
869
+ int max_overlapped_level = kInvalidLevel;
870
+ {
871
+ SuperVersion* super_version = cfd->GetReferencedSuperVersion(this);
872
+ Version* current_version = super_version->current;
873
+ ReadOptions ro;
874
+ ro.total_order_seek = true;
875
+ bool overlap;
876
+ for (int level = 0;
877
+ level < current_version->storage_info()->num_non_empty_levels();
878
+ level++) {
879
+ overlap = true;
880
+ if (begin != nullptr && end != nullptr) {
881
+ Status status = current_version->OverlapWithLevelIterator(
882
+ ro, file_options_, *begin, *end, level, &overlap);
883
+ if (!status.ok()) {
884
+ overlap = current_version->storage_info()->OverlapInLevel(
885
+ level, begin, end);
886
+ }
887
+ } else {
888
+ overlap = current_version->storage_info()->OverlapInLevel(level,
889
+ begin, end);
890
+ }
891
+ if (overlap) {
892
+ if (first_overlapped_level == kInvalidLevel) {
893
+ first_overlapped_level = level;
894
+ }
895
+ max_overlapped_level = level;
896
+ }
897
+ }
898
+ CleanupSuperVersion(super_version);
899
+ }
900
+ if (s.ok() && first_overlapped_level != kInvalidLevel) {
901
+ // max_file_num_to_ignore can be used to filter out newly created SST
902
+ // files, useful for bottom level compaction in a manual compaction
903
+ uint64_t max_file_num_to_ignore = port::kMaxUint64;
904
+ uint64_t next_file_number = versions_->current_next_file_number();
905
+ final_output_level = max_overlapped_level;
906
+ int output_level;
907
+ for (int level = first_overlapped_level; level <= max_overlapped_level;
908
+ level++) {
909
+ bool disallow_trivial_move = false;
910
+ // in case the compaction is universal or if we're compacting the
911
+ // bottom-most level, the output level will be the same as input one.
912
+ // level 0 can never be the bottommost level (i.e. if all files are in
913
+ // level 0, we will compact to level 1)
914
+ if (cfd->ioptions()->compaction_style == kCompactionStyleUniversal ||
915
+ cfd->ioptions()->compaction_style == kCompactionStyleFIFO) {
916
+ output_level = level;
917
+ } else if (level == max_overlapped_level && level > 0) {
918
+ if (options.bottommost_level_compaction ==
919
+ BottommostLevelCompaction::kSkip) {
920
+ // Skip bottommost level compaction
921
+ continue;
922
+ } else if (options.bottommost_level_compaction ==
923
+ BottommostLevelCompaction::kIfHaveCompactionFilter &&
924
+ cfd->ioptions()->compaction_filter == nullptr &&
925
+ cfd->ioptions()->compaction_filter_factory == nullptr) {
926
+ // Skip bottommost level compaction since we don't have a compaction
927
+ // filter
928
+ continue;
929
+ }
930
+ output_level = level;
931
+ // update max_file_num_to_ignore only for bottom level compaction
932
+ // because data in newly compacted files in middle levels may still
933
+ // need to be pushed down
934
+ max_file_num_to_ignore = next_file_number;
935
+ } else {
936
+ output_level = level + 1;
937
+ if (cfd->ioptions()->compaction_style == kCompactionStyleLevel &&
938
+ cfd->ioptions()->level_compaction_dynamic_level_bytes &&
939
+ level == 0) {
940
+ output_level = ColumnFamilyData::kCompactToBaseLevel;
941
+ }
942
+ // if it's a BottommostLevel compaction and `kForce*` compaction is
943
+ // set, disallow trivial move
944
+ if (level == max_overlapped_level &&
945
+ (options.bottommost_level_compaction ==
946
+ BottommostLevelCompaction::kForce ||
947
+ options.bottommost_level_compaction ==
948
+ BottommostLevelCompaction::kForceOptimized)) {
949
+ disallow_trivial_move = true;
950
+ }
951
+ }
952
+ s = RunManualCompaction(cfd, level, output_level, options, begin, end,
953
+ exclusive, disallow_trivial_move,
954
+ max_file_num_to_ignore);
955
+ if (!s.ok()) {
956
+ break;
957
+ }
958
+ if (output_level == ColumnFamilyData::kCompactToBaseLevel) {
959
+ final_output_level = cfd->NumberLevels() - 1;
960
+ } else if (output_level > final_output_level) {
961
+ final_output_level = output_level;
962
+ }
963
+ TEST_SYNC_POINT("DBImpl::RunManualCompaction()::1");
964
+ TEST_SYNC_POINT("DBImpl::RunManualCompaction()::2");
965
+ }
966
+ }
967
+ }
968
+ if (!s.ok() || final_output_level == kInvalidLevel) {
969
+ LogFlush(immutable_db_options_.info_log);
970
+ return s;
971
+ }
972
+
973
+ if (options.change_level) {
974
+ TEST_SYNC_POINT("DBImpl::CompactRange:BeforeRefit:1");
975
+ TEST_SYNC_POINT("DBImpl::CompactRange:BeforeRefit:2");
976
+
977
+ ROCKS_LOG_INFO(immutable_db_options_.info_log,
978
+ "[RefitLevel] waiting for background threads to stop");
979
+ DisableManualCompaction();
980
+ s = PauseBackgroundWork();
981
+ if (s.ok()) {
982
+ TEST_SYNC_POINT("DBImpl::CompactRange:PreRefitLevel");
983
+ s = ReFitLevel(cfd, final_output_level, options.target_level);
984
+ TEST_SYNC_POINT("DBImpl::CompactRange:PostRefitLevel");
985
+ // ContinueBackgroundWork always return Status::OK().
986
+ Status temp_s = ContinueBackgroundWork();
987
+ assert(temp_s.ok());
988
+ }
989
+ EnableManualCompaction();
990
+ }
991
+ LogFlush(immutable_db_options_.info_log);
992
+
993
+ {
994
+ InstrumentedMutexLock l(&mutex_);
995
+ // an automatic compaction that has been scheduled might have been
996
+ // preempted by the manual compactions. Need to schedule it back.
997
+ MaybeScheduleFlushOrCompaction();
998
+ }
999
+
1000
+ return s;
1001
+ }
1002
+
1003
+ Status DBImpl::CompactFiles(const CompactionOptions& compact_options,
1004
+ ColumnFamilyHandle* column_family,
1005
+ const std::vector<std::string>& input_file_names,
1006
+ const int output_level, const int output_path_id,
1007
+ std::vector<std::string>* const output_file_names,
1008
+ CompactionJobInfo* compaction_job_info) {
1009
+ #ifdef ROCKSDB_LITE
1010
+ (void)compact_options;
1011
+ (void)column_family;
1012
+ (void)input_file_names;
1013
+ (void)output_level;
1014
+ (void)output_path_id;
1015
+ (void)output_file_names;
1016
+ (void)compaction_job_info;
1017
+ // not supported in lite version
1018
+ return Status::NotSupported("Not supported in ROCKSDB LITE");
1019
+ #else
1020
+ if (column_family == nullptr) {
1021
+ return Status::InvalidArgument("ColumnFamilyHandle must be non-null.");
1022
+ }
1023
+
1024
+ auto cfd =
1025
+ static_cast_with_check<ColumnFamilyHandleImpl>(column_family)->cfd();
1026
+ assert(cfd);
1027
+
1028
+ Status s;
1029
+ JobContext job_context(0, true);
1030
+ LogBuffer log_buffer(InfoLogLevel::INFO_LEVEL,
1031
+ immutable_db_options_.info_log.get());
1032
+
1033
+ // Perform CompactFiles
1034
+ TEST_SYNC_POINT("TestCompactFiles::IngestExternalFile2");
1035
+ {
1036
+ InstrumentedMutexLock l(&mutex_);
1037
+
1038
+ // This call will unlock/lock the mutex to wait for current running
1039
+ // IngestExternalFile() calls to finish.
1040
+ WaitForIngestFile();
1041
+
1042
+ // We need to get current after `WaitForIngestFile`, because
1043
+ // `IngestExternalFile` may add files that overlap with `input_file_names`
1044
+ auto* current = cfd->current();
1045
+ current->Ref();
1046
+
1047
+ s = CompactFilesImpl(compact_options, cfd, current, input_file_names,
1048
+ output_file_names, output_level, output_path_id,
1049
+ &job_context, &log_buffer, compaction_job_info);
1050
+
1051
+ current->Unref();
1052
+ }
1053
+
1054
+ // Find and delete obsolete files
1055
+ {
1056
+ InstrumentedMutexLock l(&mutex_);
1057
+ // If !s.ok(), this means that Compaction failed. In that case, we want
1058
+ // to delete all obsolete files we might have created and we force
1059
+ // FindObsoleteFiles(). This is because job_context does not
1060
+ // catch all created files if compaction failed.
1061
+ FindObsoleteFiles(&job_context, !s.ok());
1062
+ } // release the mutex
1063
+
1064
+ // delete unnecessary files if any, this is done outside the mutex
1065
+ if (job_context.HaveSomethingToClean() ||
1066
+ job_context.HaveSomethingToDelete() || !log_buffer.IsEmpty()) {
1067
+ // Have to flush the info logs before bg_compaction_scheduled_--
1068
+ // because if bg_flush_scheduled_ becomes 0 and the lock is
1069
+ // released, the deconstructor of DB can kick in and destroy all the
1070
+ // states of DB so info_log might not be available after that point.
1071
+ // It also applies to access other states that DB owns.
1072
+ log_buffer.FlushBufferToLog();
1073
+ if (job_context.HaveSomethingToDelete()) {
1074
+ // no mutex is locked here. No need to Unlock() and Lock() here.
1075
+ PurgeObsoleteFiles(job_context);
1076
+ }
1077
+ job_context.Clean();
1078
+ }
1079
+
1080
+ return s;
1081
+ #endif // ROCKSDB_LITE
1082
+ }
1083
+
1084
+ #ifndef ROCKSDB_LITE
1085
+ Status DBImpl::CompactFilesImpl(
1086
+ const CompactionOptions& compact_options, ColumnFamilyData* cfd,
1087
+ Version* version, const std::vector<std::string>& input_file_names,
1088
+ std::vector<std::string>* const output_file_names, const int output_level,
1089
+ int output_path_id, JobContext* job_context, LogBuffer* log_buffer,
1090
+ CompactionJobInfo* compaction_job_info) {
1091
+ mutex_.AssertHeld();
1092
+
1093
+ if (shutting_down_.load(std::memory_order_acquire)) {
1094
+ return Status::ShutdownInProgress();
1095
+ }
1096
+ if (manual_compaction_paused_.load(std::memory_order_acquire) > 0) {
1097
+ return Status::Incomplete(Status::SubCode::kManualCompactionPaused);
1098
+ }
1099
+
1100
+ std::unordered_set<uint64_t> input_set;
1101
+ for (const auto& file_name : input_file_names) {
1102
+ input_set.insert(TableFileNameToNumber(file_name));
1103
+ }
1104
+
1105
+ ColumnFamilyMetaData cf_meta;
1106
+ // TODO(yhchiang): can directly use version here if none of the
1107
+ // following functions call is pluggable to external developers.
1108
+ version->GetColumnFamilyMetaData(&cf_meta);
1109
+
1110
+ if (output_path_id < 0) {
1111
+ if (cfd->ioptions()->cf_paths.size() == 1U) {
1112
+ output_path_id = 0;
1113
+ } else {
1114
+ return Status::NotSupported(
1115
+ "Automatic output path selection is not "
1116
+ "yet supported in CompactFiles()");
1117
+ }
1118
+ }
1119
+
1120
+ Status s = cfd->compaction_picker()->SanitizeCompactionInputFiles(
1121
+ &input_set, cf_meta, output_level);
1122
+ if (!s.ok()) {
1123
+ return s;
1124
+ }
1125
+
1126
+ std::vector<CompactionInputFiles> input_files;
1127
+ s = cfd->compaction_picker()->GetCompactionInputsFromFileNumbers(
1128
+ &input_files, &input_set, version->storage_info(), compact_options);
1129
+ if (!s.ok()) {
1130
+ return s;
1131
+ }
1132
+
1133
+ for (const auto& inputs : input_files) {
1134
+ if (cfd->compaction_picker()->AreFilesInCompaction(inputs.files)) {
1135
+ return Status::Aborted(
1136
+ "Some of the necessary compaction input "
1137
+ "files are already being compacted");
1138
+ }
1139
+ }
1140
+ bool sfm_reserved_compact_space = false;
1141
+ // First check if we have enough room to do the compaction
1142
+ bool enough_room = EnoughRoomForCompaction(
1143
+ cfd, input_files, &sfm_reserved_compact_space, log_buffer);
1144
+
1145
+ if (!enough_room) {
1146
+ // m's vars will get set properly at the end of this function,
1147
+ // as long as status == CompactionTooLarge
1148
+ return Status::CompactionTooLarge();
1149
+ }
1150
+
1151
+ // At this point, CompactFiles will be run.
1152
+ bg_compaction_scheduled_++;
1153
+
1154
+ std::unique_ptr<Compaction> c;
1155
+ assert(cfd->compaction_picker());
1156
+ c.reset(cfd->compaction_picker()->CompactFiles(
1157
+ compact_options, input_files, output_level, version->storage_info(),
1158
+ *cfd->GetLatestMutableCFOptions(), mutable_db_options_, output_path_id));
1159
+ // we already sanitized the set of input files and checked for conflicts
1160
+ // without releasing the lock, so we're guaranteed a compaction can be formed.
1161
+ assert(c != nullptr);
1162
+
1163
+ c->SetInputVersion(version);
1164
+ // deletion compaction currently not allowed in CompactFiles.
1165
+ assert(!c->deletion_compaction());
1166
+
1167
+ std::vector<SequenceNumber> snapshot_seqs;
1168
+ SequenceNumber earliest_write_conflict_snapshot;
1169
+ SnapshotChecker* snapshot_checker;
1170
+ GetSnapshotContext(job_context, &snapshot_seqs,
1171
+ &earliest_write_conflict_snapshot, &snapshot_checker);
1172
+
1173
+ std::unique_ptr<std::list<uint64_t>::iterator> pending_outputs_inserted_elem(
1174
+ new std::list<uint64_t>::iterator(
1175
+ CaptureCurrentFileNumberInPendingOutputs()));
1176
+
1177
+ assert(is_snapshot_supported_ || snapshots_.empty());
1178
+ CompactionJobStats compaction_job_stats;
1179
+ CompactionJob compaction_job(
1180
+ job_context->job_id, c.get(), immutable_db_options_,
1181
+ file_options_for_compaction_, versions_.get(), &shutting_down_,
1182
+ preserve_deletes_seqnum_.load(), log_buffer, directories_.GetDbDir(),
1183
+ GetDataDir(c->column_family_data(), c->output_path_id()),
1184
+ GetDataDir(c->column_family_data(), 0), stats_, &mutex_, &error_handler_,
1185
+ snapshot_seqs, earliest_write_conflict_snapshot, snapshot_checker,
1186
+ table_cache_, &event_logger_,
1187
+ c->mutable_cf_options()->paranoid_file_checks,
1188
+ c->mutable_cf_options()->report_bg_io_stats, dbname_,
1189
+ &compaction_job_stats, Env::Priority::USER, io_tracer_,
1190
+ &manual_compaction_paused_, db_id_, db_session_id_,
1191
+ c->column_family_data()->GetFullHistoryTsLow());
1192
+
1193
+ // Creating a compaction influences the compaction score because the score
1194
+ // takes running compactions into account (by skipping files that are already
1195
+ // being compacted). Since we just changed compaction score, we recalculate it
1196
+ // here.
1197
+ version->storage_info()->ComputeCompactionScore(*cfd->ioptions(),
1198
+ *c->mutable_cf_options());
1199
+
1200
+ compaction_job.Prepare();
1201
+
1202
+ mutex_.Unlock();
1203
+ TEST_SYNC_POINT("CompactFilesImpl:0");
1204
+ TEST_SYNC_POINT("CompactFilesImpl:1");
1205
+ // Ignore the status here, as it will be checked in the Install down below...
1206
+ compaction_job.Run().PermitUncheckedError();
1207
+ TEST_SYNC_POINT("CompactFilesImpl:2");
1208
+ TEST_SYNC_POINT("CompactFilesImpl:3");
1209
+ mutex_.Lock();
1210
+
1211
+ Status status = compaction_job.Install(*c->mutable_cf_options());
1212
+ if (status.ok()) {
1213
+ assert(compaction_job.io_status().ok());
1214
+ InstallSuperVersionAndScheduleWork(c->column_family_data(),
1215
+ &job_context->superversion_contexts[0],
1216
+ *c->mutable_cf_options());
1217
+ }
1218
+ // status above captures any error during compaction_job.Install, so its ok
1219
+ // not check compaction_job.io_status() explicitly if we're not calling
1220
+ // SetBGError
1221
+ compaction_job.io_status().PermitUncheckedError();
1222
+ c->ReleaseCompactionFiles(s);
1223
+ #ifndef ROCKSDB_LITE
1224
+ // Need to make sure SstFileManager does its bookkeeping
1225
+ auto sfm = static_cast<SstFileManagerImpl*>(
1226
+ immutable_db_options_.sst_file_manager.get());
1227
+ if (sfm && sfm_reserved_compact_space) {
1228
+ sfm->OnCompactionCompletion(c.get());
1229
+ }
1230
+ #endif // ROCKSDB_LITE
1231
+
1232
+ ReleaseFileNumberFromPendingOutputs(pending_outputs_inserted_elem);
1233
+
1234
+ if (compaction_job_info != nullptr) {
1235
+ BuildCompactionJobInfo(cfd, c.get(), s, compaction_job_stats,
1236
+ job_context->job_id, version, compaction_job_info);
1237
+ }
1238
+
1239
+ if (status.ok()) {
1240
+ // Done
1241
+ } else if (status.IsColumnFamilyDropped() || status.IsShutdownInProgress()) {
1242
+ // Ignore compaction errors found during shutting down
1243
+ } else if (status.IsManualCompactionPaused()) {
1244
+ // Don't report stopping manual compaction as error
1245
+ ROCKS_LOG_INFO(immutable_db_options_.info_log,
1246
+ "[%s] [JOB %d] Stopping manual compaction",
1247
+ c->column_family_data()->GetName().c_str(),
1248
+ job_context->job_id);
1249
+ } else {
1250
+ ROCKS_LOG_WARN(immutable_db_options_.info_log,
1251
+ "[%s] [JOB %d] Compaction error: %s",
1252
+ c->column_family_data()->GetName().c_str(),
1253
+ job_context->job_id, status.ToString().c_str());
1254
+ IOStatus io_s = compaction_job.io_status();
1255
+ if (!io_s.ok()) {
1256
+ error_handler_.SetBGError(io_s, BackgroundErrorReason::kCompaction);
1257
+ } else {
1258
+ error_handler_.SetBGError(status, BackgroundErrorReason::kCompaction);
1259
+ }
1260
+ }
1261
+
1262
+ if (output_file_names != nullptr) {
1263
+ for (const auto& newf : c->edit()->GetNewFiles()) {
1264
+ (*output_file_names)
1265
+ .push_back(TableFileName(c->immutable_cf_options()->cf_paths,
1266
+ newf.second.fd.GetNumber(),
1267
+ newf.second.fd.GetPathId()));
1268
+ }
1269
+ }
1270
+
1271
+ c.reset();
1272
+
1273
+ bg_compaction_scheduled_--;
1274
+ if (bg_compaction_scheduled_ == 0) {
1275
+ bg_cv_.SignalAll();
1276
+ }
1277
+ MaybeScheduleFlushOrCompaction();
1278
+ TEST_SYNC_POINT("CompactFilesImpl:End");
1279
+
1280
+ return status;
1281
+ }
1282
+ #endif // ROCKSDB_LITE
1283
+
1284
+ Status DBImpl::PauseBackgroundWork() {
1285
+ InstrumentedMutexLock guard_lock(&mutex_);
1286
+ bg_compaction_paused_++;
1287
+ while (bg_bottom_compaction_scheduled_ > 0 || bg_compaction_scheduled_ > 0 ||
1288
+ bg_flush_scheduled_ > 0) {
1289
+ bg_cv_.Wait();
1290
+ }
1291
+ bg_work_paused_++;
1292
+ return Status::OK();
1293
+ }
1294
+
1295
+ Status DBImpl::ContinueBackgroundWork() {
1296
+ InstrumentedMutexLock guard_lock(&mutex_);
1297
+ if (bg_work_paused_ == 0) {
1298
+ return Status::InvalidArgument();
1299
+ }
1300
+ assert(bg_work_paused_ > 0);
1301
+ assert(bg_compaction_paused_ > 0);
1302
+ bg_compaction_paused_--;
1303
+ bg_work_paused_--;
1304
+ // It's sufficient to check just bg_work_paused_ here since
1305
+ // bg_work_paused_ is always no greater than bg_compaction_paused_
1306
+ if (bg_work_paused_ == 0) {
1307
+ MaybeScheduleFlushOrCompaction();
1308
+ }
1309
+ return Status::OK();
1310
+ }
1311
+
1312
// Invokes EventListener::OnCompactionBegin() on all registered listeners
// for compaction `c` on column family `cfd`.
// REQUIRES: mutex_ held on entry; the mutex is released while the listener
// callbacks run and re-acquired before returning.
void DBImpl::NotifyOnCompactionBegin(ColumnFamilyData* cfd, Compaction* c,
                                     const Status& st,
                                     const CompactionJobStats& job_stats,
                                     int job_id) {
#ifndef ROCKSDB_LITE
  if (immutable_db_options_.listeners.empty()) {
    return;
  }
  mutex_.AssertHeld();
  if (shutting_down_.load(std::memory_order_acquire)) {
    return;
  }
  // Skip notification for a manual compaction while manual compactions are
  // paused (manual_compaction_paused_ > 0).
  if (c->is_manual_compaction() &&
      manual_compaction_paused_.load(std::memory_order_acquire) > 0) {
    return;
  }
  // Pin the current version so BuildCompactionJobInfo can read it safely
  // after the mutex is dropped.
  Version* current = cfd->current();
  current->Ref();
  // release lock while notifying events
  mutex_.Unlock();
  TEST_SYNC_POINT("DBImpl::NotifyOnCompactionBegin::UnlockMutex");
  {
    CompactionJobInfo info{};
    BuildCompactionJobInfo(cfd, c, st, job_stats, job_id, current, &info);
    for (auto listener : immutable_db_options_.listeners) {
      listener->OnCompactionBegin(this, info);
    }
    // The status carried in `info` is informational for listeners only.
    info.status.PermitUncheckedError();
  }
  mutex_.Lock();
  current->Unref();
#else
  (void)cfd;
  (void)c;
  (void)st;
  (void)job_stats;
  (void)job_id;
#endif  // ROCKSDB_LITE
}
1351
+
1352
+ void DBImpl::NotifyOnCompactionCompleted(
1353
+ ColumnFamilyData* cfd, Compaction* c, const Status& st,
1354
+ const CompactionJobStats& compaction_job_stats, const int job_id) {
1355
+ #ifndef ROCKSDB_LITE
1356
+ if (immutable_db_options_.listeners.size() == 0U) {
1357
+ return;
1358
+ }
1359
+ mutex_.AssertHeld();
1360
+ if (shutting_down_.load(std::memory_order_acquire)) {
1361
+ return;
1362
+ }
1363
+ if (c->is_manual_compaction() &&
1364
+ manual_compaction_paused_.load(std::memory_order_acquire) > 0) {
1365
+ return;
1366
+ }
1367
+ Version* current = cfd->current();
1368
+ current->Ref();
1369
+ // release lock while notifying events
1370
+ mutex_.Unlock();
1371
+ TEST_SYNC_POINT("DBImpl::NotifyOnCompactionCompleted::UnlockMutex");
1372
+ {
1373
+ CompactionJobInfo info{};
1374
+ BuildCompactionJobInfo(cfd, c, st, compaction_job_stats, job_id, current,
1375
+ &info);
1376
+ for (auto listener : immutable_db_options_.listeners) {
1377
+ listener->OnCompactionCompleted(this, info);
1378
+ }
1379
+ }
1380
+ mutex_.Lock();
1381
+ current->Unref();
1382
+ // no need to signal bg_cv_ as it will be signaled at the end of the
1383
+ // flush process.
1384
+ #else
1385
+ (void)cfd;
1386
+ (void)c;
1387
+ (void)st;
1388
+ (void)compaction_job_stats;
1389
+ (void)job_id;
1390
+ #endif // ROCKSDB_LITE
1391
+ }
1392
+
1393
// Moves all files of `cfd` from `level` to `target_level` via a trivial
// move recorded in a VersionEdit (no data is rewritten). A negative
// target_level means "pick the minimum empty level that fits"
// (FindMinimumEmptyLevelFitting). Fails if any level strictly between
// source and target contains files, or if moving up out of level 0.
// REQUIREMENT: block all background work by calling PauseBackgroundWork()
// before calling this function
Status DBImpl::ReFitLevel(ColumnFamilyData* cfd, int level, int target_level) {
  assert(level < cfd->NumberLevels());
  if (target_level >= cfd->NumberLevels()) {
    return Status::InvalidArgument("Target level exceeds number of levels");
  }

  SuperVersionContext sv_context(/* create_superversion */ true);

  InstrumentedMutexLock guard_lock(&mutex_);

  // only allow one thread refitting
  if (refitting_level_) {
    ROCKS_LOG_INFO(immutable_db_options_.info_log,
                   "[ReFitLevel] another thread is refitting");
    return Status::NotSupported("another thread is refitting");
  }
  refitting_level_ = true;

  const MutableCFOptions mutable_cf_options = *cfd->GetLatestMutableCFOptions();
  // move to a smaller level
  int to_level = target_level;
  if (target_level < 0) {
    to_level = FindMinimumEmptyLevelFitting(cfd, mutable_cf_options, level);
  }

  auto* vstorage = cfd->current()->storage_info();
  if (to_level != level) {
    if (to_level > level) {
      if (level == 0) {
        // Moving out of L0 is not supported: L0 files may overlap.
        refitting_level_ = false;
        return Status::NotSupported(
            "Cannot change from level 0 to other levels.");
      }
      // Check levels are empty for a trivial move
      for (int l = level + 1; l <= to_level; l++) {
        if (vstorage->NumLevelFiles(l) > 0) {
          refitting_level_ = false;
          return Status::NotSupported(
              "Levels between source and target are not empty for a move.");
        }
      }
    } else {
      // to_level < level
      // Check levels are empty for a trivial move
      for (int l = to_level; l < level; l++) {
        if (vstorage->NumLevelFiles(l) > 0) {
          refitting_level_ = false;
          return Status::NotSupported(
              "Levels between source and target are not empty for a move.");
        }
      }
    }
    ROCKS_LOG_DEBUG(immutable_db_options_.info_log,
                    "[%s] Before refitting:\n%s", cfd->GetName().c_str(),
                    cfd->current()->DebugString().data());

    // Record the move as delete-at-source + add-at-target for every file,
    // keeping all file metadata (seqnos, checksums, timestamps) unchanged.
    VersionEdit edit;
    edit.SetColumnFamily(cfd->GetID());
    for (const auto& f : vstorage->LevelFiles(level)) {
      edit.DeleteFile(level, f->fd.GetNumber());
      edit.AddFile(to_level, f->fd.GetNumber(), f->fd.GetPathId(),
                   f->fd.GetFileSize(), f->smallest, f->largest,
                   f->fd.smallest_seqno, f->fd.largest_seqno,
                   f->marked_for_compaction, f->oldest_blob_file_number,
                   f->oldest_ancester_time, f->file_creation_time,
                   f->file_checksum, f->file_checksum_func_name);
    }
    ROCKS_LOG_DEBUG(immutable_db_options_.info_log,
                    "[%s] Apply version edit:\n%s", cfd->GetName().c_str(),
                    edit.DebugString().data());

    Status status = versions_->LogAndApply(cfd, mutable_cf_options, &edit,
                                           &mutex_, directories_.GetDbDir());

    // Install the new super version even on failure so readers observe a
    // consistent state; `status` is still reported to the caller.
    InstallSuperVersionAndScheduleWork(cfd, &sv_context, mutable_cf_options);

    ROCKS_LOG_DEBUG(immutable_db_options_.info_log, "[%s] LogAndApply: %s\n",
                    cfd->GetName().c_str(), status.ToString().data());

    if (status.ok()) {
      ROCKS_LOG_DEBUG(immutable_db_options_.info_log,
                      "[%s] After refitting:\n%s", cfd->GetName().c_str(),
                      cfd->current()->DebugString().data());
    }
    sv_context.Clean();
    refitting_level_ = false;

    return status;
  }

  // to_level == level: nothing to move.
  refitting_level_ = false;
  return Status::OK();
}
1488
+
1489
+ int DBImpl::NumberLevels(ColumnFamilyHandle* column_family) {
1490
+ auto cfh = static_cast_with_check<ColumnFamilyHandleImpl>(column_family);
1491
+ return cfh->cfd()->NumberLevels();
1492
+ }
1493
+
1494
+ int DBImpl::MaxMemCompactionLevel(ColumnFamilyHandle* /*column_family*/) {
1495
+ return 0;
1496
+ }
1497
+
1498
+ int DBImpl::Level0StopWriteTrigger(ColumnFamilyHandle* column_family) {
1499
+ auto cfh = static_cast_with_check<ColumnFamilyHandleImpl>(column_family);
1500
+ InstrumentedMutexLock l(&mutex_);
1501
+ return cfh->cfd()
1502
+ ->GetSuperVersion()
1503
+ ->mutable_cf_options.level0_stop_writes_trigger;
1504
+ }
1505
+
1506
+ Status DBImpl::Flush(const FlushOptions& flush_options,
1507
+ ColumnFamilyHandle* column_family) {
1508
+ auto cfh = static_cast_with_check<ColumnFamilyHandleImpl>(column_family);
1509
+ ROCKS_LOG_INFO(immutable_db_options_.info_log, "[%s] Manual flush start.",
1510
+ cfh->GetName().c_str());
1511
+ Status s;
1512
+ if (immutable_db_options_.atomic_flush) {
1513
+ s = AtomicFlushMemTables({cfh->cfd()}, flush_options,
1514
+ FlushReason::kManualFlush);
1515
+ } else {
1516
+ s = FlushMemTable(cfh->cfd(), flush_options, FlushReason::kManualFlush);
1517
+ }
1518
+
1519
+ ROCKS_LOG_INFO(immutable_db_options_.info_log,
1520
+ "[%s] Manual flush finished, status: %s\n",
1521
+ cfh->GetName().c_str(), s.ToString().c_str());
1522
+ return s;
1523
+ }
1524
+
1525
+ Status DBImpl::Flush(const FlushOptions& flush_options,
1526
+ const std::vector<ColumnFamilyHandle*>& column_families) {
1527
+ Status s;
1528
+ if (!immutable_db_options_.atomic_flush) {
1529
+ for (auto cfh : column_families) {
1530
+ s = Flush(flush_options, cfh);
1531
+ if (!s.ok()) {
1532
+ break;
1533
+ }
1534
+ }
1535
+ } else {
1536
+ ROCKS_LOG_INFO(immutable_db_options_.info_log,
1537
+ "Manual atomic flush start.\n"
1538
+ "=====Column families:=====");
1539
+ for (auto cfh : column_families) {
1540
+ auto cfhi = static_cast<ColumnFamilyHandleImpl*>(cfh);
1541
+ ROCKS_LOG_INFO(immutable_db_options_.info_log, "%s",
1542
+ cfhi->GetName().c_str());
1543
+ }
1544
+ ROCKS_LOG_INFO(immutable_db_options_.info_log,
1545
+ "=====End of column families list=====");
1546
+ autovector<ColumnFamilyData*> cfds;
1547
+ std::for_each(column_families.begin(), column_families.end(),
1548
+ [&cfds](ColumnFamilyHandle* elem) {
1549
+ auto cfh = static_cast<ColumnFamilyHandleImpl*>(elem);
1550
+ cfds.emplace_back(cfh->cfd());
1551
+ });
1552
+ s = AtomicFlushMemTables(cfds, flush_options, FlushReason::kManualFlush);
1553
+ ROCKS_LOG_INFO(immutable_db_options_.info_log,
1554
+ "Manual atomic flush finished, status: %s\n"
1555
+ "=====Column families:=====",
1556
+ s.ToString().c_str());
1557
+ for (auto cfh : column_families) {
1558
+ auto cfhi = static_cast<ColumnFamilyHandleImpl*>(cfh);
1559
+ ROCKS_LOG_INFO(immutable_db_options_.info_log, "%s",
1560
+ cfhi->GetName().c_str());
1561
+ }
1562
+ ROCKS_LOG_INFO(immutable_db_options_.info_log,
1563
+ "=====End of column families list=====");
1564
+ }
1565
+ return s;
1566
+ }
1567
+
1568
// Runs a manual compaction of `cfd` from `input_level` to `output_level`
// over the user-key range [begin, end] (nullptr means unbounded; for
// universal/FIFO styles the range is always treated as unbounded).
// Registers a ManualCompactionState, repeatedly picks and schedules
// compactions until the whole range is done, then returns the final
// manual.status. Blocks on bg_cv_ while other (manual) compactions run.
Status DBImpl::RunManualCompaction(
    ColumnFamilyData* cfd, int input_level, int output_level,
    const CompactRangeOptions& compact_range_options, const Slice* begin,
    const Slice* end, bool exclusive, bool disallow_trivial_move,
    uint64_t max_file_num_to_ignore) {
  assert(input_level == ColumnFamilyData::kCompactAllLevels ||
         input_level >= 0);

  InternalKey begin_storage, end_storage;
  CompactionArg* ca;

  bool scheduled = false;
  bool manual_conflict = false;
  ManualCompactionState manual;
  manual.cfd = cfd;
  manual.input_level = input_level;
  manual.output_level = output_level;
  manual.output_path_id = compact_range_options.target_path_id;
  manual.done = false;
  manual.in_progress = false;
  manual.incomplete = false;
  manual.exclusive = exclusive;
  manual.disallow_trivial_move = disallow_trivial_move;
  // For universal compaction, we enforce every manual compaction to compact
  // all files.
  if (begin == nullptr ||
      cfd->ioptions()->compaction_style == kCompactionStyleUniversal ||
      cfd->ioptions()->compaction_style == kCompactionStyleFIFO) {
    manual.begin = nullptr;
  } else {
    begin_storage.SetMinPossibleForUserKey(*begin);
    manual.begin = &begin_storage;
  }
  if (end == nullptr ||
      cfd->ioptions()->compaction_style == kCompactionStyleUniversal ||
      cfd->ioptions()->compaction_style == kCompactionStyleFIFO) {
    manual.end = nullptr;
  } else {
    end_storage.SetMaxPossibleForUserKey(*end);
    manual.end = &end_storage;
  }

  TEST_SYNC_POINT("DBImpl::RunManualCompaction:0");
  TEST_SYNC_POINT("DBImpl::RunManualCompaction:1");
  InstrumentedMutexLock l(&mutex_);

  // When a manual compaction arrives, temporarily disable scheduling of
  // non-manual compactions and wait until the number of scheduled compaction
  // jobs drops to zero. This is needed to ensure that this manual compaction
  // can compact any range of keys/files.
  //
  // HasPendingManualCompaction() is true when at least one thread is inside
  // RunManualCompaction(), i.e. during that time no other compaction will
  // get scheduled (see MaybeScheduleFlushOrCompaction).
  //
  // Note that the following loop doesn't stop more than one thread calling
  // RunManualCompaction() from getting to the second while loop below.
  // However, only one of them will actually schedule compaction, while
  // others will wait on a condition variable until it completes.

  AddManualCompaction(&manual);
  TEST_SYNC_POINT_CALLBACK("DBImpl::RunManualCompaction:NotScheduled", &mutex_);
  if (exclusive) {
    // Exclusive mode: drain every already-scheduled background compaction
    // before this manual compaction proceeds.
    while (bg_bottom_compaction_scheduled_ > 0 ||
           bg_compaction_scheduled_ > 0) {
      TEST_SYNC_POINT("DBImpl::RunManualCompaction:WaitScheduled");
      ROCKS_LOG_INFO(
          immutable_db_options_.info_log,
          "[%s] Manual compaction waiting for all other scheduled background "
          "compactions to finish",
          cfd->GetName().c_str());
      bg_cv_.Wait();
    }
  }

  ROCKS_LOG_INFO(immutable_db_options_.info_log,
                 "[%s] Manual compaction starting", cfd->GetName().c_str());

  LogBuffer log_buffer(InfoLogLevel::INFO_LEVEL,
                       immutable_db_options_.info_log.get());
  // We don't check bg_error_ here, because if we get the error in compaction,
  // the compaction will set manual.status to bg_error_ and set manual.done to
  // true.
  while (!manual.done) {
    assert(HasPendingManualCompaction());
    manual_conflict = false;
    Compaction* compaction = nullptr;
    // NOTE: the condition below intentionally embeds assignments:
    // manual.manual_end is reset and CompactRange() is invoked as part of
    // evaluating it. The branch is taken (i.e. we wait) when this manual
    // compaction cannot run right now, is already in progress/scheduled,
    // or CompactRange() found a conflicting compaction.
    if (ShouldntRunManualCompaction(&manual) || (manual.in_progress == true) ||
        scheduled ||
        (((manual.manual_end = &manual.tmp_storage1) != nullptr) &&
         ((compaction = manual.cfd->CompactRange(
               *manual.cfd->GetLatestMutableCFOptions(), mutable_db_options_,
               manual.input_level, manual.output_level, compact_range_options,
               manual.begin, manual.end, &manual.manual_end, &manual_conflict,
               max_file_num_to_ignore)) == nullptr &&
          manual_conflict))) {
      // exclusive manual compactions should not see a conflict during
      // CompactRange
      assert(!exclusive || !manual_conflict);
      // Running either this or some other manual compaction
      bg_cv_.Wait();
      if (scheduled && manual.incomplete == true) {
        // The scheduled job finished without completing the full range;
        // loop around and schedule the remainder.
        assert(!manual.in_progress);
        scheduled = false;
        manual.incomplete = false;
      }
    } else if (!scheduled) {
      if (compaction == nullptr) {
        // Nothing (left) to compact in the requested range.
        manual.done = true;
        bg_cv_.SignalAll();
        continue;
      }
      // Hand the picked compaction to a background worker. `ca` is
      // owned/freed by the background job machinery.
      ca = new CompactionArg;
      ca->db = this;
      ca->prepicked_compaction = new PrepickedCompaction;
      ca->prepicked_compaction->manual_compaction_state = &manual;
      ca->prepicked_compaction->compaction = compaction;
      if (!RequestCompactionToken(
              cfd, true, &ca->prepicked_compaction->task_token, &log_buffer)) {
        // Don't throttle manual compaction, only count outstanding tasks.
        assert(false);
      }
      manual.incomplete = false;
      bg_compaction_scheduled_++;
      // Prefer the bottom-priority pool for bottommost-level compactions
      // when such threads exist.
      Env::Priority thread_pool_pri = Env::Priority::LOW;
      if (compaction->bottommost_level() &&
          env_->GetBackgroundThreads(Env::Priority::BOTTOM) > 0) {
        thread_pool_pri = Env::Priority::BOTTOM;
      }
      env_->Schedule(&DBImpl::BGWorkCompaction, ca, thread_pool_pri, this,
                     &DBImpl::UnscheduleCompactionCallback);
      scheduled = true;
    }
  }

  log_buffer.FlushBufferToLog();
  assert(!manual.in_progress);
  assert(HasPendingManualCompaction());
  RemoveManualCompaction(&manual);
  bg_cv_.SignalAll();
  return manual.status;
}
1710
+
1711
+ void DBImpl::GenerateFlushRequest(const autovector<ColumnFamilyData*>& cfds,
1712
+ FlushRequest* req) {
1713
+ assert(req != nullptr);
1714
+ req->reserve(cfds.size());
1715
+ for (const auto cfd : cfds) {
1716
+ if (nullptr == cfd) {
1717
+ // cfd may be null, see DBImpl::ScheduleFlushes
1718
+ continue;
1719
+ }
1720
+ uint64_t max_memtable_id = cfd->imm()->GetLatestMemTableID();
1721
+ req->emplace_back(cfd, max_memtable_id);
1722
+ }
1723
+ }
1724
+
1725
+ Status DBImpl::FlushMemTable(ColumnFamilyData* cfd,
1726
+ const FlushOptions& flush_options,
1727
+ FlushReason flush_reason, bool writes_stopped) {
1728
+ // This method should not be called if atomic_flush is true.
1729
+ assert(!immutable_db_options_.atomic_flush);
1730
+ Status s;
1731
+ if (!flush_options.allow_write_stall) {
1732
+ bool flush_needed = true;
1733
+ s = WaitUntilFlushWouldNotStallWrites(cfd, &flush_needed);
1734
+ TEST_SYNC_POINT("DBImpl::FlushMemTable:StallWaitDone");
1735
+ if (!s.ok() || !flush_needed) {
1736
+ return s;
1737
+ }
1738
+ }
1739
+
1740
+ autovector<FlushRequest> flush_reqs;
1741
+ autovector<uint64_t> memtable_ids_to_wait;
1742
+ {
1743
+ WriteContext context;
1744
+ InstrumentedMutexLock guard_lock(&mutex_);
1745
+
1746
+ WriteThread::Writer w;
1747
+ WriteThread::Writer nonmem_w;
1748
+ if (!writes_stopped) {
1749
+ write_thread_.EnterUnbatched(&w, &mutex_);
1750
+ if (two_write_queues_) {
1751
+ nonmem_write_thread_.EnterUnbatched(&nonmem_w, &mutex_);
1752
+ }
1753
+ }
1754
+ WaitForPendingWrites();
1755
+
1756
+ if (flush_reason != FlushReason::kErrorRecoveryRetryFlush &&
1757
+ (!cfd->mem()->IsEmpty() || !cached_recoverable_state_empty_.load())) {
1758
+ // Note that, when flush reason is kErrorRecoveryRetryFlush, during the
1759
+ // auto retry resume, we want to avoid creating new small memtables.
1760
+ // Therefore, SwitchMemtable will not be called. Also, since ResumeImpl
1761
+ // will iterate through all the CFs and call FlushMemtable during auto
1762
+ // retry resume, it is possible that in some CFs,
1763
+ // cfd->imm()->NumNotFlushed() = 0. In this case, so no flush request will
1764
+ // be created and scheduled, status::OK() will be returned.
1765
+ s = SwitchMemtable(cfd, &context);
1766
+ }
1767
+ const uint64_t flush_memtable_id = port::kMaxUint64;
1768
+ if (s.ok()) {
1769
+ if (cfd->imm()->NumNotFlushed() != 0 || !cfd->mem()->IsEmpty() ||
1770
+ !cached_recoverable_state_empty_.load()) {
1771
+ FlushRequest req{{cfd, flush_memtable_id}};
1772
+ flush_reqs.emplace_back(std::move(req));
1773
+ memtable_ids_to_wait.emplace_back(cfd->imm()->GetLatestMemTableID());
1774
+ }
1775
+ if (immutable_db_options_.persist_stats_to_disk &&
1776
+ flush_reason != FlushReason::kErrorRecoveryRetryFlush) {
1777
+ ColumnFamilyData* cfd_stats =
1778
+ versions_->GetColumnFamilySet()->GetColumnFamily(
1779
+ kPersistentStatsColumnFamilyName);
1780
+ if (cfd_stats != nullptr && cfd_stats != cfd &&
1781
+ !cfd_stats->mem()->IsEmpty()) {
1782
+ // only force flush stats CF when it will be the only CF lagging
1783
+ // behind after the current flush
1784
+ bool stats_cf_flush_needed = true;
1785
+ for (auto* loop_cfd : *versions_->GetColumnFamilySet()) {
1786
+ if (loop_cfd == cfd_stats || loop_cfd == cfd) {
1787
+ continue;
1788
+ }
1789
+ if (loop_cfd->GetLogNumber() <= cfd_stats->GetLogNumber()) {
1790
+ stats_cf_flush_needed = false;
1791
+ }
1792
+ }
1793
+ if (stats_cf_flush_needed) {
1794
+ ROCKS_LOG_INFO(immutable_db_options_.info_log,
1795
+ "Force flushing stats CF with manual flush of %s "
1796
+ "to avoid holding old logs",
1797
+ cfd->GetName().c_str());
1798
+ s = SwitchMemtable(cfd_stats, &context);
1799
+ FlushRequest req{{cfd_stats, flush_memtable_id}};
1800
+ flush_reqs.emplace_back(std::move(req));
1801
+ memtable_ids_to_wait.emplace_back(
1802
+ cfd->imm()->GetLatestMemTableID());
1803
+ }
1804
+ }
1805
+ }
1806
+ }
1807
+
1808
+ if (s.ok() && !flush_reqs.empty()) {
1809
+ for (const auto& req : flush_reqs) {
1810
+ assert(req.size() == 1);
1811
+ ColumnFamilyData* loop_cfd = req[0].first;
1812
+ loop_cfd->imm()->FlushRequested();
1813
+ }
1814
+ // If the caller wants to wait for this flush to complete, it indicates
1815
+ // that the caller expects the ColumnFamilyData not to be free'ed by
1816
+ // other threads which may drop the column family concurrently.
1817
+ // Therefore, we increase the cfd's ref count.
1818
+ if (flush_options.wait) {
1819
+ for (const auto& req : flush_reqs) {
1820
+ assert(req.size() == 1);
1821
+ ColumnFamilyData* loop_cfd = req[0].first;
1822
+ loop_cfd->Ref();
1823
+ }
1824
+ }
1825
+ for (const auto& req : flush_reqs) {
1826
+ SchedulePendingFlush(req, flush_reason);
1827
+ }
1828
+ MaybeScheduleFlushOrCompaction();
1829
+ }
1830
+
1831
+ if (!writes_stopped) {
1832
+ write_thread_.ExitUnbatched(&w);
1833
+ if (two_write_queues_) {
1834
+ nonmem_write_thread_.ExitUnbatched(&nonmem_w);
1835
+ }
1836
+ }
1837
+ }
1838
+ TEST_SYNC_POINT("DBImpl::FlushMemTable:AfterScheduleFlush");
1839
+ TEST_SYNC_POINT("DBImpl::FlushMemTable:BeforeWaitForBgFlush");
1840
+ if (s.ok() && flush_options.wait) {
1841
+ autovector<ColumnFamilyData*> cfds;
1842
+ autovector<const uint64_t*> flush_memtable_ids;
1843
+ assert(flush_reqs.size() == memtable_ids_to_wait.size());
1844
+ for (size_t i = 0; i < flush_reqs.size(); ++i) {
1845
+ assert(flush_reqs[i].size() == 1);
1846
+ cfds.push_back(flush_reqs[i][0].first);
1847
+ flush_memtable_ids.push_back(&(memtable_ids_to_wait[i]));
1848
+ }
1849
+ s = WaitForFlushMemTables(
1850
+ cfds, flush_memtable_ids,
1851
+ (flush_reason == FlushReason::kErrorRecovery ||
1852
+ flush_reason == FlushReason::kErrorRecoveryRetryFlush));
1853
+ InstrumentedMutexLock lock_guard(&mutex_);
1854
+ for (auto* tmp_cfd : cfds) {
1855
+ tmp_cfd->UnrefAndTryDelete();
1856
+ }
1857
+ }
1858
+ TEST_SYNC_POINT("DBImpl::FlushMemTable:FlushMemTableFinished");
1859
+ return s;
1860
+ }
1861
+
1862
// Flush all elements in 'column_family_datas'
// and atomically record the result to the MANIFEST.
// Switches the memtables of all affected column families, assigns them a
// common atomic-flush sequence, schedules one combined flush request, and
// (when flush_options.wait is set) blocks until all of them complete.
Status DBImpl::AtomicFlushMemTables(
    const autovector<ColumnFamilyData*>& column_family_datas,
    const FlushOptions& flush_options, FlushReason flush_reason,
    bool writes_stopped) {
  Status s;
  if (!flush_options.allow_write_stall) {
    // Wait out any would-be write stall per CF; skip the whole flush if no
    // CF still needs flushing afterwards.
    int num_cfs_to_flush = 0;
    for (auto cfd : column_family_datas) {
      bool flush_needed = true;
      s = WaitUntilFlushWouldNotStallWrites(cfd, &flush_needed);
      if (!s.ok()) {
        return s;
      } else if (flush_needed) {
        ++num_cfs_to_flush;
      }
    }
    if (0 == num_cfs_to_flush) {
      return s;
    }
  }
  FlushRequest flush_req;
  autovector<ColumnFamilyData*> cfds;
  {
    WriteContext context;
    InstrumentedMutexLock guard_lock(&mutex_);

    WriteThread::Writer w;
    WriteThread::Writer nonmem_w;
    if (!writes_stopped) {
      // Stop writes so the memtable switches below are atomic w.r.t.
      // writers.
      write_thread_.EnterUnbatched(&w, &mutex_);
      if (two_write_queues_) {
        nonmem_write_thread_.EnterUnbatched(&nonmem_w, &mutex_);
      }
    }
    WaitForPendingWrites();

    // Select the CFs that actually have data to flush.
    for (auto cfd : column_family_datas) {
      if (cfd->IsDropped()) {
        continue;
      }
      if (cfd->imm()->NumNotFlushed() != 0 || !cfd->mem()->IsEmpty() ||
          !cached_recoverable_state_empty_.load()) {
        cfds.emplace_back(cfd);
      }
    }
    // Switch the active memtable of each selected CF (skipped during error
    // recovery retry to avoid creating new small memtables).
    for (auto cfd : cfds) {
      if ((cfd->mem()->IsEmpty() && cached_recoverable_state_empty_.load()) ||
          flush_reason == FlushReason::kErrorRecoveryRetryFlush) {
        continue;
      }
      cfd->Ref();
      s = SwitchMemtable(cfd, &context);
      cfd->UnrefAndTryDelete();
      if (!s.ok()) {
        break;
      }
    }
    if (s.ok()) {
      AssignAtomicFlushSeq(cfds);
      for (auto cfd : cfds) {
        cfd->imm()->FlushRequested();
      }
      // If the caller wants to wait for this flush to complete, it indicates
      // that the caller expects the ColumnFamilyData not to be free'ed by
      // other threads which may drop the column family concurrently.
      // Therefore, we increase the cfd's ref count.
      if (flush_options.wait) {
        for (auto cfd : cfds) {
          cfd->Ref();
        }
      }
      GenerateFlushRequest(cfds, &flush_req);
      SchedulePendingFlush(flush_req, flush_reason);
      MaybeScheduleFlushOrCompaction();
    }

    if (!writes_stopped) {
      write_thread_.ExitUnbatched(&w);
      if (two_write_queues_) {
        nonmem_write_thread_.ExitUnbatched(&nonmem_w);
      }
    }
  }
  TEST_SYNC_POINT("DBImpl::AtomicFlushMemTables:AfterScheduleFlush");
  TEST_SYNC_POINT("DBImpl::AtomicFlushMemTables:BeforeWaitForBgFlush");
  if (s.ok() && flush_options.wait) {
    // Wait for all CFs in the combined request; drop the extra refs taken
    // above.
    autovector<const uint64_t*> flush_memtable_ids;
    for (auto& iter : flush_req) {
      flush_memtable_ids.push_back(&(iter.second));
    }
    s = WaitForFlushMemTables(
        cfds, flush_memtable_ids,
        (flush_reason == FlushReason::kErrorRecovery ||
         flush_reason == FlushReason::kErrorRecoveryRetryFlush));
    InstrumentedMutexLock lock_guard(&mutex_);
    for (auto* cfd : cfds) {
      cfd->UnrefAndTryDelete();
    }
  }
  return s;
}
1965
+
1966
// Calling FlushMemTable(), whether from DB::Flush() or from Backup Engine,
// can cause write stall, for example if one memtable is being flushed
// already. This method tries to avoid write stall (similar to
// CompactRange() behavior): it emulates how the SuperVersion / LSM would
// change if flush happens, checks it against various constraints and
// delays flush if it'd cause write stall.
// Caller should check status and flush_needed to see if flush already
// happened.
Status DBImpl::WaitUntilFlushWouldNotStallWrites(ColumnFamilyData* cfd,
                                                 bool* flush_needed) {
  {
    *flush_needed = true;
    InstrumentedMutexLock l(&mutex_);
    uint64_t orig_active_memtable_id = cfd->mem()->GetID();
    WriteStallCondition write_stall_condition = WriteStallCondition::kNormal;
    do {
      if (write_stall_condition != WriteStallCondition::kNormal) {
        // Same error handling as user writes: Don't wait if there's a
        // background error, even if it's a soft error. We might wait here
        // indefinitely as the pending flushes/compactions may never finish
        // successfully, resulting in the stall condition lasting
        // indefinitely
        if (error_handler_.IsBGWorkStopped()) {
          return error_handler_.GetBGError();
        }

        TEST_SYNC_POINT("DBImpl::WaitUntilFlushWouldNotStallWrites:StallWait");
        ROCKS_LOG_INFO(immutable_db_options_.info_log,
                       "[%s] WaitUntilFlushWouldNotStallWrites"
                       " waiting on stall conditions to clear",
                       cfd->GetName().c_str());
        bg_cv_.Wait();
      }
      if (cfd->IsDropped()) {
        return Status::ColumnFamilyDropped();
      }
      if (shutting_down_.load(std::memory_order_acquire)) {
        return Status::ShutdownInProgress();
      }

      uint64_t earliest_memtable_id =
          std::min(cfd->mem()->GetID(), cfd->imm()->GetEarliestMemTableID());
      if (earliest_memtable_id > orig_active_memtable_id) {
        // We waited so long that the memtable we were originally waiting on
        // was flushed.
        *flush_needed = false;
        return Status::OK();
      }

      const auto& mutable_cf_options = *cfd->GetLatestMutableCFOptions();
      const auto* vstorage = cfd->current()->storage_info();

      // Skip stalling check if we're below auto-flush and auto-compaction
      // triggers. If it stalled in these conditions, that'd mean the stall
      // triggers are so low that stalling is needed for any background work.
      // In that case we shouldn't wait since background work won't be
      // scheduled.
      if (cfd->imm()->NumNotFlushed() <
              cfd->ioptions()->min_write_buffer_number_to_merge &&
          vstorage->l0_delay_trigger_count() <
              mutable_cf_options.level0_file_num_compaction_trigger) {
        break;
      }

      // check whether one extra immutable memtable or an extra L0 file would
      // cause write stalling mode to be entered. It could still enter stall
      // mode due to pending compaction bytes, but that's less common
      write_stall_condition =
          ColumnFamilyData::GetWriteStallConditionAndCause(
              cfd->imm()->NumNotFlushed() + 1,
              vstorage->l0_delay_trigger_count() + 1,
              vstorage->estimated_compaction_needed_bytes(),
              mutable_cf_options)
              .first;
    } while (write_stall_condition != WriteStallCondition::kNormal);
  }
  return Status::OK();
}
2039
+
2040
+ // Wait for memtables to be flushed for multiple column families.
2041
+ // let N = cfds.size()
2042
+ // for i in [0, N),
2043
+ // 1) if flush_memtable_ids[i] is not null, then the memtables with lower IDs
2044
+ // have to be flushed for THIS column family;
2045
+ // 2) if flush_memtable_ids[i] is null, then all memtables in THIS column
2046
+ // family have to be flushed.
2047
+ // Finish waiting when ALL column families finish flushing memtables.
2048
+ // resuming_from_bg_err indicates whether the caller is trying to resume from
2049
+ // background error or in normal processing.
2050
+ Status DBImpl::WaitForFlushMemTables(
2051
+ const autovector<ColumnFamilyData*>& cfds,
2052
+ const autovector<const uint64_t*>& flush_memtable_ids,
2053
+ bool resuming_from_bg_err) {
2054
+ int num = static_cast<int>(cfds.size());
2055
+ // Wait until the compaction completes
2056
+ InstrumentedMutexLock l(&mutex_);
2057
+ // If the caller is trying to resume from bg error, then
2058
+ // error_handler_.IsDBStopped() is true.
2059
+ while (resuming_from_bg_err || !error_handler_.IsDBStopped()) {
2060
+ if (shutting_down_.load(std::memory_order_acquire)) {
2061
+ return Status::ShutdownInProgress();
2062
+ }
2063
+ // If an error has occurred during resumption, then no need to wait.
2064
+ if (!error_handler_.GetRecoveryError().ok()) {
2065
+ break;
2066
+ }
2067
+ // If BGWorkStopped, which indicate that there is a BG error and
2068
+ // 1) soft error but requires no BG work, 2) no in auto_recovery_
2069
+ if (!resuming_from_bg_err && error_handler_.IsBGWorkStopped() &&
2070
+ error_handler_.GetBGError().severity() < Status::Severity::kHardError) {
2071
+ return error_handler_.GetBGError();
2072
+ }
2073
+
2074
+ // Number of column families that have been dropped.
2075
+ int num_dropped = 0;
2076
+ // Number of column families that have finished flush.
2077
+ int num_finished = 0;
2078
+ for (int i = 0; i < num; ++i) {
2079
+ if (cfds[i]->IsDropped()) {
2080
+ ++num_dropped;
2081
+ } else if (cfds[i]->imm()->NumNotFlushed() == 0 ||
2082
+ (flush_memtable_ids[i] != nullptr &&
2083
+ cfds[i]->imm()->GetEarliestMemTableID() >
2084
+ *flush_memtable_ids[i])) {
2085
+ ++num_finished;
2086
+ }
2087
+ }
2088
+ if (1 == num_dropped && 1 == num) {
2089
+ return Status::ColumnFamilyDropped();
2090
+ }
2091
+ // Column families involved in this flush request have either been dropped
2092
+ // or finished flush. Then it's time to finish waiting.
2093
+ if (num_dropped + num_finished == num) {
2094
+ break;
2095
+ }
2096
+ bg_cv_.Wait();
2097
+ }
2098
+ Status s;
2099
+ // If not resuming from bg error, and an error has caused the DB to stop,
2100
+ // then report the bg error to caller.
2101
+ if (!resuming_from_bg_err && error_handler_.IsDBStopped()) {
2102
+ s = error_handler_.GetBGError();
2103
+ }
2104
+ return s;
2105
+ }
2106
+
2107
+ Status DBImpl::EnableAutoCompaction(
2108
+ const std::vector<ColumnFamilyHandle*>& column_family_handles) {
2109
+ Status s;
2110
+ for (auto cf_ptr : column_family_handles) {
2111
+ Status status =
2112
+ this->SetOptions(cf_ptr, {{"disable_auto_compactions", "false"}});
2113
+ if (!status.ok()) {
2114
+ s = status;
2115
+ }
2116
+ }
2117
+
2118
+ return s;
2119
+ }
2120
+
2121
+ void DBImpl::DisableManualCompaction() {
2122
+ InstrumentedMutexLock l(&mutex_);
2123
+ manual_compaction_paused_.fetch_add(1, std::memory_order_release);
2124
+ // Wait for any pending manual compactions to finish (typically through
2125
+ // failing with `Status::Incomplete`) prior to returning. This way we are
2126
+ // guaranteed no pending manual compaction will commit while manual
2127
+ // compactions are "disabled".
2128
+ while (HasPendingManualCompaction()) {
2129
+ bg_cv_.Wait();
2130
+ }
2131
+ }
2132
+
2133
+ void DBImpl::EnableManualCompaction() {
2134
+ InstrumentedMutexLock l(&mutex_);
2135
+ assert(manual_compaction_paused_ > 0);
2136
+ manual_compaction_paused_.fetch_sub(1, std::memory_order_release);
2137
+ }
2138
+
2139
+ void DBImpl::MaybeScheduleFlushOrCompaction() {
2140
+ mutex_.AssertHeld();
2141
+ if (!opened_successfully_) {
2142
+ // Compaction may introduce data race to DB open
2143
+ return;
2144
+ }
2145
+ if (bg_work_paused_ > 0) {
2146
+ // we paused the background work
2147
+ return;
2148
+ } else if (error_handler_.IsBGWorkStopped() &&
2149
+ !error_handler_.IsRecoveryInProgress()) {
2150
+ // There has been a hard error and this call is not part of the recovery
2151
+ // sequence. Bail out here so we don't get into an endless loop of
2152
+ // scheduling BG work which will again call this function
2153
+ return;
2154
+ } else if (shutting_down_.load(std::memory_order_acquire)) {
2155
+ // DB is being deleted; no more background compactions
2156
+ return;
2157
+ }
2158
+ auto bg_job_limits = GetBGJobLimits();
2159
+ bool is_flush_pool_empty =
2160
+ env_->GetBackgroundThreads(Env::Priority::HIGH) == 0;
2161
+ while (!is_flush_pool_empty && unscheduled_flushes_ > 0 &&
2162
+ bg_flush_scheduled_ < bg_job_limits.max_flushes) {
2163
+ bg_flush_scheduled_++;
2164
+ FlushThreadArg* fta = new FlushThreadArg;
2165
+ fta->db_ = this;
2166
+ fta->thread_pri_ = Env::Priority::HIGH;
2167
+ env_->Schedule(&DBImpl::BGWorkFlush, fta, Env::Priority::HIGH, this,
2168
+ &DBImpl::UnscheduleFlushCallback);
2169
+ --unscheduled_flushes_;
2170
+ TEST_SYNC_POINT_CALLBACK(
2171
+ "DBImpl::MaybeScheduleFlushOrCompaction:AfterSchedule:0",
2172
+ &unscheduled_flushes_);
2173
+ }
2174
+
2175
+ // special case -- if high-pri (flush) thread pool is empty, then schedule
2176
+ // flushes in low-pri (compaction) thread pool.
2177
+ if (is_flush_pool_empty) {
2178
+ while (unscheduled_flushes_ > 0 &&
2179
+ bg_flush_scheduled_ + bg_compaction_scheduled_ <
2180
+ bg_job_limits.max_flushes) {
2181
+ bg_flush_scheduled_++;
2182
+ FlushThreadArg* fta = new FlushThreadArg;
2183
+ fta->db_ = this;
2184
+ fta->thread_pri_ = Env::Priority::LOW;
2185
+ env_->Schedule(&DBImpl::BGWorkFlush, fta, Env::Priority::LOW, this,
2186
+ &DBImpl::UnscheduleFlushCallback);
2187
+ --unscheduled_flushes_;
2188
+ }
2189
+ }
2190
+
2191
+ if (bg_compaction_paused_ > 0) {
2192
+ // we paused the background compaction
2193
+ return;
2194
+ } else if (error_handler_.IsBGWorkStopped()) {
2195
+ // Compaction is not part of the recovery sequence from a hard error. We
2196
+ // might get here because recovery might do a flush and install a new
2197
+ // super version, which will try to schedule pending compactions. Bail
2198
+ // out here and let the higher level recovery handle compactions
2199
+ return;
2200
+ }
2201
+
2202
+ if (HasExclusiveManualCompaction()) {
2203
+ // only manual compactions are allowed to run. don't schedule automatic
2204
+ // compactions
2205
+ TEST_SYNC_POINT("DBImpl::MaybeScheduleFlushOrCompaction:Conflict");
2206
+ return;
2207
+ }
2208
+
2209
+ while (bg_compaction_scheduled_ < bg_job_limits.max_compactions &&
2210
+ unscheduled_compactions_ > 0) {
2211
+ CompactionArg* ca = new CompactionArg;
2212
+ ca->db = this;
2213
+ ca->prepicked_compaction = nullptr;
2214
+ bg_compaction_scheduled_++;
2215
+ unscheduled_compactions_--;
2216
+ env_->Schedule(&DBImpl::BGWorkCompaction, ca, Env::Priority::LOW, this,
2217
+ &DBImpl::UnscheduleCompactionCallback);
2218
+ }
2219
+ }
2220
+
2221
+ DBImpl::BGJobLimits DBImpl::GetBGJobLimits() const {
2222
+ mutex_.AssertHeld();
2223
+ return GetBGJobLimits(mutable_db_options_.max_background_flushes,
2224
+ mutable_db_options_.max_background_compactions,
2225
+ mutable_db_options_.max_background_jobs,
2226
+ write_controller_.NeedSpeedupCompaction());
2227
+ }
2228
+
2229
+ DBImpl::BGJobLimits DBImpl::GetBGJobLimits(int max_background_flushes,
2230
+ int max_background_compactions,
2231
+ int max_background_jobs,
2232
+ bool parallelize_compactions) {
2233
+ BGJobLimits res;
2234
+ if (max_background_flushes == -1 && max_background_compactions == -1) {
2235
+ // for our first stab implementing max_background_jobs, simply allocate a
2236
+ // quarter of the threads to flushes.
2237
+ res.max_flushes = std::max(1, max_background_jobs / 4);
2238
+ res.max_compactions = std::max(1, max_background_jobs - res.max_flushes);
2239
+ } else {
2240
+ // compatibility code in case users haven't migrated to max_background_jobs,
2241
+ // which automatically computes flush/compaction limits
2242
+ res.max_flushes = std::max(1, max_background_flushes);
2243
+ res.max_compactions = std::max(1, max_background_compactions);
2244
+ }
2245
+ if (!parallelize_compactions) {
2246
+ // throttle background compactions until we deem necessary
2247
+ res.max_compactions = 1;
2248
+ }
2249
+ return res;
2250
+ }
2251
+
2252
+ void DBImpl::AddToCompactionQueue(ColumnFamilyData* cfd) {
2253
+ assert(!cfd->queued_for_compaction());
2254
+ cfd->Ref();
2255
+ compaction_queue_.push_back(cfd);
2256
+ cfd->set_queued_for_compaction(true);
2257
+ }
2258
+
2259
+ ColumnFamilyData* DBImpl::PopFirstFromCompactionQueue() {
2260
+ assert(!compaction_queue_.empty());
2261
+ auto cfd = *compaction_queue_.begin();
2262
+ compaction_queue_.pop_front();
2263
+ assert(cfd->queued_for_compaction());
2264
+ cfd->set_queued_for_compaction(false);
2265
+ return cfd;
2266
+ }
2267
+
2268
+ DBImpl::FlushRequest DBImpl::PopFirstFromFlushQueue() {
2269
+ assert(!flush_queue_.empty());
2270
+ FlushRequest flush_req = flush_queue_.front();
2271
+ flush_queue_.pop_front();
2272
+ if (!immutable_db_options_.atomic_flush) {
2273
+ assert(flush_req.size() == 1);
2274
+ }
2275
+ for (const auto& elem : flush_req) {
2276
+ if (!immutable_db_options_.atomic_flush) {
2277
+ ColumnFamilyData* cfd = elem.first;
2278
+ assert(cfd);
2279
+ assert(cfd->queued_for_flush());
2280
+ cfd->set_queued_for_flush(false);
2281
+ }
2282
+ }
2283
+ // TODO: need to unset flush reason?
2284
+ return flush_req;
2285
+ }
2286
+
2287
+ ColumnFamilyData* DBImpl::PickCompactionFromQueue(
2288
+ std::unique_ptr<TaskLimiterToken>* token, LogBuffer* log_buffer) {
2289
+ assert(!compaction_queue_.empty());
2290
+ assert(*token == nullptr);
2291
+ autovector<ColumnFamilyData*> throttled_candidates;
2292
+ ColumnFamilyData* cfd = nullptr;
2293
+ while (!compaction_queue_.empty()) {
2294
+ auto first_cfd = *compaction_queue_.begin();
2295
+ compaction_queue_.pop_front();
2296
+ assert(first_cfd->queued_for_compaction());
2297
+ if (!RequestCompactionToken(first_cfd, false, token, log_buffer)) {
2298
+ throttled_candidates.push_back(first_cfd);
2299
+ continue;
2300
+ }
2301
+ cfd = first_cfd;
2302
+ cfd->set_queued_for_compaction(false);
2303
+ break;
2304
+ }
2305
+ // Add throttled compaction candidates back to queue in the original order.
2306
+ for (auto iter = throttled_candidates.rbegin();
2307
+ iter != throttled_candidates.rend(); ++iter) {
2308
+ compaction_queue_.push_front(*iter);
2309
+ }
2310
+ return cfd;
2311
+ }
2312
+
2313
+ void DBImpl::SchedulePendingFlush(const FlushRequest& flush_req,
2314
+ FlushReason flush_reason) {
2315
+ mutex_.AssertHeld();
2316
+ if (flush_req.empty()) {
2317
+ return;
2318
+ }
2319
+ if (!immutable_db_options_.atomic_flush) {
2320
+ // For the non-atomic flush case, we never schedule multiple column
2321
+ // families in the same flush request.
2322
+ assert(flush_req.size() == 1);
2323
+ ColumnFamilyData* cfd = flush_req[0].first;
2324
+ assert(cfd);
2325
+ if (!cfd->queued_for_flush() && cfd->imm()->IsFlushPending()) {
2326
+ cfd->Ref();
2327
+ cfd->set_queued_for_flush(true);
2328
+ cfd->SetFlushReason(flush_reason);
2329
+ ++unscheduled_flushes_;
2330
+ flush_queue_.push_back(flush_req);
2331
+ }
2332
+ } else {
2333
+ for (auto& iter : flush_req) {
2334
+ ColumnFamilyData* cfd = iter.first;
2335
+ cfd->Ref();
2336
+ cfd->SetFlushReason(flush_reason);
2337
+ }
2338
+ ++unscheduled_flushes_;
2339
+ flush_queue_.push_back(flush_req);
2340
+ }
2341
+ }
2342
+
2343
+ void DBImpl::SchedulePendingCompaction(ColumnFamilyData* cfd) {
2344
+ mutex_.AssertHeld();
2345
+ if (!cfd->queued_for_compaction() && cfd->NeedsCompaction()) {
2346
+ AddToCompactionQueue(cfd);
2347
+ ++unscheduled_compactions_;
2348
+ }
2349
+ }
2350
+
2351
+ void DBImpl::SchedulePendingPurge(std::string fname, std::string dir_to_sync,
2352
+ FileType type, uint64_t number, int job_id) {
2353
+ mutex_.AssertHeld();
2354
+ PurgeFileInfo file_info(fname, dir_to_sync, type, number, job_id);
2355
+ purge_files_.insert({{number, std::move(file_info)}});
2356
+ }
2357
+
2358
+ void DBImpl::BGWorkFlush(void* arg) {
2359
+ FlushThreadArg fta = *(reinterpret_cast<FlushThreadArg*>(arg));
2360
+ delete reinterpret_cast<FlushThreadArg*>(arg);
2361
+
2362
+ IOSTATS_SET_THREAD_POOL_ID(fta.thread_pri_);
2363
+ TEST_SYNC_POINT("DBImpl::BGWorkFlush");
2364
+ static_cast_with_check<DBImpl>(fta.db_)->BackgroundCallFlush(fta.thread_pri_);
2365
+ TEST_SYNC_POINT("DBImpl::BGWorkFlush:done");
2366
+ }
2367
+
2368
+ void DBImpl::BGWorkCompaction(void* arg) {
2369
+ CompactionArg ca = *(reinterpret_cast<CompactionArg*>(arg));
2370
+ delete reinterpret_cast<CompactionArg*>(arg);
2371
+ IOSTATS_SET_THREAD_POOL_ID(Env::Priority::LOW);
2372
+ TEST_SYNC_POINT("DBImpl::BGWorkCompaction");
2373
+ auto prepicked_compaction =
2374
+ static_cast<PrepickedCompaction*>(ca.prepicked_compaction);
2375
+ static_cast_with_check<DBImpl>(ca.db)->BackgroundCallCompaction(
2376
+ prepicked_compaction, Env::Priority::LOW);
2377
+ delete prepicked_compaction;
2378
+ }
2379
+
2380
+ void DBImpl::BGWorkBottomCompaction(void* arg) {
2381
+ CompactionArg ca = *(static_cast<CompactionArg*>(arg));
2382
+ delete static_cast<CompactionArg*>(arg);
2383
+ IOSTATS_SET_THREAD_POOL_ID(Env::Priority::BOTTOM);
2384
+ TEST_SYNC_POINT("DBImpl::BGWorkBottomCompaction");
2385
+ auto* prepicked_compaction = ca.prepicked_compaction;
2386
+ assert(prepicked_compaction && prepicked_compaction->compaction &&
2387
+ !prepicked_compaction->manual_compaction_state);
2388
+ ca.db->BackgroundCallCompaction(prepicked_compaction, Env::Priority::BOTTOM);
2389
+ delete prepicked_compaction;
2390
+ }
2391
+
2392
+ void DBImpl::BGWorkPurge(void* db) {
2393
+ IOSTATS_SET_THREAD_POOL_ID(Env::Priority::HIGH);
2394
+ TEST_SYNC_POINT("DBImpl::BGWorkPurge:start");
2395
+ reinterpret_cast<DBImpl*>(db)->BackgroundCallPurge();
2396
+ TEST_SYNC_POINT("DBImpl::BGWorkPurge:end");
2397
+ }
2398
+
2399
+ void DBImpl::UnscheduleCompactionCallback(void* arg) {
2400
+ CompactionArg ca = *(reinterpret_cast<CompactionArg*>(arg));
2401
+ delete reinterpret_cast<CompactionArg*>(arg);
2402
+ if (ca.prepicked_compaction != nullptr) {
2403
+ if (ca.prepicked_compaction->compaction != nullptr) {
2404
+ delete ca.prepicked_compaction->compaction;
2405
+ }
2406
+ delete ca.prepicked_compaction;
2407
+ }
2408
+ TEST_SYNC_POINT("DBImpl::UnscheduleCompactionCallback");
2409
+ }
2410
+
2411
+ void DBImpl::UnscheduleFlushCallback(void* arg) {
2412
+ delete reinterpret_cast<FlushThreadArg*>(arg);
2413
+ TEST_SYNC_POINT("DBImpl::UnscheduleFlushCallback");
2414
+ }
2415
+
2416
+ Status DBImpl::BackgroundFlush(bool* made_progress, JobContext* job_context,
2417
+ LogBuffer* log_buffer, FlushReason* reason,
2418
+ Env::Priority thread_pri) {
2419
+ mutex_.AssertHeld();
2420
+
2421
+ Status status;
2422
+ *reason = FlushReason::kOthers;
2423
+ // If BG work is stopped due to an error, but a recovery is in progress,
2424
+ // that means this flush is part of the recovery. So allow it to go through
2425
+ if (!error_handler_.IsBGWorkStopped()) {
2426
+ if (shutting_down_.load(std::memory_order_acquire)) {
2427
+ status = Status::ShutdownInProgress();
2428
+ }
2429
+ } else if (!error_handler_.IsRecoveryInProgress()) {
2430
+ status = error_handler_.GetBGError();
2431
+ }
2432
+
2433
+ if (!status.ok()) {
2434
+ return status;
2435
+ }
2436
+
2437
+ autovector<BGFlushArg> bg_flush_args;
2438
+ std::vector<SuperVersionContext>& superversion_contexts =
2439
+ job_context->superversion_contexts;
2440
+ autovector<ColumnFamilyData*> column_families_not_to_flush;
2441
+ while (!flush_queue_.empty()) {
2442
+ // This cfd is already referenced
2443
+ const FlushRequest& flush_req = PopFirstFromFlushQueue();
2444
+ superversion_contexts.clear();
2445
+ superversion_contexts.reserve(flush_req.size());
2446
+
2447
+ for (const auto& iter : flush_req) {
2448
+ ColumnFamilyData* cfd = iter.first;
2449
+ if (cfd->IsDropped() || !cfd->imm()->IsFlushPending()) {
2450
+ // can't flush this CF, try next one
2451
+ column_families_not_to_flush.push_back(cfd);
2452
+ continue;
2453
+ }
2454
+ superversion_contexts.emplace_back(SuperVersionContext(true));
2455
+ bg_flush_args.emplace_back(cfd, iter.second,
2456
+ &(superversion_contexts.back()));
2457
+ }
2458
+ if (!bg_flush_args.empty()) {
2459
+ break;
2460
+ }
2461
+ }
2462
+
2463
+ if (!bg_flush_args.empty()) {
2464
+ auto bg_job_limits = GetBGJobLimits();
2465
+ for (const auto& arg : bg_flush_args) {
2466
+ ColumnFamilyData* cfd = arg.cfd_;
2467
+ ROCKS_LOG_BUFFER(
2468
+ log_buffer,
2469
+ "Calling FlushMemTableToOutputFile with column "
2470
+ "family [%s], flush slots available %d, compaction slots available "
2471
+ "%d, "
2472
+ "flush slots scheduled %d, compaction slots scheduled %d",
2473
+ cfd->GetName().c_str(), bg_job_limits.max_flushes,
2474
+ bg_job_limits.max_compactions, bg_flush_scheduled_,
2475
+ bg_compaction_scheduled_);
2476
+ }
2477
+ status = FlushMemTablesToOutputFiles(bg_flush_args, made_progress,
2478
+ job_context, log_buffer, thread_pri);
2479
+ TEST_SYNC_POINT("DBImpl::BackgroundFlush:BeforeFlush");
2480
+ // All the CFDs in the FlushReq must have the same flush reason, so just
2481
+ // grab the first one
2482
+ *reason = bg_flush_args[0].cfd_->GetFlushReason();
2483
+ for (auto& arg : bg_flush_args) {
2484
+ ColumnFamilyData* cfd = arg.cfd_;
2485
+ if (cfd->UnrefAndTryDelete()) {
2486
+ arg.cfd_ = nullptr;
2487
+ }
2488
+ }
2489
+ }
2490
+ for (auto cfd : column_families_not_to_flush) {
2491
+ cfd->UnrefAndTryDelete();
2492
+ }
2493
+ return status;
2494
+ }
2495
+
2496
+ void DBImpl::BackgroundCallFlush(Env::Priority thread_pri) {
2497
+ bool made_progress = false;
2498
+ JobContext job_context(next_job_id_.fetch_add(1), true);
2499
+
2500
+ TEST_SYNC_POINT("DBImpl::BackgroundCallFlush:start");
2501
+
2502
+ LogBuffer log_buffer(InfoLogLevel::INFO_LEVEL,
2503
+ immutable_db_options_.info_log.get());
2504
+ {
2505
+ InstrumentedMutexLock l(&mutex_);
2506
+ assert(bg_flush_scheduled_);
2507
+ num_running_flushes_++;
2508
+
2509
+ std::unique_ptr<std::list<uint64_t>::iterator>
2510
+ pending_outputs_inserted_elem(new std::list<uint64_t>::iterator(
2511
+ CaptureCurrentFileNumberInPendingOutputs()));
2512
+ FlushReason reason;
2513
+
2514
+ Status s = BackgroundFlush(&made_progress, &job_context, &log_buffer,
2515
+ &reason, thread_pri);
2516
+ if (!s.ok() && !s.IsShutdownInProgress() && !s.IsColumnFamilyDropped() &&
2517
+ reason != FlushReason::kErrorRecovery) {
2518
+ // Wait a little bit before retrying background flush in
2519
+ // case this is an environmental problem and we do not want to
2520
+ // chew up resources for failed flushes for the duration of
2521
+ // the problem.
2522
+ uint64_t error_cnt =
2523
+ default_cf_internal_stats_->BumpAndGetBackgroundErrorCount();
2524
+ bg_cv_.SignalAll(); // In case a waiter can proceed despite the error
2525
+ mutex_.Unlock();
2526
+ ROCKS_LOG_ERROR(immutable_db_options_.info_log,
2527
+ "Waiting after background flush error: %s"
2528
+ "Accumulated background error counts: %" PRIu64,
2529
+ s.ToString().c_str(), error_cnt);
2530
+ log_buffer.FlushBufferToLog();
2531
+ LogFlush(immutable_db_options_.info_log);
2532
+ env_->SleepForMicroseconds(1000000);
2533
+ mutex_.Lock();
2534
+ }
2535
+
2536
+ TEST_SYNC_POINT("DBImpl::BackgroundCallFlush:FlushFinish:0");
2537
+ ReleaseFileNumberFromPendingOutputs(pending_outputs_inserted_elem);
2538
+
2539
+ // If flush failed, we want to delete all temporary files that we might have
2540
+ // created. Thus, we force full scan in FindObsoleteFiles()
2541
+ FindObsoleteFiles(&job_context, !s.ok() && !s.IsShutdownInProgress() &&
2542
+ !s.IsColumnFamilyDropped());
2543
+ // delete unnecessary files if any, this is done outside the mutex
2544
+ if (job_context.HaveSomethingToClean() ||
2545
+ job_context.HaveSomethingToDelete() || !log_buffer.IsEmpty()) {
2546
+ mutex_.Unlock();
2547
+ TEST_SYNC_POINT("DBImpl::BackgroundCallFlush:FilesFound");
2548
+ // Have to flush the info logs before bg_flush_scheduled_--
2549
+ // because if bg_flush_scheduled_ becomes 0 and the lock is
2550
+ // released, the deconstructor of DB can kick in and destroy all the
2551
+ // states of DB so info_log might not be available after that point.
2552
+ // It also applies to access other states that DB owns.
2553
+ log_buffer.FlushBufferToLog();
2554
+ if (job_context.HaveSomethingToDelete()) {
2555
+ PurgeObsoleteFiles(job_context);
2556
+ }
2557
+ job_context.Clean();
2558
+ mutex_.Lock();
2559
+ }
2560
+ TEST_SYNC_POINT("DBImpl::BackgroundCallFlush:ContextCleanedUp");
2561
+
2562
+ assert(num_running_flushes_ > 0);
2563
+ num_running_flushes_--;
2564
+ bg_flush_scheduled_--;
2565
+ // See if there's more work to be done
2566
+ MaybeScheduleFlushOrCompaction();
2567
+ atomic_flush_install_cv_.SignalAll();
2568
+ bg_cv_.SignalAll();
2569
+ // IMPORTANT: there should be no code after calling SignalAll. This call may
2570
+ // signal the DB destructor that it's OK to proceed with destruction. In
2571
+ // that case, all DB variables will be dealloacated and referencing them
2572
+ // will cause trouble.
2573
+ }
2574
+ }
2575
+
2576
+ void DBImpl::BackgroundCallCompaction(PrepickedCompaction* prepicked_compaction,
2577
+ Env::Priority bg_thread_pri) {
2578
+ bool made_progress = false;
2579
+ JobContext job_context(next_job_id_.fetch_add(1), true);
2580
+ TEST_SYNC_POINT("BackgroundCallCompaction:0");
2581
+ LogBuffer log_buffer(InfoLogLevel::INFO_LEVEL,
2582
+ immutable_db_options_.info_log.get());
2583
+ {
2584
+ InstrumentedMutexLock l(&mutex_);
2585
+
2586
+ // This call will unlock/lock the mutex to wait for current running
2587
+ // IngestExternalFile() calls to finish.
2588
+ WaitForIngestFile();
2589
+
2590
+ num_running_compactions_++;
2591
+
2592
+ std::unique_ptr<std::list<uint64_t>::iterator>
2593
+ pending_outputs_inserted_elem(new std::list<uint64_t>::iterator(
2594
+ CaptureCurrentFileNumberInPendingOutputs()));
2595
+
2596
+ assert((bg_thread_pri == Env::Priority::BOTTOM &&
2597
+ bg_bottom_compaction_scheduled_) ||
2598
+ (bg_thread_pri == Env::Priority::LOW && bg_compaction_scheduled_));
2599
+ Status s = BackgroundCompaction(&made_progress, &job_context, &log_buffer,
2600
+ prepicked_compaction, bg_thread_pri);
2601
+ TEST_SYNC_POINT("BackgroundCallCompaction:1");
2602
+ if (s.IsBusy()) {
2603
+ bg_cv_.SignalAll(); // In case a waiter can proceed despite the error
2604
+ mutex_.Unlock();
2605
+ env_->SleepForMicroseconds(10000); // prevent hot loop
2606
+ mutex_.Lock();
2607
+ } else if (!s.ok() && !s.IsShutdownInProgress() &&
2608
+ !s.IsManualCompactionPaused() && !s.IsColumnFamilyDropped()) {
2609
+ // Wait a little bit before retrying background compaction in
2610
+ // case this is an environmental problem and we do not want to
2611
+ // chew up resources for failed compactions for the duration of
2612
+ // the problem.
2613
+ uint64_t error_cnt =
2614
+ default_cf_internal_stats_->BumpAndGetBackgroundErrorCount();
2615
+ bg_cv_.SignalAll(); // In case a waiter can proceed despite the error
2616
+ mutex_.Unlock();
2617
+ log_buffer.FlushBufferToLog();
2618
+ ROCKS_LOG_ERROR(immutable_db_options_.info_log,
2619
+ "Waiting after background compaction error: %s, "
2620
+ "Accumulated background error counts: %" PRIu64,
2621
+ s.ToString().c_str(), error_cnt);
2622
+ LogFlush(immutable_db_options_.info_log);
2623
+ env_->SleepForMicroseconds(1000000);
2624
+ mutex_.Lock();
2625
+ } else if (s.IsManualCompactionPaused()) {
2626
+ ManualCompactionState* m = prepicked_compaction->manual_compaction_state;
2627
+ assert(m);
2628
+ ROCKS_LOG_BUFFER(&log_buffer, "[%s] [JOB %d] Manual compaction paused",
2629
+ m->cfd->GetName().c_str(), job_context.job_id);
2630
+ }
2631
+
2632
+ ReleaseFileNumberFromPendingOutputs(pending_outputs_inserted_elem);
2633
+
2634
+ // If compaction failed, we want to delete all temporary files that we might
2635
+ // have created (they might not be all recorded in job_context in case of a
2636
+ // failure). Thus, we force full scan in FindObsoleteFiles()
2637
+ FindObsoleteFiles(&job_context, !s.ok() && !s.IsShutdownInProgress() &&
2638
+ !s.IsManualCompactionPaused() &&
2639
+ !s.IsColumnFamilyDropped() &&
2640
+ !s.IsBusy());
2641
+ TEST_SYNC_POINT("DBImpl::BackgroundCallCompaction:FoundObsoleteFiles");
2642
+
2643
+ // delete unnecessary files if any, this is done outside the mutex
2644
+ if (job_context.HaveSomethingToClean() ||
2645
+ job_context.HaveSomethingToDelete() || !log_buffer.IsEmpty()) {
2646
+ mutex_.Unlock();
2647
+ // Have to flush the info logs before bg_compaction_scheduled_--
2648
+ // because if bg_flush_scheduled_ becomes 0 and the lock is
2649
+ // released, the deconstructor of DB can kick in and destroy all the
2650
+ // states of DB so info_log might not be available after that point.
2651
+ // It also applies to access other states that DB owns.
2652
+ log_buffer.FlushBufferToLog();
2653
+ if (job_context.HaveSomethingToDelete()) {
2654
+ PurgeObsoleteFiles(job_context);
2655
+ TEST_SYNC_POINT("DBImpl::BackgroundCallCompaction:PurgedObsoleteFiles");
2656
+ }
2657
+ job_context.Clean();
2658
+ mutex_.Lock();
2659
+ }
2660
+
2661
+ assert(num_running_compactions_ > 0);
2662
+ num_running_compactions_--;
2663
+ if (bg_thread_pri == Env::Priority::LOW) {
2664
+ bg_compaction_scheduled_--;
2665
+ } else {
2666
+ assert(bg_thread_pri == Env::Priority::BOTTOM);
2667
+ bg_bottom_compaction_scheduled_--;
2668
+ }
2669
+
2670
+ versions_->GetColumnFamilySet()->FreeDeadColumnFamilies();
2671
+
2672
+ // See if there's more work to be done
2673
+ MaybeScheduleFlushOrCompaction();
2674
+ if (made_progress ||
2675
+ (bg_compaction_scheduled_ == 0 &&
2676
+ bg_bottom_compaction_scheduled_ == 0) ||
2677
+ HasPendingManualCompaction() || unscheduled_compactions_ == 0) {
2678
+ // signal if
2679
+ // * made_progress -- need to wakeup DelayWrite
2680
+ // * bg_{bottom,}_compaction_scheduled_ == 0 -- need to wakeup ~DBImpl
2681
+ // * HasPendingManualCompaction -- need to wakeup RunManualCompaction
2682
+ // If none of this is true, there is no need to signal since nobody is
2683
+ // waiting for it
2684
+ bg_cv_.SignalAll();
2685
+ }
2686
+ // IMPORTANT: there should be no code after calling SignalAll. This call may
2687
+ // signal the DB destructor that it's OK to proceed with destruction. In
2688
+ // that case, all DB variables will be dealloacated and referencing them
2689
+ // will cause trouble.
2690
+ }
2691
+ }
2692
+
2693
+ Status DBImpl::BackgroundCompaction(bool* made_progress,
2694
+ JobContext* job_context,
2695
+ LogBuffer* log_buffer,
2696
+ PrepickedCompaction* prepicked_compaction,
2697
+ Env::Priority thread_pri) {
2698
+ ManualCompactionState* manual_compaction =
2699
+ prepicked_compaction == nullptr
2700
+ ? nullptr
2701
+ : prepicked_compaction->manual_compaction_state;
2702
+ *made_progress = false;
2703
+ mutex_.AssertHeld();
2704
+ TEST_SYNC_POINT("DBImpl::BackgroundCompaction:Start");
2705
+
2706
+ bool is_manual = (manual_compaction != nullptr);
2707
+ std::unique_ptr<Compaction> c;
2708
+ if (prepicked_compaction != nullptr &&
2709
+ prepicked_compaction->compaction != nullptr) {
2710
+ c.reset(prepicked_compaction->compaction);
2711
+ }
2712
+ bool is_prepicked = is_manual || c;
2713
+
2714
+ // (manual_compaction->in_progress == false);
2715
+ bool trivial_move_disallowed =
2716
+ is_manual && manual_compaction->disallow_trivial_move;
2717
+
2718
+ CompactionJobStats compaction_job_stats;
2719
+ Status status;
2720
+ if (!error_handler_.IsBGWorkStopped()) {
2721
+ if (shutting_down_.load(std::memory_order_acquire)) {
2722
+ status = Status::ShutdownInProgress();
2723
+ } else if (is_manual &&
2724
+ manual_compaction_paused_.load(std::memory_order_acquire) > 0) {
2725
+ status = Status::Incomplete(Status::SubCode::kManualCompactionPaused);
2726
+ }
2727
+ } else {
2728
+ status = error_handler_.GetBGError();
2729
+ // If we get here, it means a hard error happened after this compaction
2730
+ // was scheduled by MaybeScheduleFlushOrCompaction(), but before it got
2731
+ // a chance to execute. Since we didn't pop a cfd from the compaction
2732
+ // queue, increment unscheduled_compactions_
2733
+ unscheduled_compactions_++;
2734
+ }
2735
+
2736
+ if (!status.ok()) {
2737
+ if (is_manual) {
2738
+ manual_compaction->status = status;
2739
+ manual_compaction->done = true;
2740
+ manual_compaction->in_progress = false;
2741
+ manual_compaction = nullptr;
2742
+ }
2743
+ if (c) {
2744
+ c->ReleaseCompactionFiles(status);
2745
+ c.reset();
2746
+ }
2747
+ return status;
2748
+ }
2749
+
2750
+ if (is_manual) {
2751
+ // another thread cannot pick up the same work
2752
+ manual_compaction->in_progress = true;
2753
+ }
2754
+
2755
+ std::unique_ptr<TaskLimiterToken> task_token;
2756
+
2757
+ // InternalKey manual_end_storage;
2758
+ // InternalKey* manual_end = &manual_end_storage;
2759
+ bool sfm_reserved_compact_space = false;
2760
+ if (is_manual) {
2761
+ ManualCompactionState* m = manual_compaction;
2762
+ assert(m->in_progress);
2763
+ if (!c) {
2764
+ m->done = true;
2765
+ m->manual_end = nullptr;
2766
+ ROCKS_LOG_BUFFER(
2767
+ log_buffer,
2768
+ "[%s] Manual compaction from level-%d from %s .. "
2769
+ "%s; nothing to do\n",
2770
+ m->cfd->GetName().c_str(), m->input_level,
2771
+ (m->begin ? m->begin->DebugString(true).c_str() : "(begin)"),
2772
+ (m->end ? m->end->DebugString(true).c_str() : "(end)"));
2773
+ } else {
2774
+ // First check if we have enough room to do the compaction
2775
+ bool enough_room = EnoughRoomForCompaction(
2776
+ m->cfd, *(c->inputs()), &sfm_reserved_compact_space, log_buffer);
2777
+
2778
+ if (!enough_room) {
2779
+ // Then don't do the compaction
2780
+ c->ReleaseCompactionFiles(status);
2781
+ c.reset();
2782
+ // m's vars will get set properly at the end of this function,
2783
+ // as long as status == CompactionTooLarge
2784
+ status = Status::CompactionTooLarge();
2785
+ } else {
2786
+ ROCKS_LOG_BUFFER(
2787
+ log_buffer,
2788
+ "[%s] Manual compaction from level-%d to level-%d from %s .. "
2789
+ "%s; will stop at %s\n",
2790
+ m->cfd->GetName().c_str(), m->input_level, c->output_level(),
2791
+ (m->begin ? m->begin->DebugString(true).c_str() : "(begin)"),
2792
+ (m->end ? m->end->DebugString(true).c_str() : "(end)"),
2793
+ ((m->done || m->manual_end == nullptr)
2794
+ ? "(end)"
2795
+ : m->manual_end->DebugString(true).c_str()));
2796
+ }
2797
+ }
2798
+ } else if (!is_prepicked && !compaction_queue_.empty()) {
2799
+ if (HasExclusiveManualCompaction()) {
2800
+ // Can't compact right now, but try again later
2801
+ TEST_SYNC_POINT("DBImpl::BackgroundCompaction()::Conflict");
2802
+
2803
+ // Stay in the compaction queue.
2804
+ unscheduled_compactions_++;
2805
+
2806
+ return Status::OK();
2807
+ }
2808
+
2809
+ auto cfd = PickCompactionFromQueue(&task_token, log_buffer);
2810
+ if (cfd == nullptr) {
2811
+ // Can't find any executable task from the compaction queue.
2812
+ // All tasks have been throttled by compaction thread limiter.
2813
+ ++unscheduled_compactions_;
2814
+ return Status::Busy();
2815
+ }
2816
+
2817
+ // We unreference here because the following code will take a Ref() on
2818
+ // this cfd if it is going to use it (Compaction class holds a
2819
+ // reference).
2820
+ // This will all happen under a mutex so we don't have to be afraid of
2821
+ // somebody else deleting it.
2822
+ if (cfd->UnrefAndTryDelete()) {
2823
+ // This was the last reference of the column family, so no need to
2824
+ // compact.
2825
+ return Status::OK();
2826
+ }
2827
+
2828
+ // Pick up latest mutable CF Options and use it throughout the
2829
+ // compaction job
2830
+ // Compaction makes a copy of the latest MutableCFOptions. It should be used
2831
+ // throughout the compaction procedure to make sure consistency. It will
2832
+ // eventually be installed into SuperVersion
2833
+ auto* mutable_cf_options = cfd->GetLatestMutableCFOptions();
2834
+ if (!mutable_cf_options->disable_auto_compactions && !cfd->IsDropped()) {
2835
+ // NOTE: try to avoid unnecessary copy of MutableCFOptions if
2836
+ // compaction is not necessary. Need to make sure mutex is held
2837
+ // until we make a copy in the following code
2838
+ TEST_SYNC_POINT("DBImpl::BackgroundCompaction():BeforePickCompaction");
2839
+ c.reset(cfd->PickCompaction(*mutable_cf_options, mutable_db_options_,
2840
+ log_buffer));
2841
+ TEST_SYNC_POINT("DBImpl::BackgroundCompaction():AfterPickCompaction");
2842
+
2843
+ if (c != nullptr) {
2844
+ bool enough_room = EnoughRoomForCompaction(
2845
+ cfd, *(c->inputs()), &sfm_reserved_compact_space, log_buffer);
2846
+
2847
+ if (!enough_room) {
2848
+ // Then don't do the compaction
2849
+ c->ReleaseCompactionFiles(status);
2850
+ c->column_family_data()
2851
+ ->current()
2852
+ ->storage_info()
2853
+ ->ComputeCompactionScore(*(c->immutable_cf_options()),
2854
+ *(c->mutable_cf_options()));
2855
+ AddToCompactionQueue(cfd);
2856
+ ++unscheduled_compactions_;
2857
+
2858
+ c.reset();
2859
+ // Don't need to sleep here, because BackgroundCallCompaction
2860
+ // will sleep if !s.ok()
2861
+ status = Status::CompactionTooLarge();
2862
+ } else {
2863
+ // update statistics
2864
+ RecordInHistogram(stats_, NUM_FILES_IN_SINGLE_COMPACTION,
2865
+ c->inputs(0)->size());
2866
+ // There are three things that can change compaction score:
2867
+ // 1) When flush or compaction finish. This case is covered by
2868
+ // InstallSuperVersionAndScheduleWork
2869
+ // 2) When MutableCFOptions changes. This case is also covered by
2870
+ // InstallSuperVersionAndScheduleWork, because this is when the new
2871
+ // options take effect.
2872
+ // 3) When we Pick a new compaction, we "remove" those files being
2873
+ // compacted from the calculation, which then influences compaction
2874
+ // score. Here we check if we need the new compaction even without the
2875
+ // files that are currently being compacted. If we need another
2876
+ // compaction, we might be able to execute it in parallel, so we add
2877
+ // it to the queue and schedule a new thread.
2878
+ if (cfd->NeedsCompaction()) {
2879
+ // Yes, we need more compactions!
2880
+ AddToCompactionQueue(cfd);
2881
+ ++unscheduled_compactions_;
2882
+ MaybeScheduleFlushOrCompaction();
2883
+ }
2884
+ }
2885
+ }
2886
+ }
2887
+ }
2888
+
2889
+ IOStatus io_s;
2890
+ if (!c) {
2891
+ // Nothing to do
2892
+ ROCKS_LOG_BUFFER(log_buffer, "Compaction nothing to do");
2893
+ } else if (c->deletion_compaction()) {
2894
+ // TODO(icanadi) Do we want to honor snapshots here? i.e. not delete old
2895
+ // file if there is alive snapshot pointing to it
2896
+ TEST_SYNC_POINT_CALLBACK("DBImpl::BackgroundCompaction:BeforeCompaction",
2897
+ c->column_family_data());
2898
+ assert(c->num_input_files(1) == 0);
2899
+ assert(c->level() == 0);
2900
+ assert(c->column_family_data()->ioptions()->compaction_style ==
2901
+ kCompactionStyleFIFO);
2902
+
2903
+ compaction_job_stats.num_input_files = c->num_input_files(0);
2904
+
2905
+ NotifyOnCompactionBegin(c->column_family_data(), c.get(), status,
2906
+ compaction_job_stats, job_context->job_id);
2907
+
2908
+ for (const auto& f : *c->inputs(0)) {
2909
+ c->edit()->DeleteFile(c->level(), f->fd.GetNumber());
2910
+ }
2911
+ status = versions_->LogAndApply(c->column_family_data(),
2912
+ *c->mutable_cf_options(), c->edit(),
2913
+ &mutex_, directories_.GetDbDir());
2914
+ io_s = versions_->io_status();
2915
+ InstallSuperVersionAndScheduleWork(c->column_family_data(),
2916
+ &job_context->superversion_contexts[0],
2917
+ *c->mutable_cf_options());
2918
+ ROCKS_LOG_BUFFER(log_buffer, "[%s] Deleted %d files\n",
2919
+ c->column_family_data()->GetName().c_str(),
2920
+ c->num_input_files(0));
2921
+ *made_progress = true;
2922
+ TEST_SYNC_POINT_CALLBACK("DBImpl::BackgroundCompaction:AfterCompaction",
2923
+ c->column_family_data());
2924
+ } else if (!trivial_move_disallowed && c->IsTrivialMove()) {
2925
+ TEST_SYNC_POINT("DBImpl::BackgroundCompaction:TrivialMove");
2926
+ TEST_SYNC_POINT_CALLBACK("DBImpl::BackgroundCompaction:BeforeCompaction",
2927
+ c->column_family_data());
2928
+ // Instrument for event update
2929
+ // TODO(yhchiang): add op details for showing trivial-move.
2930
+ ThreadStatusUtil::SetColumnFamily(
2931
+ c->column_family_data(), c->column_family_data()->ioptions()->env,
2932
+ immutable_db_options_.enable_thread_tracking);
2933
+ ThreadStatusUtil::SetThreadOperation(ThreadStatus::OP_COMPACTION);
2934
+
2935
+ compaction_job_stats.num_input_files = c->num_input_files(0);
2936
+
2937
+ NotifyOnCompactionBegin(c->column_family_data(), c.get(), status,
2938
+ compaction_job_stats, job_context->job_id);
2939
+
2940
+ // Move files to next level
2941
+ int32_t moved_files = 0;
2942
+ int64_t moved_bytes = 0;
2943
+ for (unsigned int l = 0; l < c->num_input_levels(); l++) {
2944
+ if (c->level(l) == c->output_level()) {
2945
+ continue;
2946
+ }
2947
+ for (size_t i = 0; i < c->num_input_files(l); i++) {
2948
+ FileMetaData* f = c->input(l, i);
2949
+ c->edit()->DeleteFile(c->level(l), f->fd.GetNumber());
2950
+ c->edit()->AddFile(c->output_level(), f->fd.GetNumber(),
2951
+ f->fd.GetPathId(), f->fd.GetFileSize(), f->smallest,
2952
+ f->largest, f->fd.smallest_seqno,
2953
+ f->fd.largest_seqno, f->marked_for_compaction,
2954
+ f->oldest_blob_file_number, f->oldest_ancester_time,
2955
+ f->file_creation_time, f->file_checksum,
2956
+ f->file_checksum_func_name);
2957
+
2958
+ ROCKS_LOG_BUFFER(
2959
+ log_buffer,
2960
+ "[%s] Moving #%" PRIu64 " to level-%d %" PRIu64 " bytes\n",
2961
+ c->column_family_data()->GetName().c_str(), f->fd.GetNumber(),
2962
+ c->output_level(), f->fd.GetFileSize());
2963
+ ++moved_files;
2964
+ moved_bytes += f->fd.GetFileSize();
2965
+ }
2966
+ }
2967
+
2968
+ status = versions_->LogAndApply(c->column_family_data(),
2969
+ *c->mutable_cf_options(), c->edit(),
2970
+ &mutex_, directories_.GetDbDir());
2971
+ io_s = versions_->io_status();
2972
+ // Use latest MutableCFOptions
2973
+ InstallSuperVersionAndScheduleWork(c->column_family_data(),
2974
+ &job_context->superversion_contexts[0],
2975
+ *c->mutable_cf_options());
2976
+
2977
+ VersionStorageInfo::LevelSummaryStorage tmp;
2978
+ c->column_family_data()->internal_stats()->IncBytesMoved(c->output_level(),
2979
+ moved_bytes);
2980
+ {
2981
+ event_logger_.LogToBuffer(log_buffer)
2982
+ << "job" << job_context->job_id << "event"
2983
+ << "trivial_move"
2984
+ << "destination_level" << c->output_level() << "files" << moved_files
2985
+ << "total_files_size" << moved_bytes;
2986
+ }
2987
+ ROCKS_LOG_BUFFER(
2988
+ log_buffer,
2989
+ "[%s] Moved #%d files to level-%d %" PRIu64 " bytes %s: %s\n",
2990
+ c->column_family_data()->GetName().c_str(), moved_files,
2991
+ c->output_level(), moved_bytes, status.ToString().c_str(),
2992
+ c->column_family_data()->current()->storage_info()->LevelSummary(&tmp));
2993
+ *made_progress = true;
2994
+
2995
+ // Clear Instrument
2996
+ ThreadStatusUtil::ResetThreadStatus();
2997
+ TEST_SYNC_POINT_CALLBACK("DBImpl::BackgroundCompaction:AfterCompaction",
2998
+ c->column_family_data());
2999
+ } else if (!is_prepicked && c->output_level() > 0 &&
3000
+ c->output_level() ==
3001
+ c->column_family_data()
3002
+ ->current()
3003
+ ->storage_info()
3004
+ ->MaxOutputLevel(
3005
+ immutable_db_options_.allow_ingest_behind) &&
3006
+ env_->GetBackgroundThreads(Env::Priority::BOTTOM) > 0) {
3007
+ // Forward compactions involving last level to the bottom pool if it exists,
3008
+ // such that compactions unlikely to contribute to write stalls can be
3009
+ // delayed or deprioritized.
3010
+ TEST_SYNC_POINT("DBImpl::BackgroundCompaction:ForwardToBottomPriPool");
3011
+ CompactionArg* ca = new CompactionArg;
3012
+ ca->db = this;
3013
+ ca->prepicked_compaction = new PrepickedCompaction;
3014
+ ca->prepicked_compaction->compaction = c.release();
3015
+ ca->prepicked_compaction->manual_compaction_state = nullptr;
3016
+ // Transfer requested token, so it doesn't need to do it again.
3017
+ ca->prepicked_compaction->task_token = std::move(task_token);
3018
+ ++bg_bottom_compaction_scheduled_;
3019
+ env_->Schedule(&DBImpl::BGWorkBottomCompaction, ca, Env::Priority::BOTTOM,
3020
+ this, &DBImpl::UnscheduleCompactionCallback);
3021
+ } else {
3022
+ TEST_SYNC_POINT_CALLBACK("DBImpl::BackgroundCompaction:BeforeCompaction",
3023
+ c->column_family_data());
3024
+ int output_level __attribute__((__unused__));
3025
+ output_level = c->output_level();
3026
+ TEST_SYNC_POINT_CALLBACK("DBImpl::BackgroundCompaction:NonTrivial",
3027
+ &output_level);
3028
+ std::vector<SequenceNumber> snapshot_seqs;
3029
+ SequenceNumber earliest_write_conflict_snapshot;
3030
+ SnapshotChecker* snapshot_checker;
3031
+ GetSnapshotContext(job_context, &snapshot_seqs,
3032
+ &earliest_write_conflict_snapshot, &snapshot_checker);
3033
+ assert(is_snapshot_supported_ || snapshots_.empty());
3034
+ CompactionJob compaction_job(
3035
+ job_context->job_id, c.get(), immutable_db_options_,
3036
+ file_options_for_compaction_, versions_.get(), &shutting_down_,
3037
+ preserve_deletes_seqnum_.load(), log_buffer, directories_.GetDbDir(),
3038
+ GetDataDir(c->column_family_data(), c->output_path_id()),
3039
+ GetDataDir(c->column_family_data(), 0), stats_, &mutex_,
3040
+ &error_handler_, snapshot_seqs, earliest_write_conflict_snapshot,
3041
+ snapshot_checker, table_cache_, &event_logger_,
3042
+ c->mutable_cf_options()->paranoid_file_checks,
3043
+ c->mutable_cf_options()->report_bg_io_stats, dbname_,
3044
+ &compaction_job_stats, thread_pri, io_tracer_,
3045
+ is_manual ? &manual_compaction_paused_ : nullptr, db_id_,
3046
+ db_session_id_, c->column_family_data()->GetFullHistoryTsLow());
3047
+ compaction_job.Prepare();
3048
+
3049
+ NotifyOnCompactionBegin(c->column_family_data(), c.get(), status,
3050
+ compaction_job_stats, job_context->job_id);
3051
+ mutex_.Unlock();
3052
+ TEST_SYNC_POINT_CALLBACK(
3053
+ "DBImpl::BackgroundCompaction:NonTrivial:BeforeRun", nullptr);
3054
 + // Should we handle this error?
3055
+ compaction_job.Run().PermitUncheckedError();
3056
+ TEST_SYNC_POINT("DBImpl::BackgroundCompaction:NonTrivial:AfterRun");
3057
+ mutex_.Lock();
3058
+
3059
+ status = compaction_job.Install(*c->mutable_cf_options());
3060
+ io_s = compaction_job.io_status();
3061
+ if (status.ok()) {
3062
+ InstallSuperVersionAndScheduleWork(c->column_family_data(),
3063
+ &job_context->superversion_contexts[0],
3064
+ *c->mutable_cf_options());
3065
+ }
3066
+ *made_progress = true;
3067
+ TEST_SYNC_POINT_CALLBACK("DBImpl::BackgroundCompaction:AfterCompaction",
3068
+ c->column_family_data());
3069
+ }
3070
+
3071
+ if (status.ok() && !io_s.ok()) {
3072
+ status = io_s;
3073
+ } else {
3074
+ io_s.PermitUncheckedError();
3075
+ }
3076
+
3077
+ if (c != nullptr) {
3078
+ c->ReleaseCompactionFiles(status);
3079
+ *made_progress = true;
3080
+
3081
+ #ifndef ROCKSDB_LITE
3082
+ // Need to make sure SstFileManager does its bookkeeping
3083
+ auto sfm = static_cast<SstFileManagerImpl*>(
3084
+ immutable_db_options_.sst_file_manager.get());
3085
+ if (sfm && sfm_reserved_compact_space) {
3086
+ sfm->OnCompactionCompletion(c.get());
3087
+ }
3088
+ #endif // ROCKSDB_LITE
3089
+
3090
+ NotifyOnCompactionCompleted(c->column_family_data(), c.get(), status,
3091
+ compaction_job_stats, job_context->job_id);
3092
+ }
3093
+
3094
+ if (status.ok() || status.IsCompactionTooLarge() ||
3095
+ status.IsManualCompactionPaused()) {
3096
+ // Done
3097
+ } else if (status.IsColumnFamilyDropped() || status.IsShutdownInProgress()) {
3098
+ // Ignore compaction errors found during shutting down
3099
+ } else {
3100
+ ROCKS_LOG_WARN(immutable_db_options_.info_log, "Compaction error: %s",
3101
+ status.ToString().c_str());
3102
+ if (!io_s.ok()) {
3103
+ // Error while writing to MANIFEST.
3104
+ // In fact, versions_->io_status() can also be the result of renaming
3105
+ // CURRENT file. With current code, it's just difficult to tell. So just
3106
+ // be pessimistic and try write to a new MANIFEST.
3107
+ // TODO: distinguish between MANIFEST write and CURRENT renaming
3108
+ auto err_reason = versions_->io_status().ok()
3109
+ ? BackgroundErrorReason::kCompaction
3110
+ : BackgroundErrorReason::kManifestWrite;
3111
+ error_handler_.SetBGError(io_s, err_reason);
3112
+ } else {
3113
+ error_handler_.SetBGError(status, BackgroundErrorReason::kCompaction);
3114
+ }
3115
+ if (c != nullptr && !is_manual && !error_handler_.IsBGWorkStopped()) {
3116
+ // Put this cfd back in the compaction queue so we can retry after some
3117
+ // time
3118
+ auto cfd = c->column_family_data();
3119
+ assert(cfd != nullptr);
3120
+ // Since this compaction failed, we need to recompute the score so it
3121
+ // takes the original input files into account
3122
+ c->column_family_data()
3123
+ ->current()
3124
+ ->storage_info()
3125
+ ->ComputeCompactionScore(*(c->immutable_cf_options()),
3126
+ *(c->mutable_cf_options()));
3127
+ if (!cfd->queued_for_compaction()) {
3128
+ AddToCompactionQueue(cfd);
3129
+ ++unscheduled_compactions_;
3130
+ }
3131
+ }
3132
+ }
3133
+ // this will unref its input_version and column_family_data
3134
+ c.reset();
3135
+
3136
+ if (is_manual) {
3137
+ ManualCompactionState* m = manual_compaction;
3138
+ if (!status.ok()) {
3139
+ m->status = status;
3140
+ m->done = true;
3141
+ }
3142
+ // For universal compaction:
3143
+ // Because universal compaction always happens at level 0, so one
3144
+ // compaction will pick up all overlapped files. No files will be
3145
+ // filtered out due to size limit and left for a successive compaction.
3146
+ // So we can safely conclude the current compaction.
3147
+ //
3148
+ // Also note that, if we don't stop here, then the current compaction
3149
+ // writes a new file back to level 0, which will be used in successive
3150
+ // compaction. Hence the manual compaction will never finish.
3151
+ //
3152
+ // Stop the compaction if manual_end points to nullptr -- this means
3153
+ // that we compacted the whole range. manual_end should always point
3154
+ // to nullptr in case of universal compaction
3155
+ if (m->manual_end == nullptr) {
3156
+ m->done = true;
3157
+ }
3158
+ if (!m->done) {
3159
+ // We only compacted part of the requested range. Update *m
3160
+ // to the range that is left to be compacted.
3161
+ // Universal and FIFO compactions should always compact the whole range
3162
+ assert(m->cfd->ioptions()->compaction_style !=
3163
+ kCompactionStyleUniversal ||
3164
+ m->cfd->ioptions()->num_levels > 1);
3165
+ assert(m->cfd->ioptions()->compaction_style != kCompactionStyleFIFO);
3166
+ m->tmp_storage = *m->manual_end;
3167
+ m->begin = &m->tmp_storage;
3168
+ m->incomplete = true;
3169
+ }
3170
+ m->in_progress = false; // not being processed anymore
3171
+ }
3172
+ TEST_SYNC_POINT("DBImpl::BackgroundCompaction:Finish");
3173
+ return status;
3174
+ }
3175
+
3176
+ bool DBImpl::HasPendingManualCompaction() {
3177
+ return (!manual_compaction_dequeue_.empty());
3178
+ }
3179
+
3180
+ void DBImpl::AddManualCompaction(DBImpl::ManualCompactionState* m) {
3181
+ manual_compaction_dequeue_.push_back(m);
3182
+ }
3183
+
3184
+ void DBImpl::RemoveManualCompaction(DBImpl::ManualCompactionState* m) {
3185
+ // Remove from queue
3186
+ std::deque<ManualCompactionState*>::iterator it =
3187
+ manual_compaction_dequeue_.begin();
3188
+ while (it != manual_compaction_dequeue_.end()) {
3189
+ if (m == (*it)) {
3190
+ it = manual_compaction_dequeue_.erase(it);
3191
+ return;
3192
+ }
3193
+ ++it;
3194
+ }
3195
+ assert(false);
3196
+ return;
3197
+ }
3198
+
3199
+ bool DBImpl::ShouldntRunManualCompaction(ManualCompactionState* m) {
3200
+ if (num_running_ingest_file_ > 0) {
3201
+ // We need to wait for other IngestExternalFile() calls to finish
3202
+ // before running a manual compaction.
3203
+ return true;
3204
+ }
3205
+ if (m->exclusive) {
3206
+ return (bg_bottom_compaction_scheduled_ > 0 ||
3207
+ bg_compaction_scheduled_ > 0);
3208
+ }
3209
+ std::deque<ManualCompactionState*>::iterator it =
3210
+ manual_compaction_dequeue_.begin();
3211
+ bool seen = false;
3212
+ while (it != manual_compaction_dequeue_.end()) {
3213
+ if (m == (*it)) {
3214
+ ++it;
3215
+ seen = true;
3216
+ continue;
3217
+ } else if (MCOverlap(m, (*it)) && (!seen && !(*it)->in_progress)) {
3218
+ // Consider the other manual compaction *it, conflicts if:
3219
+ // overlaps with m
3220
+ // and (*it) is ahead in the queue and is not yet in progress
3221
+ return true;
3222
+ }
3223
+ ++it;
3224
+ }
3225
+ return false;
3226
+ }
3227
+
3228
+ bool DBImpl::HaveManualCompaction(ColumnFamilyData* cfd) {
3229
+ // Remove from priority queue
3230
+ std::deque<ManualCompactionState*>::iterator it =
3231
+ manual_compaction_dequeue_.begin();
3232
+ while (it != manual_compaction_dequeue_.end()) {
3233
+ if ((*it)->exclusive) {
3234
+ return true;
3235
+ }
3236
+ if ((cfd == (*it)->cfd) && (!((*it)->in_progress || (*it)->done))) {
3237
+ // Allow automatic compaction if manual compaction is
3238
+ // in progress
3239
+ return true;
3240
+ }
3241
+ ++it;
3242
+ }
3243
+ return false;
3244
+ }
3245
+
3246
+ bool DBImpl::HasExclusiveManualCompaction() {
3247
+ // Remove from priority queue
3248
+ std::deque<ManualCompactionState*>::iterator it =
3249
+ manual_compaction_dequeue_.begin();
3250
+ while (it != manual_compaction_dequeue_.end()) {
3251
+ if ((*it)->exclusive) {
3252
+ return true;
3253
+ }
3254
+ ++it;
3255
+ }
3256
+ return false;
3257
+ }
3258
+
3259
+ bool DBImpl::MCOverlap(ManualCompactionState* m, ManualCompactionState* m1) {
3260
+ if ((m->exclusive) || (m1->exclusive)) {
3261
+ return true;
3262
+ }
3263
+ if (m->cfd != m1->cfd) {
3264
+ return false;
3265
+ }
3266
+ return true;
3267
+ }
3268
+
3269
+ #ifndef ROCKSDB_LITE
3270
+ void DBImpl::BuildCompactionJobInfo(
3271
+ const ColumnFamilyData* cfd, Compaction* c, const Status& st,
3272
+ const CompactionJobStats& compaction_job_stats, const int job_id,
3273
+ const Version* current, CompactionJobInfo* compaction_job_info) const {
3274
+ assert(compaction_job_info != nullptr);
3275
+ compaction_job_info->cf_id = cfd->GetID();
3276
+ compaction_job_info->cf_name = cfd->GetName();
3277
+ compaction_job_info->status = st;
3278
+ compaction_job_info->thread_id = env_->GetThreadID();
3279
+ compaction_job_info->job_id = job_id;
3280
+ compaction_job_info->base_input_level = c->start_level();
3281
+ compaction_job_info->output_level = c->output_level();
3282
+ compaction_job_info->stats = compaction_job_stats;
3283
+ compaction_job_info->table_properties = c->GetOutputTableProperties();
3284
+ compaction_job_info->compaction_reason = c->compaction_reason();
3285
+ compaction_job_info->compression = c->output_compression();
3286
+ for (size_t i = 0; i < c->num_input_levels(); ++i) {
3287
+ for (const auto fmd : *c->inputs(i)) {
3288
+ const FileDescriptor& desc = fmd->fd;
3289
+ const uint64_t file_number = desc.GetNumber();
3290
+ auto fn = TableFileName(c->immutable_cf_options()->cf_paths, file_number,
3291
+ desc.GetPathId());
3292
+ compaction_job_info->input_files.push_back(fn);
3293
+ compaction_job_info->input_file_infos.push_back(CompactionFileInfo{
3294
+ static_cast<int>(i), file_number, fmd->oldest_blob_file_number});
3295
+ if (compaction_job_info->table_properties.count(fn) == 0) {
3296
+ std::shared_ptr<const TableProperties> tp;
3297
+ auto s = current->GetTableProperties(&tp, fmd, &fn);
3298
+ if (s.ok()) {
3299
+ compaction_job_info->table_properties[fn] = tp;
3300
+ }
3301
+ }
3302
+ }
3303
+ }
3304
+ for (const auto& newf : c->edit()->GetNewFiles()) {
3305
+ const FileMetaData& meta = newf.second;
3306
+ const FileDescriptor& desc = meta.fd;
3307
+ const uint64_t file_number = desc.GetNumber();
3308
+ compaction_job_info->output_files.push_back(TableFileName(
3309
+ c->immutable_cf_options()->cf_paths, file_number, desc.GetPathId()));
3310
+ compaction_job_info->output_file_infos.push_back(CompactionFileInfo{
3311
+ newf.first, file_number, meta.oldest_blob_file_number});
3312
+ }
3313
+ }
3314
+ #endif
3315
+
3316
+ // SuperVersionContext gets created and destructed outside of the lock --
3317
+ // we use this conveniently to:
3318
+ // * malloc one SuperVersion() outside of the lock -- new_superversion
3319
+ // * delete SuperVersion()s outside of the lock -- superversions_to_free
3320
+ //
3321
+ // However, if InstallSuperVersionAndScheduleWork() gets called twice with the
3322
+ // same sv_context, we can't reuse the SuperVersion() that got
3323
+ // malloced because
3324
+ // first call already used it. In that rare case, we take a hit and create a
3325
+ // new SuperVersion() inside of the mutex. We do similar thing
3326
+ // for superversion_to_free
3327
+
3328
+ void DBImpl::InstallSuperVersionAndScheduleWork(
3329
+ ColumnFamilyData* cfd, SuperVersionContext* sv_context,
3330
+ const MutableCFOptions& mutable_cf_options) {
3331
+ mutex_.AssertHeld();
3332
+
3333
+ // Update max_total_in_memory_state_
3334
+ size_t old_memtable_size = 0;
3335
+ auto* old_sv = cfd->GetSuperVersion();
3336
+ if (old_sv) {
3337
+ old_memtable_size = old_sv->mutable_cf_options.write_buffer_size *
3338
+ old_sv->mutable_cf_options.max_write_buffer_number;
3339
+ }
3340
+
3341
+ // this branch is unlikely to step in
3342
+ if (UNLIKELY(sv_context->new_superversion == nullptr)) {
3343
+ sv_context->NewSuperVersion();
3344
+ }
3345
+ cfd->InstallSuperVersion(sv_context, &mutex_, mutable_cf_options);
3346
+
3347
+ // There may be a small data race here. The snapshot tricking bottommost
3348
+ // compaction may already be released here. But assuming there will always be
3349
+ // newer snapshot created and released frequently, the compaction will be
3350
+ // triggered soon anyway.
3351
+ bottommost_files_mark_threshold_ = kMaxSequenceNumber;
3352
+ for (auto* my_cfd : *versions_->GetColumnFamilySet()) {
3353
+ bottommost_files_mark_threshold_ = std::min(
3354
+ bottommost_files_mark_threshold_,
3355
+ my_cfd->current()->storage_info()->bottommost_files_mark_threshold());
3356
+ }
3357
+
3358
+ // Whenever we install new SuperVersion, we might need to issue new flushes or
3359
+ // compactions.
3360
+ SchedulePendingCompaction(cfd);
3361
+ MaybeScheduleFlushOrCompaction();
3362
+
3363
+ // Update max_total_in_memory_state_
3364
+ max_total_in_memory_state_ = max_total_in_memory_state_ - old_memtable_size +
3365
+ mutable_cf_options.write_buffer_size *
3366
+ mutable_cf_options.max_write_buffer_number;
3367
+ }
3368
+
3369
+ // ShouldPurge is called by FindObsoleteFiles when doing a full scan,
3370
+ // and db mutex (mutex_) should already be held.
3371
+ // Actually, the current implementation of FindObsoleteFiles with
3372
+ // full_scan=true can issue I/O requests to obtain list of files in
3373
+ // directories, e.g. env_->getChildren while holding db mutex.
3374
+ bool DBImpl::ShouldPurge(uint64_t file_number) const {
3375
+ return files_grabbed_for_purge_.find(file_number) ==
3376
+ files_grabbed_for_purge_.end() &&
3377
+ purge_files_.find(file_number) == purge_files_.end();
3378
+ }
3379
+
3380
+ // MarkAsGrabbedForPurge is called by FindObsoleteFiles, and db mutex
3381
+ // (mutex_) should already be held.
3382
+ void DBImpl::MarkAsGrabbedForPurge(uint64_t file_number) {
3383
+ files_grabbed_for_purge_.insert(file_number);
3384
+ }
3385
+
3386
+ void DBImpl::SetSnapshotChecker(SnapshotChecker* snapshot_checker) {
3387
+ InstrumentedMutexLock l(&mutex_);
3388
+ // snapshot_checker_ should only set once. If we need to set it multiple
3389
+ // times, we need to make sure the old one is not deleted while it is still
3390
+ // using by a compaction job.
3391
+ assert(!snapshot_checker_);
3392
+ snapshot_checker_.reset(snapshot_checker);
3393
+ }
3394
+
3395
+ void DBImpl::GetSnapshotContext(
3396
+ JobContext* job_context, std::vector<SequenceNumber>* snapshot_seqs,
3397
+ SequenceNumber* earliest_write_conflict_snapshot,
3398
+ SnapshotChecker** snapshot_checker_ptr) {
3399
+ mutex_.AssertHeld();
3400
+ assert(job_context != nullptr);
3401
+ assert(snapshot_seqs != nullptr);
3402
+ assert(earliest_write_conflict_snapshot != nullptr);
3403
+ assert(snapshot_checker_ptr != nullptr);
3404
+
3405
+ *snapshot_checker_ptr = snapshot_checker_.get();
3406
+ if (use_custom_gc_ && *snapshot_checker_ptr == nullptr) {
3407
+ *snapshot_checker_ptr = DisableGCSnapshotChecker::Instance();
3408
+ }
3409
+ if (*snapshot_checker_ptr != nullptr) {
3410
+ // If snapshot_checker is used, that means the flush/compaction may
3411
+ // contain values not visible to snapshot taken after
3412
+ // flush/compaction job starts. Take a snapshot and it will appear
3413
+ // in snapshot_seqs and force compaction iterator to consider such
3414
+ // snapshots.
3415
+ const Snapshot* job_snapshot =
3416
+ GetSnapshotImpl(false /*write_conflict_boundary*/, false /*lock*/);
3417
+ job_context->job_snapshot.reset(new ManagedSnapshot(this, job_snapshot));
3418
+ }
3419
+ *snapshot_seqs = snapshots_.GetAll(earliest_write_conflict_snapshot);
3420
+ }
3421
+ } // namespace ROCKSDB_NAMESPACE