rocksdb-native 2.2.0 → 2.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (261)
  1. package/binding.c +92 -10
  2. package/index.js +9 -0
  3. package/lib/batch.js +11 -1
  4. package/lib/iterator.js +3 -1
  5. package/lib/snapshot.js +21 -0
  6. package/package.json +1 -1
  7. package/prebuilds/darwin-arm64/rocksdb-native.bare +0 -0
  8. package/prebuilds/darwin-arm64/rocksdb-native.node +0 -0
  9. package/prebuilds/darwin-x64/rocksdb-native.bare +0 -0
  10. package/prebuilds/darwin-x64/rocksdb-native.node +0 -0
  11. package/prebuilds/linux-arm64/rocksdb-native.bare +0 -0
  12. package/prebuilds/linux-arm64/rocksdb-native.node +0 -0
  13. package/prebuilds/linux-x64/rocksdb-native.bare +0 -0
  14. package/prebuilds/linux-x64/rocksdb-native.node +0 -0
  15. package/prebuilds/win32-x64/rocksdb-native.bare +0 -0
  16. package/prebuilds/win32-x64/rocksdb-native.node +0 -0
  17. package/vendor/librocksdb/include/rocksdb.h +38 -4
  18. package/vendor/librocksdb/src/rocksdb.cc +114 -14
  19. package/vendor/librocksdb/vendor/rocksdb/CMakeLists.txt +21 -4
  20. package/vendor/librocksdb/vendor/rocksdb/cache/secondary_cache_adapter.cc +6 -3
  21. package/vendor/librocksdb/vendor/rocksdb/db/arena_wrapped_db_iter.cc +4 -4
  22. package/vendor/librocksdb/vendor/rocksdb/db/arena_wrapped_db_iter.h +4 -2
  23. package/vendor/librocksdb/vendor/rocksdb/db/attribute_group_iterator_impl.cc +20 -0
  24. package/vendor/librocksdb/vendor/rocksdb/db/attribute_group_iterator_impl.h +83 -0
  25. package/vendor/librocksdb/vendor/rocksdb/db/builder.cc +9 -5
  26. package/vendor/librocksdb/vendor/rocksdb/db/builder.h +1 -1
  27. package/vendor/librocksdb/vendor/rocksdb/db/c.cc +231 -6
  28. package/vendor/librocksdb/vendor/rocksdb/db/c_test.c +202 -2
  29. package/vendor/librocksdb/vendor/rocksdb/db/coalescing_iterator.cc +47 -0
  30. package/vendor/librocksdb/vendor/rocksdb/db/coalescing_iterator.h +79 -0
  31. package/vendor/librocksdb/vendor/rocksdb/db/column_family.cc +28 -0
  32. package/vendor/librocksdb/vendor/rocksdb/db/column_family.h +17 -0
  33. package/vendor/librocksdb/vendor/rocksdb/db/compaction/compaction.cc +8 -1
  34. package/vendor/librocksdb/vendor/rocksdb/db/compaction/compaction.h +11 -9
  35. package/vendor/librocksdb/vendor/rocksdb/db/compaction/compaction_iterator.cc +50 -23
  36. package/vendor/librocksdb/vendor/rocksdb/db/compaction/compaction_iterator.h +13 -0
  37. package/vendor/librocksdb/vendor/rocksdb/db/compaction/compaction_job.cc +22 -25
  38. package/vendor/librocksdb/vendor/rocksdb/db/compaction/compaction_job.h +2 -0
  39. package/vendor/librocksdb/vendor/rocksdb/db/compaction/compaction_outputs.cc +8 -1
  40. package/vendor/librocksdb/vendor/rocksdb/db/compaction/compaction_outputs.h +1 -0
  41. package/vendor/librocksdb/vendor/rocksdb/db/compaction/compaction_picker.cc +40 -17
  42. package/vendor/librocksdb/vendor/rocksdb/db/compaction/compaction_picker.h +20 -14
  43. package/vendor/librocksdb/vendor/rocksdb/db/compaction/compaction_picker_level.cc +11 -6
  44. package/vendor/librocksdb/vendor/rocksdb/db/compaction/compaction_picker_universal.cc +77 -24
  45. package/vendor/librocksdb/vendor/rocksdb/db/compaction/compaction_service_job.cc +2 -0
  46. package/vendor/librocksdb/vendor/rocksdb/db/convenience.cc +3 -0
  47. package/vendor/librocksdb/vendor/rocksdb/db/db_filesnapshot.cc +125 -31
  48. package/vendor/librocksdb/vendor/rocksdb/db/db_impl/db_impl.cc +457 -231
  49. package/vendor/librocksdb/vendor/rocksdb/db/db_impl/db_impl.h +172 -73
  50. package/vendor/librocksdb/vendor/rocksdb/db/db_impl/db_impl_compaction_flush.cc +152 -133
  51. package/vendor/librocksdb/vendor/rocksdb/db/db_impl/db_impl_debug.cc +5 -0
  52. package/vendor/librocksdb/vendor/rocksdb/db/db_impl/db_impl_files.cc +58 -52
  53. package/vendor/librocksdb/vendor/rocksdb/db/db_impl/db_impl_follower.cc +348 -0
  54. package/vendor/librocksdb/vendor/rocksdb/db/db_impl/db_impl_follower.h +54 -0
  55. package/vendor/librocksdb/vendor/rocksdb/db/db_impl/db_impl_open.cc +136 -117
  56. package/vendor/librocksdb/vendor/rocksdb/db/db_impl/db_impl_secondary.cc +4 -3
  57. package/vendor/librocksdb/vendor/rocksdb/db/db_impl/db_impl_secondary.h +7 -6
  58. package/vendor/librocksdb/vendor/rocksdb/db/db_impl/db_impl_write.cc +134 -80
  59. package/vendor/librocksdb/vendor/rocksdb/db/db_iter.cc +11 -0
  60. package/vendor/librocksdb/vendor/rocksdb/db/db_test2.cc +1 -1
  61. package/vendor/librocksdb/vendor/rocksdb/db/db_test_util.cc +11 -1
  62. package/vendor/librocksdb/vendor/rocksdb/db/db_test_util.h +11 -7
  63. package/vendor/librocksdb/vendor/rocksdb/db/dbformat.cc +19 -4
  64. package/vendor/librocksdb/vendor/rocksdb/db/dbformat.h +3 -2
  65. package/vendor/librocksdb/vendor/rocksdb/db/error_handler.cc +34 -39
  66. package/vendor/librocksdb/vendor/rocksdb/db/error_handler.h +3 -4
  67. package/vendor/librocksdb/vendor/rocksdb/db/event_helpers.cc +6 -3
  68. package/vendor/librocksdb/vendor/rocksdb/db/experimental.cc +3 -2
  69. package/vendor/librocksdb/vendor/rocksdb/db/external_sst_file_ingestion_job.cc +76 -18
  70. package/vendor/librocksdb/vendor/rocksdb/db/external_sst_file_ingestion_job.h +11 -0
  71. package/vendor/librocksdb/vendor/rocksdb/db/flush_job.cc +37 -5
  72. package/vendor/librocksdb/vendor/rocksdb/db/flush_job.h +14 -0
  73. package/vendor/librocksdb/vendor/rocksdb/db/import_column_family_job.cc +49 -45
  74. package/vendor/librocksdb/vendor/rocksdb/db/internal_stats.cc +60 -1
  75. package/vendor/librocksdb/vendor/rocksdb/db/internal_stats.h +20 -1
  76. package/vendor/librocksdb/vendor/rocksdb/db/log_reader.cc +15 -6
  77. package/vendor/librocksdb/vendor/rocksdb/db/log_writer.cc +59 -10
  78. package/vendor/librocksdb/vendor/rocksdb/db/log_writer.h +8 -0
  79. package/vendor/librocksdb/vendor/rocksdb/db/memtable.cc +24 -40
  80. package/vendor/librocksdb/vendor/rocksdb/db/memtable.h +10 -10
  81. package/vendor/librocksdb/vendor/rocksdb/db/memtable_list.cc +9 -8
  82. package/vendor/librocksdb/vendor/rocksdb/db/multi_cf_iterator_impl.h +296 -0
  83. package/vendor/librocksdb/vendor/rocksdb/db/range_tombstone_fragmenter.h +8 -10
  84. package/vendor/librocksdb/vendor/rocksdb/db/repair.cc +4 -3
  85. package/vendor/librocksdb/vendor/rocksdb/db/seqno_to_time_mapping.cc +30 -0
  86. package/vendor/librocksdb/vendor/rocksdb/db/seqno_to_time_mapping.h +9 -0
  87. package/vendor/librocksdb/vendor/rocksdb/db/table_cache.cc +17 -2
  88. package/vendor/librocksdb/vendor/rocksdb/db/table_cache.h +9 -1
  89. package/vendor/librocksdb/vendor/rocksdb/db/table_properties_collector.h +9 -2
  90. package/vendor/librocksdb/vendor/rocksdb/db/transaction_log_impl.cc +3 -3
  91. package/vendor/librocksdb/vendor/rocksdb/db/transaction_log_impl.h +7 -7
  92. package/vendor/librocksdb/vendor/rocksdb/db/version_edit.cc +0 -1
  93. package/vendor/librocksdb/vendor/rocksdb/db/version_edit_handler.cc +39 -5
  94. package/vendor/librocksdb/vendor/rocksdb/db/version_edit_handler.h +24 -15
  95. package/vendor/librocksdb/vendor/rocksdb/db/version_set.cc +117 -64
  96. package/vendor/librocksdb/vendor/rocksdb/db/version_set.h +27 -10
  97. package/vendor/librocksdb/vendor/rocksdb/db/wal_manager.cc +37 -29
  98. package/vendor/librocksdb/vendor/rocksdb/db/wal_manager.h +6 -5
  99. package/vendor/librocksdb/vendor/rocksdb/db/wide/wide_columns.cc +2 -3
  100. package/vendor/librocksdb/vendor/rocksdb/db/wide/wide_columns_helper.cc +6 -0
  101. package/vendor/librocksdb/vendor/rocksdb/db/write_batch.cc +89 -31
  102. package/vendor/librocksdb/vendor/rocksdb/db/write_thread.cc +53 -5
  103. package/vendor/librocksdb/vendor/rocksdb/db/write_thread.h +36 -4
  104. package/vendor/librocksdb/vendor/rocksdb/env/composite_env_wrapper.h +21 -0
  105. package/vendor/librocksdb/vendor/rocksdb/env/env.cc +15 -0
  106. package/vendor/librocksdb/vendor/rocksdb/env/fs_on_demand.cc +331 -0
  107. package/vendor/librocksdb/vendor/rocksdb/env/fs_on_demand.h +139 -0
  108. package/vendor/librocksdb/vendor/rocksdb/env/io_posix.cc +8 -6
  109. package/vendor/librocksdb/vendor/rocksdb/env/io_posix.h +1 -1
  110. package/vendor/librocksdb/vendor/rocksdb/file/delete_scheduler.cc +130 -27
  111. package/vendor/librocksdb/vendor/rocksdb/file/delete_scheduler.h +61 -8
  112. package/vendor/librocksdb/vendor/rocksdb/file/file_util.cc +25 -4
  113. package/vendor/librocksdb/vendor/rocksdb/file/file_util.h +15 -0
  114. package/vendor/librocksdb/vendor/rocksdb/file/sequence_file_reader.cc +1 -0
  115. package/vendor/librocksdb/vendor/rocksdb/file/sequence_file_reader.h +9 -4
  116. package/vendor/librocksdb/vendor/rocksdb/file/sst_file_manager_impl.cc +18 -0
  117. package/vendor/librocksdb/vendor/rocksdb/file/sst_file_manager_impl.h +31 -4
  118. package/vendor/librocksdb/vendor/rocksdb/file/writable_file_writer.cc +40 -38
  119. package/vendor/librocksdb/vendor/rocksdb/file/writable_file_writer.h +48 -15
  120. package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/advanced_options.h +12 -3
  121. package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/attribute_groups.h +114 -0
  122. package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/c.h +90 -0
  123. package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/cache.h +5 -0
  124. package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/comparator.h +27 -0
  125. package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/db.h +71 -12
  126. package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/env.h +9 -0
  127. package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/experimental.h +5 -0
  128. package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/file_system.h +14 -0
  129. package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/iterator.h +9 -71
  130. package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/iterator_base.h +90 -0
  131. package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/listener.h +21 -0
  132. package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/options.h +125 -12
  133. package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/perf_context.h +1 -1
  134. package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/sst_file_reader.h +11 -1
  135. package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/table.h +6 -6
  136. package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/table_properties.h +19 -0
  137. package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/transaction_log.h +12 -6
  138. package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/types.h +12 -0
  139. package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/universal_compaction.h +31 -0
  140. package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/user_write_callback.h +29 -0
  141. package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/utilities/cache_dump_load.h +4 -0
  142. package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/utilities/checkpoint.h +4 -2
  143. package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/utilities/customizable_util.h +0 -1
  144. package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/utilities/env_mirror.h +1 -1
  145. package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/utilities/ldb_cmd.h +24 -7
  146. package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/utilities/option_change_migration.h +4 -4
  147. package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/utilities/stackable_db.h +24 -5
  148. package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/utilities/table_properties_collectors.h +46 -0
  149. package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/utilities/transaction.h +42 -17
  150. package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/utilities/transaction_db.h +5 -0
  151. package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/utilities/types_util.h +36 -0
  152. package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/utilities/write_batch_with_index.h +71 -3
  153. package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/version.h +2 -2
  154. package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/wide_columns.h +87 -72
  155. package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/write_batch_base.h +1 -1
  156. package/vendor/librocksdb/vendor/rocksdb/memory/memory_allocator.cc +1 -0
  157. package/vendor/librocksdb/vendor/rocksdb/options/cf_options.cc +13 -2
  158. package/vendor/librocksdb/vendor/rocksdb/options/cf_options.h +6 -2
  159. package/vendor/librocksdb/vendor/rocksdb/options/db_options.cc +27 -1
  160. package/vendor/librocksdb/vendor/rocksdb/options/db_options.h +10 -3
  161. package/vendor/librocksdb/vendor/rocksdb/options/options.cc +3 -0
  162. package/vendor/librocksdb/vendor/rocksdb/options/options_helper.cc +1 -0
  163. package/vendor/librocksdb/vendor/rocksdb/port/jemalloc_helper.h +2 -2
  164. package/vendor/librocksdb/vendor/rocksdb/port/stack_trace.cc +1 -0
  165. package/vendor/librocksdb/vendor/rocksdb/port/win/port_win.cc +3 -2
  166. package/vendor/librocksdb/vendor/rocksdb/table/block_based/binary_search_index_reader.cc +1 -2
  167. package/vendor/librocksdb/vendor/rocksdb/table/block_based/block_based_table_builder.cc +47 -31
  168. package/vendor/librocksdb/vendor/rocksdb/table/block_based/block_based_table_factory.cc +15 -0
  169. package/vendor/librocksdb/vendor/rocksdb/table/block_based/block_based_table_iterator.cc +37 -18
  170. package/vendor/librocksdb/vendor/rocksdb/table/block_based/block_based_table_iterator.h +10 -3
  171. package/vendor/librocksdb/vendor/rocksdb/table/block_based/block_based_table_reader.cc +102 -41
  172. package/vendor/librocksdb/vendor/rocksdb/table/block_based/block_based_table_reader.h +15 -7
  173. package/vendor/librocksdb/vendor/rocksdb/table/block_based/block_based_table_reader_impl.h +1 -3
  174. package/vendor/librocksdb/vendor/rocksdb/table/block_based/block_based_table_reader_sync_and_async.h +5 -6
  175. package/vendor/librocksdb/vendor/rocksdb/table/block_based/block_cache.h +31 -0
  176. package/vendor/librocksdb/vendor/rocksdb/table/block_based/block_prefetcher.cc +6 -0
  177. package/vendor/librocksdb/vendor/rocksdb/table/block_based/cachable_entry.h +10 -5
  178. package/vendor/librocksdb/vendor/rocksdb/table/block_based/filter_block.h +34 -28
  179. package/vendor/librocksdb/vendor/rocksdb/table/block_based/filter_block_reader_common.cc +17 -11
  180. package/vendor/librocksdb/vendor/rocksdb/table/block_based/filter_block_reader_common.h +5 -2
  181. package/vendor/librocksdb/vendor/rocksdb/table/block_based/filter_policy.cc +12 -3
  182. package/vendor/librocksdb/vendor/rocksdb/table/block_based/full_filter_block.cc +37 -30
  183. package/vendor/librocksdb/vendor/rocksdb/table/block_based/full_filter_block.h +11 -13
  184. package/vendor/librocksdb/vendor/rocksdb/table/block_based/hash_index_reader.cc +1 -2
  185. package/vendor/librocksdb/vendor/rocksdb/table/block_based/index_builder.cc +62 -53
  186. package/vendor/librocksdb/vendor/rocksdb/table/block_based/index_builder.h +60 -38
  187. package/vendor/librocksdb/vendor/rocksdb/table/block_based/index_reader_common.cc +14 -9
  188. package/vendor/librocksdb/vendor/rocksdb/table/block_based/index_reader_common.h +4 -1
  189. package/vendor/librocksdb/vendor/rocksdb/table/block_based/partitioned_filter_block.cc +135 -94
  190. package/vendor/librocksdb/vendor/rocksdb/table/block_based/partitioned_filter_block.h +52 -46
  191. package/vendor/librocksdb/vendor/rocksdb/table/block_based/partitioned_index_reader.cc +51 -13
  192. package/vendor/librocksdb/vendor/rocksdb/table/block_based/partitioned_index_reader.h +2 -0
  193. package/vendor/librocksdb/vendor/rocksdb/table/block_based/uncompression_dict_reader.cc +3 -11
  194. package/vendor/librocksdb/vendor/rocksdb/table/block_based/uncompression_dict_reader.h +2 -3
  195. package/vendor/librocksdb/vendor/rocksdb/table/block_fetcher.cc +8 -10
  196. package/vendor/librocksdb/vendor/rocksdb/table/block_fetcher.h +2 -1
  197. package/vendor/librocksdb/vendor/rocksdb/table/compaction_merging_iterator.cc +9 -10
  198. package/vendor/librocksdb/vendor/rocksdb/table/compaction_merging_iterator.h +3 -2
  199. package/vendor/librocksdb/vendor/rocksdb/table/format.cc +1 -2
  200. package/vendor/librocksdb/vendor/rocksdb/table/iterator.cc +4 -0
  201. package/vendor/librocksdb/vendor/rocksdb/table/merging_iterator.cc +18 -13
  202. package/vendor/librocksdb/vendor/rocksdb/table/merging_iterator.h +5 -3
  203. package/vendor/librocksdb/vendor/rocksdb/table/meta_blocks.cc +18 -4
  204. package/vendor/librocksdb/vendor/rocksdb/table/meta_blocks.h +4 -0
  205. package/vendor/librocksdb/vendor/rocksdb/table/plain/plain_table_builder.cc +2 -2
  206. package/vendor/librocksdb/vendor/rocksdb/table/sst_file_dumper.cc +6 -6
  207. package/vendor/librocksdb/vendor/rocksdb/table/sst_file_reader.cc +24 -2
  208. package/vendor/librocksdb/vendor/rocksdb/table/sst_file_writer_collectors.h +3 -1
  209. package/vendor/librocksdb/vendor/rocksdb/table/table_builder.h +8 -7
  210. package/vendor/librocksdb/vendor/rocksdb/table/table_iterator.h +69 -0
  211. package/vendor/librocksdb/vendor/rocksdb/table/table_reader.h +9 -0
  212. package/vendor/librocksdb/vendor/rocksdb/test_util/testutil.cc +25 -0
  213. package/vendor/librocksdb/vendor/rocksdb/test_util/testutil.h +12 -0
  214. package/vendor/librocksdb/vendor/rocksdb/tools/db_bench_tool.cc +32 -0
  215. package/vendor/librocksdb/vendor/rocksdb/tools/ldb_cmd.cc +618 -124
  216. package/vendor/librocksdb/vendor/rocksdb/tools/ldb_cmd_impl.h +19 -1
  217. package/vendor/librocksdb/vendor/rocksdb/tools/ldb_tool.cc +9 -0
  218. package/vendor/librocksdb/vendor/rocksdb/util/aligned_storage.h +24 -0
  219. package/vendor/librocksdb/vendor/rocksdb/util/autovector.h +4 -0
  220. package/vendor/librocksdb/vendor/rocksdb/util/comparator.cc +12 -0
  221. package/vendor/librocksdb/vendor/rocksdb/util/filter_bench.cc +1 -1
  222. package/vendor/librocksdb/vendor/rocksdb/util/random.cc +2 -1
  223. package/vendor/librocksdb/vendor/rocksdb/util/stderr_logger.cc +3 -4
  224. package/vendor/librocksdb/vendor/rocksdb/util/stderr_logger.h +1 -1
  225. package/vendor/librocksdb/vendor/rocksdb/util/udt_util.cc +33 -0
  226. package/vendor/librocksdb/vendor/rocksdb/util/udt_util.h +7 -0
  227. package/vendor/librocksdb/vendor/rocksdb/util/write_batch_util.h +5 -0
  228. package/vendor/librocksdb/vendor/rocksdb/util/xxhash.h +36 -29
  229. package/vendor/librocksdb/vendor/rocksdb/utilities/blob_db/blob_db_impl.h +3 -0
  230. package/vendor/librocksdb/vendor/rocksdb/utilities/blob_db/blob_db_impl_filesnapshot.cc +20 -0
  231. package/vendor/librocksdb/vendor/rocksdb/utilities/cache_dump_load_impl.cc +29 -9
  232. package/vendor/librocksdb/vendor/rocksdb/utilities/cache_dump_load_impl.h +14 -3
  233. package/vendor/librocksdb/vendor/rocksdb/utilities/debug.cc +16 -4
  234. package/vendor/librocksdb/vendor/rocksdb/utilities/fault_injection_fs.cc +677 -248
  235. package/vendor/librocksdb/vendor/rocksdb/utilities/fault_injection_fs.h +325 -158
  236. package/vendor/librocksdb/vendor/rocksdb/utilities/option_change_migration/option_change_migration.cc +1 -8
  237. package/vendor/librocksdb/vendor/rocksdb/utilities/table_properties_collectors/compact_for_tiering_collector.cc +144 -0
  238. package/vendor/librocksdb/vendor/rocksdb/utilities/table_properties_collectors/compact_for_tiering_collector.h +45 -0
  239. package/vendor/librocksdb/vendor/rocksdb/utilities/table_properties_collectors/compact_on_deletion_collector.cc +12 -0
  240. package/vendor/librocksdb/vendor/rocksdb/utilities/transactions/lock/range/range_tree/lib/portability/toku_time.h +1 -1
  241. package/vendor/librocksdb/vendor/rocksdb/utilities/transactions/lock/range/range_tree/lib/util/growable_array.h +3 -3
  242. package/vendor/librocksdb/vendor/rocksdb/utilities/transactions/pessimistic_transaction.cc +116 -20
  243. package/vendor/librocksdb/vendor/rocksdb/utilities/transactions/pessimistic_transaction.h +33 -1
  244. package/vendor/librocksdb/vendor/rocksdb/utilities/transactions/pessimistic_transaction_db.cc +78 -13
  245. package/vendor/librocksdb/vendor/rocksdb/utilities/transactions/pessimistic_transaction_db.h +33 -1
  246. package/vendor/librocksdb/vendor/rocksdb/utilities/transactions/transaction_base.cc +106 -7
  247. package/vendor/librocksdb/vendor/rocksdb/utilities/transactions/transaction_base.h +68 -10
  248. package/vendor/librocksdb/vendor/rocksdb/utilities/transactions/transaction_test.h +7 -3
  249. package/vendor/librocksdb/vendor/rocksdb/utilities/transactions/transaction_util.cc +8 -5
  250. package/vendor/librocksdb/vendor/rocksdb/utilities/transactions/transaction_util.h +7 -4
  251. package/vendor/librocksdb/vendor/rocksdb/utilities/transactions/write_prepared_txn.cc +18 -12
  252. package/vendor/librocksdb/vendor/rocksdb/utilities/transactions/write_prepared_txn_db.cc +4 -4
  253. package/vendor/librocksdb/vendor/rocksdb/utilities/transactions/write_prepared_txn_db.h +17 -0
  254. package/vendor/librocksdb/vendor/rocksdb/utilities/transactions/write_unprepared_txn.cc +11 -9
  255. package/vendor/librocksdb/vendor/rocksdb/utilities/transactions/write_unprepared_txn_db.cc +2 -1
  256. package/vendor/librocksdb/vendor/rocksdb/utilities/types_util.cc +88 -0
  257. package/vendor/librocksdb/vendor/rocksdb/utilities/write_batch_with_index/write_batch_with_index.cc +313 -14
  258. package/vendor/librocksdb/vendor/rocksdb/utilities/write_batch_with_index/write_batch_with_index_internal.cc +7 -0
  259. package/vendor/librocksdb/vendor/rocksdb/utilities/write_batch_with_index/write_batch_with_index_internal.h +1 -1
  260. package/vendor/librocksdb/vendor/rocksdb/db/multi_cf_iterator.cc +0 -102
  261. package/vendor/librocksdb/vendor/rocksdb/db/multi_cf_iterator.h +0 -159
@@ -87,6 +87,9 @@ bool DBImpl::ShouldRescheduleFlushRequestToRetainUDT(
87
87
  mutex_.AssertHeld();
88
88
  assert(flush_req.cfd_to_max_mem_id_to_persist.size() == 1);
89
89
  ColumnFamilyData* cfd = flush_req.cfd_to_max_mem_id_to_persist.begin()->first;
90
+ if (cfd->GetAndClearFlushSkipReschedule()) {
91
+ return false;
92
+ }
90
93
  uint64_t max_memtable_id =
91
94
  flush_req.cfd_to_max_mem_id_to_persist.begin()->second;
92
95
  if (cfd->IsDropped() ||
@@ -98,15 +101,20 @@ bool DBImpl::ShouldRescheduleFlushRequestToRetainUDT(
98
101
  // alleviated if we continue with the flush instead of postponing it.
99
102
  const auto& mutable_cf_options = *cfd->GetLatestMutableCFOptions();
100
103
 
101
- // Taking the status of the active Memtable into consideration so that we are
102
- // not just checking if DB is currently already in write stall mode.
103
- int mem_to_flush = cfd->mem()->ApproximateMemoryUsageFast() >=
104
- cfd->mem()->write_buffer_size() / 2
105
- ? 1
106
- : 0;
104
+ // Use the same criteria as WaitUntilFlushWouldNotStallWrites does when
105
+ // deciding whether a write stall is about to happen. If this uses a
106
+ // stricter criteria, for example, a write stall is about to happen if the
107
+ // last memtable is 10% full, there is a possibility that manual flush could
108
+ // be waiting in `WaitUntilFlushWouldNotStallWrites` with the incorrect
109
+ // expectation that others will clear up the excessive memtables and
110
+ // eventually let it proceed. The others in this case won't start clearing
111
+ // until the last memtable is 10% full. To avoid that scenario, the criteria
112
+ // this uses should be the same or less strict than
113
+ // `WaitUntilFlushWouldNotStallWrites` does.
107
114
  WriteStallCondition write_stall =
108
115
  ColumnFamilyData::GetWriteStallConditionAndCause(
109
- cfd->imm()->NumNotFlushed() + mem_to_flush, /*num_l0_files=*/0,
116
+ cfd->GetUnflushedMemTableCountForWriteStallCheck(),
117
+ /*num_l0_files=*/0,
110
118
  /*num_compaction_needed_bytes=*/0, mutable_cf_options,
111
119
  *cfd->ioptions())
112
120
  .first;
@@ -116,89 +124,19 @@ bool DBImpl::ShouldRescheduleFlushRequestToRetainUDT(
116
124
  return true;
117
125
  }
118
126
 
119
- IOStatus DBImpl::SyncClosedLogs(const WriteOptions& write_options,
127
+ IOStatus DBImpl::SyncClosedWals(const WriteOptions& write_options,
120
128
  JobContext* job_context,
121
129
  VersionEdit* synced_wals,
122
130
  bool error_recovery_in_prog) {
123
- TEST_SYNC_POINT("DBImpl::SyncClosedLogs:Start");
124
- InstrumentedMutexLock l(&log_write_mutex_);
125
- autovector<log::Writer*, 1> logs_to_sync;
126
- uint64_t current_log_number = logfile_number_;
127
- while (logs_.front().number < current_log_number &&
128
- logs_.front().IsSyncing()) {
129
- log_sync_cv_.Wait();
130
- }
131
- for (auto it = logs_.begin();
132
- it != logs_.end() && it->number < current_log_number; ++it) {
133
- auto& log = *it;
134
- log.PrepareForSync();
135
- logs_to_sync.push_back(log.writer);
136
- }
137
-
138
- IOStatus io_s;
139
- if (!logs_to_sync.empty()) {
140
- log_write_mutex_.Unlock();
141
-
142
- assert(job_context);
143
-
144
- for (log::Writer* log : logs_to_sync) {
145
- ROCKS_LOG_INFO(immutable_db_options_.info_log,
146
- "[JOB %d] Syncing log #%" PRIu64, job_context->job_id,
147
- log->get_log_number());
148
- if (error_recovery_in_prog) {
149
- log->file()->reset_seen_error();
150
- }
151
-
152
- IOOptions io_options;
153
- io_s = WritableFileWriter::PrepareIOOptions(write_options, io_options);
154
- if (!io_s.ok()) {
155
- break;
156
- }
157
- io_s = log->file()->Sync(io_options, immutable_db_options_.use_fsync);
158
- if (!io_s.ok()) {
159
- break;
160
- }
161
-
162
- if (immutable_db_options_.recycle_log_file_num > 0) {
163
- if (error_recovery_in_prog) {
164
- log->file()->reset_seen_error();
165
- }
166
- // Normally the log file is closed when purging obsolete file, but if
167
- // log recycling is enabled, the log file is closed here so that it
168
- // can be reused.
169
- io_s = log->Close(write_options);
170
- if (!io_s.ok()) {
171
- break;
172
- }
173
- }
174
- }
175
- if (io_s.ok()) {
176
- IOOptions io_options;
177
- io_s = WritableFileWriter::PrepareIOOptions(write_options, io_options);
178
- if (io_s.ok()) {
179
- io_s = directories_.GetWalDir()->FsyncWithDirOptions(
180
- io_options, nullptr,
181
- DirFsyncOptions(DirFsyncOptions::FsyncReason::kNewFileSynced));
182
- }
183
- }
184
-
185
- TEST_SYNC_POINT_CALLBACK("DBImpl::SyncClosedLogs:BeforeReLock",
186
- /*arg=*/nullptr);
187
- log_write_mutex_.Lock();
131
+ TEST_SYNC_POINT("DBImpl::SyncClosedWals:Start");
188
132
 
189
- // "number <= current_log_number - 1" is equivalent to
190
- // "number < current_log_number".
191
- if (io_s.ok()) {
192
- MarkLogsSynced(current_log_number - 1, true, synced_wals);
193
- } else {
194
- MarkLogsNotSynced(current_log_number - 1);
195
- }
196
- if (!io_s.ok()) {
197
- TEST_SYNC_POINT("DBImpl::SyncClosedLogs:Failed");
198
- return io_s;
199
- }
133
+ IOStatus io_s = SyncWalImpl(/*include_current_wal*/ false, write_options,
134
+ job_context, synced_wals, error_recovery_in_prog);
135
+ if (!io_s.ok()) {
136
+ TEST_SYNC_POINT("DBImpl::SyncClosedWals:Failed");
137
+ } else {
138
+ TEST_SYNC_POINT("DBImpl::SyncClosedWals:end");
200
139
  }
201
- TEST_SYNC_POINT("DBImpl::SyncClosedLogs:end");
202
140
  return io_s;
203
141
  }
204
142
 
@@ -224,19 +162,25 @@ Status DBImpl::FlushMemTableToOutputFile(
224
162
  // the host crashes after flushing and before WAL is persistent, the
225
163
  // flushed SST may contain data from write batches whose updates to
226
164
  // other (unflushed) column families are missing.
165
+ //
166
+ // When 2PC is enabled, non-recent WAL(s) may be needed for crash-recovery,
167
+ // even when there is only one CF in the DB, for prepared transactions that
168
+ // had not been committed yet. Make sure we sync them to keep the persisted
169
+ // WAL state at least as new as the persisted SST state.
227
170
  const bool needs_to_sync_closed_wals =
228
171
  logfile_number_ > 0 &&
229
- versions_->GetColumnFamilySet()->NumberOfColumnFamilies() > 1;
172
+ (versions_->GetColumnFamilySet()->NumberOfColumnFamilies() > 1 ||
173
+ allow_2pc());
230
174
 
231
175
  // If needs_to_sync_closed_wals is true, we need to record the current
232
176
  // maximum memtable ID of this column family so that a later PickMemtables()
233
177
  // call will not pick memtables whose IDs are higher. This is due to the fact
234
- // that SyncClosedLogs() may release the db mutex, and memtable switch can
178
+ // that SyncClosedWals() may release the db mutex, and memtable switch can
235
179
  // happen for this column family in the meantime. The newly created memtables
236
180
  // have their data backed by unsynced WALs, thus they cannot be included in
237
181
  // this flush job.
238
182
  // Another reason why we must record the current maximum memtable ID of this
239
- // column family: SyncClosedLogs() may release db mutex, thus it's possible
183
+ // column family: SyncClosedWals() may release db mutex, thus it's possible
240
184
  // for application to continue to insert into memtables increasing db's
241
185
  // sequence number. The application may take a snapshot, but this snapshot is
242
186
  // not included in `snapshot_seqs` which will be passed to flush job because
@@ -250,7 +194,7 @@ Status DBImpl::FlushMemTableToOutputFile(
250
194
 
251
195
  // If needs_to_sync_closed_wals is false, then the flush job will pick ALL
252
196
  // existing memtables of the column family when PickMemTable() is called
253
- // later. Although we won't call SyncClosedLogs() in this case, we may still
197
+ // later. Although we won't call SyncClosedWals() in this case, we may still
254
198
  // call the callbacks of the listeners, i.e. NotifyOnFlushBegin() which also
255
199
  // releases and re-acquires the db mutex. In the meantime, the application
256
200
  // can still insert into the memtables and increase the db's sequence number.
@@ -280,12 +224,12 @@ Status DBImpl::FlushMemTableToOutputFile(
280
224
  bool need_cancel = false;
281
225
  IOStatus log_io_s = IOStatus::OK();
282
226
  if (needs_to_sync_closed_wals) {
283
- // SyncClosedLogs() may unlock and re-lock the log_write_mutex multiple
227
+ // SyncClosedWals() may unlock and re-lock the log_write_mutex multiple
284
228
  // times.
285
229
  VersionEdit synced_wals;
286
230
  bool error_recovery_in_prog = error_handler_.IsRecoveryInProgress();
287
231
  mutex_.Unlock();
288
- log_io_s = SyncClosedLogs(write_options, job_context, &synced_wals,
232
+ log_io_s = SyncClosedWals(write_options, job_context, &synced_wals,
289
233
  error_recovery_in_prog);
290
234
  mutex_.Lock();
291
235
  if (log_io_s.ok() && synced_wals.IsWalAddition()) {
@@ -300,7 +244,7 @@ Status DBImpl::FlushMemTableToOutputFile(
300
244
  error_handler_.SetBGError(log_io_s, BackgroundErrorReason::kFlush);
301
245
  }
302
246
  } else {
303
- TEST_SYNC_POINT("DBImpl::SyncClosedLogs:Skip");
247
+ TEST_SYNC_POINT("DBImpl::SyncClosedWals:Skip");
304
248
  }
305
249
  s = log_io_s;
306
250
 
@@ -574,7 +518,7 @@ Status DBImpl::AtomicFlushMemTablesToOutputFiles(
574
518
  VersionEdit synced_wals;
575
519
  bool error_recovery_in_prog = error_handler_.IsRecoveryInProgress();
576
520
  mutex_.Unlock();
577
- log_io_s = SyncClosedLogs(write_options, job_context, &synced_wals,
521
+ log_io_s = SyncClosedWals(write_options, job_context, &synced_wals,
578
522
  error_recovery_in_prog);
579
523
  mutex_.Lock();
580
524
  if (log_io_s.ok() && synced_wals.IsWalAddition()) {
@@ -1088,7 +1032,12 @@ Status DBImpl::IncreaseFullHistoryTsLowImpl(ColumnFamilyData* cfd,
1088
1032
  assert(ucmp->timestamp_size() == ts_low.size() && !ts_low.empty());
1089
1033
  if (!current_ts_low.empty() &&
1090
1034
  ucmp->CompareTimestamp(ts_low, current_ts_low) < 0) {
1091
- return Status::InvalidArgument("Cannot decrease full_history_ts_low");
1035
+ std::stringstream oss;
1036
+ oss << "Current full_history_ts_low: "
1037
+ << ucmp->TimestampToString(current_ts_low)
1038
+ << " is higher than provided ts: " << ucmp->TimestampToString(ts_low)
1039
+ << std::endl;
1040
+ return Status::InvalidArgument(oss.str());
1092
1041
  }
1093
1042
 
1094
1043
  Status s = versions_->LogAndApply(cfd, *cfd->GetLatestMutableCFOptions(),
@@ -1120,6 +1069,11 @@ Status DBImpl::CompactRangeInternal(const CompactRangeOptions& options,
1120
1069
  if (options.target_path_id >= cfd->ioptions()->cf_paths.size()) {
1121
1070
  return Status::InvalidArgument("Invalid target path ID");
1122
1071
  }
1072
+ if (options.change_level &&
1073
+ cfd->ioptions()->compaction_style == kCompactionStyleFIFO) {
1074
+ return Status::NotSupported(
1075
+ "FIFO compaction does not support change_level.");
1076
+ }
1123
1077
 
1124
1078
  bool flush_needed = true;
1125
1079
 
@@ -1180,6 +1134,16 @@ Status DBImpl::CompactRangeInternal(const CompactRangeOptions& options,
1180
1134
  final_output_level, options, begin, end, exclusive,
1181
1135
  false /* disable_trivial_move */,
1182
1136
  std::numeric_limits<uint64_t>::max(), trim_ts);
1137
+ } else if (cfd->ioptions()->compaction_style == kCompactionStyleFIFO) {
1138
+ // FIFOCompactionPicker::CompactRange() will ignore the input key range
1139
+ // [begin, end] and just try to pick compaction based on the configured
1140
+ // option `compaction_options_fifo`. So we skip checking if [begin, end]
1141
+ // overlaps with the DB here.
1142
+ final_output_level = 0;
1143
+ s = RunManualCompaction(cfd, /*input_level=*/0, final_output_level, options,
1144
+ begin, end, exclusive,
1145
+ false /* disable_trivial_move */,
1146
+ std::numeric_limits<uint64_t>::max(), trim_ts);
1183
1147
  } else {
1184
1148
  int first_overlapped_level = kInvalidLevel;
1185
1149
  {
@@ -1264,8 +1228,7 @@ Status DBImpl::CompactRangeInternal(const CompactRangeOptions& options,
1264
1228
  CleanupSuperVersion(super_version);
1265
1229
  }
1266
1230
  if (s.ok() && first_overlapped_level != kInvalidLevel) {
1267
- if (cfd->ioptions()->compaction_style == kCompactionStyleUniversal ||
1268
- cfd->ioptions()->compaction_style == kCompactionStyleFIFO) {
1231
+ if (cfd->ioptions()->compaction_style == kCompactionStyleUniversal) {
1269
1232
  assert(first_overlapped_level == 0);
1270
1233
  s = RunManualCompaction(
1271
1234
  cfd, first_overlapped_level, first_overlapped_level, options, begin,
@@ -1420,6 +1383,14 @@ Status DBImpl::CompactFiles(const CompactionOptions& compact_options,
1420
1383
  LogBuffer log_buffer(InfoLogLevel::INFO_LEVEL,
1421
1384
  immutable_db_options_.info_log.get());
1422
1385
 
1386
+ if (compact_options.compression !=
1387
+ CompressionType::kDisableCompressionOption) {
1388
+ ROCKS_LOG_WARN(immutable_db_options_.info_log,
1389
+ "[%s] [JOB %d] Found use of deprecated option "
1390
+ "`CompactionOptions::compression`",
1391
+ cfd->GetName().c_str(), job_context.job_id);
1392
+ }
1393
+
1423
1394
  // Perform CompactFiles
1424
1395
  TEST_SYNC_POINT("TestCompactFiles::IngestExternalFile2");
1425
1396
  TEST_SYNC_POINT_CALLBACK("TestCompactFiles:PausingManualCompaction:3",
@@ -1509,16 +1480,11 @@ Status DBImpl::CompactFilesImpl(
1509
1480
  std::to_string(cfd->ioptions()->num_levels - 1));
1510
1481
  }
1511
1482
 
1512
- Status s = cfd->compaction_picker()->SanitizeCompactionInputFiles(
1513
- &input_set, cf_meta, output_level);
1514
- TEST_SYNC_POINT("DBImpl::CompactFilesImpl::PostSanitizeCompactionInputFiles");
1515
- if (!s.ok()) {
1516
- return s;
1517
- }
1518
-
1519
1483
  std::vector<CompactionInputFiles> input_files;
1520
- s = cfd->compaction_picker()->GetCompactionInputsFromFileNumbers(
1521
- &input_files, &input_set, version->storage_info(), compact_options);
1484
+ Status s = cfd->compaction_picker()->SanitizeAndConvertCompactionInputFiles(
1485
+ &input_set, cf_meta, output_level, version->storage_info(), &input_files);
1486
+ TEST_SYNC_POINT(
1487
+ "DBImpl::CompactFilesImpl::PostSanitizeAndConvertCompactionInputFiles");
1522
1488
  if (!s.ok()) {
1523
1489
  return s;
1524
1490
  }
@@ -1629,10 +1595,12 @@ Status DBImpl::CompactFilesImpl(
1629
1595
 
1630
1596
  ReleaseFileNumberFromPendingOutputs(pending_outputs_inserted_elem);
1631
1597
 
1598
+ mutex_.Unlock();
1632
1599
  if (compaction_job_info != nullptr) {
1633
1600
  BuildCompactionJobInfo(cfd, c.get(), s, compaction_job_stats,
1634
1601
  job_context->job_id, compaction_job_info);
1635
1602
  }
1603
+ mutex_.Lock();
1636
1604
 
1637
1605
  if (status.ok()) {
1638
1606
  // Done
@@ -1774,6 +1742,8 @@ void DBImpl::NotifyOnCompactionCompleted(
1774
1742
 
1775
1743
  // REQUIREMENT: block all background work by calling PauseBackgroundWork()
1776
1744
  // before calling this function
1745
+ // TODO (hx235): Replace Status::NotSupported() with Status::Aborted() for
1746
+ // better semantics like CompactFiles()
1777
1747
  Status DBImpl::ReFitLevel(ColumnFamilyData* cfd, int level, int target_level) {
1778
1748
  assert(level < cfd->NumberLevels());
1779
1749
  if (target_level >= cfd->NumberLevels()) {
@@ -1809,6 +1779,8 @@ Status DBImpl::ReFitLevel(ColumnFamilyData* cfd, int level, int target_level) {
1809
1779
  if (to_level != level) {
1810
1780
  std::vector<CompactionInputFiles> input(1);
1811
1781
  input[0].level = level;
1782
+ // TODO (hx235): Only refit the output files in the current manual
1783
+ // compaction instead of all the files in the output level
1812
1784
  for (auto& f : vstorage->LevelFiles(level)) {
1813
1785
  input[0].files.push_back(f);
1814
1786
  }
@@ -1840,6 +1812,12 @@ Status DBImpl::ReFitLevel(ColumnFamilyData* cfd, int level, int target_level) {
1840
1812
  }
1841
1813
  } else {
1842
1814
  // to_level < level
1815
+ if (to_level == 0 && input[0].files.size() > 1) {
1816
+ refitting_level_ = false;
1817
+ return Status::Aborted(
1818
+ "Moving more than 1 file from non-L0 to L0 is not allowed as it "
1819
+ "does not bring any benefit to read nor write throughput.");
1820
+ }
1843
1821
  // Check levels are empty for a trivial move
1844
1822
  for (int l = to_level; l < level; l++) {
1845
1823
  if (vstorage->NumLevelFiles(l) > 0) {
@@ -2154,16 +2132,6 @@ Status DBImpl::RunManualCompaction(
2154
2132
  manual.begin, manual.end, &manual.manual_end, &manual_conflict,
2155
2133
  max_file_num_to_ignore, trim_ts)) == nullptr &&
2156
2134
  manual_conflict))) {
2157
- if (!scheduled) {
2158
- // There is a conflicting compaction
2159
- if (manual_compaction_paused_ > 0 || manual.canceled == true) {
2160
- // Stop waiting since it was canceled. Pretend the error came from
2161
- // compaction so the below cleanup/error handling code can process it.
2162
- manual.done = true;
2163
- manual.status =
2164
- Status::Incomplete(Status::SubCode::kManualCompactionPaused);
2165
- }
2166
- }
2167
2135
  if (!manual.done) {
2168
2136
  bg_cv_.Wait();
2169
2137
  }
@@ -2238,6 +2206,17 @@ Status DBImpl::RunManualCompaction(
2238
2206
  *final_output_level = compaction->output_level();
2239
2207
  }
2240
2208
  }
2209
+ if (!scheduled) {
2210
+ // There is nothing scheduled to wait on, so any cancellation can end the
2211
+ // manual now.
2212
+ if (manual_compaction_paused_ > 0 || manual.canceled == true) {
2213
+ // Stop waiting since it was canceled. Pretend the error came from
2214
+ // compaction so the below cleanup/error handling code can process it.
2215
+ manual.done = true;
2216
+ manual.status =
2217
+ Status::Incomplete(Status::SubCode::kManualCompactionPaused);
2218
+ }
2219
+ }
2241
2220
  }
2242
2221
 
2243
2222
  log_buffer.FlushBufferToLog();
@@ -2271,6 +2250,23 @@ void DBImpl::GenerateFlushRequest(const autovector<ColumnFamilyData*>& cfds,
2271
2250
  }
2272
2251
  }
2273
2252
 
2253
+ void DBImpl::NotifyOnManualFlushScheduled(autovector<ColumnFamilyData*> cfds,
2254
+ FlushReason flush_reason) {
2255
+ if (immutable_db_options_.listeners.size() == 0U) {
2256
+ return;
2257
+ }
2258
+ if (shutting_down_.load(std::memory_order_acquire)) {
2259
+ return;
2260
+ }
2261
+ std::vector<ManualFlushInfo> info;
2262
+ for (ColumnFamilyData* cfd : cfds) {
2263
+ info.push_back({cfd->GetID(), cfd->GetName(), flush_reason});
2264
+ }
2265
+ for (const auto& listener : immutable_db_options_.listeners) {
2266
+ listener->OnManualFlushScheduled(this, info);
2267
+ }
2268
+ }
2269
+
2274
2270
  Status DBImpl::FlushMemTable(ColumnFamilyData* cfd,
2275
2271
  const FlushOptions& flush_options,
2276
2272
  FlushReason flush_reason,
@@ -2310,13 +2306,15 @@ Status DBImpl::FlushMemTable(ColumnFamilyData* cfd,
2310
2306
  }
2311
2307
  WaitForPendingWrites();
2312
2308
 
2313
- if (!cfd->mem()->IsEmpty() || !cached_recoverable_state_empty_.load()) {
2309
+ if (!cfd->mem()->IsEmpty() || !cached_recoverable_state_empty_.load() ||
2310
+ IsRecoveryFlush(flush_reason)) {
2314
2311
  s = SwitchMemtable(cfd, &context);
2315
2312
  }
2316
2313
  const uint64_t flush_memtable_id = std::numeric_limits<uint64_t>::max();
2317
2314
  if (s.ok()) {
2318
2315
  if (cfd->imm()->NumNotFlushed() != 0 || !cfd->mem()->IsEmpty() ||
2319
- !cached_recoverable_state_empty_.load()) {
2316
+ !cached_recoverable_state_empty_.load() ||
2317
+ IsRecoveryFlush(flush_reason)) {
2320
2318
  FlushRequest req{flush_reason, {{cfd, flush_memtable_id}}};
2321
2319
  flush_reqs.emplace_back(std::move(req));
2322
2320
  memtable_ids_to_wait.emplace_back(
@@ -2375,7 +2373,14 @@ Status DBImpl::FlushMemTable(ColumnFamilyData* cfd,
2375
2373
  }
2376
2374
  }
2377
2375
  for (const auto& req : flush_reqs) {
2378
- SchedulePendingFlush(req);
2376
+ assert(req.cfd_to_max_mem_id_to_persist.size() == 1);
2377
+ ColumnFamilyData* loop_cfd =
2378
+ req.cfd_to_max_mem_id_to_persist.begin()->first;
2379
+ bool already_queued_for_flush = loop_cfd->queued_for_flush();
2380
+ bool flush_req_enqueued = SchedulePendingFlush(req);
2381
+ if (already_queued_for_flush || flush_req_enqueued) {
2382
+ loop_cfd->SetFlushSkipReschedule();
2383
+ }
2379
2384
  }
2380
2385
  MaybeScheduleFlushOrCompaction();
2381
2386
  }
@@ -2387,6 +2392,8 @@ Status DBImpl::FlushMemTable(ColumnFamilyData* cfd,
2387
2392
  }
2388
2393
  }
2389
2394
  }
2395
+
2396
+ NotifyOnManualFlushScheduled({cfd}, flush_reason);
2390
2397
  TEST_SYNC_POINT("DBImpl::FlushMemTable:AfterScheduleFlush");
2391
2398
  TEST_SYNC_POINT("DBImpl::FlushMemTable:BeforeWaitForBgFlush");
2392
2399
  if (s.ok() && flush_options.wait) {
@@ -2484,7 +2491,7 @@ Status DBImpl::AtomicFlushMemTables(
2484
2491
  }
2485
2492
  WaitForPendingWrites();
2486
2493
 
2487
- SelectColumnFamiliesForAtomicFlush(&cfds, candidate_cfds);
2494
+ SelectColumnFamiliesForAtomicFlush(&cfds, candidate_cfds, flush_reason);
2488
2495
 
2489
2496
  // Unref the newly generated candidate cfds (when not provided) in
2490
2497
  // `candidate_cfds`
@@ -2495,7 +2502,8 @@ Status DBImpl::AtomicFlushMemTables(
2495
2502
  }
2496
2503
 
2497
2504
  for (auto cfd : cfds) {
2498
- if (cfd->mem()->IsEmpty() && cached_recoverable_state_empty_.load()) {
2505
+ if (cfd->mem()->IsEmpty() && cached_recoverable_state_empty_.load() &&
2506
+ !IsRecoveryFlush(flush_reason)) {
2499
2507
  continue;
2500
2508
  }
2501
2509
  cfd->Ref();
@@ -2531,6 +2539,7 @@ Status DBImpl::AtomicFlushMemTables(
2531
2539
  }
2532
2540
  }
2533
2541
  }
2542
+ NotifyOnManualFlushScheduled(cfds, flush_reason);
2534
2543
  TEST_SYNC_POINT("DBImpl::AtomicFlushMemTables:AfterScheduleFlush");
2535
2544
  TEST_SYNC_POINT("DBImpl::AtomicFlushMemTables:BeforeWaitForBgFlush");
2536
2545
  if (s.ok() && flush_options.wait) {
@@ -2588,7 +2597,9 @@ Status DBImpl::RetryFlushesForErrorRecovery(FlushReason flush_reason,
2588
2597
  flush_reason,
2589
2598
  {{cfd,
2590
2599
  std::numeric_limits<uint64_t>::max() /* max_mem_id_to_persist */}}};
2591
- SchedulePendingFlush(flush_req);
2600
+ if (SchedulePendingFlush(flush_req)) {
2601
+ cfd->SetFlushSkipReschedule();
2602
+ };
2592
2603
  }
2593
2604
  }
2594
2605
  MaybeScheduleFlushOrCompaction();
@@ -2676,13 +2687,13 @@ Status DBImpl::WaitUntilFlushWouldNotStallWrites(ColumnFamilyData* cfd,
2676
2687
  // mode due to pending compaction bytes, but that's less common
2677
2688
  // No extra immutable Memtable will be created if the current Memtable is
2678
2689
  // empty.
2679
- int mem_to_flush = cfd->mem()->IsEmpty() ? 0 : 1;
2680
- write_stall_condition = ColumnFamilyData::GetWriteStallConditionAndCause(
2681
- cfd->imm()->NumNotFlushed() + mem_to_flush,
2682
- vstorage->l0_delay_trigger_count() + 1,
2683
- vstorage->estimated_compaction_needed_bytes(),
2684
- mutable_cf_options, *cfd->ioptions())
2685
- .first;
2690
+ write_stall_condition =
2691
+ ColumnFamilyData::GetWriteStallConditionAndCause(
2692
+ cfd->GetUnflushedMemTableCountForWriteStallCheck(),
2693
+ vstorage->l0_delay_trigger_count() + 1,
2694
+ vstorage->estimated_compaction_needed_bytes(), mutable_cf_options,
2695
+ *cfd->ioptions())
2696
+ .first;
2686
2697
  } while (write_stall_condition != WriteStallCondition::kNormal);
2687
2698
  }
2688
2699
  return Status::OK();
@@ -2994,13 +3005,14 @@ ColumnFamilyData* DBImpl::PickCompactionFromQueue(
2994
3005
  return cfd;
2995
3006
  }
2996
3007
 
2997
- void DBImpl::SchedulePendingFlush(const FlushRequest& flush_req) {
3008
+ bool DBImpl::SchedulePendingFlush(const FlushRequest& flush_req) {
2998
3009
  mutex_.AssertHeld();
3010
+ bool enqueued = false;
2999
3011
  if (reject_new_background_jobs_) {
3000
- return;
3012
+ return enqueued;
3001
3013
  }
3002
3014
  if (flush_req.cfd_to_max_mem_id_to_persist.empty()) {
3003
- return;
3015
+ return enqueued;
3004
3016
  }
3005
3017
  if (!immutable_db_options_.atomic_flush) {
3006
3018
  // For the non-atomic flush case, we never schedule multiple column
@@ -3015,6 +3027,7 @@ void DBImpl::SchedulePendingFlush(const FlushRequest& flush_req) {
3015
3027
  cfd->set_queued_for_flush(true);
3016
3028
  ++unscheduled_flushes_;
3017
3029
  flush_queue_.push_back(flush_req);
3030
+ enqueued = true;
3018
3031
  }
3019
3032
  } else {
3020
3033
  for (auto& iter : flush_req.cfd_to_max_mem_id_to_persist) {
@@ -3023,7 +3036,9 @@ void DBImpl::SchedulePendingFlush(const FlushRequest& flush_req) {
3023
3036
  }
3024
3037
  ++unscheduled_flushes_;
3025
3038
  flush_queue_.push_back(flush_req);
3039
+ enqueued = true;
3026
3040
  }
3041
+ return enqueued;
3027
3042
  }
3028
3043
 
3029
3044
  void DBImpl::SchedulePendingCompaction(ColumnFamilyData* cfd) {
@@ -3032,6 +3047,8 @@ void DBImpl::SchedulePendingCompaction(ColumnFamilyData* cfd) {
3032
3047
  return;
3033
3048
  }
3034
3049
  if (!cfd->queued_for_compaction() && cfd->NeedsCompaction()) {
3050
+ TEST_SYNC_POINT_CALLBACK("SchedulePendingCompaction::cfd",
3051
+ static_cast<void*>(cfd));
3035
3052
  AddToCompactionQueue(cfd);
3036
3053
  ++unscheduled_compactions_;
3037
3054
  }
@@ -3319,7 +3336,7 @@ void DBImpl::BackgroundCallFlush(Env::Priority thread_pri) {
3319
3336
  bg_cv_.SignalAll(); // In case a waiter can proceed despite the error
3320
3337
  mutex_.Unlock();
3321
3338
  ROCKS_LOG_ERROR(immutable_db_options_.info_log,
3322
- "[JOB %d] Waiting after background flush error: %s"
3339
+ "[JOB %d] Waiting after background flush error: %s, "
3323
3340
  "Accumulated background error counts: %" PRIu64,
3324
3341
  job_context.job_id, s.ToString().c_str(), error_cnt);
3325
3342
  log_buffer.FlushBufferToLog();
@@ -4149,7 +4166,7 @@ void DBImpl::BuildCompactionJobInfo(
4149
4166
  compaction_job_info->base_input_level = c->start_level();
4150
4167
  compaction_job_info->output_level = c->output_level();
4151
4168
  compaction_job_info->stats = compaction_job_stats;
4152
- const auto& input_table_properties = c->GetInputTableProperties();
4169
+ const auto& input_table_properties = c->GetOrInitInputTableProperties();
4153
4170
  const auto& output_table_properties = c->GetOutputTableProperties();
4154
4171
  compaction_job_info->table_properties.insert(input_table_properties.begin(),
4155
4172
  input_table_properties.end());
@@ -4344,6 +4361,7 @@ Status DBImpl::WaitForCompact(
4344
4361
  }
4345
4362
  if ((bg_bottom_compaction_scheduled_ || bg_compaction_scheduled_ ||
4346
4363
  bg_flush_scheduled_ || unscheduled_compactions_ ||
4364
+ (wait_for_compact_options.wait_for_purge && bg_purge_scheduled_) ||
4347
4365
  unscheduled_flushes_ || error_handler_.IsRecoveryInProgress()) &&
4348
4366
  (error_handler_.GetBGError().ok())) {
4349
4367
  if (wait_for_compact_options.timeout.count()) {
@@ -4351,6 +4369,7 @@ Status DBImpl::WaitForCompact(
4351
4369
  return Status::TimedOut();
4352
4370
  }
4353
4371
  } else {
4372
+ TEST_SYNC_POINT("DBImpl::WaitForCompact:InsideLoop");
4354
4373
  bg_cv_.Wait();
4355
4374
  }
4356
4375
  } else if (wait_for_compact_options.close_db) {
@@ -314,6 +314,11 @@ const autovector<uint64_t>& DBImpl::TEST_GetFilesToQuarantine() const {
314
314
  return error_handler_.GetFilesToQuarantine();
315
315
  }
316
316
 
317
+ void DBImpl::TEST_DeleteObsoleteFiles() {
318
+ InstrumentedMutexLock l(&mutex_);
319
+ DeleteObsoleteFiles();
320
+ }
321
+
317
322
  size_t DBImpl::TEST_EstimateInMemoryStatsHistorySize() const {
318
323
  InstrumentedMutexLock l(&const_cast<DBImpl*>(this)->stats_history_mutex_);
319
324
  return EstimateInMemoryStatsHistorySize();