rocksdb-native 2.2.0 → 2.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (261)
  1. package/binding.c +92 -10
  2. package/index.js +9 -0
  3. package/lib/batch.js +11 -1
  4. package/lib/iterator.js +3 -1
  5. package/lib/snapshot.js +21 -0
  6. package/package.json +1 -1
  7. package/prebuilds/darwin-arm64/rocksdb-native.bare +0 -0
  8. package/prebuilds/darwin-arm64/rocksdb-native.node +0 -0
  9. package/prebuilds/darwin-x64/rocksdb-native.bare +0 -0
  10. package/prebuilds/darwin-x64/rocksdb-native.node +0 -0
  11. package/prebuilds/linux-arm64/rocksdb-native.bare +0 -0
  12. package/prebuilds/linux-arm64/rocksdb-native.node +0 -0
  13. package/prebuilds/linux-x64/rocksdb-native.bare +0 -0
  14. package/prebuilds/linux-x64/rocksdb-native.node +0 -0
  15. package/prebuilds/win32-x64/rocksdb-native.bare +0 -0
  16. package/prebuilds/win32-x64/rocksdb-native.node +0 -0
  17. package/vendor/librocksdb/include/rocksdb.h +38 -4
  18. package/vendor/librocksdb/src/rocksdb.cc +114 -14
  19. package/vendor/librocksdb/vendor/rocksdb/CMakeLists.txt +21 -4
  20. package/vendor/librocksdb/vendor/rocksdb/cache/secondary_cache_adapter.cc +6 -3
  21. package/vendor/librocksdb/vendor/rocksdb/db/arena_wrapped_db_iter.cc +4 -4
  22. package/vendor/librocksdb/vendor/rocksdb/db/arena_wrapped_db_iter.h +4 -2
  23. package/vendor/librocksdb/vendor/rocksdb/db/attribute_group_iterator_impl.cc +20 -0
  24. package/vendor/librocksdb/vendor/rocksdb/db/attribute_group_iterator_impl.h +83 -0
  25. package/vendor/librocksdb/vendor/rocksdb/db/builder.cc +9 -5
  26. package/vendor/librocksdb/vendor/rocksdb/db/builder.h +1 -1
  27. package/vendor/librocksdb/vendor/rocksdb/db/c.cc +231 -6
  28. package/vendor/librocksdb/vendor/rocksdb/db/c_test.c +202 -2
  29. package/vendor/librocksdb/vendor/rocksdb/db/coalescing_iterator.cc +47 -0
  30. package/vendor/librocksdb/vendor/rocksdb/db/coalescing_iterator.h +79 -0
  31. package/vendor/librocksdb/vendor/rocksdb/db/column_family.cc +28 -0
  32. package/vendor/librocksdb/vendor/rocksdb/db/column_family.h +17 -0
  33. package/vendor/librocksdb/vendor/rocksdb/db/compaction/compaction.cc +8 -1
  34. package/vendor/librocksdb/vendor/rocksdb/db/compaction/compaction.h +11 -9
  35. package/vendor/librocksdb/vendor/rocksdb/db/compaction/compaction_iterator.cc +50 -23
  36. package/vendor/librocksdb/vendor/rocksdb/db/compaction/compaction_iterator.h +13 -0
  37. package/vendor/librocksdb/vendor/rocksdb/db/compaction/compaction_job.cc +22 -25
  38. package/vendor/librocksdb/vendor/rocksdb/db/compaction/compaction_job.h +2 -0
  39. package/vendor/librocksdb/vendor/rocksdb/db/compaction/compaction_outputs.cc +8 -1
  40. package/vendor/librocksdb/vendor/rocksdb/db/compaction/compaction_outputs.h +1 -0
  41. package/vendor/librocksdb/vendor/rocksdb/db/compaction/compaction_picker.cc +40 -17
  42. package/vendor/librocksdb/vendor/rocksdb/db/compaction/compaction_picker.h +20 -14
  43. package/vendor/librocksdb/vendor/rocksdb/db/compaction/compaction_picker_level.cc +11 -6
  44. package/vendor/librocksdb/vendor/rocksdb/db/compaction/compaction_picker_universal.cc +77 -24
  45. package/vendor/librocksdb/vendor/rocksdb/db/compaction/compaction_service_job.cc +2 -0
  46. package/vendor/librocksdb/vendor/rocksdb/db/convenience.cc +3 -0
  47. package/vendor/librocksdb/vendor/rocksdb/db/db_filesnapshot.cc +125 -31
  48. package/vendor/librocksdb/vendor/rocksdb/db/db_impl/db_impl.cc +457 -231
  49. package/vendor/librocksdb/vendor/rocksdb/db/db_impl/db_impl.h +172 -73
  50. package/vendor/librocksdb/vendor/rocksdb/db/db_impl/db_impl_compaction_flush.cc +152 -133
  51. package/vendor/librocksdb/vendor/rocksdb/db/db_impl/db_impl_debug.cc +5 -0
  52. package/vendor/librocksdb/vendor/rocksdb/db/db_impl/db_impl_files.cc +58 -52
  53. package/vendor/librocksdb/vendor/rocksdb/db/db_impl/db_impl_follower.cc +348 -0
  54. package/vendor/librocksdb/vendor/rocksdb/db/db_impl/db_impl_follower.h +54 -0
  55. package/vendor/librocksdb/vendor/rocksdb/db/db_impl/db_impl_open.cc +136 -117
  56. package/vendor/librocksdb/vendor/rocksdb/db/db_impl/db_impl_secondary.cc +4 -3
  57. package/vendor/librocksdb/vendor/rocksdb/db/db_impl/db_impl_secondary.h +7 -6
  58. package/vendor/librocksdb/vendor/rocksdb/db/db_impl/db_impl_write.cc +134 -80
  59. package/vendor/librocksdb/vendor/rocksdb/db/db_iter.cc +11 -0
  60. package/vendor/librocksdb/vendor/rocksdb/db/db_test2.cc +1 -1
  61. package/vendor/librocksdb/vendor/rocksdb/db/db_test_util.cc +11 -1
  62. package/vendor/librocksdb/vendor/rocksdb/db/db_test_util.h +11 -7
  63. package/vendor/librocksdb/vendor/rocksdb/db/dbformat.cc +19 -4
  64. package/vendor/librocksdb/vendor/rocksdb/db/dbformat.h +3 -2
  65. package/vendor/librocksdb/vendor/rocksdb/db/error_handler.cc +34 -39
  66. package/vendor/librocksdb/vendor/rocksdb/db/error_handler.h +3 -4
  67. package/vendor/librocksdb/vendor/rocksdb/db/event_helpers.cc +6 -3
  68. package/vendor/librocksdb/vendor/rocksdb/db/experimental.cc +3 -2
  69. package/vendor/librocksdb/vendor/rocksdb/db/external_sst_file_ingestion_job.cc +76 -18
  70. package/vendor/librocksdb/vendor/rocksdb/db/external_sst_file_ingestion_job.h +11 -0
  71. package/vendor/librocksdb/vendor/rocksdb/db/flush_job.cc +37 -5
  72. package/vendor/librocksdb/vendor/rocksdb/db/flush_job.h +14 -0
  73. package/vendor/librocksdb/vendor/rocksdb/db/import_column_family_job.cc +49 -45
  74. package/vendor/librocksdb/vendor/rocksdb/db/internal_stats.cc +60 -1
  75. package/vendor/librocksdb/vendor/rocksdb/db/internal_stats.h +20 -1
  76. package/vendor/librocksdb/vendor/rocksdb/db/log_reader.cc +15 -6
  77. package/vendor/librocksdb/vendor/rocksdb/db/log_writer.cc +59 -10
  78. package/vendor/librocksdb/vendor/rocksdb/db/log_writer.h +8 -0
  79. package/vendor/librocksdb/vendor/rocksdb/db/memtable.cc +24 -40
  80. package/vendor/librocksdb/vendor/rocksdb/db/memtable.h +10 -10
  81. package/vendor/librocksdb/vendor/rocksdb/db/memtable_list.cc +9 -8
  82. package/vendor/librocksdb/vendor/rocksdb/db/multi_cf_iterator_impl.h +296 -0
  83. package/vendor/librocksdb/vendor/rocksdb/db/range_tombstone_fragmenter.h +8 -10
  84. package/vendor/librocksdb/vendor/rocksdb/db/repair.cc +4 -3
  85. package/vendor/librocksdb/vendor/rocksdb/db/seqno_to_time_mapping.cc +30 -0
  86. package/vendor/librocksdb/vendor/rocksdb/db/seqno_to_time_mapping.h +9 -0
  87. package/vendor/librocksdb/vendor/rocksdb/db/table_cache.cc +17 -2
  88. package/vendor/librocksdb/vendor/rocksdb/db/table_cache.h +9 -1
  89. package/vendor/librocksdb/vendor/rocksdb/db/table_properties_collector.h +9 -2
  90. package/vendor/librocksdb/vendor/rocksdb/db/transaction_log_impl.cc +3 -3
  91. package/vendor/librocksdb/vendor/rocksdb/db/transaction_log_impl.h +7 -7
  92. package/vendor/librocksdb/vendor/rocksdb/db/version_edit.cc +0 -1
  93. package/vendor/librocksdb/vendor/rocksdb/db/version_edit_handler.cc +39 -5
  94. package/vendor/librocksdb/vendor/rocksdb/db/version_edit_handler.h +24 -15
  95. package/vendor/librocksdb/vendor/rocksdb/db/version_set.cc +117 -64
  96. package/vendor/librocksdb/vendor/rocksdb/db/version_set.h +27 -10
  97. package/vendor/librocksdb/vendor/rocksdb/db/wal_manager.cc +37 -29
  98. package/vendor/librocksdb/vendor/rocksdb/db/wal_manager.h +6 -5
  99. package/vendor/librocksdb/vendor/rocksdb/db/wide/wide_columns.cc +2 -3
  100. package/vendor/librocksdb/vendor/rocksdb/db/wide/wide_columns_helper.cc +6 -0
  101. package/vendor/librocksdb/vendor/rocksdb/db/write_batch.cc +89 -31
  102. package/vendor/librocksdb/vendor/rocksdb/db/write_thread.cc +53 -5
  103. package/vendor/librocksdb/vendor/rocksdb/db/write_thread.h +36 -4
  104. package/vendor/librocksdb/vendor/rocksdb/env/composite_env_wrapper.h +21 -0
  105. package/vendor/librocksdb/vendor/rocksdb/env/env.cc +15 -0
  106. package/vendor/librocksdb/vendor/rocksdb/env/fs_on_demand.cc +331 -0
  107. package/vendor/librocksdb/vendor/rocksdb/env/fs_on_demand.h +139 -0
  108. package/vendor/librocksdb/vendor/rocksdb/env/io_posix.cc +8 -6
  109. package/vendor/librocksdb/vendor/rocksdb/env/io_posix.h +1 -1
  110. package/vendor/librocksdb/vendor/rocksdb/file/delete_scheduler.cc +130 -27
  111. package/vendor/librocksdb/vendor/rocksdb/file/delete_scheduler.h +61 -8
  112. package/vendor/librocksdb/vendor/rocksdb/file/file_util.cc +25 -4
  113. package/vendor/librocksdb/vendor/rocksdb/file/file_util.h +15 -0
  114. package/vendor/librocksdb/vendor/rocksdb/file/sequence_file_reader.cc +1 -0
  115. package/vendor/librocksdb/vendor/rocksdb/file/sequence_file_reader.h +9 -4
  116. package/vendor/librocksdb/vendor/rocksdb/file/sst_file_manager_impl.cc +18 -0
  117. package/vendor/librocksdb/vendor/rocksdb/file/sst_file_manager_impl.h +31 -4
  118. package/vendor/librocksdb/vendor/rocksdb/file/writable_file_writer.cc +40 -38
  119. package/vendor/librocksdb/vendor/rocksdb/file/writable_file_writer.h +48 -15
  120. package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/advanced_options.h +12 -3
  121. package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/attribute_groups.h +114 -0
  122. package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/c.h +90 -0
  123. package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/cache.h +5 -0
  124. package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/comparator.h +27 -0
  125. package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/db.h +71 -12
  126. package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/env.h +9 -0
  127. package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/experimental.h +5 -0
  128. package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/file_system.h +14 -0
  129. package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/iterator.h +9 -71
  130. package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/iterator_base.h +90 -0
  131. package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/listener.h +21 -0
  132. package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/options.h +125 -12
  133. package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/perf_context.h +1 -1
  134. package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/sst_file_reader.h +11 -1
  135. package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/table.h +6 -6
  136. package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/table_properties.h +19 -0
  137. package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/transaction_log.h +12 -6
  138. package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/types.h +12 -0
  139. package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/universal_compaction.h +31 -0
  140. package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/user_write_callback.h +29 -0
  141. package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/utilities/cache_dump_load.h +4 -0
  142. package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/utilities/checkpoint.h +4 -2
  143. package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/utilities/customizable_util.h +0 -1
  144. package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/utilities/env_mirror.h +1 -1
  145. package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/utilities/ldb_cmd.h +24 -7
  146. package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/utilities/option_change_migration.h +4 -4
  147. package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/utilities/stackable_db.h +24 -5
  148. package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/utilities/table_properties_collectors.h +46 -0
  149. package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/utilities/transaction.h +42 -17
  150. package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/utilities/transaction_db.h +5 -0
  151. package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/utilities/types_util.h +36 -0
  152. package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/utilities/write_batch_with_index.h +71 -3
  153. package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/version.h +2 -2
  154. package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/wide_columns.h +87 -72
  155. package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/write_batch_base.h +1 -1
  156. package/vendor/librocksdb/vendor/rocksdb/memory/memory_allocator.cc +1 -0
  157. package/vendor/librocksdb/vendor/rocksdb/options/cf_options.cc +13 -2
  158. package/vendor/librocksdb/vendor/rocksdb/options/cf_options.h +6 -2
  159. package/vendor/librocksdb/vendor/rocksdb/options/db_options.cc +27 -1
  160. package/vendor/librocksdb/vendor/rocksdb/options/db_options.h +10 -3
  161. package/vendor/librocksdb/vendor/rocksdb/options/options.cc +3 -0
  162. package/vendor/librocksdb/vendor/rocksdb/options/options_helper.cc +1 -0
  163. package/vendor/librocksdb/vendor/rocksdb/port/jemalloc_helper.h +2 -2
  164. package/vendor/librocksdb/vendor/rocksdb/port/stack_trace.cc +1 -0
  165. package/vendor/librocksdb/vendor/rocksdb/port/win/port_win.cc +3 -2
  166. package/vendor/librocksdb/vendor/rocksdb/table/block_based/binary_search_index_reader.cc +1 -2
  167. package/vendor/librocksdb/vendor/rocksdb/table/block_based/block_based_table_builder.cc +47 -31
  168. package/vendor/librocksdb/vendor/rocksdb/table/block_based/block_based_table_factory.cc +15 -0
  169. package/vendor/librocksdb/vendor/rocksdb/table/block_based/block_based_table_iterator.cc +37 -18
  170. package/vendor/librocksdb/vendor/rocksdb/table/block_based/block_based_table_iterator.h +10 -3
  171. package/vendor/librocksdb/vendor/rocksdb/table/block_based/block_based_table_reader.cc +102 -41
  172. package/vendor/librocksdb/vendor/rocksdb/table/block_based/block_based_table_reader.h +15 -7
  173. package/vendor/librocksdb/vendor/rocksdb/table/block_based/block_based_table_reader_impl.h +1 -3
  174. package/vendor/librocksdb/vendor/rocksdb/table/block_based/block_based_table_reader_sync_and_async.h +5 -6
  175. package/vendor/librocksdb/vendor/rocksdb/table/block_based/block_cache.h +31 -0
  176. package/vendor/librocksdb/vendor/rocksdb/table/block_based/block_prefetcher.cc +6 -0
  177. package/vendor/librocksdb/vendor/rocksdb/table/block_based/cachable_entry.h +10 -5
  178. package/vendor/librocksdb/vendor/rocksdb/table/block_based/filter_block.h +34 -28
  179. package/vendor/librocksdb/vendor/rocksdb/table/block_based/filter_block_reader_common.cc +17 -11
  180. package/vendor/librocksdb/vendor/rocksdb/table/block_based/filter_block_reader_common.h +5 -2
  181. package/vendor/librocksdb/vendor/rocksdb/table/block_based/filter_policy.cc +12 -3
  182. package/vendor/librocksdb/vendor/rocksdb/table/block_based/full_filter_block.cc +37 -30
  183. package/vendor/librocksdb/vendor/rocksdb/table/block_based/full_filter_block.h +11 -13
  184. package/vendor/librocksdb/vendor/rocksdb/table/block_based/hash_index_reader.cc +1 -2
  185. package/vendor/librocksdb/vendor/rocksdb/table/block_based/index_builder.cc +62 -53
  186. package/vendor/librocksdb/vendor/rocksdb/table/block_based/index_builder.h +60 -38
  187. package/vendor/librocksdb/vendor/rocksdb/table/block_based/index_reader_common.cc +14 -9
  188. package/vendor/librocksdb/vendor/rocksdb/table/block_based/index_reader_common.h +4 -1
  189. package/vendor/librocksdb/vendor/rocksdb/table/block_based/partitioned_filter_block.cc +135 -94
  190. package/vendor/librocksdb/vendor/rocksdb/table/block_based/partitioned_filter_block.h +52 -46
  191. package/vendor/librocksdb/vendor/rocksdb/table/block_based/partitioned_index_reader.cc +51 -13
  192. package/vendor/librocksdb/vendor/rocksdb/table/block_based/partitioned_index_reader.h +2 -0
  193. package/vendor/librocksdb/vendor/rocksdb/table/block_based/uncompression_dict_reader.cc +3 -11
  194. package/vendor/librocksdb/vendor/rocksdb/table/block_based/uncompression_dict_reader.h +2 -3
  195. package/vendor/librocksdb/vendor/rocksdb/table/block_fetcher.cc +8 -10
  196. package/vendor/librocksdb/vendor/rocksdb/table/block_fetcher.h +2 -1
  197. package/vendor/librocksdb/vendor/rocksdb/table/compaction_merging_iterator.cc +9 -10
  198. package/vendor/librocksdb/vendor/rocksdb/table/compaction_merging_iterator.h +3 -2
  199. package/vendor/librocksdb/vendor/rocksdb/table/format.cc +1 -2
  200. package/vendor/librocksdb/vendor/rocksdb/table/iterator.cc +4 -0
  201. package/vendor/librocksdb/vendor/rocksdb/table/merging_iterator.cc +18 -13
  202. package/vendor/librocksdb/vendor/rocksdb/table/merging_iterator.h +5 -3
  203. package/vendor/librocksdb/vendor/rocksdb/table/meta_blocks.cc +18 -4
  204. package/vendor/librocksdb/vendor/rocksdb/table/meta_blocks.h +4 -0
  205. package/vendor/librocksdb/vendor/rocksdb/table/plain/plain_table_builder.cc +2 -2
  206. package/vendor/librocksdb/vendor/rocksdb/table/sst_file_dumper.cc +6 -6
  207. package/vendor/librocksdb/vendor/rocksdb/table/sst_file_reader.cc +24 -2
  208. package/vendor/librocksdb/vendor/rocksdb/table/sst_file_writer_collectors.h +3 -1
  209. package/vendor/librocksdb/vendor/rocksdb/table/table_builder.h +8 -7
  210. package/vendor/librocksdb/vendor/rocksdb/table/table_iterator.h +69 -0
  211. package/vendor/librocksdb/vendor/rocksdb/table/table_reader.h +9 -0
  212. package/vendor/librocksdb/vendor/rocksdb/test_util/testutil.cc +25 -0
  213. package/vendor/librocksdb/vendor/rocksdb/test_util/testutil.h +12 -0
  214. package/vendor/librocksdb/vendor/rocksdb/tools/db_bench_tool.cc +32 -0
  215. package/vendor/librocksdb/vendor/rocksdb/tools/ldb_cmd.cc +618 -124
  216. package/vendor/librocksdb/vendor/rocksdb/tools/ldb_cmd_impl.h +19 -1
  217. package/vendor/librocksdb/vendor/rocksdb/tools/ldb_tool.cc +9 -0
  218. package/vendor/librocksdb/vendor/rocksdb/util/aligned_storage.h +24 -0
  219. package/vendor/librocksdb/vendor/rocksdb/util/autovector.h +4 -0
  220. package/vendor/librocksdb/vendor/rocksdb/util/comparator.cc +12 -0
  221. package/vendor/librocksdb/vendor/rocksdb/util/filter_bench.cc +1 -1
  222. package/vendor/librocksdb/vendor/rocksdb/util/random.cc +2 -1
  223. package/vendor/librocksdb/vendor/rocksdb/util/stderr_logger.cc +3 -4
  224. package/vendor/librocksdb/vendor/rocksdb/util/stderr_logger.h +1 -1
  225. package/vendor/librocksdb/vendor/rocksdb/util/udt_util.cc +33 -0
  226. package/vendor/librocksdb/vendor/rocksdb/util/udt_util.h +7 -0
  227. package/vendor/librocksdb/vendor/rocksdb/util/write_batch_util.h +5 -0
  228. package/vendor/librocksdb/vendor/rocksdb/util/xxhash.h +36 -29
  229. package/vendor/librocksdb/vendor/rocksdb/utilities/blob_db/blob_db_impl.h +3 -0
  230. package/vendor/librocksdb/vendor/rocksdb/utilities/blob_db/blob_db_impl_filesnapshot.cc +20 -0
  231. package/vendor/librocksdb/vendor/rocksdb/utilities/cache_dump_load_impl.cc +29 -9
  232. package/vendor/librocksdb/vendor/rocksdb/utilities/cache_dump_load_impl.h +14 -3
  233. package/vendor/librocksdb/vendor/rocksdb/utilities/debug.cc +16 -4
  234. package/vendor/librocksdb/vendor/rocksdb/utilities/fault_injection_fs.cc +677 -248
  235. package/vendor/librocksdb/vendor/rocksdb/utilities/fault_injection_fs.h +325 -158
  236. package/vendor/librocksdb/vendor/rocksdb/utilities/option_change_migration/option_change_migration.cc +1 -8
  237. package/vendor/librocksdb/vendor/rocksdb/utilities/table_properties_collectors/compact_for_tiering_collector.cc +144 -0
  238. package/vendor/librocksdb/vendor/rocksdb/utilities/table_properties_collectors/compact_for_tiering_collector.h +45 -0
  239. package/vendor/librocksdb/vendor/rocksdb/utilities/table_properties_collectors/compact_on_deletion_collector.cc +12 -0
  240. package/vendor/librocksdb/vendor/rocksdb/utilities/transactions/lock/range/range_tree/lib/portability/toku_time.h +1 -1
  241. package/vendor/librocksdb/vendor/rocksdb/utilities/transactions/lock/range/range_tree/lib/util/growable_array.h +3 -3
  242. package/vendor/librocksdb/vendor/rocksdb/utilities/transactions/pessimistic_transaction.cc +116 -20
  243. package/vendor/librocksdb/vendor/rocksdb/utilities/transactions/pessimistic_transaction.h +33 -1
  244. package/vendor/librocksdb/vendor/rocksdb/utilities/transactions/pessimistic_transaction_db.cc +78 -13
  245. package/vendor/librocksdb/vendor/rocksdb/utilities/transactions/pessimistic_transaction_db.h +33 -1
  246. package/vendor/librocksdb/vendor/rocksdb/utilities/transactions/transaction_base.cc +106 -7
  247. package/vendor/librocksdb/vendor/rocksdb/utilities/transactions/transaction_base.h +68 -10
  248. package/vendor/librocksdb/vendor/rocksdb/utilities/transactions/transaction_test.h +7 -3
  249. package/vendor/librocksdb/vendor/rocksdb/utilities/transactions/transaction_util.cc +8 -5
  250. package/vendor/librocksdb/vendor/rocksdb/utilities/transactions/transaction_util.h +7 -4
  251. package/vendor/librocksdb/vendor/rocksdb/utilities/transactions/write_prepared_txn.cc +18 -12
  252. package/vendor/librocksdb/vendor/rocksdb/utilities/transactions/write_prepared_txn_db.cc +4 -4
  253. package/vendor/librocksdb/vendor/rocksdb/utilities/transactions/write_prepared_txn_db.h +17 -0
  254. package/vendor/librocksdb/vendor/rocksdb/utilities/transactions/write_unprepared_txn.cc +11 -9
  255. package/vendor/librocksdb/vendor/rocksdb/utilities/transactions/write_unprepared_txn_db.cc +2 -1
  256. package/vendor/librocksdb/vendor/rocksdb/utilities/types_util.cc +88 -0
  257. package/vendor/librocksdb/vendor/rocksdb/utilities/write_batch_with_index/write_batch_with_index.cc +313 -14
  258. package/vendor/librocksdb/vendor/rocksdb/utilities/write_batch_with_index/write_batch_with_index_internal.cc +7 -0
  259. package/vendor/librocksdb/vendor/rocksdb/utilities/write_batch_with_index/write_batch_with_index_internal.h +1 -1
  260. package/vendor/librocksdb/vendor/rocksdb/db/multi_cf_iterator.cc +0 -102
  261. package/vendor/librocksdb/vendor/rocksdb/db/multi_cf_iterator.h +0 -159
@@ -0,0 +1,139 @@
1
+ // Copyright (c) 2024-present, Facebook, Inc. All rights reserved.
2
+ // This source code is licensed under both the GPLv2 (found in the
3
+ // COPYING file in the root directory) and Apache 2.0 License
4
+ // (found in the LICENSE.Apache file in the root directory).
5
+
6
+ #pragma once
7
+ #include <string>
8
+
9
+ #include "rocksdb/file_system.h"
10
+
11
+ namespace ROCKSDB_NAMESPACE {
12
+ // A FileSystem that links files to a local (destination) directory from a
13
+ // corresponding remote (source) directory on demand. The decision to link
14
+ // depends on the file type, with appendable or rename-able files, such as
15
+ // descriptors, logs, CURRENT, being read in place in the remote directory,
16
+ // and SST files being linked. In the future, files read in place may be
17
+ // mirrored to the local directory, so the local dir has a complete database
18
+ // for troubleshooting purposes.
19
+
20
+ class OnDemandFileSystem : public FileSystemWrapper {
21
+ public:
22
+ OnDemandFileSystem(const std::shared_ptr<FileSystem>& target,
23
+ const std::string& remote_path,
24
+ const std::string& local_path)
25
+ : FileSystemWrapper(target),
26
+ remote_path_(remote_path),
27
+ local_path_(local_path) {}
28
+
29
+ const char* Name() const override { return "OnDemandFileSystem"; }
30
+
31
+ IOStatus NewSequentialFile(const std::string& fname,
32
+ const FileOptions& file_opts,
33
+ std::unique_ptr<FSSequentialFile>* result,
34
+ IODebugContext* dbg) override;
35
+
36
+ IOStatus NewRandomAccessFile(const std::string& fname,
37
+ const FileOptions& file_opts,
38
+ std::unique_ptr<FSRandomAccessFile>* result,
39
+ IODebugContext* dbg) override;
40
+
41
+ IOStatus NewWritableFile(const std::string& fname,
42
+ const FileOptions& file_opts,
43
+ std::unique_ptr<FSWritableFile>* result,
44
+ IODebugContext* dbg) override;
45
+
46
+ IOStatus ReuseWritableFile(const std::string& /*fname*/,
47
+ const std::string& /*old_fname*/,
48
+ const FileOptions& /*fopts*/,
49
+ std::unique_ptr<FSWritableFile>* /*result*/,
50
+ IODebugContext* /*dbg*/) override {
51
+ return IOStatus::NotSupported("ReuseWritableFile");
52
+ }
53
+
54
+ IOStatus NewDirectory(const std::string& name, const IOOptions& io_opts,
55
+ std::unique_ptr<FSDirectory>* result,
56
+ IODebugContext* dbg) override;
57
+
58
+ IOStatus FileExists(const std::string& fname, const IOOptions& options,
59
+ IODebugContext* dbg) override;
60
+
61
+ IOStatus GetChildren(const std::string& dir, const IOOptions& options,
62
+ std::vector<std::string>* result,
63
+ IODebugContext* dbg) override;
64
+
65
+ IOStatus GetChildrenFileAttributes(const std::string& dir,
66
+ const IOOptions& options,
67
+ std::vector<FileAttributes>* result,
68
+ IODebugContext* dbg) override;
69
+
70
+ IOStatus GetFileSize(const std::string& fname, const IOOptions& options,
71
+ uint64_t* file_size, IODebugContext* dbg) override;
72
+
73
+ private:
74
+ bool CheckPathAndAdjust(const std::string& orig, const std::string& replace,
75
+ std::string& path);
76
+ bool LookupFileType(const std::string& name, FileType* type);
77
+
78
+ const std::string remote_path_;
79
+ const std::string local_path_;
80
+ };
81
+
82
+ // A wrapper class around an FSSequentialFile object. It's mainly
83
+ // intended to be used for appendable files like MANIFEST and logs.
84
+ // Beneath the covers, it tracks when EOF is reached, and reopens
85
+ // the file in order to read the latest appended data. This is
86
+ // necessary on some distributed file systems as they may have
87
+ // stale metadata about the file.
88
+ // TODO: Mirror the data read to a local file for troubleshooting
89
+ // purposes, as well as recovery in case the source dir is
90
+ // deleted.
91
+ class OnDemandSequentialFile : public FSSequentialFile {
92
+ public:
93
+ OnDemandSequentialFile(std::unique_ptr<FSSequentialFile>&& file,
94
+ OnDemandFileSystem* fs, const FileOptions& file_opts,
95
+ const std::string& path)
96
+ : file_(std::move(file)),
97
+ fs_(fs),
98
+ file_opts_(file_opts),
99
+ path_(path),
100
+ eof_(false),
101
+ offset_(0) {}
102
+
103
+ virtual ~OnDemandSequentialFile() {}
104
+
105
+ IOStatus Read(size_t n, const IOOptions& options, Slice* result,
106
+ char* scratch, IODebugContext* dbg) override;
107
+
108
+ IOStatus Skip(uint64_t n) override;
109
+
110
+ bool use_direct_io() const override;
111
+
112
+ size_t GetRequiredBufferAlignment() const override;
113
+
114
+ IOStatus InvalidateCache(size_t /*offset*/, size_t /*length*/) override {
115
+ return IOStatus::NotSupported("InvalidateCache not supported.");
116
+ }
117
+
118
+ IOStatus PositionedRead(uint64_t /*offset*/, size_t /*n*/,
119
+ const IOOptions& /*options*/, Slice* /*result*/,
120
+ char* /*scratch*/, IODebugContext* /*dbg*/) override {
121
+ return IOStatus::NotSupported("PositionedRead");
122
+ }
123
+
124
+ Temperature GetTemperature() const override;
125
+
126
+ private:
127
+ std::unique_ptr<FSSequentialFile> file_;
128
+ OnDemandFileSystem* fs_;
129
+ const FileOptions file_opts_;
130
+ const std::string path_;
131
+ bool eof_;
132
+ uint64_t offset_;
133
+ };
134
+
135
+ std::shared_ptr<FileSystem> NewOnDemandFileSystem(
136
+ const std::shared_ptr<FileSystem>& fs, std::string remote_path,
137
+ std::string local_path);
138
+
139
+ } // namespace ROCKSDB_NAMESPACE
@@ -28,7 +28,7 @@
28
28
  #include <cstdio>
29
29
  #include <cstdlib>
30
30
  #include <cstring>
31
- #ifdef OS_LINUX
31
+ #if defined(OS_LINUX) || defined(OS_ANDROID)
32
32
  #include <sys/statfs.h>
33
33
  #include <sys/sysmacros.h>
34
34
  #endif
@@ -457,7 +457,6 @@ Status PosixHelper::GetLogicalBlockSizeOfDirectory(const std::string& directory,
457
457
  size_t* size) {
458
458
  int fd = open(directory.c_str(), O_DIRECTORY | O_RDONLY);
459
459
  if (fd == -1) {
460
- close(fd);
461
460
  return Status::IOError("Cannot open directory " + directory);
462
461
  }
463
462
  *size = PosixHelper::GetLogicalBlockSizeOfFd(fd);
@@ -1377,9 +1376,10 @@ IOStatus PosixWritableFile::Close(const IOOptions& /*opts*/,
1377
1376
  // After ftruncate, we check whether ftruncate has the correct behavior.
1378
1377
  // If not, we should hack it with FALLOC_FL_PUNCH_HOLE
1379
1378
  if (result == 0 &&
1380
- (file_stats.st_size + file_stats.st_blksize - 1) /
1381
- file_stats.st_blksize !=
1382
- file_stats.st_blocks / (file_stats.st_blksize / 512)) {
1379
+ static_cast<size_t>((file_stats.st_size + file_stats.st_blksize - 1) /
1380
+ file_stats.st_blksize) !=
1381
+ static_cast<size_t>(file_stats.st_blocks /
1382
+ (file_stats.st_blksize / 512))) {
1383
1383
  IOSTATS_TIMER_GUARD(allocate_nanos);
1384
1384
  if (allow_fallocate_) {
1385
1385
  fallocate(fd_, FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE, filesize_,
@@ -1441,10 +1441,12 @@ void PosixWritableFile::SetWriteLifeTimeHint(Env::WriteLifeTimeHint hint) {
1441
1441
  #ifdef OS_LINUX
1442
1442
  // Suppress Valgrind "Unimplemented functionality" error.
1443
1443
  #ifndef ROCKSDB_VALGRIND_RUN
1444
+ uint64_t fcntl_hint = hint;
1445
+
1444
1446
  if (hint == write_hint_) {
1445
1447
  return;
1446
1448
  }
1447
- if (fcntl(fd_, F_SET_RW_HINT, &hint) == 0) {
1449
+ if (fcntl(fd_, F_SET_RW_HINT, &fcntl_hint) == 0) {
1448
1450
  write_hint_ = hint;
1449
1451
  }
1450
1452
  #else
@@ -30,7 +30,7 @@
30
30
  // For non linux platform, the following macros are used only as place
31
31
  // holder.
32
32
  #if !(defined OS_LINUX) && !(defined OS_FREEBSD) && !(defined CYGWIN) && \
33
- !(defined OS_AIX)
33
+ !(defined OS_AIX) && !(defined OS_ANDROID)
34
34
  #define POSIX_FADV_NORMAL 0 /* [MC1] no further special treatment */
35
35
  #define POSIX_FADV_RANDOM 1 /* [MC1] expect random page refs */
36
36
  #define POSIX_FADV_SEQUENTIAL 2 /* [MC1] expect sequential page refs */
@@ -31,6 +31,7 @@ DeleteScheduler::DeleteScheduler(SystemClock* clock, FileSystem* fs,
31
31
  total_trash_size_(0),
32
32
  rate_bytes_per_sec_(rate_bytes_per_sec),
33
33
  pending_files_(0),
34
+ next_trash_bucket_(0),
34
35
  bytes_max_delete_chunk_(bytes_max_delete_chunk),
35
36
  closing_(false),
36
37
  cv_(&mu_),
@@ -60,30 +61,72 @@ DeleteScheduler::~DeleteScheduler() {
60
61
  Status DeleteScheduler::DeleteFile(const std::string& file_path,
61
62
  const std::string& dir_to_sync,
62
63
  const bool force_bg) {
64
+ uint64_t total_size = sst_file_manager_->GetTotalSize();
63
65
  if (rate_bytes_per_sec_.load() <= 0 ||
64
66
  (!force_bg &&
65
- total_trash_size_.load() >
66
- sst_file_manager_->GetTotalSize() * max_trash_db_ratio_.load())) {
67
+ total_trash_size_.load() > total_size * max_trash_db_ratio_.load())) {
67
68
  // Rate limiting is disabled or trash size makes up more than
68
69
  // max_trash_db_ratio_ (default 25%) of the total DB size
69
- TEST_SYNC_POINT("DeleteScheduler::DeleteFile");
70
- Status s = fs_->DeleteFile(file_path, IOOptions(), nullptr);
70
+ Status s = DeleteFileImmediately(file_path, /*accounted=*/true);
71
71
  if (s.ok()) {
72
- s = sst_file_manager_->OnDeleteFile(file_path);
73
72
  ROCKS_LOG_INFO(info_log_,
74
73
  "Deleted file %s immediately, rate_bytes_per_sec %" PRIi64
75
- ", total_trash_size %" PRIu64 " max_trash_db_ratio %lf",
74
+ ", total_trash_size %" PRIu64 ", total_size %" PRIi64
75
+ ", max_trash_db_ratio %lf",
76
76
  file_path.c_str(), rate_bytes_per_sec_.load(),
77
- total_trash_size_.load(), max_trash_db_ratio_.load());
78
- InstrumentedMutexLock l(&mu_);
79
- RecordTick(stats_.get(), FILES_DELETED_IMMEDIATELY);
77
+ total_trash_size_.load(), total_size,
78
+ max_trash_db_ratio_.load());
79
+ }
80
+ return s;
81
+ }
82
+ return AddFileToDeletionQueue(file_path, dir_to_sync, /*bucket=*/std::nullopt,
83
+ /*accounted=*/true);
84
+ }
85
+
86
+ Status DeleteScheduler::DeleteUnaccountedFile(const std::string& file_path,
87
+ const std::string& dir_to_sync,
88
+ const bool force_bg,
89
+ std::optional<int32_t> bucket) {
90
+ uint64_t num_hard_links = 1;
91
+ fs_->NumFileLinks(file_path, IOOptions(), &num_hard_links, nullptr)
92
+ .PermitUncheckedError();
93
+
94
+ // We can tolerate rare races where we might immediately delete both links
95
+ // to a file.
96
+ if (rate_bytes_per_sec_.load() <= 0 || (!force_bg && num_hard_links > 1)) {
97
+ Status s = DeleteFileImmediately(file_path, /*accounted=*/false);
98
+ if (s.ok()) {
99
+ ROCKS_LOG_INFO(info_log_,
100
+ "Deleted file %s immediately, rate_bytes_per_sec %" PRIi64,
101
+ file_path.c_str(), rate_bytes_per_sec_.load());
80
102
  }
81
103
  return s;
82
104
  }
105
+ return AddFileToDeletionQueue(file_path, dir_to_sync, bucket,
106
+ /*accounted=*/false);
107
+ }
83
108
 
109
+ Status DeleteScheduler::DeleteFileImmediately(const std::string& file_path,
110
+ bool accounted) {
111
+ TEST_SYNC_POINT("DeleteScheduler::DeleteFile");
112
+ TEST_SYNC_POINT_CALLBACK("DeleteScheduler::DeleteFile::cb",
113
+ const_cast<std::string*>(&file_path));
114
+ Status s = fs_->DeleteFile(file_path, IOOptions(), nullptr);
115
+ if (s.ok()) {
116
+ s = OnDeleteFile(file_path, accounted);
117
+ InstrumentedMutexLock l(&mu_);
118
+ RecordTick(stats_.get(), FILES_DELETED_IMMEDIATELY);
119
+ }
120
+ return s;
121
+ }
122
+
123
+ Status DeleteScheduler::AddFileToDeletionQueue(const std::string& file_path,
124
+ const std::string& dir_to_sync,
125
+ std::optional<int32_t> bucket,
126
+ bool accounted) {
84
127
  // Move file to trash
85
128
  std::string trash_file;
86
- Status s = MarkAsTrash(file_path, &trash_file);
129
+ Status s = MarkAsTrash(file_path, accounted, &trash_file);
87
130
  ROCKS_LOG_INFO(info_log_, "Mark file: %s as trash -- %s", trash_file.c_str(),
88
131
  s.ToString().c_str());
89
132
 
@@ -92,7 +135,7 @@ Status DeleteScheduler::DeleteFile(const std::string& file_path,
92
135
  file_path.c_str(), s.ToString().c_str());
93
136
  s = fs_->DeleteFile(file_path, IOOptions(), nullptr);
94
137
  if (s.ok()) {
95
- s = sst_file_manager_->OnDeleteFile(file_path);
138
+ s = OnDeleteFile(file_path, accounted);
96
139
  ROCKS_LOG_INFO(info_log_, "Deleted file %s immediately",
97
140
  trash_file.c_str());
98
141
  InstrumentedMutexLock l(&mu_);
@@ -102,11 +145,13 @@ Status DeleteScheduler::DeleteFile(const std::string& file_path,
102
145
  }
103
146
 
104
147
  // Update the total trash size
105
- uint64_t trash_file_size = 0;
106
- IOStatus io_s =
107
- fs_->GetFileSize(trash_file, IOOptions(), &trash_file_size, nullptr);
108
- if (io_s.ok()) {
109
- total_trash_size_.fetch_add(trash_file_size);
148
+ if (accounted) {
149
+ uint64_t trash_file_size = 0;
150
+ IOStatus io_s =
151
+ fs_->GetFileSize(trash_file, IOOptions(), &trash_file_size, nullptr);
152
+ if (io_s.ok()) {
153
+ total_trash_size_.fetch_add(trash_file_size);
154
+ }
110
155
  }
111
156
  //**TODO: What should we do if we failed to
112
157
  // get the file size?
@@ -115,8 +160,15 @@ Status DeleteScheduler::DeleteFile(const std::string& file_path,
115
160
  {
116
161
  InstrumentedMutexLock l(&mu_);
117
162
  RecordTick(stats_.get(), FILES_MARKED_TRASH);
118
- queue_.emplace(trash_file, dir_to_sync);
163
+ queue_.emplace(trash_file, dir_to_sync, accounted, bucket);
119
164
  pending_files_++;
165
+ if (bucket.has_value()) {
166
+ auto iter = pending_files_in_buckets_.find(bucket.value());
167
+ assert(iter != pending_files_in_buckets_.end());
168
+ if (iter != pending_files_in_buckets_.end()) {
169
+ iter->second++;
170
+ }
171
+ }
120
172
  if (pending_files_ == 1) {
121
173
  cv_.SignalAll();
122
174
  }
@@ -175,7 +227,7 @@ Status DeleteScheduler::CleanupDirectory(Env* env, SstFileManagerImpl* sfm,
175
227
  }
176
228
 
177
229
  Status DeleteScheduler::MarkAsTrash(const std::string& file_path,
178
- std::string* trash_file) {
230
+ bool accounted, std::string* trash_file) {
179
231
  // Sanity check of the path
180
232
  size_t idx = file_path.rfind('/');
181
233
  if (idx == std::string::npos || idx == file_path.size() - 1) {
@@ -209,7 +261,7 @@ Status DeleteScheduler::MarkAsTrash(const std::string& file_path,
209
261
  }
210
262
  cnt++;
211
263
  }
212
- if (s.ok()) {
264
+ if (s.ok() && accounted) {
213
265
  s = sst_file_manager_->OnMoveFile(file_path, *trash_file);
214
266
  }
215
267
  return s;
@@ -233,6 +285,8 @@ void DeleteScheduler::BackgroundEmptyTrash() {
233
285
  uint64_t total_deleted_bytes = 0;
234
286
  int64_t current_delete_rate = rate_bytes_per_sec_.load();
235
287
  while (!queue_.empty() && !closing_) {
288
+ // Satisfy static analysis.
289
+ std::optional<int32_t> bucket = std::nullopt;
236
290
  if (current_delete_rate != rate_bytes_per_sec_.load()) {
237
291
  // User changed the delete rate
238
292
  current_delete_rate = rate_bytes_per_sec_.load();
@@ -245,14 +299,17 @@ void DeleteScheduler::BackgroundEmptyTrash() {
245
299
  // Get new file to delete
246
300
  const FileAndDir& fad = queue_.front();
247
301
  std::string path_in_trash = fad.fname;
302
+ std::string dir_to_sync = fad.dir;
303
+ bool accounted = fad.accounted;
304
+ bucket = fad.bucket;
248
305
 
249
306
  // We don't need to hold the lock while deleting the file
250
307
  mu_.Unlock();
251
308
  uint64_t deleted_bytes = 0;
252
309
  bool is_complete = true;
253
310
  // Delete file from trash and update total_penalty value
254
- Status s =
255
- DeleteTrashFile(path_in_trash, fad.dir, &deleted_bytes, &is_complete);
311
+ Status s = DeleteTrashFile(path_in_trash, dir_to_sync, accounted,
312
+ &deleted_bytes, &is_complete);
256
313
  total_deleted_bytes += deleted_bytes;
257
314
  mu_.Lock();
258
315
  if (is_complete) {
@@ -286,12 +343,20 @@ void DeleteScheduler::BackgroundEmptyTrash() {
286
343
  TEST_SYNC_POINT_CALLBACK("DeleteScheduler::BackgroundEmptyTrash:Wait",
287
344
  &total_penalty);
288
345
 
346
+ int32_t pending_files_in_bucket = std::numeric_limits<int32_t>::max();
289
347
  if (is_complete) {
290
348
  pending_files_--;
349
+ if (bucket.has_value()) {
350
+ auto iter = pending_files_in_buckets_.find(bucket.value());
351
+ assert(iter != pending_files_in_buckets_.end());
352
+ if (iter != pending_files_in_buckets_.end()) {
353
+ pending_files_in_bucket = iter->second--;
354
+ }
355
+ }
291
356
  }
292
- if (pending_files_ == 0) {
293
- // Unblock WaitForEmptyTrash since there are no more files waiting
294
- // to be deleted
357
+ if (pending_files_ == 0 || pending_files_in_bucket == 0) {
358
+ // Unblock WaitForEmptyTrash or WaitForEmptyTrashBucket since there are
359
+ // no more files waiting to be deleted
295
360
  cv_.SignalAll();
296
361
  }
297
362
  }
@@ -300,12 +365,14 @@ void DeleteScheduler::BackgroundEmptyTrash() {
300
365
 
301
366
  Status DeleteScheduler::DeleteTrashFile(const std::string& path_in_trash,
302
367
  const std::string& dir_to_sync,
303
- uint64_t* deleted_bytes,
368
+ bool accounted, uint64_t* deleted_bytes,
304
369
  bool* is_complete) {
305
370
  uint64_t file_size;
306
371
  Status s = fs_->GetFileSize(path_in_trash, IOOptions(), &file_size, nullptr);
307
372
  *is_complete = true;
308
373
  TEST_SYNC_POINT("DeleteScheduler::DeleteTrashFile:DeleteFile");
374
+ TEST_SYNC_POINT_CALLBACK("DeleteScheduler::DeleteTrashFile::cb",
375
+ const_cast<std::string*>(&path_in_trash));
309
376
  if (s.ok()) {
310
377
  bool need_full_delete = true;
311
378
  if (bytes_max_delete_chunk_ != 0 && file_size > bytes_max_delete_chunk_) {
@@ -372,7 +439,7 @@ Status DeleteScheduler::DeleteTrashFile(const std::string& path_in_trash,
372
439
  }
373
440
  if (s.ok()) {
374
441
  *deleted_bytes = file_size;
375
- s = sst_file_manager_->OnDeleteFile(path_in_trash);
442
+ s = OnDeleteFile(path_in_trash, accounted);
376
443
  }
377
444
  }
378
445
  }
@@ -382,12 +449,24 @@ Status DeleteScheduler::DeleteTrashFile(const std::string& path_in_trash,
382
449
  path_in_trash.c_str(), s.ToString().c_str());
383
450
  *deleted_bytes = 0;
384
451
  } else {
385
- total_trash_size_.fetch_sub(*deleted_bytes);
452
+ if (accounted) {
453
+ total_trash_size_.fetch_sub(*deleted_bytes);
454
+ }
386
455
  }
387
456
 
388
457
  return s;
389
458
  }
390
459
 
460
+ Status DeleteScheduler::OnDeleteFile(const std::string& file_path,
461
+ bool accounted) {
462
+ if (accounted) {
463
+ return sst_file_manager_->OnDeleteFile(file_path);
464
+ }
465
+ TEST_SYNC_POINT_CALLBACK("DeleteScheduler::OnDeleteFile",
466
+ const_cast<std::string*>(&file_path));
467
+ return Status::OK();
468
+ }
469
+
391
470
  void DeleteScheduler::WaitForEmptyTrash() {
392
471
  InstrumentedMutexLock l(&mu_);
393
472
  while (pending_files_ > 0 && !closing_) {
@@ -395,6 +474,30 @@ void DeleteScheduler::WaitForEmptyTrash() {
395
474
  }
396
475
  }
397
476
 
477
+ std::optional<int32_t> DeleteScheduler::NewTrashBucket() {
478
+ if (rate_bytes_per_sec_.load() <= 0) {
479
+ return std::nullopt;
480
+ }
481
+ InstrumentedMutexLock l(&mu_);
482
+ int32_t bucket_number = next_trash_bucket_++;
483
+ pending_files_in_buckets_.emplace(bucket_number, 0);
484
+ return bucket_number;
485
+ }
486
+
487
+ void DeleteScheduler::WaitForEmptyTrashBucket(int32_t bucket) {
488
+ InstrumentedMutexLock l(&mu_);
489
+ if (bucket >= next_trash_bucket_) {
490
+ return;
491
+ }
492
+ auto iter = pending_files_in_buckets_.find(bucket);
493
+ while (iter != pending_files_in_buckets_.end() && iter->second > 0 &&
494
+ !closing_) {
495
+ cv_.Wait();
496
+ iter = pending_files_in_buckets_.find(bucket);
497
+ }
498
+ pending_files_in_buckets_.erase(bucket);
499
+ }
500
+
398
501
  void DeleteScheduler::MaybeCreateBackgroundThread() {
399
502
  if (bg_thread_ == nullptr && rate_bytes_per_sec_.load() > 0) {
400
503
  bg_thread_.reset(
@@ -7,6 +7,7 @@
7
7
 
8
8
 
9
9
  #include <map>
10
+ #include <optional>
10
11
  #include <queue>
11
12
  #include <string>
12
13
  #include <thread>
@@ -48,16 +49,45 @@ class DeleteScheduler {
48
49
  MaybeCreateBackgroundThread();
49
50
  }
50
51
 
51
- // Mark file as trash directory and schedule its deletion. If force_bg is
52
- // set, it forces the file to always be deleted in the background thread,
53
- // except when rate limiting is disabled
52
+ // Delete an accounted file that is tracked by `SstFileManager` and should be
53
+ // tracked by this `DeleteScheduler` when it's deleted.
54
+ // The file is deleted immediately if slow deletion is disabled. If force_bg
55
+ // is not set and trash to db size ratio exceeded the configured threshold,
56
+ // it is immediately deleted too. In all other cases, the file will be moved
57
+ // to a trash directory and scheduled for deletion by a background thread.
54
58
  Status DeleteFile(const std::string& fname, const std::string& dir_to_sync,
55
59
  const bool force_bg = false);
56
60
 
57
- // Wait for all files being deleteing in the background to finish or for
61
+ // Delete an unaccounted file that is not tracked by `SstFileManager` and
62
+ // should not be tracked by this `DeleteScheduler` when it's deleted.
63
+ // The file is deleted immediately if slow deletion is disabled. If force_bg
64
+ // is not set and the file has more than 1 hard link, it is immediately
65
+ // deleted too. In all other cases, the file will be moved to a trash
66
+ // directory and scheduled for deletion by a background thread.
67
+ // This API also supports assigning a file to a specified bucket created by
68
+ // `NewTrashBucket` when deleting files in the background. So the caller can
69
+ // wait for a specific bucket to be empty by calling the
70
+ // `WaitForEmptyTrashBucket` API.
71
+ Status DeleteUnaccountedFile(const std::string& file_path,
72
+ const std::string& dir_to_sync,
73
+ const bool force_bg = false,
74
+ std::optional<int32_t> bucket = std::nullopt);
75
+
76
+ // Wait for all files being deleted in the background to finish or for
58
77
  // destructor to be called.
59
78
  void WaitForEmptyTrash();
60
79
 
80
+ // Creates a new trash bucket. A bucket is only created and returned when slow
81
+ // deletion is enabled.
82
+ // For each bucket that is created, the user should also call
83
+ // `WaitForEmptyTrashBucket` after scheduling file deletions to make sure the
84
+ // trash files are all cleared.
85
+ std::optional<int32_t> NewTrashBucket();
86
+
87
+ // Wait for all the files in the specified bucket to be deleted in the
88
+ // background or for the destructor to be called.
89
+ void WaitForEmptyTrashBucket(int32_t bucket);
90
+
61
91
  // Return a map containing errors that happened in BackgroundEmptyTrash
62
92
  // file_path => error status
63
93
  std::map<std::string, Status> GetBackgroundErrors();
@@ -87,12 +117,21 @@ class DeleteScheduler {
87
117
  }
88
118
 
89
119
  private:
90
- Status MarkAsTrash(const std::string& file_path, std::string* path_in_trash);
120
+ Status DeleteFileImmediately(const std::string& file_path, bool accounted);
121
+
122
+ Status AddFileToDeletionQueue(const std::string& file_path,
123
+ const std::string& dir_to_sync,
124
+ std::optional<int32_t> bucket, bool accounted);
125
+
126
+ Status MarkAsTrash(const std::string& file_path, bool accounted,
127
+ std::string* path_in_trash);
91
128
 
92
129
  Status DeleteTrashFile(const std::string& path_in_trash,
93
- const std::string& dir_to_sync,
130
+ const std::string& dir_to_sync, bool accounted,
94
131
  uint64_t* deleted_bytes, bool* is_complete);
95
132
 
133
+ Status OnDeleteFile(const std::string& file_path, bool accounted);
134
+
96
135
  void BackgroundEmptyTrash();
97
136
 
98
137
  void MaybeCreateBackgroundThread();
@@ -104,19 +143,28 @@ class DeleteScheduler {
104
143
  std::atomic<uint64_t> total_trash_size_;
105
144
  // Maximum number of bytes that should be deleted per second
106
145
  std::atomic<int64_t> rate_bytes_per_sec_;
107
- // Mutex to protect queue_, pending_files_, bg_errors_, closing_, stats_
146
+ // Mutex to protect queue_, pending_files_, next_trash_bucket_,
147
+ // pending_files_in_buckets_, bg_errors_, closing_, stats_
108
148
  InstrumentedMutex mu_;
109
149
 
110
150
  struct FileAndDir {
111
- FileAndDir(const std::string& f, const std::string& d) : fname(f), dir(d) {}
151
+ FileAndDir(const std::string& _fname, const std::string& _dir,
152
+ bool _accounted, std::optional<int32_t> _bucket)
153
+ : fname(_fname), dir(_dir), accounted(_accounted), bucket(_bucket) {}
112
154
  std::string fname;
113
155
  std::string dir; // empty will be skipped.
156
+ bool accounted;
157
+ std::optional<int32_t> bucket;
114
158
  };
115
159
 
116
160
  // Queue of trash files that need to be deleted
117
161
  std::queue<FileAndDir> queue_;
118
162
  // Number of trash files that are waiting to be deleted
119
163
  int32_t pending_files_;
164
+ // Next trash bucket that can be created
165
+ int32_t next_trash_bucket_;
166
+ // A mapping from trash bucket to number of pending files in the bucket
167
+ std::map<int32_t, int32_t> pending_files_in_buckets_;
120
168
  uint64_t bytes_max_delete_chunk_;
121
169
  // Errors that happened in BackgroundEmptyTrash (file_path => error)
122
170
  std::map<std::string, Status> bg_errors_;
@@ -127,6 +175,7 @@ class DeleteScheduler {
127
175
  // Condition variable signaled in these conditions
128
176
  // - pending_files_ value change from 0 => 1
129
177
  // - pending_files_ value change from 1 => 0
178
+ // - a value in pending_files_in_buckets_ changes from 1 => 0
130
179
  // - closing_ value is set to true
131
180
  InstrumentedCondVar cv_;
132
181
  // Background thread running BackgroundEmptyTrash
@@ -138,6 +187,10 @@ class DeleteScheduler {
138
187
  // If the trash size constitutes for more than this fraction of the total DB
139
188
  // size we will start deleting new files passed to DeleteScheduler
140
189
  // immediately
190
+ // Unaccounted files passed for deletion will not cause change in
191
+ // total_trash_size_ or affect the DeleteScheduler::total_trash_size_ over
192
+ // SstFileManager::total_size_ ratio. Their slow deletion is not subject to
193
+ // this configured threshold either.
141
194
  std::atomic<double> max_trash_db_ratio_;
142
195
  static const uint64_t kMicrosInSecond = 1000 * 1000LL;
143
196
  std::shared_ptr<Statistics> stats_;
@@ -59,7 +59,10 @@ IOStatus CopyFile(FileSystem* fs, const std::string& source,
59
59
  return io_s;
60
60
  }
61
61
  if (slice.size() == 0) {
62
- return IOStatus::Corruption("file too small");
62
+ return IOStatus::Corruption(
63
+ "File smaller than expected for copy: " + source + " expecting " +
64
+ std::to_string(size) + " more bytes after " +
65
+ std::to_string(dest_writer->GetFileSize()));
63
66
  }
64
67
 
65
68
  io_s = dest_writer->Append(opts, slice);
@@ -122,8 +125,8 @@ IOStatus CreateFile(FileSystem* fs, const std::string& destination,
122
125
  Status DeleteDBFile(const ImmutableDBOptions* db_options,
123
126
  const std::string& fname, const std::string& dir_to_sync,
124
127
  const bool force_bg, const bool force_fg) {
125
- SstFileManagerImpl* sfm =
126
- static_cast<SstFileManagerImpl*>(db_options->sst_file_manager.get());
128
+ SstFileManagerImpl* sfm = static_cast_with_check<SstFileManagerImpl>(
129
+ db_options->sst_file_manager.get());
127
130
  if (sfm && !force_fg) {
128
131
  return sfm->ScheduleFileDeletion(fname, dir_to_sync, force_bg);
129
132
  } else {
@@ -131,6 +134,21 @@ Status DeleteDBFile(const ImmutableDBOptions* db_options,
131
134
  }
132
135
  }
133
136
 
137
+ Status DeleteUnaccountedDBFile(const ImmutableDBOptions* db_options,
138
+ const std::string& fname,
139
+ const std::string& dir_to_sync,
140
+ const bool force_bg, const bool force_fg,
141
+ std::optional<int32_t> bucket) {
142
+ SstFileManagerImpl* sfm = static_cast_with_check<SstFileManagerImpl>(
143
+ db_options->sst_file_manager.get());
144
+ if (sfm && !force_fg) {
145
+ return sfm->ScheduleUnaccountedFileDeletion(fname, dir_to_sync, force_bg,
146
+ bucket);
147
+ } else {
148
+ return db_options->env->DeleteFile(fname);
149
+ }
150
+ }
151
+
134
152
  // requested_checksum_func_name brings the function name of the checksum
135
153
  // generator in checksum_factory. Empty string is permitted, in which case the
136
154
  // name of the generator created by the factory is unchecked. When
@@ -226,7 +244,10 @@ IOStatus GenerateOneFileChecksum(
226
244
  io_s.ToString());
227
245
  }
228
246
  if (slice.size() == 0) {
229
- return IOStatus::Corruption("file too small");
247
+ return IOStatus::Corruption(
248
+ "File smaller than expected for checksum: " + file_path +
249
+ " expecting " + std::to_string(size) + " more bytes after " +
250
+ std::to_string(offset));
230
251
  }
231
252
  checksum_generator->Update(slice.data(), slice.size());
232
253
  size -= slice.size();