rocksdb-native 2.2.0 → 2.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (261) hide show
  1. package/binding.c +92 -10
  2. package/index.js +9 -0
  3. package/lib/batch.js +11 -1
  4. package/lib/iterator.js +3 -1
  5. package/lib/snapshot.js +21 -0
  6. package/package.json +1 -1
  7. package/prebuilds/darwin-arm64/rocksdb-native.bare +0 -0
  8. package/prebuilds/darwin-arm64/rocksdb-native.node +0 -0
  9. package/prebuilds/darwin-x64/rocksdb-native.bare +0 -0
  10. package/prebuilds/darwin-x64/rocksdb-native.node +0 -0
  11. package/prebuilds/linux-arm64/rocksdb-native.bare +0 -0
  12. package/prebuilds/linux-arm64/rocksdb-native.node +0 -0
  13. package/prebuilds/linux-x64/rocksdb-native.bare +0 -0
  14. package/prebuilds/linux-x64/rocksdb-native.node +0 -0
  15. package/prebuilds/win32-x64/rocksdb-native.bare +0 -0
  16. package/prebuilds/win32-x64/rocksdb-native.node +0 -0
  17. package/vendor/librocksdb/include/rocksdb.h +38 -4
  18. package/vendor/librocksdb/src/rocksdb.cc +114 -14
  19. package/vendor/librocksdb/vendor/rocksdb/CMakeLists.txt +21 -4
  20. package/vendor/librocksdb/vendor/rocksdb/cache/secondary_cache_adapter.cc +6 -3
  21. package/vendor/librocksdb/vendor/rocksdb/db/arena_wrapped_db_iter.cc +4 -4
  22. package/vendor/librocksdb/vendor/rocksdb/db/arena_wrapped_db_iter.h +4 -2
  23. package/vendor/librocksdb/vendor/rocksdb/db/attribute_group_iterator_impl.cc +20 -0
  24. package/vendor/librocksdb/vendor/rocksdb/db/attribute_group_iterator_impl.h +83 -0
  25. package/vendor/librocksdb/vendor/rocksdb/db/builder.cc +9 -5
  26. package/vendor/librocksdb/vendor/rocksdb/db/builder.h +1 -1
  27. package/vendor/librocksdb/vendor/rocksdb/db/c.cc +231 -6
  28. package/vendor/librocksdb/vendor/rocksdb/db/c_test.c +202 -2
  29. package/vendor/librocksdb/vendor/rocksdb/db/coalescing_iterator.cc +47 -0
  30. package/vendor/librocksdb/vendor/rocksdb/db/coalescing_iterator.h +79 -0
  31. package/vendor/librocksdb/vendor/rocksdb/db/column_family.cc +28 -0
  32. package/vendor/librocksdb/vendor/rocksdb/db/column_family.h +17 -0
  33. package/vendor/librocksdb/vendor/rocksdb/db/compaction/compaction.cc +8 -1
  34. package/vendor/librocksdb/vendor/rocksdb/db/compaction/compaction.h +11 -9
  35. package/vendor/librocksdb/vendor/rocksdb/db/compaction/compaction_iterator.cc +50 -23
  36. package/vendor/librocksdb/vendor/rocksdb/db/compaction/compaction_iterator.h +13 -0
  37. package/vendor/librocksdb/vendor/rocksdb/db/compaction/compaction_job.cc +22 -25
  38. package/vendor/librocksdb/vendor/rocksdb/db/compaction/compaction_job.h +2 -0
  39. package/vendor/librocksdb/vendor/rocksdb/db/compaction/compaction_outputs.cc +8 -1
  40. package/vendor/librocksdb/vendor/rocksdb/db/compaction/compaction_outputs.h +1 -0
  41. package/vendor/librocksdb/vendor/rocksdb/db/compaction/compaction_picker.cc +40 -17
  42. package/vendor/librocksdb/vendor/rocksdb/db/compaction/compaction_picker.h +20 -14
  43. package/vendor/librocksdb/vendor/rocksdb/db/compaction/compaction_picker_level.cc +11 -6
  44. package/vendor/librocksdb/vendor/rocksdb/db/compaction/compaction_picker_universal.cc +77 -24
  45. package/vendor/librocksdb/vendor/rocksdb/db/compaction/compaction_service_job.cc +2 -0
  46. package/vendor/librocksdb/vendor/rocksdb/db/convenience.cc +3 -0
  47. package/vendor/librocksdb/vendor/rocksdb/db/db_filesnapshot.cc +125 -31
  48. package/vendor/librocksdb/vendor/rocksdb/db/db_impl/db_impl.cc +457 -231
  49. package/vendor/librocksdb/vendor/rocksdb/db/db_impl/db_impl.h +172 -73
  50. package/vendor/librocksdb/vendor/rocksdb/db/db_impl/db_impl_compaction_flush.cc +152 -133
  51. package/vendor/librocksdb/vendor/rocksdb/db/db_impl/db_impl_debug.cc +5 -0
  52. package/vendor/librocksdb/vendor/rocksdb/db/db_impl/db_impl_files.cc +58 -52
  53. package/vendor/librocksdb/vendor/rocksdb/db/db_impl/db_impl_follower.cc +348 -0
  54. package/vendor/librocksdb/vendor/rocksdb/db/db_impl/db_impl_follower.h +54 -0
  55. package/vendor/librocksdb/vendor/rocksdb/db/db_impl/db_impl_open.cc +136 -117
  56. package/vendor/librocksdb/vendor/rocksdb/db/db_impl/db_impl_secondary.cc +4 -3
  57. package/vendor/librocksdb/vendor/rocksdb/db/db_impl/db_impl_secondary.h +7 -6
  58. package/vendor/librocksdb/vendor/rocksdb/db/db_impl/db_impl_write.cc +134 -80
  59. package/vendor/librocksdb/vendor/rocksdb/db/db_iter.cc +11 -0
  60. package/vendor/librocksdb/vendor/rocksdb/db/db_test2.cc +1 -1
  61. package/vendor/librocksdb/vendor/rocksdb/db/db_test_util.cc +11 -1
  62. package/vendor/librocksdb/vendor/rocksdb/db/db_test_util.h +11 -7
  63. package/vendor/librocksdb/vendor/rocksdb/db/dbformat.cc +19 -4
  64. package/vendor/librocksdb/vendor/rocksdb/db/dbformat.h +3 -2
  65. package/vendor/librocksdb/vendor/rocksdb/db/error_handler.cc +34 -39
  66. package/vendor/librocksdb/vendor/rocksdb/db/error_handler.h +3 -4
  67. package/vendor/librocksdb/vendor/rocksdb/db/event_helpers.cc +6 -3
  68. package/vendor/librocksdb/vendor/rocksdb/db/experimental.cc +3 -2
  69. package/vendor/librocksdb/vendor/rocksdb/db/external_sst_file_ingestion_job.cc +76 -18
  70. package/vendor/librocksdb/vendor/rocksdb/db/external_sst_file_ingestion_job.h +11 -0
  71. package/vendor/librocksdb/vendor/rocksdb/db/flush_job.cc +37 -5
  72. package/vendor/librocksdb/vendor/rocksdb/db/flush_job.h +14 -0
  73. package/vendor/librocksdb/vendor/rocksdb/db/import_column_family_job.cc +49 -45
  74. package/vendor/librocksdb/vendor/rocksdb/db/internal_stats.cc +60 -1
  75. package/vendor/librocksdb/vendor/rocksdb/db/internal_stats.h +20 -1
  76. package/vendor/librocksdb/vendor/rocksdb/db/log_reader.cc +15 -6
  77. package/vendor/librocksdb/vendor/rocksdb/db/log_writer.cc +59 -10
  78. package/vendor/librocksdb/vendor/rocksdb/db/log_writer.h +8 -0
  79. package/vendor/librocksdb/vendor/rocksdb/db/memtable.cc +24 -40
  80. package/vendor/librocksdb/vendor/rocksdb/db/memtable.h +10 -10
  81. package/vendor/librocksdb/vendor/rocksdb/db/memtable_list.cc +9 -8
  82. package/vendor/librocksdb/vendor/rocksdb/db/multi_cf_iterator_impl.h +296 -0
  83. package/vendor/librocksdb/vendor/rocksdb/db/range_tombstone_fragmenter.h +8 -10
  84. package/vendor/librocksdb/vendor/rocksdb/db/repair.cc +4 -3
  85. package/vendor/librocksdb/vendor/rocksdb/db/seqno_to_time_mapping.cc +30 -0
  86. package/vendor/librocksdb/vendor/rocksdb/db/seqno_to_time_mapping.h +9 -0
  87. package/vendor/librocksdb/vendor/rocksdb/db/table_cache.cc +17 -2
  88. package/vendor/librocksdb/vendor/rocksdb/db/table_cache.h +9 -1
  89. package/vendor/librocksdb/vendor/rocksdb/db/table_properties_collector.h +9 -2
  90. package/vendor/librocksdb/vendor/rocksdb/db/transaction_log_impl.cc +3 -3
  91. package/vendor/librocksdb/vendor/rocksdb/db/transaction_log_impl.h +7 -7
  92. package/vendor/librocksdb/vendor/rocksdb/db/version_edit.cc +0 -1
  93. package/vendor/librocksdb/vendor/rocksdb/db/version_edit_handler.cc +39 -5
  94. package/vendor/librocksdb/vendor/rocksdb/db/version_edit_handler.h +24 -15
  95. package/vendor/librocksdb/vendor/rocksdb/db/version_set.cc +117 -64
  96. package/vendor/librocksdb/vendor/rocksdb/db/version_set.h +27 -10
  97. package/vendor/librocksdb/vendor/rocksdb/db/wal_manager.cc +37 -29
  98. package/vendor/librocksdb/vendor/rocksdb/db/wal_manager.h +6 -5
  99. package/vendor/librocksdb/vendor/rocksdb/db/wide/wide_columns.cc +2 -3
  100. package/vendor/librocksdb/vendor/rocksdb/db/wide/wide_columns_helper.cc +6 -0
  101. package/vendor/librocksdb/vendor/rocksdb/db/write_batch.cc +89 -31
  102. package/vendor/librocksdb/vendor/rocksdb/db/write_thread.cc +53 -5
  103. package/vendor/librocksdb/vendor/rocksdb/db/write_thread.h +36 -4
  104. package/vendor/librocksdb/vendor/rocksdb/env/composite_env_wrapper.h +21 -0
  105. package/vendor/librocksdb/vendor/rocksdb/env/env.cc +15 -0
  106. package/vendor/librocksdb/vendor/rocksdb/env/fs_on_demand.cc +331 -0
  107. package/vendor/librocksdb/vendor/rocksdb/env/fs_on_demand.h +139 -0
  108. package/vendor/librocksdb/vendor/rocksdb/env/io_posix.cc +8 -6
  109. package/vendor/librocksdb/vendor/rocksdb/env/io_posix.h +1 -1
  110. package/vendor/librocksdb/vendor/rocksdb/file/delete_scheduler.cc +130 -27
  111. package/vendor/librocksdb/vendor/rocksdb/file/delete_scheduler.h +61 -8
  112. package/vendor/librocksdb/vendor/rocksdb/file/file_util.cc +25 -4
  113. package/vendor/librocksdb/vendor/rocksdb/file/file_util.h +15 -0
  114. package/vendor/librocksdb/vendor/rocksdb/file/sequence_file_reader.cc +1 -0
  115. package/vendor/librocksdb/vendor/rocksdb/file/sequence_file_reader.h +9 -4
  116. package/vendor/librocksdb/vendor/rocksdb/file/sst_file_manager_impl.cc +18 -0
  117. package/vendor/librocksdb/vendor/rocksdb/file/sst_file_manager_impl.h +31 -4
  118. package/vendor/librocksdb/vendor/rocksdb/file/writable_file_writer.cc +40 -38
  119. package/vendor/librocksdb/vendor/rocksdb/file/writable_file_writer.h +48 -15
  120. package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/advanced_options.h +12 -3
  121. package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/attribute_groups.h +114 -0
  122. package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/c.h +90 -0
  123. package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/cache.h +5 -0
  124. package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/comparator.h +27 -0
  125. package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/db.h +71 -12
  126. package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/env.h +9 -0
  127. package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/experimental.h +5 -0
  128. package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/file_system.h +14 -0
  129. package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/iterator.h +9 -71
  130. package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/iterator_base.h +90 -0
  131. package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/listener.h +21 -0
  132. package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/options.h +125 -12
  133. package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/perf_context.h +1 -1
  134. package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/sst_file_reader.h +11 -1
  135. package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/table.h +6 -6
  136. package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/table_properties.h +19 -0
  137. package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/transaction_log.h +12 -6
  138. package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/types.h +12 -0
  139. package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/universal_compaction.h +31 -0
  140. package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/user_write_callback.h +29 -0
  141. package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/utilities/cache_dump_load.h +4 -0
  142. package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/utilities/checkpoint.h +4 -2
  143. package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/utilities/customizable_util.h +0 -1
  144. package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/utilities/env_mirror.h +1 -1
  145. package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/utilities/ldb_cmd.h +24 -7
  146. package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/utilities/option_change_migration.h +4 -4
  147. package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/utilities/stackable_db.h +24 -5
  148. package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/utilities/table_properties_collectors.h +46 -0
  149. package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/utilities/transaction.h +42 -17
  150. package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/utilities/transaction_db.h +5 -0
  151. package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/utilities/types_util.h +36 -0
  152. package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/utilities/write_batch_with_index.h +71 -3
  153. package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/version.h +2 -2
  154. package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/wide_columns.h +87 -72
  155. package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/write_batch_base.h +1 -1
  156. package/vendor/librocksdb/vendor/rocksdb/memory/memory_allocator.cc +1 -0
  157. package/vendor/librocksdb/vendor/rocksdb/options/cf_options.cc +13 -2
  158. package/vendor/librocksdb/vendor/rocksdb/options/cf_options.h +6 -2
  159. package/vendor/librocksdb/vendor/rocksdb/options/db_options.cc +27 -1
  160. package/vendor/librocksdb/vendor/rocksdb/options/db_options.h +10 -3
  161. package/vendor/librocksdb/vendor/rocksdb/options/options.cc +3 -0
  162. package/vendor/librocksdb/vendor/rocksdb/options/options_helper.cc +1 -0
  163. package/vendor/librocksdb/vendor/rocksdb/port/jemalloc_helper.h +2 -2
  164. package/vendor/librocksdb/vendor/rocksdb/port/stack_trace.cc +1 -0
  165. package/vendor/librocksdb/vendor/rocksdb/port/win/port_win.cc +3 -2
  166. package/vendor/librocksdb/vendor/rocksdb/table/block_based/binary_search_index_reader.cc +1 -2
  167. package/vendor/librocksdb/vendor/rocksdb/table/block_based/block_based_table_builder.cc +47 -31
  168. package/vendor/librocksdb/vendor/rocksdb/table/block_based/block_based_table_factory.cc +15 -0
  169. package/vendor/librocksdb/vendor/rocksdb/table/block_based/block_based_table_iterator.cc +37 -18
  170. package/vendor/librocksdb/vendor/rocksdb/table/block_based/block_based_table_iterator.h +10 -3
  171. package/vendor/librocksdb/vendor/rocksdb/table/block_based/block_based_table_reader.cc +102 -41
  172. package/vendor/librocksdb/vendor/rocksdb/table/block_based/block_based_table_reader.h +15 -7
  173. package/vendor/librocksdb/vendor/rocksdb/table/block_based/block_based_table_reader_impl.h +1 -3
  174. package/vendor/librocksdb/vendor/rocksdb/table/block_based/block_based_table_reader_sync_and_async.h +5 -6
  175. package/vendor/librocksdb/vendor/rocksdb/table/block_based/block_cache.h +31 -0
  176. package/vendor/librocksdb/vendor/rocksdb/table/block_based/block_prefetcher.cc +6 -0
  177. package/vendor/librocksdb/vendor/rocksdb/table/block_based/cachable_entry.h +10 -5
  178. package/vendor/librocksdb/vendor/rocksdb/table/block_based/filter_block.h +34 -28
  179. package/vendor/librocksdb/vendor/rocksdb/table/block_based/filter_block_reader_common.cc +17 -11
  180. package/vendor/librocksdb/vendor/rocksdb/table/block_based/filter_block_reader_common.h +5 -2
  181. package/vendor/librocksdb/vendor/rocksdb/table/block_based/filter_policy.cc +12 -3
  182. package/vendor/librocksdb/vendor/rocksdb/table/block_based/full_filter_block.cc +37 -30
  183. package/vendor/librocksdb/vendor/rocksdb/table/block_based/full_filter_block.h +11 -13
  184. package/vendor/librocksdb/vendor/rocksdb/table/block_based/hash_index_reader.cc +1 -2
  185. package/vendor/librocksdb/vendor/rocksdb/table/block_based/index_builder.cc +62 -53
  186. package/vendor/librocksdb/vendor/rocksdb/table/block_based/index_builder.h +60 -38
  187. package/vendor/librocksdb/vendor/rocksdb/table/block_based/index_reader_common.cc +14 -9
  188. package/vendor/librocksdb/vendor/rocksdb/table/block_based/index_reader_common.h +4 -1
  189. package/vendor/librocksdb/vendor/rocksdb/table/block_based/partitioned_filter_block.cc +135 -94
  190. package/vendor/librocksdb/vendor/rocksdb/table/block_based/partitioned_filter_block.h +52 -46
  191. package/vendor/librocksdb/vendor/rocksdb/table/block_based/partitioned_index_reader.cc +51 -13
  192. package/vendor/librocksdb/vendor/rocksdb/table/block_based/partitioned_index_reader.h +2 -0
  193. package/vendor/librocksdb/vendor/rocksdb/table/block_based/uncompression_dict_reader.cc +3 -11
  194. package/vendor/librocksdb/vendor/rocksdb/table/block_based/uncompression_dict_reader.h +2 -3
  195. package/vendor/librocksdb/vendor/rocksdb/table/block_fetcher.cc +8 -10
  196. package/vendor/librocksdb/vendor/rocksdb/table/block_fetcher.h +2 -1
  197. package/vendor/librocksdb/vendor/rocksdb/table/compaction_merging_iterator.cc +9 -10
  198. package/vendor/librocksdb/vendor/rocksdb/table/compaction_merging_iterator.h +3 -2
  199. package/vendor/librocksdb/vendor/rocksdb/table/format.cc +1 -2
  200. package/vendor/librocksdb/vendor/rocksdb/table/iterator.cc +4 -0
  201. package/vendor/librocksdb/vendor/rocksdb/table/merging_iterator.cc +18 -13
  202. package/vendor/librocksdb/vendor/rocksdb/table/merging_iterator.h +5 -3
  203. package/vendor/librocksdb/vendor/rocksdb/table/meta_blocks.cc +18 -4
  204. package/vendor/librocksdb/vendor/rocksdb/table/meta_blocks.h +4 -0
  205. package/vendor/librocksdb/vendor/rocksdb/table/plain/plain_table_builder.cc +2 -2
  206. package/vendor/librocksdb/vendor/rocksdb/table/sst_file_dumper.cc +6 -6
  207. package/vendor/librocksdb/vendor/rocksdb/table/sst_file_reader.cc +24 -2
  208. package/vendor/librocksdb/vendor/rocksdb/table/sst_file_writer_collectors.h +3 -1
  209. package/vendor/librocksdb/vendor/rocksdb/table/table_builder.h +8 -7
  210. package/vendor/librocksdb/vendor/rocksdb/table/table_iterator.h +69 -0
  211. package/vendor/librocksdb/vendor/rocksdb/table/table_reader.h +9 -0
  212. package/vendor/librocksdb/vendor/rocksdb/test_util/testutil.cc +25 -0
  213. package/vendor/librocksdb/vendor/rocksdb/test_util/testutil.h +12 -0
  214. package/vendor/librocksdb/vendor/rocksdb/tools/db_bench_tool.cc +32 -0
  215. package/vendor/librocksdb/vendor/rocksdb/tools/ldb_cmd.cc +618 -124
  216. package/vendor/librocksdb/vendor/rocksdb/tools/ldb_cmd_impl.h +19 -1
  217. package/vendor/librocksdb/vendor/rocksdb/tools/ldb_tool.cc +9 -0
  218. package/vendor/librocksdb/vendor/rocksdb/util/aligned_storage.h +24 -0
  219. package/vendor/librocksdb/vendor/rocksdb/util/autovector.h +4 -0
  220. package/vendor/librocksdb/vendor/rocksdb/util/comparator.cc +12 -0
  221. package/vendor/librocksdb/vendor/rocksdb/util/filter_bench.cc +1 -1
  222. package/vendor/librocksdb/vendor/rocksdb/util/random.cc +2 -1
  223. package/vendor/librocksdb/vendor/rocksdb/util/stderr_logger.cc +3 -4
  224. package/vendor/librocksdb/vendor/rocksdb/util/stderr_logger.h +1 -1
  225. package/vendor/librocksdb/vendor/rocksdb/util/udt_util.cc +33 -0
  226. package/vendor/librocksdb/vendor/rocksdb/util/udt_util.h +7 -0
  227. package/vendor/librocksdb/vendor/rocksdb/util/write_batch_util.h +5 -0
  228. package/vendor/librocksdb/vendor/rocksdb/util/xxhash.h +36 -29
  229. package/vendor/librocksdb/vendor/rocksdb/utilities/blob_db/blob_db_impl.h +3 -0
  230. package/vendor/librocksdb/vendor/rocksdb/utilities/blob_db/blob_db_impl_filesnapshot.cc +20 -0
  231. package/vendor/librocksdb/vendor/rocksdb/utilities/cache_dump_load_impl.cc +29 -9
  232. package/vendor/librocksdb/vendor/rocksdb/utilities/cache_dump_load_impl.h +14 -3
  233. package/vendor/librocksdb/vendor/rocksdb/utilities/debug.cc +16 -4
  234. package/vendor/librocksdb/vendor/rocksdb/utilities/fault_injection_fs.cc +677 -248
  235. package/vendor/librocksdb/vendor/rocksdb/utilities/fault_injection_fs.h +325 -158
  236. package/vendor/librocksdb/vendor/rocksdb/utilities/option_change_migration/option_change_migration.cc +1 -8
  237. package/vendor/librocksdb/vendor/rocksdb/utilities/table_properties_collectors/compact_for_tiering_collector.cc +144 -0
  238. package/vendor/librocksdb/vendor/rocksdb/utilities/table_properties_collectors/compact_for_tiering_collector.h +45 -0
  239. package/vendor/librocksdb/vendor/rocksdb/utilities/table_properties_collectors/compact_on_deletion_collector.cc +12 -0
  240. package/vendor/librocksdb/vendor/rocksdb/utilities/transactions/lock/range/range_tree/lib/portability/toku_time.h +1 -1
  241. package/vendor/librocksdb/vendor/rocksdb/utilities/transactions/lock/range/range_tree/lib/util/growable_array.h +3 -3
  242. package/vendor/librocksdb/vendor/rocksdb/utilities/transactions/pessimistic_transaction.cc +116 -20
  243. package/vendor/librocksdb/vendor/rocksdb/utilities/transactions/pessimistic_transaction.h +33 -1
  244. package/vendor/librocksdb/vendor/rocksdb/utilities/transactions/pessimistic_transaction_db.cc +78 -13
  245. package/vendor/librocksdb/vendor/rocksdb/utilities/transactions/pessimistic_transaction_db.h +33 -1
  246. package/vendor/librocksdb/vendor/rocksdb/utilities/transactions/transaction_base.cc +106 -7
  247. package/vendor/librocksdb/vendor/rocksdb/utilities/transactions/transaction_base.h +68 -10
  248. package/vendor/librocksdb/vendor/rocksdb/utilities/transactions/transaction_test.h +7 -3
  249. package/vendor/librocksdb/vendor/rocksdb/utilities/transactions/transaction_util.cc +8 -5
  250. package/vendor/librocksdb/vendor/rocksdb/utilities/transactions/transaction_util.h +7 -4
  251. package/vendor/librocksdb/vendor/rocksdb/utilities/transactions/write_prepared_txn.cc +18 -12
  252. package/vendor/librocksdb/vendor/rocksdb/utilities/transactions/write_prepared_txn_db.cc +4 -4
  253. package/vendor/librocksdb/vendor/rocksdb/utilities/transactions/write_prepared_txn_db.h +17 -0
  254. package/vendor/librocksdb/vendor/rocksdb/utilities/transactions/write_unprepared_txn.cc +11 -9
  255. package/vendor/librocksdb/vendor/rocksdb/utilities/transactions/write_unprepared_txn_db.cc +2 -1
  256. package/vendor/librocksdb/vendor/rocksdb/utilities/types_util.cc +88 -0
  257. package/vendor/librocksdb/vendor/rocksdb/utilities/write_batch_with_index/write_batch_with_index.cc +313 -14
  258. package/vendor/librocksdb/vendor/rocksdb/utilities/write_batch_with_index/write_batch_with_index_internal.cc +7 -0
  259. package/vendor/librocksdb/vendor/rocksdb/utilities/write_batch_with_index/write_batch_with_index_internal.h +1 -1
  260. package/vendor/librocksdb/vendor/rocksdb/db/multi_cf_iterator.cc +0 -102
  261. package/vendor/librocksdb/vendor/rocksdb/db/multi_cf_iterator.h +0 -159
@@ -152,7 +152,7 @@ Status FileChecksumRetriever::ApplyVersionEdit(VersionEdit& edit,
152
152
 
153
153
  VersionEditHandler::VersionEditHandler(
154
154
  bool read_only, std::vector<ColumnFamilyDescriptor> column_families,
155
- VersionSet* version_set, bool track_missing_files,
155
+ VersionSet* version_set, bool track_found_and_missing_files,
156
156
  bool no_error_if_files_missing, const std::shared_ptr<IOTracer>& io_tracer,
157
157
  const ReadOptions& read_options, bool skip_load_table_files,
158
158
  EpochNumberRequirement epoch_number_requirement)
@@ -160,7 +160,7 @@ VersionEditHandler::VersionEditHandler(
160
160
  read_only_(read_only),
161
161
  column_families_(std::move(column_families)),
162
162
  version_set_(version_set),
163
- track_missing_files_(track_missing_files),
163
+ track_found_and_missing_files_(track_found_and_missing_files),
164
164
  no_error_if_files_missing_(no_error_if_files_missing),
165
165
  io_tracer_(io_tracer),
166
166
  skip_load_table_files_(skip_load_table_files),
@@ -500,7 +500,8 @@ ColumnFamilyData* VersionEditHandler::CreateCfAndInit(
500
500
  assert(builders_.find(cf_id) == builders_.end());
501
501
  builders_.emplace(cf_id,
502
502
  VersionBuilderUPtr(new BaseReferencedVersionBuilder(cfd)));
503
- if (track_missing_files_) {
503
+ if (track_found_and_missing_files_) {
504
+ cf_to_found_files_.emplace(cf_id, std::unordered_set<uint64_t>());
504
505
  cf_to_missing_files_.emplace(cf_id, std::unordered_set<uint64_t>());
505
506
  cf_to_missing_blob_files_high_.emplace(cf_id, kInvalidBlobFileNumber);
506
507
  }
@@ -513,7 +514,11 @@ ColumnFamilyData* VersionEditHandler::DestroyCfAndCleanup(
513
514
  auto builder_iter = builders_.find(cf_id);
514
515
  assert(builder_iter != builders_.end());
515
516
  builders_.erase(builder_iter);
516
- if (track_missing_files_) {
517
+ if (track_found_and_missing_files_) {
518
+ auto found_files_iter = cf_to_found_files_.find(cf_id);
519
+ assert(found_files_iter != cf_to_found_files_.end());
520
+ cf_to_found_files_.erase(found_files_iter);
521
+
517
522
  auto missing_files_iter = cf_to_missing_files_.find(cf_id);
518
523
  assert(missing_files_iter != cf_to_missing_files_.end());
519
524
  cf_to_missing_files_.erase(missing_files_iter);
@@ -729,7 +734,7 @@ VersionEditHandlerPointInTime::VersionEditHandlerPointInTime(
729
734
  const ReadOptions& read_options,
730
735
  EpochNumberRequirement epoch_number_requirement)
731
736
  : VersionEditHandler(read_only, column_families, version_set,
732
- /*track_missing_files=*/true,
737
+ /*track_found_and_missing_files=*/true,
733
738
  /*no_error_if_files_missing=*/true, io_tracer,
734
739
  read_options, epoch_number_requirement) {}
735
740
 
@@ -824,6 +829,12 @@ void VersionEditHandlerPointInTime::CheckIterationResult(
824
829
 
825
830
  version_set_->AppendVersion(cfd, v_iter->second);
826
831
  versions_.erase(v_iter);
832
+ // Let's clear found_files, since any files in that are part of the
833
+ // installed Version. Any files that got obsoleted would have already
834
+ // been moved to intermediate_files_
835
+ auto found_files_iter = cf_to_found_files_.find(cfd->GetID());
836
+ assert(found_files_iter != cf_to_found_files_.end());
837
+ found_files_iter->second.clear();
827
838
  }
828
839
  }
829
840
  } else {
@@ -854,10 +865,16 @@ ColumnFamilyData* VersionEditHandlerPointInTime::DestroyCfAndCleanup(
854
865
 
855
866
  Status VersionEditHandlerPointInTime::MaybeCreateVersion(
856
867
  const VersionEdit& edit, ColumnFamilyData* cfd, bool force_create_version) {
868
+ TEST_SYNC_POINT("VersionEditHandlerPointInTime::MaybeCreateVersion:Begin1");
869
+ TEST_SYNC_POINT("VersionEditHandlerPointInTime::MaybeCreateVersion:Begin2");
857
870
  assert(cfd != nullptr);
858
871
  if (!force_create_version) {
859
872
  assert(edit.GetColumnFamily() == cfd->GetID());
860
873
  }
874
+ auto found_files_iter = cf_to_found_files_.find(cfd->GetID());
875
+ assert(found_files_iter != cf_to_found_files_.end());
876
+ std::unordered_set<uint64_t>& found_files = found_files_iter->second;
877
+
861
878
  auto missing_files_iter = cf_to_missing_files_.find(cfd->GetID());
862
879
  assert(missing_files_iter != cf_to_missing_files_.end());
863
880
  std::unordered_set<uint64_t>& missing_files = missing_files_iter->second;
@@ -889,6 +906,18 @@ Status VersionEditHandlerPointInTime::MaybeCreateVersion(
889
906
  auto fiter = missing_files.find(file_num);
890
907
  if (fiter != missing_files.end()) {
891
908
  missing_files.erase(fiter);
909
+ } else {
910
+ fiter = found_files.find(file_num);
911
+ // Only mark new files added during this catchup attempt for deletion.
912
+ // These files were never installed in VersionStorageInfo.
913
+ // Already referenced files that are deleted by a VersionEdit will
914
+ // be added to the VersionStorageInfo's obsolete files when the old
915
+ // version is dereferenced.
916
+ if (fiter != found_files.end()) {
917
+ intermediate_files_.emplace_back(
918
+ MakeTableFileName(cfd->ioptions()->cf_paths[0].path, file_num));
919
+ found_files.erase(fiter);
920
+ }
892
921
  }
893
922
  }
894
923
 
@@ -904,9 +933,14 @@ Status VersionEditHandlerPointInTime::MaybeCreateVersion(
904
933
  s = VerifyFile(cfd, fpath, level, meta);
905
934
  if (s.IsPathNotFound() || s.IsNotFound() || s.IsCorruption()) {
906
935
  missing_files.insert(file_num);
936
+ if (s.IsCorruption()) {
937
+ found_files.insert(file_num);
938
+ }
907
939
  s = Status::OK();
908
940
  } else if (!s.ok()) {
909
941
  break;
942
+ } else {
943
+ found_files.insert(file_num);
910
944
  }
911
945
  }
912
946
 
@@ -104,7 +104,7 @@ using VersionBuilderUPtr = std::unique_ptr<BaseReferencedVersionBuilder>;
104
104
  // To use this class and its subclasses,
105
105
  // 1. Create an object of VersionEditHandler or its subclasses.
106
106
  // VersionEditHandler handler(read_only, column_families, version_set,
107
- // track_missing_files,
107
+ // track_found_and_missing_files,
108
108
  // no_error_if_files_missing);
109
109
  // 2. Status s = handler.Iterate(reader, &db_id);
110
110
  // 3. Check s and handle possible errors.
@@ -116,16 +116,17 @@ class VersionEditHandler : public VersionEditHandlerBase {
116
116
  explicit VersionEditHandler(
117
117
  bool read_only,
118
118
  const std::vector<ColumnFamilyDescriptor>& column_families,
119
- VersionSet* version_set, bool track_missing_files,
119
+ VersionSet* version_set, bool track_found_and_missing_files,
120
120
  bool no_error_if_files_missing,
121
121
  const std::shared_ptr<IOTracer>& io_tracer,
122
122
  const ReadOptions& read_options,
123
123
  EpochNumberRequirement epoch_number_requirement =
124
124
  EpochNumberRequirement::kMustPresent)
125
- : VersionEditHandler(
126
- read_only, column_families, version_set, track_missing_files,
127
- no_error_if_files_missing, io_tracer, read_options,
128
- /*skip_load_table_files=*/false, epoch_number_requirement) {}
125
+ : VersionEditHandler(read_only, column_families, version_set,
126
+ track_found_and_missing_files,
127
+ no_error_if_files_missing, io_tracer, read_options,
128
+ /*skip_load_table_files=*/false,
129
+ epoch_number_requirement) {}
129
130
 
130
131
  ~VersionEditHandler() override {}
131
132
 
@@ -144,7 +145,7 @@ class VersionEditHandler : public VersionEditHandlerBase {
144
145
  protected:
145
146
  explicit VersionEditHandler(
146
147
  bool read_only, std::vector<ColumnFamilyDescriptor> column_families,
147
- VersionSet* version_set, bool track_missing_files,
148
+ VersionSet* version_set, bool track_found_and_missing_files,
148
149
  bool no_error_if_files_missing,
149
150
  const std::shared_ptr<IOTracer>& io_tracer,
150
151
  const ReadOptions& read_options, bool skip_load_table_files,
@@ -195,7 +196,8 @@ class VersionEditHandler : public VersionEditHandlerBase {
195
196
  // by subsequent manifest records, Recover() will return failure status.
196
197
  std::unordered_map<uint32_t, std::string> column_families_not_found_;
197
198
  VersionEditParams version_edit_params_;
198
- const bool track_missing_files_;
199
+ const bool track_found_and_missing_files_;
200
+ std::unordered_map<uint32_t, std::unordered_set<uint64_t>> cf_to_found_files_;
199
201
  std::unordered_map<uint32_t, std::unordered_set<uint64_t>>
200
202
  cf_to_missing_files_;
201
203
  std::unordered_map<uint32_t, uint64_t> cf_to_missing_blob_files_high_;
@@ -273,6 +275,8 @@ class VersionEditHandlerPointInTime : public VersionEditHandler {
273
275
 
274
276
  bool in_atomic_group_ = false;
275
277
 
278
+ std::vector<std::string> intermediate_files_;
279
+
276
280
  private:
277
281
  bool AtomicUpdateVersionsCompleted();
278
282
  bool AtomicUpdateVersionsContains(uint32_t cfid);
@@ -310,6 +314,10 @@ class ManifestTailer : public VersionEditHandlerPointInTime {
310
314
  return cfds_changed_;
311
315
  }
312
316
 
317
+ std::vector<std::string>& GetIntermediateFiles() {
318
+ return intermediate_files_;
319
+ }
320
+
313
321
  protected:
314
322
  Status Initialize() override;
315
323
 
@@ -342,7 +350,7 @@ class DumpManifestHandler : public VersionEditHandler {
342
350
  bool json)
343
351
  : VersionEditHandler(
344
352
  /*read_only=*/true, column_families, version_set,
345
- /*track_missing_files=*/false,
353
+ /*track_found_and_missing_files=*/false,
346
354
  /*no_error_if_files_missing=*/false, io_tracer, read_options,
347
355
  /*skip_load_table_files=*/true),
348
356
  verbose_(verbose),
@@ -356,14 +364,15 @@ class DumpManifestHandler : public VersionEditHandler {
356
364
 
357
365
  Status ApplyVersionEdit(VersionEdit& edit, ColumnFamilyData** cfd) override {
358
366
  // Write out each individual edit
359
- if (verbose_ && !json_) {
367
+ if (json_) {
360
368
  // Print out DebugStrings. Can include non-terminating null characters.
361
- fwrite(edit.DebugString(hex_).data(), sizeof(char),
362
- edit.DebugString(hex_).size(), stdout);
363
- } else if (json_) {
369
+ std::string edit_dump_str = edit.DebugJSON(count_, hex_);
370
+ fwrite(edit_dump_str.data(), sizeof(char), edit_dump_str.size(), stdout);
371
+ fwrite("\n", sizeof(char), 1, stdout);
372
+ } else if (verbose_) {
364
373
  // Print out DebugStrings. Can include non-terminating null characters.
365
- fwrite(edit.DebugString(hex_).data(), sizeof(char),
366
- edit.DebugString(hex_).size(), stdout);
374
+ std::string edit_dump_str = edit.DebugString(hex_);
375
+ fwrite(edit_dump_str.data(), sizeof(char), edit_dump_str.size(), stdout);
367
376
  }
368
377
  ++count_;
369
378
  return VersionEditHandler::ApplyVersionEdit(edit, cfd);
@@ -857,10 +857,14 @@ Version::~Version() {
857
857
  f->refs--;
858
858
  if (f->refs <= 0) {
859
859
  assert(cfd_ != nullptr);
860
+ // When not in the process of closing the DB, we'll have a superversion
861
+ // to get current mutable options from
862
+ auto* sv = cfd_->GetSuperVersion();
860
863
  uint32_t path_id = f->fd.GetPathId();
861
864
  assert(path_id < cfd_->ioptions()->cf_paths.size());
862
865
  vset_->obsolete_files_.emplace_back(
863
866
  f, cfd_->ioptions()->cf_paths[path_id].path,
867
+ sv ? sv->mutable_cf_options.uncache_aggressiveness : 0,
864
868
  cfd_->GetFileMetadataCacheReservationManager());
865
869
  }
866
870
  }
@@ -972,7 +976,8 @@ class LevelIterator final : public InternalIterator {
972
976
  const std::vector<AtomicCompactionUnitBoundary>* compaction_boundaries =
973
977
  nullptr,
974
978
  bool allow_unprepared_value = false,
975
- TruncatedRangeDelIterator**** range_tombstone_iter_ptr_ = nullptr)
979
+ std::unique_ptr<TruncatedRangeDelIterator>*** range_tombstone_iter_ptr_ =
980
+ nullptr)
976
981
  : table_cache_(table_cache),
977
982
  read_options_(read_options),
978
983
  file_options_(file_options),
@@ -1112,9 +1117,8 @@ class LevelIterator final : public InternalIterator {
1112
1117
  }
1113
1118
 
1114
1119
  void ClearRangeTombstoneIter() {
1115
- if (range_tombstone_iter_ && *range_tombstone_iter_) {
1116
- delete *range_tombstone_iter_;
1117
- *range_tombstone_iter_ = nullptr;
1120
+ if (range_tombstone_iter_) {
1121
+ range_tombstone_iter_->reset();
1118
1122
  }
1119
1123
  }
1120
1124
 
@@ -1197,7 +1201,7 @@ class LevelIterator final : public InternalIterator {
1197
1201
  // iterator end).
1198
1202
  //
1199
1203
  // *range_tombstone_iter_ points to range tombstones of the current SST file
1200
- TruncatedRangeDelIterator** range_tombstone_iter_;
1204
+ std::unique_ptr<TruncatedRangeDelIterator>* range_tombstone_iter_;
1201
1205
 
1202
1206
  // The sentinel key to be returned
1203
1207
  Slice sentinel_;
@@ -1276,11 +1280,9 @@ void LevelIterator::Seek(const Slice& target) {
1276
1280
  ts_sz);
1277
1281
  if (prefix_extractor_->InDomain(target_user_key_without_ts) &&
1278
1282
  (!prefix_extractor_->InDomain(next_file_first_user_key_without_ts) ||
1279
- user_comparator_.CompareWithoutTimestamp(
1280
- prefix_extractor_->Transform(target_user_key_without_ts), false,
1281
- prefix_extractor_->Transform(
1282
- next_file_first_user_key_without_ts),
1283
- false) != 0)) {
1283
+ prefix_extractor_->Transform(target_user_key_without_ts)
1284
+ .compare(prefix_extractor_->Transform(
1285
+ next_file_first_user_key_without_ts)) != 0)) {
1284
1286
  // SkipEmptyFileForward() will not advance to next file when this flag
1285
1287
  // is set for reason detailed below.
1286
1288
  //
@@ -1925,7 +1927,7 @@ InternalIterator* Version::TEST_GetLevelIterator(
1925
1927
  int level, bool allow_unprepared_value) {
1926
1928
  auto* arena = merge_iter_builder->GetArena();
1927
1929
  auto* mem = arena->AllocateAligned(sizeof(LevelIterator));
1928
- TruncatedRangeDelIterator*** tombstone_iter_ptr = nullptr;
1930
+ std::unique_ptr<TruncatedRangeDelIterator>** tombstone_iter_ptr = nullptr;
1929
1931
  auto level_iter = new (mem) LevelIterator(
1930
1932
  cfd_->table_cache(), read_options, file_options_,
1931
1933
  cfd_->internal_comparator(), &storage_info_.LevelFilesBrief(level),
@@ -2025,7 +2027,7 @@ void Version::AddIteratorsForLevel(const ReadOptions& read_options,
2025
2027
  auto* arena = merge_iter_builder->GetArena();
2026
2028
  if (level == 0) {
2027
2029
  // Merge all level zero files together since they may overlap
2028
- TruncatedRangeDelIterator* tombstone_iter = nullptr;
2030
+ std::unique_ptr<TruncatedRangeDelIterator> tombstone_iter = nullptr;
2029
2031
  for (size_t i = 0; i < storage_info_.LevelFilesBrief(0).num_files; i++) {
2030
2032
  const auto& file = storage_info_.LevelFilesBrief(0).files[i];
2031
2033
  auto table_iter = cfd_->table_cache()->NewIterator(
@@ -2042,8 +2044,8 @@ void Version::AddIteratorsForLevel(const ReadOptions& read_options,
2042
2044
  if (read_options.ignore_range_deletions) {
2043
2045
  merge_iter_builder->AddIterator(table_iter);
2044
2046
  } else {
2045
- merge_iter_builder->AddPointAndTombstoneIterator(table_iter,
2046
- tombstone_iter);
2047
+ merge_iter_builder->AddPointAndTombstoneIterator(
2048
+ table_iter, std::move(tombstone_iter));
2047
2049
  }
2048
2050
  }
2049
2051
  if (should_sample) {
@@ -2060,7 +2062,7 @@ void Version::AddIteratorsForLevel(const ReadOptions& read_options,
2060
2062
  // walks through the non-overlapping files in the level, opening them
2061
2063
  // lazily.
2062
2064
  auto* mem = arena->AllocateAligned(sizeof(LevelIterator));
2063
- TruncatedRangeDelIterator*** tombstone_iter_ptr = nullptr;
2065
+ std::unique_ptr<TruncatedRangeDelIterator>** tombstone_iter_ptr = nullptr;
2064
2066
  auto level_iter = new (mem) LevelIterator(
2065
2067
  cfd_->table_cache(), read_options, soptions,
2066
2068
  cfd_->internal_comparator(), &storage_info_.LevelFilesBrief(level),
@@ -2777,7 +2779,7 @@ void Version::MultiGet(const ReadOptions& read_options, MultiGetRange* range,
2777
2779
  }
2778
2780
  }
2779
2781
 
2780
- if (s.ok() && !blob_ctxs.empty()) {
2782
+ if (!blob_ctxs.empty()) {
2781
2783
  MultiGetBlob(read_options, keys_with_blobs_range, blob_ctxs);
2782
2784
  }
2783
2785
 
@@ -3130,6 +3132,10 @@ bool Version::MaybeInitializeFileMetaData(const ReadOptions& read_options,
3130
3132
  file_meta->raw_value_size = tp->raw_value_size;
3131
3133
  file_meta->raw_key_size = tp->raw_key_size;
3132
3134
  file_meta->num_range_deletions = tp->num_range_deletions;
3135
+ // Ensure new invariants on old files
3136
+ file_meta->num_deletions =
3137
+ std::max(tp->num_deletions, tp->num_range_deletions);
3138
+ file_meta->num_entries = std::max(tp->num_entries, tp->num_deletions);
3133
3139
  return true;
3134
3140
  }
3135
3141
 
@@ -3141,6 +3147,7 @@ void VersionStorageInfo::UpdateAccumulatedStats(FileMetaData* file_meta) {
3141
3147
  accumulated_file_size_ += file_meta->fd.GetFileSize();
3142
3148
  accumulated_raw_key_size_ += file_meta->raw_key_size;
3143
3149
  accumulated_raw_value_size_ += file_meta->raw_value_size;
3150
+ assert(file_meta->num_entries >= file_meta->num_deletions);
3144
3151
  accumulated_num_non_deletions_ +=
3145
3152
  file_meta->num_entries - file_meta->num_deletions;
3146
3153
  accumulated_num_deletions_ += file_meta->num_deletions;
@@ -3496,6 +3503,10 @@ void VersionStorageInfo::ComputeCompactionScore(
3496
3503
  score = kScoreForNeedCompaction;
3497
3504
  }
3498
3505
  } else {
3506
+ // For universal compaction, if a user configures `max_read_amp`, then
3507
+ // the score may be a false positive signal.
3508
+ // `level0_file_num_compaction_trigger` is used as a trigger to check
3509
+ // if there is any compaction work to do.
3499
3510
  score = static_cast<double>(num_sorted_runs) /
3500
3511
  mutable_cf_options.level0_file_num_compaction_trigger;
3501
3512
  if (compaction_style_ == kCompactionStyleLevel && num_levels() > 1) {
@@ -4612,25 +4623,27 @@ uint64_t VersionStorageInfo::GetMaxEpochNumberOfFiles() const {
4612
4623
  return max_epoch_number;
4613
4624
  }
4614
4625
 
4615
- void VersionStorageInfo::RecoverEpochNumbers(ColumnFamilyData* cfd) {
4616
- cfd->ResetNextEpochNumber();
4626
+ void VersionStorageInfo::RecoverEpochNumbers(ColumnFamilyData* cfd,
4627
+ bool restart_epoch, bool force) {
4628
+ if (restart_epoch) {
4629
+ cfd->ResetNextEpochNumber();
4617
4630
 
4618
- bool reserve_epoch_num_for_file_ingested_behind =
4619
- cfd->ioptions()->allow_ingest_behind;
4620
- if (reserve_epoch_num_for_file_ingested_behind) {
4621
- uint64_t reserved_epoch_number = cfd->NewEpochNumber();
4622
- assert(reserved_epoch_number == kReservedEpochNumberForFileIngestedBehind);
4623
- ROCKS_LOG_INFO(cfd->ioptions()->info_log.get(),
4624
- "[%s]CF has reserved epoch number %" PRIu64
4625
- " for files ingested "
4626
- "behind since `Options::allow_ingest_behind` is true",
4627
- cfd->GetName().c_str(), reserved_epoch_number);
4631
+ bool reserve_epoch_num_for_file_ingested_behind =
4632
+ cfd->ioptions()->allow_ingest_behind;
4633
+ if (reserve_epoch_num_for_file_ingested_behind) {
4634
+ uint64_t reserved_epoch_number = cfd->NewEpochNumber();
4635
+ assert(reserved_epoch_number ==
4636
+ kReservedEpochNumberForFileIngestedBehind);
4637
+ ROCKS_LOG_INFO(cfd->ioptions()->info_log.get(),
4638
+ "[%s]CF has reserved epoch number %" PRIu64
4639
+ " for files ingested "
4640
+ "behind since `Options::allow_ingest_behind` is true",
4641
+ cfd->GetName().c_str(), reserved_epoch_number);
4642
+ }
4628
4643
  }
4629
4644
 
4630
- if (HasMissingEpochNumber()) {
4631
- assert(epoch_number_requirement_ == EpochNumberRequirement::kMightMissing);
4632
- assert(num_levels_ >= 1);
4633
-
4645
+ bool missing_epoch_number = HasMissingEpochNumber();
4646
+ if (missing_epoch_number || force) {
4634
4647
  for (int level = num_levels_ - 1; level >= 1; --level) {
4635
4648
  auto& files_at_level = files_[level];
4636
4649
  if (files_at_level.empty()) {
@@ -4641,17 +4654,19 @@ void VersionStorageInfo::RecoverEpochNumbers(ColumnFamilyData* cfd) {
4641
4654
  f->epoch_number = next_epoch_number;
4642
4655
  }
4643
4656
  }
4644
-
4645
4657
  for (auto file_meta_iter = files_[0].rbegin();
4646
4658
  file_meta_iter != files_[0].rend(); file_meta_iter++) {
4647
4659
  FileMetaData* f = *file_meta_iter;
4648
4660
  f->epoch_number = cfd->NewEpochNumber();
4649
4661
  }
4650
-
4651
- ROCKS_LOG_WARN(cfd->ioptions()->info_log.get(),
4652
- "[%s]CF's epoch numbers are inferred based on seqno",
4653
- cfd->GetName().c_str());
4654
- epoch_number_requirement_ = EpochNumberRequirement::kMustPresent;
4662
+ if (missing_epoch_number) {
4663
+ assert(epoch_number_requirement_ ==
4664
+ EpochNumberRequirement::kMightMissing);
4665
+ ROCKS_LOG_WARN(cfd->ioptions()->info_log.get(),
4666
+ "[%s]CF's epoch numbers are inferred based on seqno",
4667
+ cfd->GetName().c_str());
4668
+ epoch_number_requirement_ = EpochNumberRequirement::kMustPresent;
4669
+ }
4655
4670
  } else {
4656
4671
  assert(epoch_number_requirement_ == EpochNumberRequirement::kMustPresent);
4657
4672
  cfd->SetNextEpochNumber(
@@ -5179,13 +5194,21 @@ Status VersionSet::Close(FSDirectory* db_dir, InstrumentedMutex* mu) {
5179
5194
  }
5180
5195
 
5181
5196
  VersionSet::~VersionSet() {
5182
- // we need to delete column_family_set_ because its destructor depends on
5183
- // VersionSet
5197
+ // Must clean up column families to make all files "obsolete"
5184
5198
  column_family_set_.reset();
5199
+
5185
5200
  for (auto& file : obsolete_files_) {
5186
5201
  if (file.metadata->table_reader_handle) {
5187
- table_cache_->Release(file.metadata->table_reader_handle);
5188
- TableCache::Evict(table_cache_, file.metadata->fd.GetNumber());
5202
+ // NOTE: DB is shutting down, so file is probably not obsolete, just
5203
+ // no longer referenced by Versions in memory.
5204
+ // For more context, see comment on "table_cache_->EraseUnRefEntries()"
5205
+ // in DBImpl::CloseHelper().
5206
+ // Using uncache_aggressiveness=0 overrides any previous marking to
5207
+ // attempt to uncache the file's blocks (which after cleaning up
5208
+ // column families could cause use-after-free)
5209
+ TableCache::ReleaseObsolete(table_cache_,
5210
+ file.metadata->table_reader_handle,
5211
+ /*uncache_aggressiveness=*/0);
5189
5212
  }
5190
5213
  file.DeleteMetadata();
5191
5214
  }
@@ -5485,6 +5508,7 @@ Status VersionSet::ProcessManifestWrites(
5485
5508
  Status s;
5486
5509
  IOStatus io_s;
5487
5510
  IOStatus manifest_io_status;
5511
+ std::unique_ptr<log::Writer> new_desc_log_ptr;
5488
5512
  {
5489
5513
  FileOptions opt_file_opts = fs_->OptimizeForManifestWrite(file_options_);
5490
5514
  mu->Unlock();
@@ -5513,6 +5537,7 @@ Status VersionSet::ProcessManifestWrites(
5513
5537
  }
5514
5538
  }
5515
5539
 
5540
+ log::Writer* raw_desc_log_ptr = descriptor_log_.get();
5516
5541
  if (s.ok() && new_descriptor_log) {
5517
5542
  // This is fine because everything inside of this block is serialized --
5518
5543
  // only one thread can be here at the same time
@@ -5534,12 +5559,14 @@ Status VersionSet::ProcessManifestWrites(
5534
5559
  db_options_->listeners, nullptr,
5535
5560
  tmp_set.Contains(FileType::kDescriptorFile),
5536
5561
  tmp_set.Contains(FileType::kDescriptorFile)));
5537
- descriptor_log_.reset(
5562
+ new_desc_log_ptr.reset(
5538
5563
  new log::Writer(std::move(file_writer), 0, false));
5564
+ raw_desc_log_ptr = new_desc_log_ptr.get();
5539
5565
  s = WriteCurrentStateToManifest(write_options, curr_state,
5540
- wal_additions, descriptor_log_.get(),
5541
- io_s);
5542
- } else {
5566
+ wal_additions, raw_desc_log_ptr, io_s);
5567
+ assert(s == io_s);
5568
+ }
5569
+ if (!io_s.ok()) {
5543
5570
  manifest_io_status = io_s;
5544
5571
  s = io_s;
5545
5572
  }
@@ -5582,7 +5609,7 @@ Status VersionSet::ProcessManifestWrites(
5582
5609
  }
5583
5610
  ++idx;
5584
5611
  #endif /* !NDEBUG */
5585
- io_s = descriptor_log_->AddRecord(write_options, record);
5612
+ io_s = raw_desc_log_ptr->AddRecord(write_options, record);
5586
5613
  if (!io_s.ok()) {
5587
5614
  s = io_s;
5588
5615
  manifest_io_status = io_s;
@@ -5592,7 +5619,7 @@ Status VersionSet::ProcessManifestWrites(
5592
5619
 
5593
5620
  if (s.ok()) {
5594
5621
  io_s =
5595
- SyncManifest(db_options_, write_options, descriptor_log_->file());
5622
+ SyncManifest(db_options_, write_options, raw_desc_log_ptr->file());
5596
5623
  manifest_io_status = io_s;
5597
5624
  TEST_SYNC_POINT_CALLBACK(
5598
5625
  "VersionSet::ProcessManifestWrites:AfterSyncManifest", &io_s);
@@ -5625,7 +5652,7 @@ Status VersionSet::ProcessManifestWrites(
5625
5652
 
5626
5653
  if (s.ok()) {
5627
5654
  // find offset in manifest file where this version is stored.
5628
- new_manifest_file_size = descriptor_log_->file()->GetFileSize();
5655
+ new_manifest_file_size = raw_desc_log_ptr->file()->GetFileSize();
5629
5656
  }
5630
5657
 
5631
5658
  if (first_writer.edit_list.front()->IsColumnFamilyDrop()) {
@@ -5671,6 +5698,7 @@ Status VersionSet::ProcessManifestWrites(
5671
5698
  // Append the old manifest file to the obsolete_manifest_ list to be deleted
5672
5699
  // by PurgeObsoleteFiles later.
5673
5700
  if (s.ok() && new_descriptor_log) {
5701
+ descriptor_log_ = std::move(new_desc_log_ptr);
5674
5702
  obsolete_manifests_.emplace_back(
5675
5703
  DescriptorFileName("", manifest_file_number_));
5676
5704
  }
@@ -5740,14 +5768,11 @@ Status VersionSet::ProcessManifestWrites(
5740
5768
  for (auto v : versions) {
5741
5769
  delete v;
5742
5770
  }
5743
- if (manifest_io_status.ok()) {
5744
- manifest_file_number_ = pending_manifest_file_number_;
5745
- manifest_file_size_ = new_manifest_file_size;
5746
- }
5747
5771
  // If manifest append failed for whatever reason, the file could be
5748
5772
  // corrupted. So we need to force the next version update to start a
5749
5773
  // new manifest file.
5750
5774
  descriptor_log_.reset();
5775
+ new_desc_log_ptr.reset();
5751
5776
  // If manifest operations failed, then we know the CURRENT file still
5752
5777
  // points to the original MANIFEST. Therefore, we can safely delete the
5753
5778
  // new MANIFEST.
@@ -5774,7 +5799,7 @@ Status VersionSet::ProcessManifestWrites(
5774
5799
  // a) CURRENT points to the new MANIFEST, and the new MANIFEST is present.
5775
5800
  // b) CURRENT points to the original MANIFEST, and the original MANIFEST
5776
5801
  // also exists.
5777
- if (new_descriptor_log && !manifest_io_status.ok()) {
5802
+ if (!manifest_io_status.ok() && new_descriptor_log) {
5778
5803
  ROCKS_LOG_INFO(db_options_->info_log,
5779
5804
  "Deleting manifest %" PRIu64 " current manifest %" PRIu64
5780
5805
  "\n",
@@ -6013,7 +6038,8 @@ Status VersionSet::GetCurrentManifestPath(const std::string& dbname,
6013
6038
 
6014
6039
  Status VersionSet::Recover(
6015
6040
  const std::vector<ColumnFamilyDescriptor>& column_families, bool read_only,
6016
- std::string* db_id, bool no_error_if_files_missing) {
6041
+ std::string* db_id, bool no_error_if_files_missing, bool is_retry,
6042
+ Status* log_status) {
6017
6043
  const ReadOptions read_options(Env::IOActivity::kDBOpen);
6018
6044
  // Read "CURRENT" file, which contains a pointer to the current manifest
6019
6045
  // file
@@ -6038,8 +6064,11 @@ Status VersionSet::Recover(
6038
6064
  }
6039
6065
  manifest_file_reader.reset(new SequentialFileReader(
6040
6066
  std::move(manifest_file), manifest_path,
6041
- db_options_->log_readahead_size, io_tracer_, db_options_->listeners));
6067
+ db_options_->log_readahead_size, io_tracer_, db_options_->listeners,
6068
+ /*rate_limiter=*/nullptr, is_retry));
6042
6069
  }
6070
+ TEST_SYNC_POINT("VersionSet::Recover:StartManifestRead");
6071
+
6043
6072
  uint64_t current_manifest_file_size = 0;
6044
6073
  uint64_t log_number = 0;
6045
6074
  {
@@ -6050,8 +6079,8 @@ Status VersionSet::Recover(
6050
6079
  true /* checksum */, 0 /* log_number */);
6051
6080
  VersionEditHandler handler(
6052
6081
  read_only, column_families, const_cast<VersionSet*>(this),
6053
- /*track_missing_files=*/false, no_error_if_files_missing, io_tracer_,
6054
- read_options, EpochNumberRequirement::kMightMissing);
6082
+ /*track_found_and_missing_files=*/false, no_error_if_files_missing,
6083
+ io_tracer_, read_options, EpochNumberRequirement::kMightMissing);
6055
6084
  handler.Iterate(reader, &log_read_status);
6056
6085
  s = handler.status();
6057
6086
  if (s.ok()) {
@@ -6063,6 +6092,9 @@ Status VersionSet::Recover(
6063
6092
  if (s.ok()) {
6064
6093
  RecoverEpochNumbers();
6065
6094
  }
6095
+ if (log_status) {
6096
+ *log_status = log_read_status;
6097
+ }
6066
6098
  }
6067
6099
 
6068
6100
  if (s.ok()) {
@@ -7046,8 +7078,8 @@ InternalIterator* VersionSet::MakeInputIterator(
7046
7078
  // that will be initialized to where CompactionMergingIterator stores
7047
7079
  // pointer to its range tombstones. This is used by LevelIterator
7048
7080
  // to update pointer to range tombstones as it traverse different SST files.
7049
- std::vector<
7050
- std::pair<TruncatedRangeDelIterator*, TruncatedRangeDelIterator***>>
7081
+ std::vector<std::pair<std::unique_ptr<TruncatedRangeDelIterator>,
7082
+ std::unique_ptr<TruncatedRangeDelIterator>**>>
7051
7083
  range_tombstones;
7052
7084
  size_t num = 0;
7053
7085
  for (size_t which = 0; which < c->num_input_levels(); which++) {
@@ -7069,7 +7101,8 @@ InternalIterator* VersionSet::MakeInputIterator(
7069
7101
  *end, fmd.smallest.user_key()) < 0) {
7070
7102
  continue;
7071
7103
  }
7072
- TruncatedRangeDelIterator* range_tombstone_iter = nullptr;
7104
+ std::unique_ptr<TruncatedRangeDelIterator> range_tombstone_iter =
7105
+ nullptr;
7073
7106
  list[num++] = cfd->table_cache()->NewIterator(
7074
7107
  read_options, file_options_compactions,
7075
7108
  cfd->internal_comparator(), fmd, range_del_agg,
@@ -7086,11 +7119,13 @@ InternalIterator* VersionSet::MakeInputIterator(
7086
7119
  c->mutable_cf_options()->block_protection_bytes_per_key,
7087
7120
  /*range_del_read_seqno=*/nullptr,
7088
7121
  /*range_del_iter=*/&range_tombstone_iter);
7089
- range_tombstones.emplace_back(range_tombstone_iter, nullptr);
7122
+ range_tombstones.emplace_back(std::move(range_tombstone_iter),
7123
+ nullptr);
7090
7124
  }
7091
7125
  } else {
7092
7126
  // Create concatenating iterator for the files from this level
7093
- TruncatedRangeDelIterator*** tombstone_iter_ptr = nullptr;
7127
+ std::unique_ptr<TruncatedRangeDelIterator>** tombstone_iter_ptr =
7128
+ nullptr;
7094
7129
  list[num++] = new LevelIterator(
7095
7130
  cfd->table_cache(), read_options, file_options_compactions,
7096
7131
  cfd->internal_comparator(), c->input_levels(which),
@@ -7137,6 +7172,20 @@ Status VersionSet::GetMetadataForFile(uint64_t number, int* filelevel,
7137
7172
  }
7138
7173
 
7139
7174
  void VersionSet::GetLiveFilesMetaData(std::vector<LiveFileMetaData>* metadata) {
7175
+ if (!metadata) {
7176
+ return;
7177
+ }
7178
+ assert(metadata);
7179
+ size_t count = 0;
7180
+ for (auto cfd : *column_family_set_) {
7181
+ if (cfd->IsDropped() || !cfd->initialized()) {
7182
+ continue;
7183
+ }
7184
+ for (int level = 0; level < cfd->NumberLevels(); level++) {
7185
+ count += cfd->current()->storage_info()->LevelFiles(level).size();
7186
+ }
7187
+ }
7188
+ metadata->reserve(count);
7140
7189
  for (auto cfd : *column_family_set_) {
7141
7190
  if (cfd->IsDropped() || !cfd->initialized()) {
7142
7191
  continue;
@@ -7409,7 +7458,8 @@ Status ReactiveVersionSet::ReadAndApply(
7409
7458
  InstrumentedMutex* mu,
7410
7459
  std::unique_ptr<log::FragmentBufferedReader>* manifest_reader,
7411
7460
  Status* manifest_read_status,
7412
- std::unordered_set<ColumnFamilyData*>* cfds_changed) {
7461
+ std::unordered_set<ColumnFamilyData*>* cfds_changed,
7462
+ std::vector<std::string>* files_to_delete) {
7413
7463
  assert(manifest_reader != nullptr);
7414
7464
  assert(cfds_changed != nullptr);
7415
7465
  mu->AssertHeld();
@@ -7426,6 +7476,9 @@ Status ReactiveVersionSet::ReadAndApply(
7426
7476
  if (s.ok()) {
7427
7477
  *cfds_changed = std::move(manifest_tailer_->GetUpdatedColumnFamilies());
7428
7478
  }
7479
+ if (files_to_delete) {
7480
+ *files_to_delete = std::move(manifest_tailer_->GetIntermediateFiles());
7481
+ }
7429
7482
 
7430
7483
  return s;
7431
7484
  }