rocksdb-native 2.2.0 → 2.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (261)
  1. package/binding.c +92 -10
  2. package/index.js +9 -0
  3. package/lib/batch.js +11 -1
  4. package/lib/iterator.js +3 -1
  5. package/lib/snapshot.js +21 -0
  6. package/package.json +1 -1
  7. package/prebuilds/darwin-arm64/rocksdb-native.bare +0 -0
  8. package/prebuilds/darwin-arm64/rocksdb-native.node +0 -0
  9. package/prebuilds/darwin-x64/rocksdb-native.bare +0 -0
  10. package/prebuilds/darwin-x64/rocksdb-native.node +0 -0
  11. package/prebuilds/linux-arm64/rocksdb-native.bare +0 -0
  12. package/prebuilds/linux-arm64/rocksdb-native.node +0 -0
  13. package/prebuilds/linux-x64/rocksdb-native.bare +0 -0
  14. package/prebuilds/linux-x64/rocksdb-native.node +0 -0
  15. package/prebuilds/win32-x64/rocksdb-native.bare +0 -0
  16. package/prebuilds/win32-x64/rocksdb-native.node +0 -0
  17. package/vendor/librocksdb/include/rocksdb.h +38 -4
  18. package/vendor/librocksdb/src/rocksdb.cc +114 -14
  19. package/vendor/librocksdb/vendor/rocksdb/CMakeLists.txt +21 -4
  20. package/vendor/librocksdb/vendor/rocksdb/cache/secondary_cache_adapter.cc +6 -3
  21. package/vendor/librocksdb/vendor/rocksdb/db/arena_wrapped_db_iter.cc +4 -4
  22. package/vendor/librocksdb/vendor/rocksdb/db/arena_wrapped_db_iter.h +4 -2
  23. package/vendor/librocksdb/vendor/rocksdb/db/attribute_group_iterator_impl.cc +20 -0
  24. package/vendor/librocksdb/vendor/rocksdb/db/attribute_group_iterator_impl.h +83 -0
  25. package/vendor/librocksdb/vendor/rocksdb/db/builder.cc +9 -5
  26. package/vendor/librocksdb/vendor/rocksdb/db/builder.h +1 -1
  27. package/vendor/librocksdb/vendor/rocksdb/db/c.cc +231 -6
  28. package/vendor/librocksdb/vendor/rocksdb/db/c_test.c +202 -2
  29. package/vendor/librocksdb/vendor/rocksdb/db/coalescing_iterator.cc +47 -0
  30. package/vendor/librocksdb/vendor/rocksdb/db/coalescing_iterator.h +79 -0
  31. package/vendor/librocksdb/vendor/rocksdb/db/column_family.cc +28 -0
  32. package/vendor/librocksdb/vendor/rocksdb/db/column_family.h +17 -0
  33. package/vendor/librocksdb/vendor/rocksdb/db/compaction/compaction.cc +8 -1
  34. package/vendor/librocksdb/vendor/rocksdb/db/compaction/compaction.h +11 -9
  35. package/vendor/librocksdb/vendor/rocksdb/db/compaction/compaction_iterator.cc +50 -23
  36. package/vendor/librocksdb/vendor/rocksdb/db/compaction/compaction_iterator.h +13 -0
  37. package/vendor/librocksdb/vendor/rocksdb/db/compaction/compaction_job.cc +22 -25
  38. package/vendor/librocksdb/vendor/rocksdb/db/compaction/compaction_job.h +2 -0
  39. package/vendor/librocksdb/vendor/rocksdb/db/compaction/compaction_outputs.cc +8 -1
  40. package/vendor/librocksdb/vendor/rocksdb/db/compaction/compaction_outputs.h +1 -0
  41. package/vendor/librocksdb/vendor/rocksdb/db/compaction/compaction_picker.cc +40 -17
  42. package/vendor/librocksdb/vendor/rocksdb/db/compaction/compaction_picker.h +20 -14
  43. package/vendor/librocksdb/vendor/rocksdb/db/compaction/compaction_picker_level.cc +11 -6
  44. package/vendor/librocksdb/vendor/rocksdb/db/compaction/compaction_picker_universal.cc +77 -24
  45. package/vendor/librocksdb/vendor/rocksdb/db/compaction/compaction_service_job.cc +2 -0
  46. package/vendor/librocksdb/vendor/rocksdb/db/convenience.cc +3 -0
  47. package/vendor/librocksdb/vendor/rocksdb/db/db_filesnapshot.cc +125 -31
  48. package/vendor/librocksdb/vendor/rocksdb/db/db_impl/db_impl.cc +457 -231
  49. package/vendor/librocksdb/vendor/rocksdb/db/db_impl/db_impl.h +172 -73
  50. package/vendor/librocksdb/vendor/rocksdb/db/db_impl/db_impl_compaction_flush.cc +152 -133
  51. package/vendor/librocksdb/vendor/rocksdb/db/db_impl/db_impl_debug.cc +5 -0
  52. package/vendor/librocksdb/vendor/rocksdb/db/db_impl/db_impl_files.cc +58 -52
  53. package/vendor/librocksdb/vendor/rocksdb/db/db_impl/db_impl_follower.cc +348 -0
  54. package/vendor/librocksdb/vendor/rocksdb/db/db_impl/db_impl_follower.h +54 -0
  55. package/vendor/librocksdb/vendor/rocksdb/db/db_impl/db_impl_open.cc +136 -117
  56. package/vendor/librocksdb/vendor/rocksdb/db/db_impl/db_impl_secondary.cc +4 -3
  57. package/vendor/librocksdb/vendor/rocksdb/db/db_impl/db_impl_secondary.h +7 -6
  58. package/vendor/librocksdb/vendor/rocksdb/db/db_impl/db_impl_write.cc +134 -80
  59. package/vendor/librocksdb/vendor/rocksdb/db/db_iter.cc +11 -0
  60. package/vendor/librocksdb/vendor/rocksdb/db/db_test2.cc +1 -1
  61. package/vendor/librocksdb/vendor/rocksdb/db/db_test_util.cc +11 -1
  62. package/vendor/librocksdb/vendor/rocksdb/db/db_test_util.h +11 -7
  63. package/vendor/librocksdb/vendor/rocksdb/db/dbformat.cc +19 -4
  64. package/vendor/librocksdb/vendor/rocksdb/db/dbformat.h +3 -2
  65. package/vendor/librocksdb/vendor/rocksdb/db/error_handler.cc +34 -39
  66. package/vendor/librocksdb/vendor/rocksdb/db/error_handler.h +3 -4
  67. package/vendor/librocksdb/vendor/rocksdb/db/event_helpers.cc +6 -3
  68. package/vendor/librocksdb/vendor/rocksdb/db/experimental.cc +3 -2
  69. package/vendor/librocksdb/vendor/rocksdb/db/external_sst_file_ingestion_job.cc +76 -18
  70. package/vendor/librocksdb/vendor/rocksdb/db/external_sst_file_ingestion_job.h +11 -0
  71. package/vendor/librocksdb/vendor/rocksdb/db/flush_job.cc +37 -5
  72. package/vendor/librocksdb/vendor/rocksdb/db/flush_job.h +14 -0
  73. package/vendor/librocksdb/vendor/rocksdb/db/import_column_family_job.cc +49 -45
  74. package/vendor/librocksdb/vendor/rocksdb/db/internal_stats.cc +60 -1
  75. package/vendor/librocksdb/vendor/rocksdb/db/internal_stats.h +20 -1
  76. package/vendor/librocksdb/vendor/rocksdb/db/log_reader.cc +15 -6
  77. package/vendor/librocksdb/vendor/rocksdb/db/log_writer.cc +59 -10
  78. package/vendor/librocksdb/vendor/rocksdb/db/log_writer.h +8 -0
  79. package/vendor/librocksdb/vendor/rocksdb/db/memtable.cc +24 -40
  80. package/vendor/librocksdb/vendor/rocksdb/db/memtable.h +10 -10
  81. package/vendor/librocksdb/vendor/rocksdb/db/memtable_list.cc +9 -8
  82. package/vendor/librocksdb/vendor/rocksdb/db/multi_cf_iterator_impl.h +296 -0
  83. package/vendor/librocksdb/vendor/rocksdb/db/range_tombstone_fragmenter.h +8 -10
  84. package/vendor/librocksdb/vendor/rocksdb/db/repair.cc +4 -3
  85. package/vendor/librocksdb/vendor/rocksdb/db/seqno_to_time_mapping.cc +30 -0
  86. package/vendor/librocksdb/vendor/rocksdb/db/seqno_to_time_mapping.h +9 -0
  87. package/vendor/librocksdb/vendor/rocksdb/db/table_cache.cc +17 -2
  88. package/vendor/librocksdb/vendor/rocksdb/db/table_cache.h +9 -1
  89. package/vendor/librocksdb/vendor/rocksdb/db/table_properties_collector.h +9 -2
  90. package/vendor/librocksdb/vendor/rocksdb/db/transaction_log_impl.cc +3 -3
  91. package/vendor/librocksdb/vendor/rocksdb/db/transaction_log_impl.h +7 -7
  92. package/vendor/librocksdb/vendor/rocksdb/db/version_edit.cc +0 -1
  93. package/vendor/librocksdb/vendor/rocksdb/db/version_edit_handler.cc +39 -5
  94. package/vendor/librocksdb/vendor/rocksdb/db/version_edit_handler.h +24 -15
  95. package/vendor/librocksdb/vendor/rocksdb/db/version_set.cc +117 -64
  96. package/vendor/librocksdb/vendor/rocksdb/db/version_set.h +27 -10
  97. package/vendor/librocksdb/vendor/rocksdb/db/wal_manager.cc +37 -29
  98. package/vendor/librocksdb/vendor/rocksdb/db/wal_manager.h +6 -5
  99. package/vendor/librocksdb/vendor/rocksdb/db/wide/wide_columns.cc +2 -3
  100. package/vendor/librocksdb/vendor/rocksdb/db/wide/wide_columns_helper.cc +6 -0
  101. package/vendor/librocksdb/vendor/rocksdb/db/write_batch.cc +89 -31
  102. package/vendor/librocksdb/vendor/rocksdb/db/write_thread.cc +53 -5
  103. package/vendor/librocksdb/vendor/rocksdb/db/write_thread.h +36 -4
  104. package/vendor/librocksdb/vendor/rocksdb/env/composite_env_wrapper.h +21 -0
  105. package/vendor/librocksdb/vendor/rocksdb/env/env.cc +15 -0
  106. package/vendor/librocksdb/vendor/rocksdb/env/fs_on_demand.cc +331 -0
  107. package/vendor/librocksdb/vendor/rocksdb/env/fs_on_demand.h +139 -0
  108. package/vendor/librocksdb/vendor/rocksdb/env/io_posix.cc +8 -6
  109. package/vendor/librocksdb/vendor/rocksdb/env/io_posix.h +1 -1
  110. package/vendor/librocksdb/vendor/rocksdb/file/delete_scheduler.cc +130 -27
  111. package/vendor/librocksdb/vendor/rocksdb/file/delete_scheduler.h +61 -8
  112. package/vendor/librocksdb/vendor/rocksdb/file/file_util.cc +25 -4
  113. package/vendor/librocksdb/vendor/rocksdb/file/file_util.h +15 -0
  114. package/vendor/librocksdb/vendor/rocksdb/file/sequence_file_reader.cc +1 -0
  115. package/vendor/librocksdb/vendor/rocksdb/file/sequence_file_reader.h +9 -4
  116. package/vendor/librocksdb/vendor/rocksdb/file/sst_file_manager_impl.cc +18 -0
  117. package/vendor/librocksdb/vendor/rocksdb/file/sst_file_manager_impl.h +31 -4
  118. package/vendor/librocksdb/vendor/rocksdb/file/writable_file_writer.cc +40 -38
  119. package/vendor/librocksdb/vendor/rocksdb/file/writable_file_writer.h +48 -15
  120. package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/advanced_options.h +12 -3
  121. package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/attribute_groups.h +114 -0
  122. package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/c.h +90 -0
  123. package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/cache.h +5 -0
  124. package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/comparator.h +27 -0
  125. package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/db.h +71 -12
  126. package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/env.h +9 -0
  127. package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/experimental.h +5 -0
  128. package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/file_system.h +14 -0
  129. package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/iterator.h +9 -71
  130. package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/iterator_base.h +90 -0
  131. package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/listener.h +21 -0
  132. package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/options.h +125 -12
  133. package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/perf_context.h +1 -1
  134. package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/sst_file_reader.h +11 -1
  135. package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/table.h +6 -6
  136. package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/table_properties.h +19 -0
  137. package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/transaction_log.h +12 -6
  138. package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/types.h +12 -0
  139. package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/universal_compaction.h +31 -0
  140. package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/user_write_callback.h +29 -0
  141. package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/utilities/cache_dump_load.h +4 -0
  142. package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/utilities/checkpoint.h +4 -2
  143. package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/utilities/customizable_util.h +0 -1
  144. package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/utilities/env_mirror.h +1 -1
  145. package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/utilities/ldb_cmd.h +24 -7
  146. package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/utilities/option_change_migration.h +4 -4
  147. package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/utilities/stackable_db.h +24 -5
  148. package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/utilities/table_properties_collectors.h +46 -0
  149. package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/utilities/transaction.h +42 -17
  150. package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/utilities/transaction_db.h +5 -0
  151. package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/utilities/types_util.h +36 -0
  152. package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/utilities/write_batch_with_index.h +71 -3
  153. package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/version.h +2 -2
  154. package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/wide_columns.h +87 -72
  155. package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/write_batch_base.h +1 -1
  156. package/vendor/librocksdb/vendor/rocksdb/memory/memory_allocator.cc +1 -0
  157. package/vendor/librocksdb/vendor/rocksdb/options/cf_options.cc +13 -2
  158. package/vendor/librocksdb/vendor/rocksdb/options/cf_options.h +6 -2
  159. package/vendor/librocksdb/vendor/rocksdb/options/db_options.cc +27 -1
  160. package/vendor/librocksdb/vendor/rocksdb/options/db_options.h +10 -3
  161. package/vendor/librocksdb/vendor/rocksdb/options/options.cc +3 -0
  162. package/vendor/librocksdb/vendor/rocksdb/options/options_helper.cc +1 -0
  163. package/vendor/librocksdb/vendor/rocksdb/port/jemalloc_helper.h +2 -2
  164. package/vendor/librocksdb/vendor/rocksdb/port/stack_trace.cc +1 -0
  165. package/vendor/librocksdb/vendor/rocksdb/port/win/port_win.cc +3 -2
  166. package/vendor/librocksdb/vendor/rocksdb/table/block_based/binary_search_index_reader.cc +1 -2
  167. package/vendor/librocksdb/vendor/rocksdb/table/block_based/block_based_table_builder.cc +47 -31
  168. package/vendor/librocksdb/vendor/rocksdb/table/block_based/block_based_table_factory.cc +15 -0
  169. package/vendor/librocksdb/vendor/rocksdb/table/block_based/block_based_table_iterator.cc +37 -18
  170. package/vendor/librocksdb/vendor/rocksdb/table/block_based/block_based_table_iterator.h +10 -3
  171. package/vendor/librocksdb/vendor/rocksdb/table/block_based/block_based_table_reader.cc +102 -41
  172. package/vendor/librocksdb/vendor/rocksdb/table/block_based/block_based_table_reader.h +15 -7
  173. package/vendor/librocksdb/vendor/rocksdb/table/block_based/block_based_table_reader_impl.h +1 -3
  174. package/vendor/librocksdb/vendor/rocksdb/table/block_based/block_based_table_reader_sync_and_async.h +5 -6
  175. package/vendor/librocksdb/vendor/rocksdb/table/block_based/block_cache.h +31 -0
  176. package/vendor/librocksdb/vendor/rocksdb/table/block_based/block_prefetcher.cc +6 -0
  177. package/vendor/librocksdb/vendor/rocksdb/table/block_based/cachable_entry.h +10 -5
  178. package/vendor/librocksdb/vendor/rocksdb/table/block_based/filter_block.h +34 -28
  179. package/vendor/librocksdb/vendor/rocksdb/table/block_based/filter_block_reader_common.cc +17 -11
  180. package/vendor/librocksdb/vendor/rocksdb/table/block_based/filter_block_reader_common.h +5 -2
  181. package/vendor/librocksdb/vendor/rocksdb/table/block_based/filter_policy.cc +12 -3
  182. package/vendor/librocksdb/vendor/rocksdb/table/block_based/full_filter_block.cc +37 -30
  183. package/vendor/librocksdb/vendor/rocksdb/table/block_based/full_filter_block.h +11 -13
  184. package/vendor/librocksdb/vendor/rocksdb/table/block_based/hash_index_reader.cc +1 -2
  185. package/vendor/librocksdb/vendor/rocksdb/table/block_based/index_builder.cc +62 -53
  186. package/vendor/librocksdb/vendor/rocksdb/table/block_based/index_builder.h +60 -38
  187. package/vendor/librocksdb/vendor/rocksdb/table/block_based/index_reader_common.cc +14 -9
  188. package/vendor/librocksdb/vendor/rocksdb/table/block_based/index_reader_common.h +4 -1
  189. package/vendor/librocksdb/vendor/rocksdb/table/block_based/partitioned_filter_block.cc +135 -94
  190. package/vendor/librocksdb/vendor/rocksdb/table/block_based/partitioned_filter_block.h +52 -46
  191. package/vendor/librocksdb/vendor/rocksdb/table/block_based/partitioned_index_reader.cc +51 -13
  192. package/vendor/librocksdb/vendor/rocksdb/table/block_based/partitioned_index_reader.h +2 -0
  193. package/vendor/librocksdb/vendor/rocksdb/table/block_based/uncompression_dict_reader.cc +3 -11
  194. package/vendor/librocksdb/vendor/rocksdb/table/block_based/uncompression_dict_reader.h +2 -3
  195. package/vendor/librocksdb/vendor/rocksdb/table/block_fetcher.cc +8 -10
  196. package/vendor/librocksdb/vendor/rocksdb/table/block_fetcher.h +2 -1
  197. package/vendor/librocksdb/vendor/rocksdb/table/compaction_merging_iterator.cc +9 -10
  198. package/vendor/librocksdb/vendor/rocksdb/table/compaction_merging_iterator.h +3 -2
  199. package/vendor/librocksdb/vendor/rocksdb/table/format.cc +1 -2
  200. package/vendor/librocksdb/vendor/rocksdb/table/iterator.cc +4 -0
  201. package/vendor/librocksdb/vendor/rocksdb/table/merging_iterator.cc +18 -13
  202. package/vendor/librocksdb/vendor/rocksdb/table/merging_iterator.h +5 -3
  203. package/vendor/librocksdb/vendor/rocksdb/table/meta_blocks.cc +18 -4
  204. package/vendor/librocksdb/vendor/rocksdb/table/meta_blocks.h +4 -0
  205. package/vendor/librocksdb/vendor/rocksdb/table/plain/plain_table_builder.cc +2 -2
  206. package/vendor/librocksdb/vendor/rocksdb/table/sst_file_dumper.cc +6 -6
  207. package/vendor/librocksdb/vendor/rocksdb/table/sst_file_reader.cc +24 -2
  208. package/vendor/librocksdb/vendor/rocksdb/table/sst_file_writer_collectors.h +3 -1
  209. package/vendor/librocksdb/vendor/rocksdb/table/table_builder.h +8 -7
  210. package/vendor/librocksdb/vendor/rocksdb/table/table_iterator.h +69 -0
  211. package/vendor/librocksdb/vendor/rocksdb/table/table_reader.h +9 -0
  212. package/vendor/librocksdb/vendor/rocksdb/test_util/testutil.cc +25 -0
  213. package/vendor/librocksdb/vendor/rocksdb/test_util/testutil.h +12 -0
  214. package/vendor/librocksdb/vendor/rocksdb/tools/db_bench_tool.cc +32 -0
  215. package/vendor/librocksdb/vendor/rocksdb/tools/ldb_cmd.cc +618 -124
  216. package/vendor/librocksdb/vendor/rocksdb/tools/ldb_cmd_impl.h +19 -1
  217. package/vendor/librocksdb/vendor/rocksdb/tools/ldb_tool.cc +9 -0
  218. package/vendor/librocksdb/vendor/rocksdb/util/aligned_storage.h +24 -0
  219. package/vendor/librocksdb/vendor/rocksdb/util/autovector.h +4 -0
  220. package/vendor/librocksdb/vendor/rocksdb/util/comparator.cc +12 -0
  221. package/vendor/librocksdb/vendor/rocksdb/util/filter_bench.cc +1 -1
  222. package/vendor/librocksdb/vendor/rocksdb/util/random.cc +2 -1
  223. package/vendor/librocksdb/vendor/rocksdb/util/stderr_logger.cc +3 -4
  224. package/vendor/librocksdb/vendor/rocksdb/util/stderr_logger.h +1 -1
  225. package/vendor/librocksdb/vendor/rocksdb/util/udt_util.cc +33 -0
  226. package/vendor/librocksdb/vendor/rocksdb/util/udt_util.h +7 -0
  227. package/vendor/librocksdb/vendor/rocksdb/util/write_batch_util.h +5 -0
  228. package/vendor/librocksdb/vendor/rocksdb/util/xxhash.h +36 -29
  229. package/vendor/librocksdb/vendor/rocksdb/utilities/blob_db/blob_db_impl.h +3 -0
  230. package/vendor/librocksdb/vendor/rocksdb/utilities/blob_db/blob_db_impl_filesnapshot.cc +20 -0
  231. package/vendor/librocksdb/vendor/rocksdb/utilities/cache_dump_load_impl.cc +29 -9
  232. package/vendor/librocksdb/vendor/rocksdb/utilities/cache_dump_load_impl.h +14 -3
  233. package/vendor/librocksdb/vendor/rocksdb/utilities/debug.cc +16 -4
  234. package/vendor/librocksdb/vendor/rocksdb/utilities/fault_injection_fs.cc +677 -248
  235. package/vendor/librocksdb/vendor/rocksdb/utilities/fault_injection_fs.h +325 -158
  236. package/vendor/librocksdb/vendor/rocksdb/utilities/option_change_migration/option_change_migration.cc +1 -8
  237. package/vendor/librocksdb/vendor/rocksdb/utilities/table_properties_collectors/compact_for_tiering_collector.cc +144 -0
  238. package/vendor/librocksdb/vendor/rocksdb/utilities/table_properties_collectors/compact_for_tiering_collector.h +45 -0
  239. package/vendor/librocksdb/vendor/rocksdb/utilities/table_properties_collectors/compact_on_deletion_collector.cc +12 -0
  240. package/vendor/librocksdb/vendor/rocksdb/utilities/transactions/lock/range/range_tree/lib/portability/toku_time.h +1 -1
  241. package/vendor/librocksdb/vendor/rocksdb/utilities/transactions/lock/range/range_tree/lib/util/growable_array.h +3 -3
  242. package/vendor/librocksdb/vendor/rocksdb/utilities/transactions/pessimistic_transaction.cc +116 -20
  243. package/vendor/librocksdb/vendor/rocksdb/utilities/transactions/pessimistic_transaction.h +33 -1
  244. package/vendor/librocksdb/vendor/rocksdb/utilities/transactions/pessimistic_transaction_db.cc +78 -13
  245. package/vendor/librocksdb/vendor/rocksdb/utilities/transactions/pessimistic_transaction_db.h +33 -1
  246. package/vendor/librocksdb/vendor/rocksdb/utilities/transactions/transaction_base.cc +106 -7
  247. package/vendor/librocksdb/vendor/rocksdb/utilities/transactions/transaction_base.h +68 -10
  248. package/vendor/librocksdb/vendor/rocksdb/utilities/transactions/transaction_test.h +7 -3
  249. package/vendor/librocksdb/vendor/rocksdb/utilities/transactions/transaction_util.cc +8 -5
  250. package/vendor/librocksdb/vendor/rocksdb/utilities/transactions/transaction_util.h +7 -4
  251. package/vendor/librocksdb/vendor/rocksdb/utilities/transactions/write_prepared_txn.cc +18 -12
  252. package/vendor/librocksdb/vendor/rocksdb/utilities/transactions/write_prepared_txn_db.cc +4 -4
  253. package/vendor/librocksdb/vendor/rocksdb/utilities/transactions/write_prepared_txn_db.h +17 -0
  254. package/vendor/librocksdb/vendor/rocksdb/utilities/transactions/write_unprepared_txn.cc +11 -9
  255. package/vendor/librocksdb/vendor/rocksdb/utilities/transactions/write_unprepared_txn_db.cc +2 -1
  256. package/vendor/librocksdb/vendor/rocksdb/utilities/types_util.cc +88 -0
  257. package/vendor/librocksdb/vendor/rocksdb/utilities/write_batch_with_index/write_batch_with_index.cc +313 -14
  258. package/vendor/librocksdb/vendor/rocksdb/utilities/write_batch_with_index/write_batch_with_index_internal.cc +7 -0
  259. package/vendor/librocksdb/vendor/rocksdb/utilities/write_batch_with_index/write_batch_with_index_internal.h +1 -1
  260. package/vendor/librocksdb/vendor/rocksdb/db/multi_cf_iterator.cc +0 -102
  261. package/vendor/librocksdb/vendor/rocksdb/db/multi_cf_iterator.h +0 -159
@@ -28,6 +28,8 @@ uint64_t DBImpl::MinLogNumberToKeep() {
28
28
  return versions_->min_log_number_to_keep();
29
29
  }
30
30
 
31
+ uint64_t DBImpl::MinLogNumberToRecycle() { return min_log_number_to_recycle_; }
32
+
31
33
  uint64_t DBImpl::MinObsoleteSstNumberToKeep() {
32
34
  mutex_.AssertHeld();
33
35
  if (!pending_outputs_.empty()) {
@@ -175,31 +177,10 @@ void DBImpl::FindObsoleteFiles(JobContext* job_context, bool force,
175
177
  versions_->AddLiveFiles(&job_context->sst_live, &job_context->blob_live);
176
178
  InfoLogPrefix info_log_prefix(!immutable_db_options_.db_log_dir.empty(),
177
179
  dbname_);
178
- std::set<std::string> paths;
179
- for (size_t path_id = 0; path_id < immutable_db_options_.db_paths.size();
180
- path_id++) {
181
- paths.insert(immutable_db_options_.db_paths[path_id].path);
182
- }
183
-
184
- // Note that if cf_paths is not specified in the ColumnFamilyOptions
185
- // of a particular column family, we use db_paths as the cf_paths
186
- // setting. Hence, there can be multiple duplicates of files from db_paths
187
- // in the following code. The duplicate are removed while identifying
188
- // unique files in PurgeObsoleteFiles.
189
- for (auto cfd : *versions_->GetColumnFamilySet()) {
190
- for (size_t path_id = 0; path_id < cfd->ioptions()->cf_paths.size();
191
- path_id++) {
192
- auto& path = cfd->ioptions()->cf_paths[path_id].path;
193
-
194
- if (paths.find(path) == paths.end()) {
195
- paths.insert(path);
196
- }
197
- }
198
- }
199
-
180
+ // PurgeObsoleteFiles will dedupe duplicate files.
200
181
  IOOptions io_opts;
201
182
  io_opts.do_not_recurse = true;
202
- for (auto& path : paths) {
183
+ for (auto& path : CollectAllDBPaths()) {
203
184
  // set of all files in the directory. We'll exclude files that are still
204
185
  // alive in the subsequent processings.
205
186
  std::vector<std::string> files;
@@ -298,7 +279,8 @@ void DBImpl::FindObsoleteFiles(JobContext* job_context, bool force,
298
279
  while (alive_log_files_.begin()->number < min_log_number) {
299
280
  auto& earliest = *alive_log_files_.begin();
300
281
  if (immutable_db_options_.recycle_log_file_num >
301
- log_recycle_files_.size()) {
282
+ log_recycle_files_.size() &&
283
+ earliest.number >= MinLogNumberToRecycle()) {
302
284
  ROCKS_LOG_INFO(immutable_db_options_.info_log,
303
285
  "adding log %" PRIu64 " to recycle list\n",
304
286
  earliest.number);
@@ -330,6 +312,26 @@ void DBImpl::FindObsoleteFiles(JobContext* job_context, bool force,
330
312
  // logs_ could have changed while we were waiting.
331
313
  continue;
332
314
  }
315
+ // This WAL file is not live, so it's OK if we never sync the rest of it.
316
+ // If it's already closed, then it's been fully synced. If
317
+ // !background_close_inactive_wals then we need to Close it before
318
+ // removing from logs_ but not blocking while holding log_write_mutex_.
319
+ if (!immutable_db_options_.background_close_inactive_wals &&
320
+ log.writer->file()) {
321
+ // We are taking ownership of and pinning the front entry, so we can
322
+ // expect it to be the same after releasing and re-acquiring the lock
323
+ log.PrepareForSync();
324
+ log_write_mutex_.Unlock();
325
+ // TODO: maybe check the return value of Close.
326
+ // TODO: plumb Env::IOActivity, Env::IOPriority
327
+ auto s = log.writer->file()->Close({});
328
+ s.PermitUncheckedError();
329
+ log_write_mutex_.Lock();
330
+ log.writer->PublishIfClosed();
331
+ assert(&log == &logs_.front());
332
+ log.FinishSync();
333
+ log_sync_cv_.SignalAll();
334
+ }
333
335
  logs_to_free_.push_back(log.ReleaseWriter());
334
336
  logs_.pop_front();
335
337
  }
@@ -428,12 +430,24 @@ void DBImpl::PurgeObsoleteFiles(JobContext& state, bool schedule_only) {
428
430
  state.manifest_delete_files.size());
429
431
  // We may ignore the dbname when generating the file names.
430
432
  for (auto& file : state.sst_delete_files) {
431
- if (!file.only_delete_metadata) {
432
- candidate_files.emplace_back(
433
- MakeTableFileName(file.metadata->fd.GetNumber()), file.path);
434
- }
435
- if (file.metadata->table_reader_handle) {
436
- table_cache_->Release(file.metadata->table_reader_handle);
433
+ auto* handle = file.metadata->table_reader_handle;
434
+ if (file.only_delete_metadata) {
435
+ if (handle) {
436
+ // Simply release handle of file that is not being deleted
437
+ table_cache_->Release(handle);
438
+ }
439
+ } else {
440
+ // File is being deleted (actually obsolete)
441
+ auto number = file.metadata->fd.GetNumber();
442
+ candidate_files.emplace_back(MakeTableFileName(number), file.path);
443
+ if (handle == nullptr) {
444
+ // For files not "pinned" in table cache
445
+ handle = TableCache::Lookup(table_cache_.get(), number);
446
+ }
447
+ if (handle) {
448
+ TableCache::ReleaseObsolete(table_cache_.get(), handle,
449
+ file.uncache_aggressiveness);
450
+ }
437
451
  }
438
452
  file.DeleteMetadata();
439
453
  }
@@ -509,7 +523,7 @@ void DBImpl::PurgeObsoleteFiles(JobContext& state, bool schedule_only) {
509
523
  for (const auto w : state.logs_to_free) {
510
524
  // TODO: maybe check the return value of Close.
511
525
  // TODO: plumb Env::IOActivity, Env::IOPriority
512
- auto s = w->Close(WriteOptions());
526
+ auto s = w->Close({});
513
527
  s.PermitUncheckedError();
514
528
  }
515
529
 
@@ -595,8 +609,6 @@ void DBImpl::PurgeObsoleteFiles(JobContext& state, bool schedule_only) {
595
609
  std::string fname;
596
610
  std::string dir_to_sync;
597
611
  if (type == kTableFile) {
598
- // evict from cache
599
- TableCache::Evict(table_cache_.get(), number);
600
612
  fname = MakeTableFileName(candidate_file.file_path, number);
601
613
  dir_to_sync = candidate_file.file_path;
602
614
  } else if (type == kBlobFile) {
@@ -963,28 +975,26 @@ Status DBImpl::SetupDBId(const WriteOptions& write_options, bool read_only,
963
975
  return s;
964
976
  }
965
977
 
966
- Status DBImpl::DeleteUnreferencedSstFiles(RecoveryContext* recovery_ctx) {
967
- mutex_.AssertHeld();
968
- std::vector<std::string> paths;
969
- paths.push_back(NormalizePath(dbname_ + std::string(1, kFilePathSeparator)));
978
+ std::set<std::string> DBImpl::CollectAllDBPaths() {
979
+ std::set<std::string> all_db_paths;
980
+ all_db_paths.insert(NormalizePath(dbname_));
970
981
  for (const auto& db_path : immutable_db_options_.db_paths) {
971
- paths.push_back(
972
- NormalizePath(db_path.path + std::string(1, kFilePathSeparator)));
982
+ all_db_paths.insert(NormalizePath(db_path.path));
973
983
  }
974
984
  for (const auto* cfd : *versions_->GetColumnFamilySet()) {
975
985
  for (const auto& cf_path : cfd->ioptions()->cf_paths) {
976
- paths.push_back(
977
- NormalizePath(cf_path.path + std::string(1, kFilePathSeparator)));
986
+ all_db_paths.insert(NormalizePath(cf_path.path));
978
987
  }
979
988
  }
980
- // Dedup paths
981
- std::sort(paths.begin(), paths.end());
982
- paths.erase(std::unique(paths.begin(), paths.end()), paths.end());
989
+ return all_db_paths;
990
+ }
983
991
 
992
+ Status DBImpl::MaybeUpdateNextFileNumber(RecoveryContext* recovery_ctx) {
993
+ mutex_.AssertHeld();
984
994
  uint64_t next_file_number = versions_->current_next_file_number();
985
995
  uint64_t largest_file_number = next_file_number;
986
996
  Status s;
987
- for (const auto& path : paths) {
997
+ for (const auto& path : CollectAllDBPaths()) {
988
998
  std::vector<std::string> files;
989
999
  s = env_->GetChildren(path, &files);
990
1000
  if (!s.ok()) {
@@ -996,13 +1006,10 @@ Status DBImpl::DeleteUnreferencedSstFiles(RecoveryContext* recovery_ctx) {
996
1006
  if (!ParseFileName(fname, &number, &type)) {
997
1007
  continue;
998
1008
  }
999
- // path ends with '/' or '\\'
1000
- const std::string normalized_fpath = path + fname;
1009
+ const std::string normalized_fpath = path + kFilePathSeparator + fname;
1001
1010
  largest_file_number = std::max(largest_file_number, number);
1002
- if (type == kTableFile && number >= next_file_number &&
1003
- recovery_ctx->files_to_delete_.find(normalized_fpath) ==
1004
- recovery_ctx->files_to_delete_.end()) {
1005
- recovery_ctx->files_to_delete_.emplace(normalized_fpath, path);
1011
+ if ((type == kTableFile || type == kBlobFile)) {
1012
+ recovery_ctx->existing_data_files_.push_back(normalized_fpath);
1006
1013
  }
1007
1014
  }
1008
1015
  }
@@ -1022,5 +1029,4 @@ Status DBImpl::DeleteUnreferencedSstFiles(RecoveryContext* recovery_ctx) {
1022
1029
  recovery_ctx->UpdateVersionEdits(default_cfd, edit);
1023
1030
  return s;
1024
1031
  }
1025
-
1026
1032
  } // namespace ROCKSDB_NAMESPACE
@@ -0,0 +1,348 @@
1
+ // Copyright (c) 2024-present, Facebook, Inc. All rights reserved.
2
+ // This source code is licensed under both the GPLv2 (found in the
3
+ // COPYING file in the root directory) and Apache 2.0 License
4
+ // (found in the LICENSE.Apache file in the root directory).
5
+
6
+ #include "db/db_impl/db_impl_follower.h"
7
+
8
+ #include <algorithm>
9
+ #include <cinttypes>
10
+
11
+ #include "db/arena_wrapped_db_iter.h"
12
+ #include "db/merge_context.h"
13
+ #include "env/composite_env_wrapper.h"
14
+ #include "env/fs_on_demand.h"
15
+ #include "logging/auto_roll_logger.h"
16
+ #include "logging/logging.h"
17
+ #include "monitoring/perf_context_imp.h"
18
+ #include "rocksdb/configurable.h"
19
+ #include "rocksdb/db.h"
20
+ #include "util/cast_util.h"
21
+ #include "util/write_batch_util.h"
22
+
23
+ namespace ROCKSDB_NAMESPACE {
24
+
25
+ DBImplFollower::DBImplFollower(const DBOptions& db_options,
26
+ std::unique_ptr<Env>&& env,
27
+ const std::string& dbname, std::string src_path)
28
+ : DBImplSecondary(db_options, dbname, ""),
29
+ env_guard_(std::move(env)),
30
+ stop_requested_(false),
31
+ src_path_(std::move(src_path)),
32
+ cv_(&mu_) {
33
+ ROCKS_LOG_INFO(immutable_db_options_.info_log,
34
+ "Opening the db in follower mode");
35
+ LogFlush(immutable_db_options_.info_log);
36
+ }
37
+
38
+ DBImplFollower::~DBImplFollower() {
39
+ Status s = Close();
40
+ if (!s.ok()) {
41
+ ROCKS_LOG_INFO(immutable_db_options_.info_log, "Error closing DB : %s",
42
+ s.ToString().c_str());
43
+ }
44
+ }
45
+
46
+ // Recover a follower DB instance by reading the MANIFEST. The verification
47
+ // as part of the MANIFEST replay will ensure that local links to the
48
+ // leader's files are created, thus ensuring we can continue reading them
49
+ // even if the leader deletes those files due to compaction.
50
+ // TODO:
51
+ // 1. Devise a mechanism to prevent misconfiguration by, for example,
52
+ // keeping a local copy of the IDENTITY file and cross checking
53
+ // 2. Make the recovery more robust by retrying if the first attempt
54
+ // fails.
55
+ Status DBImplFollower::Recover(
56
+ const std::vector<ColumnFamilyDescriptor>& column_families,
57
+ bool /*readonly*/, bool /*error_if_wal_file_exists*/,
58
+ bool /*error_if_data_exists_in_wals*/, bool /*is_retry*/, uint64_t*,
59
+ RecoveryContext* /*recovery_ctx*/, bool* /*can_retry*/) {
60
+ mutex_.AssertHeld();
61
+
62
+ JobContext job_context(0);
63
+ Status s;
64
+ s = static_cast<ReactiveVersionSet*>(versions_.get())
65
+ ->Recover(column_families, &manifest_reader_, &manifest_reporter_,
66
+ &manifest_reader_status_);
67
+ if (!s.ok()) {
68
+ if (manifest_reader_status_) {
69
+ manifest_reader_status_->PermitUncheckedError();
70
+ }
71
+ return s;
72
+ }
73
+ if (immutable_db_options_.paranoid_checks && s.ok()) {
74
+ s = CheckConsistency();
75
+ }
76
+ if (s.ok()) {
77
+ default_cf_handle_ = new ColumnFamilyHandleImpl(
78
+ versions_->GetColumnFamilySet()->GetDefault(), this, &mutex_);
79
+ default_cf_internal_stats_ = default_cf_handle_->cfd()->internal_stats();
80
+
81
+ // Start the periodic catch-up thread
82
+ // TODO: See if it makes sense to have a threadpool, rather than a thread
83
+ // per follower DB instance
84
+ catch_up_thread_.reset(
85
+ new port::Thread(&DBImplFollower::PeriodicRefresh, this));
86
+ }
87
+
88
+ return s;
89
+ }
90
+
91
+ // Try to catch up by tailing the MANIFEST.
92
+ // TODO:
93
+ // 1. Cleanup obsolete files afterward
94
+ // 2. Add some error notifications and statistics
95
+ Status DBImplFollower::TryCatchUpWithLeader() {
96
+ assert(versions_.get() != nullptr);
97
+ assert(manifest_reader_.get() != nullptr);
98
+ Status s;
99
+
100
+ TEST_SYNC_POINT("DBImplFollower::TryCatchupWithLeader:Begin1");
101
+ TEST_SYNC_POINT("DBImplFollower::TryCatchupWithLeader:Begin2");
102
+ // read the manifest and apply new changes to the follower instance
103
+ std::unordered_set<ColumnFamilyData*> cfds_changed;
104
+ JobContext job_context(0, true /*create_superversion*/);
105
+ {
106
+ InstrumentedMutexLock lock_guard(&mutex_);
107
+ std::vector<std::string> files_to_delete;
108
+ s = static_cast_with_check<ReactiveVersionSet>(versions_.get())
109
+ ->ReadAndApply(&mutex_, &manifest_reader_,
110
+ manifest_reader_status_.get(), &cfds_changed,
111
+ &files_to_delete);
112
+ ReleaseFileNumberFromPendingOutputs(pending_outputs_inserted_elem_);
113
+ pending_outputs_inserted_elem_.reset(new std::list<uint64_t>::iterator(
114
+ CaptureCurrentFileNumberInPendingOutputs()));
115
+
116
+ ROCKS_LOG_INFO(immutable_db_options_.info_log, "Last sequence is %" PRIu64,
117
+ static_cast<uint64_t>(versions_->LastSequence()));
118
+ ROCKS_LOG_INFO(
119
+ immutable_db_options_.info_log, "Next file number is %" PRIu64,
120
+ static_cast<uint64_t>(versions_->current_next_file_number()));
121
+ for (ColumnFamilyData* cfd : cfds_changed) {
122
+ if (cfd->IsDropped()) {
123
+ ROCKS_LOG_DEBUG(immutable_db_options_.info_log, "[%s] is dropped\n",
124
+ cfd->GetName().c_str());
125
+ continue;
126
+ }
127
+ VersionStorageInfo::LevelSummaryStorage tmp;
128
+ ROCKS_LOG_DEBUG(immutable_db_options_.info_log,
129
+ "[%s] Level summary: %s\n", cfd->GetName().c_str(),
130
+ cfd->current()->storage_info()->LevelSummary(&tmp));
131
+ }
132
+
133
+ if (s.ok()) {
134
+ for (auto cfd : cfds_changed) {
135
+ if (cfd->mem()->GetEarliestSequenceNumber() <
136
+ versions_->LastSequence()) {
137
+ // Construct a new memtable with earliest sequence number set to the
138
+ // last sequence number in the VersionSet. This matters when
139
+ // DBImpl::MultiCFSnapshot tries to get consistent references
140
+ // to super versions in a lock free manner, it checks the earliest
141
+ // sequence number to detect if there was a change in version in
142
+ // the meantime.
143
+ const MutableCFOptions mutable_cf_options =
144
+ *cfd->GetLatestMutableCFOptions();
145
+ MemTable* new_mem = cfd->ConstructNewMemtable(
146
+ mutable_cf_options, versions_->LastSequence());
147
+ cfd->mem()->SetNextLogNumber(cfd->GetLogNumber());
148
+ cfd->mem()->ConstructFragmentedRangeTombstones();
149
+ cfd->imm()->Add(cfd->mem(), &job_context.memtables_to_free);
150
+ new_mem->Ref();
151
+ cfd->SetMemtable(new_mem);
152
+ }
153
+
154
+ // This will check if the old memtable is still referenced
155
+ cfd->imm()->RemoveOldMemTables(cfd->GetLogNumber(),
156
+ &job_context.memtables_to_free);
157
+ auto& sv_context = job_context.superversion_contexts.back();
158
+ cfd->InstallSuperVersion(&sv_context, &mutex_);
159
+ sv_context.NewSuperVersion();
160
+ }
161
+ }
162
+
163
+ for (auto& file : files_to_delete) {
164
+ IOStatus io_s = fs_->DeleteFile(file, IOOptions(), nullptr);
165
+ if (!io_s.ok()) {
166
+ ROCKS_LOG_INFO(immutable_db_options_.info_log,
167
+ "Cannot delete file %s: %s", file.c_str(),
168
+ io_s.ToString().c_str());
169
+ }
170
+ }
171
+ }
172
+ job_context.Clean();
173
+
174
+ // Cleanup unused, obsolete files.
175
+ JobContext purge_files_job_context(0);
176
+ {
177
+ InstrumentedMutexLock lock_guard(&mutex_);
178
+ // Currently, follower instance does not create any database files, thus
179
+ // is unnecessary for the follower to force full scan.
180
+ FindObsoleteFiles(&purge_files_job_context, /*force=*/false);
181
+ }
182
+ if (purge_files_job_context.HaveSomethingToDelete()) {
183
+ PurgeObsoleteFiles(purge_files_job_context);
184
+ }
185
+ purge_files_job_context.Clean();
186
+
187
+ TEST_SYNC_POINT("DBImplFollower::TryCatchupWithLeader:End");
188
+
189
+ return s;
190
+ }
191
+
192
+ void DBImplFollower::PeriodicRefresh() {
193
+ while (!stop_requested_.load()) {
194
+ MutexLock l(&mu_);
195
+ int64_t wait_until =
196
+ immutable_db_options_.clock->NowMicros() +
197
+ immutable_db_options_.follower_refresh_catchup_period_ms * 1000;
198
+ immutable_db_options_.clock->TimedWait(
199
+ &cv_, std::chrono::microseconds(wait_until));
200
+ if (stop_requested_.load()) {
201
+ break;
202
+ }
203
+ Status s;
204
+ for (uint64_t i = 0;
205
+ i < immutable_db_options_.follower_catchup_retry_count &&
206
+ !stop_requested_.load();
207
+ ++i) {
208
+ s = TryCatchUpWithLeader();
209
+
210
+ if (s.ok()) {
211
+ ROCKS_LOG_INFO(immutable_db_options_.info_log,
212
+ "Successful catch up on attempt %llu",
213
+ static_cast<unsigned long long>(i));
214
+ break;
215
+ }
216
+ wait_until = immutable_db_options_.clock->NowMicros() +
217
+ immutable_db_options_.follower_catchup_retry_wait_ms * 1000;
218
+ immutable_db_options_.clock->TimedWait(
219
+ &cv_, std::chrono::microseconds(wait_until));
220
+ }
221
+ if (!s.ok()) {
222
+ ROCKS_LOG_INFO(immutable_db_options_.info_log, "Catch up unsuccessful");
223
+ }
224
+ }
225
+ }
226
+
227
+ Status DBImplFollower::Close() {
228
+ if (catch_up_thread_) {
229
+ stop_requested_.store(true);
230
+ {
231
+ MutexLock l(&mu_);
232
+ cv_.SignalAll();
233
+ }
234
+ catch_up_thread_->join();
235
+ catch_up_thread_.reset();
236
+ }
237
+
238
+ ReleaseFileNumberFromPendingOutputs(pending_outputs_inserted_elem_);
239
+
240
+ return DBImpl::Close();
241
+ }
242
+
243
+ Status DB::OpenAsFollower(const Options& options, const std::string& dbname,
244
+ const std::string& leader_path,
245
+ std::unique_ptr<DB>* dbptr) {
246
+ dbptr->reset();
247
+
248
+ DBOptions db_options(options);
249
+ ColumnFamilyOptions cf_options(options);
250
+ std::vector<ColumnFamilyDescriptor> column_families;
251
+ column_families.emplace_back(kDefaultColumnFamilyName, cf_options);
252
+ std::vector<ColumnFamilyHandle*> handles;
253
+
254
+ Status s = DB::OpenAsFollower(db_options, dbname, leader_path,
255
+ column_families, &handles, dbptr);
256
+ if (s.ok()) {
257
+ assert(handles.size() == 1);
258
+ delete handles[0];
259
+ }
260
+ return s;
261
+ }
262
+
263
+ Status DB::OpenAsFollower(
264
+ const DBOptions& db_options, const std::string& dbname,
265
+ const std::string& src_path,
266
+ const std::vector<ColumnFamilyDescriptor>& column_families,
267
+ std::vector<ColumnFamilyHandle*>* handles, std::unique_ptr<DB>* dbptr) {
268
+ dbptr->reset();
269
+
270
+ FileSystem* fs = db_options.env->GetFileSystem().get();
271
+ {
272
+ IOStatus io_s;
273
+ if (db_options.create_if_missing) {
274
+ io_s = fs->CreateDirIfMissing(dbname, IOOptions(), nullptr);
275
+ } else {
276
+ io_s = fs->FileExists(dbname, IOOptions(), nullptr);
277
+ }
278
+ if (!io_s.ok()) {
279
+ return static_cast<Status>(io_s);
280
+ }
281
+ }
282
+ std::unique_ptr<Env> new_env(new CompositeEnvWrapper(
283
+ db_options.env, NewOnDemandFileSystem(db_options.env->GetFileSystem(),
284
+ src_path, dbname)));
285
+
286
+ DBOptions tmp_opts(db_options);
287
+ Status s;
288
+ tmp_opts.env = new_env.get();
289
+ if (nullptr == tmp_opts.info_log) {
290
+ s = CreateLoggerFromOptions(dbname, tmp_opts, &tmp_opts.info_log);
291
+ if (!s.ok()) {
292
+ tmp_opts.info_log = nullptr;
293
+ return s;
294
+ }
295
+ }
296
+
297
+ handles->clear();
298
+ DBImplFollower* impl =
299
+ new DBImplFollower(tmp_opts, std::move(new_env), dbname, src_path);
300
+ impl->versions_.reset(new ReactiveVersionSet(
301
+ dbname, &impl->immutable_db_options_, impl->file_options_,
302
+ impl->table_cache_.get(), impl->write_buffer_manager_,
303
+ &impl->write_controller_, impl->io_tracer_));
304
+ impl->column_family_memtables_.reset(
305
+ new ColumnFamilyMemTablesImpl(impl->versions_->GetColumnFamilySet()));
306
+ impl->wal_in_db_path_ = impl->immutable_db_options_.IsWalDirSameAsDBPath();
307
+
308
+ impl->mutex_.Lock();
309
+ s = impl->Recover(column_families, /*read_only=*/true,
310
+ /*error_if_wal_file_exists=*/false,
311
+ /*error_if_data_exists_in_wals=*/false);
312
+ if (s.ok()) {
313
+ for (const auto& cf : column_families) {
314
+ auto cfd =
315
+ impl->versions_->GetColumnFamilySet()->GetColumnFamily(cf.name);
316
+ if (nullptr == cfd) {
317
+ s = Status::InvalidArgument("Column family not found", cf.name);
318
+ break;
319
+ }
320
+ handles->push_back(new ColumnFamilyHandleImpl(cfd, impl, &impl->mutex_));
321
+ }
322
+ }
323
+ SuperVersionContext sv_context(false /* create_superversion */);
324
+ if (s.ok()) {
325
+ for (auto cfd : *impl->versions_->GetColumnFamilySet()) {
326
+ sv_context.NewSuperVersion();
327
+ cfd->InstallSuperVersion(&sv_context, &impl->mutex_);
328
+ }
329
+ }
330
+ impl->mutex_.Unlock();
331
+ sv_context.Clean();
332
+ if (s.ok()) {
333
+ dbptr->reset(impl);
334
+ for (auto h : *handles) {
335
+ impl->NewThreadStatusCfInfo(
336
+ static_cast_with_check<ColumnFamilyHandleImpl>(h)->cfd());
337
+ }
338
+ } else {
339
+ for (auto h : *handles) {
340
+ delete h;
341
+ }
342
+ handles->clear();
343
+ delete impl;
344
+ }
345
+ return s;
346
+ }
347
+
348
+ } // namespace ROCKSDB_NAMESPACE
@@ -0,0 +1,54 @@
1
+ // Copyright (c) 2024-present, Facebook, Inc. All rights reserved.
2
+ // This source code is licensed under both the GPLv2 (found in the
3
+ // COPYING file in the root directory) and Apache 2.0 License
4
+ // (found in the LICENSE.Apache file in the root directory).
5
+
6
+ #pragma once
7
+
8
+ #include <string>
9
+ #include <vector>
10
+
11
+ #include "db/db_impl/db_impl.h"
12
+ #include "db/db_impl/db_impl_secondary.h"
13
+ #include "logging/logging.h"
14
+ #include "port/port.h"
15
+
16
+ namespace ROCKSDB_NAMESPACE {
17
+
18
+ class DBImplFollower : public DBImplSecondary {
19
+ public:
20
+ DBImplFollower(const DBOptions& db_options, std::unique_ptr<Env>&& env,
21
+ const std::string& dbname, std::string src_path);
22
+ ~DBImplFollower();
23
+
24
+ Status Close() override;
25
+
26
+ protected:
27
+ bool OwnTablesAndLogs() const override {
28
+ // TODO: Change this to true once we've properly implemented file
29
+ // deletion for the read scaling case
30
+ return true;
31
+ }
32
+
33
+ Status Recover(const std::vector<ColumnFamilyDescriptor>& column_families,
34
+ bool /*readonly*/, bool /*error_if_wal_file_exists*/,
35
+ bool /*error_if_data_exists_in_wals*/,
36
+ bool /*is_retry*/ = false, uint64_t* = nullptr,
37
+ RecoveryContext* /*recovery_ctx*/ = nullptr,
38
+ bool* /*can_retry*/ = nullptr) override;
39
+
40
+ private:
41
+ friend class DB;
42
+
43
+ Status TryCatchUpWithLeader();
44
+ void PeriodicRefresh();
45
+
46
+ std::unique_ptr<Env> env_guard_;
47
+ std::unique_ptr<port::Thread> catch_up_thread_;
48
+ std::atomic<bool> stop_requested_;
49
+ std::string src_path_;
50
+ port::Mutex mu_;
51
+ port::CondVar cv_;
52
+ std::unique_ptr<std::list<uint64_t>::iterator> pending_outputs_inserted_elem_;
53
+ };
54
+ } // namespace ROCKSDB_NAMESPACE