rocksdb-native 2.2.0 → 2.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (261)
  1. package/binding.c +92 -10
  2. package/index.js +9 -0
  3. package/lib/batch.js +11 -1
  4. package/lib/iterator.js +3 -1
  5. package/lib/snapshot.js +21 -0
  6. package/package.json +1 -1
  7. package/prebuilds/darwin-arm64/rocksdb-native.bare +0 -0
  8. package/prebuilds/darwin-arm64/rocksdb-native.node +0 -0
  9. package/prebuilds/darwin-x64/rocksdb-native.bare +0 -0
  10. package/prebuilds/darwin-x64/rocksdb-native.node +0 -0
  11. package/prebuilds/linux-arm64/rocksdb-native.bare +0 -0
  12. package/prebuilds/linux-arm64/rocksdb-native.node +0 -0
  13. package/prebuilds/linux-x64/rocksdb-native.bare +0 -0
  14. package/prebuilds/linux-x64/rocksdb-native.node +0 -0
  15. package/prebuilds/win32-x64/rocksdb-native.bare +0 -0
  16. package/prebuilds/win32-x64/rocksdb-native.node +0 -0
  17. package/vendor/librocksdb/include/rocksdb.h +38 -4
  18. package/vendor/librocksdb/src/rocksdb.cc +114 -14
  19. package/vendor/librocksdb/vendor/rocksdb/CMakeLists.txt +21 -4
  20. package/vendor/librocksdb/vendor/rocksdb/cache/secondary_cache_adapter.cc +6 -3
  21. package/vendor/librocksdb/vendor/rocksdb/db/arena_wrapped_db_iter.cc +4 -4
  22. package/vendor/librocksdb/vendor/rocksdb/db/arena_wrapped_db_iter.h +4 -2
  23. package/vendor/librocksdb/vendor/rocksdb/db/attribute_group_iterator_impl.cc +20 -0
  24. package/vendor/librocksdb/vendor/rocksdb/db/attribute_group_iterator_impl.h +83 -0
  25. package/vendor/librocksdb/vendor/rocksdb/db/builder.cc +9 -5
  26. package/vendor/librocksdb/vendor/rocksdb/db/builder.h +1 -1
  27. package/vendor/librocksdb/vendor/rocksdb/db/c.cc +231 -6
  28. package/vendor/librocksdb/vendor/rocksdb/db/c_test.c +202 -2
  29. package/vendor/librocksdb/vendor/rocksdb/db/coalescing_iterator.cc +47 -0
  30. package/vendor/librocksdb/vendor/rocksdb/db/coalescing_iterator.h +79 -0
  31. package/vendor/librocksdb/vendor/rocksdb/db/column_family.cc +28 -0
  32. package/vendor/librocksdb/vendor/rocksdb/db/column_family.h +17 -0
  33. package/vendor/librocksdb/vendor/rocksdb/db/compaction/compaction.cc +8 -1
  34. package/vendor/librocksdb/vendor/rocksdb/db/compaction/compaction.h +11 -9
  35. package/vendor/librocksdb/vendor/rocksdb/db/compaction/compaction_iterator.cc +50 -23
  36. package/vendor/librocksdb/vendor/rocksdb/db/compaction/compaction_iterator.h +13 -0
  37. package/vendor/librocksdb/vendor/rocksdb/db/compaction/compaction_job.cc +22 -25
  38. package/vendor/librocksdb/vendor/rocksdb/db/compaction/compaction_job.h +2 -0
  39. package/vendor/librocksdb/vendor/rocksdb/db/compaction/compaction_outputs.cc +8 -1
  40. package/vendor/librocksdb/vendor/rocksdb/db/compaction/compaction_outputs.h +1 -0
  41. package/vendor/librocksdb/vendor/rocksdb/db/compaction/compaction_picker.cc +40 -17
  42. package/vendor/librocksdb/vendor/rocksdb/db/compaction/compaction_picker.h +20 -14
  43. package/vendor/librocksdb/vendor/rocksdb/db/compaction/compaction_picker_level.cc +11 -6
  44. package/vendor/librocksdb/vendor/rocksdb/db/compaction/compaction_picker_universal.cc +77 -24
  45. package/vendor/librocksdb/vendor/rocksdb/db/compaction/compaction_service_job.cc +2 -0
  46. package/vendor/librocksdb/vendor/rocksdb/db/convenience.cc +3 -0
  47. package/vendor/librocksdb/vendor/rocksdb/db/db_filesnapshot.cc +125 -31
  48. package/vendor/librocksdb/vendor/rocksdb/db/db_impl/db_impl.cc +457 -231
  49. package/vendor/librocksdb/vendor/rocksdb/db/db_impl/db_impl.h +172 -73
  50. package/vendor/librocksdb/vendor/rocksdb/db/db_impl/db_impl_compaction_flush.cc +152 -133
  51. package/vendor/librocksdb/vendor/rocksdb/db/db_impl/db_impl_debug.cc +5 -0
  52. package/vendor/librocksdb/vendor/rocksdb/db/db_impl/db_impl_files.cc +58 -52
  53. package/vendor/librocksdb/vendor/rocksdb/db/db_impl/db_impl_follower.cc +348 -0
  54. package/vendor/librocksdb/vendor/rocksdb/db/db_impl/db_impl_follower.h +54 -0
  55. package/vendor/librocksdb/vendor/rocksdb/db/db_impl/db_impl_open.cc +136 -117
  56. package/vendor/librocksdb/vendor/rocksdb/db/db_impl/db_impl_secondary.cc +4 -3
  57. package/vendor/librocksdb/vendor/rocksdb/db/db_impl/db_impl_secondary.h +7 -6
  58. package/vendor/librocksdb/vendor/rocksdb/db/db_impl/db_impl_write.cc +134 -80
  59. package/vendor/librocksdb/vendor/rocksdb/db/db_iter.cc +11 -0
  60. package/vendor/librocksdb/vendor/rocksdb/db/db_test2.cc +1 -1
  61. package/vendor/librocksdb/vendor/rocksdb/db/db_test_util.cc +11 -1
  62. package/vendor/librocksdb/vendor/rocksdb/db/db_test_util.h +11 -7
  63. package/vendor/librocksdb/vendor/rocksdb/db/dbformat.cc +19 -4
  64. package/vendor/librocksdb/vendor/rocksdb/db/dbformat.h +3 -2
  65. package/vendor/librocksdb/vendor/rocksdb/db/error_handler.cc +34 -39
  66. package/vendor/librocksdb/vendor/rocksdb/db/error_handler.h +3 -4
  67. package/vendor/librocksdb/vendor/rocksdb/db/event_helpers.cc +6 -3
  68. package/vendor/librocksdb/vendor/rocksdb/db/experimental.cc +3 -2
  69. package/vendor/librocksdb/vendor/rocksdb/db/external_sst_file_ingestion_job.cc +76 -18
  70. package/vendor/librocksdb/vendor/rocksdb/db/external_sst_file_ingestion_job.h +11 -0
  71. package/vendor/librocksdb/vendor/rocksdb/db/flush_job.cc +37 -5
  72. package/vendor/librocksdb/vendor/rocksdb/db/flush_job.h +14 -0
  73. package/vendor/librocksdb/vendor/rocksdb/db/import_column_family_job.cc +49 -45
  74. package/vendor/librocksdb/vendor/rocksdb/db/internal_stats.cc +60 -1
  75. package/vendor/librocksdb/vendor/rocksdb/db/internal_stats.h +20 -1
  76. package/vendor/librocksdb/vendor/rocksdb/db/log_reader.cc +15 -6
  77. package/vendor/librocksdb/vendor/rocksdb/db/log_writer.cc +59 -10
  78. package/vendor/librocksdb/vendor/rocksdb/db/log_writer.h +8 -0
  79. package/vendor/librocksdb/vendor/rocksdb/db/memtable.cc +24 -40
  80. package/vendor/librocksdb/vendor/rocksdb/db/memtable.h +10 -10
  81. package/vendor/librocksdb/vendor/rocksdb/db/memtable_list.cc +9 -8
  82. package/vendor/librocksdb/vendor/rocksdb/db/multi_cf_iterator_impl.h +296 -0
  83. package/vendor/librocksdb/vendor/rocksdb/db/range_tombstone_fragmenter.h +8 -10
  84. package/vendor/librocksdb/vendor/rocksdb/db/repair.cc +4 -3
  85. package/vendor/librocksdb/vendor/rocksdb/db/seqno_to_time_mapping.cc +30 -0
  86. package/vendor/librocksdb/vendor/rocksdb/db/seqno_to_time_mapping.h +9 -0
  87. package/vendor/librocksdb/vendor/rocksdb/db/table_cache.cc +17 -2
  88. package/vendor/librocksdb/vendor/rocksdb/db/table_cache.h +9 -1
  89. package/vendor/librocksdb/vendor/rocksdb/db/table_properties_collector.h +9 -2
  90. package/vendor/librocksdb/vendor/rocksdb/db/transaction_log_impl.cc +3 -3
  91. package/vendor/librocksdb/vendor/rocksdb/db/transaction_log_impl.h +7 -7
  92. package/vendor/librocksdb/vendor/rocksdb/db/version_edit.cc +0 -1
  93. package/vendor/librocksdb/vendor/rocksdb/db/version_edit_handler.cc +39 -5
  94. package/vendor/librocksdb/vendor/rocksdb/db/version_edit_handler.h +24 -15
  95. package/vendor/librocksdb/vendor/rocksdb/db/version_set.cc +117 -64
  96. package/vendor/librocksdb/vendor/rocksdb/db/version_set.h +27 -10
  97. package/vendor/librocksdb/vendor/rocksdb/db/wal_manager.cc +37 -29
  98. package/vendor/librocksdb/vendor/rocksdb/db/wal_manager.h +6 -5
  99. package/vendor/librocksdb/vendor/rocksdb/db/wide/wide_columns.cc +2 -3
  100. package/vendor/librocksdb/vendor/rocksdb/db/wide/wide_columns_helper.cc +6 -0
  101. package/vendor/librocksdb/vendor/rocksdb/db/write_batch.cc +89 -31
  102. package/vendor/librocksdb/vendor/rocksdb/db/write_thread.cc +53 -5
  103. package/vendor/librocksdb/vendor/rocksdb/db/write_thread.h +36 -4
  104. package/vendor/librocksdb/vendor/rocksdb/env/composite_env_wrapper.h +21 -0
  105. package/vendor/librocksdb/vendor/rocksdb/env/env.cc +15 -0
  106. package/vendor/librocksdb/vendor/rocksdb/env/fs_on_demand.cc +331 -0
  107. package/vendor/librocksdb/vendor/rocksdb/env/fs_on_demand.h +139 -0
  108. package/vendor/librocksdb/vendor/rocksdb/env/io_posix.cc +8 -6
  109. package/vendor/librocksdb/vendor/rocksdb/env/io_posix.h +1 -1
  110. package/vendor/librocksdb/vendor/rocksdb/file/delete_scheduler.cc +130 -27
  111. package/vendor/librocksdb/vendor/rocksdb/file/delete_scheduler.h +61 -8
  112. package/vendor/librocksdb/vendor/rocksdb/file/file_util.cc +25 -4
  113. package/vendor/librocksdb/vendor/rocksdb/file/file_util.h +15 -0
  114. package/vendor/librocksdb/vendor/rocksdb/file/sequence_file_reader.cc +1 -0
  115. package/vendor/librocksdb/vendor/rocksdb/file/sequence_file_reader.h +9 -4
  116. package/vendor/librocksdb/vendor/rocksdb/file/sst_file_manager_impl.cc +18 -0
  117. package/vendor/librocksdb/vendor/rocksdb/file/sst_file_manager_impl.h +31 -4
  118. package/vendor/librocksdb/vendor/rocksdb/file/writable_file_writer.cc +40 -38
  119. package/vendor/librocksdb/vendor/rocksdb/file/writable_file_writer.h +48 -15
  120. package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/advanced_options.h +12 -3
  121. package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/attribute_groups.h +114 -0
  122. package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/c.h +90 -0
  123. package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/cache.h +5 -0
  124. package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/comparator.h +27 -0
  125. package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/db.h +71 -12
  126. package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/env.h +9 -0
  127. package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/experimental.h +5 -0
  128. package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/file_system.h +14 -0
  129. package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/iterator.h +9 -71
  130. package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/iterator_base.h +90 -0
  131. package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/listener.h +21 -0
  132. package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/options.h +125 -12
  133. package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/perf_context.h +1 -1
  134. package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/sst_file_reader.h +11 -1
  135. package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/table.h +6 -6
  136. package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/table_properties.h +19 -0
  137. package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/transaction_log.h +12 -6
  138. package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/types.h +12 -0
  139. package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/universal_compaction.h +31 -0
  140. package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/user_write_callback.h +29 -0
  141. package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/utilities/cache_dump_load.h +4 -0
  142. package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/utilities/checkpoint.h +4 -2
  143. package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/utilities/customizable_util.h +0 -1
  144. package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/utilities/env_mirror.h +1 -1
  145. package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/utilities/ldb_cmd.h +24 -7
  146. package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/utilities/option_change_migration.h +4 -4
  147. package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/utilities/stackable_db.h +24 -5
  148. package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/utilities/table_properties_collectors.h +46 -0
  149. package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/utilities/transaction.h +42 -17
  150. package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/utilities/transaction_db.h +5 -0
  151. package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/utilities/types_util.h +36 -0
  152. package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/utilities/write_batch_with_index.h +71 -3
  153. package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/version.h +2 -2
  154. package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/wide_columns.h +87 -72
  155. package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/write_batch_base.h +1 -1
  156. package/vendor/librocksdb/vendor/rocksdb/memory/memory_allocator.cc +1 -0
  157. package/vendor/librocksdb/vendor/rocksdb/options/cf_options.cc +13 -2
  158. package/vendor/librocksdb/vendor/rocksdb/options/cf_options.h +6 -2
  159. package/vendor/librocksdb/vendor/rocksdb/options/db_options.cc +27 -1
  160. package/vendor/librocksdb/vendor/rocksdb/options/db_options.h +10 -3
  161. package/vendor/librocksdb/vendor/rocksdb/options/options.cc +3 -0
  162. package/vendor/librocksdb/vendor/rocksdb/options/options_helper.cc +1 -0
  163. package/vendor/librocksdb/vendor/rocksdb/port/jemalloc_helper.h +2 -2
  164. package/vendor/librocksdb/vendor/rocksdb/port/stack_trace.cc +1 -0
  165. package/vendor/librocksdb/vendor/rocksdb/port/win/port_win.cc +3 -2
  166. package/vendor/librocksdb/vendor/rocksdb/table/block_based/binary_search_index_reader.cc +1 -2
  167. package/vendor/librocksdb/vendor/rocksdb/table/block_based/block_based_table_builder.cc +47 -31
  168. package/vendor/librocksdb/vendor/rocksdb/table/block_based/block_based_table_factory.cc +15 -0
  169. package/vendor/librocksdb/vendor/rocksdb/table/block_based/block_based_table_iterator.cc +37 -18
  170. package/vendor/librocksdb/vendor/rocksdb/table/block_based/block_based_table_iterator.h +10 -3
  171. package/vendor/librocksdb/vendor/rocksdb/table/block_based/block_based_table_reader.cc +102 -41
  172. package/vendor/librocksdb/vendor/rocksdb/table/block_based/block_based_table_reader.h +15 -7
  173. package/vendor/librocksdb/vendor/rocksdb/table/block_based/block_based_table_reader_impl.h +1 -3
  174. package/vendor/librocksdb/vendor/rocksdb/table/block_based/block_based_table_reader_sync_and_async.h +5 -6
  175. package/vendor/librocksdb/vendor/rocksdb/table/block_based/block_cache.h +31 -0
  176. package/vendor/librocksdb/vendor/rocksdb/table/block_based/block_prefetcher.cc +6 -0
  177. package/vendor/librocksdb/vendor/rocksdb/table/block_based/cachable_entry.h +10 -5
  178. package/vendor/librocksdb/vendor/rocksdb/table/block_based/filter_block.h +34 -28
  179. package/vendor/librocksdb/vendor/rocksdb/table/block_based/filter_block_reader_common.cc +17 -11
  180. package/vendor/librocksdb/vendor/rocksdb/table/block_based/filter_block_reader_common.h +5 -2
  181. package/vendor/librocksdb/vendor/rocksdb/table/block_based/filter_policy.cc +12 -3
  182. package/vendor/librocksdb/vendor/rocksdb/table/block_based/full_filter_block.cc +37 -30
  183. package/vendor/librocksdb/vendor/rocksdb/table/block_based/full_filter_block.h +11 -13
  184. package/vendor/librocksdb/vendor/rocksdb/table/block_based/hash_index_reader.cc +1 -2
  185. package/vendor/librocksdb/vendor/rocksdb/table/block_based/index_builder.cc +62 -53
  186. package/vendor/librocksdb/vendor/rocksdb/table/block_based/index_builder.h +60 -38
  187. package/vendor/librocksdb/vendor/rocksdb/table/block_based/index_reader_common.cc +14 -9
  188. package/vendor/librocksdb/vendor/rocksdb/table/block_based/index_reader_common.h +4 -1
  189. package/vendor/librocksdb/vendor/rocksdb/table/block_based/partitioned_filter_block.cc +135 -94
  190. package/vendor/librocksdb/vendor/rocksdb/table/block_based/partitioned_filter_block.h +52 -46
  191. package/vendor/librocksdb/vendor/rocksdb/table/block_based/partitioned_index_reader.cc +51 -13
  192. package/vendor/librocksdb/vendor/rocksdb/table/block_based/partitioned_index_reader.h +2 -0
  193. package/vendor/librocksdb/vendor/rocksdb/table/block_based/uncompression_dict_reader.cc +3 -11
  194. package/vendor/librocksdb/vendor/rocksdb/table/block_based/uncompression_dict_reader.h +2 -3
  195. package/vendor/librocksdb/vendor/rocksdb/table/block_fetcher.cc +8 -10
  196. package/vendor/librocksdb/vendor/rocksdb/table/block_fetcher.h +2 -1
  197. package/vendor/librocksdb/vendor/rocksdb/table/compaction_merging_iterator.cc +9 -10
  198. package/vendor/librocksdb/vendor/rocksdb/table/compaction_merging_iterator.h +3 -2
  199. package/vendor/librocksdb/vendor/rocksdb/table/format.cc +1 -2
  200. package/vendor/librocksdb/vendor/rocksdb/table/iterator.cc +4 -0
  201. package/vendor/librocksdb/vendor/rocksdb/table/merging_iterator.cc +18 -13
  202. package/vendor/librocksdb/vendor/rocksdb/table/merging_iterator.h +5 -3
  203. package/vendor/librocksdb/vendor/rocksdb/table/meta_blocks.cc +18 -4
  204. package/vendor/librocksdb/vendor/rocksdb/table/meta_blocks.h +4 -0
  205. package/vendor/librocksdb/vendor/rocksdb/table/plain/plain_table_builder.cc +2 -2
  206. package/vendor/librocksdb/vendor/rocksdb/table/sst_file_dumper.cc +6 -6
  207. package/vendor/librocksdb/vendor/rocksdb/table/sst_file_reader.cc +24 -2
  208. package/vendor/librocksdb/vendor/rocksdb/table/sst_file_writer_collectors.h +3 -1
  209. package/vendor/librocksdb/vendor/rocksdb/table/table_builder.h +8 -7
  210. package/vendor/librocksdb/vendor/rocksdb/table/table_iterator.h +69 -0
  211. package/vendor/librocksdb/vendor/rocksdb/table/table_reader.h +9 -0
  212. package/vendor/librocksdb/vendor/rocksdb/test_util/testutil.cc +25 -0
  213. package/vendor/librocksdb/vendor/rocksdb/test_util/testutil.h +12 -0
  214. package/vendor/librocksdb/vendor/rocksdb/tools/db_bench_tool.cc +32 -0
  215. package/vendor/librocksdb/vendor/rocksdb/tools/ldb_cmd.cc +618 -124
  216. package/vendor/librocksdb/vendor/rocksdb/tools/ldb_cmd_impl.h +19 -1
  217. package/vendor/librocksdb/vendor/rocksdb/tools/ldb_tool.cc +9 -0
  218. package/vendor/librocksdb/vendor/rocksdb/util/aligned_storage.h +24 -0
  219. package/vendor/librocksdb/vendor/rocksdb/util/autovector.h +4 -0
  220. package/vendor/librocksdb/vendor/rocksdb/util/comparator.cc +12 -0
  221. package/vendor/librocksdb/vendor/rocksdb/util/filter_bench.cc +1 -1
  222. package/vendor/librocksdb/vendor/rocksdb/util/random.cc +2 -1
  223. package/vendor/librocksdb/vendor/rocksdb/util/stderr_logger.cc +3 -4
  224. package/vendor/librocksdb/vendor/rocksdb/util/stderr_logger.h +1 -1
  225. package/vendor/librocksdb/vendor/rocksdb/util/udt_util.cc +33 -0
  226. package/vendor/librocksdb/vendor/rocksdb/util/udt_util.h +7 -0
  227. package/vendor/librocksdb/vendor/rocksdb/util/write_batch_util.h +5 -0
  228. package/vendor/librocksdb/vendor/rocksdb/util/xxhash.h +36 -29
  229. package/vendor/librocksdb/vendor/rocksdb/utilities/blob_db/blob_db_impl.h +3 -0
  230. package/vendor/librocksdb/vendor/rocksdb/utilities/blob_db/blob_db_impl_filesnapshot.cc +20 -0
  231. package/vendor/librocksdb/vendor/rocksdb/utilities/cache_dump_load_impl.cc +29 -9
  232. package/vendor/librocksdb/vendor/rocksdb/utilities/cache_dump_load_impl.h +14 -3
  233. package/vendor/librocksdb/vendor/rocksdb/utilities/debug.cc +16 -4
  234. package/vendor/librocksdb/vendor/rocksdb/utilities/fault_injection_fs.cc +677 -248
  235. package/vendor/librocksdb/vendor/rocksdb/utilities/fault_injection_fs.h +325 -158
  236. package/vendor/librocksdb/vendor/rocksdb/utilities/option_change_migration/option_change_migration.cc +1 -8
  237. package/vendor/librocksdb/vendor/rocksdb/utilities/table_properties_collectors/compact_for_tiering_collector.cc +144 -0
  238. package/vendor/librocksdb/vendor/rocksdb/utilities/table_properties_collectors/compact_for_tiering_collector.h +45 -0
  239. package/vendor/librocksdb/vendor/rocksdb/utilities/table_properties_collectors/compact_on_deletion_collector.cc +12 -0
  240. package/vendor/librocksdb/vendor/rocksdb/utilities/transactions/lock/range/range_tree/lib/portability/toku_time.h +1 -1
  241. package/vendor/librocksdb/vendor/rocksdb/utilities/transactions/lock/range/range_tree/lib/util/growable_array.h +3 -3
  242. package/vendor/librocksdb/vendor/rocksdb/utilities/transactions/pessimistic_transaction.cc +116 -20
  243. package/vendor/librocksdb/vendor/rocksdb/utilities/transactions/pessimistic_transaction.h +33 -1
  244. package/vendor/librocksdb/vendor/rocksdb/utilities/transactions/pessimistic_transaction_db.cc +78 -13
  245. package/vendor/librocksdb/vendor/rocksdb/utilities/transactions/pessimistic_transaction_db.h +33 -1
  246. package/vendor/librocksdb/vendor/rocksdb/utilities/transactions/transaction_base.cc +106 -7
  247. package/vendor/librocksdb/vendor/rocksdb/utilities/transactions/transaction_base.h +68 -10
  248. package/vendor/librocksdb/vendor/rocksdb/utilities/transactions/transaction_test.h +7 -3
  249. package/vendor/librocksdb/vendor/rocksdb/utilities/transactions/transaction_util.cc +8 -5
  250. package/vendor/librocksdb/vendor/rocksdb/utilities/transactions/transaction_util.h +7 -4
  251. package/vendor/librocksdb/vendor/rocksdb/utilities/transactions/write_prepared_txn.cc +18 -12
  252. package/vendor/librocksdb/vendor/rocksdb/utilities/transactions/write_prepared_txn_db.cc +4 -4
  253. package/vendor/librocksdb/vendor/rocksdb/utilities/transactions/write_prepared_txn_db.h +17 -0
  254. package/vendor/librocksdb/vendor/rocksdb/utilities/transactions/write_unprepared_txn.cc +11 -9
  255. package/vendor/librocksdb/vendor/rocksdb/utilities/transactions/write_unprepared_txn_db.cc +2 -1
  256. package/vendor/librocksdb/vendor/rocksdb/utilities/types_util.cc +88 -0
  257. package/vendor/librocksdb/vendor/rocksdb/utilities/write_batch_with_index/write_batch_with_index.cc +313 -14
  258. package/vendor/librocksdb/vendor/rocksdb/utilities/write_batch_with_index/write_batch_with_index_internal.cc +7 -0
  259. package/vendor/librocksdb/vendor/rocksdb/utilities/write_batch_with_index/write_batch_with_index_internal.h +1 -1
  260. package/vendor/librocksdb/vendor/rocksdb/db/multi_cf_iterator.cc +0 -102
  261. package/vendor/librocksdb/vendor/rocksdb/db/multi_cf_iterator.h +0 -159
@@ -20,66 +20,19 @@
20
20
 
21
21
  #include <string>
22
22
 
23
- #include "rocksdb/cleanable.h"
24
- #include "rocksdb/slice.h"
25
- #include "rocksdb/status.h"
23
+ #include "rocksdb/iterator_base.h"
26
24
  #include "rocksdb/wide_columns.h"
27
25
 
28
26
  namespace ROCKSDB_NAMESPACE {
29
27
 
30
- class Iterator : public Cleanable {
28
+ class Iterator : public IteratorBase {
31
29
  public:
32
30
  Iterator() {}
33
31
  // No copying allowed
34
32
  Iterator(const Iterator&) = delete;
35
33
  void operator=(const Iterator&) = delete;
36
34
 
37
- virtual ~Iterator() {}
38
-
39
- // An iterator is either positioned at a key/value pair, or
40
- // not valid. This method returns true iff the iterator is valid.
41
- // Always returns false if !status().ok().
42
- virtual bool Valid() const = 0;
43
-
44
- // Position at the first key in the source. The iterator is Valid()
45
- // after this call iff the source is not empty.
46
- virtual void SeekToFirst() = 0;
47
-
48
- // Position at the last key in the source. The iterator is
49
- // Valid() after this call iff the source is not empty.
50
- virtual void SeekToLast() = 0;
51
-
52
- // Position at the first key in the source that at or past target.
53
- // The iterator is Valid() after this call iff the source contains
54
- // an entry that comes at or past target.
55
- // All Seek*() methods clear any error status() that the iterator had prior to
56
- // the call; after the seek, status() indicates only the error (if any) that
57
- // happened during the seek, not any past errors.
58
- // Target does not contain timestamp.
59
- virtual void Seek(const Slice& target) = 0;
60
-
61
- // Position at the last key in the source that at or before target.
62
- // The iterator is Valid() after this call iff the source contains
63
- // an entry that comes at or before target.
64
- // Target does not contain timestamp.
65
- virtual void SeekForPrev(const Slice& target) = 0;
66
-
67
- // Moves to the next entry in the source. After this call, Valid() is
68
- // true iff the iterator was not positioned at the last entry in the source.
69
- // REQUIRES: Valid()
70
- virtual void Next() = 0;
71
-
72
- // Moves to the previous entry in the source. After this call, Valid() is
73
- // true iff the iterator was not positioned at the first entry in source.
74
- // REQUIRES: Valid()
75
- virtual void Prev() = 0;
76
-
77
- // Return the key for the current entry. The underlying storage for
78
- // the returned slice is valid only until the next modification of the
79
- // iterator (i.e. the next SeekToFirst/SeekToLast/Seek/SeekForPrev/Next/Prev
80
- // operation).
81
- // REQUIRES: Valid()
82
- virtual Slice key() const = 0;
35
+ virtual ~Iterator() override {}
83
36
 
84
37
  // Return the value for the current entry. If the entry is a plain key-value,
85
38
  // return the value as-is; if it is a wide-column entity, return the value of
@@ -102,27 +55,6 @@ class Iterator : public Cleanable {
102
55
  return kNoWideColumns;
103
56
  }
104
57
 
105
- // If an error has occurred, return it. Else return an ok status.
106
- // If non-blocking IO is requested and this operation cannot be
107
- // satisfied without doing some IO, then this returns Status::Incomplete().
108
- virtual Status status() const = 0;
109
-
110
- // If supported, the DB state that the iterator reads from is updated to
111
- // the latest state. The iterator will be invalidated after the call.
112
- // Regardless of whether the iterator was created/refreshed previously
113
- // with or without a snapshot, the iterator will be reading the
114
- // latest DB state after this call.
115
- // Note that you will need to call a Seek*() function to get the iterator
116
- // back into a valid state before calling a function that assumes the
117
- // state is already valid, like Next().
118
- virtual Status Refresh() { return Refresh(nullptr); }
119
-
120
- // Similar to Refresh() but the iterator will be reading the latest DB state
121
- // under the given snapshot.
122
- virtual Status Refresh(const class Snapshot*) {
123
- return Status::NotSupported("Refresh() is not supported");
124
- }
125
-
126
58
  // Property "rocksdb.iterator.is-key-pinned":
127
59
  // If returning "1", this means that the Slice returned by key() is valid
128
60
  // as long as the iterator is not deleted.
@@ -130,6 +62,12 @@ class Iterator : public Cleanable {
130
62
  // - Iterator created with ReadOptions::pin_data = true
131
63
  // - DB tables were created with
132
64
  // BlockBasedTableOptions::use_delta_encoding = false.
65
+ // Property "rocksdb.iterator.is-value-pinned":
66
+ // If returning "1", this means that the Slice returned by value() is valid
67
+ // as long as the iterator is not deleted.
68
+ // It is guaranteed to always return "1" if
69
+ // - Iterator created with ReadOptions::pin_data = true
70
+ // - The value is found in a `kTypeValue` record
133
71
  // Property "rocksdb.iterator.super-version-number":
134
72
  // LSM version used by the iterator. The same format as DB Property
135
73
  // kCurrentSuperVersionNumber. See its comment for more information.
@@ -0,0 +1,90 @@
1
+ // Copyright (c) Meta Platforms, Inc. and affiliates.
2
+ // This source code is licensed under both the GPLv2 (found in the
3
+ // COPYING file in the root directory) and Apache 2.0 License
4
+ // (found in the LICENSE.Apache file in the root directory).
5
+
6
+ #pragma once
7
+
8
+ #include "rocksdb/cleanable.h"
9
+ #include "rocksdb/slice.h"
10
+ #include "rocksdb/status.h"
11
+
12
+ namespace ROCKSDB_NAMESPACE {
13
+
14
+ class IteratorBase : public Cleanable {
15
+ public:
16
+ IteratorBase() {}
17
+ // No copying allowed
18
+ IteratorBase(const IteratorBase&) = delete;
19
+ void operator=(const IteratorBase&) = delete;
20
+
21
+ virtual ~IteratorBase() {}
22
+
23
+ // An iterator is either positioned at a key/value pair, or
24
+ // not valid. This method returns true iff the iterator is valid.
25
+ // Always returns false if !status().ok().
26
+ virtual bool Valid() const = 0;
27
+
28
+ // Position at the first key in the source. The iterator is Valid()
29
+ // after this call iff the source is not empty.
30
+ virtual void SeekToFirst() = 0;
31
+
32
+ // Position at the last key in the source. The iterator is
33
+ // Valid() after this call iff the source is not empty.
34
+ virtual void SeekToLast() = 0;
35
+
36
+ // Position at the first key in the source that is at or past target.
37
+ // The iterator is Valid() after this call iff the source contains
38
+ // an entry that comes at or past target.
39
+ // All Seek*() methods clear any error status() that the iterator had prior to
40
+ // the call; after the seek, status() indicates only the error (if any) that
41
+ // happened during the seek, not any past errors.
42
+ // Target does not contain timestamp.
43
+ virtual void Seek(const Slice& target) = 0;
44
+
45
+ // Position at the last key in the source that is at or before target.
46
+ // The iterator is Valid() after this call iff the source contains
47
+ // an entry that comes at or before target.
48
+ // Target does not contain timestamp.
49
+ virtual void SeekForPrev(const Slice& target) = 0;
50
+
51
+ // Moves to the next entry in the source. After this call, Valid() is
52
+ // true iff the iterator was not positioned at the last entry in the source.
53
+ // REQUIRES: Valid()
54
+ virtual void Next() = 0;
55
+
56
+ // Moves to the previous entry in the source. After this call, Valid() is
57
+ // true iff the iterator was not positioned at the first entry in source.
58
+ // REQUIRES: Valid()
59
+ virtual void Prev() = 0;
60
+
61
+ // If supported, the DB state that the iterator reads from is updated to
62
+ // the latest state. The iterator will be invalidated after the call.
63
+ // Regardless of whether the iterator was created/refreshed previously
64
+ // with or without a snapshot, the iterator will be reading the
65
+ // latest DB state after this call.
66
+ // Note that you will need to call a Seek*() function to get the iterator
67
+ // back into a valid state before calling a function that assumes the
68
+ // state is already valid, like Next().
69
+ virtual Status Refresh() { return Refresh(nullptr); }
70
+
71
+ // Similar to Refresh() but the iterator will be reading the latest DB state
72
+ // under the given snapshot.
73
+ virtual Status Refresh(const class Snapshot*) {
74
+ return Status::NotSupported("Refresh() is not supported");
75
+ }
76
+
77
+ // Return the key for the current entry. The underlying storage for
78
+ // the returned slice is valid only until the next modification of the
79
+ // iterator (i.e. the next SeekToFirst/SeekToLast/Seek/SeekForPrev/Next/Prev
80
+ // operation).
81
+ // REQUIRES: Valid()
82
+ virtual Slice key() const = 0;
83
+
84
+ // If an error has occurred, return it. Else return an ok status.
85
+ // If non-blocking IO is requested and this operation cannot be
86
+ // satisfied without doing some IO, then this returns Status::Incomplete().
87
+ virtual Status status() const = 0;
88
+ };
89
+
90
+ } // namespace ROCKSDB_NAMESPACE
@@ -328,6 +328,15 @@ struct BlobFileGarbageInfo : public BlobFileInfo {
328
328
  uint64_t garbage_blob_bytes;
329
329
  };
330
330
 
331
+ struct ManualFlushInfo {
332
+ // the id of the column family
333
+ uint32_t cf_id;
334
+ // the name of the column family
335
+ std::string cf_name;
336
+ // Reason that triggered this manual flush
337
+ FlushReason flush_reason;
338
+ };
339
+
331
340
  struct FlushJobInfo {
332
341
  // the id of the column family
333
342
  uint32_t cf_id;
@@ -492,6 +501,10 @@ struct MemTableInfo {
492
501
  uint64_t num_entries;
493
502
  // Total number of deletes in memtable
494
503
  uint64_t num_deletes;
504
+
505
+ // The newest user-defined timestamps in the memtable. Note this field is
506
+ // only populated when `persist_user_defined_timestamps` is false.
507
+ std::string newest_udt;
495
508
  };
496
509
 
497
510
  struct ExternalFileIngestionInfo {
@@ -595,6 +608,14 @@ class EventListener : public Customizable {
595
608
  virtual void OnFlushBegin(DB* /*db*/,
596
609
  const FlushJobInfo& /*flush_job_info*/) {}
597
610
 
611
+ // A callback function to RocksDB which will be called after a manual flush
612
+ // is scheduled. The default implementation is no-op.
613
+ // The size of the `manual_flush_info` vector should only be bigger than 1 if
614
+ // the DB enables atomic flush and has more than 1 column families. Its size
615
+ // should be 1 in all other cases.
616
+ virtual void OnManualFlushScheduled(
617
+ DB* /*db*/, const std::vector<ManualFlushInfo>& /*manual_flush_info*/) {}
618
+
598
619
  // A callback function for RocksDB which will be called whenever
599
620
  // a SST file is deleted. Different from OnCompactionCompleted and
600
621
  // OnFlushCompleted, this callback is designed for external logging
@@ -234,6 +234,12 @@ struct ColumnFamilyOptions : public AdvancedColumnFamilyOptions {
234
234
  // Number of files to trigger level-0 compaction. A value <0 means that
235
235
  // level-0 compaction will not be triggered by number of files at all.
236
236
  //
237
+ // Universal compaction: RocksDB will try to keep the number of sorted runs
238
+ // no more than this number. If CompactionOptionsUniversal::max_read_amp is
239
+ // set, then this option will be used only as a trigger to look for
240
+ // compaction. CompactionOptionsUniversal::max_read_amp will be the limit
241
+ // on the number of sorted runs.
242
+ //
237
243
  // Default: 4
238
244
  //
239
245
  // Dynamically changeable through SetOptions() API
@@ -344,6 +350,48 @@ struct ColumnFamilyOptions : public AdvancedColumnFamilyOptions {
344
350
  // Dynamically changeable through SetOptions() API
345
351
  uint32_t memtable_max_range_deletions = 0;
346
352
 
353
+ // EXPERIMENTAL
354
+ // When > 0, RocksDB attempts to erase some block cache entries for files
355
+ // that have become obsolete, which means they are about to be deleted.
356
+ // To avoid excessive tracking, this "uncaching" process is iterative and
357
+ // speculative, meaning it could incur extra background CPU effort if the
358
+ // file's blocks are generally not cached. A larger number indicates more
359
+ // willingness to spend CPU time to maximize block cache hit rates by
360
+ // erasing known-obsolete entries.
361
+ //
362
+ // When uncache_aggressiveness=1, block cache entries for an obsolete file
363
+ // are only erased until any attempted erase operation fails because the
364
+ // block is not cached. Then no further attempts are made to erase cached
365
+ // blocks for that file.
366
+ //
367
+ // For larger values, erasure is attempted until evidence indicates that the
368
+ // chance of success is < 0.99^(a-1), where a = uncache_aggressiveness. For
369
+ // example:
370
+ // 2 -> Attempt only while expecting >= 99% successful/useful erasure
371
+ // 11 -> 90%
372
+ // 69 -> 50%
373
+ // 110 -> 33%
374
+ // 230 -> 10%
375
+ // 460 -> 1%
376
+ // 690 -> 0.1%
377
+ // 1000 -> 1 in 23000
378
+ // 10000 -> Always (for all practical purposes)
379
+ // NOTE: UINT32_MAX and nearby values could take additional special meanings
380
+ // in the future.
381
+ //
382
+ // Pinned cache entries (guaranteed present) are always erased if
383
+ // uncache_aggressiveness > 0, but are not used in predicting the chances of
384
+ // successful erasure of non-pinned entries.
385
+ //
386
+ // NOTE: In the case of copied DBs (such as Checkpoints) sharing a block
387
+ // cache, it is possible that a file becoming obsolete doesn't mean its
388
+ // block cache entries (shared among copies) are obsolete. Such a scenario
389
+ // is the best case for uncache_aggressiveness = 0.
390
+ //
391
+ // Once validated in production, the default will likely change to something
392
+ // around 300.
393
+ uint32_t uncache_aggressiveness = 0;
394
+
347
395
  // Create ColumnFamilyOptions with default values for all fields
348
396
  ColumnFamilyOptions();
349
397
  // Create ColumnFamilyOptions from Options
@@ -597,9 +645,7 @@ struct DBOptions {
597
645
  bool verify_sst_unique_id_in_manifest = true;
598
646
 
599
647
  // Use the specified object to interact with the environment,
600
- // e.g. to read/write files, schedule background work, etc. In the near
601
- // future, support for doing storage operations such as read/write files
602
- // through env will be deprecated in favor of file_system (see below)
648
+ // e.g. to read/write files, schedule background work, etc.
603
649
  // Default: Env::Default()
604
650
  Env* env = Env::Default();
605
651
 
@@ -643,11 +689,10 @@ struct DBOptions {
643
689
  // Default: nullptr
644
690
  std::shared_ptr<Logger> info_log = nullptr;
645
691
 
646
- #ifdef NDEBUG
647
- InfoLogLevel info_log_level = INFO_LEVEL;
648
- #else
649
- InfoLogLevel info_log_level = DEBUG_LEVEL;
650
- #endif // NDEBUG
692
+ // Minimum level for sending log messages to info_log. The default is
693
+ // INFO_LEVEL when RocksDB is compiled in release mode, and DEBUG_LEVEL
694
+ // when it is compiled in debug mode.
695
+ InfoLogLevel info_log_level = Logger::kDefaultLogLevel;
651
696
 
652
697
  // Number of open files that can be used by the DB. You may need to
653
698
  // increase this if your database has a large working set. Value -1 means
@@ -1053,6 +1098,9 @@ struct DBOptions {
1053
1098
  uint64_t bytes_per_sync = 0;
1054
1099
 
1055
1100
  // Same as bytes_per_sync, but applies to WAL files
1101
+ // This does not guarantee the WALs are synced in the order of creation. New
1102
+ // WAL can be synced while an older WAL isn't. Therefore upon system crash,
1103
+ // this hole in the WAL data can create partial data loss.
1056
1104
  //
1057
1105
  // Default: 0, turned off
1058
1106
  //
@@ -1220,6 +1268,8 @@ struct DBOptions {
1220
1268
  bool allow_2pc = false;
1221
1269
 
1222
1270
  // A global cache for table-level rows.
1271
+ // Used to speed up Get() queries.
1272
+ // NOTE: does not work with DeleteRange() yet.
1223
1273
  // Default: nullptr (disabled)
1224
1274
  std::shared_ptr<RowCache> row_cache = nullptr;
1225
1275
 
@@ -1295,6 +1345,15 @@ struct DBOptions {
1295
1345
  // the WAL is read.
1296
1346
  CompressionType wal_compression = kNoCompression;
1297
1347
 
1348
+ // Set to true to re-instate an old behavior of keeping complete, synced WAL
1349
+ // files open for write until they are collected for deletion by a
1350
+ // background thread. This should not be needed unless there is a
1351
+ // performance issue with file Close(), but setting it to true means that
1352
+ // Checkpoint might call LinkFile on a WAL still open for write, which might
1353
+ // be unsupported on some FileSystem implementations. As this is intended as
1354
+ // a temporary kill switch, it is already DEPRECATED.
1355
+ bool background_close_inactive_wals = false;
1356
+
1298
1357
  // If true, RocksDB supports flushing multiple column families and committing
1299
1358
  // their results atomically to MANIFEST. Note that it is not
1300
1359
  // necessary to set atomic_flush to true if WAL is always enabled since WAL
@@ -1484,6 +1543,30 @@ struct DBOptions {
1484
1543
  // use "0:00-23:59". To make an entire day have no offpeak period, leave
1485
1544
  // this field blank. Default: Empty string (no offpeak).
1486
1545
  std::string daily_offpeak_time_utc = "";
1546
+
1547
+ // EXPERIMENTAL
1548
+
1549
+ // When a RocksDB database is opened in follower mode, this option
1550
+ // is set by the user to request the frequency of the follower
1551
+ // attempting to refresh its view of the leader. RocksDB may choose to
1552
+ // trigger catch ups more frequently if it detects any changes in the
1553
+ // database state.
1554
+ // Default every 10s.
1555
+ uint64_t follower_refresh_catchup_period_ms = 10000;
1556
+
1557
+ // For a given catch up attempt, this option specifies the number of times
1558
+ // to tail the MANIFEST and try to install a new, consistent version before
1559
+ // giving up. Though it should be extremely rare, the catch up may fail if
1560
+ // the leader is mutating the LSM at a very high rate and the follower is
1561
+ // unable to get a consistent view.
1562
+ // Default to 10 attempts
1563
+ uint64_t follower_catchup_retry_count = 10;
1564
+
1565
+ // Time to wait between consecutive catch up attempts
1566
+ // Default 100ms
1567
+ uint64_t follower_catchup_retry_wait_ms = 100;
1568
+
1569
+ // End EXPERIMENTAL
1487
1570
  };
1488
1571
 
1489
1572
  // Options to control the behavior of a database (passed to DB::Open)
@@ -1908,20 +1991,29 @@ Status CreateLoggerFromOptions(const std::string& dbname,
1908
1991
 
1909
1992
  // CompactionOptions are used in CompactFiles() call.
1910
1993
  struct CompactionOptions {
1994
+ // DEPRECATED: this option is unsafe because it allows the user to set any
1995
+ // `CompressionType` while always using `CompressionOptions` from the
1996
+ // `ColumnFamilyOptions`. As a result the `CompressionType` and
1997
+ // `CompressionOptions` can easily be inconsistent.
1998
+ //
1911
1999
  // Compaction output compression type
1912
- // Default: snappy
2000
+ //
2001
+ // Default: `kDisableCompressionOption`
2002
+ //
1913
2003
  // If set to `kDisableCompressionOption`, RocksDB will choose compression type
1914
- // according to the `ColumnFamilyOptions`, taking into account the output
1915
- // level if `compression_per_level` is specified.
2004
+ // according to the `ColumnFamilyOptions`. RocksDB takes into account the
2005
+ // output level in case the `ColumnFamilyOptions` has level-specific settings.
1916
2006
  CompressionType compression;
2007
+
1917
2008
  // Compaction will create files of size `output_file_size_limit`.
1918
2009
  // Default: MAX, which means that compaction will create a single file
1919
2010
  uint64_t output_file_size_limit;
2011
+
1920
2012
  // If > 0, it will replace the option in the DBOptions for this compaction.
1921
2013
  uint32_t max_subcompactions;
1922
2014
 
1923
2015
  CompactionOptions()
1924
- : compression(kSnappyCompression),
2016
+ : compression(kDisableCompressionOption),
1925
2017
  output_file_size_limit(std::numeric_limits<uint64_t>::max()),
1926
2018
  max_subcompactions(0) {}
1927
2019
  };
@@ -2087,6 +2179,24 @@ struct IngestExternalFileOptions {
2087
2179
  //
2088
2180
  // XXX: "bottommost" is obsolete/confusing terminology to refer to last level
2089
2181
  bool fail_if_not_bottommost_level = false;
2182
+ // EXPERIMENTAL
2183
+ // If set to true, ingestion will
2184
+ // - allow the files to not be generated by SstFileWriter, and
2185
+ // - ignore cf_id mismatch between cf_id in the files and the CF they are
2186
+ // being ingested into.
2187
+ //
2188
+ // REQUIRES:
2189
+ // - files to be ingested do not overlap with existing keys.
2190
+ // - write_global_seqno = false
2191
+ // - move_files = false
2192
+ //
2193
+ // Warning: This ONLY works for SST files where all keys have sequence number
2194
+ // zero and with no duplicated user keys (this should be guaranteed if the
2195
+ // file is generated by a DB with zero as the largest sequence number).
2196
+ // We scan the entire SST files to validate sequence numbers.
2197
+ // Warning: If a DB contains ingested files generated by another DB/CF,
2198
+ // RepairDB() may not correctly recover these files. It may lose these files.
2199
+ bool allow_db_generated_files = false;
2090
2200
  };
2091
2201
 
2092
2202
  enum TraceFilterType : uint64_t {
@@ -2209,6 +2319,9 @@ struct WaitForCompactOptions {
2209
2319
  // A boolean to flush all column families before starting to wait.
2210
2320
  bool flush = false;
2211
2321
 
2322
+ // A boolean to wait for purge to complete
2323
+ bool wait_for_purge = false;
2324
+
2212
2325
  // A boolean to call Close() after waiting is done. By the time Close() is
2213
2326
  // called here, there should be no background jobs in progress and no new
2214
2327
  // background jobs should be added. DB may not have been closed if Close()
@@ -280,7 +280,7 @@ struct PerfContextBase {
280
280
  struct PerfContext : public PerfContextBase {
281
281
  ~PerfContext();
282
282
 
283
- PerfContext() {}
283
+ PerfContext() { Reset(); }
284
284
 
285
285
  PerfContext(const PerfContext&);
286
286
  PerfContext& operator=(const PerfContext&);
@@ -24,11 +24,21 @@ class SstFileReader {
24
24
  // Prepares to read from the file located at "file_path".
25
25
  Status Open(const std::string& file_path);
26
26
 
27
- // Returns a new iterator over the table contents.
27
+ // Returns a new iterator over the table contents as a DB iterator, a.k.a
28
+ // a `DBIter` that iterates logically visible entries, for example, a delete
29
+ // entry is not logically visible.
28
30
  // Most read options provide the same control as we read from DB.
29
31
  // If "snapshot" is nullptr, the iterator returns only the latest keys.
30
32
  Iterator* NewIterator(const ReadOptions& options);
31
33
 
34
+ // Returns a new iterator over the table contents as a raw table iterator,
35
+ // a.k.a a `TableIterator` that iterates all point data entries in the table
36
+ // including logically invisible entries like delete entries.
37
+ // This API is intended to provide a programmatic way to observe SST files
38
+ // created by a DB, to be used by third party tools. DB optimization
39
+ // capabilities like filling cache, read ahead are disabled.
40
+ std::unique_ptr<Iterator> NewTableIterator();
41
+
32
42
  std::shared_ptr<const TableProperties> GetTableProperties() const;
33
43
 
34
44
  // Verifies whether there is corruption in this table.
@@ -426,12 +426,12 @@ struct BlockBasedTableOptions {
426
426
  // the block cache better at using space it is allowed. (These issues
427
427
  // should not arise with partitioned filters.)
428
428
  //
429
- // NOTE: Do not set to true if you do not trust malloc_usable_size. With
430
- // this option, RocksDB might access an allocated memory object beyond its
431
- // original size if malloc_usable_size says it is safe to do so. While this
432
- // can be considered bad practice, it should not produce undefined behavior
433
- // unless malloc_usable_size is buggy or broken.
434
- bool optimize_filters_for_memory = false;
429
+ // NOTE: Set to false if you do not trust malloc_usable_size. When set to
430
+ // true, RocksDB might access an allocated memory object beyond its original
431
+ // size if malloc_usable_size says it is safe to do so. While this can be
432
+ // considered bad practice, it should not produce undefined behavior unless
433
+ // malloc_usable_size is buggy or broken.
434
+ bool optimize_filters_for_memory = true;
435
435
 
436
436
  // Use delta encoding to compress keys in blocks.
437
437
  // ReadOptions::pin_data requires this option to be disabled.
@@ -125,6 +125,8 @@ class TablePropertiesCollector {
125
125
  // Finish() will be called when a table has already been built and is ready
126
126
  // for writing the properties block.
127
127
  // It will be called only once by RocksDB internal.
128
+ // When the returned Status is not OK, the collected properties will not be
129
+ // written to the file's property block.
128
130
  //
129
131
  // @params properties User will add their collected statistics to
130
132
  // `properties`.
@@ -157,8 +159,25 @@ class TablePropertiesCollectorFactory : public Customizable {
157
159
  // The level at creating the SST file (i.e, table), of which the
158
160
  // properties are being collected.
159
161
  int level_at_creation = kUnknownLevelAtCreation;
162
+ int num_levels = kUnknownNumLevels;
163
+ // In the tiering case, data with seqnos smaller than or equal to this
164
+ // cutoff sequence number will be considered by a compaction job as eligible
165
+ // to be placed on the last level. When this is the maximum sequence number,
166
+ // it indicates tiering is disabled.
167
+ SequenceNumber last_level_inclusive_max_seqno_threshold;
160
168
  static const uint32_t kUnknownColumnFamily;
161
169
  static const int kUnknownLevelAtCreation = -1;
170
+ static const int kUnknownNumLevels = -1;
171
+
172
+ Context() {}
173
+
174
+ Context(uint32_t _column_family_id, int _level_at_creation, int _num_levels,
175
+ SequenceNumber _last_level_inclusive_max_seqno_threshold)
176
+ : column_family_id(_column_family_id),
177
+ level_at_creation(_level_at_creation),
178
+ num_levels(_num_levels),
179
+ last_level_inclusive_max_seqno_threshold(
180
+ _last_level_inclusive_max_seqno_threshold) {}
162
181
  };
163
182
 
164
183
  ~TablePropertiesCollectorFactory() override {}
@@ -14,8 +14,10 @@
14
14
 
15
15
  namespace ROCKSDB_NAMESPACE {
16
16
 
17
- class LogFile;
18
- using VectorLogPtr = std::vector<std::unique_ptr<LogFile>>;
17
+ class WalFile;
18
+ using VectorWalPtr = std::vector<std::unique_ptr<WalFile>>;
19
+ // DEPRECATED old name
20
+ using VectorLogPtr = VectorWalPtr;
19
21
 
20
22
  enum WalFileType {
21
23
  /* Indicates that WAL file is in archive directory. WAL files are moved from
@@ -30,10 +32,10 @@ enum WalFileType {
30
32
  kAliveLogFile = 1
31
33
  };
32
34
 
33
- class LogFile {
35
+ class WalFile {
34
36
  public:
35
- LogFile() {}
36
- virtual ~LogFile() {}
37
+ WalFile() {}
38
+ virtual ~WalFile() {}
37
39
 
38
40
  // Returns log file's pathname relative to the main db dir
39
41
  // Eg. For a live-log-file = /000003.log
@@ -50,10 +52,14 @@ class LogFile {
50
52
  // Starting sequence number of writebatch written in this log file
51
53
  virtual SequenceNumber StartSequence() const = 0;
52
54
 
53
- // Size of log file on disk in Bytes
55
+ // The position of the last flushed write to the file (which for
56
+ // recycled WAL files is typically less than the full file size).
54
57
  virtual uint64_t SizeFileBytes() const = 0;
55
58
  };
56
59
 
60
+ // DEPRECATED old name for WalFile. (Confusing with "Logger" etc.)
61
+ using LogFile = WalFile;
62
+
57
63
  struct BatchResult {
58
64
  SequenceNumber sequence = 0;
59
65
  std::unique_ptr<WriteBatch> writeBatchPtr;
@@ -67,9 +67,21 @@ enum EntryType {
67
67
  kEntryBlobIndex,
68
68
  kEntryDeleteWithTimestamp,
69
69
  kEntryWideColumnEntity,
70
+ kEntryTimedPut, // That hasn't yet converted to a standard Put entry
70
71
  kEntryOther,
71
72
  };
72
73
 
74
+ // Structured user-oriented representation of an internal key. It includes user
75
+ // key, sequence number, and type.
76
+ // If user-defined timestamp is enabled, `timestamp` contains the user-defined
77
+ // timestamp, it's otherwise an empty Slice.
78
+ struct ParsedEntryInfo {
79
+ Slice user_key;
80
+ Slice timestamp;
81
+ SequenceNumber sequence;
82
+ EntryType type;
83
+ };
84
+
73
85
  enum class WriteStallCause {
74
86
  // Beginning of CF-scope write stall causes
75
87
  //
@@ -65,6 +65,36 @@ class CompactionOptionsUniversal {
65
65
  // Default: -1
66
66
  int compression_size_percent;
67
67
 
68
+ // The limit on the number of sorted runs. RocksDB will try to keep
69
+ // the number of sorted runs at most this number. While compactions are
70
+ // running, the number of sorted runs may be temporarily higher than
71
+ // this number.
72
+ //
73
+ // Since universal compaction checks if there is compaction to do when
74
+ // the number of sorted runs is at least level0_file_num_compaction_trigger,
75
+ // it is suggested to set level0_file_num_compaction_trigger to be no larger
76
+ // than max_read_amp.
77
+ //
78
+ // Values:
79
+ // -1: special flag to let RocksDB pick default. Currently,
80
+ // RocksDB will fall back to the behavior before this option is introduced,
81
+ // which is to use level0_file_num_compaction_trigger as the limit.
82
+ // This may change in the future to behave as 0 below.
83
+ // 0: Let RocksDB auto-tune. Currently, we determine the max number of
84
+ // sorted runs based on the current DB size, size_ratio and
85
+ // write_buffer_size. Note that this is only supported for the default
86
+ // stop_style kCompactionStopStyleTotalSize. For
87
+ // kCompactionStopStyleSimilarSize, this behaves as if -1 is configured.
88
+ // N > 0: limit the number of sorted runs to be at most N.
89
+ // N should be at least the compaction trigger specified by
90
+ // level0_file_num_compaction_trigger. If 0 < max_read_amp <
91
+ // level0_file_num_compaction_trigger, Status::NotSupported() will be
92
+ // returned during DB open.
93
+ // N < -1: Status::NotSupported() will be returned during DB open.
94
+ //
95
+ // Default: -1
96
+ int max_read_amp;
97
+
68
98
  // The algorithm used to stop picking files into a single compaction run
69
99
  // Default: kCompactionStopStyleTotalSize
70
100
  CompactionStopStyle stop_style;
@@ -88,6 +118,7 @@ class CompactionOptionsUniversal {
88
118
  max_merge_width(UINT_MAX),
89
119
  max_size_amplification_percent(200),
90
120
  compression_size_percent(-1),
121
+ max_read_amp(-1),
91
122
  stop_style(kCompactionStopStyleTotalSize),
92
123
  allow_trivial_move(false),
93
124
  incremental(false) {}