@nxtedition/rocksdb 13.5.7 → 13.5.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (509)
  1. package/binding.cc +248 -70
  2. package/binding.gyp +2 -2
  3. package/deps/rocksdb/rocksdb/BUCK +12 -0
  4. package/deps/rocksdb/rocksdb/CMakeLists.txt +7 -0
  5. package/deps/rocksdb/rocksdb/Makefile +28 -23
  6. package/deps/rocksdb/rocksdb/cache/cache.cc +0 -1
  7. package/deps/rocksdb/rocksdb/cache/cache_bench_tool.cc +1 -2
  8. package/deps/rocksdb/rocksdb/cache/compressed_secondary_cache.cc +43 -39
  9. package/deps/rocksdb/rocksdb/cache/compressed_secondary_cache.h +2 -0
  10. package/deps/rocksdb/rocksdb/cache/compressed_secondary_cache_test.cc +0 -1
  11. package/deps/rocksdb/rocksdb/cache/lru_cache.cc +2 -3
  12. package/deps/rocksdb/rocksdb/cache/lru_cache_test.cc +2 -2
  13. package/deps/rocksdb/rocksdb/cache/secondary_cache.cc +1 -3
  14. package/deps/rocksdb/rocksdb/cache/secondary_cache_adapter.cc +11 -1
  15. package/deps/rocksdb/rocksdb/cache/tiered_secondary_cache_test.cc +13 -5
  16. package/deps/rocksdb/rocksdb/crash_test.mk +61 -15
  17. package/deps/rocksdb/rocksdb/db/arena_wrapped_db_iter.cc +136 -45
  18. package/deps/rocksdb/rocksdb/db/arena_wrapped_db_iter.h +34 -16
  19. package/deps/rocksdb/rocksdb/db/blob/blob_file_builder.cc +10 -7
  20. package/deps/rocksdb/rocksdb/db/blob/blob_file_builder_test.cc +1 -2
  21. package/deps/rocksdb/rocksdb/db/blob/blob_file_meta.h +1 -0
  22. package/deps/rocksdb/rocksdb/db/blob/blob_file_reader.cc +12 -9
  23. package/deps/rocksdb/rocksdb/db/blob/blob_file_reader_test.cc +3 -4
  24. package/deps/rocksdb/rocksdb/db/blob/blob_source.cc +2 -2
  25. package/deps/rocksdb/rocksdb/db/blob/blob_source_test.cc +3 -4
  26. package/deps/rocksdb/rocksdb/db/builder.cc +22 -8
  27. package/deps/rocksdb/rocksdb/db/builder.h +5 -4
  28. package/deps/rocksdb/rocksdb/db/c.cc +556 -15
  29. package/deps/rocksdb/rocksdb/db/c_test.c +133 -12
  30. package/deps/rocksdb/rocksdb/db/column_family.cc +114 -50
  31. package/deps/rocksdb/rocksdb/db/column_family.h +53 -36
  32. package/deps/rocksdb/rocksdb/db/column_family_test.cc +6 -6
  33. package/deps/rocksdb/rocksdb/db/compact_files_test.cc +0 -1
  34. package/deps/rocksdb/rocksdb/db/compaction/compaction.cc +95 -70
  35. package/deps/rocksdb/rocksdb/db/compaction/compaction.h +71 -51
  36. package/deps/rocksdb/rocksdb/db/compaction/compaction_iterator.cc +7 -86
  37. package/deps/rocksdb/rocksdb/db/compaction/compaction_iterator.h +26 -68
  38. package/deps/rocksdb/rocksdb/db/compaction/compaction_iterator_test.cc +0 -122
  39. package/deps/rocksdb/rocksdb/db/compaction/compaction_job.cc +453 -258
  40. package/deps/rocksdb/rocksdb/db/compaction/compaction_job.h +117 -92
  41. package/deps/rocksdb/rocksdb/db/compaction/compaction_job_stats_test.cc +0 -1
  42. package/deps/rocksdb/rocksdb/db/compaction/compaction_job_test.cc +38 -38
  43. package/deps/rocksdb/rocksdb/db/compaction/compaction_outputs.cc +24 -17
  44. package/deps/rocksdb/rocksdb/db/compaction/compaction_outputs.h +34 -45
  45. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker.cc +32 -31
  46. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker.h +12 -3
  47. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_fifo.cc +1 -1
  48. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_fifo.h +2 -1
  49. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_level.cc +10 -10
  50. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_level.h +2 -1
  51. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_test.cc +82 -34
  52. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_universal.cc +267 -179
  53. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_universal.h +4 -1
  54. package/deps/rocksdb/rocksdb/db/compaction/compaction_service_job.cc +273 -89
  55. package/deps/rocksdb/rocksdb/db/compaction/compaction_service_test.cc +300 -14
  56. package/deps/rocksdb/rocksdb/db/compaction/compaction_state.cc +4 -4
  57. package/deps/rocksdb/rocksdb/db/compaction/compaction_state.h +2 -2
  58. package/deps/rocksdb/rocksdb/db/compaction/subcompaction_state.cc +28 -23
  59. package/deps/rocksdb/rocksdb/db/compaction/subcompaction_state.h +69 -51
  60. package/deps/rocksdb/rocksdb/db/compaction/tiered_compaction_test.cc +522 -245
  61. package/deps/rocksdb/rocksdb/db/convenience.cc +15 -4
  62. package/deps/rocksdb/rocksdb/db/corruption_test.cc +1 -3
  63. package/deps/rocksdb/rocksdb/db/cuckoo_table_db_test.cc +0 -2
  64. package/deps/rocksdb/rocksdb/db/db_basic_test.cc +196 -17
  65. package/deps/rocksdb/rocksdb/db/db_block_cache_test.cc +74 -62
  66. package/deps/rocksdb/rocksdb/db/db_bloom_filter_test.cc +48 -0
  67. package/deps/rocksdb/rocksdb/db/db_compaction_test.cc +682 -250
  68. package/deps/rocksdb/rocksdb/db/db_dynamic_level_test.cc +0 -1
  69. package/deps/rocksdb/rocksdb/db/db_encryption_test.cc +3 -4
  70. package/deps/rocksdb/rocksdb/db/db_filesnapshot.cc +11 -16
  71. package/deps/rocksdb/rocksdb/db/db_flush_test.cc +57 -0
  72. package/deps/rocksdb/rocksdb/db/db_impl/compacted_db_impl.cc +2 -2
  73. package/deps/rocksdb/rocksdb/db/db_impl/compacted_db_impl.h +1 -1
  74. package/deps/rocksdb/rocksdb/db/db_impl/db_impl.cc +540 -490
  75. package/deps/rocksdb/rocksdb/db/db_impl/db_impl.h +347 -188
  76. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_compaction_flush.cc +584 -217
  77. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_debug.cc +13 -9
  78. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_experimental.cc +5 -7
  79. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_files.cc +40 -36
  80. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_follower.cc +1 -3
  81. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_open.cc +751 -372
  82. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_readonly.cc +35 -32
  83. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_readonly.h +24 -2
  84. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_secondary.cc +125 -63
  85. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_secondary.h +2 -2
  86. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_write.cc +311 -196
  87. package/deps/rocksdb/rocksdb/db/db_io_failure_test.cc +15 -5
  88. package/deps/rocksdb/rocksdb/db/db_iter.cc +42 -29
  89. package/deps/rocksdb/rocksdb/db/db_iter.h +96 -31
  90. package/deps/rocksdb/rocksdb/db/db_iter_stress_test.cc +3 -4
  91. package/deps/rocksdb/rocksdb/db/db_iter_test.cc +168 -228
  92. package/deps/rocksdb/rocksdb/db/db_iterator_test.cc +454 -0
  93. package/deps/rocksdb/rocksdb/db/db_kv_checksum_test.cc +8 -8
  94. package/deps/rocksdb/rocksdb/db/db_log_iter_test.cc +0 -1
  95. package/deps/rocksdb/rocksdb/db/db_memtable_test.cc +90 -0
  96. package/deps/rocksdb/rocksdb/db/db_merge_operand_test.cc +60 -2
  97. package/deps/rocksdb/rocksdb/db/db_merge_operator_test.cc +7 -3
  98. package/deps/rocksdb/rocksdb/db/db_options_test.cc +85 -27
  99. package/deps/rocksdb/rocksdb/db/db_properties_test.cc +3 -1
  100. package/deps/rocksdb/rocksdb/db/db_rate_limiter_test.cc +0 -2
  101. package/deps/rocksdb/rocksdb/db/db_secondary_test.cc +114 -2
  102. package/deps/rocksdb/rocksdb/db/db_sst_test.cc +0 -1
  103. package/deps/rocksdb/rocksdb/db/db_statistics_test.cc +0 -1
  104. package/deps/rocksdb/rocksdb/db/db_table_properties_test.cc +51 -3
  105. package/deps/rocksdb/rocksdb/db/db_tailing_iter_test.cc +0 -1
  106. package/deps/rocksdb/rocksdb/db/db_test.cc +325 -18
  107. package/deps/rocksdb/rocksdb/db/db_test2.cc +644 -20
  108. package/deps/rocksdb/rocksdb/db/db_test_util.cc +14 -6
  109. package/deps/rocksdb/rocksdb/db/db_test_util.h +9 -0
  110. package/deps/rocksdb/rocksdb/db/db_universal_compaction_test.cc +64 -45
  111. package/deps/rocksdb/rocksdb/db/db_wal_test.cc +203 -14
  112. package/deps/rocksdb/rocksdb/db/db_with_timestamp_basic_test.cc +259 -30
  113. package/deps/rocksdb/rocksdb/db/db_write_buffer_manager_test.cc +0 -1
  114. package/deps/rocksdb/rocksdb/db/db_write_test.cc +75 -1
  115. package/deps/rocksdb/rocksdb/db/dbformat.h +70 -6
  116. package/deps/rocksdb/rocksdb/db/deletefile_test.cc +0 -190
  117. package/deps/rocksdb/rocksdb/db/error_handler.cc +22 -7
  118. package/deps/rocksdb/rocksdb/db/error_handler.h +16 -1
  119. package/deps/rocksdb/rocksdb/db/event_helpers.cc +41 -26
  120. package/deps/rocksdb/rocksdb/db/experimental.cc +4 -3
  121. package/deps/rocksdb/rocksdb/db/external_sst_file_basic_test.cc +464 -78
  122. package/deps/rocksdb/rocksdb/db/external_sst_file_ingestion_job.cc +166 -69
  123. package/deps/rocksdb/rocksdb/db/external_sst_file_ingestion_job.h +54 -25
  124. package/deps/rocksdb/rocksdb/db/external_sst_file_test.cc +1 -3
  125. package/deps/rocksdb/rocksdb/db/flush_job.cc +98 -81
  126. package/deps/rocksdb/rocksdb/db/flush_job.h +4 -9
  127. package/deps/rocksdb/rocksdb/db/flush_job_test.cc +80 -84
  128. package/deps/rocksdb/rocksdb/db/forward_iterator.cc +1 -1
  129. package/deps/rocksdb/rocksdb/db/forward_iterator.h +2 -2
  130. package/deps/rocksdb/rocksdb/db/import_column_family_job.cc +12 -19
  131. package/deps/rocksdb/rocksdb/db/import_column_family_test.cc +0 -2
  132. package/deps/rocksdb/rocksdb/db/internal_stats.cc +41 -15
  133. package/deps/rocksdb/rocksdb/db/internal_stats.h +63 -52
  134. package/deps/rocksdb/rocksdb/db/job_context.h +59 -24
  135. package/deps/rocksdb/rocksdb/db/listener_test.cc +69 -10
  136. package/deps/rocksdb/rocksdb/db/log_format.h +11 -2
  137. package/deps/rocksdb/rocksdb/db/log_reader.cc +147 -34
  138. package/deps/rocksdb/rocksdb/db/log_reader.h +40 -11
  139. package/deps/rocksdb/rocksdb/db/log_test.cc +16 -3
  140. package/deps/rocksdb/rocksdb/db/log_writer.cc +102 -55
  141. package/deps/rocksdb/rocksdb/db/log_writer.h +21 -2
  142. package/deps/rocksdb/rocksdb/db/malloc_stats.h +0 -2
  143. package/deps/rocksdb/rocksdb/db/memtable.cc +16 -47
  144. package/deps/rocksdb/rocksdb/db/memtable.h +76 -12
  145. package/deps/rocksdb/rocksdb/db/memtable_list.cc +23 -20
  146. package/deps/rocksdb/rocksdb/db/memtable_list.h +9 -11
  147. package/deps/rocksdb/rocksdb/db/memtable_list_test.cc +18 -37
  148. package/deps/rocksdb/rocksdb/db/merge_context.h +2 -1
  149. package/deps/rocksdb/rocksdb/db/merge_test.cc +8 -0
  150. package/deps/rocksdb/rocksdb/db/obsolete_files_test.cc +3 -5
  151. package/deps/rocksdb/rocksdb/db/periodic_task_scheduler.cc +15 -7
  152. package/deps/rocksdb/rocksdb/db/periodic_task_scheduler.h +6 -3
  153. package/deps/rocksdb/rocksdb/db/periodic_task_scheduler_test.cc +22 -4
  154. package/deps/rocksdb/rocksdb/db/plain_table_db_test.cc +41 -1
  155. package/deps/rocksdb/rocksdb/db/prefix_test.cc +0 -1
  156. package/deps/rocksdb/rocksdb/db/repair.cc +29 -34
  157. package/deps/rocksdb/rocksdb/db/repair_test.cc +0 -1
  158. package/deps/rocksdb/rocksdb/db/seqno_time_test.cc +14 -15
  159. package/deps/rocksdb/rocksdb/db/seqno_to_time_mapping.cc +1 -3
  160. package/deps/rocksdb/rocksdb/db/seqno_to_time_mapping.h +47 -1
  161. package/deps/rocksdb/rocksdb/db/table_cache.cc +3 -3
  162. package/deps/rocksdb/rocksdb/db/transaction_log_impl.cc +1 -3
  163. package/deps/rocksdb/rocksdb/db/transaction_log_impl.h +2 -1
  164. package/deps/rocksdb/rocksdb/db/version_builder.cc +2 -2
  165. package/deps/rocksdb/rocksdb/db/version_edit.cc +8 -37
  166. package/deps/rocksdb/rocksdb/db/version_edit.h +32 -1
  167. package/deps/rocksdb/rocksdb/db/version_edit_handler.cc +26 -18
  168. package/deps/rocksdb/rocksdb/db/version_edit_handler.h +7 -5
  169. package/deps/rocksdb/rocksdb/db/version_set.cc +282 -197
  170. package/deps/rocksdb/rocksdb/db/version_set.h +54 -57
  171. package/deps/rocksdb/rocksdb/db/version_set_test.cc +28 -35
  172. package/deps/rocksdb/rocksdb/db/version_util.h +2 -3
  173. package/deps/rocksdb/rocksdb/db/wal_manager.cc +3 -2
  174. package/deps/rocksdb/rocksdb/db/wal_manager.h +0 -1
  175. package/deps/rocksdb/rocksdb/db/wal_manager_test.cc +0 -1
  176. package/deps/rocksdb/rocksdb/db/wide/wide_columns.cc +1 -0
  177. package/deps/rocksdb/rocksdb/db/write_batch.cc +22 -8
  178. package/deps/rocksdb/rocksdb/db/write_batch_internal.h +5 -4
  179. package/deps/rocksdb/rocksdb/db/write_batch_test.cc +7 -6
  180. package/deps/rocksdb/rocksdb/db/write_callback_test.cc +3 -4
  181. package/deps/rocksdb/rocksdb/db/write_thread.h +3 -3
  182. package/deps/rocksdb/rocksdb/db_stress_tool/cf_consistency_stress.cc +13 -5
  183. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_common.h +9 -2
  184. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_compaction_service.h +39 -0
  185. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_compression_manager.h +65 -0
  186. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_gflags.cc +45 -22
  187. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_listener.h +7 -4
  188. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_shared_state.h +22 -5
  189. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_table_properties_collector.h +28 -3
  190. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_test_base.cc +143 -38
  191. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_test_base.h +4 -3
  192. package/deps/rocksdb/rocksdb/db_stress_tool/expected_state.cc +80 -32
  193. package/deps/rocksdb/rocksdb/db_stress_tool/expected_state.h +51 -2
  194. package/deps/rocksdb/rocksdb/db_stress_tool/multi_ops_txns_stress.cc +23 -1
  195. package/deps/rocksdb/rocksdb/db_stress_tool/no_batched_ops_stress.cc +305 -15
  196. package/deps/rocksdb/rocksdb/env/env.cc +32 -2
  197. package/deps/rocksdb/rocksdb/env/env_encryption.cc +0 -2
  198. package/deps/rocksdb/rocksdb/env/env_encryption_ctr.h +2 -4
  199. package/deps/rocksdb/rocksdb/env/env_posix.cc +4 -2
  200. package/deps/rocksdb/rocksdb/env/env_test.cc +0 -1
  201. package/deps/rocksdb/rocksdb/env/fs_posix.cc +20 -11
  202. package/deps/rocksdb/rocksdb/env/fs_readonly.h +0 -2
  203. package/deps/rocksdb/rocksdb/env/fs_remap.cc +0 -2
  204. package/deps/rocksdb/rocksdb/env/fs_remap.h +0 -2
  205. package/deps/rocksdb/rocksdb/env/io_posix.cc +6 -4
  206. package/deps/rocksdb/rocksdb/env/io_posix.h +3 -2
  207. package/deps/rocksdb/rocksdb/env/mock_env.cc +0 -1
  208. package/deps/rocksdb/rocksdb/file/delete_scheduler.cc +2 -2
  209. package/deps/rocksdb/rocksdb/file/delete_scheduler.h +0 -2
  210. package/deps/rocksdb/rocksdb/file/delete_scheduler_test.cc +0 -2
  211. package/deps/rocksdb/rocksdb/file/file_prefetch_buffer.cc +30 -21
  212. package/deps/rocksdb/rocksdb/file/file_prefetch_buffer.h +16 -0
  213. package/deps/rocksdb/rocksdb/file/file_util.cc +32 -14
  214. package/deps/rocksdb/rocksdb/file/file_util.h +22 -5
  215. package/deps/rocksdb/rocksdb/file/prefetch_test.cc +229 -76
  216. package/deps/rocksdb/rocksdb/file/random_access_file_reader.cc +21 -12
  217. package/deps/rocksdb/rocksdb/file/random_access_file_reader.h +10 -7
  218. package/deps/rocksdb/rocksdb/file/random_access_file_reader_test.cc +12 -8
  219. package/deps/rocksdb/rocksdb/file/sst_file_manager_impl.cc +1 -2
  220. package/deps/rocksdb/rocksdb/file/sst_file_manager_impl.h +0 -2
  221. package/deps/rocksdb/rocksdb/file/writable_file_writer.cc +3 -3
  222. package/deps/rocksdb/rocksdb/include/rocksdb/advanced_compression.h +598 -0
  223. package/deps/rocksdb/rocksdb/include/rocksdb/advanced_iterator.h +36 -0
  224. package/deps/rocksdb/rocksdb/include/rocksdb/advanced_options.h +70 -11
  225. package/deps/rocksdb/rocksdb/include/rocksdb/c.h +232 -11
  226. package/deps/rocksdb/rocksdb/include/rocksdb/cache.h +1 -1
  227. package/deps/rocksdb/rocksdb/include/rocksdb/compaction_job_stats.h +3 -1
  228. package/deps/rocksdb/rocksdb/include/rocksdb/compression_type.h +149 -15
  229. package/deps/rocksdb/rocksdb/include/rocksdb/convenience.h +17 -2
  230. package/deps/rocksdb/rocksdb/include/rocksdb/data_structure.h +132 -34
  231. package/deps/rocksdb/rocksdb/include/rocksdb/db.h +158 -79
  232. package/deps/rocksdb/rocksdb/include/rocksdb/db_bench_tool.h +2 -1
  233. package/deps/rocksdb/rocksdb/include/rocksdb/env.h +4 -5
  234. package/deps/rocksdb/rocksdb/include/rocksdb/env_encryption.h +1 -3
  235. package/deps/rocksdb/rocksdb/include/rocksdb/experimental.h +5 -0
  236. package/deps/rocksdb/rocksdb/include/rocksdb/external_table.h +275 -0
  237. package/deps/rocksdb/rocksdb/include/rocksdb/file_checksum.h +2 -1
  238. package/deps/rocksdb/rocksdb/include/rocksdb/file_system.h +50 -5
  239. package/deps/rocksdb/rocksdb/include/rocksdb/iostats_context.h +10 -0
  240. package/deps/rocksdb/rocksdb/include/rocksdb/iterator.h +13 -0
  241. package/deps/rocksdb/rocksdb/include/rocksdb/ldb_tool.h +0 -1
  242. package/deps/rocksdb/rocksdb/include/rocksdb/listener.h +5 -2
  243. package/deps/rocksdb/rocksdb/include/rocksdb/memtablerep.h +13 -0
  244. package/deps/rocksdb/rocksdb/include/rocksdb/multi_scan.h +237 -0
  245. package/deps/rocksdb/rocksdb/include/rocksdb/options.h +230 -39
  246. package/deps/rocksdb/rocksdb/include/rocksdb/perf_context.h +15 -0
  247. package/deps/rocksdb/rocksdb/include/rocksdb/perf_level.h +31 -11
  248. package/deps/rocksdb/rocksdb/include/rocksdb/slice.h +41 -0
  249. package/deps/rocksdb/rocksdb/include/rocksdb/slice_transform.h +1 -1
  250. package/deps/rocksdb/rocksdb/include/rocksdb/sst_dump_tool.h +0 -1
  251. package/deps/rocksdb/rocksdb/include/rocksdb/sst_file_reader.h +5 -1
  252. package/deps/rocksdb/rocksdb/include/rocksdb/sst_file_writer.h +0 -1
  253. package/deps/rocksdb/rocksdb/include/rocksdb/statistics.h +18 -3
  254. package/deps/rocksdb/rocksdb/include/rocksdb/status.h +2 -0
  255. package/deps/rocksdb/rocksdb/include/rocksdb/table.h +20 -8
  256. package/deps/rocksdb/rocksdb/include/rocksdb/table_properties.h +19 -2
  257. package/deps/rocksdb/rocksdb/include/rocksdb/thread_status.h +1 -1
  258. package/deps/rocksdb/rocksdb/include/rocksdb/tool_hooks.h +124 -0
  259. package/deps/rocksdb/rocksdb/include/rocksdb/trace_record.h +1 -0
  260. package/deps/rocksdb/rocksdb/include/rocksdb/universal_compaction.h +26 -1
  261. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/backup_engine.h +55 -6
  262. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/debug.h +3 -5
  263. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/env_mirror.h +0 -2
  264. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/ldb_cmd.h +1 -2
  265. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/memory_util.h +0 -1
  266. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/object_registry.h +1 -2
  267. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/options_util.h +0 -1
  268. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/secondary_index.h +96 -8
  269. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/secondary_index_faiss.h +117 -0
  270. package/deps/rocksdb/rocksdb/{utilities/secondary_index/faiss_ivf_index.h → include/rocksdb/utilities/secondary_index_simple.h} +11 -14
  271. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/stackable_db.h +26 -11
  272. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/table_properties_collectors.h +16 -3
  273. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/transaction.h +0 -2
  274. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/transaction_db.h +63 -7
  275. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/transaction_db_mutex.h +0 -1
  276. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/write_batch_with_index.h +28 -12
  277. package/deps/rocksdb/rocksdb/include/rocksdb/version.h +3 -3
  278. package/deps/rocksdb/rocksdb/logging/auto_roll_logger_test.cc +0 -2
  279. package/deps/rocksdb/rocksdb/logging/event_logger_test.cc +1 -2
  280. package/deps/rocksdb/rocksdb/memory/memory_allocator_impl.h +1 -1
  281. package/deps/rocksdb/rocksdb/memory/memory_allocator_test.cc +0 -1
  282. package/deps/rocksdb/rocksdb/memtable/hash_linklist_rep.cc +0 -1
  283. package/deps/rocksdb/rocksdb/memtable/memtablerep_bench.cc +3 -1
  284. package/deps/rocksdb/rocksdb/memtable/skiplist.h +2 -2
  285. package/deps/rocksdb/rocksdb/memtable/skiplistrep.cc +2 -4
  286. package/deps/rocksdb/rocksdb/memtable/vectorrep.cc +69 -8
  287. package/deps/rocksdb/rocksdb/memtable/wbwi_memtable.cc +32 -9
  288. package/deps/rocksdb/rocksdb/memtable/wbwi_memtable.h +58 -45
  289. package/deps/rocksdb/rocksdb/monitoring/histogram.h +1 -1
  290. package/deps/rocksdb/rocksdb/monitoring/perf_context.cc +5 -3
  291. package/deps/rocksdb/rocksdb/monitoring/statistics.cc +5 -0
  292. package/deps/rocksdb/rocksdb/monitoring/stats_history_test.cc +1 -1
  293. package/deps/rocksdb/rocksdb/monitoring/thread_status_util_debug.cc +3 -2
  294. package/deps/rocksdb/rocksdb/options/cf_options.cc +44 -13
  295. package/deps/rocksdb/rocksdb/options/cf_options.h +21 -7
  296. package/deps/rocksdb/rocksdb/options/configurable.cc +5 -5
  297. package/deps/rocksdb/rocksdb/options/configurable_test.h +1 -2
  298. package/deps/rocksdb/rocksdb/options/customizable.cc +0 -1
  299. package/deps/rocksdb/rocksdb/options/customizable_test.cc +4 -11
  300. package/deps/rocksdb/rocksdb/options/db_options.cc +18 -15
  301. package/deps/rocksdb/rocksdb/options/db_options.h +2 -2
  302. package/deps/rocksdb/rocksdb/options/options.cc +296 -305
  303. package/deps/rocksdb/rocksdb/options/options_helper.cc +188 -62
  304. package/deps/rocksdb/rocksdb/options/options_helper.h +3 -3
  305. package/deps/rocksdb/rocksdb/options/options_parser.cc +2 -4
  306. package/deps/rocksdb/rocksdb/options/options_parser.h +0 -1
  307. package/deps/rocksdb/rocksdb/options/options_settable_test.cc +17 -4
  308. package/deps/rocksdb/rocksdb/options/options_test.cc +101 -76
  309. package/deps/rocksdb/rocksdb/port/lang.h +2 -1
  310. package/deps/rocksdb/rocksdb/port/port_posix.cc +2 -1
  311. package/deps/rocksdb/rocksdb/port/stack_trace.cc +5 -4
  312. package/deps/rocksdb/rocksdb/port/win/env_win.cc +3 -2
  313. package/deps/rocksdb/rocksdb/port/win/xpress_win.cc +99 -1
  314. package/deps/rocksdb/rocksdb/port/win/xpress_win.h +6 -0
  315. package/deps/rocksdb/rocksdb/src.mk +17 -11
  316. package/deps/rocksdb/rocksdb/table/adaptive/adaptive_table_factory.h +0 -1
  317. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_builder.cc +1094 -929
  318. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_builder.h +6 -19
  319. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_factory.cc +76 -22
  320. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_factory.h +2 -0
  321. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader.cc +221 -131
  322. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader.h +12 -9
  323. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader_impl.h +23 -24
  324. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader_sync_and_async.h +38 -38
  325. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader_test.cc +7 -4
  326. package/deps/rocksdb/rocksdb/table/block_based/block_cache.cc +5 -5
  327. package/deps/rocksdb/rocksdb/table/block_based/block_cache.h +10 -12
  328. package/deps/rocksdb/rocksdb/table/block_based/block_prefetcher.cc +6 -4
  329. package/deps/rocksdb/rocksdb/table/block_based/block_test.cc +35 -43
  330. package/deps/rocksdb/rocksdb/table/block_based/data_block_hash_index_test.cc +2 -1
  331. package/deps/rocksdb/rocksdb/table/block_based/filter_block.h +1 -1
  332. package/deps/rocksdb/rocksdb/table/block_based/filter_block_reader_common.cc +1 -2
  333. package/deps/rocksdb/rocksdb/table/block_based/filter_policy.cc +0 -4
  334. package/deps/rocksdb/rocksdb/table/block_based/filter_policy_internal.h +0 -1
  335. package/deps/rocksdb/rocksdb/table/block_based/hash_index_reader.cc +3 -3
  336. package/deps/rocksdb/rocksdb/table/block_based/index_reader_common.cc +3 -3
  337. package/deps/rocksdb/rocksdb/table/block_based/partitioned_filter_block.cc +4 -4
  338. package/deps/rocksdb/rocksdb/table/block_based/partitioned_index_reader.cc +1 -1
  339. package/deps/rocksdb/rocksdb/table/block_based/uncompression_dict_reader.cc +4 -5
  340. package/deps/rocksdb/rocksdb/table/block_based/uncompression_dict_reader.h +4 -4
  341. package/deps/rocksdb/rocksdb/table/block_fetcher.cc +37 -35
  342. package/deps/rocksdb/rocksdb/table/block_fetcher.h +11 -7
  343. package/deps/rocksdb/rocksdb/table/block_fetcher_test.cc +4 -3
  344. package/deps/rocksdb/rocksdb/table/compaction_merging_iterator.cc +31 -5
  345. package/deps/rocksdb/rocksdb/table/compaction_merging_iterator.h +2 -1
  346. package/deps/rocksdb/rocksdb/table/cuckoo/cuckoo_table_builder.h +0 -1
  347. package/deps/rocksdb/rocksdb/table/cuckoo/cuckoo_table_builder_test.cc +0 -1
  348. package/deps/rocksdb/rocksdb/table/cuckoo/cuckoo_table_reader_test.cc +0 -1
  349. package/deps/rocksdb/rocksdb/table/external_table.cc +483 -0
  350. package/deps/rocksdb/rocksdb/table/format.cc +62 -44
  351. package/deps/rocksdb/rocksdb/table/format.h +35 -12
  352. package/deps/rocksdb/rocksdb/table/internal_iterator.h +3 -13
  353. package/deps/rocksdb/rocksdb/table/iterator_wrapper.h +8 -0
  354. package/deps/rocksdb/rocksdb/table/merging_iterator.cc +6 -0
  355. package/deps/rocksdb/rocksdb/table/meta_blocks.cc +150 -141
  356. package/deps/rocksdb/rocksdb/table/meta_blocks.h +5 -0
  357. package/deps/rocksdb/rocksdb/table/multiget_context.h +3 -2
  358. package/deps/rocksdb/rocksdb/table/plain/plain_table_builder.cc +8 -0
  359. package/deps/rocksdb/rocksdb/table/plain/plain_table_index.cc +0 -1
  360. package/deps/rocksdb/rocksdb/table/plain/plain_table_index.h +0 -2
  361. package/deps/rocksdb/rocksdb/table/plain/plain_table_key_coding.h +0 -2
  362. package/deps/rocksdb/rocksdb/table/plain/plain_table_reader.cc +0 -1
  363. package/deps/rocksdb/rocksdb/table/sst_file_dumper.cc +6 -6
  364. package/deps/rocksdb/rocksdb/table/sst_file_dumper.h +0 -1
  365. package/deps/rocksdb/rocksdb/table/sst_file_reader.cc +86 -7
  366. package/deps/rocksdb/rocksdb/table/sst_file_reader_test.cc +88 -2
  367. package/deps/rocksdb/rocksdb/table/sst_file_writer.cc +0 -1
  368. package/deps/rocksdb/rocksdb/table/table_builder.h +10 -1
  369. package/deps/rocksdb/rocksdb/table/table_reader_bench.cc +3 -2
  370. package/deps/rocksdb/rocksdb/table/table_test.cc +899 -22
  371. package/deps/rocksdb/rocksdb/test_util/testutil.cc +3 -4
  372. package/deps/rocksdb/rocksdb/test_util/testutil.h +132 -1
  373. package/deps/rocksdb/rocksdb/test_util/transaction_test_util.cc +0 -1
  374. package/deps/rocksdb/rocksdb/test_util/transaction_test_util.h +0 -2
  375. package/deps/rocksdb/rocksdb/tools/db_bench_tool.cc +163 -77
  376. package/deps/rocksdb/rocksdb/tools/db_bench_tool_test.cc +0 -2
  377. package/deps/rocksdb/rocksdb/tools/db_repl_stress.cc +0 -1
  378. package/deps/rocksdb/rocksdb/tools/dump/db_dump_tool.cc +0 -1
  379. package/deps/rocksdb/rocksdb/tools/ldb_cmd.cc +120 -52
  380. package/deps/rocksdb/rocksdb/tools/ldb_cmd_test.cc +1 -0
  381. package/deps/rocksdb/rocksdb/tools/ldb_tool.cc +1 -1
  382. package/deps/rocksdb/rocksdb/tools/reduce_levels_test.cc +0 -2
  383. package/deps/rocksdb/rocksdb/tools/simulated_hybrid_file_system.cc +2 -2
  384. package/deps/rocksdb/rocksdb/tools/simulated_hybrid_file_system.h +0 -2
  385. package/deps/rocksdb/rocksdb/tools/sst_dump_tool.cc +2 -1
  386. package/deps/rocksdb/rocksdb/tools/tool_hooks.cc +94 -0
  387. package/deps/rocksdb/rocksdb/tools/trace_analyzer_tool.cc +0 -1
  388. package/deps/rocksdb/rocksdb/tools/trace_analyzer_tool.h +0 -1
  389. package/deps/rocksdb/rocksdb/trace_replay/io_tracer.cc +1 -1
  390. package/deps/rocksdb/rocksdb/trace_replay/io_tracer_test.cc +2 -1
  391. package/deps/rocksdb/rocksdb/trace_replay/trace_replay.cc +3 -5
  392. package/deps/rocksdb/rocksdb/util/async_file_reader.cc +1 -1
  393. package/deps/rocksdb/rocksdb/util/async_file_reader.h +15 -8
  394. package/deps/rocksdb/rocksdb/util/auto_skip_compressor.cc +131 -0
  395. package/deps/rocksdb/rocksdb/util/auto_skip_compressor.h +90 -0
  396. package/deps/rocksdb/rocksdb/util/autovector.h +1 -1
  397. package/deps/rocksdb/rocksdb/util/autovector_test.cc +2 -2
  398. package/deps/rocksdb/rocksdb/util/compaction_job_stats_impl.cc +0 -2
  399. package/deps/rocksdb/rocksdb/util/compression.cc +936 -4
  400. package/deps/rocksdb/rocksdb/util/compression.h +348 -232
  401. package/deps/rocksdb/rocksdb/util/compression_test.cc +229 -0
  402. package/deps/rocksdb/rocksdb/util/crc32c_arm64.cc +10 -10
  403. package/deps/rocksdb/rocksdb/util/crc32c_ppc.c +1 -0
  404. package/deps/rocksdb/rocksdb/util/data_structure.cc +2 -0
  405. package/deps/rocksdb/rocksdb/util/file_reader_writer_test.cc +1 -3
  406. package/deps/rocksdb/rocksdb/util/ppc-opcode.h +5 -5
  407. package/deps/rocksdb/rocksdb/util/simple_mixed_compressor.cc +108 -0
  408. package/deps/rocksdb/rocksdb/util/simple_mixed_compressor.h +67 -0
  409. package/deps/rocksdb/rocksdb/util/slice_test.cc +83 -0
  410. package/deps/rocksdb/rocksdb/util/string_util.cc +0 -2
  411. package/deps/rocksdb/rocksdb/util/string_util.h +10 -0
  412. package/deps/rocksdb/rocksdb/util/thread_operation.h +2 -1
  413. package/deps/rocksdb/rocksdb/util/udt_util.cc +18 -5
  414. package/deps/rocksdb/rocksdb/util/udt_util.h +10 -7
  415. package/deps/rocksdb/rocksdb/utilities/backup/backup_engine.cc +650 -154
  416. package/deps/rocksdb/rocksdb/utilities/backup/backup_engine_test.cc +438 -144
  417. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_compaction_filter.cc +0 -1
  418. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_db.h +0 -1
  419. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_db_gc_stats.h +0 -1
  420. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_db_impl.cc +16 -17
  421. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_db_impl.h +2 -1
  422. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_db_impl_filesnapshot.cc +0 -1
  423. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_db_listener.h +0 -1
  424. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_db_test.cc +7 -8
  425. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_dump_tool.cc +4 -3
  426. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_dump_tool.h +0 -1
  427. package/deps/rocksdb/rocksdb/utilities/cache_dump_load.cc +0 -1
  428. package/deps/rocksdb/rocksdb/utilities/cache_dump_load_impl.cc +2 -2
  429. package/deps/rocksdb/rocksdb/utilities/checkpoint/checkpoint_impl.cc +1 -1
  430. package/deps/rocksdb/rocksdb/utilities/checkpoint/checkpoint_test.cc +0 -48
  431. package/deps/rocksdb/rocksdb/utilities/compaction_filters/remove_emptyvalue_compactionfilter.cc +0 -1
  432. package/deps/rocksdb/rocksdb/utilities/compaction_filters/remove_emptyvalue_compactionfilter.h +0 -1
  433. package/deps/rocksdb/rocksdb/utilities/debug.cc +7 -14
  434. package/deps/rocksdb/rocksdb/utilities/env_mirror.cc +0 -1
  435. package/deps/rocksdb/rocksdb/utilities/env_mirror_test.cc +0 -2
  436. package/deps/rocksdb/rocksdb/utilities/env_timed.cc +0 -1
  437. package/deps/rocksdb/rocksdb/utilities/env_timed_test.cc +0 -2
  438. package/deps/rocksdb/rocksdb/utilities/fault_injection_fs.cc +5 -3
  439. package/deps/rocksdb/rocksdb/utilities/fault_injection_fs.h +10 -9
  440. package/deps/rocksdb/rocksdb/utilities/memory/memory_test.cc +0 -1
  441. package/deps/rocksdb/rocksdb/utilities/memory/memory_util.cc +0 -1
  442. package/deps/rocksdb/rocksdb/utilities/memory_allocators.h +1 -0
  443. package/deps/rocksdb/rocksdb/utilities/object_registry_test.cc +0 -2
  444. package/deps/rocksdb/rocksdb/utilities/options/options_util.cc +0 -1
  445. package/deps/rocksdb/rocksdb/utilities/options/options_util_test.cc +0 -1
  446. package/deps/rocksdb/rocksdb/utilities/persistent_cache/block_cache_tier.cc +0 -1
  447. package/deps/rocksdb/rocksdb/utilities/persistent_cache/block_cache_tier.h +0 -2
  448. package/deps/rocksdb/rocksdb/utilities/persistent_cache/block_cache_tier_file.h +0 -2
  449. package/deps/rocksdb/rocksdb/utilities/persistent_cache/block_cache_tier_metadata.cc +0 -1
  450. package/deps/rocksdb/rocksdb/utilities/persistent_cache/block_cache_tier_metadata.h +0 -2
  451. package/deps/rocksdb/rocksdb/utilities/persistent_cache/hash_table.h +0 -2
  452. package/deps/rocksdb/rocksdb/utilities/persistent_cache/hash_table_evictable.h +0 -2
  453. package/deps/rocksdb/rocksdb/utilities/persistent_cache/lrulist.h +0 -2
  454. package/deps/rocksdb/rocksdb/utilities/persistent_cache/persistent_cache_test.h +0 -2
  455. package/deps/rocksdb/rocksdb/utilities/persistent_cache/persistent_cache_tier.cc +0 -1
  456. package/deps/rocksdb/rocksdb/utilities/persistent_cache/persistent_cache_tier.h +0 -2
  457. package/deps/rocksdb/rocksdb/utilities/persistent_cache/volatile_tier_impl.cc +0 -1
  458. package/deps/rocksdb/rocksdb/utilities/persistent_cache/volatile_tier_impl.h +0 -2
  459. package/deps/rocksdb/rocksdb/utilities/secondary_index/faiss_ivf_index.cc +183 -32
  460. package/deps/rocksdb/rocksdb/utilities/secondary_index/faiss_ivf_index_test.cc +258 -12
  461. package/deps/rocksdb/rocksdb/utilities/secondary_index/secondary_index_helper.h +33 -0
  462. package/deps/rocksdb/rocksdb/utilities/secondary_index/secondary_index_iterator.cc +99 -0
  463. package/deps/rocksdb/rocksdb/utilities/secondary_index/secondary_index_mixin.h +280 -120
  464. package/deps/rocksdb/rocksdb/utilities/secondary_index/simple_secondary_index.cc +79 -0
  465. package/deps/rocksdb/rocksdb/utilities/table_properties_collectors/compact_on_deletion_collector.cc +52 -16
  466. package/deps/rocksdb/rocksdb/utilities/table_properties_collectors/compact_on_deletion_collector.h +10 -6
  467. package/deps/rocksdb/rocksdb/utilities/table_properties_collectors/compact_on_deletion_collector_test.cc +55 -0
  468. package/deps/rocksdb/rocksdb/utilities/trace/replayer_impl.cc +0 -1
  469. package/deps/rocksdb/rocksdb/utilities/transactions/lock/lock_manager.cc +0 -2
  470. package/deps/rocksdb/rocksdb/utilities/transactions/lock/lock_manager.h +0 -1
  471. package/deps/rocksdb/rocksdb/utilities/transactions/lock/point/point_lock_manager.cc +37 -12
  472. package/deps/rocksdb/rocksdb/utilities/transactions/lock/point/point_lock_manager.h +2 -0
  473. package/deps/rocksdb/rocksdb/utilities/transactions/lock/point/point_lock_manager_test.cc +0 -2
  474. package/deps/rocksdb/rocksdb/utilities/transactions/lock/point/point_lock_tracker.cc +0 -2
  475. package/deps/rocksdb/rocksdb/utilities/transactions/lock/range/range_locking_test.cc +1 -1
  476. package/deps/rocksdb/rocksdb/utilities/transactions/lock/range/range_tree/lib/db.h +1 -1
  477. package/deps/rocksdb/rocksdb/utilities/transactions/lock/range/range_tree/lib/portability/toku_time.h +1 -1
  478. package/deps/rocksdb/rocksdb/utilities/transactions/lock/range/range_tree/lib/util/dbt.cc +2 -1
  479. package/deps/rocksdb/rocksdb/utilities/transactions/lock/range/range_tree/range_tree_lock_manager.cc +2 -2
  480. package/deps/rocksdb/rocksdb/utilities/transactions/optimistic_transaction.cc +0 -1
  481. package/deps/rocksdb/rocksdb/utilities/transactions/optimistic_transaction.h +0 -2
  482. package/deps/rocksdb/rocksdb/utilities/transactions/optimistic_transaction_db_impl.cc +1 -3
  483. package/deps/rocksdb/rocksdb/utilities/transactions/pessimistic_transaction.cc +36 -10
  484. package/deps/rocksdb/rocksdb/utilities/transactions/pessimistic_transaction.h +5 -7
  485. package/deps/rocksdb/rocksdb/utilities/transactions/pessimistic_transaction_db.cc +4 -5
  486. package/deps/rocksdb/rocksdb/utilities/transactions/snapshot_checker.cc +1 -4
  487. package/deps/rocksdb/rocksdb/utilities/transactions/transaction_base.h +1 -2
  488. package/deps/rocksdb/rocksdb/utilities/transactions/transaction_db_mutex_impl.cc +0 -2
  489. package/deps/rocksdb/rocksdb/utilities/transactions/transaction_db_mutex_impl.h +0 -1
  490. package/deps/rocksdb/rocksdb/utilities/transactions/transaction_test.cc +1118 -37
  491. package/deps/rocksdb/rocksdb/utilities/transactions/transaction_test.h +4 -7
  492. package/deps/rocksdb/rocksdb/utilities/transactions/transaction_util.cc +0 -2
  493. package/deps/rocksdb/rocksdb/utilities/transactions/transaction_util.h +0 -2
  494. package/deps/rocksdb/rocksdb/utilities/transactions/write_prepared_transaction_test.cc +3 -3
  495. package/deps/rocksdb/rocksdb/utilities/transactions/write_prepared_txn_db.cc +0 -1
  496. package/deps/rocksdb/rocksdb/utilities/transactions/write_unprepared_transaction_test.cc +0 -2
  497. package/deps/rocksdb/rocksdb/utilities/transactions/write_unprepared_txn.cc +1 -2
  498. package/deps/rocksdb/rocksdb/utilities/transactions/write_unprepared_txn.h +1 -2
  499. package/deps/rocksdb/rocksdb/utilities/transactions/write_unprepared_txn_db.cc +0 -1
  500. package/deps/rocksdb/rocksdb/utilities/ttl/ttl_test.cc +0 -3
  501. package/deps/rocksdb/rocksdb/utilities/write_batch_with_index/write_batch_with_index.cc +125 -127
  502. package/deps/rocksdb/rocksdb/utilities/write_batch_with_index/write_batch_with_index_internal.cc +45 -23
  503. package/deps/rocksdb/rocksdb/utilities/write_batch_with_index/write_batch_with_index_internal.h +54 -22
  504. package/deps/rocksdb/rocksdb/utilities/write_batch_with_index/write_batch_with_index_test.cc +477 -58
  505. package/deps/rocksdb/rocksdb.gyp +9 -4
  506. package/index.js +50 -9
  507. package/package.json +8 -1
  508. package/prebuilds/darwin-arm64/@nxtedition+rocksdb.node +0 -0
  509. package/prebuilds/linux-x64/@nxtedition+rocksdb.node +0 -0
@@ -162,6 +162,23 @@ class Directories {
162
162
  std::unique_ptr<FSDirectory> wal_dir_;
163
163
  };
164
164
 
165
+ struct DBOpenLogRecordReadReporter : public log::Reader::Reporter {
166
+ Env* env;
167
+ Logger* info_log;
168
+ const char* fname;
169
+ Status* status; // nullptr if immutable_db_options_.paranoid_checks==false
170
+ bool* old_log_record;
171
+ void Corruption(size_t bytes, const Status& s,
172
+ uint64_t log_number = kMaxSequenceNumber) override;
173
+
174
+ void OldLogRecord(size_t bytes) override;
175
+
176
+ uint64_t GetCorruptedLogNumber() const { return corrupted_wal_number_; }
177
+
178
+ private:
179
+ uint64_t corrupted_wal_number_ = kMaxSequenceNumber;
180
+ };
181
+
165
182
  // While DB is the public interface of RocksDB, and DBImpl is the actual
166
183
  // class implementing it. It's the entrance of the core RocksDB engine.
167
184
  // All other DB implementations, e.g. TransactionDB, BlobDB, etc, wrap a
@@ -239,6 +256,10 @@ class DBImpl : public DB {
239
256
  Status WriteWithCallback(const WriteOptions& options, WriteBatch* updates,
240
257
  UserWriteCallback* user_write_cb) override;
241
258
 
259
+ Status IngestWriteBatchWithIndex(
260
+ const WriteOptions& options,
261
+ std::shared_ptr<WriteBatchWithIndex> wbwi) override;
262
+
242
263
  using DB::Get;
243
264
  Status Get(const ReadOptions& _read_options,
244
265
  ColumnFamilyHandle* column_family, const Slice& key,
@@ -362,6 +383,11 @@ class DBImpl : public DB {
362
383
  const std::vector<ColumnFamilyHandle*>& column_families,
363
384
  std::vector<Iterator*>* iterators) override;
364
385
 
386
+ using DB::NewMultiScan;
387
+ std::unique_ptr<MultiScan> NewMultiScan(
388
+ const ReadOptions& _read_options, ColumnFamilyHandle* column_family,
389
+ const std::vector<ScanOptions>& scan_opts) override;
390
+
365
391
  const Snapshot* GetSnapshot() override;
366
392
  void ReleaseSnapshot(const Snapshot* snapshot) override;
367
393
 
@@ -480,6 +506,9 @@ class DBImpl : public DB {
480
506
  Status GetFullHistoryTsLow(ColumnFamilyHandle* column_family,
481
507
  std::string* ts_low) override;
482
508
 
509
+ Status GetNewestUserDefinedTimestamp(ColumnFamilyHandle* column_family,
510
+ std::string* newest_timestamp) override;
511
+
483
512
  Status GetDbIdentity(std::string& identity) const override;
484
513
 
485
514
  virtual Status GetDbIdentityFromIdentityFile(const IOOptions& opts,
@@ -513,20 +542,19 @@ class DBImpl : public DB {
513
542
 
514
543
  // Get the known flushed sizes of WALs that might still be written to
515
544
  // or have pending sync.
516
- // NOTE: unlike alive_log_files_, this function includes WALs that might
545
+ // NOTE: unlike alive_wal_files_, this function includes WALs that might
517
546
  // be obsolete (but not obsolete to a pending Checkpoint) and not yet fully
518
547
  // synced.
519
548
  Status GetOpenWalSizes(std::map<uint64_t, uint64_t>& number_to_size);
520
- Status GetCurrentWalFile(std::unique_ptr<WalFile>* current_log_file) override;
549
+ Status GetCurrentWalFile(std::unique_ptr<WalFile>* current_wal_file) override;
521
550
  Status GetCreationTimeOfOldestFile(uint64_t* creation_time) override;
522
551
 
523
552
  Status GetUpdatesSince(
524
553
  SequenceNumber seq_number, std::unique_ptr<TransactionLogIterator>* iter,
525
554
  const TransactionLogIterator::ReadOptions& read_options =
526
555
  TransactionLogIterator::ReadOptions()) override;
527
- Status DeleteFile(std::string name) override;
528
556
  Status DeleteFilesInRanges(ColumnFamilyHandle* column_family,
529
- const RangePtr* ranges, size_t n,
557
+ const RangeOpt* ranges, size_t n,
530
558
  bool include_end = true);
531
559
 
532
560
  void GetLiveFilesMetaData(std::vector<LiveFileMetaData>* metadata) override;
@@ -635,6 +663,11 @@ class DBImpl : public DB {
635
663
  ColumnFamilyHandle* column_family, const Range* range, std::size_t n,
636
664
  TablePropertiesCollection* props) override;
637
665
 
666
+ Status GetPropertiesOfTablesByLevel(
667
+ ColumnFamilyHandle* column_family,
668
+ std::vector<std::unique_ptr<TablePropertiesCollection>>* props_by_level)
669
+ override;
670
+
638
671
  // ---- End of implementations of the DB interface ----
639
672
  SystemClock* GetSystemClock() const;
640
673
 
@@ -939,13 +972,6 @@ class DBImpl : public DB {
939
972
  return num_running_flushes_;
940
973
  }
941
974
 
942
- // Returns the number of currently running compactions.
943
- // REQUIREMENT: mutex_ must be held when calling this function.
944
- int num_running_compactions() {
945
- mutex_.AssertHeld();
946
- return num_running_compactions_;
947
- }
948
-
949
975
  const WriteController& write_controller() { return write_controller_; }
950
976
 
951
977
  // hollow transactions shell used for recovery.
@@ -1054,7 +1080,7 @@ class DBImpl : public DB {
1054
1080
 
1055
1081
  void AddToLogsToFreeQueue(log::Writer* log_writer) {
1056
1082
  mutex_.AssertHeld();
1057
- logs_to_free_queue_.push_back(log_writer);
1083
+ wals_to_free_queue_.push_back(log_writer);
1058
1084
  }
1059
1085
 
1060
1086
  void AddSuperVersionsToFreeQueue(SuperVersion* sv) {
@@ -1064,10 +1090,7 @@ class DBImpl : public DB {
1064
1090
  void SetSnapshotChecker(SnapshotChecker* snapshot_checker);
1065
1091
 
1066
1092
  // Fill JobContext with snapshot information needed by flush and compaction.
1067
- void GetSnapshotContext(JobContext* job_context,
1068
- std::vector<SequenceNumber>* snapshot_seqs,
1069
- SequenceNumber* earliest_write_conflict_snapshot,
1070
- SnapshotChecker** snapshot_checker);
1093
+ void InitSnapshotContext(JobContext* job_context);
1071
1094
 
1072
1095
  // Not thread-safe.
1073
1096
  void SetRecoverableStatePreReleaseCallback(PreReleaseCallback* callback);
@@ -1081,9 +1104,10 @@ class DBImpl : public DB {
1081
1104
  // This is to be used only by internal rocksdb classes.
1082
1105
  static Status Open(const DBOptions& db_options, const std::string& name,
1083
1106
  const std::vector<ColumnFamilyDescriptor>& column_families,
1084
- std::vector<ColumnFamilyHandle*>* handles, DB** dbptr,
1085
- const bool seq_per_batch, const bool batch_per_txn,
1086
- const bool is_retry, bool* can_retry);
1107
+ std::vector<ColumnFamilyHandle*>* handles,
1108
+ std::unique_ptr<DB>* dbptr, const bool seq_per_batch,
1109
+ const bool batch_per_txn, const bool is_retry,
1110
+ bool* can_retry);
1087
1111
 
1088
1112
  static IOStatus CreateAndNewDirectory(
1089
1113
  FileSystem* fs, const std::string& dirname,
@@ -1118,7 +1142,7 @@ class DBImpl : public DB {
1118
1142
  bool TEST_UnableToReleaseOldestLog() { return unable_to_release_oldest_log_; }
1119
1143
 
1120
1144
  bool TEST_IsLogGettingFlushed() {
1121
- return alive_log_files_.begin()->getting_flushed;
1145
+ return alive_wal_files_.begin()->getting_flushed;
1122
1146
  }
1123
1147
 
1124
1148
  Status TEST_SwitchMemtable(ColumnFamilyData* cfd = nullptr);
@@ -1198,7 +1222,9 @@ class DBImpl : public DB {
1198
1222
 
1199
1223
  uint64_t TEST_LogfileNumber();
1200
1224
 
1201
- uint64_t TEST_total_log_size() const { return total_log_size_; }
1225
+ uint64_t TEST_wals_total_size() const {
1226
+ return wals_total_size_.LoadRelaxed();
1227
+ }
1202
1228
 
1203
1229
  void TEST_GetAllBlockCaches(std::unordered_set<const Cache*>* cache_set);
1204
1230
 
@@ -1257,27 +1283,24 @@ class DBImpl : public DB {
1257
1283
  // flush LOG out of application buffer
1258
1284
  void FlushInfoLog();
1259
1285
 
1260
- // record current sequence number to time mapping. If
1261
- // populate_historical_seconds > 0 then pre-populate all the
1262
- // sequence numbers from [1, last] to map to [now minus
1263
- // populate_historical_seconds, now].
1264
- void RecordSeqnoToTimeMapping(uint64_t populate_historical_seconds);
1265
-
1266
- // Everytime DB's seqno to time mapping changed (which already hold the db
1267
- // mutex), we install a new SuperVersion in each column family with a shared
1268
- // copy of the new mapping while holding the db mutex.
1269
- // This is done for all column families even though the column family does not
1270
- // explicitly enabled the
1271
- // `preclude_last_level_data_seconds` or `preserve_internal_time_seconds`
1272
- // features.
1273
- // This mapping supports iterators to fulfill the
1274
- // "rocksdb.iterator.write-time" iterator property for entries in memtables.
1275
- //
1276
- // Since this new SuperVersion doesn't involve an LSM tree shape change, we
1277
- // don't schedule work after installing this SuperVersion. It returns the used
1278
- // `SuperVersionContext` for clean up after release mutex.
1279
- void InstallSeqnoToTimeMappingInSV(
1280
- std::vector<SuperVersionContext>* sv_contexts);
1286
+ // For the background timer job
1287
+ void RecordSeqnoToTimeMapping();
1288
+
1289
+ // Compactions rely on event triggers like flush/compaction/SetOptions.
1290
+ // We need to trigger periodic compactions even when there is no such trigger.
1291
+ // This function checks and schedules available compactions and will run
1292
+ // periodically.
1293
+ void TriggerPeriodicCompaction();
1294
+
1295
+ // REQUIRES: DB mutex held
1296
+ std::pair<SequenceNumber, uint64_t> GetSeqnoToTimeSample() const;
1297
+
1298
+ // REQUIRES: DB mutex held or during open
1299
+ void EnsureSeqnoToTimeMapping(const MinAndMaxPreserveSeconds& preserve_secs);
1300
+
1301
+ // Only called during open
1302
+ void PrepopulateSeqnoToTimeMapping(
1303
+ const MinAndMaxPreserveSeconds& preserve_secs);
1281
1304
 
1282
1305
  // Interface to block and signal the DB in case of stalling writes by
1283
1306
  // WriteBufferManager. Each DBImpl object contains ptr to WBMStallInterface.
@@ -1365,16 +1388,16 @@ class DBImpl : public DB {
1365
1388
 
1366
1389
  // State below is protected by mutex_
1367
1390
  // With two_write_queues enabled, some of the variables that are accessed during
1368
- // WriteToWAL need different synchronization: log_empty_, alive_log_files_,
1369
- // logs_, logfile_number_. Refer to the definition of each variable below for
1391
+ // WriteToWAL need different synchronization: wal_empty_, alive_wal_files_,
1392
+ // logs_, cur_wal_number_. Refer to the definition of each variable below for
1370
1393
  // more description.
1371
1394
  //
1372
1395
  // `mutex_` can be a hot lock in some workloads, so it deserves dedicated
1373
1396
  // cachelines.
1374
1397
  mutable CacheAlignedInstrumentedMutex mutex_;
1375
1398
 
1376
- ColumnFamilyHandleImpl* default_cf_handle_;
1377
- InternalStats* default_cf_internal_stats_;
1399
+ ColumnFamilyHandleImpl* default_cf_handle_ = nullptr;
1400
+ InternalStats* default_cf_internal_stats_ = nullptr;
1378
1401
 
1379
1402
  // table_cache_ provides its own synchronization
1380
1403
  std::shared_ptr<Cache> table_cache_;
@@ -1386,7 +1409,7 @@ class DBImpl : public DB {
1386
1409
 
1387
1410
  // only used for dynamically adjusting max_total_wal_size. it is a sum of
1388
1411
  // [write_buffer_size * max_write_buffer_number] over all column families
1389
- std::atomic<uint64_t> max_total_in_memory_state_;
1412
+ std::atomic<uint64_t> max_total_in_memory_state_ = 0;
1390
1413
 
1391
1414
  // The options to access storage files
1392
1415
  const FileOptions file_options_;
@@ -1413,14 +1436,14 @@ class DBImpl : public DB {
1413
1436
 
1414
1437
  // Each flush or compaction gets its own job id. this counter makes sure
1415
1438
  // they're unique
1416
- std::atomic<int> next_job_id_;
1439
+ std::atomic<int> next_job_id_ = 1;
1417
1440
 
1418
- std::atomic<bool> shutting_down_;
1441
+ std::atomic<bool> shutting_down_ = false;
1419
1442
 
1420
1443
  // No new background jobs can be queued if true. This is used to prevent new
1421
1444
  // background jobs from being queued after WaitForCompact() completes waiting
1422
1445
  // all background jobs then attempts to close when close_db_ option is true.
1423
- bool reject_new_background_jobs_;
1446
+ bool reject_new_background_jobs_ = false;
1424
1447
 
1425
1448
  // RecoveryContext struct stores the context about version edits along
1426
1449
  // with corresponding column_family_data and column_family_options.
@@ -1440,7 +1463,6 @@ class DBImpl : public DB {
1440
1463
  uint32_t size = static_cast<uint32_t>(map_.size());
1441
1464
  map_.emplace(cfd->GetID(), size);
1442
1465
  cfds_.emplace_back(cfd);
1443
- mutable_cf_opts_.emplace_back(cfd->GetLatestMutableCFOptions());
1444
1466
  edit_lists_.emplace_back(autovector<VersionEdit*>());
1445
1467
  }
1446
1468
  uint32_t i = map_[cfd->GetID()];
@@ -1449,7 +1471,6 @@ class DBImpl : public DB {
1449
1471
 
1450
1472
  std::unordered_map<uint32_t, uint32_t> map_; // cf_id to index;
1451
1473
  autovector<ColumnFamilyData*> cfds_;
1452
- autovector<const MutableCFOptions*> mutable_cf_opts_;
1453
1474
  autovector<autovector<VersionEdit*>> edit_lists_;
1454
1475
  // All existing data files (SST files and Blob files) found during DB::Open.
1455
1476
  std::vector<std::string> existing_data_files_;
@@ -1520,11 +1541,11 @@ class DBImpl : public DB {
1520
1541
  // ingests `wbwi` is done.
1521
1542
  // @param memtable_updated Whether the same write that ingests wbwi has
1522
1543
  // updated memtable. This is useful for determining whether to set bg
1523
- // error when IngestWBWI fails.
1524
- Status IngestWBWI(std::shared_ptr<WriteBatchWithIndex> wbwi,
1525
- const WBWIMemTable::SeqnoRange& assigned_seqno,
1526
- uint64_t min_prep_log, SequenceNumber last_seqno,
1527
- bool memtable_updated, bool ignore_missing_cf);
1544
+ // error when IngestWBWIAsMemtable fails.
1545
+ Status IngestWBWIAsMemtable(std::shared_ptr<WriteBatchWithIndex> wbwi,
1546
+ const WBWIMemTable::SeqnoRange& assigned_seqno,
1547
+ uint64_t min_prep_log, SequenceNumber last_seqno,
1548
+ bool memtable_updated, bool ignore_missing_cf);
1528
1549
 
1529
1550
  // If disable_memtable is set the application logic must guarantee that the
1530
1551
  // batch will still be skipped from memtable during the recovery. An exception
@@ -1554,18 +1575,17 @@ class DBImpl : public DB {
1554
1575
  Status WriteImpl(const WriteOptions& options, WriteBatch* updates,
1555
1576
  WriteCallback* callback = nullptr,
1556
1577
  UserWriteCallback* user_write_cb = nullptr,
1557
- uint64_t* log_used = nullptr, uint64_t log_ref = 0,
1578
+ uint64_t* wal_used = nullptr, uint64_t log_ref = 0,
1558
1579
  bool disable_memtable = false, uint64_t* seq_used = nullptr,
1559
1580
  size_t batch_cnt = 0,
1560
1581
  PreReleaseCallback* pre_release_callback = nullptr,
1561
1582
  PostMemTableCallback* post_memtable_callback = nullptr,
1562
- std::shared_ptr<WriteBatchWithIndex> wbwi = nullptr,
1563
- uint64_t min_prep_log = 0);
1583
+ std::shared_ptr<WriteBatchWithIndex> wbwi = nullptr);
1564
1584
 
1565
1585
  Status PipelinedWriteImpl(const WriteOptions& options, WriteBatch* updates,
1566
1586
  WriteCallback* callback = nullptr,
1567
1587
  UserWriteCallback* user_write_cb = nullptr,
1568
- uint64_t* log_used = nullptr, uint64_t log_ref = 0,
1588
+ uint64_t* wal_used = nullptr, uint64_t log_ref = 0,
1569
1589
  bool disable_memtable = false,
1570
1590
  uint64_t* seq_used = nullptr);
1571
1591
 
@@ -1592,7 +1612,7 @@ class DBImpl : public DB {
1592
1612
  Status WriteImplWALOnly(
1593
1613
  WriteThread* write_thread, const WriteOptions& options,
1594
1614
  WriteBatch* updates, WriteCallback* callback,
1595
- UserWriteCallback* user_write_cb, uint64_t* log_used,
1615
+ UserWriteCallback* user_write_cb, uint64_t* wal_used,
1596
1616
  const uint64_t log_ref, uint64_t* seq_used, const size_t sub_batch_cnt,
1597
1617
  PreReleaseCallback* pre_release_callback, const AssignOrder assign_order,
1598
1618
  const PublishLastSeq publish_last_seq, const bool disable_memtable);
@@ -1753,9 +1773,9 @@ class DBImpl : public DB {
1753
1773
  }
1754
1774
  };
1755
1775
 
1756
- struct LogFileNumberSize {
1757
- explicit LogFileNumberSize(uint64_t _number) : number(_number) {}
1758
- LogFileNumberSize() {}
1776
+ struct WalFileNumberSize {
1777
+ explicit WalFileNumberSize(uint64_t _number) : number(_number) {}
1778
+ WalFileNumberSize() {}
1759
1779
  void AddSize(uint64_t new_size) { size += new_size; }
1760
1780
  uint64_t number;
1761
1781
  uint64_t size = 0;
@@ -1777,6 +1797,13 @@ class DBImpl : public DB {
1777
1797
  if (writer->file()) {
1778
1798
  // TODO: plumb Env::IOActivity, Env::IOPriority
1779
1799
  s = writer->WriteBuffer(WriteOptions());
1800
+ if (attempt_truncate_size < SIZE_MAX &&
1801
+ attempt_truncate_size < writer->file()->GetFileSize()) {
1802
+ Status s2 = writer->file()->writable_file()->Truncate(
1803
+ attempt_truncate_size, IOOptions{}, nullptr);
1804
+ // This is just a best effort attempt
1805
+ s2.PermitUncheckedError();
1806
+ }
1780
1807
  }
1781
1808
  delete writer;
1782
1809
  writer = nullptr;
@@ -1809,6 +1836,11 @@ class DBImpl : public DB {
1809
1836
  getting_synced = false;
1810
1837
  }
1811
1838
 
1839
+ void SetAttemptTruncateSize(uint64_t size) {
1840
+ assert(attempt_truncate_size == SIZE_MAX);
1841
+ attempt_truncate_size = size;
1842
+ }
1843
+
1812
1844
  uint64_t number;
1813
1845
  // Visual Studio doesn't support deque's member to be noncopyable because
1814
1846
  // of a std::unique_ptr as a member.
@@ -1821,15 +1853,20 @@ class DBImpl : public DB {
1821
1853
  // to be persisted even if appends happen during sync so it can be used for
1822
1854
  // tracking the synced size in MANIFEST.
1823
1855
  uint64_t pre_sync_size = 0;
1856
+ // When < SIZE_MAX, attempt to truncate the WAL to this size on close,
1857
+ // because a bad entry was written to it beyond that point and it likely
1858
+ // won't be recoverable with the bad entry.
1859
+ uint64_t attempt_truncate_size = SIZE_MAX;
1824
1860
  };
1825
1861
 
1826
- struct LogContext {
1827
- explicit LogContext(bool need_sync = false)
1828
- : need_log_sync(need_sync), need_log_dir_sync(need_sync) {}
1829
- bool need_log_sync = false;
1830
- bool need_log_dir_sync = false;
1862
+ struct WalContext {
1863
+ explicit WalContext(bool need_sync = false)
1864
+ : need_wal_sync(need_sync), need_wal_dir_sync(need_sync) {}
1865
+ bool need_wal_sync = false;
1866
+ bool need_wal_dir_sync = false;
1831
1867
  log::Writer* writer = nullptr;
1832
- LogFileNumberSize* log_file_number_size = nullptr;
1868
+ WalFileNumberSize* wal_file_number_size = nullptr;
1869
+ uint64_t prev_size = SIZE_MAX;
1833
1870
  };
1834
1871
 
1835
1872
  // PurgeFileInfo is a structure to hold information of files to be deleted in
@@ -1910,8 +1947,8 @@ class DBImpl : public DB {
1910
1947
  const InternalKey* begin = nullptr; // nullptr means beginning of key range
1911
1948
  const InternalKey* end = nullptr; // nullptr means end of key range
1912
1949
  InternalKey* manual_end = nullptr; // how far we are compacting
1913
- InternalKey tmp_storage; // Used to keep track of compaction progress
1914
- InternalKey tmp_storage1; // Used to keep track of compaction progress
1950
+ InternalKey tmp_storage; // Used to keep track of compaction progress
1951
+ InternalKey tmp_storage1; // Used to keep track of compaction progress
1915
1952
 
1916
1953
  // When the user provides a canceled pointer in CompactRangeOptions, the
1917
1954
  // above variable is the reference of the user-provided
@@ -1921,12 +1958,19 @@ class DBImpl : public DB {
1921
1958
  };
1922
1959
  struct PrepickedCompaction {
1923
1960
  // background compaction takes ownership of `compaction`.
1961
+ // TODO(hx235): consider using std::shared_ptr for easier ownership
1962
+ // management
1924
1963
  Compaction* compaction;
1925
1964
  // caller retains ownership of `manual_compaction_state` as it is reused
1926
1965
  // across background compactions.
1927
1966
  ManualCompactionState* manual_compaction_state; // nullptr if non-manual
1928
1967
  // task limiter token is requested during compaction picking.
1929
1968
  std::unique_ptr<TaskLimiterToken> task_token;
1969
+ // If true, `compaction` is picked temporarily to express compaction intent
1970
+ // and will be released before re-picking a real compaction based on the
1971
+ // updated LSM shape when thread associated with `compaction` is ready to
1972
+ // run
1973
+ bool need_repick;
1930
1974
  };
1931
1975
 
1932
1976
  struct CompactionArg {
@@ -1971,7 +2015,7 @@ class DBImpl : public DB {
1971
2015
 
1972
2016
  // Follow-up work to user creating a column family or (families)
1973
2017
  Status WrapUpCreateColumnFamilies(
1974
- const ReadOptions& read_options, const WriteOptions& write_options,
2018
+ const WriteOptions& write_options,
1975
2019
  const std::vector<const ColumnFamilyOptions*>& cf_options);
1976
2020
 
1977
2021
  Status DropColumnFamilyImpl(ColumnFamilyHandle* column_family);
@@ -2017,14 +2061,13 @@ class DBImpl : public DB {
2017
2061
  // Flush the in-memory write buffer to storage. Switches to a new
2018
2062
  // log-file/memtable and writes a new descriptor iff successful. Then
2019
2063
  // installs a new super version for the column family.
2020
- Status FlushMemTableToOutputFile(
2021
- ColumnFamilyData* cfd, const MutableCFOptions& mutable_cf_options,
2022
- bool* madeProgress, JobContext* job_context, FlushReason flush_reason,
2023
- SuperVersionContext* superversion_context,
2024
- std::vector<SequenceNumber>& snapshot_seqs,
2025
- SequenceNumber earliest_write_conflict_snapshot,
2026
- SnapshotChecker* snapshot_checker, LogBuffer* log_buffer,
2027
- Env::Priority thread_pri);
2064
+ Status FlushMemTableToOutputFile(ColumnFamilyData* cfd,
2065
+ const MutableCFOptions& mutable_cf_options,
2066
+ bool* madeProgress, JobContext* job_context,
2067
+ FlushReason flush_reason,
2068
+ SuperVersionContext* superversion_context,
2069
+ LogBuffer* log_buffer,
2070
+ Env::Priority thread_pri);
2028
2071
 
2029
2072
  // Flush the memtables of (multiple) column families to multiple files on
2030
2073
  // persistent storage.
@@ -2037,12 +2080,105 @@ class DBImpl : public DB {
2037
2080
  JobContext* job_context, LogBuffer* log_buffer, Env::Priority thread_pri);
2038
2081
 
2039
2082
  // REQUIRES: log_numbers are sorted in ascending order
2040
- // corrupted_log_found is set to true if we recover from a corrupted log file.
2083
+ // corrupted_wal_found is set to true if we recover from a corrupted log file.
2041
2084
  Status RecoverLogFiles(const std::vector<uint64_t>& log_numbers,
2042
2085
  SequenceNumber* next_sequence, bool read_only,
2043
- bool is_retry, bool* corrupted_log_found,
2086
+ bool is_retry, bool* corrupted_wal_found,
2044
2087
  RecoveryContext* recovery_ctx);
2045
2088
 
2089
+ void SetupLogFilesRecovery(
2090
+ const std::vector<uint64_t>& wal_numbers,
2091
+ std::unordered_map<int, VersionEdit>* version_edits, int* job_id,
2092
+ uint64_t* min_wal_number);
2093
+
2094
+ Status ProcessLogFiles(const std::vector<uint64_t>& wal_numbers,
2095
+ bool read_only, bool is_retry, uint64_t min_wal_number,
2096
+ int job_id, SequenceNumber* next_sequence,
2097
+ std::unordered_map<int, VersionEdit>* version_edits,
2098
+ bool* corrupted_wal_found,
2099
+ RecoveryContext* recovery_ctx);
2100
+
2101
+ Status ProcessLogFile(
2102
+ uint64_t wal_number, uint64_t min_wal_number, bool is_retry,
2103
+ bool read_only, int job_id, SequenceNumber* next_sequence,
2104
+ bool* stop_replay_for_corruption, bool* stop_replay_by_wal_filter,
2105
+ uint64_t* corrupted_wal_number, bool* corrupted_wal_found,
2106
+ std::unordered_map<int, VersionEdit>* version_edits, bool* flushed,
2107
+ PredecessorWALInfo& predecessor_wal_info);
2108
+
2109
+ void SetupLogFileProcessing(uint64_t wal_number);
2110
+
2111
+ Status InitializeLogReader(uint64_t wal_number, bool is_retry,
2112
+ std::string& fname,
2113
+
2114
+ bool stop_replay_for_corruption,
2115
+ uint64_t min_wal_number,
2116
+ const PredecessorWALInfo& predecessor_wal_info,
2117
+ bool* const old_log_record,
2118
+ Status* const reporter_status,
2119
+ DBOpenLogRecordReadReporter* reporter,
2120
+ std::unique_ptr<log::Reader>& reader);
2121
+ Status ProcessLogRecord(
2122
+ Slice record, const std::unique_ptr<log::Reader>& reader,
2123
+ const UnorderedMap<uint32_t, size_t>& running_ts_sz, uint64_t wal_number,
2124
+ const std::string& fname, bool read_only, int job_id,
2125
+ const std::function<void()>& logFileDropped,
2126
+ DBOpenLogRecordReadReporter* reporter, uint64_t* record_checksum,
2127
+ SequenceNumber* last_seqno_observed, SequenceNumber* next_sequence,
2128
+ bool* stop_replay_for_corruption, Status* status,
2129
+ bool* stop_replay_by_wal_filter,
2130
+ std::unordered_map<int, VersionEdit>* version_edits, bool* flushed);
2131
+
2132
+ Status InitializeWriteBatchForLogRecord(
2133
+ Slice record, const std::unique_ptr<log::Reader>& reader,
2134
+ const UnorderedMap<uint32_t, size_t>& running_ts_sz, WriteBatch* batch,
2135
+ std::unique_ptr<WriteBatch>& new_batch, WriteBatch*& batch_to_use,
2136
+ uint64_t* record_checksum);
2137
+
2138
+ void MaybeReviseStopReplayForCorruption(
2139
+ SequenceNumber sequence, SequenceNumber const* const next_sequence,
2140
+ bool* stop_replay_for_corruption);
2141
+
2142
+ Status InsertLogRecordToMemtable(WriteBatch* batch_to_use,
2143
+ uint64_t wal_number,
2144
+ SequenceNumber* next_sequence,
2145
+ bool* has_valid_writes);
2146
+
2147
+ Status MaybeWriteLevel0TableForRecovery(
2148
+ bool has_valid_writes, bool read_only, uint64_t wal_number, int job_id,
2149
+ SequenceNumber const* const next_sequence,
2150
+ std::unordered_map<int, VersionEdit>* version_edits, bool* flushed);
2151
+
2152
+ Status HandleNonOkStatusOrOldLogRecord(
2153
+ uint64_t wal_number, SequenceNumber const* const next_sequence,
2154
+ Status status, const DBOpenLogRecordReadReporter& reporter,
2155
+ bool* old_log_record, bool* stop_replay_for_corruption,
2156
+ uint64_t* corrupted_wal_number, bool* corrupted_wal_found);
2157
+
2158
+ Status UpdatePredecessorWALInfo(uint64_t wal_number,
2159
+ const SequenceNumber last_seqno_observed,
2160
+ const std::string& fname,
2161
+ PredecessorWALInfo& predecessor_wal_info);
2162
+
2163
+ void FinishLogFileProcessing(const Status& status,
2164
+ const SequenceNumber* next_sequence);
2165
+
2166
+ // Return `Status::Corruption()` when `stop_replay_for_corruption == true` and
2167
+ // there exists an inconsistency between SST and WAL data
2168
+ Status MaybeHandleStopReplayForCorruptionForInconsistency(
2169
+ bool stop_replay_for_corruption, uint64_t corrupted_wal_number);
2170
+
2171
+ Status MaybeFlushFinalMemtableOrRestoreActiveLogFiles(
2172
+ const std::vector<uint64_t>& wal_numbers, bool read_only, int job_id,
2173
+ bool flushed, std::unordered_map<int, VersionEdit>* version_edits,
2174
+ RecoveryContext* recovery_ctx);
2175
+
2176
+ // Check that DB sequence number is not set back during recovery between
2177
+ // replaying of WAL files and between replaying of WriteBatches.
2178
+ Status CheckSeqnoNotSetBackDuringRecovery(SequenceNumber prev_next_seqno,
2179
+ SequenceNumber current_next_seqno);
2180
+
2181
+ void FinishLogFilesRecovery(int job_id, const Status& status);
2046
2182
  // The following two methods are used to flush a memtable to
2047
2183
  // storage. The first one is used at database RecoveryTime (when the
2048
2184
  // database is opened) and is heavyweight because it holds the mutex
@@ -2055,12 +2191,12 @@ class DBImpl : public DB {
2055
2191
  // log file to its actual size, thereby freeing preallocated space.
2056
2192
  // Return success even if truncate fails
2057
2193
  Status GetLogSizeAndMaybeTruncate(uint64_t wal_number, bool truncate,
2058
- LogFileNumberSize* log);
2194
+ WalFileNumberSize* log);
2059
2195
 
2060
- // Restore alive_log_files_ and total_log_size_ after recovery.
2196
+ // Restore alive_wal_files_ and wals_total_size_ after recovery.
2061
2197
  // It needs to run only when there's no flush during recovery
2062
2198
  // (e.g. avoid_flush_during_recovery=true). May also trigger flush
2063
- // in case total_log_size > max_total_wal_size.
2199
+ // in case wals_total_size > max_total_wal_size.
2064
2200
  Status RestoreAliveLogFiles(const std::vector<uint64_t>& log_numbers);
2065
2201
 
2066
2202
  // num_bytes: for slowdown case, delay time is calculated based on
@@ -2209,7 +2345,7 @@ class DBImpl : public DB {
2209
2345
 
2210
2346
  // REQUIRES: mutex locked
2211
2347
  Status PreprocessWrite(const WriteOptions& write_options,
2212
- LogContext* log_context, WriteContext* write_context);
2348
+ WalContext* log_context, WriteContext* write_context);
2213
2349
 
2214
2350
  // Merge write batches in the write group into merged_batch.
2215
2351
  // Returns OK if merge is successful.
@@ -2220,19 +2356,21 @@ class DBImpl : public DB {
2220
2356
 
2221
2357
  IOStatus WriteToWAL(const WriteBatch& merged_batch,
2222
2358
  const WriteOptions& write_options,
2223
- log::Writer* log_writer, uint64_t* log_used,
2359
+ log::Writer* log_writer, uint64_t* wal_used,
2224
2360
  uint64_t* log_size,
2225
- LogFileNumberSize& log_file_number_size);
2361
+ WalFileNumberSize& wal_file_number_size,
2362
+ SequenceNumber sequence);
2226
2363
 
2227
- IOStatus WriteToWAL(const WriteThread::WriteGroup& write_group,
2228
- log::Writer* log_writer, uint64_t* log_used,
2229
- bool need_log_sync, bool need_log_dir_sync,
2230
- SequenceNumber sequence,
2231
- LogFileNumberSize& log_file_number_size);
2364
+ IOStatus WriteGroupToWAL(const WriteThread::WriteGroup& write_group,
2365
+ log::Writer* log_writer, uint64_t* wal_used,
2366
+ bool need_wal_sync, bool need_wal_dir_sync,
2367
+ SequenceNumber sequence,
2368
+ WalFileNumberSize& wal_file_number_size);
2232
2369
 
2233
- IOStatus ConcurrentWriteToWAL(const WriteThread::WriteGroup& write_group,
2234
- uint64_t* log_used,
2235
- SequenceNumber* last_sequence, size_t seq_inc);
2370
+ IOStatus ConcurrentWriteGroupToWAL(const WriteThread::WriteGroup& write_group,
2371
+ uint64_t* wal_used,
2372
+ SequenceNumber* last_sequence,
2373
+ size_t seq_inc);
2236
2374
 
2237
2375
  // Used by WriteImpl to update bg_error_ if paranoid check is enabled.
2238
2376
  // Caller must hold mutex_.
@@ -2246,7 +2384,7 @@ class DBImpl : public DB {
2246
2384
  void WALIOStatusCheck(const IOStatus& status);
2247
2385
 
2248
2386
  // Used by WriteImpl to update bg_error_ in case of memtable insert error.
2249
- void MemTableInsertStatusCheck(const Status& memtable_insert_status);
2387
+ void HandleMemTableInsertFailure(const Status& nonok_memtable_insert_status);
2250
2388
 
2251
2389
  Status CompactFilesImpl(const CompactionOptions& compact_options,
2252
2390
  ColumnFamilyData* cfd, Version* version,
@@ -2331,6 +2469,8 @@ class DBImpl : public DB {
2331
2469
  bool* flush_rescheduled_to_retain_udt,
2332
2470
  Env::Priority thread_pri);
2333
2471
 
2472
+ Compaction* CreateIntendedCompactionForwardedToBottomPriorityPool(
2473
+ Compaction* c);
2334
2474
  bool EnoughRoomForCompaction(ColumnFamilyData* cfd,
2335
2475
  const std::vector<CompactionInputFiles>& inputs,
2336
2476
  bool* sfm_bookkeeping, LogBuffer* log_buffer);
@@ -2353,9 +2493,7 @@ class DBImpl : public DB {
2353
2493
  // Cancel scheduled periodic tasks
2354
2494
  Status CancelPeriodicTaskScheduler();
2355
2495
 
2356
- Status RegisterRecordSeqnoTimeWorker(const ReadOptions& read_options,
2357
- const WriteOptions& write_options,
2358
- bool is_new_db);
2496
+ Status RegisterRecordSeqnoTimeWorker();
2359
2497
 
2360
2498
  void PrintStatistics();
2361
2499
 
@@ -2421,14 +2559,22 @@ class DBImpl : public DB {
2421
2559
 
2422
2560
  // Background threads call this function, which is just a wrapper around
2423
2561
  // the InstallSuperVersion() function. Background threads carry
2424
- // sv_context which can have new_superversion already
2425
- // allocated.
2562
+ // sv_context to allow allocation of SuperVersion object outside of holding
2563
+ // the DB mutex.
2426
2564
  // All ColumnFamily state changes go through this function. Here we analyze
2427
2565
  // the new state and we schedule background work if we detect that the new
2428
2566
  // state needs flush or compaction.
2567
+ // See also InstallSuperVersionForConfigChange().
2429
2568
  void InstallSuperVersionAndScheduleWork(
2430
2569
  ColumnFamilyData* cfd, SuperVersionContext* sv_context,
2431
- const MutableCFOptions& mutable_cf_options);
2570
+ std::optional<std::shared_ptr<SeqnoToTimeMapping>>
2571
+ new_seqno_to_time_mapping = {});
2572
+
2573
+ // A variant of InstallSuperVersionAndScheduleWork() that must be used for
2574
+ // new CFs or for changes to mutable_cf_options. This is so that it can
2575
+ // update seqno_to_time_mapping cached for the new SuperVersion as relevant.
2576
+ void InstallSuperVersionForConfigChange(ColumnFamilyData* cfd,
2577
+ SuperVersionContext* sv_context);
2432
2578
 
2433
2579
  bool GetIntPropertyInternal(ColumnFamilyData* cfd,
2434
2580
  const DBPropertyInfo& property_info,
@@ -2442,7 +2588,7 @@ class DBImpl : public DB {
2442
2588
  bool ShouldntRunManualCompaction(ManualCompactionState* m);
2443
2589
  bool HaveManualCompaction(ColumnFamilyData* cfd);
2444
2590
  bool MCOverlap(ManualCompactionState* m, ManualCompactionState* m1);
2445
- void UpdateDeletionCompactionStats(const std::unique_ptr<Compaction>& c);
2591
+ void UpdateFIFOCompactionStatus(const std::unique_ptr<Compaction>& c);
2446
2592
 
2447
2593
  // May open and read table files for table property.
2448
2594
  // Should not be called while holding mutex_.
@@ -2467,6 +2613,7 @@ class DBImpl : public DB {
2467
2613
 
2468
2614
  IOStatus CreateWAL(const WriteOptions& write_options, uint64_t log_file_num,
2469
2615
  uint64_t recycle_log_number, size_t preallocate_block_size,
2616
+ const PredecessorWALInfo& predecessor_wal_info,
2470
2617
  log::Writer** new_log);
2471
2618
 
2472
2619
  // Validate self-consistency of DB options
@@ -2591,8 +2738,13 @@ class DBImpl : public DB {
2591
2738
  const std::vector<ColumnFamilyHandle*>& column_families,
2592
2739
  ErrorIteratorFuncType error_iterator_func);
2593
2740
 
2741
+ bool ShouldPickCompaction(bool is_prepicked,
2742
+ const PrepickedCompaction* prepicked_compaction);
2743
+
2744
+ void ResetBottomPriCompactionIntent(ColumnFamilyData* cfd,
2745
+ std::unique_ptr<Compaction>& c);
2594
2746
  // Lock over the persistent DB state. Non-nullptr iff successfully acquired.
2595
- FileLock* db_lock_;
2747
+ FileLock* db_lock_ = nullptr;
2596
2748
 
2597
2749
  // Guards changes to DB and CF options to ensure consistency between
2598
2750
  // * In-memory options objects
@@ -2606,20 +2758,20 @@ class DBImpl : public DB {
2606
2758
  // Guards reads and writes to in-memory stats_history_.
2607
2759
  InstrumentedMutex stats_history_mutex_;
2608
2760
 
2609
- // In addition to mutex_, log_write_mutex_ protects writes to logs_ and
2610
- // logfile_number_. With two_write_queues it also protects alive_log_files_,
2611
- // and log_empty_. Refer to the definition of each variable below for more
2761
+ // In addition to mutex_, wal_write_mutex_ protects writes to logs_ and
2762
+ // cur_wal_number_. With two_write_queues it also protects alive_wal_files_,
2763
+ // and wal_empty_. Refer to the definition of each variable below for more
2612
2764
  // details.
2613
- // Note: to avoid deadlock, if needed to acquire both log_write_mutex_ and
2614
- // mutex_, the order should be first mutex_ and then log_write_mutex_.
2615
- InstrumentedMutex log_write_mutex_;
2765
+ // Note: to avoid deadlock, if needed to acquire both wal_write_mutex_ and
2766
+ // mutex_, the order should be first mutex_ and then wal_write_mutex_.
2767
+ InstrumentedMutex wal_write_mutex_;
2616
2768
 
2617
2769
  // If zero, manual compactions are allowed to proceed. If non-zero, manual
2618
2770
  // compactions may still be running, but will quickly fail with
2619
2771
  // `Status::Incomplete`. The value indicates how many threads have paused
2620
2772
  // manual compactions. It is accessed in read mode outside the DB mutex in
2621
2773
  // compaction code paths.
2622
- std::atomic<int> manual_compaction_paused_;
2774
+ std::atomic<int> manual_compaction_paused_ = false;
2623
2775
 
2624
2776
  // This condition variable is signaled on these conditions:
2625
2777
  // * whenever bg_compaction_scheduled_ goes down to 0
@@ -2635,106 +2787,114 @@ class DBImpl : public DB {
2635
2787
  // * whenever SetOptions successfully updates options.
2636
2788
  // * whenever a column family is dropped.
2637
2789
  InstrumentedCondVar bg_cv_;
2638
- // Writes are protected by locking both mutex_ and log_write_mutex_, and reads
2639
- // must be under either mutex_ or log_write_mutex_. Since after ::Open,
2640
- // logfile_number_ is currently updated only in write_thread_, it can be read
2790
+
2791
+ ColumnFamilyHandleImpl* persist_stats_cf_handle_ = nullptr;
2792
+
2793
+ bool persistent_stats_cfd_exists_ = true;
2794
+
2795
+ // Writes are protected by locking both mutex_ and wal_write_mutex_, and reads
2796
+ // must be under either mutex_ or wal_write_mutex_. Since after ::Open,
2797
+ // cur_wal_number_ is currently updated only in write_thread_, it can be read
2641
2798
  // from the same write_thread_ without any locks.
2642
- uint64_t logfile_number_;
2799
+ uint64_t cur_wal_number_ = 0;
2800
+
2643
2801
  // Log files that we can recycle. Must be protected by db mutex_.
2644
- std::deque<uint64_t> log_recycle_files_;
2802
+ std::deque<uint64_t> wal_recycle_files_;
2803
+
2645
2804
  // The minimum log file number that can be recycled, if log recycling is
2646
2805
  // enabled. This is used to ensure that log files created by previous
2647
2806
  // instances of the database are not recycled, as we cannot be sure they
2648
2807
  // were created in the recyclable format.
2649
- uint64_t min_log_number_to_recycle_;
2650
- // Protected by log_write_mutex_.
2651
- bool log_dir_synced_;
2652
- // Without two_write_queues, read and writes to log_empty_ are protected by
2808
+ uint64_t min_wal_number_to_recycle_ = 0;
2809
+
2810
+ // Protected by wal_write_mutex_.
2811
+ bool wal_dir_synced_ = false;
2812
+
2813
+ // Without two_write_queues, read and writes to wal_empty_ are protected by
2653
2814
  // mutex_. Since it is currently updated/read only in write_thread_, it can be
2654
2815
  // accessed from the same write_thread_ without any locks. With
2655
2816
  // two_write_queues writes, where it can be updated in different threads,
2656
- // read and writes are protected by log_write_mutex_ instead. This is to avoid
2657
- // expensive mutex_ lock during WAL write, which update log_empty_.
2658
- bool log_empty_;
2659
-
2660
- ColumnFamilyHandleImpl* persist_stats_cf_handle_;
2661
-
2662
- bool persistent_stats_cfd_exists_ = true;
2817
+ // read and writes are protected by wal_write_mutex_ instead. This is to avoid
2818
+ // expensive mutex_ lock during WAL write, which update wal_empty_.
2819
+ bool wal_empty_ = true;
2663
2820
 
2664
2821
  // The current WAL file and those that have not been found obsolete from
2665
2822
  // memtable flushes. A WAL not on this list might still be pending writer
2666
- // flush and/or sync and close and might still be in logs_. alive_log_files_
2667
- // is protected by mutex_ and log_write_mutex_ with details as follows:
2823
+ // flush and/or sync and close and might still be in logs_. alive_wal_files_
2824
+ // is protected by mutex_ and wal_write_mutex_ with details as follows:
2668
2825
  // 1. read by FindObsoleteFiles() which can be called in either application
2669
- // thread or RocksDB bg threads, both mutex_ and log_write_mutex_ are
2826
+ // thread or RocksDB bg threads, both mutex_ and wal_write_mutex_ are
2670
2827
  // held.
2671
- // 2. pop_front() by FindObsoleteFiles(), both mutex_ and log_write_mutex_
2828
+ // 2. pop_front() by FindObsoleteFiles(), both mutex_ and wal_write_mutex_
2672
2829
  // are held.
2673
2830
  // 3. push_back() by DBImpl::Open() and DBImpl::RestoreAliveLogFiles()
2674
2831
  // (actually called by Open()), only mutex_ is held because at this point,
2675
2832
  // the DB::Open() call has not returned success to application, and the
2676
2833
  // only other thread(s) that can conflict are bg threads calling
2677
- // FindObsoleteFiles() which ensure that both mutex_ and log_write_mutex_
2678
- // are held when accessing alive_log_files_.
2834
+ // FindObsoleteFiles() which ensure that both mutex_ and wal_write_mutex_
2835
+ // are held when accessing alive_wal_files_.
2679
2836
  // 4. read by DBImpl::Open() is protected by mutex_.
2680
- // 5. push_back() by SwitchMemtable(). Both mutex_ and log_write_mutex_ are
2837
+ // 5. push_back() by SwitchMemtable(). Both mutex_ and wal_write_mutex_ are
2681
2838
  // held. This is done by the write group leader. Note that in the case of
2682
2839
  // two-write-queues, another WAL-only write thread can be writing to the
2683
2840
  // WAL concurrently. See 9.
2684
- // 6. read by SwitchWAL() with both mutex_ and log_write_mutex_ held. This is
2841
+ // 6. read by SwitchWAL() with both mutex_ and wal_write_mutex_ held. This is
2685
2842
  // done by write group leader.
2686
2843
  // 7. read by ConcurrentWriteGroupToWAL() by the write group leader in the case of
2687
- // two-write-queues. Only log_write_mutex_ is held to protect concurrent
2844
+ // two-write-queues. Only wal_write_mutex_ is held to protect concurrent
2688
2845
  // pop_front() by FindObsoleteFiles().
2689
- // 8. read by PreprocessWrite() by the write group leader. log_write_mutex_
2846
+ // 8. read by PreprocessWrite() by the write group leader. wal_write_mutex_
2690
2847
  // is held to protect the data structure from concurrent pop_front() by
2691
2848
  // FindObsoleteFiles().
2692
2849
  // 9. read by ConcurrentWriteGroupToWAL() by a WAL-only write thread in the case
2693
- // of two-write-queues. Only log_write_mutex_ is held. This suffices to
2850
+ // of two-write-queues. Only wal_write_mutex_ is held. This suffices to
2694
2851
  // protect the data structure from concurrent push_back() by current
2695
2852
  // write group leader as well as pop_front() by FindObsoleteFiles().
2696
- std::deque<LogFileNumberSize> alive_log_files_;
2853
+ std::deque<WalFileNumberSize> alive_wal_files_;
2854
+
2855
+ // Total size of all "alive" WALs (for easy access without synchronization)
2856
+ RelaxedAtomic<uint64_t> wals_total_size_{0};
2697
2857
 
2698
2858
  // Log files that aren't fully synced, and the current log file.
2699
2859
  // Synchronization:
2700
2860
  // 1. read by FindObsoleteFiles() which can be called either in application
2701
- // thread or RocksDB bg threads. log_write_mutex_ is always held, while
2861
+ // thread or RocksDB bg threads. wal_write_mutex_ is always held, while
2702
2862
  // some reads are performed without mutex_.
2703
- // 2. pop_front() by FindObsoleteFiles() with only log_write_mutex_ held.
2704
- // 3. read by DBImpl::Open() with both mutex_ and log_write_mutex_.
2705
- // 4. emplace_back() by DBImpl::Open() with both mutex_ and log_write_mutex.
2863
+ // 2. pop_front() by FindObsoleteFiles() with only wal_write_mutex_ held.
2864
+ // 3. read by DBImpl::Open() with both mutex_ and wal_write_mutex_.
2865
+ // 4. emplace_back() by DBImpl::Open() with both mutex_ and wal_write_mutex_.
2706
2866
  // Note that at this point, DB::Open() has not returned success to
2707
2867
  // application, thus the only other thread(s) that can conflict are bg
2708
2868
  // threads calling FindObsoleteFiles(). See 1.
2709
- // 5. iteration and clear() from CloseHelper() always hold log_write_mutex
2869
+ // 5. iteration and clear() from CloseHelper() always hold wal_write_mutex_
2710
2870
  // and mutex_.
2711
2871
  // 6. back() called by APIs FlushWAL() and LockWAL() are protected by only
2712
- // log_write_mutex_. These two can be called by application threads after
2872
+ // wal_write_mutex_. These two can be called by application threads after
2713
2873
  // DB::Open() returns success to applications.
2714
- // 7. read by SyncWAL(), another API, protected by only log_write_mutex_.
2874
+ // 7. read by SyncWAL(), another API, protected by only wal_write_mutex_.
2715
2875
  // 8. read by MarkLogsNotSynced() and MarkLogsSynced() are protected by
2716
- // log_write_mutex_.
2717
- // 9. erase() by MarkLogsSynced() protected by log_write_mutex_.
2718
- // 10. read by SyncClosedWals() protected by only log_write_mutex_. This can
2876
+ // wal_write_mutex_.
2877
+ // 9. erase() by MarkLogsSynced() protected by wal_write_mutex_.
2878
+ // 10. read by SyncClosedWals() protected by only wal_write_mutex_. This can
2719
2879
  // happen in bg flush threads after DB::Open() returns success to
2720
2880
  // applications.
2721
2881
  // 11. reads, e.g. front(), iteration, and back() called by PreprocessWrite()
2722
- // holds only the log_write_mutex_. This is done by the write group
2882
+ // holds only the wal_write_mutex_. This is done by the write group
2723
2883
  // leader. A bg thread calling FindObsoleteFiles() or MarkLogsSynced()
2724
- // can happen concurrently. This is fine because log_write_mutex_ is used
2884
+ // can happen concurrently. This is fine because wal_write_mutex_ is used
2725
2885
  // by all parties. See 2, 5, 9.
2726
2886
  // 12. reads, empty(), back() called by SwitchMemtable() hold both mutex_ and
2727
- // log_write_mutex_. This happens in the write group leader.
2887
+ // wal_write_mutex_. This happens in the write group leader.
2728
2888
  // 13. emplace_back() by SwitchMemtable() hold both mutex_ and
2729
- // log_write_mutex_. This happens in the write group leader. Can conflict
2889
+ // wal_write_mutex_. This happens in the write group leader. Can conflict
2730
2890
  // with bg threads calling FindObsoleteFiles(), MarkLogsSynced(),
2731
2891
  // SyncClosedWals(), etc. as well as application threads calling
2732
2892
  // FlushWAL(), SyncWAL(), LockWAL(). This is fine because all parties
2733
- // require at least log_write_mutex_.
2893
+ // require at least wal_write_mutex_.
2734
2894
  // 14. iteration called in WriteGroupToWAL() protected by
2735
- // log_write_mutex_. This is done by write group leader when
2895
+ // wal_write_mutex_. This is done by write group leader when
2736
2896
  // two-write-queues is disabled and write needs to sync logs.
2737
- // 15. back() called in ConcurrentWriteToWAL() protected by log_write_mutex_.
2897
+ // 15. back() called in ConcurrentWriteGroupToWAL() protected by wal_write_mutex_.
2738
2898
  // This can be done by the write group leader if two-write-queues is
2739
2899
  // enabled. It can also be done by another WAL-only write thread.
2740
2900
  //
@@ -2751,23 +2911,22 @@ class DBImpl : public DB {
2751
2911
  std::deque<LogWriterNumber> logs_;
2752
2912
 
2753
2913
  // Signaled when getting_synced becomes false for some of the logs_.
2754
- InstrumentedCondVar log_sync_cv_;
2914
+ InstrumentedCondVar wal_sync_cv_;
2755
2915
  // This is the app-level state that is written to the WAL but will be used
2756
2916
  // only during recovery. Using this feature enables not writing the state to
2757
2917
  // memtable on normal writes and hence improving the throughput. Each new
2758
2918
  // write of the state will replace the previous state entirely even if the
2759
2919
  // keys in the two consecutive states do not overlap.
2760
- // It is protected by log_write_mutex_ when two_write_queues_ is enabled.
2920
+ // It is protected by wal_write_mutex_ when two_write_queues_ is enabled.
2761
2921
  // Otherwise only the head of write_thread_ can access it.
2762
2922
  WriteBatch cached_recoverable_state_;
2763
2923
  std::atomic<bool> cached_recoverable_state_empty_ = {true};
2764
- std::atomic<uint64_t> total_log_size_;
2765
2924
 
2766
2925
  // If this is non-empty, we need to delete these log files in background
2767
- // threads. Protected by log_write_mutex_.
2768
- autovector<log::Writer*> logs_to_free_;
2926
+ // threads. Protected by wal_write_mutex_.
2927
+ autovector<log::Writer*> wals_to_free_;
2769
2928
 
2770
- bool is_snapshot_supported_;
2929
+ bool is_snapshot_supported_ = true;
2771
2930
 
2772
2931
  std::map<uint64_t, std::map<std::string, uint64_t>> stats_history_;
2773
2932
 
@@ -2791,7 +2950,7 @@ class DBImpl : public DB {
2791
2950
  // sleep if it uses up the quota.
2792
2951
  // Note: This is to protect memtable and compaction. If the batch only writes
2793
2952
  // to the WAL its size need not to be included in this.
2794
- uint64_t last_batch_group_size_;
2953
+ uint64_t last_batch_group_size_ = 0;
2795
2954
 
2796
2955
  FlushScheduler flush_scheduler_;
2797
2956
 
@@ -2850,32 +3009,32 @@ class DBImpl : public DB {
2850
3009
  std::unordered_set<uint64_t> files_grabbed_for_purge_;
2851
3010
 
2852
3011
  // A queue to store log writers to close. Protected by db mutex_.
2853
- std::deque<log::Writer*> logs_to_free_queue_;
3012
+ std::deque<log::Writer*> wals_to_free_queue_;
2854
3013
 
2855
3014
  std::deque<SuperVersion*> superversions_to_free_queue_;
2856
3015
 
2857
- int unscheduled_flushes_;
3016
+ int unscheduled_flushes_ = 0;
2858
3017
 
2859
- int unscheduled_compactions_;
3018
+ int unscheduled_compactions_ = 0;
2860
3019
 
2861
3020
  // count how many background compactions are running or have been scheduled in
2862
3021
  // the BOTTOM pool
2863
- int bg_bottom_compaction_scheduled_;
3022
+ int bg_bottom_compaction_scheduled_ = 0;
2864
3023
 
2865
3024
  // count how many background compactions are running or have been scheduled
2866
- int bg_compaction_scheduled_;
3025
+ int bg_compaction_scheduled_ = 0;
2867
3026
 
2868
3027
  // stores the number of compactions are currently running
2869
- int num_running_compactions_;
3028
+ int num_running_compactions_ = 0;
2870
3029
 
2871
3030
  // number of background memtable flush jobs, submitted to the HIGH pool
2872
- int bg_flush_scheduled_;
3031
+ int bg_flush_scheduled_ = 0;
2873
3032
 
2874
3033
  // stores the number of flushes are currently running
2875
- int num_running_flushes_;
3034
+ int num_running_flushes_ = 0;
2876
3035
 
2877
3036
  // number of background obsolete file purge jobs, submitted to the HIGH pool
2878
- int bg_purge_scheduled_;
3037
+ int bg_purge_scheduled_ = 0;
2879
3038
 
2880
3039
  std::deque<ManualCompactionState*> manual_compaction_dequeue_;
2881
3040
 
@@ -2885,11 +3044,11 @@ class DBImpl : public DB {
2885
3044
  // This enables two different threads to call
2886
3045
  // EnableFileDeletions() and DisableFileDeletions()
2887
3046
  // without any synchronization
2888
- int disable_delete_obsolete_files_;
3047
+ int disable_delete_obsolete_files_ = 0;
2889
3048
 
2890
3049
  // Number of times FindObsoleteFiles has found deletable files and the
2891
3050
  // corresponding call to PurgeObsoleteFiles has not yet finished.
2892
- int pending_purge_obsolete_files_;
3051
+ int pending_purge_obsolete_files_ = 0;
2893
3052
 
2894
3053
  // last time when DeleteObsoleteFiles with full scan was executed. Originally
2895
3054
  // initialized with startup time.
@@ -2901,12 +3060,12 @@ class DBImpl : public DB {
2901
3060
  // The mutex used by switch_cv_. mutex_ should be acquired beforehand.
2902
3061
  std::mutex switch_mutex_;
2903
3062
  // Number of threads intending to write to memtable
2904
- std::atomic<size_t> pending_memtable_writes_ = {};
3063
+ std::atomic<size_t> pending_memtable_writes_{0};
2905
3064
 
2906
3065
  // A flag indicating whether the current rocksdb database has any
2907
3066
  // data that is not yet persisted into either WAL or SST file.
2908
3067
  // Used when disableWAL is true.
2909
- std::atomic<bool> has_unpersisted_data_;
3068
+ std::atomic<bool> has_unpersisted_data_{false};
2910
3069
 
2911
3070
  // if an attempt was made to flush all column families that
2912
3071
  // the oldest log depends on but uncommitted data in the oldest
@@ -2914,26 +3073,26 @@ class DBImpl : public DB {
2914
3073
  // We must attempt to free the dependent memtables again
2915
3074
  // at a later time after the transaction in the oldest
2916
3075
  // log is fully committed.
2917
- bool unable_to_release_oldest_log_;
3076
+ bool unable_to_release_oldest_log_{false};
2918
3077
 
2919
3078
  // Number of running IngestExternalFile() or CreateColumnFamilyWithImport()
2920
3079
  // calls.
2921
3080
  // REQUIRES: mutex held
2922
- int num_running_ingest_file_;
3081
+ int num_running_ingest_file_ = 0;
2923
3082
 
2924
3083
  WalManager wal_manager_;
2925
3084
 
2926
3085
  // A value of > 0 temporarily disables scheduling of background work
2927
- int bg_work_paused_;
3086
+ int bg_work_paused_ = 0;
2928
3087
 
2929
3088
  // A value of > 0 temporarily disables scheduling of background compaction
2930
- int bg_compaction_paused_;
3089
+ int bg_compaction_paused_ = 0;
2931
3090
 
2932
3091
  // Guard against multiple concurrent refitting
2933
- bool refitting_level_;
3092
+ bool refitting_level_ = false;
2934
3093
 
2935
3094
  // Indicate DB was opened successfully
2936
- bool opened_successfully_;
3095
+ bool opened_successfully_ = false;
2937
3096
 
2938
3097
  // The min threshold to trigger bottommost compaction for removing
2939
3098
  // garbages, among all column families.
@@ -2979,13 +3138,13 @@ class DBImpl : public DB {
2979
3138
  // error recovery from going on in parallel. The latter, shutting_down_,
2980
3139
  // is set a little later during the shutdown after scheduling memtable
2981
3140
  // flushes
2982
- std::atomic<bool> shutdown_initiated_;
3141
+ std::atomic<bool> shutdown_initiated_{false};
2983
3142
  // Flag to indicate whether sst_file_manager object was allocated in
2984
3143
  // DB::Open() or passed to us
2985
3144
  bool own_sfm_;
2986
3145
 
2987
3146
  // Flag to check whether Close() has been called on this DB
2988
- bool closed_;
3147
+ bool closed_ = false;
2989
3148
  // save the closing status, for re-calling the close()
2990
3149
  Status closing_status_;
2991
3150
  // mutex for DB::Close()
@@ -3021,7 +3180,7 @@ class DBImpl : public DB {
3021
3180
 
3022
3181
  // The number of LockWAL called without matching UnlockWAL call.
3023
3182
  // See also lock_wal_write_token_
3024
- uint32_t lock_wal_count_;
3183
+ uint32_t lock_wal_count_ = 0;
3025
3184
  };
3026
3185
 
3027
3186
  class GetWithTimestampReadCallback : public ReadCallback {