@nxtedition/rocksdb 5.2.21 → 5.2.28

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (923) hide show
  1. package/binding.cc +510 -967
  2. package/binding.gyp +78 -72
  3. package/chained-batch.js +1 -2
  4. package/deps/rocksdb/build_version.cc +70 -4
  5. package/deps/rocksdb/rocksdb/CMakeLists.txt +281 -149
  6. package/deps/rocksdb/rocksdb/Makefile +459 -469
  7. package/deps/rocksdb/rocksdb/TARGETS +5244 -1500
  8. package/deps/rocksdb/rocksdb/cache/cache.cc +12 -3
  9. package/deps/rocksdb/rocksdb/cache/cache_bench.cc +7 -368
  10. package/deps/rocksdb/rocksdb/cache/cache_bench_tool.cc +924 -0
  11. package/deps/rocksdb/rocksdb/cache/cache_entry_roles.cc +128 -0
  12. package/deps/rocksdb/rocksdb/cache/cache_entry_roles.h +103 -0
  13. package/deps/rocksdb/rocksdb/cache/cache_entry_stats.h +183 -0
  14. package/deps/rocksdb/rocksdb/cache/cache_helpers.h +11 -0
  15. package/deps/rocksdb/rocksdb/cache/cache_key.cc +344 -0
  16. package/deps/rocksdb/rocksdb/cache/cache_key.h +132 -0
  17. package/deps/rocksdb/rocksdb/cache/cache_reservation_manager.cc +183 -0
  18. package/deps/rocksdb/rocksdb/cache/cache_reservation_manager.h +288 -0
  19. package/deps/rocksdb/rocksdb/cache/cache_reservation_manager_test.cc +468 -0
  20. package/deps/rocksdb/rocksdb/cache/cache_test.cc +85 -8
  21. package/deps/rocksdb/rocksdb/cache/clock_cache.cc +121 -51
  22. package/deps/rocksdb/rocksdb/cache/compressed_secondary_cache.cc +171 -0
  23. package/deps/rocksdb/rocksdb/cache/compressed_secondary_cache.h +86 -0
  24. package/deps/rocksdb/rocksdb/cache/compressed_secondary_cache_test.cc +607 -0
  25. package/deps/rocksdb/rocksdb/cache/lru_cache.cc +381 -154
  26. package/deps/rocksdb/rocksdb/cache/lru_cache.h +176 -33
  27. package/deps/rocksdb/rocksdb/cache/lru_cache_test.cc +1659 -3
  28. package/deps/rocksdb/rocksdb/cache/sharded_cache.cc +94 -23
  29. package/deps/rocksdb/rocksdb/cache/sharded_cache.h +49 -28
  30. package/deps/rocksdb/rocksdb/cmake/modules/CxxFlags.cmake +7 -0
  31. package/deps/rocksdb/rocksdb/cmake/modules/FindJeMalloc.cmake +29 -0
  32. package/deps/rocksdb/rocksdb/cmake/modules/FindNUMA.cmake +29 -0
  33. package/deps/rocksdb/rocksdb/cmake/modules/FindSnappy.cmake +29 -0
  34. package/deps/rocksdb/rocksdb/cmake/modules/FindTBB.cmake +33 -0
  35. package/deps/rocksdb/rocksdb/cmake/modules/Findgflags.cmake +29 -0
  36. package/deps/rocksdb/rocksdb/cmake/modules/Findlz4.cmake +29 -0
  37. package/deps/rocksdb/rocksdb/cmake/modules/Finduring.cmake +26 -0
  38. package/deps/rocksdb/rocksdb/cmake/modules/Findzstd.cmake +29 -0
  39. package/deps/rocksdb/rocksdb/cmake/modules/ReadVersion.cmake +10 -0
  40. package/deps/rocksdb/rocksdb/crash_test.mk +93 -0
  41. package/deps/rocksdb/rocksdb/db/arena_wrapped_db_iter.cc +54 -31
  42. package/deps/rocksdb/rocksdb/db/arena_wrapped_db_iter.h +10 -6
  43. package/deps/rocksdb/rocksdb/db/blob/blob_counting_iterator.h +146 -0
  44. package/deps/rocksdb/rocksdb/db/blob/blob_counting_iterator_test.cc +326 -0
  45. package/deps/rocksdb/rocksdb/db/blob/blob_fetcher.cc +34 -0
  46. package/deps/rocksdb/rocksdb/db/blob/blob_fetcher.h +37 -0
  47. package/deps/rocksdb/rocksdb/db/blob/blob_file_addition.cc +4 -2
  48. package/deps/rocksdb/rocksdb/db/blob/blob_file_addition_test.cc +8 -4
  49. package/deps/rocksdb/rocksdb/db/blob/blob_file_builder.cc +99 -40
  50. package/deps/rocksdb/rocksdb/db/blob/blob_file_builder.h +20 -8
  51. package/deps/rocksdb/rocksdb/db/blob/blob_file_builder_test.cc +95 -83
  52. package/deps/rocksdb/rocksdb/db/blob/blob_file_cache.cc +13 -10
  53. package/deps/rocksdb/rocksdb/db/blob/blob_file_cache.h +7 -4
  54. package/deps/rocksdb/rocksdb/db/blob/blob_file_cache_test.cc +37 -37
  55. package/deps/rocksdb/rocksdb/db/blob/blob_file_completion_callback.h +101 -0
  56. package/deps/rocksdb/rocksdb/db/blob/blob_file_meta.cc +8 -1
  57. package/deps/rocksdb/rocksdb/db/blob/blob_file_meta.h +6 -0
  58. package/deps/rocksdb/rocksdb/db/blob/blob_file_reader.cc +209 -44
  59. package/deps/rocksdb/rocksdb/db/blob/blob_file_reader.h +37 -11
  60. package/deps/rocksdb/rocksdb/db/blob/blob_file_reader_test.cc +382 -179
  61. package/deps/rocksdb/rocksdb/db/blob/blob_garbage_meter.cc +100 -0
  62. package/deps/rocksdb/rocksdb/db/blob/blob_garbage_meter.h +102 -0
  63. package/deps/rocksdb/rocksdb/db/blob/blob_garbage_meter_test.cc +196 -0
  64. package/deps/rocksdb/rocksdb/db/blob/blob_index.h +3 -0
  65. package/deps/rocksdb/rocksdb/db/blob/blob_log_format.h +2 -1
  66. package/deps/rocksdb/rocksdb/db/blob/blob_log_sequential_reader.cc +7 -5
  67. package/deps/rocksdb/rocksdb/db/blob/blob_log_sequential_reader.h +10 -3
  68. package/deps/rocksdb/rocksdb/db/blob/blob_log_writer.cc +12 -8
  69. package/deps/rocksdb/rocksdb/db/blob/blob_log_writer.h +5 -5
  70. package/deps/rocksdb/rocksdb/db/blob/db_blob_basic_test.cc +772 -9
  71. package/deps/rocksdb/rocksdb/db/blob/db_blob_compaction_test.cc +730 -0
  72. package/deps/rocksdb/rocksdb/db/blob/db_blob_corruption_test.cc +82 -0
  73. package/deps/rocksdb/rocksdb/db/blob/db_blob_index_test.cc +155 -17
  74. package/deps/rocksdb/rocksdb/db/blob/prefetch_buffer_collection.cc +21 -0
  75. package/deps/rocksdb/rocksdb/db/blob/prefetch_buffer_collection.h +38 -0
  76. package/deps/rocksdb/rocksdb/db/builder.cc +137 -89
  77. package/deps/rocksdb/rocksdb/db/builder.h +16 -37
  78. package/deps/rocksdb/rocksdb/db/c.cc +413 -208
  79. package/deps/rocksdb/rocksdb/db/c_test.c +227 -138
  80. package/deps/rocksdb/rocksdb/db/column_family.cc +118 -103
  81. package/deps/rocksdb/rocksdb/db/column_family.h +86 -44
  82. package/deps/rocksdb/rocksdb/db/column_family_test.cc +38 -24
  83. package/deps/rocksdb/rocksdb/db/compact_files_test.cc +81 -0
  84. package/deps/rocksdb/rocksdb/db/compaction/clipping_iterator.h +275 -0
  85. package/deps/rocksdb/rocksdb/db/compaction/clipping_iterator_test.cc +258 -0
  86. package/deps/rocksdb/rocksdb/db/compaction/compaction.cc +81 -28
  87. package/deps/rocksdb/rocksdb/db/compaction/compaction.h +43 -12
  88. package/deps/rocksdb/rocksdb/db/compaction/compaction_iteration_stats.h +12 -0
  89. package/deps/rocksdb/rocksdb/db/compaction/compaction_iterator.cc +406 -215
  90. package/deps/rocksdb/rocksdb/db/compaction/compaction_iterator.h +147 -50
  91. package/deps/rocksdb/rocksdb/db/compaction/compaction_iterator_test.cc +167 -61
  92. package/deps/rocksdb/rocksdb/db/compaction/compaction_job.cc +1321 -156
  93. package/deps/rocksdb/rocksdb/db/compaction/compaction_job.h +197 -28
  94. package/deps/rocksdb/rocksdb/db/compaction/compaction_job_stats_test.cc +2 -3
  95. package/deps/rocksdb/rocksdb/db/compaction/compaction_job_test.cc +246 -43
  96. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker.cc +65 -26
  97. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker.h +7 -7
  98. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_fifo.cc +122 -9
  99. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_fifo.h +8 -2
  100. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_level.cc +18 -6
  101. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_level.h +1 -1
  102. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_test.cc +536 -44
  103. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_universal.cc +311 -30
  104. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_universal.h +1 -1
  105. package/deps/rocksdb/rocksdb/db/compaction/compaction_service_test.cc +849 -0
  106. package/deps/rocksdb/rocksdb/db/compaction/file_pri.h +92 -0
  107. package/deps/rocksdb/rocksdb/db/compaction/sst_partitioner.cc +46 -0
  108. package/deps/rocksdb/rocksdb/db/comparator_db_test.cc +1 -1
  109. package/deps/rocksdb/rocksdb/db/convenience.cc +6 -3
  110. package/deps/rocksdb/rocksdb/db/corruption_test.cc +383 -28
  111. package/deps/rocksdb/rocksdb/db/cuckoo_table_db_test.cc +7 -2
  112. package/deps/rocksdb/rocksdb/db/db_basic_test.cc +154 -45
  113. package/deps/rocksdb/rocksdb/db/db_block_cache_test.cc +1095 -33
  114. package/deps/rocksdb/rocksdb/db/db_bloom_filter_test.cc +1249 -203
  115. package/deps/rocksdb/rocksdb/db/db_compaction_filter_test.cc +135 -9
  116. package/deps/rocksdb/rocksdb/db/db_compaction_test.cc +1348 -166
  117. package/deps/rocksdb/rocksdb/db/db_dynamic_level_test.cc +3 -5
  118. package/deps/rocksdb/rocksdb/db/db_encryption_test.cc +1 -1
  119. package/deps/rocksdb/rocksdb/db/db_filesnapshot.cc +312 -45
  120. package/deps/rocksdb/rocksdb/db/db_flush_test.cc +1734 -48
  121. package/deps/rocksdb/rocksdb/db/{compacted_db_impl.cc → db_impl/compacted_db_impl.cc} +24 -7
  122. package/deps/rocksdb/rocksdb/db/{compacted_db_impl.h → db_impl/compacted_db_impl.h} +1 -1
  123. package/deps/rocksdb/rocksdb/db/db_impl/db_impl.cc +644 -333
  124. package/deps/rocksdb/rocksdb/db/db_impl/db_impl.h +365 -92
  125. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_compaction_flush.cc +578 -210
  126. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_debug.cc +38 -16
  127. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_experimental.cc +17 -10
  128. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_files.cc +75 -74
  129. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_open.cc +450 -183
  130. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_readonly.cc +42 -9
  131. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_secondary.cc +232 -15
  132. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_secondary.h +42 -4
  133. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_write.cc +297 -100
  134. package/deps/rocksdb/rocksdb/db/db_info_dumper.cc +16 -15
  135. package/deps/rocksdb/rocksdb/db/db_inplace_update_test.cc +31 -1
  136. package/deps/rocksdb/rocksdb/db/db_io_failure_test.cc +6 -5
  137. package/deps/rocksdb/rocksdb/db/db_iter.cc +218 -153
  138. package/deps/rocksdb/rocksdb/db/db_iter.h +14 -12
  139. package/deps/rocksdb/rocksdb/db/db_iter_stress_test.cc +1 -1
  140. package/deps/rocksdb/rocksdb/db/db_iter_test.cc +84 -160
  141. package/deps/rocksdb/rocksdb/db/db_iterator_test.cc +47 -6
  142. package/deps/rocksdb/rocksdb/db/db_kv_checksum_test.cc +204 -0
  143. package/deps/rocksdb/rocksdb/db/db_log_iter_test.cc +21 -13
  144. package/deps/rocksdb/rocksdb/db/db_logical_block_size_cache_test.cc +17 -10
  145. package/deps/rocksdb/rocksdb/db/db_memtable_test.cc +38 -24
  146. package/deps/rocksdb/rocksdb/db/db_merge_operand_test.cc +184 -19
  147. package/deps/rocksdb/rocksdb/db/db_merge_operator_test.cc +1 -1
  148. package/deps/rocksdb/rocksdb/db/db_options_test.cc +183 -3
  149. package/deps/rocksdb/rocksdb/db/db_properties_test.cc +409 -9
  150. package/deps/rocksdb/rocksdb/db/db_range_del_test.cc +92 -23
  151. package/deps/rocksdb/rocksdb/db/db_rate_limiter_test.cc +446 -0
  152. package/deps/rocksdb/rocksdb/db/{db_impl/db_secondary_test.cc → db_secondary_test.cc} +363 -35
  153. package/deps/rocksdb/rocksdb/db/db_sst_test.cc +520 -15
  154. package/deps/rocksdb/rocksdb/db/db_statistics_test.cc +50 -1
  155. package/deps/rocksdb/rocksdb/db/db_table_properties_test.cc +139 -4
  156. package/deps/rocksdb/rocksdb/db/db_tailing_iter_test.cc +1 -1
  157. package/deps/rocksdb/rocksdb/db/db_test.cc +669 -359
  158. package/deps/rocksdb/rocksdb/db/db_test2.cc +2110 -304
  159. package/deps/rocksdb/rocksdb/db/db_test_util.cc +76 -43
  160. package/deps/rocksdb/rocksdb/db/db_test_util.h +231 -103
  161. package/deps/rocksdb/rocksdb/db/db_universal_compaction_test.cc +19 -11
  162. package/deps/rocksdb/rocksdb/db/db_wal_test.cc +490 -71
  163. package/deps/rocksdb/rocksdb/db/db_with_timestamp_basic_test.cc +980 -349
  164. package/deps/rocksdb/rocksdb/db/db_with_timestamp_compaction_test.cc +11 -12
  165. package/deps/rocksdb/rocksdb/db/db_write_buffer_manager_test.cc +793 -0
  166. package/deps/rocksdb/rocksdb/db/db_write_test.cc +2 -1
  167. package/deps/rocksdb/rocksdb/db/dbformat.cc +4 -12
  168. package/deps/rocksdb/rocksdb/db/dbformat.h +28 -18
  169. package/deps/rocksdb/rocksdb/db/dbformat_test.cc +3 -0
  170. package/deps/rocksdb/rocksdb/db/deletefile_test.cc +50 -15
  171. package/deps/rocksdb/rocksdb/db/error_handler.cc +127 -41
  172. package/deps/rocksdb/rocksdb/db/error_handler.h +12 -5
  173. package/deps/rocksdb/rocksdb/db/error_handler_fs_test.cc +524 -255
  174. package/deps/rocksdb/rocksdb/db/event_helpers.cc +136 -11
  175. package/deps/rocksdb/rocksdb/db/event_helpers.h +27 -2
  176. package/deps/rocksdb/rocksdb/db/experimental.cc +100 -0
  177. package/deps/rocksdb/rocksdb/db/external_sst_file_basic_test.cc +307 -4
  178. package/deps/rocksdb/rocksdb/db/external_sst_file_ingestion_job.cc +137 -60
  179. package/deps/rocksdb/rocksdb/db/external_sst_file_ingestion_job.h +12 -8
  180. package/deps/rocksdb/rocksdb/db/external_sst_file_test.cc +86 -55
  181. package/deps/rocksdb/rocksdb/db/fault_injection_test.cc +86 -5
  182. package/deps/rocksdb/rocksdb/db/filename_test.cc +63 -0
  183. package/deps/rocksdb/rocksdb/db/flush_job.cc +619 -64
  184. package/deps/rocksdb/rocksdb/db/flush_job.h +30 -7
  185. package/deps/rocksdb/rocksdb/db/flush_job_test.cc +33 -16
  186. package/deps/rocksdb/rocksdb/db/flush_scheduler.h +2 -1
  187. package/deps/rocksdb/rocksdb/db/forward_iterator.cc +18 -17
  188. package/deps/rocksdb/rocksdb/db/forward_iterator.h +5 -4
  189. package/deps/rocksdb/rocksdb/db/forward_iterator_bench.cc +0 -1
  190. package/deps/rocksdb/rocksdb/db/history_trimming_iterator.h +91 -0
  191. package/deps/rocksdb/rocksdb/db/import_column_family_job.cc +25 -14
  192. package/deps/rocksdb/rocksdb/db/import_column_family_job.h +6 -5
  193. package/deps/rocksdb/rocksdb/db/import_column_family_test.cc +1 -1
  194. package/deps/rocksdb/rocksdb/db/internal_stats.cc +471 -50
  195. package/deps/rocksdb/rocksdb/db/internal_stats.h +129 -25
  196. package/deps/rocksdb/rocksdb/db/job_context.h +22 -9
  197. package/deps/rocksdb/rocksdb/db/kv_checksum.h +394 -0
  198. package/deps/rocksdb/rocksdb/db/listener_test.cc +518 -41
  199. package/deps/rocksdb/rocksdb/db/log_format.h +4 -1
  200. package/deps/rocksdb/rocksdb/db/log_reader.cc +129 -6
  201. package/deps/rocksdb/rocksdb/db/log_reader.h +17 -1
  202. package/deps/rocksdb/rocksdb/db/log_test.cc +161 -11
  203. package/deps/rocksdb/rocksdb/db/log_writer.cc +92 -13
  204. package/deps/rocksdb/rocksdb/db/log_writer.h +18 -5
  205. package/deps/rocksdb/rocksdb/db/logs_with_prep_tracker.h +1 -1
  206. package/deps/rocksdb/rocksdb/db/lookup_key.h +0 -1
  207. package/deps/rocksdb/rocksdb/db/malloc_stats.cc +2 -2
  208. package/deps/rocksdb/rocksdb/db/manual_compaction_test.cc +21 -8
  209. package/deps/rocksdb/rocksdb/db/memtable.cc +144 -54
  210. package/deps/rocksdb/rocksdb/db/memtable.h +72 -15
  211. package/deps/rocksdb/rocksdb/db/memtable_list.cc +95 -47
  212. package/deps/rocksdb/rocksdb/db/memtable_list.h +33 -13
  213. package/deps/rocksdb/rocksdb/db/memtable_list_test.cc +61 -31
  214. package/deps/rocksdb/rocksdb/db/merge_context.h +20 -8
  215. package/deps/rocksdb/rocksdb/db/merge_helper.cc +54 -11
  216. package/deps/rocksdb/rocksdb/db/merge_helper.h +17 -6
  217. package/deps/rocksdb/rocksdb/db/merge_helper_test.cc +13 -7
  218. package/deps/rocksdb/rocksdb/db/merge_test.cc +40 -19
  219. package/deps/rocksdb/rocksdb/db/obsolete_files_test.cc +14 -25
  220. package/deps/rocksdb/rocksdb/db/output_validator.cc +3 -0
  221. package/deps/rocksdb/rocksdb/db/output_validator.h +5 -4
  222. package/deps/rocksdb/rocksdb/db/perf_context_test.cc +32 -28
  223. package/deps/rocksdb/rocksdb/db/periodic_work_scheduler.cc +43 -29
  224. package/deps/rocksdb/rocksdb/db/periodic_work_scheduler.h +9 -7
  225. package/deps/rocksdb/rocksdb/db/periodic_work_scheduler_test.cc +21 -16
  226. package/deps/rocksdb/rocksdb/db/pinned_iterators_manager.h +1 -1
  227. package/deps/rocksdb/rocksdb/db/plain_table_db_test.cc +29 -36
  228. package/deps/rocksdb/rocksdb/db/pre_release_callback.h +1 -2
  229. package/deps/rocksdb/rocksdb/db/prefix_test.cc +4 -4
  230. package/deps/rocksdb/rocksdb/db/range_del_aggregator.h +2 -2
  231. package/deps/rocksdb/rocksdb/db/range_del_aggregator_bench.cc +11 -11
  232. package/deps/rocksdb/rocksdb/db/range_del_aggregator_test.cc +3 -2
  233. package/deps/rocksdb/rocksdb/db/range_tombstone_fragmenter.cc +14 -8
  234. package/deps/rocksdb/rocksdb/db/range_tombstone_fragmenter.h +17 -0
  235. package/deps/rocksdb/rocksdb/db/range_tombstone_fragmenter_test.cc +4 -2
  236. package/deps/rocksdb/rocksdb/db/read_callback.h +1 -0
  237. package/deps/rocksdb/rocksdb/db/repair.cc +87 -58
  238. package/deps/rocksdb/rocksdb/db/repair_test.cc +35 -5
  239. package/deps/rocksdb/rocksdb/db/snapshot_impl.h +2 -1
  240. package/deps/rocksdb/rocksdb/db/table_cache.cc +95 -69
  241. package/deps/rocksdb/rocksdb/db/table_cache.h +63 -53
  242. package/deps/rocksdb/rocksdb/db/table_properties_collector.cc +4 -4
  243. package/deps/rocksdb/rocksdb/db/table_properties_collector.h +78 -10
  244. package/deps/rocksdb/rocksdb/db/table_properties_collector_test.cc +28 -33
  245. package/deps/rocksdb/rocksdb/db/transaction_log_impl.cc +30 -51
  246. package/deps/rocksdb/rocksdb/db/transaction_log_impl.h +12 -8
  247. package/deps/rocksdb/rocksdb/db/version_builder.cc +564 -341
  248. package/deps/rocksdb/rocksdb/db/version_builder.h +8 -8
  249. package/deps/rocksdb/rocksdb/db/version_builder_test.cc +327 -155
  250. package/deps/rocksdb/rocksdb/db/version_edit.cc +89 -27
  251. package/deps/rocksdb/rocksdb/db/version_edit.h +42 -17
  252. package/deps/rocksdb/rocksdb/db/version_edit_handler.cc +324 -43
  253. package/deps/rocksdb/rocksdb/db/version_edit_handler.h +79 -22
  254. package/deps/rocksdb/rocksdb/db/version_edit_test.cc +165 -20
  255. package/deps/rocksdb/rocksdb/db/version_set.cc +935 -1034
  256. package/deps/rocksdb/rocksdb/db/version_set.h +183 -122
  257. package/deps/rocksdb/rocksdb/db/version_set_test.cc +556 -138
  258. package/deps/rocksdb/rocksdb/db/version_util.h +68 -0
  259. package/deps/rocksdb/rocksdb/db/wal_manager.cc +23 -21
  260. package/deps/rocksdb/rocksdb/db/wal_manager.h +5 -2
  261. package/deps/rocksdb/rocksdb/db/wal_manager_test.cc +30 -27
  262. package/deps/rocksdb/rocksdb/db/write_batch.cc +704 -209
  263. package/deps/rocksdb/rocksdb/db/write_batch_internal.h +135 -2
  264. package/deps/rocksdb/rocksdb/db/write_batch_test.cc +209 -5
  265. package/deps/rocksdb/rocksdb/db/write_callback_test.cc +2 -0
  266. package/deps/rocksdb/rocksdb/db/write_controller.cc +47 -54
  267. package/deps/rocksdb/rocksdb/db/write_controller.h +12 -9
  268. package/deps/rocksdb/rocksdb/db/write_controller_test.cc +215 -103
  269. package/deps/rocksdb/rocksdb/db/write_thread.cc +11 -0
  270. package/deps/rocksdb/rocksdb/db/write_thread.h +14 -8
  271. package/deps/rocksdb/rocksdb/db_stress_tool/CMakeLists.txt +7 -4
  272. package/deps/rocksdb/rocksdb/db_stress_tool/batched_ops_stress.cc +10 -3
  273. package/deps/rocksdb/rocksdb/db_stress_tool/cf_consistency_stress.cc +6 -0
  274. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress.cc +1 -1
  275. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_common.cc +19 -2
  276. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_common.h +78 -25
  277. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_compaction_filter.h +13 -2
  278. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_driver.cc +29 -12
  279. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_env_wrapper.h +5 -1
  280. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_gflags.cc +199 -32
  281. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_listener.cc +188 -0
  282. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_listener.h +59 -10
  283. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_shared_state.h +77 -109
  284. package/deps/rocksdb/rocksdb/{third-party/folly/folly/synchronization/WaitOptions.cpp → db_stress_tool/db_stress_stat.cc} +9 -4
  285. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_stat.h +7 -6
  286. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_table_properties_collector.h +1 -0
  287. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_test_base.cc +699 -143
  288. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_test_base.h +20 -2
  289. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_tool.cc +49 -39
  290. package/deps/rocksdb/rocksdb/db_stress_tool/expected_state.cc +631 -0
  291. package/deps/rocksdb/rocksdb/db_stress_tool/expected_state.h +287 -0
  292. package/deps/rocksdb/rocksdb/db_stress_tool/multi_ops_txns_stress.cc +1565 -0
  293. package/deps/rocksdb/rocksdb/db_stress_tool/multi_ops_txns_stress.h +374 -0
  294. package/deps/rocksdb/rocksdb/db_stress_tool/no_batched_ops_stress.cc +149 -18
  295. package/deps/rocksdb/rocksdb/env/composite_env.cc +464 -0
  296. package/deps/rocksdb/rocksdb/env/composite_env_wrapper.h +98 -646
  297. package/deps/rocksdb/rocksdb/env/emulated_clock.h +114 -0
  298. package/deps/rocksdb/rocksdb/env/env.cc +632 -42
  299. package/deps/rocksdb/rocksdb/env/env_basic_test.cc +84 -36
  300. package/deps/rocksdb/rocksdb/env/env_chroot.cc +88 -286
  301. package/deps/rocksdb/rocksdb/env/env_chroot.h +34 -1
  302. package/deps/rocksdb/rocksdb/env/env_encryption.cc +469 -277
  303. package/deps/rocksdb/rocksdb/env/env_encryption_ctr.h +9 -30
  304. package/deps/rocksdb/rocksdb/env/env_posix.cc +110 -119
  305. package/deps/rocksdb/rocksdb/env/env_test.cc +1128 -39
  306. package/deps/rocksdb/rocksdb/env/file_system.cc +147 -8
  307. package/deps/rocksdb/rocksdb/env/file_system_tracer.cc +207 -136
  308. package/deps/rocksdb/rocksdb/env/file_system_tracer.h +86 -54
  309. package/deps/rocksdb/rocksdb/env/fs_posix.cc +192 -64
  310. package/deps/rocksdb/rocksdb/env/fs_readonly.h +107 -0
  311. package/deps/rocksdb/rocksdb/env/fs_remap.cc +339 -0
  312. package/deps/rocksdb/rocksdb/env/fs_remap.h +139 -0
  313. package/deps/rocksdb/rocksdb/env/io_posix.cc +245 -41
  314. package/deps/rocksdb/rocksdb/env/io_posix.h +66 -1
  315. package/deps/rocksdb/rocksdb/env/mock_env.cc +147 -149
  316. package/deps/rocksdb/rocksdb/env/mock_env.h +113 -11
  317. package/deps/rocksdb/rocksdb/env/mock_env_test.cc +2 -4
  318. package/deps/rocksdb/rocksdb/env/unique_id_gen.cc +164 -0
  319. package/deps/rocksdb/rocksdb/env/unique_id_gen.h +71 -0
  320. package/deps/rocksdb/rocksdb/file/delete_scheduler.cc +9 -5
  321. package/deps/rocksdb/rocksdb/file/delete_scheduler.h +6 -4
  322. package/deps/rocksdb/rocksdb/file/delete_scheduler_test.cc +19 -12
  323. package/deps/rocksdb/rocksdb/file/file_prefetch_buffer.cc +459 -70
  324. package/deps/rocksdb/rocksdb/file/file_prefetch_buffer.h +205 -28
  325. package/deps/rocksdb/rocksdb/file/file_util.cc +39 -28
  326. package/deps/rocksdb/rocksdb/file/file_util.h +18 -27
  327. package/deps/rocksdb/rocksdb/file/filename.cc +59 -22
  328. package/deps/rocksdb/rocksdb/file/filename.h +13 -8
  329. package/deps/rocksdb/rocksdb/file/line_file_reader.cc +68 -0
  330. package/deps/rocksdb/rocksdb/file/line_file_reader.h +59 -0
  331. package/deps/rocksdb/rocksdb/file/prefetch_test.cc +1130 -6
  332. package/deps/rocksdb/rocksdb/file/random_access_file_reader.cc +220 -36
  333. package/deps/rocksdb/rocksdb/file/random_access_file_reader.h +69 -17
  334. package/deps/rocksdb/rocksdb/file/random_access_file_reader_test.cc +13 -12
  335. package/deps/rocksdb/rocksdb/file/read_write_util.cc +3 -38
  336. package/deps/rocksdb/rocksdb/file/read_write_util.h +0 -4
  337. package/deps/rocksdb/rocksdb/file/readahead_file_info.h +33 -0
  338. package/deps/rocksdb/rocksdb/file/sequence_file_reader.cc +57 -9
  339. package/deps/rocksdb/rocksdb/file/sequence_file_reader.h +58 -6
  340. package/deps/rocksdb/rocksdb/file/sst_file_manager_impl.cc +29 -54
  341. package/deps/rocksdb/rocksdb/file/sst_file_manager_impl.h +22 -29
  342. package/deps/rocksdb/rocksdb/file/writable_file_writer.cc +424 -50
  343. package/deps/rocksdb/rocksdb/file/writable_file_writer.h +66 -19
  344. package/deps/rocksdb/rocksdb/include/rocksdb/advanced_options.h +157 -66
  345. package/deps/rocksdb/rocksdb/include/rocksdb/c.h +224 -121
  346. package/deps/rocksdb/rocksdb/include/rocksdb/cache.h +333 -30
  347. package/deps/rocksdb/rocksdb/include/rocksdb/cache_bench_tool.h +14 -0
  348. package/deps/rocksdb/rocksdb/include/rocksdb/cleanable.h +1 -1
  349. package/deps/rocksdb/rocksdb/include/rocksdb/compaction_filter.h +90 -50
  350. package/deps/rocksdb/rocksdb/include/rocksdb/compaction_job_stats.h +13 -5
  351. package/deps/rocksdb/rocksdb/include/rocksdb/comparator.h +20 -4
  352. package/deps/rocksdb/rocksdb/include/rocksdb/concurrent_task_limiter.h +8 -3
  353. package/deps/rocksdb/rocksdb/include/rocksdb/configurable.h +53 -12
  354. package/deps/rocksdb/rocksdb/include/rocksdb/convenience.h +31 -6
  355. package/deps/rocksdb/rocksdb/include/rocksdb/customizable.h +102 -7
  356. package/deps/rocksdb/rocksdb/include/rocksdb/data_structure.h +51 -0
  357. package/deps/rocksdb/rocksdb/include/rocksdb/db.h +370 -262
  358. package/deps/rocksdb/rocksdb/include/rocksdb/env.h +286 -87
  359. package/deps/rocksdb/rocksdb/include/rocksdb/env_encryption.h +124 -64
  360. package/deps/rocksdb/rocksdb/include/rocksdb/experimental.h +27 -0
  361. package/deps/rocksdb/rocksdb/include/rocksdb/file_checksum.h +21 -4
  362. package/deps/rocksdb/rocksdb/include/rocksdb/file_system.h +384 -41
  363. package/deps/rocksdb/rocksdb/include/rocksdb/filter_policy.h +111 -143
  364. package/deps/rocksdb/rocksdb/include/rocksdb/flush_block_policy.h +20 -6
  365. package/deps/rocksdb/rocksdb/include/rocksdb/functor_wrapper.h +56 -0
  366. package/deps/rocksdb/rocksdb/include/rocksdb/io_status.h +15 -33
  367. package/deps/rocksdb/rocksdb/include/rocksdb/iostats_context.h +37 -1
  368. package/deps/rocksdb/rocksdb/include/rocksdb/iterator.h +1 -3
  369. package/deps/rocksdb/rocksdb/include/rocksdb/listener.h +314 -26
  370. package/deps/rocksdb/rocksdb/include/rocksdb/memory_allocator.h +11 -7
  371. package/deps/rocksdb/rocksdb/include/rocksdb/memtablerep.h +50 -15
  372. package/deps/rocksdb/rocksdb/include/rocksdb/merge_operator.h +10 -3
  373. package/deps/rocksdb/rocksdb/include/rocksdb/metadata.h +186 -96
  374. package/deps/rocksdb/rocksdb/include/rocksdb/options.h +373 -103
  375. package/deps/rocksdb/rocksdb/include/rocksdb/perf_context.h +13 -3
  376. package/deps/rocksdb/rocksdb/include/rocksdb/persistent_cache.h +2 -2
  377. package/deps/rocksdb/rocksdb/include/rocksdb/rate_limiter.h +37 -7
  378. package/deps/rocksdb/rocksdb/include/rocksdb/rocksdb_namespace.h +6 -0
  379. package/deps/rocksdb/rocksdb/include/rocksdb/secondary_cache.h +87 -0
  380. package/deps/rocksdb/rocksdb/include/rocksdb/slice.h +5 -12
  381. package/deps/rocksdb/rocksdb/include/rocksdb/slice_transform.h +59 -30
  382. package/deps/rocksdb/rocksdb/include/rocksdb/sst_file_manager.h +11 -11
  383. package/deps/rocksdb/rocksdb/include/rocksdb/sst_file_writer.h +22 -0
  384. package/deps/rocksdb/rocksdb/include/rocksdb/sst_partitioner.h +17 -10
  385. package/deps/rocksdb/rocksdb/include/rocksdb/statistics.h +121 -41
  386. package/deps/rocksdb/rocksdb/include/rocksdb/stats_history.h +1 -0
  387. package/deps/rocksdb/rocksdb/include/rocksdb/status.h +114 -136
  388. package/deps/rocksdb/rocksdb/include/rocksdb/system_clock.h +116 -0
  389. package/deps/rocksdb/rocksdb/include/rocksdb/table.h +160 -18
  390. package/deps/rocksdb/rocksdb/include/rocksdb/table_properties.h +57 -15
  391. package/deps/rocksdb/rocksdb/include/rocksdb/thread_status.h +3 -1
  392. package/deps/rocksdb/rocksdb/include/rocksdb/trace_reader_writer.h +10 -6
  393. package/deps/rocksdb/rocksdb/include/rocksdb/trace_record.h +247 -0
  394. package/deps/rocksdb/rocksdb/include/rocksdb/trace_record_result.h +187 -0
  395. package/deps/rocksdb/rocksdb/include/rocksdb/transaction_log.h +1 -1
  396. package/deps/rocksdb/rocksdb/include/rocksdb/types.h +14 -24
  397. package/deps/rocksdb/rocksdb/include/rocksdb/unique_id.h +46 -0
  398. package/deps/rocksdb/rocksdb/include/rocksdb/universal_compaction.h +14 -4
  399. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/agg_merge.h +138 -0
  400. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/backup_engine.h +631 -0
  401. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/cache_dump_load.h +142 -0
  402. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/checkpoint.h +12 -9
  403. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/customizable_util.h +368 -0
  404. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/ldb_cmd.h +24 -0
  405. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/ldb_cmd_execute_result.h +4 -0
  406. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/object_registry.h +418 -63
  407. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/options_type.h +143 -73
  408. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/options_util.h +2 -2
  409. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/replayer.h +87 -0
  410. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/sim_cache.h +2 -2
  411. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/stackable_db.h +43 -5
  412. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/table_properties_collectors.h +18 -23
  413. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/transaction.h +26 -0
  414. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/transaction_db.h +32 -6
  415. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/transaction_db_mutex.h +1 -2
  416. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/write_batch_with_index.h +20 -1
  417. package/deps/rocksdb/rocksdb/include/rocksdb/version.h +30 -3
  418. package/deps/rocksdb/rocksdb/include/rocksdb/wal_filter.h +11 -2
  419. package/deps/rocksdb/rocksdb/include/rocksdb/write_batch.h +89 -11
  420. package/deps/rocksdb/rocksdb/include/rocksdb/write_batch_base.h +11 -0
  421. package/deps/rocksdb/rocksdb/include/rocksdb/write_buffer_manager.h +108 -38
  422. package/deps/rocksdb/rocksdb/logging/auto_roll_logger.cc +40 -23
  423. package/deps/rocksdb/rocksdb/logging/auto_roll_logger.h +12 -5
  424. package/deps/rocksdb/rocksdb/logging/auto_roll_logger_test.cc +100 -49
  425. package/deps/rocksdb/rocksdb/logging/env_logger.h +7 -5
  426. package/deps/rocksdb/rocksdb/logging/env_logger_test.cc +0 -1
  427. package/deps/rocksdb/rocksdb/logging/posix_logger.h +3 -9
  428. package/deps/rocksdb/rocksdb/memory/arena.cc +3 -1
  429. package/deps/rocksdb/rocksdb/memory/arena.h +1 -1
  430. package/deps/rocksdb/rocksdb/memory/jemalloc_nodump_allocator.cc +171 -106
  431. package/deps/rocksdb/rocksdb/memory/jemalloc_nodump_allocator.h +31 -15
  432. package/deps/rocksdb/rocksdb/memory/memkind_kmem_allocator.cc +15 -4
  433. package/deps/rocksdb/rocksdb/memory/memkind_kmem_allocator.h +24 -8
  434. package/deps/rocksdb/rocksdb/memory/memory_allocator.cc +91 -0
  435. package/deps/rocksdb/rocksdb/memory/memory_allocator_test.cc +239 -0
  436. package/deps/rocksdb/rocksdb/memory/memory_usage.h +14 -1
  437. package/deps/rocksdb/rocksdb/memtable/hash_linklist_rep.cc +72 -9
  438. package/deps/rocksdb/rocksdb/memtable/hash_skiplist_rep.cc +52 -6
  439. package/deps/rocksdb/rocksdb/memtable/inlineskiplist.h +53 -0
  440. package/deps/rocksdb/rocksdb/memtable/inlineskiplist_test.cc +5 -5
  441. package/deps/rocksdb/rocksdb/memtable/memtablerep_bench.cc +17 -5
  442. package/deps/rocksdb/rocksdb/memtable/skiplist_test.cc +1 -1
  443. package/deps/rocksdb/rocksdb/memtable/skiplistrep.cc +87 -0
  444. package/deps/rocksdb/rocksdb/memtable/vectorrep.cc +20 -10
  445. package/deps/rocksdb/rocksdb/memtable/write_buffer_manager.cc +148 -94
  446. package/deps/rocksdb/rocksdb/memtable/write_buffer_manager_test.cc +160 -62
  447. package/deps/rocksdb/rocksdb/microbench/CMakeLists.txt +17 -0
  448. package/deps/rocksdb/rocksdb/microbench/db_basic_bench.cc +1360 -0
  449. package/deps/rocksdb/rocksdb/microbench/ribbon_bench.cc +153 -0
  450. package/deps/rocksdb/rocksdb/monitoring/histogram.cc +8 -15
  451. package/deps/rocksdb/rocksdb/monitoring/histogram.h +0 -1
  452. package/deps/rocksdb/rocksdb/monitoring/histogram_test.cc +18 -16
  453. package/deps/rocksdb/rocksdb/monitoring/histogram_windowing.cc +9 -7
  454. package/deps/rocksdb/rocksdb/monitoring/histogram_windowing.h +5 -3
  455. package/deps/rocksdb/rocksdb/monitoring/instrumented_mutex.cc +7 -5
  456. package/deps/rocksdb/rocksdb/monitoring/instrumented_mutex.h +37 -12
  457. package/deps/rocksdb/rocksdb/monitoring/iostats_context.cc +26 -6
  458. package/deps/rocksdb/rocksdb/monitoring/iostats_context_imp.h +6 -10
  459. package/deps/rocksdb/rocksdb/monitoring/perf_context.cc +14 -13
  460. package/deps/rocksdb/rocksdb/monitoring/perf_context_imp.h +19 -20
  461. package/deps/rocksdb/rocksdb/monitoring/perf_step_timer.h +18 -18
  462. package/deps/rocksdb/rocksdb/monitoring/statistics.cc +84 -2
  463. package/deps/rocksdb/rocksdb/monitoring/statistics.h +6 -0
  464. package/deps/rocksdb/rocksdb/monitoring/statistics_test.cc +47 -2
  465. package/deps/rocksdb/rocksdb/monitoring/stats_history_test.cc +67 -54
  466. package/deps/rocksdb/rocksdb/monitoring/thread_status_updater.cc +4 -1
  467. package/deps/rocksdb/rocksdb/monitoring/thread_status_util.cc +2 -1
  468. package/deps/rocksdb/rocksdb/monitoring/thread_status_util_debug.cc +2 -2
  469. package/deps/rocksdb/rocksdb/options/cf_options.cc +280 -212
  470. package/deps/rocksdb/rocksdb/options/cf_options.h +51 -57
  471. package/deps/rocksdb/rocksdb/options/configurable.cc +242 -138
  472. package/deps/rocksdb/rocksdb/options/configurable_helper.h +4 -68
  473. package/deps/rocksdb/rocksdb/options/configurable_test.cc +144 -21
  474. package/deps/rocksdb/rocksdb/options/configurable_test.h +2 -3
  475. package/deps/rocksdb/rocksdb/options/customizable.cc +67 -7
  476. package/deps/rocksdb/rocksdb/options/customizable_test.cc +1773 -151
  477. package/deps/rocksdb/rocksdb/options/db_options.cc +275 -47
  478. package/deps/rocksdb/rocksdb/options/db_options.h +36 -7
  479. package/deps/rocksdb/rocksdb/options/options.cc +49 -17
  480. package/deps/rocksdb/rocksdb/options/options_helper.cc +369 -352
  481. package/deps/rocksdb/rocksdb/options/options_helper.h +23 -23
  482. package/deps/rocksdb/rocksdb/options/options_parser.cc +18 -13
  483. package/deps/rocksdb/rocksdb/options/options_settable_test.cc +67 -54
  484. package/deps/rocksdb/rocksdb/options/options_test.cc +1162 -187
  485. package/deps/rocksdb/rocksdb/port/jemalloc_helper.h +1 -1
  486. package/deps/rocksdb/rocksdb/port/lang.h +52 -0
  487. package/deps/rocksdb/rocksdb/port/port_example.h +1 -1
  488. package/deps/rocksdb/rocksdb/port/port_posix.cc +31 -2
  489. package/deps/rocksdb/rocksdb/port/port_posix.h +20 -2
  490. package/deps/rocksdb/rocksdb/port/stack_trace.cc +20 -4
  491. package/deps/rocksdb/rocksdb/port/sys_time.h +2 -2
  492. package/deps/rocksdb/rocksdb/port/win/env_default.cc +7 -7
  493. package/deps/rocksdb/rocksdb/port/win/env_win.cc +44 -74
  494. package/deps/rocksdb/rocksdb/port/win/env_win.h +25 -23
  495. package/deps/rocksdb/rocksdb/port/win/io_win.cc +32 -34
  496. package/deps/rocksdb/rocksdb/port/win/io_win.h +12 -6
  497. package/deps/rocksdb/rocksdb/port/win/port_win.cc +55 -35
  498. package/deps/rocksdb/rocksdb/port/win/port_win.h +22 -5
  499. package/deps/rocksdb/rocksdb/port/win/win_logger.cc +3 -3
  500. package/deps/rocksdb/rocksdb/port/win/win_logger.h +3 -5
  501. package/deps/rocksdb/rocksdb/port/win/win_thread.cc +7 -1
  502. package/deps/rocksdb/rocksdb/port/win/win_thread.h +12 -17
  503. package/deps/rocksdb/rocksdb/python.mk +9 -0
  504. package/deps/rocksdb/rocksdb/src.mk +82 -34
  505. package/deps/rocksdb/rocksdb/table/adaptive/adaptive_table_factory.cc +3 -4
  506. package/deps/rocksdb/rocksdb/table/adaptive/adaptive_table_factory.h +1 -1
  507. package/deps/rocksdb/rocksdb/table/block_based/block.cc +158 -80
  508. package/deps/rocksdb/rocksdb/table/block_based/block.h +64 -36
  509. package/deps/rocksdb/rocksdb/table/block_based/block_based_filter_block.cc +23 -14
  510. package/deps/rocksdb/rocksdb/table/block_based/block_based_filter_block.h +13 -5
  511. package/deps/rocksdb/rocksdb/table/block_based/block_based_filter_block_test.cc +3 -218
  512. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_builder.cc +603 -328
  513. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_builder.h +28 -22
  514. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_factory.cc +220 -82
  515. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_factory.h +8 -2
  516. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_iterator.cc +3 -4
  517. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_iterator.h +28 -4
  518. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader.cc +598 -492
  519. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader.h +151 -96
  520. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader_impl.h +31 -58
  521. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader_test.cc +330 -92
  522. package/deps/rocksdb/rocksdb/table/block_based/block_builder.cc +50 -19
  523. package/deps/rocksdb/rocksdb/table/block_based/block_builder.h +23 -0
  524. package/deps/rocksdb/rocksdb/table/block_based/block_like_traits.h +226 -0
  525. package/deps/rocksdb/rocksdb/table/block_based/block_prefetcher.cc +56 -22
  526. package/deps/rocksdb/rocksdb/table/block_based/block_prefetcher.h +42 -4
  527. package/deps/rocksdb/rocksdb/table/block_based/block_test.cc +5 -2
  528. package/deps/rocksdb/rocksdb/table/block_based/block_type.h +2 -0
  529. package/deps/rocksdb/rocksdb/table/block_based/cachable_entry.h +34 -20
  530. package/deps/rocksdb/rocksdb/table/block_based/data_block_hash_index_test.cc +9 -10
  531. package/deps/rocksdb/rocksdb/table/block_based/filter_block.h +26 -3
  532. package/deps/rocksdb/rocksdb/table/block_based/filter_block_reader_common.cc +2 -1
  533. package/deps/rocksdb/rocksdb/table/block_based/filter_policy.cc +844 -202
  534. package/deps/rocksdb/rocksdb/table/block_based/filter_policy_internal.h +281 -81
  535. package/deps/rocksdb/rocksdb/table/block_based/flush_block_policy.cc +62 -2
  536. package/deps/rocksdb/rocksdb/table/block_based/flush_block_policy.h +2 -3
  537. package/deps/rocksdb/rocksdb/table/block_based/full_filter_block.cc +28 -7
  538. package/deps/rocksdb/rocksdb/table/block_based/full_filter_block.h +22 -6
  539. package/deps/rocksdb/rocksdb/table/block_based/full_filter_block_test.cc +28 -26
  540. package/deps/rocksdb/rocksdb/table/block_based/hash_index_reader.cc +1 -1
  541. package/deps/rocksdb/rocksdb/table/block_based/index_builder.cc +1 -2
  542. package/deps/rocksdb/rocksdb/table/block_based/index_reader_common.cc +2 -1
  543. package/deps/rocksdb/rocksdb/table/block_based/mock_block_based_table.h +11 -4
  544. package/deps/rocksdb/rocksdb/table/block_based/parsed_full_filter_block.cc +2 -1
  545. package/deps/rocksdb/rocksdb/table/block_based/parsed_full_filter_block.h +2 -0
  546. package/deps/rocksdb/rocksdb/table/block_based/partitioned_filter_block.cc +68 -26
  547. package/deps/rocksdb/rocksdb/table/block_based/partitioned_filter_block.h +44 -9
  548. package/deps/rocksdb/rocksdb/table/block_based/partitioned_filter_block_test.cc +12 -10
  549. package/deps/rocksdb/rocksdb/table/block_based/partitioned_index_iterator.cc +3 -4
  550. package/deps/rocksdb/rocksdb/table/block_based/partitioned_index_iterator.h +23 -4
  551. package/deps/rocksdb/rocksdb/table/block_based/partitioned_index_reader.cc +44 -19
  552. package/deps/rocksdb/rocksdb/table/block_based/partitioned_index_reader.h +5 -1
  553. package/deps/rocksdb/rocksdb/table/block_based/reader_common.cc +16 -28
  554. package/deps/rocksdb/rocksdb/table/block_based/uncompression_dict_reader.cc +7 -4
  555. package/deps/rocksdb/rocksdb/table/block_based/uncompression_dict_reader.h +2 -2
  556. package/deps/rocksdb/rocksdb/table/block_fetcher.cc +77 -57
  557. package/deps/rocksdb/rocksdb/table/block_fetcher.h +23 -12
  558. package/deps/rocksdb/rocksdb/table/block_fetcher_test.cc +43 -56
  559. package/deps/rocksdb/rocksdb/table/cuckoo/cuckoo_table_builder.cc +8 -8
  560. package/deps/rocksdb/rocksdb/table/cuckoo/cuckoo_table_builder.h +2 -1
  561. package/deps/rocksdb/rocksdb/table/cuckoo/cuckoo_table_builder_test.cc +52 -70
  562. package/deps/rocksdb/rocksdb/table/cuckoo/cuckoo_table_factory.cc +5 -8
  563. package/deps/rocksdb/rocksdb/table/cuckoo/cuckoo_table_factory.h +1 -1
  564. package/deps/rocksdb/rocksdb/table/cuckoo/cuckoo_table_reader.cc +17 -11
  565. package/deps/rocksdb/rocksdb/table/cuckoo/cuckoo_table_reader.h +2 -3
  566. package/deps/rocksdb/rocksdb/table/cuckoo/cuckoo_table_reader_test.cc +42 -51
  567. package/deps/rocksdb/rocksdb/table/format.cc +258 -104
  568. package/deps/rocksdb/rocksdb/table/format.h +120 -109
  569. package/deps/rocksdb/rocksdb/table/get_context.cc +97 -65
  570. package/deps/rocksdb/rocksdb/table/get_context.h +19 -12
  571. package/deps/rocksdb/rocksdb/table/internal_iterator.h +14 -0
  572. package/deps/rocksdb/rocksdb/table/iterator_wrapper.h +8 -0
  573. package/deps/rocksdb/rocksdb/table/merger_test.cc +3 -2
  574. package/deps/rocksdb/rocksdb/table/merging_iterator.cc +11 -21
  575. package/deps/rocksdb/rocksdb/table/merging_iterator.h +3 -3
  576. package/deps/rocksdb/rocksdb/table/meta_blocks.cc +176 -171
  577. package/deps/rocksdb/rocksdb/table/meta_blocks.h +47 -33
  578. package/deps/rocksdb/rocksdb/table/mock_table.cc +7 -9
  579. package/deps/rocksdb/rocksdb/table/mock_table.h +3 -2
  580. package/deps/rocksdb/rocksdb/table/multiget_context.h +15 -8
  581. package/deps/rocksdb/rocksdb/table/persistent_cache_helper.cc +22 -29
  582. package/deps/rocksdb/rocksdb/table/persistent_cache_options.h +6 -3
  583. package/deps/rocksdb/rocksdb/table/plain/plain_table_bloom.h +5 -8
  584. package/deps/rocksdb/rocksdb/table/plain/plain_table_builder.cc +29 -26
  585. package/deps/rocksdb/rocksdb/table/plain/plain_table_builder.h +12 -16
  586. package/deps/rocksdb/rocksdb/table/plain/plain_table_factory.cc +145 -69
  587. package/deps/rocksdb/rocksdb/table/plain/plain_table_factory.h +1 -1
  588. package/deps/rocksdb/rocksdb/table/plain/plain_table_index.cc +7 -6
  589. package/deps/rocksdb/rocksdb/table/plain/plain_table_index.h +3 -4
  590. package/deps/rocksdb/rocksdb/table/plain/plain_table_key_coding.cc +3 -1
  591. package/deps/rocksdb/rocksdb/table/plain/plain_table_key_coding.h +1 -1
  592. package/deps/rocksdb/rocksdb/table/plain/plain_table_reader.cc +13 -18
  593. package/deps/rocksdb/rocksdb/table/plain/plain_table_reader.h +4 -9
  594. package/deps/rocksdb/rocksdb/table/sst_file_dumper.cc +55 -37
  595. package/deps/rocksdb/rocksdb/table/sst_file_dumper.h +10 -5
  596. package/deps/rocksdb/rocksdb/table/sst_file_reader.cc +11 -8
  597. package/deps/rocksdb/rocksdb/table/sst_file_reader_test.cc +222 -16
  598. package/deps/rocksdb/rocksdb/table/sst_file_writer.cc +106 -58
  599. package/deps/rocksdb/rocksdb/table/sst_file_writer_collectors.h +6 -5
  600. package/deps/rocksdb/rocksdb/table/table_builder.h +68 -44
  601. package/deps/rocksdb/rocksdb/table/table_factory.cc +37 -10
  602. package/deps/rocksdb/rocksdb/table/table_properties.cc +109 -54
  603. package/deps/rocksdb/rocksdb/table/table_properties_internal.h +4 -20
  604. package/deps/rocksdb/rocksdb/table/table_reader_bench.cc +33 -32
  605. package/deps/rocksdb/rocksdb/table/table_reader_caller.h +2 -0
  606. package/deps/rocksdb/rocksdb/table/table_test.cc +989 -326
  607. package/deps/rocksdb/rocksdb/table/two_level_iterator.cc +4 -0
  608. package/deps/rocksdb/rocksdb/table/unique_id.cc +166 -0
  609. package/deps/rocksdb/rocksdb/table/unique_id_impl.h +59 -0
  610. package/deps/rocksdb/rocksdb/test_util/mock_time_env.cc +1 -1
  611. package/deps/rocksdb/rocksdb/test_util/mock_time_env.h +13 -10
  612. package/deps/rocksdb/rocksdb/test_util/sync_point.cc +1 -2
  613. package/deps/rocksdb/rocksdb/test_util/sync_point.h +35 -16
  614. package/deps/rocksdb/rocksdb/test_util/sync_point_impl.cc +32 -10
  615. package/deps/rocksdb/rocksdb/test_util/sync_point_impl.h +31 -4
  616. package/deps/rocksdb/rocksdb/test_util/testharness.cc +53 -1
  617. package/deps/rocksdb/rocksdb/test_util/testharness.h +67 -3
  618. package/deps/rocksdb/rocksdb/test_util/testutil.cc +236 -66
  619. package/deps/rocksdb/rocksdb/test_util/testutil.h +63 -100
  620. package/deps/rocksdb/rocksdb/test_util/transaction_test_util.cc +12 -1
  621. package/deps/rocksdb/rocksdb/tools/blob_dump.cc +2 -2
  622. package/deps/rocksdb/rocksdb/tools/block_cache_analyzer/block_cache_trace_analyzer.cc +6 -3
  623. package/deps/rocksdb/rocksdb/tools/block_cache_analyzer/block_cache_trace_analyzer.h +1 -0
  624. package/deps/rocksdb/rocksdb/tools/block_cache_analyzer/block_cache_trace_analyzer_test.cc +9 -3
  625. package/deps/rocksdb/rocksdb/tools/db_bench.cc +1 -1
  626. package/deps/rocksdb/rocksdb/tools/db_bench_tool.cc +1420 -611
  627. package/deps/rocksdb/rocksdb/tools/db_bench_tool_test.cc +11 -8
  628. package/deps/rocksdb/rocksdb/tools/db_repl_stress.cc +11 -1
  629. package/deps/rocksdb/rocksdb/tools/io_tracer_parser_test.cc +4 -2
  630. package/deps/rocksdb/rocksdb/tools/io_tracer_parser_tool.cc +46 -22
  631. package/deps/rocksdb/rocksdb/tools/ldb_cmd.cc +655 -179
  632. package/deps/rocksdb/rocksdb/tools/ldb_cmd_impl.h +58 -6
  633. package/deps/rocksdb/rocksdb/tools/ldb_cmd_test.cc +472 -29
  634. package/deps/rocksdb/rocksdb/tools/ldb_tool.cc +23 -2
  635. package/deps/rocksdb/rocksdb/tools/reduce_levels_test.cc +2 -2
  636. package/deps/rocksdb/rocksdb/tools/simulated_hybrid_file_system.cc +246 -0
  637. package/deps/rocksdb/rocksdb/tools/simulated_hybrid_file_system.h +126 -0
  638. package/deps/rocksdb/rocksdb/tools/sst_dump_test.cc +83 -29
  639. package/deps/rocksdb/rocksdb/tools/sst_dump_tool.cc +38 -17
  640. package/deps/rocksdb/rocksdb/tools/trace_analyzer_test.cc +191 -55
  641. package/deps/rocksdb/rocksdb/tools/trace_analyzer_tool.cc +219 -296
  642. package/deps/rocksdb/rocksdb/tools/trace_analyzer_tool.h +87 -53
  643. package/deps/rocksdb/rocksdb/tools/write_stress.cc +8 -7
  644. package/deps/rocksdb/rocksdb/trace_replay/block_cache_tracer.cc +6 -5
  645. package/deps/rocksdb/rocksdb/trace_replay/block_cache_tracer.h +5 -4
  646. package/deps/rocksdb/rocksdb/trace_replay/block_cache_tracer_test.cc +14 -9
  647. package/deps/rocksdb/rocksdb/trace_replay/io_tracer.cc +134 -60
  648. package/deps/rocksdb/rocksdb/trace_replay/io_tracer.h +49 -38
  649. package/deps/rocksdb/rocksdb/trace_replay/io_tracer_test.cc +152 -15
  650. package/deps/rocksdb/rocksdb/trace_replay/trace_record.cc +206 -0
  651. package/deps/rocksdb/rocksdb/trace_replay/trace_record_handler.cc +190 -0
  652. package/deps/rocksdb/rocksdb/trace_replay/trace_record_handler.h +46 -0
  653. package/deps/rocksdb/rocksdb/trace_replay/trace_record_result.cc +146 -0
  654. package/deps/rocksdb/rocksdb/trace_replay/trace_replay.cc +475 -344
  655. package/deps/rocksdb/rocksdb/trace_replay/trace_replay.h +83 -95
  656. package/deps/rocksdb/rocksdb/util/autovector.h +38 -18
  657. package/deps/rocksdb/rocksdb/util/autovector_test.cc +1 -1
  658. package/deps/rocksdb/rocksdb/util/bloom_impl.h +4 -0
  659. package/deps/rocksdb/rocksdb/util/bloom_test.cc +276 -94
  660. package/deps/rocksdb/rocksdb/util/build_version.cc.in +81 -4
  661. package/deps/rocksdb/rocksdb/util/cast_util.h +22 -0
  662. package/deps/rocksdb/rocksdb/util/channel.h +2 -0
  663. package/deps/rocksdb/rocksdb/util/coding.h +1 -33
  664. package/deps/rocksdb/rocksdb/util/compaction_job_stats_impl.cc +8 -0
  665. package/deps/rocksdb/rocksdb/util/comparator.cc +163 -3
  666. package/deps/rocksdb/rocksdb/util/compression.cc +122 -0
  667. package/deps/rocksdb/rocksdb/util/compression.h +212 -7
  668. package/deps/rocksdb/rocksdb/util/compression_context_cache.cc +1 -3
  669. package/deps/rocksdb/rocksdb/util/crc32c.cc +165 -2
  670. package/deps/rocksdb/rocksdb/util/crc32c.h +6 -0
  671. package/deps/rocksdb/rocksdb/util/crc32c_arm64.cc +14 -0
  672. package/deps/rocksdb/rocksdb/util/crc32c_ppc.h +3 -0
  673. package/deps/rocksdb/rocksdb/util/crc32c_test.cc +47 -0
  674. package/deps/rocksdb/rocksdb/util/defer.h +30 -1
  675. package/deps/rocksdb/rocksdb/util/defer_test.cc +11 -0
  676. package/deps/rocksdb/rocksdb/util/duplicate_detector.h +3 -1
  677. package/deps/rocksdb/rocksdb/util/dynamic_bloom.h +3 -3
  678. package/deps/rocksdb/rocksdb/util/dynamic_bloom_test.cc +5 -4
  679. package/deps/rocksdb/rocksdb/util/fastrange.h +2 -0
  680. package/deps/rocksdb/rocksdb/util/file_checksum_helper.cc +36 -0
  681. package/deps/rocksdb/rocksdb/util/file_checksum_helper.h +3 -1
  682. package/deps/rocksdb/rocksdb/util/file_reader_writer_test.cc +512 -52
  683. package/deps/rocksdb/rocksdb/util/filter_bench.cc +65 -10
  684. package/deps/rocksdb/rocksdb/util/gflags_compat.h +6 -1
  685. package/deps/rocksdb/rocksdb/util/hash.cc +121 -3
  686. package/deps/rocksdb/rocksdb/util/hash.h +31 -1
  687. package/deps/rocksdb/rocksdb/util/hash128.h +26 -0
  688. package/deps/rocksdb/rocksdb/util/hash_containers.h +51 -0
  689. package/deps/rocksdb/rocksdb/util/hash_test.cc +194 -2
  690. package/deps/rocksdb/rocksdb/util/heap.h +6 -1
  691. package/deps/rocksdb/rocksdb/util/kv_map.h +1 -1
  692. package/deps/rocksdb/rocksdb/util/log_write_bench.cc +8 -6
  693. package/deps/rocksdb/rocksdb/util/math.h +74 -7
  694. package/deps/rocksdb/rocksdb/util/math128.h +13 -1
  695. package/deps/rocksdb/rocksdb/util/murmurhash.h +3 -3
  696. package/deps/rocksdb/rocksdb/util/random.cc +9 -0
  697. package/deps/rocksdb/rocksdb/util/random.h +6 -0
  698. package/deps/rocksdb/rocksdb/util/rate_limiter.cc +298 -144
  699. package/deps/rocksdb/rocksdb/util/rate_limiter.h +68 -19
  700. package/deps/rocksdb/rocksdb/util/rate_limiter_test.cc +335 -23
  701. package/deps/rocksdb/rocksdb/util/repeatable_thread.h +10 -12
  702. package/deps/rocksdb/rocksdb/util/repeatable_thread_test.cc +18 -15
  703. package/deps/rocksdb/rocksdb/util/ribbon_alg.h +98 -74
  704. package/deps/rocksdb/rocksdb/util/ribbon_config.cc +506 -0
  705. package/deps/rocksdb/rocksdb/util/ribbon_config.h +182 -0
  706. package/deps/rocksdb/rocksdb/util/ribbon_impl.h +154 -79
  707. package/deps/rocksdb/rocksdb/util/ribbon_test.cc +742 -365
  708. package/deps/rocksdb/rocksdb/util/set_comparator.h +2 -0
  709. package/deps/rocksdb/rocksdb/util/slice.cc +198 -35
  710. package/deps/rocksdb/rocksdb/util/slice_test.cc +30 -1
  711. package/deps/rocksdb/rocksdb/util/status.cc +32 -29
  712. package/deps/rocksdb/rocksdb/util/stop_watch.h +18 -18
  713. package/deps/rocksdb/rocksdb/util/string_util.cc +85 -6
  714. package/deps/rocksdb/rocksdb/util/string_util.h +47 -2
  715. package/deps/rocksdb/rocksdb/util/thread_guard.h +41 -0
  716. package/deps/rocksdb/rocksdb/util/thread_local.h +2 -2
  717. package/deps/rocksdb/rocksdb/util/thread_local_test.cc +22 -24
  718. package/deps/rocksdb/rocksdb/util/threadpool_imp.cc +7 -6
  719. package/deps/rocksdb/rocksdb/util/timer.h +55 -46
  720. package/deps/rocksdb/rocksdb/util/timer_test.cc +50 -48
  721. package/deps/rocksdb/rocksdb/util/user_comparator_wrapper.h +4 -0
  722. package/deps/rocksdb/rocksdb/util/vector_iterator.h +31 -15
  723. package/deps/rocksdb/rocksdb/util/work_queue.h +2 -0
  724. package/deps/rocksdb/rocksdb/util/xxhash.cc +35 -1144
  725. package/deps/rocksdb/rocksdb/util/xxhash.h +5117 -373
  726. package/deps/rocksdb/rocksdb/util/xxph3.h +1762 -0
  727. package/deps/rocksdb/rocksdb/utilities/agg_merge/agg_merge.cc +238 -0
  728. package/deps/rocksdb/rocksdb/utilities/agg_merge/agg_merge.h +49 -0
  729. package/deps/rocksdb/rocksdb/utilities/agg_merge/agg_merge_test.cc +134 -0
  730. package/deps/rocksdb/rocksdb/utilities/agg_merge/test_agg_merge.cc +104 -0
  731. package/deps/rocksdb/rocksdb/utilities/agg_merge/test_agg_merge.h +47 -0
  732. package/deps/rocksdb/rocksdb/utilities/backup/backup_engine.cc +3164 -0
  733. package/deps/rocksdb/rocksdb/utilities/backup/backup_engine_impl.h +29 -0
  734. package/deps/rocksdb/rocksdb/utilities/{backupable/backupable_db_test.cc → backup/backup_engine_test.cc} +1679 -485
  735. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_compaction_filter.cc +6 -4
  736. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_compaction_filter.h +14 -9
  737. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_db.cc +2 -0
  738. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_db.h +1 -0
  739. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_db_gc_stats.h +4 -0
  740. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_db_impl.cc +37 -27
  741. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_db_impl.h +8 -4
  742. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_db_impl_filesnapshot.cc +1 -1
  743. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_db_iterator.h +13 -10
  744. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_db_listener.h +5 -0
  745. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_db_test.cc +44 -25
  746. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_dump_tool.cc +3 -4
  747. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_file.cc +27 -19
  748. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_file.h +4 -2
  749. package/deps/rocksdb/rocksdb/utilities/cache_dump_load.cc +69 -0
  750. package/deps/rocksdb/rocksdb/utilities/cache_dump_load_impl.cc +489 -0
  751. package/deps/rocksdb/rocksdb/utilities/cache_dump_load_impl.h +366 -0
  752. package/deps/rocksdb/rocksdb/utilities/cassandra/cassandra_compaction_filter.cc +67 -4
  753. package/deps/rocksdb/rocksdb/utilities/cassandra/cassandra_compaction_filter.h +21 -6
  754. package/deps/rocksdb/rocksdb/utilities/cassandra/cassandra_functional_test.cc +107 -7
  755. package/deps/rocksdb/rocksdb/utilities/cassandra/cassandra_options.h +43 -0
  756. package/deps/rocksdb/rocksdb/utilities/cassandra/format.h +1 -1
  757. package/deps/rocksdb/rocksdb/utilities/cassandra/merge_operator.cc +24 -8
  758. package/deps/rocksdb/rocksdb/utilities/cassandra/merge_operator.h +7 -7
  759. package/deps/rocksdb/rocksdb/utilities/cassandra/serialize.h +5 -0
  760. package/deps/rocksdb/rocksdb/utilities/checkpoint/checkpoint_impl.cc +99 -218
  761. package/deps/rocksdb/rocksdb/utilities/checkpoint/checkpoint_impl.h +8 -24
  762. package/deps/rocksdb/rocksdb/utilities/checkpoint/checkpoint_test.cc +114 -1
  763. package/deps/rocksdb/rocksdb/utilities/compaction_filters/layered_compaction_filter_base.h +6 -2
  764. package/deps/rocksdb/rocksdb/utilities/compaction_filters/remove_emptyvalue_compactionfilter.cc +0 -4
  765. package/deps/rocksdb/rocksdb/utilities/compaction_filters/remove_emptyvalue_compactionfilter.h +7 -6
  766. package/deps/rocksdb/rocksdb/utilities/compaction_filters.cc +56 -0
  767. package/deps/rocksdb/rocksdb/utilities/convenience/info_log_finder.cc +2 -2
  768. package/deps/rocksdb/rocksdb/utilities/counted_fs.cc +355 -0
  769. package/deps/rocksdb/rocksdb/utilities/counted_fs.h +152 -0
  770. package/deps/rocksdb/rocksdb/utilities/env_mirror.cc +13 -0
  771. package/deps/rocksdb/rocksdb/utilities/env_timed.cc +164 -122
  772. package/deps/rocksdb/rocksdb/utilities/env_timed.h +97 -0
  773. package/deps/rocksdb/rocksdb/utilities/fault_injection_env.cc +75 -17
  774. package/deps/rocksdb/rocksdb/utilities/fault_injection_env.h +19 -3
  775. package/deps/rocksdb/rocksdb/utilities/fault_injection_fs.cc +539 -126
  776. package/deps/rocksdb/rocksdb/utilities/fault_injection_fs.h +162 -17
  777. package/deps/rocksdb/rocksdb/utilities/fault_injection_secondary_cache.cc +110 -0
  778. package/deps/rocksdb/rocksdb/utilities/fault_injection_secondary_cache.h +94 -0
  779. package/deps/rocksdb/rocksdb/utilities/memory/memory_test.cc +5 -2
  780. package/deps/rocksdb/rocksdb/utilities/memory_allocators.h +104 -0
  781. package/deps/rocksdb/rocksdb/utilities/merge_operators/bytesxor.h +5 -3
  782. package/deps/rocksdb/rocksdb/utilities/merge_operators/max.cc +4 -1
  783. package/deps/rocksdb/rocksdb/utilities/merge_operators/put.cc +11 -3
  784. package/deps/rocksdb/rocksdb/utilities/merge_operators/sortlist.cc +0 -2
  785. package/deps/rocksdb/rocksdb/utilities/merge_operators/sortlist.h +5 -1
  786. package/deps/rocksdb/rocksdb/utilities/merge_operators/string_append/stringappend.cc +29 -10
  787. package/deps/rocksdb/rocksdb/utilities/merge_operators/string_append/stringappend.h +6 -3
  788. package/deps/rocksdb/rocksdb/utilities/merge_operators/string_append/stringappend2.cc +29 -14
  789. package/deps/rocksdb/rocksdb/utilities/merge_operators/string_append/stringappend2.h +6 -3
  790. package/deps/rocksdb/rocksdb/utilities/merge_operators/string_append/stringappend_test.cc +71 -18
  791. package/deps/rocksdb/rocksdb/utilities/merge_operators/uint64add.cc +15 -9
  792. package/deps/rocksdb/rocksdb/utilities/merge_operators.cc +120 -0
  793. package/deps/rocksdb/rocksdb/utilities/merge_operators.h +3 -23
  794. package/deps/rocksdb/rocksdb/utilities/object_registry.cc +267 -42
  795. package/deps/rocksdb/rocksdb/utilities/object_registry_test.cc +702 -76
  796. package/deps/rocksdb/rocksdb/utilities/option_change_migration/option_change_migration.cc +1 -1
  797. package/deps/rocksdb/rocksdb/utilities/option_change_migration/option_change_migration_test.cc +26 -5
  798. package/deps/rocksdb/rocksdb/utilities/options/options_util.cc +1 -1
  799. package/deps/rocksdb/rocksdb/utilities/options/options_util_test.cc +124 -1
  800. package/deps/rocksdb/rocksdb/utilities/persistent_cache/block_cache_tier.cc +2 -3
  801. package/deps/rocksdb/rocksdb/utilities/persistent_cache/block_cache_tier.h +8 -9
  802. package/deps/rocksdb/rocksdb/utilities/persistent_cache/block_cache_tier_file.cc +15 -13
  803. package/deps/rocksdb/rocksdb/utilities/persistent_cache/block_cache_tier_file.h +1 -1
  804. package/deps/rocksdb/rocksdb/utilities/persistent_cache/block_cache_tier_metadata.h +4 -4
  805. package/deps/rocksdb/rocksdb/utilities/persistent_cache/hash_table_evictable.h +2 -2
  806. package/deps/rocksdb/rocksdb/utilities/persistent_cache/persistent_cache_bench.cc +8 -9
  807. package/deps/rocksdb/rocksdb/utilities/persistent_cache/persistent_cache_test.cc +1 -1
  808. package/deps/rocksdb/rocksdb/utilities/persistent_cache/persistent_cache_tier.h +6 -3
  809. package/deps/rocksdb/rocksdb/utilities/persistent_cache/volatile_tier_impl.h +2 -2
  810. package/deps/rocksdb/rocksdb/utilities/simulator_cache/cache_simulator.cc +3 -0
  811. package/deps/rocksdb/rocksdb/utilities/simulator_cache/cache_simulator_test.cc +2 -0
  812. package/deps/rocksdb/rocksdb/utilities/simulator_cache/sim_cache.cc +43 -35
  813. package/deps/rocksdb/rocksdb/utilities/simulator_cache/sim_cache_test.cc +20 -18
  814. package/deps/rocksdb/rocksdb/utilities/table_properties_collectors/compact_on_deletion_collector.cc +107 -2
  815. package/deps/rocksdb/rocksdb/utilities/trace/file_trace_reader_writer.cc +23 -15
  816. package/deps/rocksdb/rocksdb/utilities/trace/file_trace_reader_writer.h +2 -2
  817. package/deps/rocksdb/rocksdb/utilities/trace/replayer_impl.cc +316 -0
  818. package/deps/rocksdb/rocksdb/utilities/trace/replayer_impl.h +86 -0
  819. package/deps/rocksdb/rocksdb/utilities/transactions/lock/point/point_lock_manager.cc +4 -5
  820. package/deps/rocksdb/rocksdb/utilities/transactions/lock/point/point_lock_manager.h +4 -3
  821. package/deps/rocksdb/rocksdb/utilities/transactions/lock/point/point_lock_manager_test.h +1 -1
  822. package/deps/rocksdb/rocksdb/utilities/transactions/lock/range/range_locking_test.cc +119 -3
  823. package/deps/rocksdb/rocksdb/utilities/transactions/lock/range/range_tree/lib/locktree/locktree.cc +20 -3
  824. package/deps/rocksdb/rocksdb/utilities/transactions/lock/range/range_tree/lib/locktree/locktree.h +20 -0
  825. package/deps/rocksdb/rocksdb/utilities/transactions/lock/range/range_tree/lib/portability/toku_external_pthread.h +3 -2
  826. package/deps/rocksdb/rocksdb/utilities/transactions/lock/range/range_tree/lib/portability/toku_time.h +4 -0
  827. package/deps/rocksdb/rocksdb/utilities/transactions/lock/range/range_tree/range_tree_lock_manager.cc +38 -14
  828. package/deps/rocksdb/rocksdb/utilities/transactions/lock/range/range_tree/range_tree_lock_manager.h +17 -10
  829. package/deps/rocksdb/rocksdb/utilities/transactions/optimistic_transaction_db_impl.h +1 -0
  830. package/deps/rocksdb/rocksdb/utilities/transactions/optimistic_transaction_test.cc +1 -2
  831. package/deps/rocksdb/rocksdb/utilities/transactions/pessimistic_transaction.cc +423 -34
  832. package/deps/rocksdb/rocksdb/utilities/transactions/pessimistic_transaction.h +82 -2
  833. package/deps/rocksdb/rocksdb/utilities/transactions/pessimistic_transaction_db.cc +72 -40
  834. package/deps/rocksdb/rocksdb/utilities/transactions/pessimistic_transaction_db.h +32 -1
  835. package/deps/rocksdb/rocksdb/utilities/transactions/transaction_base.cc +13 -5
  836. package/deps/rocksdb/rocksdb/utilities/transactions/transaction_base.h +7 -3
  837. package/deps/rocksdb/rocksdb/utilities/transactions/transaction_test.cc +207 -43
  838. package/deps/rocksdb/rocksdb/utilities/transactions/transaction_test.h +50 -7
  839. package/deps/rocksdb/rocksdb/utilities/transactions/transaction_util.cc +28 -10
  840. package/deps/rocksdb/rocksdb/utilities/transactions/transaction_util.h +11 -6
  841. package/deps/rocksdb/rocksdb/utilities/transactions/write_committed_transaction_ts_test.cc +516 -0
  842. package/deps/rocksdb/rocksdb/utilities/transactions/write_prepared_transaction_test.cc +506 -15
  843. package/deps/rocksdb/rocksdb/utilities/transactions/write_prepared_txn.cc +27 -13
  844. package/deps/rocksdb/rocksdb/utilities/transactions/write_prepared_txn_db.cc +14 -14
  845. package/deps/rocksdb/rocksdb/utilities/transactions/write_prepared_txn_db.h +3 -0
  846. package/deps/rocksdb/rocksdb/utilities/transactions/write_unprepared_transaction_test.cc +2 -2
  847. package/deps/rocksdb/rocksdb/utilities/transactions/write_unprepared_txn.cc +14 -5
  848. package/deps/rocksdb/rocksdb/utilities/ttl/db_ttl_impl.cc +305 -27
  849. package/deps/rocksdb/rocksdb/utilities/ttl/db_ttl_impl.h +55 -159
  850. package/deps/rocksdb/rocksdb/utilities/ttl/ttl_test.cc +209 -2
  851. package/deps/rocksdb/rocksdb/utilities/wal_filter.cc +23 -0
  852. package/deps/rocksdb/rocksdb/utilities/write_batch_with_index/write_batch_with_index.cc +157 -88
  853. package/deps/rocksdb/rocksdb/utilities/write_batch_with_index/write_batch_with_index_internal.cc +501 -114
  854. package/deps/rocksdb/rocksdb/utilities/write_batch_with_index/write_batch_with_index_internal.h +91 -316
  855. package/deps/rocksdb/rocksdb/utilities/write_batch_with_index/write_batch_with_index_test.cc +1212 -672
  856. package/deps/rocksdb/rocksdb.gyp +425 -446
  857. package/index.js +5 -87
  858. package/package-lock.json +23687 -0
  859. package/package.json +8 -9
  860. package/prebuilds/darwin-arm64/node.napi.node +0 -0
  861. package/prebuilds/darwin-x64/node.napi.node +0 -0
  862. package/prebuilds/{darwin-x64+arm64 → linux-x64}/node.napi.node +0 -0
  863. package/deps/rocksdb/rocksdb/README.md +0 -32
  864. package/deps/rocksdb/rocksdb/env/env_hdfs.cc +0 -648
  865. package/deps/rocksdb/rocksdb/hdfs/README +0 -23
  866. package/deps/rocksdb/rocksdb/hdfs/env_hdfs.h +0 -386
  867. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/backupable_db.h +0 -535
  868. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/env_librados.h +0 -175
  869. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/utility_db.h +0 -34
  870. package/deps/rocksdb/rocksdb/memory/memkind_kmem_allocator_test.cc +0 -102
  871. package/deps/rocksdb/rocksdb/memtable/hash_linklist_rep.h +0 -49
  872. package/deps/rocksdb/rocksdb/memtable/hash_skiplist_rep.h +0 -44
  873. package/deps/rocksdb/rocksdb/options/customizable_helper.h +0 -216
  874. package/deps/rocksdb/rocksdb/port/README +0 -10
  875. package/deps/rocksdb/rocksdb/third-party/folly/folly/CPortability.h +0 -27
  876. package/deps/rocksdb/rocksdb/third-party/folly/folly/ConstexprMath.h +0 -45
  877. package/deps/rocksdb/rocksdb/third-party/folly/folly/Indestructible.h +0 -166
  878. package/deps/rocksdb/rocksdb/third-party/folly/folly/Optional.h +0 -570
  879. package/deps/rocksdb/rocksdb/third-party/folly/folly/Portability.h +0 -92
  880. package/deps/rocksdb/rocksdb/third-party/folly/folly/ScopeGuard.h +0 -54
  881. package/deps/rocksdb/rocksdb/third-party/folly/folly/Traits.h +0 -152
  882. package/deps/rocksdb/rocksdb/third-party/folly/folly/Unit.h +0 -59
  883. package/deps/rocksdb/rocksdb/third-party/folly/folly/Utility.h +0 -141
  884. package/deps/rocksdb/rocksdb/third-party/folly/folly/chrono/Hardware.h +0 -33
  885. package/deps/rocksdb/rocksdb/third-party/folly/folly/container/Array.h +0 -74
  886. package/deps/rocksdb/rocksdb/third-party/folly/folly/detail/Futex-inl.h +0 -117
  887. package/deps/rocksdb/rocksdb/third-party/folly/folly/detail/Futex.cpp +0 -263
  888. package/deps/rocksdb/rocksdb/third-party/folly/folly/detail/Futex.h +0 -96
  889. package/deps/rocksdb/rocksdb/third-party/folly/folly/functional/Invoke.h +0 -40
  890. package/deps/rocksdb/rocksdb/third-party/folly/folly/hash/Hash.h +0 -29
  891. package/deps/rocksdb/rocksdb/third-party/folly/folly/lang/Align.h +0 -144
  892. package/deps/rocksdb/rocksdb/third-party/folly/folly/lang/Bits.h +0 -30
  893. package/deps/rocksdb/rocksdb/third-party/folly/folly/lang/Launder.h +0 -51
  894. package/deps/rocksdb/rocksdb/third-party/folly/folly/portability/Asm.h +0 -28
  895. package/deps/rocksdb/rocksdb/third-party/folly/folly/portability/SysSyscall.h +0 -10
  896. package/deps/rocksdb/rocksdb/third-party/folly/folly/portability/SysTypes.h +0 -26
  897. package/deps/rocksdb/rocksdb/third-party/folly/folly/synchronization/AtomicNotification-inl.h +0 -138
  898. package/deps/rocksdb/rocksdb/third-party/folly/folly/synchronization/AtomicNotification.cpp +0 -23
  899. package/deps/rocksdb/rocksdb/third-party/folly/folly/synchronization/AtomicNotification.h +0 -57
  900. package/deps/rocksdb/rocksdb/third-party/folly/folly/synchronization/AtomicUtil-inl.h +0 -260
  901. package/deps/rocksdb/rocksdb/third-party/folly/folly/synchronization/AtomicUtil.h +0 -52
  902. package/deps/rocksdb/rocksdb/third-party/folly/folly/synchronization/Baton.h +0 -328
  903. package/deps/rocksdb/rocksdb/third-party/folly/folly/synchronization/DistributedMutex-inl.h +0 -1703
  904. package/deps/rocksdb/rocksdb/third-party/folly/folly/synchronization/DistributedMutex.cpp +0 -16
  905. package/deps/rocksdb/rocksdb/third-party/folly/folly/synchronization/DistributedMutex.h +0 -304
  906. package/deps/rocksdb/rocksdb/third-party/folly/folly/synchronization/DistributedMutexSpecializations.h +0 -39
  907. package/deps/rocksdb/rocksdb/third-party/folly/folly/synchronization/ParkingLot.cpp +0 -26
  908. package/deps/rocksdb/rocksdb/third-party/folly/folly/synchronization/ParkingLot.h +0 -318
  909. package/deps/rocksdb/rocksdb/third-party/folly/folly/synchronization/WaitOptions.h +0 -57
  910. package/deps/rocksdb/rocksdb/third-party/folly/folly/synchronization/detail/InlineFunctionRef.h +0 -219
  911. package/deps/rocksdb/rocksdb/third-party/folly/folly/synchronization/detail/ProxyLockable-inl.h +0 -207
  912. package/deps/rocksdb/rocksdb/third-party/folly/folly/synchronization/detail/ProxyLockable.h +0 -164
  913. package/deps/rocksdb/rocksdb/third-party/folly/folly/synchronization/detail/Sleeper.h +0 -57
  914. package/deps/rocksdb/rocksdb/third-party/folly/folly/synchronization/detail/Spin.h +0 -77
  915. package/deps/rocksdb/rocksdb/third-party/folly/folly/synchronization/test/DistributedMutexTest.cpp +0 -1145
  916. package/deps/rocksdb/rocksdb/util/build_version.h +0 -15
  917. package/deps/rocksdb/rocksdb/util/xxh3p.h +0 -1392
  918. package/deps/rocksdb/rocksdb/utilities/backupable/backupable_db.cc +0 -2354
  919. package/deps/rocksdb/rocksdb/utilities/env_librados.cc +0 -1497
  920. package/deps/rocksdb/rocksdb/utilities/env_librados_test.cc +0 -1146
  921. package/deps/rocksdb/rocksdb/utilities/transactions/lock/range/range_tree/lib/README +0 -13
  922. package/deps/snappy/snappy-1.1.7/README.md +0 -149
  923. package/prebuilds/linux-x64/node.napi.glibc.node +0 -0
@@ -13,9 +13,11 @@
13
13
  #include "db/error_handler.h"
14
14
  #include "db/periodic_work_scheduler.h"
15
15
  #include "env/composite_env_wrapper.h"
16
+ #include "file/filename.h"
16
17
  #include "file/read_write_util.h"
17
18
  #include "file/sst_file_manager_impl.h"
18
19
  #include "file/writable_file_writer.h"
20
+ #include "logging/logging.h"
19
21
  #include "monitoring/persistent_stats_history.h"
20
22
  #include "options/options_helper.h"
21
23
  #include "rocksdb/table.h"
@@ -24,15 +26,17 @@
24
26
  #include "util/rate_limiter.h"
25
27
 
26
28
  namespace ROCKSDB_NAMESPACE {
27
- Options SanitizeOptions(const std::string& dbname, const Options& src) {
28
- auto db_options = SanitizeOptions(dbname, DBOptions(src));
29
+ Options SanitizeOptions(const std::string& dbname, const Options& src,
30
+ bool read_only) {
31
+ auto db_options = SanitizeOptions(dbname, DBOptions(src), read_only);
29
32
  ImmutableDBOptions immutable_db_options(db_options);
30
33
  auto cf_options =
31
34
  SanitizeOptions(immutable_db_options, ColumnFamilyOptions(src));
32
35
  return Options(db_options, cf_options);
33
36
  }
34
37
 
35
- DBOptions SanitizeOptions(const std::string& dbname, const DBOptions& src) {
38
+ DBOptions SanitizeOptions(const std::string& dbname, const DBOptions& src,
39
+ bool read_only) {
36
40
  DBOptions result(src);
37
41
 
38
42
  if (result.env == nullptr) {
@@ -50,7 +54,7 @@ DBOptions SanitizeOptions(const std::string& dbname, const DBOptions& src) {
50
54
  &result.max_open_files);
51
55
  }
52
56
 
53
- if (result.info_log == nullptr) {
57
+ if (result.info_log == nullptr && !read_only) {
54
58
  Status s = CreateLoggerFromOptions(dbname, result, &result.info_log);
55
59
  if (!s.ok()) {
56
60
  // No place suitable for logging
@@ -109,16 +113,28 @@ DBOptions SanitizeOptions(const std::string& dbname, const DBOptions& src) {
109
113
  result.recycle_log_file_num = 0;
110
114
  }
111
115
 
112
- if (result.wal_dir.empty()) {
116
+ if (result.db_paths.size() == 0) {
117
+ result.db_paths.emplace_back(dbname, std::numeric_limits<uint64_t>::max());
118
+ } else if (result.wal_dir.empty()) {
113
119
  // Use dbname as default
114
120
  result.wal_dir = dbname;
115
121
  }
116
- if (result.wal_dir.back() == '/') {
117
- result.wal_dir = result.wal_dir.substr(0, result.wal_dir.size() - 1);
122
+ if (!result.wal_dir.empty()) {
123
+ // If there is a wal_dir already set, check to see if the wal_dir is the
124
+ // same as the dbname AND the same as the db_path[0] (which must exist from
125
+ // a few lines ago). If the wal_dir matches both of these values, then clear
126
+ // the wal_dir value, which will make wal_dir == dbname. Most likely this
127
+ // condition was the result of reading an old options file where we forced
128
+ // wal_dir to be set (to dbname).
129
+ auto npath = NormalizePath(dbname + "/");
130
+ if (npath == NormalizePath(result.wal_dir + "/") &&
131
+ npath == NormalizePath(result.db_paths[0].path + "/")) {
132
+ result.wal_dir.clear();
133
+ }
118
134
  }
119
135
 
120
- if (result.db_paths.size() == 0) {
121
- result.db_paths.emplace_back(dbname, std::numeric_limits<uint64_t>::max());
136
+ if (!result.wal_dir.empty() && result.wal_dir.back() == '/') {
137
+ result.wal_dir = result.wal_dir.substr(0, result.wal_dir.size() - 1);
122
138
  }
123
139
 
124
140
  if (result.use_direct_reads && result.compaction_readahead_size == 0) {
@@ -126,10 +142,6 @@ DBOptions SanitizeOptions(const std::string& dbname, const DBOptions& src) {
126
142
  result.compaction_readahead_size = 1024 * 1024 * 2;
127
143
  }
128
144
 
129
- if (result.compaction_readahead_size > 0 || result.use_direct_reads) {
130
- result.new_table_reader_for_compaction_inputs = true;
131
- }
132
-
133
145
  // Force flush on DB open if 2PC is enabled, since with 2PC we have no
134
146
  // guarantee that consecutive log files have consecutive sequence id, which
135
147
  // make recovery complicated.
@@ -139,7 +151,7 @@ DBOptions SanitizeOptions(const std::string& dbname, const DBOptions& src) {
139
151
 
140
152
  #ifndef ROCKSDB_LITE
141
153
  ImmutableDBOptions immutable_db_options(result);
142
- if (!IsWalDirSameAsDBPath(&immutable_db_options)) {
154
+ if (!immutable_db_options.IsWalDirSameAsDBPath()) {
143
155
  // Either the WAL dir and db_paths[0]/db_name are not the same, or we
144
156
  // cannot tell for sure. In either case, assume they're different and
145
157
  // explicitly cleanup the trash log files (bypass DeleteScheduler)
@@ -147,13 +159,14 @@ DBOptions SanitizeOptions(const std::string& dbname, const DBOptions& src) {
147
159
  // DeleteScheduler::CleanupDirectory on the same dir later, it will be
148
160
  // safe
149
161
  std::vector<std::string> filenames;
150
- Status s = result.env->GetChildren(result.wal_dir, &filenames);
162
+ auto wal_dir = immutable_db_options.GetWalDir();
163
+ Status s = result.env->GetChildren(wal_dir, &filenames);
151
164
  s.PermitUncheckedError(); //**TODO: What to do on error?
152
165
  for (std::string& filename : filenames) {
153
166
  if (filename.find(".log.trash", filename.length() -
154
167
  std::string(".log.trash").length()) !=
155
168
  std::string::npos) {
156
- std::string trash_file = result.wal_dir + "/" + filename;
169
+ std::string trash_file = wal_dir + "/" + filename;
157
170
  result.env->DeleteFile(trash_file).PermitUncheckedError();
158
171
  }
159
172
  }
@@ -175,7 +188,14 @@ DBOptions SanitizeOptions(const std::string& dbname, const DBOptions& src) {
175
188
  NewSstFileManager(result.env, result.info_log));
176
189
  result.sst_file_manager = sst_file_manager;
177
190
  }
178
- #endif
191
+ #endif // !ROCKSDB_LITE
192
+
193
+ // Supported wal compression types
194
+ if (!StreamingCompressionTypeSupported(result.wal_compression)) {
195
+ result.wal_compression = kNoCompression;
196
+ ROCKS_LOG_WARN(result.info_log,
197
+ "wal_compression is disabled since only zstd is supported");
198
+ }
179
199
 
180
200
  if (!result.paranoid_checks) {
181
201
  result.skip_checking_sst_file_sizes_on_db_open = true;
@@ -262,6 +282,12 @@ Status DBImpl::ValidateOptions(const DBOptions& db_options) {
262
282
  "atomic_flush is currently incompatible with best-efforts recovery");
263
283
  }
264
284
 
285
+ if (db_options.use_direct_io_for_flush_and_compaction &&
286
+ 0 == db_options.writable_file_max_buffer_size) {
287
+ return Status::InvalidArgument(
288
+ "writes in direct IO require writable_file_max_buffer_size > 0");
289
+ }
290
+
265
291
  return Status::OK();
266
292
  }
267
293
 
@@ -283,23 +309,29 @@ Status DBImpl::NewDB(std::vector<std::string>* new_filenames) {
283
309
  ROCKS_LOG_INFO(immutable_db_options_.info_log, "Creating manifest 1 \n");
284
310
  const std::string manifest = DescriptorFileName(dbname_, 1);
285
311
  {
312
+ if (fs_->FileExists(manifest, IOOptions(), nullptr).ok()) {
313
+ fs_->DeleteFile(manifest, IOOptions(), nullptr).PermitUncheckedError();
314
+ }
286
315
  std::unique_ptr<FSWritableFile> file;
287
316
  FileOptions file_options = fs_->OptimizeForManifestWrite(file_options_);
288
317
  s = NewWritableFile(fs_.get(), manifest, &file, file_options);
289
318
  if (!s.ok()) {
290
319
  return s;
291
320
  }
321
+ FileTypeSet tmp_set = immutable_db_options_.checksum_handoff_file_types;
292
322
  file->SetPreallocationBlockSize(
293
323
  immutable_db_options_.manifest_preallocation_size);
294
324
  std::unique_ptr<WritableFileWriter> file_writer(new WritableFileWriter(
295
- std::move(file), manifest, file_options, env_, io_tracer_,
296
- nullptr /* stats */, immutable_db_options_.listeners));
325
+ std::move(file), manifest, file_options, immutable_db_options_.clock,
326
+ io_tracer_, nullptr /* stats */, immutable_db_options_.listeners,
327
+ nullptr, tmp_set.Contains(FileType::kDescriptorFile),
328
+ tmp_set.Contains(FileType::kDescriptorFile)));
297
329
  log::Writer log(std::move(file_writer), 0, false);
298
330
  std::string record;
299
331
  new_db.EncodeTo(&record);
300
332
  s = log.AddRecord(record);
301
333
  if (s.ok()) {
302
- s = SyncManifest(env_, &immutable_db_options_, log.file());
334
+ s = SyncManifest(&immutable_db_options_, log.file());
303
335
  }
304
336
  }
305
337
  if (s.ok()) {
@@ -310,7 +342,7 @@ Status DBImpl::NewDB(std::vector<std::string>* new_filenames) {
310
342
  manifest.substr(manifest.find_last_of("/\\") + 1));
311
343
  }
312
344
  } else {
313
- fs_->DeleteFile(manifest, IOOptions(), nullptr);
345
+ fs_->DeleteFile(manifest, IOOptions(), nullptr).PermitUncheckedError();
314
346
  }
315
347
  return s;
316
348
  }
@@ -367,7 +399,7 @@ IOStatus Directories::SetDirectories(FileSystem* fs, const std::string& dbname,
367
399
  Status DBImpl::Recover(
368
400
  const std::vector<ColumnFamilyDescriptor>& column_families, bool read_only,
369
401
  bool error_if_wal_file_exists, bool error_if_data_exists_in_wals,
370
- uint64_t* recovered_seq) {
402
+ uint64_t* recovered_seq, RecoveryContext* recovery_ctx) {
371
403
  mutex_.AssertHeld();
372
404
 
373
405
  bool is_new_db = false;
@@ -486,9 +518,10 @@ Status DBImpl::Recover(
486
518
  if (!s.ok()) {
487
519
  return s;
488
520
  }
489
- s = SetDBId();
521
+
522
+ s = SetDBId(read_only, recovery_ctx);
490
523
  if (s.ok() && !read_only) {
491
- s = DeleteUnreferencedSstFiles();
524
+ s = DeleteUnreferencedSstFiles(recovery_ctx);
492
525
  }
493
526
 
494
527
  if (immutable_db_options_.paranoid_checks && s.ok()) {
@@ -503,10 +536,6 @@ Status DBImpl::Recover(
503
536
  }
504
537
  }
505
538
  }
506
- // DB mutex is already held
507
- if (s.ok() && immutable_db_options_.persist_stats_to_disk) {
508
- s = InitPersistStatsColumnFamily();
509
- }
510
539
 
511
540
  std::vector<std::string> files_in_wal_dir;
512
541
  if (s.ok()) {
@@ -535,12 +564,12 @@ Status DBImpl::Recover(
535
564
  // Note that prev_log_number() is no longer used, but we pay
536
565
  // attention to it in case we are recovering a database
537
566
  // produced by an older version of rocksdb.
567
+ auto wal_dir = immutable_db_options_.GetWalDir();
538
568
  if (!immutable_db_options_.best_efforts_recovery) {
539
- s = env_->GetChildren(immutable_db_options_.wal_dir, &files_in_wal_dir);
569
+ s = env_->GetChildren(wal_dir, &files_in_wal_dir);
540
570
  }
541
571
  if (s.IsNotFound()) {
542
- return Status::InvalidArgument("wal_dir not found",
543
- immutable_db_options_.wal_dir);
572
+ return Status::InvalidArgument("wal_dir not found", wal_dir);
544
573
  } else if (!s.ok()) {
545
574
  return s;
546
575
  }
@@ -556,8 +585,7 @@ Status DBImpl::Recover(
556
585
  "existing log file: ",
557
586
  file);
558
587
  } else {
559
- wal_files[number] =
560
- LogFileName(immutable_db_options_.wal_dir, number);
588
+ wal_files[number] = LogFileName(wal_dir, number);
561
589
  }
562
590
  }
563
591
  }
@@ -577,7 +605,10 @@ Status DBImpl::Recover(
577
605
  WalNumber max_wal_number =
578
606
  versions_->GetWalSet().GetWals().rbegin()->first;
579
607
  edit.DeleteWalsBefore(max_wal_number + 1);
580
- s = versions_->LogAndApplyToDefaultColumnFamily(&edit, &mutex_);
608
+ assert(recovery_ctx != nullptr);
609
+ assert(versions_->GetColumnFamilySet() != nullptr);
610
+ recovery_ctx->UpdateVersionEdits(
611
+ versions_->GetColumnFamilySet()->GetDefault(), edit);
581
612
  }
582
613
  if (!s.ok()) {
583
614
  return s;
@@ -613,8 +644,8 @@ Status DBImpl::Recover(
613
644
  std::sort(wals.begin(), wals.end());
614
645
 
615
646
  bool corrupted_wal_found = false;
616
- s = RecoverLogFiles(wals, &next_sequence, read_only,
617
- &corrupted_wal_found);
647
+ s = RecoverLogFiles(wals, &next_sequence, read_only, &corrupted_wal_found,
648
+ recovery_ctx);
618
649
  if (corrupted_wal_found && recovered_seq != nullptr) {
619
650
  *recovered_seq = next_sequence;
620
651
  }
@@ -637,7 +668,7 @@ Status DBImpl::Recover(
637
668
  if (s.ok()) {
638
669
  const std::string normalized_dbname = NormalizePath(dbname_);
639
670
  const std::string normalized_wal_dir =
640
- NormalizePath(immutable_db_options_.wal_dir);
671
+ NormalizePath(immutable_db_options_.GetWalDir());
641
672
  if (immutable_db_options_.best_efforts_recovery) {
642
673
  filenames = std::move(files_in_dbname);
643
674
  } else if (normalized_dbname == normalized_wal_dir) {
@@ -656,6 +687,12 @@ Status DBImpl::Recover(
656
687
  }
657
688
  }
658
689
  versions_->options_file_number_ = options_file_number;
690
+ uint64_t options_file_size = 0;
691
+ if (options_file_number > 0) {
692
+ s = env_->GetFileSize(OptionsFileName(GetName(), options_file_number),
693
+ &options_file_size);
694
+ }
695
+ versions_->options_file_size_ = options_file_size;
659
696
  }
660
697
  }
661
698
  return s;
@@ -768,10 +805,30 @@ Status DBImpl::InitPersistStatsColumnFamily() {
768
805
  return s;
769
806
  }
770
807
 
808
+ Status DBImpl::LogAndApplyForRecovery(const RecoveryContext& recovery_ctx) {
809
+ mutex_.AssertHeld();
810
+ assert(versions_->descriptor_log_ == nullptr);
811
+ Status s = versions_->LogAndApply(
812
+ recovery_ctx.cfds_, recovery_ctx.mutable_cf_opts_,
813
+ recovery_ctx.edit_lists_, &mutex_, directories_.GetDbDir());
814
+ if (s.ok() && !(recovery_ctx.files_to_delete_.empty())) {
815
+ mutex_.Unlock();
816
+ for (const auto& fname : recovery_ctx.files_to_delete_) {
817
+ s = env_->DeleteFile(fname);
818
+ if (!s.ok()) {
819
+ break;
820
+ }
821
+ }
822
+ mutex_.Lock();
823
+ }
824
+ return s;
825
+ }
826
+
771
827
  // REQUIRES: wal_numbers are sorted in ascending order
772
- Status DBImpl::RecoverLogFiles(const std::vector<uint64_t>& wal_numbers,
828
+ Status DBImpl::RecoverLogFiles(std::vector<uint64_t>& wal_numbers,
773
829
  SequenceNumber* next_sequence, bool read_only,
774
- bool* corrupted_wal_found) {
830
+ bool* corrupted_wal_found,
831
+ RecoveryContext* recovery_ctx) {
775
832
  struct LogReporter : public log::Reader::Reporter {
776
833
  Env* env;
777
834
  Logger* info_log;
@@ -796,6 +853,7 @@ Status DBImpl::RecoverLogFiles(const std::vector<uint64_t>& wal_numbers,
796
853
  edit.SetColumnFamily(cfd->GetID());
797
854
  version_edits.insert({cfd->GetID(), edit});
798
855
  }
856
+
799
857
  int job_id = next_job_id_.fetch_add(1);
800
858
  {
801
859
  auto stream = event_logger_.Log();
@@ -829,6 +887,11 @@ Status DBImpl::RecoverLogFiles(const std::vector<uint64_t>& wal_numbers,
829
887
  bool flushed = false;
830
888
  uint64_t corrupted_wal_number = kMaxSequenceNumber;
831
889
  uint64_t min_wal_number = MinLogNumberToKeep();
890
+ if (!allow_2pc()) {
891
+ // In non-2pc mode, we skip WALs that do not back unflushed data.
892
+ min_wal_number =
893
+ std::max(min_wal_number, versions_->MinLogNumberWithUnflushedData());
894
+ }
832
895
  for (auto wal_number : wal_numbers) {
833
896
  if (wal_number < min_wal_number) {
834
897
  ROCKS_LOG_INFO(immutable_db_options_.info_log,
@@ -842,7 +905,8 @@ Status DBImpl::RecoverLogFiles(const std::vector<uint64_t>& wal_numbers,
842
905
  // update the file number allocation counter in VersionSet.
843
906
  versions_->MarkFileNumberUsed(wal_number);
844
907
  // Open the log file
845
- std::string fname = LogFileName(immutable_db_options_.wal_dir, wal_number);
908
+ std::string fname =
909
+ LogFileName(immutable_db_options_.GetWalDir(), wal_number);
846
910
 
847
911
  ROCKS_LOG_INFO(immutable_db_options_.info_log,
848
912
  "Recovering log #%" PRIu64 " mode %d", wal_number,
@@ -1130,11 +1194,29 @@ Status DBImpl::RecoverLogFiles(const std::vector<uint64_t>& wal_numbers,
1130
1194
  immutable_db_options_.wal_recovery_mode ==
1131
1195
  WALRecoveryMode::kTolerateCorruptedTailRecords)) {
1132
1196
  for (auto cfd : *versions_->GetColumnFamilySet()) {
1133
- if (cfd->GetLogNumber() > corrupted_wal_number) {
1197
+ // One special case cause cfd->GetLogNumber() > corrupted_wal_number but
1198
+ // the CF is still consistent: If a new column family is created during
1199
+ // the flush and the WAL sync fails at the same time, the new CF points to
1200
+ // the new WAL but the old WAL is corrupted. Since the new CF is empty, it
1201
+ // is still consistent. We add the check of CF sst file size to avoid the
1202
+ // false positive alert.
1203
+
1204
+ // Note that, the check of (cfd->GetLiveSstFilesSize() > 0) may lead to
1205
+ // the ignorance of a very rare inconsistency case caused in data
1206
+ // canclation. One CF is empty due to KV deletion. But those operations
1207
+ // are in the WAL. If the WAL is corrupted, the status of this CF might
1208
+ // not be consistent with others. However, the consistency check will be
1209
+ // bypassed due to empty CF.
1210
+ // TODO: a better and complete implementation is needed to ensure strict
1211
+ // consistency check in WAL recovery including handling the tailing
1212
+ // issues.
1213
+ if (cfd->GetLogNumber() > corrupted_wal_number &&
1214
+ cfd->GetLiveSstFilesSize() > 0) {
1134
1215
  ROCKS_LOG_ERROR(immutable_db_options_.info_log,
1135
1216
  "Column family inconsistency: SST file contains data"
1136
1217
  " beyond the point of corruption.");
1137
- return Status::Corruption("SST file is ahead of WALs");
1218
+ return Status::Corruption("SST file is ahead of WALs in CF " +
1219
+ cfd->GetName());
1138
1220
  }
1139
1221
  }
1140
1222
  }
@@ -1195,6 +1277,7 @@ Status DBImpl::RecoverLogFiles(const std::vector<uint64_t>& wal_numbers,
1195
1277
  edit->SetLogNumber(max_wal_number + 1);
1196
1278
  }
1197
1279
  }
1280
+
1198
1281
  if (status.ok()) {
1199
1282
  // we must mark the next log number as used, even though it's
1200
1283
  // not actually used. that is because VersionSet assumes
@@ -1202,33 +1285,47 @@ Status DBImpl::RecoverLogFiles(const std::vector<uint64_t>& wal_numbers,
1202
1285
  // log number
1203
1286
  versions_->MarkFileNumberUsed(max_wal_number + 1);
1204
1287
 
1205
- autovector<ColumnFamilyData*> cfds;
1206
- autovector<const MutableCFOptions*> cf_opts;
1207
- autovector<autovector<VersionEdit*>> edit_lists;
1288
+ if (corrupted_wal_found != nullptr && *corrupted_wal_found == true &&
1289
+ immutable_db_options_.wal_recovery_mode ==
1290
+ WALRecoveryMode::kPointInTimeRecovery) {
1291
+ MoveCorruptedWalFiles(wal_numbers, corrupted_wal_number);
1292
+ }
1293
+
1294
+ assert(recovery_ctx != nullptr);
1208
1295
  for (auto* cfd : *versions_->GetColumnFamilySet()) {
1209
- cfds.push_back(cfd);
1210
- cf_opts.push_back(cfd->GetLatestMutableCFOptions());
1211
1296
  auto iter = version_edits.find(cfd->GetID());
1212
1297
  assert(iter != version_edits.end());
1213
- edit_lists.push_back({&iter->second});
1298
+ recovery_ctx->UpdateVersionEdits(cfd, iter->second);
1214
1299
  }
1215
1300
 
1216
- std::unique_ptr<VersionEdit> wal_deletion;
1217
- if (immutable_db_options_.track_and_verify_wals_in_manifest) {
1218
- wal_deletion.reset(new VersionEdit);
1219
- wal_deletion->DeleteWalsBefore(max_wal_number + 1);
1220
- edit_lists.back().push_back(wal_deletion.get());
1301
+ if (flushed) {
1302
+ VersionEdit wal_deletion;
1303
+ if (immutable_db_options_.track_and_verify_wals_in_manifest) {
1304
+ wal_deletion.DeleteWalsBefore(max_wal_number + 1);
1305
+ }
1306
+ if (!allow_2pc()) {
1307
+ // In non-2pc mode, flushing the memtables of the column families
1308
+ // means we can advance min_log_number_to_keep.
1309
+ wal_deletion.SetMinLogNumberToKeep(max_wal_number + 1);
1310
+ }
1311
+ assert(versions_->GetColumnFamilySet() != nullptr);
1312
+ recovery_ctx->UpdateVersionEdits(
1313
+ versions_->GetColumnFamilySet()->GetDefault(), wal_deletion);
1221
1314
  }
1222
-
1223
- // write MANIFEST with update
1224
- status = versions_->LogAndApply(cfds, cf_opts, edit_lists, &mutex_,
1225
- directories_.GetDbDir(),
1226
- /*new_descriptor_log=*/true);
1227
1315
  }
1228
1316
  }
1229
1317
 
1230
- if (status.ok() && data_seen && !flushed) {
1231
- status = RestoreAliveLogFiles(wal_numbers);
1318
+ if (status.ok()) {
1319
+ if (data_seen && !flushed) {
1320
+ status = RestoreAliveLogFiles(wal_numbers);
1321
+ } else if (!wal_numbers.empty()) {
1322
+ // If there's no data in the WAL, or we flushed all the data, still
1323
+ // truncate the log file. If the process goes into a crash loop before
1324
+ // the file is deleted, the preallocated space will never get freed.
1325
+ const bool truncate = !read_only;
1326
+ GetLogSizeAndMaybeTruncate(wal_numbers.back(), truncate, nullptr)
1327
+ .PermitUncheckedError();
1328
+ }
1232
1329
  }
1233
1330
 
1234
1331
  event_logger_.Log() << "job" << job_id << "event"
@@ -1237,6 +1334,83 @@ Status DBImpl::RecoverLogFiles(const std::vector<uint64_t>& wal_numbers,
1237
1334
  return status;
1238
1335
  }
1239
1336
 
1337
+ void DBImpl::MoveCorruptedWalFiles(std::vector<uint64_t>& wal_numbers,
1338
+ uint64_t corrupted_wal_number) {
1339
+ size_t num_wals = wal_numbers.size();
1340
+ // Find the first corrupted wal.
1341
+ auto iter = std::lower_bound(wal_numbers.begin(), wal_numbers.end(),
1342
+ corrupted_wal_number);
1343
+ auto corrupt_start_iter = iter;
1344
+
1345
+ // Increment iter to move WAL files from first corrupted_wal_number + 1.
1346
+ iter++;
1347
+
1348
+ std::string archival_path =
1349
+ ArchivalDirectory(immutable_db_options_.GetWalDir());
1350
+ Status create_status = env_->CreateDirIfMissing(archival_path);
1351
+
1352
+ // create_status is only checked when it needs to move the corrupted WAL files
1353
+ // to archive folder.
1354
+ create_status.PermitUncheckedError();
1355
+
1356
+ // Truncate the last WAL to reclaim the pre allocated space before
1357
+ // moving it.
1358
+ GetLogSizeAndMaybeTruncate(wal_numbers.back(), /*truncate=*/true, nullptr)
1359
+ .PermitUncheckedError();
1360
+
1361
+ // Move all the WAL files from corrupted_wal_number + 1 to last WAL
1362
+ // (max_wal_number) to avoid column family inconsistency error to archival
1363
+ // directory. If its unable to create archive dir, it will delete the
1364
+ // corrupted WAL files.
1365
+ // We are moving all but first corrupted WAL file to a different folder.
1366
+ while (iter != wal_numbers.end()) {
1367
+ LogFileNumberSize log(*iter);
1368
+ std::string fname = LogFileName(immutable_db_options_.GetWalDir(), *iter);
1369
+ #ifndef ROCKSDB_LITE
1370
+ if (create_status.ok()) {
1371
+ wal_manager_.ArchiveWALFile(fname, *iter);
1372
+ }
1373
+ #endif
1374
+ iter++;
1375
+ }
1376
+ wal_numbers.erase(corrupt_start_iter + 1, wal_numbers.begin() + num_wals);
1377
+ }
1378
+
1379
+ Status DBImpl::GetLogSizeAndMaybeTruncate(uint64_t wal_number, bool truncate,
1380
+ LogFileNumberSize* log_ptr) {
1381
+ LogFileNumberSize log(wal_number);
1382
+ std::string fname =
1383
+ LogFileName(immutable_db_options_.GetWalDir(), wal_number);
1384
+ Status s;
1385
+ // This gets the apparent size of the wals, not including preallocated space.
1386
+ s = env_->GetFileSize(fname, &log.size);
1387
+ if (s.ok() && truncate) {
1388
+ std::unique_ptr<FSWritableFile> last_log;
1389
+ Status truncate_status = fs_->ReopenWritableFile(
1390
+ fname,
1391
+ fs_->OptimizeForLogWrite(
1392
+ file_options_,
1393
+ BuildDBOptions(immutable_db_options_, mutable_db_options_)),
1394
+ &last_log, nullptr);
1395
+ if (truncate_status.ok()) {
1396
+ truncate_status = last_log->Truncate(log.size, IOOptions(), nullptr);
1397
+ }
1398
+ if (truncate_status.ok()) {
1399
+ truncate_status = last_log->Close(IOOptions(), nullptr);
1400
+ }
1401
+ // Not a critical error if fail to truncate.
1402
+ if (!truncate_status.ok() && !truncate_status.IsNotSupported()) {
1403
+ ROCKS_LOG_WARN(immutable_db_options_.info_log,
1404
+ "Failed to truncate log #%" PRIu64 ": %s", wal_number,
1405
+ truncate_status.ToString().c_str());
1406
+ }
1407
+ }
1408
+ if (log_ptr) {
1409
+ *log_ptr = log;
1410
+ }
1411
+ return s;
1412
+ }
1413
+
1240
1414
  Status DBImpl::RestoreAliveLogFiles(const std::vector<uint64_t>& wal_numbers) {
1241
1415
  if (wal_numbers.empty()) {
1242
1416
  return Status::OK();
@@ -1251,41 +1425,28 @@ Status DBImpl::RestoreAliveLogFiles(const std::vector<uint64_t>& wal_numbers) {
1251
1425
  // FindObsoleteFiles()
1252
1426
  total_log_size_ = 0;
1253
1427
  log_empty_ = false;
1428
+ uint64_t min_wal_with_unflushed_data =
1429
+ versions_->MinLogNumberWithUnflushedData();
1254
1430
  for (auto wal_number : wal_numbers) {
1255
- LogFileNumberSize log(wal_number);
1256
- std::string fname = LogFileName(immutable_db_options_.wal_dir, wal_number);
1257
- // This gets the appear size of the wals, not including preallocated space.
1258
- s = env_->GetFileSize(fname, &log.size);
1259
- if (!s.ok()) {
1260
- break;
1431
+ if (!allow_2pc() && wal_number < min_wal_with_unflushed_data) {
1432
+ // In non-2pc mode, the WAL files not backing unflushed data are not
1433
+ // alive, thus should not be added to the alive_log_files_.
1434
+ continue;
1261
1435
  }
1262
- total_log_size_ += log.size;
1263
- alive_log_files_.push_back(log);
1264
1436
  // We preallocate space for wals, but then after a crash and restart, those
1265
1437
  // preallocated space are not needed anymore. It is likely only the last
1266
1438
  // log has such preallocated space, so we only truncate for the last log.
1267
- if (wal_number == wal_numbers.back()) {
1268
- std::unique_ptr<FSWritableFile> last_log;
1269
- Status truncate_status = fs_->ReopenWritableFile(
1270
- fname,
1271
- fs_->OptimizeForLogWrite(
1272
- file_options_,
1273
- BuildDBOptions(immutable_db_options_, mutable_db_options_)),
1274
- &last_log, nullptr);
1275
- if (truncate_status.ok()) {
1276
- truncate_status = last_log->Truncate(log.size, IOOptions(), nullptr);
1277
- }
1278
- if (truncate_status.ok()) {
1279
- truncate_status = last_log->Close(IOOptions(), nullptr);
1280
- }
1281
- // Not a critical error if fail to truncate.
1282
- if (!truncate_status.ok()) {
1283
- ROCKS_LOG_WARN(immutable_db_options_.info_log,
1284
- "Failed to truncate log #%" PRIu64 ": %s", wal_number,
1285
- truncate_status.ToString().c_str());
1286
- }
1439
+ LogFileNumberSize log;
1440
+ s = GetLogSizeAndMaybeTruncate(
1441
+ wal_number,
1442
+ /*truncate=*/(wal_number == wal_numbers.back()), &log);
1443
+ if (!s.ok()) {
1444
+ break;
1287
1445
  }
1446
+ total_log_size_ += log.size;
1447
+ alive_log_files_.push_back(log);
1288
1448
  }
1449
+ alive_log_files_tail_ = alive_log_files_.rbegin();
1289
1450
  if (two_write_queues_) {
1290
1451
  log_write_mutex_.Unlock();
1291
1452
  }
@@ -1295,7 +1456,13 @@ Status DBImpl::RestoreAliveLogFiles(const std::vector<uint64_t>& wal_numbers) {
1295
1456
  Status DBImpl::WriteLevel0TableForRecovery(int job_id, ColumnFamilyData* cfd,
1296
1457
  MemTable* mem, VersionEdit* edit) {
1297
1458
  mutex_.AssertHeld();
1298
- const uint64_t start_micros = env_->NowMicros();
1459
+ assert(cfd);
1460
+ assert(cfd->imm());
1461
+ // The immutable memtable list must be empty.
1462
+ assert(std::numeric_limits<uint64_t>::max() ==
1463
+ cfd->imm()->GetEarliestMemTableID());
1464
+
1465
+ const uint64_t start_micros = immutable_db_options_.clock->NowMicros();
1299
1466
 
1300
1467
  FileMetaData meta;
1301
1468
  std::vector<BlobFileAddition> blob_file_additions;
@@ -1323,7 +1490,7 @@ Status DBImpl::WriteLevel0TableForRecovery(int job_id, ColumnFamilyData* cfd,
1323
1490
  cfd->GetLatestMutableCFOptions()->paranoid_file_checks;
1324
1491
 
1325
1492
  int64_t _current_time = 0;
1326
- env_->GetCurrentTime(&_current_time)
1493
+ immutable_db_options_.clock->GetCurrentTime(&_current_time)
1327
1494
  .PermitUncheckedError(); // ignore error
1328
1495
  const uint64_t current_time = static_cast<uint64_t>(_current_time);
1329
1496
  meta.oldest_ancester_time = current_time;
@@ -1348,21 +1515,24 @@ Status DBImpl::WriteLevel0TableForRecovery(int job_id, ColumnFamilyData* cfd,
1348
1515
  }
1349
1516
 
1350
1517
  IOStatus io_s;
1351
- s = BuildTable(
1352
- dbname_, versions_.get(), immutable_db_options_, *cfd->ioptions(),
1353
- mutable_cf_options, file_options_for_compaction_, cfd->table_cache(),
1354
- iter.get(), std::move(range_del_iters), &meta, &blob_file_additions,
1355
- cfd->internal_comparator(), cfd->int_tbl_prop_collector_factories(),
1356
- cfd->GetID(), cfd->GetName(), snapshot_seqs,
1357
- earliest_write_conflict_snapshot, snapshot_checker,
1518
+ TableBuilderOptions tboptions(
1519
+ *cfd->ioptions(), mutable_cf_options, cfd->internal_comparator(),
1520
+ cfd->int_tbl_prop_collector_factories(),
1358
1521
  GetCompressionFlush(*cfd->ioptions(), mutable_cf_options),
1359
- mutable_cf_options.sample_for_compression,
1360
- mutable_cf_options.compression_opts, paranoid_file_checks,
1361
- cfd->internal_stats(), TableFileCreationReason::kRecovery, &io_s,
1362
- io_tracer_, &event_logger_, job_id, Env::IO_HIGH,
1363
- nullptr /* table_properties */, -1 /* level */, current_time,
1364
- 0 /* oldest_key_time */, write_hint, 0 /* file_creation_time */,
1365
- db_id_, db_session_id_);
1522
+ mutable_cf_options.compression_opts, cfd->GetID(), cfd->GetName(),
1523
+ 0 /* level */, false /* is_bottommost */,
1524
+ TableFileCreationReason::kRecovery, current_time,
1525
+ 0 /* oldest_key_time */, 0 /* file_creation_time */, db_id_,
1526
+ db_session_id_, 0 /* target_file_size */, meta.fd.GetNumber());
1527
+ s = BuildTable(
1528
+ dbname_, versions_.get(), immutable_db_options_, tboptions,
1529
+ file_options_for_compaction_, cfd->table_cache(), iter.get(),
1530
+ std::move(range_del_iters), &meta, &blob_file_additions,
1531
+ snapshot_seqs, earliest_write_conflict_snapshot, kMaxSequenceNumber,
1532
+ snapshot_checker, paranoid_file_checks, cfd->internal_stats(), &io_s,
1533
+ io_tracer_, BlobFileCreationReason::kRecovery, &event_logger_, job_id,
1534
+ Env::IO_HIGH, nullptr /* table_properties */, write_hint,
1535
+ nullptr /*full_history_ts_low*/, &blob_callback_);
1366
1536
  LogFlush(immutable_db_options_.info_log);
1367
1537
  ROCKS_LOG_DEBUG(immutable_db_options_.info_log,
1368
1538
  "[%s] [WriteLevel0TableForRecovery]"
@@ -1371,8 +1541,10 @@ Status DBImpl::WriteLevel0TableForRecovery(int job_id, ColumnFamilyData* cfd,
1371
1541
  meta.fd.GetFileSize(), s.ToString().c_str());
1372
1542
  mutex_.Lock();
1373
1543
 
1374
- io_s.PermitUncheckedError(); // TODO(AR) is this correct, or should we
1375
- // return io_s if not ok()?
1544
+ // TODO(AR) is this ok?
1545
+ if (!io_s.ok() && s.ok()) {
1546
+ s = io_s;
1547
+ }
1376
1548
  }
1377
1549
  }
1378
1550
  ReleaseFileNumberFromPendingOutputs(pending_outputs_inserted_elem);
@@ -1384,18 +1556,21 @@ Status DBImpl::WriteLevel0TableForRecovery(int job_id, ColumnFamilyData* cfd,
1384
1556
  constexpr int level = 0;
1385
1557
 
1386
1558
  if (s.ok() && has_output) {
1387
- edit->AddFile(level, meta.fd.GetNumber(), meta.fd.GetPathId(),
1388
- meta.fd.GetFileSize(), meta.smallest, meta.largest,
1389
- meta.fd.smallest_seqno, meta.fd.largest_seqno,
1390
- meta.marked_for_compaction, meta.oldest_blob_file_number,
1391
- meta.oldest_ancester_time, meta.file_creation_time,
1392
- meta.file_checksum, meta.file_checksum_func_name);
1559
+ edit->AddFile(
1560
+ level, meta.fd.GetNumber(), meta.fd.GetPathId(), meta.fd.GetFileSize(),
1561
+ meta.smallest, meta.largest, meta.fd.smallest_seqno,
1562
+ meta.fd.largest_seqno, meta.marked_for_compaction, meta.temperature,
1563
+ meta.oldest_blob_file_number, meta.oldest_ancester_time,
1564
+ meta.file_creation_time, meta.file_checksum,
1565
+ meta.file_checksum_func_name, meta.min_timestamp, meta.max_timestamp);
1393
1566
 
1394
- edit->SetBlobFileAdditions(std::move(blob_file_additions));
1567
+ for (const auto& blob : blob_file_additions) {
1568
+ edit->AddBlobFile(blob);
1569
+ }
1395
1570
  }
1396
1571
 
1397
1572
  InternalStats::CompactionStats stats(CompactionReason::kFlush, 1);
1398
- stats.micros = env_->NowMicros() - start_micros;
1573
+ stats.micros = immutable_db_options_.clock->NowMicros() - start_micros;
1399
1574
 
1400
1575
  if (has_output) {
1401
1576
  stats.bytes_written = meta.fd.GetFileSize();
@@ -1404,14 +1579,15 @@ Status DBImpl::WriteLevel0TableForRecovery(int job_id, ColumnFamilyData* cfd,
1404
1579
 
1405
1580
  const auto& blobs = edit->GetBlobFileAdditions();
1406
1581
  for (const auto& blob : blobs) {
1407
- stats.bytes_written += blob.GetTotalBlobBytes();
1582
+ stats.bytes_written_blob += blob.GetTotalBlobBytes();
1408
1583
  }
1409
1584
 
1410
- stats.num_output_files += static_cast<int>(blobs.size());
1585
+ stats.num_output_files_blob = static_cast<int>(blobs.size());
1411
1586
 
1412
1587
  cfd->internal_stats()->AddCompactionStats(level, Env::Priority::USER, stats);
1413
- cfd->internal_stats()->AddCFStats(InternalStats::BYTES_FLUSHED,
1414
- stats.bytes_written);
1588
+ cfd->internal_stats()->AddCFStats(
1589
+ InternalStats::BYTES_FLUSHED,
1590
+ stats.bytes_written + stats.bytes_written_blob);
1415
1591
  RecordTick(stats_, COMPACT_WRITE_BYTES, meta.fd.GetFileSize());
1416
1592
  return s;
1417
1593
  }
@@ -1453,6 +1629,72 @@ Status DB::Open(const DBOptions& db_options, const std::string& dbname,
1453
1629
  !kSeqPerBatch, kBatchPerTxn);
1454
1630
  }
1455
1631
 
1632
+ // TODO: Implement the trimming in flush code path.
1633
+ // TODO: Perform trimming before inserting into memtable during recovery.
1634
+ // TODO: Pick files with max_timestamp > trim_ts by each file's timestamp meta
1635
+ // info, and handle only these files to reduce io.
1636
+ Status DB::OpenAndTrimHistory(
1637
+ const DBOptions& db_options, const std::string& dbname,
1638
+ const std::vector<ColumnFamilyDescriptor>& column_families,
1639
+ std::vector<ColumnFamilyHandle*>* handles, DB** dbptr,
1640
+ std::string trim_ts) {
1641
+ assert(dbptr != nullptr);
1642
+ assert(handles != nullptr);
1643
+ auto validate_options = [&db_options] {
1644
+ if (db_options.avoid_flush_during_recovery) {
1645
+ return Status::InvalidArgument(
1646
+ "avoid_flush_during_recovery incompatible with "
1647
+ "OpenAndTrimHistory");
1648
+ }
1649
+ return Status::OK();
1650
+ };
1651
+ auto s = validate_options();
1652
+ if (!s.ok()) {
1653
+ return s;
1654
+ }
1655
+
1656
+ DB* db = nullptr;
1657
+ s = DB::Open(db_options, dbname, column_families, handles, &db);
1658
+ if (!s.ok()) {
1659
+ return s;
1660
+ }
1661
+ assert(db);
1662
+ CompactRangeOptions options;
1663
+ options.bottommost_level_compaction =
1664
+ BottommostLevelCompaction::kForceOptimized;
1665
+ auto db_impl = static_cast_with_check<DBImpl>(db);
1666
+ for (auto handle : *handles) {
1667
+ assert(handle != nullptr);
1668
+ auto cfh = static_cast_with_check<ColumnFamilyHandleImpl>(handle);
1669
+ auto cfd = cfh->cfd();
1670
+ assert(cfd != nullptr);
1671
+ // Only compact column families with timestamp enabled
1672
+ if (cfd->user_comparator() != nullptr &&
1673
+ cfd->user_comparator()->timestamp_size() > 0) {
1674
+ s = db_impl->CompactRangeInternal(options, handle, nullptr, nullptr,
1675
+ trim_ts);
1676
+ if (!s.ok()) {
1677
+ break;
1678
+ }
1679
+ }
1680
+ }
1681
+ auto clean_op = [&handles, &db] {
1682
+ for (auto handle : *handles) {
1683
+ auto temp_s = db->DestroyColumnFamilyHandle(handle);
1684
+ assert(temp_s.ok());
1685
+ }
1686
+ handles->clear();
1687
+ delete db;
1688
+ };
1689
+ if (!s.ok()) {
1690
+ clean_op();
1691
+ return s;
1692
+ }
1693
+
1694
+ *dbptr = db;
1695
+ return s;
1696
+ }
1697
+
1456
1698
  IOStatus DBImpl::CreateWAL(uint64_t log_file_num, uint64_t recycle_log_number,
1457
1699
  size_t preallocate_block_size,
1458
1700
  log::Writer** new_log) {
@@ -1463,15 +1705,14 @@ IOStatus DBImpl::CreateWAL(uint64_t log_file_num, uint64_t recycle_log_number,
1463
1705
  BuildDBOptions(immutable_db_options_, mutable_db_options_);
1464
1706
  FileOptions opt_file_options =
1465
1707
  fs_->OptimizeForLogWrite(file_options_, db_options);
1466
- std::string log_fname =
1467
- LogFileName(immutable_db_options_.wal_dir, log_file_num);
1708
+ std::string wal_dir = immutable_db_options_.GetWalDir();
1709
+ std::string log_fname = LogFileName(wal_dir, log_file_num);
1468
1710
 
1469
1711
  if (recycle_log_number) {
1470
1712
  ROCKS_LOG_INFO(immutable_db_options_.info_log,
1471
1713
  "reusing log %" PRIu64 " from recycle list\n",
1472
1714
  recycle_log_number);
1473
- std::string old_log_fname =
1474
- LogFileName(immutable_db_options_.wal_dir, recycle_log_number);
1715
+ std::string old_log_fname = LogFileName(wal_dir, recycle_log_number);
1475
1716
  TEST_SYNC_POINT("DBImpl::CreateWAL:BeforeReuseWritableFile1");
1476
1717
  TEST_SYNC_POINT("DBImpl::CreateWAL:BeforeReuseWritableFile2");
1477
1718
  io_s = fs_->ReuseWritableFile(log_fname, old_log_fname, opt_file_options,
@@ -1485,12 +1726,17 @@ IOStatus DBImpl::CreateWAL(uint64_t log_file_num, uint64_t recycle_log_number,
1485
1726
  lfile->SetPreallocationBlockSize(preallocate_block_size);
1486
1727
 
1487
1728
  const auto& listeners = immutable_db_options_.listeners;
1729
+ FileTypeSet tmp_set = immutable_db_options_.checksum_handoff_file_types;
1488
1730
  std::unique_ptr<WritableFileWriter> file_writer(new WritableFileWriter(
1489
- std::move(lfile), log_fname, opt_file_options, env_, io_tracer_,
1490
- nullptr /* stats */, listeners));
1731
+ std::move(lfile), log_fname, opt_file_options,
1732
+ immutable_db_options_.clock, io_tracer_, nullptr /* stats */, listeners,
1733
+ nullptr, tmp_set.Contains(FileType::kWalFile),
1734
+ tmp_set.Contains(FileType::kWalFile)));
1491
1735
  *new_log = new log::Writer(std::move(file_writer), log_file_num,
1492
1736
  immutable_db_options_.recycle_log_file_num > 0,
1493
- immutable_db_options_.manual_wal_flush);
1737
+ immutable_db_options_.manual_wal_flush,
1738
+ immutable_db_options_.wal_compression);
1739
+ io_s = (*new_log)->AddCompressionTypeRecord();
1494
1740
  }
1495
1741
  return io_s;
1496
1742
  }
@@ -1519,7 +1765,7 @@ Status DBImpl::Open(const DBOptions& db_options, const std::string& dbname,
1519
1765
  }
1520
1766
 
1521
1767
  DBImpl* impl = new DBImpl(db_options, dbname, seq_per_batch, batch_per_txn);
1522
- s = impl->env_->CreateDirIfMissing(impl->immutable_db_options_.wal_dir);
1768
+ s = impl->env_->CreateDirIfMissing(impl->immutable_db_options_.GetWalDir());
1523
1769
  if (s.ok()) {
1524
1770
  std::vector<std::string> paths;
1525
1771
  for (auto& db_path : impl->immutable_db_options_.db_paths) {
@@ -1551,12 +1797,16 @@ Status DBImpl::Open(const DBOptions& db_options, const std::string& dbname,
1551
1797
  return s;
1552
1798
  }
1553
1799
 
1554
- impl->wal_in_db_path_ = IsWalDirSameAsDBPath(&impl->immutable_db_options_);
1800
+ impl->wal_in_db_path_ = impl->immutable_db_options_.IsWalDirSameAsDBPath();
1555
1801
 
1556
1802
  impl->mutex_.Lock();
1803
+
1804
+ RecoveryContext recovery_ctx;
1805
+
1557
1806
  // Handles create_if_missing, error_if_exists
1558
1807
  uint64_t recovered_seq(kMaxSequenceNumber);
1559
- s = impl->Recover(column_families, false, false, false, &recovered_seq);
1808
+ s = impl->Recover(column_families, false, false, false, &recovered_seq,
1809
+ &recovery_ctx);
1560
1810
  if (s.ok()) {
1561
1811
  uint64_t new_log_number = impl->versions_->NewFileNumber();
1562
1812
  log::Writer* new_log = nullptr;
@@ -1573,69 +1823,35 @@ Status DBImpl::Open(const DBOptions& db_options, const std::string& dbname,
1573
1823
  }
1574
1824
 
1575
1825
  if (s.ok()) {
1576
- // set column family handles
1577
- for (auto cf : column_families) {
1578
- auto cfd =
1579
- impl->versions_->GetColumnFamilySet()->GetColumnFamily(cf.name);
1580
- if (cfd != nullptr) {
1581
- handles->push_back(
1582
- new ColumnFamilyHandleImpl(cfd, impl, &impl->mutex_));
1583
- impl->NewThreadStatusCfInfo(cfd);
1584
- } else {
1585
- if (db_options.create_missing_column_families) {
1586
- // missing column family, create it
1587
- ColumnFamilyHandle* handle;
1588
- impl->mutex_.Unlock();
1589
- s = impl->CreateColumnFamily(cf.options, cf.name, &handle);
1590
- impl->mutex_.Lock();
1591
- if (s.ok()) {
1592
- handles->push_back(handle);
1593
- } else {
1594
- break;
1595
- }
1596
- } else {
1597
- s = Status::InvalidArgument("Column family not found", cf.name);
1598
- break;
1599
- }
1600
- }
1601
- }
1602
- }
1603
- if (s.ok()) {
1604
- SuperVersionContext sv_context(/* create_superversion */ true);
1605
- for (auto cfd : *impl->versions_->GetColumnFamilySet()) {
1606
- impl->InstallSuperVersionAndScheduleWork(
1607
- cfd, &sv_context, *cfd->GetLatestMutableCFOptions());
1608
- }
1609
- sv_context.Clean();
1610
1826
  if (impl->two_write_queues_) {
1611
1827
  impl->log_write_mutex_.Lock();
1612
1828
  }
1613
1829
  impl->alive_log_files_.push_back(
1614
1830
  DBImpl::LogFileNumberSize(impl->logfile_number_));
1831
+ impl->alive_log_files_tail_ = impl->alive_log_files_.rbegin();
1615
1832
  if (impl->two_write_queues_) {
1616
1833
  impl->log_write_mutex_.Unlock();
1617
1834
  }
1618
-
1619
- impl->DeleteObsoleteFiles();
1620
- s = impl->directories_.GetDbDir()->Fsync(IOOptions(), nullptr);
1621
1835
  }
1622
1836
  if (s.ok()) {
1623
1837
  // In WritePrepared there could be gap in sequence numbers. This breaks
1624
- // the trick we use in kPointInTimeRecovery which assumes the first seq in
1625
- // the log right after the corrupted log is one larger than the last seq
1626
- // we read from the wals. To let this trick keep working, we add a dummy
1627
- // entry with the expected sequence to the first log right after recovery.
1628
- // In non-WritePrepared case also the new log after recovery could be
1629
- // empty, and thus missing the consecutive seq hint to distinguish
1630
- // middle-log corruption to corrupted-log-remained-after-recovery. This
1631
- // case also will be addressed by a dummy write.
1838
+ // the trick we use in kPointInTimeRecovery which assumes the first seq
1839
+ // in the log right after the corrupted log is one larger than the last
1840
+ // seq we read from the wals. To let this trick keep working, we add a
1841
+ // dummy entry with the expected sequence to the first log right after
1842
+ // recovery. In non-WritePrepared case also the new log after recovery
1843
+ // could be empty, and thus missing the consecutive seq hint to
1844
+ // distinguish middle-log corruption to
1845
+ // corrupted-log-remained-after-recovery. This case also will be
1846
+ // addressed by a dummy write.
1632
1847
  if (recovered_seq != kMaxSequenceNumber) {
1633
1848
  WriteBatch empty_batch;
1634
1849
  WriteBatchInternal::SetSequence(&empty_batch, recovered_seq);
1635
1850
  WriteOptions write_options;
1636
1851
  uint64_t log_used, log_size;
1637
1852
  log::Writer* log_writer = impl->logs_.back().writer;
1638
- s = impl->WriteToWAL(empty_batch, log_writer, &log_used, &log_size);
1853
+ s = impl->WriteToWAL(empty_batch, log_writer, &log_used, &log_size,
1854
+ Env::IO_TOTAL, /*with_db_mutex==*/true);
1639
1855
  if (s.ok()) {
1640
1856
  // Need to fsync, otherwise it might get lost after a power reset.
1641
1857
  s = impl->FlushWAL(false);
@@ -1646,6 +1862,52 @@ Status DBImpl::Open(const DBOptions& db_options, const std::string& dbname,
1646
1862
  }
1647
1863
  }
1648
1864
  }
1865
+ if (s.ok()) {
1866
+ s = impl->LogAndApplyForRecovery(recovery_ctx);
1867
+ }
1868
+
1869
+ if (s.ok() && impl->immutable_db_options_.persist_stats_to_disk) {
1870
+ impl->mutex_.AssertHeld();
1871
+ s = impl->InitPersistStatsColumnFamily();
1872
+ }
1873
+
1874
+ if (s.ok()) {
1875
+ // set column family handles
1876
+ for (auto cf : column_families) {
1877
+ auto cfd =
1878
+ impl->versions_->GetColumnFamilySet()->GetColumnFamily(cf.name);
1879
+ if (cfd != nullptr) {
1880
+ handles->push_back(
1881
+ new ColumnFamilyHandleImpl(cfd, impl, &impl->mutex_));
1882
+ impl->NewThreadStatusCfInfo(cfd);
1883
+ } else {
1884
+ if (db_options.create_missing_column_families) {
1885
+ // missing column family, create it
1886
+ ColumnFamilyHandle* handle;
1887
+ impl->mutex_.Unlock();
1888
+ s = impl->CreateColumnFamily(cf.options, cf.name, &handle);
1889
+ impl->mutex_.Lock();
1890
+ if (s.ok()) {
1891
+ handles->push_back(handle);
1892
+ } else {
1893
+ break;
1894
+ }
1895
+ } else {
1896
+ s = Status::InvalidArgument("Column family not found", cf.name);
1897
+ break;
1898
+ }
1899
+ }
1900
+ }
1901
+ }
1902
+
1903
+ if (s.ok()) {
1904
+ SuperVersionContext sv_context(/* create_superversion */ true);
1905
+ for (auto cfd : *impl->versions_->GetColumnFamilySet()) {
1906
+ impl->InstallSuperVersionAndScheduleWork(
1907
+ cfd, &sv_context, *cfd->GetLatestMutableCFOptions());
1908
+ }
1909
+ sv_context.Clean();
1910
+ }
1649
1911
  if (s.ok() && impl->immutable_db_options_.persist_stats_to_disk) {
1650
1912
  // try to read format version
1651
1913
  s = impl->PersistentStatsProcessFormatVersion();
@@ -1671,7 +1933,8 @@ Status DBImpl::Open(const DBOptions& db_options, const std::string& dbname,
1671
1933
  if (cfd->ioptions()->merge_operator != nullptr &&
1672
1934
  !cfd->mem()->IsMergeOperatorSupported()) {
1673
1935
  s = Status::InvalidArgument(
1674
- "The memtable of column family %s does not support merge operator "
1936
+ "The memtable of column family %s does not support merge "
1937
+ "operator "
1675
1938
  "its options.merge_operator is non-null",
1676
1939
  cfd->GetName().c_str());
1677
1940
  }
@@ -1690,6 +1953,8 @@ Status DBImpl::Open(const DBOptions& db_options, const std::string& dbname,
1690
1953
 
1691
1954
  *dbptr = impl;
1692
1955
  impl->opened_successfully_ = true;
1956
+ impl->DeleteObsoleteFiles();
1957
+ TEST_SYNC_POINT("DBImpl::Open:AfterDeleteFiles");
1693
1958
  impl->MaybeScheduleFlushOrCompaction();
1694
1959
  } else {
1695
1960
  persist_options_status.PermitUncheckedError();
@@ -1716,6 +1981,8 @@ Status DBImpl::Open(const DBOptions& db_options, const std::string& dbname,
1716
1981
 
1717
1982
  std::vector<LiveFileMetaData> metadata;
1718
1983
 
1984
+ // TODO: Once GetLiveFilesMetaData supports blob files, update the logic
1985
+ // below to get known_file_sizes for blob files.
1719
1986
  impl->mutex_.Lock();
1720
1987
  impl->versions_->GetLiveFilesMetaData(&metadata);
1721
1988
  impl->mutex_.Unlock();
@@ -1748,13 +2015,12 @@ Status DBImpl::Open(const DBOptions& db_options, const std::string& dbname,
1748
2015
  FileType file_type;
1749
2016
  std::string file_path = path + "/" + file_name;
1750
2017
  if (ParseFileName(file_name, &file_number, &file_type) &&
1751
- file_type == kTableFile) {
2018
+ (file_type == kTableFile || file_type == kBlobFile)) {
1752
2019
  // TODO: Check for errors from OnAddFile?
1753
2020
  if (known_file_sizes.count(file_name)) {
1754
2021
  // We're assuming that each sst file name exists in at most one of
1755
2022
  // the paths.
1756
- sfm->OnAddFile(file_path, known_file_sizes.at(file_name),
1757
- /* compaction */ false)
2023
+ sfm->OnAddFile(file_path, known_file_sizes.at(file_name))
1758
2024
  .PermitUncheckedError();
1759
2025
  } else {
1760
2026
  sfm->OnAddFile(file_path).PermitUncheckedError();
@@ -1786,14 +2052,15 @@ Status DBImpl::Open(const DBOptions& db_options, const std::string& dbname,
1786
2052
  "DB::Open() failed --- Unable to persist Options file",
1787
2053
  persist_options_status.ToString());
1788
2054
  }
1789
- } else {
2055
+ }
2056
+ if (!s.ok()) {
1790
2057
  ROCKS_LOG_WARN(impl->immutable_db_options_.info_log,
1791
- "Persisting Option File error: %s",
1792
- persist_options_status.ToString().c_str());
2058
+ "DB::Open() failed: %s", s.ToString().c_str());
1793
2059
  }
1794
2060
  if (s.ok()) {
1795
- impl->StartPeriodicWorkScheduler();
1796
- } else {
2061
+ s = impl->StartPeriodicWorkScheduler();
2062
+ }
2063
+ if (!s.ok()) {
1797
2064
  for (auto* h : *handles) {
1798
2065
  delete h;
1799
2066
  }