@nxtedition/rocksdb 8.2.7 → 9.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (359)
  1. package/deps/rocksdb/rocksdb/CMakeLists.txt +7 -1
  2. package/deps/rocksdb/rocksdb/Makefile +22 -19
  3. package/deps/rocksdb/rocksdb/TARGETS +8 -0
  4. package/deps/rocksdb/rocksdb/cache/cache_bench_tool.cc +157 -61
  5. package/deps/rocksdb/rocksdb/cache/cache_test.cc +43 -92
  6. package/deps/rocksdb/rocksdb/cache/clock_cache.cc +632 -455
  7. package/deps/rocksdb/rocksdb/cache/clock_cache.h +244 -149
  8. package/deps/rocksdb/rocksdb/cache/compressed_secondary_cache.cc +41 -13
  9. package/deps/rocksdb/rocksdb/cache/compressed_secondary_cache.h +11 -1
  10. package/deps/rocksdb/rocksdb/cache/compressed_secondary_cache_test.cc +216 -17
  11. package/deps/rocksdb/rocksdb/cache/lru_cache.cc +7 -5
  12. package/deps/rocksdb/rocksdb/cache/lru_cache_test.cc +279 -199
  13. package/deps/rocksdb/rocksdb/cache/secondary_cache.cc +2 -1
  14. package/deps/rocksdb/rocksdb/cache/secondary_cache_adapter.cc +159 -8
  15. package/deps/rocksdb/rocksdb/cache/secondary_cache_adapter.h +28 -2
  16. package/deps/rocksdb/rocksdb/cache/sharded_cache.cc +1 -1
  17. package/deps/rocksdb/rocksdb/cache/sharded_cache.h +8 -0
  18. package/deps/rocksdb/rocksdb/crash_test.mk +14 -0
  19. package/deps/rocksdb/rocksdb/db/arena_wrapped_db_iter.cc +3 -1
  20. package/deps/rocksdb/rocksdb/db/blob/blob_file_builder.cc +1 -1
  21. package/deps/rocksdb/rocksdb/db/blob/blob_file_builder_test.cc +1 -1
  22. package/deps/rocksdb/rocksdb/db/blob/blob_file_cache.cc +2 -2
  23. package/deps/rocksdb/rocksdb/db/blob/blob_file_cache.h +1 -1
  24. package/deps/rocksdb/rocksdb/db/blob/blob_file_reader.cc +18 -21
  25. package/deps/rocksdb/rocksdb/db/blob/blob_file_reader.h +1 -2
  26. package/deps/rocksdb/rocksdb/db/blob/blob_file_reader_test.cc +1 -1
  27. package/deps/rocksdb/rocksdb/db/blob/blob_log_sequential_reader.cc +2 -3
  28. package/deps/rocksdb/rocksdb/db/blob/blob_source_test.cc +1 -1
  29. package/deps/rocksdb/rocksdb/db/builder.cc +32 -7
  30. package/deps/rocksdb/rocksdb/db/c.cc +169 -6
  31. package/deps/rocksdb/rocksdb/db/c_test.c +104 -6
  32. package/deps/rocksdb/rocksdb/db/column_family.cc +98 -47
  33. package/deps/rocksdb/rocksdb/db/column_family.h +25 -2
  34. package/deps/rocksdb/rocksdb/db/column_family_test.cc +213 -2
  35. package/deps/rocksdb/rocksdb/db/compact_files_test.cc +4 -1
  36. package/deps/rocksdb/rocksdb/db/compaction/compaction.cc +93 -23
  37. package/deps/rocksdb/rocksdb/db/compaction/compaction.h +33 -9
  38. package/deps/rocksdb/rocksdb/db/compaction/compaction_iterator.cc +7 -6
  39. package/deps/rocksdb/rocksdb/db/compaction/compaction_iterator.h +17 -6
  40. package/deps/rocksdb/rocksdb/db/compaction/compaction_iterator_test.cc +2 -2
  41. package/deps/rocksdb/rocksdb/db/compaction/compaction_job.cc +107 -43
  42. package/deps/rocksdb/rocksdb/db/compaction/compaction_job.h +15 -4
  43. package/deps/rocksdb/rocksdb/db/compaction/compaction_job_stats_test.cc +2 -0
  44. package/deps/rocksdb/rocksdb/db/compaction/compaction_job_test.cc +4 -2
  45. package/deps/rocksdb/rocksdb/db/compaction/compaction_outputs.cc +25 -17
  46. package/deps/rocksdb/rocksdb/db/compaction/compaction_outputs.h +13 -4
  47. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker.cc +11 -11
  48. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_test.cc +29 -4
  49. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_universal.cc +24 -31
  50. package/deps/rocksdb/rocksdb/db/compaction/file_pri.h +3 -1
  51. package/deps/rocksdb/rocksdb/db/compaction/tiered_compaction_test.cc +19 -19
  52. package/deps/rocksdb/rocksdb/db/comparator_db_test.cc +2 -1
  53. package/deps/rocksdb/rocksdb/db/convenience.cc +20 -3
  54. package/deps/rocksdb/rocksdb/db/convenience_impl.h +15 -0
  55. package/deps/rocksdb/rocksdb/db/corruption_test.cc +17 -0
  56. package/deps/rocksdb/rocksdb/db/cuckoo_table_db_test.cc +1 -0
  57. package/deps/rocksdb/rocksdb/db/db_basic_test.cc +17 -3
  58. package/deps/rocksdb/rocksdb/db/db_block_cache_test.cc +5 -0
  59. package/deps/rocksdb/rocksdb/db/db_bloom_filter_test.cc +15 -15
  60. package/deps/rocksdb/rocksdb/db/db_compaction_test.cc +666 -44
  61. package/deps/rocksdb/rocksdb/db/db_filesnapshot.cc +2 -29
  62. package/deps/rocksdb/rocksdb/db/db_flush_test.cc +274 -1
  63. package/deps/rocksdb/rocksdb/db/db_impl/compacted_db_impl.cc +40 -19
  64. package/deps/rocksdb/rocksdb/db/db_impl/compacted_db_impl.h +6 -5
  65. package/deps/rocksdb/rocksdb/db/db_impl/db_impl.cc +250 -116
  66. package/deps/rocksdb/rocksdb/db/db_impl/db_impl.h +51 -23
  67. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_compaction_flush.cc +354 -96
  68. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_debug.cc +6 -3
  69. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_experimental.cc +2 -1
  70. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_files.cc +5 -0
  71. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_open.cc +50 -21
  72. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_readonly.cc +26 -13
  73. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_readonly.h +13 -5
  74. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_secondary.cc +61 -21
  75. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_secondary.h +8 -87
  76. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_write.cc +7 -1
  77. package/deps/rocksdb/rocksdb/db/db_iter.cc +2 -2
  78. package/deps/rocksdb/rocksdb/db/db_iter.h +1 -0
  79. package/deps/rocksdb/rocksdb/db/db_merge_operand_test.cc +4 -11
  80. package/deps/rocksdb/rocksdb/db/db_merge_operator_test.cc +6 -6
  81. package/deps/rocksdb/rocksdb/db/db_options_test.cc +39 -29
  82. package/deps/rocksdb/rocksdb/db/db_properties_test.cc +26 -36
  83. package/deps/rocksdb/rocksdb/db/db_range_del_test.cc +106 -0
  84. package/deps/rocksdb/rocksdb/db/db_rate_limiter_test.cc +12 -3
  85. package/deps/rocksdb/rocksdb/db/db_statistics_test.cc +1 -1
  86. package/deps/rocksdb/rocksdb/db/db_table_properties_test.cc +1 -0
  87. package/deps/rocksdb/rocksdb/db/db_tailing_iter_test.cc +279 -166
  88. package/deps/rocksdb/rocksdb/db/db_test.cc +48 -21
  89. package/deps/rocksdb/rocksdb/db/db_test2.cc +81 -12
  90. package/deps/rocksdb/rocksdb/db/db_test_util.cc +14 -6
  91. package/deps/rocksdb/rocksdb/db/db_test_util.h +40 -0
  92. package/deps/rocksdb/rocksdb/db/db_universal_compaction_test.cc +13 -1
  93. package/deps/rocksdb/rocksdb/db/db_wal_test.cc +233 -0
  94. package/deps/rocksdb/rocksdb/db/db_with_timestamp_basic_test.cc +143 -0
  95. package/deps/rocksdb/rocksdb/db/db_write_buffer_manager_test.cc +6 -6
  96. package/deps/rocksdb/rocksdb/db/db_write_test.cc +2 -2
  97. package/deps/rocksdb/rocksdb/db/dbformat.cc +36 -0
  98. package/deps/rocksdb/rocksdb/db/dbformat.h +169 -20
  99. package/deps/rocksdb/rocksdb/db/dbformat_test.cc +129 -0
  100. package/deps/rocksdb/rocksdb/db/error_handler.cc +16 -0
  101. package/deps/rocksdb/rocksdb/db/error_handler.h +6 -3
  102. package/deps/rocksdb/rocksdb/db/error_handler_fs_test.cc +4 -4
  103. package/deps/rocksdb/rocksdb/db/event_helpers.cc +4 -0
  104. package/deps/rocksdb/rocksdb/db/experimental.cc +2 -1
  105. package/deps/rocksdb/rocksdb/db/external_sst_file_basic_test.cc +4 -4
  106. package/deps/rocksdb/rocksdb/db/external_sst_file_ingestion_job.cc +17 -8
  107. package/deps/rocksdb/rocksdb/db/external_sst_file_test.cc +86 -4
  108. package/deps/rocksdb/rocksdb/db/fault_injection_test.cc +1 -1
  109. package/deps/rocksdb/rocksdb/db/file_indexer.cc +2 -4
  110. package/deps/rocksdb/rocksdb/db/flush_job.cc +101 -11
  111. package/deps/rocksdb/rocksdb/db/flush_job.h +24 -1
  112. package/deps/rocksdb/rocksdb/db/flush_job_test.cc +88 -11
  113. package/deps/rocksdb/rocksdb/db/forward_iterator.cc +2 -3
  114. package/deps/rocksdb/rocksdb/db/import_column_family_job.cc +159 -91
  115. package/deps/rocksdb/rocksdb/db/import_column_family_job.h +19 -10
  116. package/deps/rocksdb/rocksdb/db/import_column_family_test.cc +143 -0
  117. package/deps/rocksdb/rocksdb/db/internal_stats.cc +13 -1
  118. package/deps/rocksdb/rocksdb/db/internal_stats.h +2 -0
  119. package/deps/rocksdb/rocksdb/db/listener_test.cc +2 -1
  120. package/deps/rocksdb/rocksdb/db/log_reader.h +3 -2
  121. package/deps/rocksdb/rocksdb/db/log_test.cc +17 -21
  122. package/deps/rocksdb/rocksdb/db/log_writer.cc +1 -1
  123. package/deps/rocksdb/rocksdb/db/log_writer.h +3 -2
  124. package/deps/rocksdb/rocksdb/db/manual_compaction_test.cc +4 -3
  125. package/deps/rocksdb/rocksdb/db/memtable.cc +52 -13
  126. package/deps/rocksdb/rocksdb/db/memtable.h +45 -1
  127. package/deps/rocksdb/rocksdb/db/memtable_list.cc +44 -10
  128. package/deps/rocksdb/rocksdb/db/memtable_list.h +32 -1
  129. package/deps/rocksdb/rocksdb/db/memtable_list_test.cc +90 -4
  130. package/deps/rocksdb/rocksdb/db/perf_context_test.cc +2 -2
  131. package/deps/rocksdb/rocksdb/db/plain_table_db_test.cc +1 -0
  132. package/deps/rocksdb/rocksdb/db/repair.cc +21 -4
  133. package/deps/rocksdb/rocksdb/db/repair_test.cc +143 -2
  134. package/deps/rocksdb/rocksdb/db/seqno_time_test.cc +5 -4
  135. package/deps/rocksdb/rocksdb/db/table_cache.cc +44 -35
  136. package/deps/rocksdb/rocksdb/db/table_cache.h +6 -6
  137. package/deps/rocksdb/rocksdb/db/table_cache_sync_and_async.h +2 -2
  138. package/deps/rocksdb/rocksdb/db/version_builder.cc +0 -1
  139. package/deps/rocksdb/rocksdb/db/version_builder_test.cc +236 -204
  140. package/deps/rocksdb/rocksdb/db/version_edit.cc +66 -4
  141. package/deps/rocksdb/rocksdb/db/version_edit.h +48 -6
  142. package/deps/rocksdb/rocksdb/db/version_edit_handler.cc +80 -8
  143. package/deps/rocksdb/rocksdb/db/version_edit_handler.h +12 -0
  144. package/deps/rocksdb/rocksdb/db/version_edit_test.cc +86 -17
  145. package/deps/rocksdb/rocksdb/db/version_set.cc +136 -41
  146. package/deps/rocksdb/rocksdb/db/version_set.h +28 -7
  147. package/deps/rocksdb/rocksdb/db/version_set_test.cc +25 -15
  148. package/deps/rocksdb/rocksdb/db/write_batch.cc +11 -0
  149. package/deps/rocksdb/rocksdb/db/write_batch_internal.h +3 -0
  150. package/deps/rocksdb/rocksdb/db/write_batch_test.cc +16 -0
  151. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_common.h +22 -3
  152. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_driver.cc +2 -0
  153. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_env_wrapper.h +42 -0
  154. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_gflags.cc +32 -3
  155. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_shared_state.h +7 -0
  156. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_test_base.cc +247 -120
  157. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_test_base.h +9 -4
  158. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_tool.cc +13 -6
  159. package/deps/rocksdb/rocksdb/db_stress_tool/expected_value.h +2 -0
  160. package/deps/rocksdb/rocksdb/db_stress_tool/multi_ops_txns_stress.cc +15 -27
  161. package/deps/rocksdb/rocksdb/db_stress_tool/no_batched_ops_stress.cc +264 -69
  162. package/deps/rocksdb/rocksdb/env/env.cc +1 -2
  163. package/deps/rocksdb/rocksdb/env/env_encryption.cc +11 -165
  164. package/deps/rocksdb/rocksdb/env/env_encryption_ctr.h +0 -17
  165. package/deps/rocksdb/rocksdb/env/env_posix.cc +6 -2
  166. package/deps/rocksdb/rocksdb/env/env_test.cc +86 -2
  167. package/deps/rocksdb/rocksdb/env/fs_posix.cc +6 -4
  168. package/deps/rocksdb/rocksdb/env/unique_id_gen.cc +78 -0
  169. package/deps/rocksdb/rocksdb/env/unique_id_gen.h +34 -0
  170. package/deps/rocksdb/rocksdb/file/delete_scheduler.cc +1 -0
  171. package/deps/rocksdb/rocksdb/file/delete_scheduler_test.cc +15 -4
  172. package/deps/rocksdb/rocksdb/file/file_prefetch_buffer.cc +52 -43
  173. package/deps/rocksdb/rocksdb/file/file_prefetch_buffer.h +34 -18
  174. package/deps/rocksdb/rocksdb/file/file_util.cc +10 -5
  175. package/deps/rocksdb/rocksdb/file/file_util.h +13 -1
  176. package/deps/rocksdb/rocksdb/file/prefetch_test.cc +724 -79
  177. package/deps/rocksdb/rocksdb/file/random_access_file_reader.cc +64 -33
  178. package/deps/rocksdb/rocksdb/file/random_access_file_reader.h +3 -16
  179. package/deps/rocksdb/rocksdb/file/random_access_file_reader_test.cc +23 -12
  180. package/deps/rocksdb/rocksdb/file/sequence_file_reader.h +3 -0
  181. package/deps/rocksdb/rocksdb/include/rocksdb/advanced_cache.h +2 -1
  182. package/deps/rocksdb/rocksdb/include/rocksdb/advanced_options.h +153 -88
  183. package/deps/rocksdb/rocksdb/include/rocksdb/c.h +70 -2
  184. package/deps/rocksdb/rocksdb/include/rocksdb/cache.h +50 -11
  185. package/deps/rocksdb/rocksdb/include/rocksdb/compaction_job_stats.h +3 -0
  186. package/deps/rocksdb/rocksdb/include/rocksdb/comparator.h +16 -2
  187. package/deps/rocksdb/rocksdb/include/rocksdb/convenience.h +1 -1
  188. package/deps/rocksdb/rocksdb/include/rocksdb/db.h +55 -8
  189. package/deps/rocksdb/rocksdb/include/rocksdb/env.h +32 -4
  190. package/deps/rocksdb/rocksdb/include/rocksdb/env_encryption.h +9 -109
  191. package/deps/rocksdb/rocksdb/include/rocksdb/file_system.h +90 -13
  192. package/deps/rocksdb/rocksdb/include/rocksdb/listener.h +3 -0
  193. package/deps/rocksdb/rocksdb/include/rocksdb/options.h +85 -17
  194. package/deps/rocksdb/rocksdb/include/rocksdb/secondary_cache.h +13 -1
  195. package/deps/rocksdb/rocksdb/include/rocksdb/sst_file_manager.h +2 -1
  196. package/deps/rocksdb/rocksdb/include/rocksdb/sst_file_writer.h +5 -1
  197. package/deps/rocksdb/rocksdb/include/rocksdb/statistics.h +21 -2
  198. package/deps/rocksdb/rocksdb/include/rocksdb/table.h +7 -1
  199. package/deps/rocksdb/rocksdb/include/rocksdb/table_properties.h +6 -0
  200. package/deps/rocksdb/rocksdb/include/rocksdb/thread_status.h +5 -0
  201. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/optimistic_transaction_db.h +33 -2
  202. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/stackable_db.h +14 -0
  203. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/transaction.h +33 -2
  204. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/write_batch_with_index.h +0 -3
  205. package/deps/rocksdb/rocksdb/include/rocksdb/version.h +2 -2
  206. package/deps/rocksdb/rocksdb/include/rocksdb/write_batch.h +3 -0
  207. package/deps/rocksdb/rocksdb/memory/arena_test.cc +18 -11
  208. package/deps/rocksdb/rocksdb/memory/jemalloc_nodump_allocator.cc +2 -1
  209. package/deps/rocksdb/rocksdb/microbench/db_basic_bench.cc +69 -34
  210. package/deps/rocksdb/rocksdb/monitoring/statistics.cc +16 -1
  211. package/deps/rocksdb/rocksdb/monitoring/thread_status_util_debug.cc +10 -0
  212. package/deps/rocksdb/rocksdb/options/cf_options.cc +19 -0
  213. package/deps/rocksdb/rocksdb/options/cf_options.h +10 -2
  214. package/deps/rocksdb/rocksdb/options/customizable_test.cc +2 -1
  215. package/deps/rocksdb/rocksdb/options/db_options.cc +7 -0
  216. package/deps/rocksdb/rocksdb/options/db_options.h +1 -0
  217. package/deps/rocksdb/rocksdb/options/options.cc +15 -1
  218. package/deps/rocksdb/rocksdb/options/options_helper.cc +6 -0
  219. package/deps/rocksdb/rocksdb/options/options_settable_test.cc +11 -3
  220. package/deps/rocksdb/rocksdb/options/options_test.cc +8 -0
  221. package/deps/rocksdb/rocksdb/port/mmap.h +20 -0
  222. package/deps/rocksdb/rocksdb/port/stack_trace.cc +27 -12
  223. package/deps/rocksdb/rocksdb/port/win/env_win.h +1 -1
  224. package/deps/rocksdb/rocksdb/src.mk +3 -0
  225. package/deps/rocksdb/rocksdb/table/block_based/binary_search_index_reader.cc +2 -1
  226. package/deps/rocksdb/rocksdb/table/block_based/block.cc +48 -22
  227. package/deps/rocksdb/rocksdb/table/block_based/block.h +60 -12
  228. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_builder.cc +115 -42
  229. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_factory.cc +6 -5
  230. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_iterator.cc +60 -2
  231. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_iterator.h +2 -0
  232. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader.cc +62 -44
  233. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader.h +36 -14
  234. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader_sync_and_async.h +38 -15
  235. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader_test.cc +219 -51
  236. package/deps/rocksdb/rocksdb/table/block_based/block_builder.cc +41 -8
  237. package/deps/rocksdb/rocksdb/table/block_based/block_builder.h +25 -1
  238. package/deps/rocksdb/rocksdb/table/block_based/block_prefetcher.cc +50 -21
  239. package/deps/rocksdb/rocksdb/table/block_based/block_prefetcher.h +11 -4
  240. package/deps/rocksdb/rocksdb/table/block_based/block_test.cc +195 -55
  241. package/deps/rocksdb/rocksdb/table/block_based/hash_index_reader.cc +1 -1
  242. package/deps/rocksdb/rocksdb/table/block_based/index_builder.cc +31 -16
  243. package/deps/rocksdb/rocksdb/table/block_based/index_builder.h +97 -58
  244. package/deps/rocksdb/rocksdb/table/block_based/index_reader_common.cc +1 -1
  245. package/deps/rocksdb/rocksdb/table/block_based/index_reader_common.h +6 -0
  246. package/deps/rocksdb/rocksdb/table/block_based/partitioned_filter_block.cc +27 -12
  247. package/deps/rocksdb/rocksdb/table/block_based/partitioned_filter_block.h +3 -1
  248. package/deps/rocksdb/rocksdb/table/block_based/partitioned_filter_block_test.cc +114 -70
  249. package/deps/rocksdb/rocksdb/table/block_based/partitioned_index_iterator.cc +1 -2
  250. package/deps/rocksdb/rocksdb/table/block_based/partitioned_index_reader.cc +9 -6
  251. package/deps/rocksdb/rocksdb/table/block_based/reader_common.cc +15 -3
  252. package/deps/rocksdb/rocksdb/table/block_based/reader_common.h +6 -3
  253. package/deps/rocksdb/rocksdb/table/block_fetcher.cc +11 -11
  254. package/deps/rocksdb/rocksdb/table/block_fetcher_test.cc +3 -0
  255. package/deps/rocksdb/rocksdb/table/compaction_merging_iterator.cc +1 -0
  256. package/deps/rocksdb/rocksdb/table/cuckoo/cuckoo_table_builder.cc +6 -2
  257. package/deps/rocksdb/rocksdb/table/cuckoo/cuckoo_table_builder_test.cc +1 -2
  258. package/deps/rocksdb/rocksdb/table/cuckoo/cuckoo_table_reader.cc +2 -3
  259. package/deps/rocksdb/rocksdb/table/format.cc +175 -33
  260. package/deps/rocksdb/rocksdb/table/format.h +63 -10
  261. package/deps/rocksdb/rocksdb/table/merging_iterator.cc +10 -2
  262. package/deps/rocksdb/rocksdb/table/meta_blocks.cc +12 -4
  263. package/deps/rocksdb/rocksdb/table/meta_blocks.h +1 -0
  264. package/deps/rocksdb/rocksdb/table/mock_table.cc +8 -3
  265. package/deps/rocksdb/rocksdb/table/plain/plain_table_builder.cc +10 -5
  266. package/deps/rocksdb/rocksdb/table/plain/plain_table_builder.h +10 -1
  267. package/deps/rocksdb/rocksdb/table/plain/plain_table_key_coding.cc +1 -2
  268. package/deps/rocksdb/rocksdb/table/plain/plain_table_reader.cc +3 -3
  269. package/deps/rocksdb/rocksdb/table/sst_file_dumper.cc +12 -3
  270. package/deps/rocksdb/rocksdb/table/sst_file_writer.cc +26 -1
  271. package/deps/rocksdb/rocksdb/table/table_builder.h +6 -2
  272. package/deps/rocksdb/rocksdb/table/table_properties.cc +6 -0
  273. package/deps/rocksdb/rocksdb/table/table_test.cc +52 -22
  274. package/deps/rocksdb/rocksdb/test_util/secondary_cache_test_util.h +19 -7
  275. package/deps/rocksdb/rocksdb/test_util/sync_point.h +3 -1
  276. package/deps/rocksdb/rocksdb/test_util/testutil.cc +29 -0
  277. package/deps/rocksdb/rocksdb/test_util/testutil.h +19 -0
  278. package/deps/rocksdb/rocksdb/tools/db_bench_tool.cc +65 -26
  279. package/deps/rocksdb/rocksdb/tools/ldb_cmd.cc +8 -5
  280. package/deps/rocksdb/rocksdb/tools/ldb_tool.cc +1 -0
  281. package/deps/rocksdb/rocksdb/tools/reduce_levels_test.cc +1 -0
  282. package/deps/rocksdb/rocksdb/tools/sst_dump_test.cc +0 -1
  283. package/deps/rocksdb/rocksdb/tools/sst_dump_tool.cc +4 -0
  284. package/deps/rocksdb/rocksdb/unreleased_history/README.txt +73 -0
  285. package/deps/rocksdb/rocksdb/unreleased_history/add.sh +27 -0
  286. package/deps/rocksdb/rocksdb/unreleased_history/behavior_changes/.gitkeep +0 -0
  287. package/deps/rocksdb/rocksdb/unreleased_history/bug_fixes/.gitkeep +0 -0
  288. package/deps/rocksdb/rocksdb/unreleased_history/new_features/.gitkeep +0 -0
  289. package/deps/rocksdb/rocksdb/unreleased_history/performance_improvements/.gitkeep +0 -0
  290. package/deps/rocksdb/rocksdb/unreleased_history/public_api_changes/.gitkeep +0 -0
  291. package/deps/rocksdb/rocksdb/unreleased_history/release.sh +104 -0
  292. package/deps/rocksdb/rocksdb/util/async_file_reader.cc +5 -0
  293. package/deps/rocksdb/rocksdb/util/bloom_impl.h +3 -3
  294. package/deps/rocksdb/rocksdb/util/cast_util.h +14 -0
  295. package/deps/rocksdb/rocksdb/util/compaction_job_stats_impl.cc +2 -0
  296. package/deps/rocksdb/rocksdb/util/comparator.cc +29 -7
  297. package/deps/rocksdb/rocksdb/util/compression.cc +4 -4
  298. package/deps/rocksdb/rocksdb/util/compression.h +110 -32
  299. package/deps/rocksdb/rocksdb/util/core_local.h +2 -1
  300. package/deps/rocksdb/rocksdb/util/dynamic_bloom.h +4 -4
  301. package/deps/rocksdb/rocksdb/util/filelock_test.cc +3 -0
  302. package/deps/rocksdb/rocksdb/util/hash.h +7 -3
  303. package/deps/rocksdb/rocksdb/util/hash_test.cc +44 -0
  304. package/deps/rocksdb/rocksdb/util/math.h +58 -6
  305. package/deps/rocksdb/rocksdb/util/math128.h +29 -7
  306. package/deps/rocksdb/rocksdb/util/mutexlock.h +35 -27
  307. package/deps/rocksdb/rocksdb/util/single_thread_executor.h +1 -0
  308. package/deps/rocksdb/rocksdb/util/stop_watch.h +1 -1
  309. package/deps/rocksdb/rocksdb/util/thread_operation.h +8 -1
  310. package/deps/rocksdb/rocksdb/util/udt_util.cc +343 -0
  311. package/deps/rocksdb/rocksdb/util/udt_util.h +173 -1
  312. package/deps/rocksdb/rocksdb/util/udt_util_test.cc +447 -0
  313. package/deps/rocksdb/rocksdb/util/write_batch_util.cc +25 -0
  314. package/deps/rocksdb/rocksdb/util/write_batch_util.h +80 -0
  315. package/deps/rocksdb/rocksdb/utilities/backup/backup_engine_test.cc +4 -4
  316. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_db_impl.cc +69 -25
  317. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_db_impl.h +7 -6
  318. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_db_listener.h +1 -1
  319. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_dump_tool.cc +2 -3
  320. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_file.cc +6 -11
  321. package/deps/rocksdb/rocksdb/utilities/cache_dump_load_impl.h +1 -2
  322. package/deps/rocksdb/rocksdb/utilities/checkpoint/checkpoint_test.cc +4 -5
  323. package/deps/rocksdb/rocksdb/utilities/fault_injection_fs.cc +1 -1
  324. package/deps/rocksdb/rocksdb/utilities/fault_injection_secondary_cache.cc +2 -2
  325. package/deps/rocksdb/rocksdb/utilities/fault_injection_secondary_cache.h +2 -1
  326. package/deps/rocksdb/rocksdb/utilities/option_change_migration/option_change_migration_test.cc +3 -3
  327. package/deps/rocksdb/rocksdb/utilities/persistent_cache/block_cache_tier_file.cc +1 -2
  328. package/deps/rocksdb/rocksdb/utilities/trace/file_trace_reader_writer.cc +2 -3
  329. package/deps/rocksdb/rocksdb/utilities/transactions/lock/point/point_lock_manager_test.cc +2 -2
  330. package/deps/rocksdb/rocksdb/utilities/transactions/lock/point/point_lock_manager_test.h +1 -1
  331. package/deps/rocksdb/rocksdb/utilities/transactions/optimistic_transaction.cc +23 -8
  332. package/deps/rocksdb/rocksdb/utilities/transactions/optimistic_transaction_db_impl.cc +9 -6
  333. package/deps/rocksdb/rocksdb/utilities/transactions/optimistic_transaction_db_impl.h +37 -12
  334. package/deps/rocksdb/rocksdb/utilities/transactions/optimistic_transaction_test.cc +231 -33
  335. package/deps/rocksdb/rocksdb/utilities/transactions/pessimistic_transaction.h +0 -1
  336. package/deps/rocksdb/rocksdb/utilities/transactions/transaction_base.cc +76 -20
  337. package/deps/rocksdb/rocksdb/utilities/transactions/transaction_base.h +18 -9
  338. package/deps/rocksdb/rocksdb/utilities/transactions/transaction_test.cc +40 -23
  339. package/deps/rocksdb/rocksdb/utilities/transactions/transaction_test.h +13 -12
  340. package/deps/rocksdb/rocksdb/utilities/transactions/write_committed_transaction_ts_test.cc +7 -0
  341. package/deps/rocksdb/rocksdb/utilities/transactions/write_prepared_transaction_test.cc +1 -1
  342. package/deps/rocksdb/rocksdb/utilities/transactions/write_prepared_txn.cc +41 -11
  343. package/deps/rocksdb/rocksdb/utilities/transactions/write_prepared_txn.h +6 -3
  344. package/deps/rocksdb/rocksdb/utilities/transactions/write_prepared_txn_db.cc +71 -24
  345. package/deps/rocksdb/rocksdb/utilities/transactions/write_prepared_txn_db.h +19 -4
  346. package/deps/rocksdb/rocksdb/utilities/transactions/write_unprepared_transaction_test.cc +60 -107
  347. package/deps/rocksdb/rocksdb/utilities/transactions/write_unprepared_txn.cc +39 -11
  348. package/deps/rocksdb/rocksdb/utilities/transactions/write_unprepared_txn.h +6 -3
  349. package/deps/rocksdb/rocksdb/utilities/transactions/write_unprepared_txn_db.cc +14 -8
  350. package/deps/rocksdb/rocksdb/utilities/transactions/write_unprepared_txn_db.h +1 -1
  351. package/deps/rocksdb/rocksdb/utilities/ttl/db_ttl_impl.cc +10 -5
  352. package/deps/rocksdb/rocksdb/utilities/ttl/db_ttl_impl.h +1 -1
  353. package/deps/rocksdb/rocksdb/utilities/ttl/ttl_test.cc +1 -1
  354. package/deps/rocksdb/rocksdb/utilities/write_batch_with_index/write_batch_with_index.cc +2 -1
  355. package/deps/rocksdb/rocksdb/utilities/write_batch_with_index/write_batch_with_index_test.cc +6 -6
  356. package/deps/rocksdb/rocksdb.gyp +2 -0
  357. package/package.json +1 -1
  358. package/prebuilds/darwin-arm64/node.napi.node +0 -0
  359. package/prebuilds/linux-x64/node.napi.node +0 -0
@@ -178,9 +178,12 @@ Status DBImpl::TEST_WaitForFlushMemTable(ColumnFamilyHandle* column_family) {
178
178
  return WaitForFlushMemTable(cfd, nullptr, false);
179
179
  }
180
180
 
181
- Status DBImpl::TEST_WaitForCompact(bool abort_on_pause) {
182
- // Wait until the compaction completes
183
- return WaitForCompact(abort_on_pause);
181
+ Status DBImpl::TEST_WaitForCompact() {
182
+ return WaitForCompact(WaitForCompactOptions());
183
+ }
184
+ Status DBImpl::TEST_WaitForCompact(
185
+ const WaitForCompactOptions& wait_for_compact_options) {
186
+ return WaitForCompact(wait_for_compact_options);
184
187
  }
185
188
 
186
189
  Status DBImpl::TEST_WaitForPurge() {
@@ -138,7 +138,8 @@ Status DBImpl::PromoteL0(ColumnFamilyHandle* column_family, int target_level) {
138
138
  f->oldest_blob_file_number, f->oldest_ancester_time,
139
139
  f->file_creation_time, f->epoch_number, f->file_checksum,
140
140
  f->file_checksum_func_name, f->unique_id,
141
- f->compensated_range_deletion_size, f->tail_size);
141
+ f->compensated_range_deletion_size, f->tail_size,
142
+ f->user_defined_timestamps_persisted);
142
143
  }
143
144
 
144
145
  status = versions_->LogAndApply(cfd, *cfd->GetLatestMutableCFOptions(),
@@ -35,6 +35,11 @@ uint64_t DBImpl::MinObsoleteSstNumberToKeep() {
35
35
  return std::numeric_limits<uint64_t>::max();
36
36
  }
37
37
 
38
+ uint64_t DBImpl::GetObsoleteSstFilesSize() {
39
+ mutex_.AssertHeld();
40
+ return versions_->GetObsoleteSstFilesSize();
41
+ }
42
+
38
43
  Status DBImpl::DisableFileDeletions() {
39
44
  Status s;
40
45
  int my_disable_delete_obsolete_files;
@@ -25,6 +25,7 @@
25
25
  #include "rocksdb/wal_filter.h"
26
26
  #include "test_util/sync_point.h"
27
27
  #include "util/rate_limiter_impl.h"
28
+ #include "util/udt_util.h"
28
29
 
29
30
  namespace ROCKSDB_NAMESPACE {
30
31
  Options SanitizeOptions(const std::string& dbname, const Options& src,
@@ -142,11 +143,6 @@ DBOptions SanitizeOptions(const std::string& dbname, const DBOptions& src,
142
143
  result.wal_dir = result.wal_dir.substr(0, result.wal_dir.size() - 1);
143
144
  }
144
145
 
145
- if (result.use_direct_reads && result.compaction_readahead_size == 0) {
146
- TEST_SYNC_POINT_CALLBACK("SanitizeOptions:direct_io", nullptr);
147
- result.compaction_readahead_size = 1024 * 1024 * 2;
148
- }
149
-
150
146
  // Force flush on DB open if 2PC is enabled, since with 2PC we have no
151
147
  // guarantee that consecutive log files have consecutive sequence id, which
152
148
  // make recovery complicated.
@@ -617,7 +613,7 @@ Status DBImpl::Recover(
617
613
  f->file_creation_time, f->epoch_number,
618
614
  f->file_checksum, f->file_checksum_func_name,
619
615
  f->unique_id, f->compensated_range_deletion_size,
620
- f->tail_size);
616
+ f->tail_size, f->user_defined_timestamps_persisted);
621
617
  ROCKS_LOG_WARN(immutable_db_options_.info_log,
622
618
  "[%s] Moving #%" PRIu64
623
619
  " from from_level-%d to from_level-%d %" PRIu64
@@ -1186,6 +1182,9 @@ Status DBImpl::RecoverLogFiles(const std::vector<uint64_t>& wal_numbers,
1186
1182
  std::string scratch;
1187
1183
  Slice record;
1188
1184
 
1185
+ const UnorderedMap<uint32_t, size_t>& running_ts_sz =
1186
+ versions_->GetRunningColumnFamiliesTimestampSize();
1187
+
1189
1188
  TEST_SYNC_POINT_CALLBACK("DBImpl::RecoverLogFiles:BeforeReadWal",
1190
1189
  /*arg=*/nullptr);
1191
1190
  uint64_t record_checksum;
@@ -1199,27 +1198,41 @@ Status DBImpl::RecoverLogFiles(const std::vector<uint64_t>& wal_numbers,
1199
1198
  Status::Corruption("log record too small"));
1200
1199
  continue;
1201
1200
  }
1202
-
1203
1201
  // We create a new batch and initialize with a valid prot_info_ to store
1204
1202
  // the data checksums
1205
1203
  WriteBatch batch;
1204
+ std::unique_ptr<WriteBatch> new_batch;
1206
1205
 
1207
1206
  status = WriteBatchInternal::SetContents(&batch, record);
1208
1207
  if (!status.ok()) {
1209
1208
  return status;
1210
1209
  }
1210
+
1211
+ const UnorderedMap<uint32_t, size_t>& record_ts_sz =
1212
+ reader.GetRecordedTimestampSize();
1213
+ status = HandleWriteBatchTimestampSizeDifference(
1214
+ &batch, running_ts_sz, record_ts_sz,
1215
+ TimestampSizeConsistencyMode::kReconcileInconsistency, &new_batch);
1216
+ if (!status.ok()) {
1217
+ return status;
1218
+ }
1219
+
1220
+ bool batch_updated = new_batch != nullptr;
1221
+ WriteBatch* batch_to_use = batch_updated ? new_batch.get() : &batch;
1211
1222
  TEST_SYNC_POINT_CALLBACK(
1212
- "DBImpl::RecoverLogFiles:BeforeUpdateProtectionInfo:batch", &batch);
1223
+ "DBImpl::RecoverLogFiles:BeforeUpdateProtectionInfo:batch",
1224
+ batch_to_use);
1213
1225
  TEST_SYNC_POINT_CALLBACK(
1214
1226
  "DBImpl::RecoverLogFiles:BeforeUpdateProtectionInfo:checksum",
1215
1227
  &record_checksum);
1216
1228
  status = WriteBatchInternal::UpdateProtectionInfo(
1217
- &batch, 8 /* bytes_per_key */, &record_checksum);
1229
+ batch_to_use, 8 /* bytes_per_key */,
1230
+ batch_updated ? nullptr : &record_checksum);
1218
1231
  if (!status.ok()) {
1219
1232
  return status;
1220
1233
  }
1221
1234
 
1222
- SequenceNumber sequence = WriteBatchInternal::Sequence(&batch);
1235
+ SequenceNumber sequence = WriteBatchInternal::Sequence(batch_to_use);
1223
1236
 
1224
1237
  if (immutable_db_options_.wal_recovery_mode ==
1225
1238
  WALRecoveryMode::kPointInTimeRecovery) {
@@ -1240,7 +1253,7 @@ Status DBImpl::RecoverLogFiles(const std::vector<uint64_t>& wal_numbers,
1240
1253
  // and returns true.
1241
1254
  if (!InvokeWalFilterIfNeededOnWalRecord(wal_number, fname, reporter,
1242
1255
  status, stop_replay_by_wal_filter,
1243
- batch)) {
1256
+ *batch_to_use)) {
1244
1257
  continue;
1245
1258
  }
1246
1259
 
@@ -1251,7 +1264,7 @@ Status DBImpl::RecoverLogFiles(const std::vector<uint64_t>& wal_numbers,
1251
1264
  // That's why we set ignore missing column families to true
1252
1265
  bool has_valid_writes = false;
1253
1266
  status = WriteBatchInternal::InsertInto(
1254
- &batch, column_family_memtables_.get(), &flush_scheduler_,
1267
+ batch_to_use, column_family_memtables_.get(), &flush_scheduler_,
1255
1268
  &trim_history_scheduler_, true, wal_number, this,
1256
1269
  false /* concurrent_memtable_writes */, next_sequence,
1257
1270
  &has_valid_writes, seq_per_batch_, batch_per_txn_);
@@ -1640,6 +1653,7 @@ Status DBImpl::WriteLevel0TableForRecovery(int job_id, ColumnFamilyData* cfd,
1640
1653
  Version* version = cfd->current();
1641
1654
  version->Ref();
1642
1655
  const ReadOptions read_option(Env::IOActivity::kDBOpen);
1656
+ uint64_t num_input_entries = 0;
1643
1657
  s = BuildTable(
1644
1658
  dbname_, versions_.get(), immutable_db_options_, tboptions,
1645
1659
  file_options_for_compaction_, read_option, cfd->table_cache(),
@@ -1649,7 +1663,8 @@ Status DBImpl::WriteLevel0TableForRecovery(int job_id, ColumnFamilyData* cfd,
1649
1663
  io_tracer_, BlobFileCreationReason::kRecovery,
1650
1664
  empty_seqno_time_mapping, &event_logger_, job_id, Env::IO_HIGH,
1651
1665
  nullptr /* table_properties */, write_hint,
1652
- nullptr /*full_history_ts_low*/, &blob_callback_, version);
1666
+ nullptr /*full_history_ts_low*/, &blob_callback_, version,
1667
+ &num_input_entries);
1653
1668
  version->Unref();
1654
1669
  LogFlush(immutable_db_options_.info_log);
1655
1670
  ROCKS_LOG_DEBUG(immutable_db_options_.info_log,
@@ -1663,6 +1678,19 @@ Status DBImpl::WriteLevel0TableForRecovery(int job_id, ColumnFamilyData* cfd,
1663
1678
  if (!io_s.ok() && s.ok()) {
1664
1679
  s = io_s;
1665
1680
  }
1681
+
1682
+ uint64_t total_num_entries = mem->num_entries();
1683
+ if (s.ok() && total_num_entries != num_input_entries) {
1684
+ std::string msg = "Expected " + std::to_string(total_num_entries) +
1685
+ " entries in memtable, but read " +
1686
+ std::to_string(num_input_entries);
1687
+ ROCKS_LOG_WARN(immutable_db_options_.info_log,
1688
+ "[%s] [JOB %d] Level-0 flush during recover: %s",
1689
+ cfd->GetName().c_str(), job_id, msg.c_str());
1690
+ if (immutable_db_options_.flush_verify_memtable_count) {
1691
+ s = Status::Corruption(msg);
1692
+ }
1693
+ }
1666
1694
  }
1667
1695
  }
1668
1696
  ReleaseFileNumberFromPendingOutputs(pending_outputs_inserted_elem);
@@ -1674,14 +1702,15 @@ Status DBImpl::WriteLevel0TableForRecovery(int job_id, ColumnFamilyData* cfd,
1674
1702
  constexpr int level = 0;
1675
1703
 
1676
1704
  if (s.ok() && has_output) {
1677
- edit->AddFile(
1678
- level, meta.fd.GetNumber(), meta.fd.GetPathId(), meta.fd.GetFileSize(),
1679
- meta.smallest, meta.largest, meta.fd.smallest_seqno,
1680
- meta.fd.largest_seqno, meta.marked_for_compaction, meta.temperature,
1681
- meta.oldest_blob_file_number, meta.oldest_ancester_time,
1682
- meta.file_creation_time, meta.epoch_number, meta.file_checksum,
1683
- meta.file_checksum_func_name, meta.unique_id,
1684
- meta.compensated_range_deletion_size, meta.tail_size);
1705
+ edit->AddFile(level, meta.fd.GetNumber(), meta.fd.GetPathId(),
1706
+ meta.fd.GetFileSize(), meta.smallest, meta.largest,
1707
+ meta.fd.smallest_seqno, meta.fd.largest_seqno,
1708
+ meta.marked_for_compaction, meta.temperature,
1709
+ meta.oldest_blob_file_number, meta.oldest_ancester_time,
1710
+ meta.file_creation_time, meta.epoch_number,
1711
+ meta.file_checksum, meta.file_checksum_func_name,
1712
+ meta.unique_id, meta.compensated_range_deletion_size,
1713
+ meta.tail_size, meta.user_defined_timestamps_persisted);
1685
1714
 
1686
1715
  for (const auto& blob : blob_file_additions) {
1687
1716
  edit->AddBlobFile(blob);
@@ -36,23 +36,30 @@ Status DBImplReadOnly::Get(const ReadOptions& read_options,
36
36
  /*timestamp*/ nullptr);
37
37
  }
38
38
 
39
- Status DBImplReadOnly::Get(const ReadOptions& read_options,
39
+ Status DBImplReadOnly::Get(const ReadOptions& _read_options,
40
40
  ColumnFamilyHandle* column_family, const Slice& key,
41
41
  PinnableSlice* pinnable_val,
42
42
  std::string* timestamp) {
43
- if (read_options.io_activity != Env::IOActivity::kUnknown) {
43
+ if (_read_options.io_activity != Env::IOActivity::kUnknown &&
44
+ _read_options.io_activity != Env::IOActivity::kGet) {
44
45
  return Status::InvalidArgument(
45
- "Cannot call Get with `ReadOptions::io_activity` != "
46
- "`Env::IOActivity::kUnknown`");
46
+ "Can only call Get with `ReadOptions::io_activity` is "
47
+ "`Env::IOActivity::kUnknown` or `Env::IOActivity::kGet`");
48
+ }
49
+ ReadOptions read_options(_read_options);
50
+ if (read_options.io_activity == Env::IOActivity::kUnknown) {
51
+ read_options.io_activity = Env::IOActivity::kGet;
47
52
  }
48
53
  assert(pinnable_val != nullptr);
49
- // TODO: stopwatch DB_GET needed?, perf timer needed?
54
+ PERF_CPU_TIMER_GUARD(get_cpu_nanos, immutable_db_options_.clock);
55
+ StopWatch sw(immutable_db_options_.clock, stats_, DB_GET);
50
56
  PERF_TIMER_GUARD(get_snapshot_time);
51
57
 
52
58
  assert(column_family);
53
59
  if (read_options.timestamp) {
54
- const Status s = FailIfTsMismatchCf(
55
- column_family, *(read_options.timestamp), /*ts_for_read=*/true);
60
+ const Status s =
61
+ FailIfTsMismatchCf(column_family, *(read_options.timestamp),
62
+ /*ts_for_read=*/true);
56
63
  if (!s.ok()) {
57
64
  return s;
58
65
  }
@@ -115,17 +122,23 @@ Status DBImplReadOnly::Get(const ReadOptions& read_options,
115
122
  return s;
116
123
  }
117
124
 
118
- Iterator* DBImplReadOnly::NewIterator(const ReadOptions& read_options,
125
+ Iterator* DBImplReadOnly::NewIterator(const ReadOptions& _read_options,
119
126
  ColumnFamilyHandle* column_family) {
120
- if (read_options.io_activity != Env::IOActivity::kUnknown) {
127
+ if (_read_options.io_activity != Env::IOActivity::kUnknown &&
128
+ _read_options.io_activity != Env::IOActivity::kDBIterator) {
121
129
  return NewErrorIterator(Status::InvalidArgument(
122
- "Cannot call NewIterator with `ReadOptions::io_activity` != "
123
- "`Env::IOActivity::kUnknown`"));
130
+ "Can only call NewIterator with `ReadOptions::io_activity` is "
131
+ "`Env::IOActivity::kUnknown` or `Env::IOActivity::kDBIterator`"));
132
+ }
133
+ ReadOptions read_options(_read_options);
134
+ if (read_options.io_activity == Env::IOActivity::kUnknown) {
135
+ read_options.io_activity = Env::IOActivity::kDBIterator;
124
136
  }
125
137
  assert(column_family);
126
138
  if (read_options.timestamp) {
127
- const Status s = FailIfTsMismatchCf(
128
- column_family, *(read_options.timestamp), /*ts_for_read=*/true);
139
+ const Status s =
140
+ FailIfTsMismatchCf(column_family, *(read_options.timestamp),
141
+ /*ts_for_read=*/true);
129
142
  if (!s.ok()) {
130
143
  return NewErrorIterator(s);
131
144
  }
@@ -28,14 +28,14 @@ class DBImplReadOnly : public DBImpl {
28
28
  virtual Status Get(const ReadOptions& options,
29
29
  ColumnFamilyHandle* column_family, const Slice& key,
30
30
  PinnableSlice* value) override;
31
- Status Get(const ReadOptions& options, ColumnFamilyHandle* column_family,
32
- const Slice& key, PinnableSlice* value,
33
- std::string* timestamp) override;
31
+ Status Get(const ReadOptions& _read_options,
32
+ ColumnFamilyHandle* column_family, const Slice& key,
33
+ PinnableSlice* value, std::string* timestamp) override;
34
34
 
35
35
  // TODO: Implement ReadOnly MultiGet?
36
36
 
37
37
  using DBImpl::NewIterator;
38
- virtual Iterator* NewIterator(const ReadOptions&,
38
+ virtual Iterator* NewIterator(const ReadOptions& _read_options,
39
39
  ColumnFamilyHandle* column_family) override;
40
40
 
41
41
  virtual Status NewIterators(
@@ -142,6 +142,15 @@ class DBImplReadOnly : public DBImpl {
142
142
  return Status::NotSupported("Not supported operation in read only mode.");
143
143
  }
144
144
 
145
+ virtual Status CreateColumnFamilyWithImport(
146
+ const ColumnFamilyOptions& /*options*/,
147
+ const std::string& /*column_family_name*/,
148
+ const ImportColumnFamilyOptions& /*import_options*/,
149
+ const std::vector<const ExportImportFilesMetaData*>& /*metadatas*/,
150
+ ColumnFamilyHandle** /*handle*/) override {
151
+ return Status::NotSupported("Not supported operation in read only mode.");
152
+ }
153
+
145
154
  using DB::ClipColumnFamily;
146
155
  virtual Status ClipColumnFamily(ColumnFamilyHandle* /*column_family*/,
147
156
  const Slice& /*begin*/,
@@ -170,4 +179,3 @@ class DBImplReadOnly : public DBImpl {
170
179
  friend class DB;
171
180
  };
172
181
  } // namespace ROCKSDB_NAMESPACE
173
-
@@ -14,6 +14,7 @@
14
14
  #include "monitoring/perf_context_imp.h"
15
15
  #include "rocksdb/configurable.h"
16
16
  #include "util/cast_util.h"
17
+ #include "util/write_batch_util.h"
17
18
 
18
19
  namespace ROCKSDB_NAMESPACE {
19
20
 
@@ -197,6 +198,9 @@ Status DBImplSecondary::RecoverLogFiles(
197
198
  }
198
199
  assert(reader != nullptr);
199
200
  }
201
+
202
+ const UnorderedMap<uint32_t, size_t>& running_ts_sz =
203
+ versions_->GetRunningColumnFamiliesTimestampSize();
200
204
  for (auto log_number : log_numbers) {
201
205
  auto it = log_readers_.find(log_number);
202
206
  assert(it != log_readers_.end());
@@ -224,6 +228,14 @@ Status DBImplSecondary::RecoverLogFiles(
224
228
  if (!status.ok()) {
225
229
  break;
226
230
  }
231
+ const UnorderedMap<uint32_t, size_t>& record_ts_sz =
232
+ reader->GetRecordedTimestampSize();
233
+ status = HandleWriteBatchTimestampSizeDifference(
234
+ &batch, running_ts_sz, record_ts_sz,
235
+ TimestampSizeConsistencyMode::kVerifyConsistency);
236
+ if (!status.ok()) {
237
+ break;
238
+ }
227
239
  SequenceNumber seq_of_batch = WriteBatchInternal::Sequence(&batch);
228
240
  std::vector<uint32_t> column_family_ids;
229
241
  status = CollectColumnFamilyIdsFromWriteBatch(batch, &column_family_ids);
@@ -328,16 +340,36 @@ Status DBImplSecondary::RecoverLogFiles(
328
340
  }
329
341
 
330
342
  // Implementation of the DB interface
331
- Status DBImplSecondary::Get(const ReadOptions& read_options,
343
+ Status DBImplSecondary::Get(const ReadOptions& _read_options,
332
344
  ColumnFamilyHandle* column_family, const Slice& key,
333
345
  PinnableSlice* value) {
346
+ if (_read_options.io_activity != Env::IOActivity::kUnknown &&
347
+ _read_options.io_activity != Env::IOActivity::kGet) {
348
+ return Status::InvalidArgument(
349
+ "Can only call Get with `ReadOptions::io_activity` is "
350
+ "`Env::IOActivity::kUnknown` or `Env::IOActivity::kGet`");
351
+ }
352
+ ReadOptions read_options(_read_options);
353
+ if (read_options.io_activity == Env::IOActivity::kUnknown) {
354
+ read_options.io_activity = Env::IOActivity::kGet;
355
+ }
334
356
  return GetImpl(read_options, column_family, key, value,
335
357
  /*timestamp*/ nullptr);
336
358
  }
337
359
 
338
- Status DBImplSecondary::Get(const ReadOptions& read_options,
360
+ Status DBImplSecondary::Get(const ReadOptions& _read_options,
339
361
  ColumnFamilyHandle* column_family, const Slice& key,
340
362
  PinnableSlice* value, std::string* timestamp) {
363
+ if (_read_options.io_activity != Env::IOActivity::kUnknown &&
364
+ _read_options.io_activity != Env::IOActivity::kGet) {
365
+ return Status::InvalidArgument(
366
+ "Can only call Get with `ReadOptions::io_activity` is "
367
+ "`Env::IOActivity::kUnknown` or `Env::IOActivity::kGet`");
368
+ }
369
+ ReadOptions read_options(_read_options);
370
+ if (read_options.io_activity == Env::IOActivity::kUnknown) {
371
+ read_options.io_activity = Env::IOActivity::kGet;
372
+ }
341
373
  return GetImpl(read_options, column_family, key, value, timestamp);
342
374
  }
343
375
 
@@ -345,11 +377,6 @@ Status DBImplSecondary::GetImpl(const ReadOptions& read_options,
345
377
  ColumnFamilyHandle* column_family,
346
378
  const Slice& key, PinnableSlice* pinnable_val,
347
379
  std::string* timestamp) {
348
- if (read_options.io_activity != Env::IOActivity::kUnknown) {
349
- return Status::InvalidArgument(
350
- "Cannot call Get with `ReadOptions::io_activity` != "
351
- "`Env::IOActivity::kUnknown`");
352
- }
353
380
  assert(pinnable_val != nullptr);
354
381
  PERF_CPU_TIMER_GUARD(get_cpu_nanos, immutable_db_options_.clock);
355
382
  StopWatch sw(immutable_db_options_.clock, stats_, DB_GET);
@@ -440,8 +467,18 @@ Status DBImplSecondary::GetImpl(const ReadOptions& read_options,
440
467
  return s;
441
468
  }
442
469
 
443
- Iterator* DBImplSecondary::NewIterator(const ReadOptions& read_options,
470
+ Iterator* DBImplSecondary::NewIterator(const ReadOptions& _read_options,
444
471
  ColumnFamilyHandle* column_family) {
472
+ if (_read_options.io_activity != Env::IOActivity::kUnknown &&
473
+ _read_options.io_activity != Env::IOActivity::kDBIterator) {
474
+ return NewErrorIterator(Status::InvalidArgument(
475
+ "Can only call NewIterator with `ReadOptions::io_activity` is "
476
+ "`Env::IOActivity::kUnknown` or `Env::IOActivity::kDBIterator`"));
477
+ }
478
+ ReadOptions read_options(_read_options);
479
+ if (read_options.io_activity == Env::IOActivity::kUnknown) {
480
+ read_options.io_activity = Env::IOActivity::kDBIterator;
481
+ }
445
482
  if (read_options.managed) {
446
483
  return NewErrorIterator(
447
484
  Status::NotSupported("Managed iterator is not supported anymore."));
@@ -450,16 +487,12 @@ Iterator* DBImplSecondary::NewIterator(const ReadOptions& read_options,
450
487
  return NewErrorIterator(Status::NotSupported(
451
488
  "ReadTier::kPersistedData is not yet supported in iterators."));
452
489
  }
453
- if (read_options.io_activity != Env::IOActivity::kUnknown) {
454
- return NewErrorIterator(Status::InvalidArgument(
455
- "Cannot call NewIterator with `ReadOptions::io_activity` != "
456
- "`Env::IOActivity::kUnknown`"));
457
- }
458
490
 
459
491
  assert(column_family);
460
492
  if (read_options.timestamp) {
461
- const Status s = FailIfTsMismatchCf(
462
- column_family, *(read_options.timestamp), /*ts_for_read=*/true);
493
+ const Status s =
494
+ FailIfTsMismatchCf(column_family, *(read_options.timestamp),
495
+ /*ts_for_read=*/true);
463
496
  if (!s.ok()) {
464
497
  return NewErrorIterator(s);
465
498
  }
@@ -511,9 +544,19 @@ ArenaWrappedDBIter* DBImplSecondary::NewIteratorImpl(
511
544
  }
512
545
 
513
546
  Status DBImplSecondary::NewIterators(
514
- const ReadOptions& read_options,
547
+ const ReadOptions& _read_options,
515
548
  const std::vector<ColumnFamilyHandle*>& column_families,
516
549
  std::vector<Iterator*>* iterators) {
550
+ if (_read_options.io_activity != Env::IOActivity::kUnknown &&
551
+ _read_options.io_activity != Env::IOActivity::kDBIterator) {
552
+ return Status::InvalidArgument(
553
+ "Can only call NewIterators with `ReadOptions::io_activity` is "
554
+ "`Env::IOActivity::kUnknown` or `Env::IOActivity::kDBIterator`");
555
+ }
556
+ ReadOptions read_options(_read_options);
557
+ if (read_options.io_activity == Env::IOActivity::kUnknown) {
558
+ read_options.io_activity = Env::IOActivity::kDBIterator;
559
+ }
517
560
  if (read_options.managed) {
518
561
  return Status::NotSupported("Managed iterator is not supported anymore.");
519
562
  }
@@ -521,11 +564,6 @@ Status DBImplSecondary::NewIterators(
521
564
  return Status::NotSupported(
522
565
  "ReadTier::kPersistedData is not yet supported in iterators.");
523
566
  }
524
- if (read_options.io_activity != Env::IOActivity::kUnknown) {
525
- return Status::InvalidArgument(
526
- "Cannot call NewIterators with `ReadOptions::io_activity` != "
527
- "`Env::IOActivity::kUnknown`");
528
- }
529
567
  ReadCallback* read_callback = nullptr; // No read callback provided.
530
568
  if (iterators == nullptr) {
531
569
  return Status::InvalidArgument("iterators not allowed to be nullptr");
@@ -948,6 +986,8 @@ Status DB::OpenAndCompact(
948
986
  delete db;
949
987
  if (s.ok()) {
950
988
  return serialization_status;
989
+ } else {
990
+ serialization_status.PermitUncheckedError();
951
991
  }
952
992
  return s;
953
993
  }
@@ -96,12 +96,13 @@ class DBImplSecondary : public DBImpl {
96
96
  // workaround, the secondaries can be opened with `max_open_files=-1` so that
97
97
  // it eagerly keeps all table files open and is able to access the contents of
98
98
  // deleted files via prior open fd.
99
- Status Get(const ReadOptions& options, ColumnFamilyHandle* column_family,
100
- const Slice& key, PinnableSlice* value) override;
99
+ Status Get(const ReadOptions& _read_options,
100
+ ColumnFamilyHandle* column_family, const Slice& key,
101
+ PinnableSlice* value) override;
101
102
 
102
- Status Get(const ReadOptions& options, ColumnFamilyHandle* column_family,
103
- const Slice& key, PinnableSlice* value,
104
- std::string* timestamp) override;
103
+ Status Get(const ReadOptions& _read_options,
104
+ ColumnFamilyHandle* column_family, const Slice& key,
105
+ PinnableSlice* value, std::string* timestamp) override;
105
106
 
106
107
  Status GetImpl(const ReadOptions& options, ColumnFamilyHandle* column_family,
107
108
  const Slice& key, PinnableSlice* value,
@@ -117,7 +118,7 @@ class DBImplSecondary : public DBImpl {
117
118
  // deleted. As a partial hacky workaround, the secondaries can be opened with
118
119
  // `max_open_files=-1` so that it eagerly keeps all table files open and is
119
120
  // able to access the contents of deleted files via prior open fd.
120
- Iterator* NewIterator(const ReadOptions&,
121
+ Iterator* NewIterator(const ReadOptions& _read_options,
121
122
  ColumnFamilyHandle* column_family) override;
122
123
 
123
124
  ArenaWrappedDBIter* NewIteratorImpl(const ReadOptions& read_options,
@@ -127,7 +128,7 @@ class DBImplSecondary : public DBImpl {
127
128
  bool expose_blob_index = false,
128
129
  bool allow_refresh = true);
129
130
 
130
- Status NewIterators(const ReadOptions& options,
131
+ Status NewIterators(const ReadOptions& _read_options,
131
132
  const std::vector<ColumnFamilyHandle*>& column_families,
132
133
  std::vector<Iterator*>* iterators) override;
133
134
 
@@ -273,85 +274,6 @@ class DBImplSecondary : public DBImpl {
273
274
  return Status::OK();
274
275
  }
275
276
 
276
- // ColumnFamilyCollector is a write batch handler which does nothing
277
- // except recording unique column family IDs
278
- class ColumnFamilyCollector : public WriteBatch::Handler {
279
- std::unordered_set<uint32_t> column_family_ids_;
280
-
281
- Status AddColumnFamilyId(uint32_t column_family_id) {
282
- if (column_family_ids_.find(column_family_id) ==
283
- column_family_ids_.end()) {
284
- column_family_ids_.insert(column_family_id);
285
- }
286
- return Status::OK();
287
- }
288
-
289
- public:
290
- explicit ColumnFamilyCollector() {}
291
-
292
- ~ColumnFamilyCollector() override {}
293
-
294
- Status PutCF(uint32_t column_family_id, const Slice&,
295
- const Slice&) override {
296
- return AddColumnFamilyId(column_family_id);
297
- }
298
-
299
- Status DeleteCF(uint32_t column_family_id, const Slice&) override {
300
- return AddColumnFamilyId(column_family_id);
301
- }
302
-
303
- Status SingleDeleteCF(uint32_t column_family_id, const Slice&) override {
304
- return AddColumnFamilyId(column_family_id);
305
- }
306
-
307
- Status DeleteRangeCF(uint32_t column_family_id, const Slice&,
308
- const Slice&) override {
309
- return AddColumnFamilyId(column_family_id);
310
- }
311
-
312
- Status MergeCF(uint32_t column_family_id, const Slice&,
313
- const Slice&) override {
314
- return AddColumnFamilyId(column_family_id);
315
- }
316
-
317
- Status PutBlobIndexCF(uint32_t column_family_id, const Slice&,
318
- const Slice&) override {
319
- return AddColumnFamilyId(column_family_id);
320
- }
321
-
322
- Status MarkBeginPrepare(bool) override { return Status::OK(); }
323
-
324
- Status MarkEndPrepare(const Slice&) override { return Status::OK(); }
325
-
326
- Status MarkRollback(const Slice&) override { return Status::OK(); }
327
-
328
- Status MarkCommit(const Slice&) override { return Status::OK(); }
329
-
330
- Status MarkCommitWithTimestamp(const Slice&, const Slice&) override {
331
- return Status::OK();
332
- }
333
-
334
- Status MarkNoop(bool) override { return Status::OK(); }
335
-
336
- const std::unordered_set<uint32_t>& column_families() const {
337
- return column_family_ids_;
338
- }
339
- };
340
-
341
- Status CollectColumnFamilyIdsFromWriteBatch(
342
- const WriteBatch& batch, std::vector<uint32_t>* column_family_ids) {
343
- assert(column_family_ids != nullptr);
344
- column_family_ids->clear();
345
- ColumnFamilyCollector handler;
346
- Status s = batch.Iterate(&handler);
347
- if (s.ok()) {
348
- for (const auto& cf : handler.column_families()) {
349
- column_family_ids->push_back(cf);
350
- }
351
- }
352
- return s;
353
- }
354
-
355
277
  bool OwnTablesAndLogs() const override {
356
278
  // Currently, the secondary instance does not own the database files. It
357
279
  // simply opens the files of the primary instance and tracks their file
@@ -403,4 +325,3 @@ class DBImplSecondary : public DBImpl {
403
325
  };
404
326
 
405
327
  } // namespace ROCKSDB_NAMESPACE
406
-
@@ -1328,7 +1328,13 @@ IOStatus DBImpl::WriteToWAL(const WriteBatch& merged_batch,
1328
1328
  if (UNLIKELY(needs_locking)) {
1329
1329
  log_write_mutex_.Lock();
1330
1330
  }
1331
- IOStatus io_s = log_writer->AddRecord(log_entry, rate_limiter_priority);
1331
+ IOStatus io_s = log_writer->MaybeAddUserDefinedTimestampSizeRecord(
1332
+ versions_->GetColumnFamiliesTimestampSizeForRecord(),
1333
+ rate_limiter_priority);
1334
+ if (!io_s.ok()) {
1335
+ return io_s;
1336
+ }
1337
+ io_s = log_writer->AddRecord(log_entry, rate_limiter_priority);
1332
1338
 
1333
1339
  if (UNLIKELY(needs_locking)) {
1334
1340
  log_write_mutex_.Unlock();
@@ -77,6 +77,7 @@ DBIter::DBIter(Env* _env, const ReadOptions& read_options,
77
77
  expose_blob_index_(expose_blob_index),
78
78
  is_blob_(false),
79
79
  arena_mode_(arena_mode),
80
+ io_activity_(read_options.io_activity),
80
81
  db_impl_(db_impl),
81
82
  cfd_(cfd),
82
83
  timestamp_ub_(read_options.timestamp),
@@ -196,12 +197,11 @@ bool DBIter::SetBlobValueIfNeeded(const Slice& user_key,
196
197
 
197
198
  // TODO: consider moving ReadOptions from ArenaWrappedDBIter to DBIter to
198
199
  // avoid having to copy options back and forth.
199
- // TODO: plumb Env::IOActivity
200
200
  ReadOptions read_options;
201
201
  read_options.read_tier = read_tier_;
202
202
  read_options.fill_cache = fill_cache_;
203
203
  read_options.verify_checksums = verify_checksums_;
204
-
204
+ read_options.io_activity = io_activity_;
205
205
  constexpr FilePrefetchBuffer* prefetch_buffer = nullptr;
206
206
  constexpr uint64_t* bytes_read = nullptr;
207
207
 
@@ -384,6 +384,7 @@ class DBIter final : public Iterator {
384
384
  bool expose_blob_index_;
385
385
  bool is_blob_;
386
386
  bool arena_mode_;
387
+ const Env::IOActivity io_activity_;
387
388
  // List of operands for merge operator.
388
389
  MergeContext merge_context_;
389
390
  LocalStatistics local_stats_;