@nxtedition/rocksdb 8.2.0 → 8.2.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (321) hide show
  1. package/binding.cc +3 -3
  2. package/deps/rocksdb/rocksdb/CMakeLists.txt +16 -52
  3. package/deps/rocksdb/rocksdb/Makefile +10 -5
  4. package/deps/rocksdb/rocksdb/TARGETS +8 -345
  5. package/deps/rocksdb/rocksdb/cache/cache_test.cc +92 -0
  6. package/deps/rocksdb/rocksdb/cache/clock_cache.cc +32 -32
  7. package/deps/rocksdb/rocksdb/cache/clock_cache.h +12 -9
  8. package/deps/rocksdb/rocksdb/cache/compressed_secondary_cache.cc +6 -43
  9. package/deps/rocksdb/rocksdb/cache/compressed_secondary_cache.h +3 -13
  10. package/deps/rocksdb/rocksdb/cache/compressed_secondary_cache_test.cc +8 -5
  11. package/deps/rocksdb/rocksdb/cache/lru_cache.cc +21 -47
  12. package/deps/rocksdb/rocksdb/cache/lru_cache.h +3 -8
  13. package/deps/rocksdb/rocksdb/cache/lru_cache_test.cc +2 -1
  14. package/deps/rocksdb/rocksdb/cache/secondary_cache_adapter.cc +1 -2
  15. package/deps/rocksdb/rocksdb/cache/sharded_cache.cc +44 -7
  16. package/deps/rocksdb/rocksdb/cache/sharded_cache.h +13 -14
  17. package/deps/rocksdb/rocksdb/db/blob/blob_contents.h +1 -1
  18. package/deps/rocksdb/rocksdb/db/blob/blob_file_builder.cc +1 -0
  19. package/deps/rocksdb/rocksdb/db/blob/blob_file_cache.cc +2 -2
  20. package/deps/rocksdb/rocksdb/db/blob/blob_file_cache.h +2 -1
  21. package/deps/rocksdb/rocksdb/db/blob/blob_file_cache_test.cc +17 -8
  22. package/deps/rocksdb/rocksdb/db/blob/blob_file_reader.cc +40 -21
  23. package/deps/rocksdb/rocksdb/db/blob/blob_file_reader.h +5 -1
  24. package/deps/rocksdb/rocksdb/db/blob/blob_file_reader_test.cc +41 -42
  25. package/deps/rocksdb/rocksdb/db/blob/blob_log_sequential_reader.cc +1 -1
  26. package/deps/rocksdb/rocksdb/db/blob/blob_log_writer.cc +1 -1
  27. package/deps/rocksdb/rocksdb/db/blob/blob_source.cc +5 -4
  28. package/deps/rocksdb/rocksdb/db/blob/blob_source.h +2 -2
  29. package/deps/rocksdb/rocksdb/db/blob/blob_source_test.cc +5 -3
  30. package/deps/rocksdb/rocksdb/db/builder.cc +7 -6
  31. package/deps/rocksdb/rocksdb/db/builder.h +2 -2
  32. package/deps/rocksdb/rocksdb/db/c.cc +76 -5
  33. package/deps/rocksdb/rocksdb/db/c_test.c +141 -0
  34. package/deps/rocksdb/rocksdb/db/column_family.cc +32 -0
  35. package/deps/rocksdb/rocksdb/db/compact_files_test.cc +3 -2
  36. package/deps/rocksdb/rocksdb/db/compaction/compaction.cc +5 -0
  37. package/deps/rocksdb/rocksdb/db/compaction/compaction.h +8 -5
  38. package/deps/rocksdb/rocksdb/db/compaction/compaction_iterator.cc +12 -10
  39. package/deps/rocksdb/rocksdb/db/compaction/compaction_job.cc +21 -17
  40. package/deps/rocksdb/rocksdb/db/compaction/compaction_job_stats_test.cc +2 -2
  41. package/deps/rocksdb/rocksdb/db/compaction/compaction_job_test.cc +8 -7
  42. package/deps/rocksdb/rocksdb/db/compaction/compaction_outputs.cc +3 -1
  43. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker.cc +1 -1
  44. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_fifo.cc +77 -50
  45. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_fifo.h +4 -5
  46. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_level.cc +55 -8
  47. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_test.cc +142 -56
  48. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_universal.cc +1 -1
  49. package/deps/rocksdb/rocksdb/db/compaction/compaction_service_test.cc +1 -2
  50. package/deps/rocksdb/rocksdb/db/compaction/tiered_compaction_test.cc +21 -20
  51. package/deps/rocksdb/rocksdb/db/convenience.cc +8 -6
  52. package/deps/rocksdb/rocksdb/db/corruption_test.cc +5 -4
  53. package/deps/rocksdb/rocksdb/db/db_block_cache_test.cc +6 -3
  54. package/deps/rocksdb/rocksdb/db/db_bloom_filter_test.cc +260 -220
  55. package/deps/rocksdb/rocksdb/db/db_clip_test.cc +142 -0
  56. package/deps/rocksdb/rocksdb/db/db_compaction_filter_test.cc +1 -1
  57. package/deps/rocksdb/rocksdb/db/db_compaction_test.cc +333 -27
  58. package/deps/rocksdb/rocksdb/db/db_impl/compacted_db_impl.cc +5 -0
  59. package/deps/rocksdb/rocksdb/db/db_impl/compacted_db_impl.h +7 -0
  60. package/deps/rocksdb/rocksdb/db/db_impl/db_impl.cc +189 -27
  61. package/deps/rocksdb/rocksdb/db/db_impl/db_impl.h +23 -10
  62. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_compaction_flush.cc +134 -90
  63. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_debug.cc +2 -2
  64. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_experimental.cc +5 -3
  65. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_files.cc +5 -1
  66. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_open.cc +124 -16
  67. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_readonly.cc +10 -0
  68. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_readonly.h +7 -0
  69. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_secondary.cc +15 -0
  70. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_write.cc +11 -5
  71. package/deps/rocksdb/rocksdb/db/db_iter.cc +7 -8
  72. package/deps/rocksdb/rocksdb/db/db_iterator_test.cc +54 -3
  73. package/deps/rocksdb/rocksdb/db/db_merge_operator_test.cc +42 -0
  74. package/deps/rocksdb/rocksdb/db/db_options_test.cc +116 -1
  75. package/deps/rocksdb/rocksdb/db/db_properties_test.cc +3 -2
  76. package/deps/rocksdb/rocksdb/db/db_rate_limiter_test.cc +3 -2
  77. package/deps/rocksdb/rocksdb/db/db_sst_test.cc +9 -8
  78. package/deps/rocksdb/rocksdb/db/db_statistics_test.cc +142 -63
  79. package/deps/rocksdb/rocksdb/db/db_test.cc +28 -7
  80. package/deps/rocksdb/rocksdb/db/db_test2.cc +71 -131
  81. package/deps/rocksdb/rocksdb/db/db_test_util.cc +18 -0
  82. package/deps/rocksdb/rocksdb/db/db_test_util.h +6 -0
  83. package/deps/rocksdb/rocksdb/db/db_universal_compaction_test.cc +10 -10
  84. package/deps/rocksdb/rocksdb/db/db_wal_test.cc +25 -0
  85. package/deps/rocksdb/rocksdb/db/db_with_timestamp_basic_test.cc +88 -0
  86. package/deps/rocksdb/rocksdb/db/db_write_buffer_manager_test.cc +67 -0
  87. package/deps/rocksdb/rocksdb/db/db_write_test.cc +5 -0
  88. package/deps/rocksdb/rocksdb/db/error_handler_fs_test.cc +4 -4
  89. package/deps/rocksdb/rocksdb/db/experimental.cc +4 -2
  90. package/deps/rocksdb/rocksdb/db/external_sst_file_basic_test.cc +86 -1
  91. package/deps/rocksdb/rocksdb/db/external_sst_file_ingestion_job.cc +15 -2
  92. package/deps/rocksdb/rocksdb/db/external_sst_file_test.cc +1 -2
  93. package/deps/rocksdb/rocksdb/db/flush_job.cc +21 -14
  94. package/deps/rocksdb/rocksdb/db/forward_iterator.cc +14 -7
  95. package/deps/rocksdb/rocksdb/db/import_column_family_job.cc +31 -8
  96. package/deps/rocksdb/rocksdb/db/import_column_family_test.cc +21 -19
  97. package/deps/rocksdb/rocksdb/db/internal_stats.cc +42 -12
  98. package/deps/rocksdb/rocksdb/db/internal_stats.h +1 -0
  99. package/deps/rocksdb/rocksdb/db/kv_checksum.h +92 -6
  100. package/deps/rocksdb/rocksdb/db/listener_test.cc +2 -2
  101. package/deps/rocksdb/rocksdb/db/log_format.h +8 -4
  102. package/deps/rocksdb/rocksdb/db/log_reader.cc +129 -51
  103. package/deps/rocksdb/rocksdb/db/log_reader.h +16 -0
  104. package/deps/rocksdb/rocksdb/db/log_test.cc +125 -4
  105. package/deps/rocksdb/rocksdb/db/log_writer.cc +32 -2
  106. package/deps/rocksdb/rocksdb/db/log_writer.h +16 -0
  107. package/deps/rocksdb/rocksdb/db/memtable.cc +17 -46
  108. package/deps/rocksdb/rocksdb/db/memtable.h +1 -1
  109. package/deps/rocksdb/rocksdb/db/memtable_list.cc +8 -4
  110. package/deps/rocksdb/rocksdb/db/merge_helper.cc +1 -1
  111. package/deps/rocksdb/rocksdb/db/perf_context_test.cc +2 -1
  112. package/deps/rocksdb/rocksdb/db/plain_table_db_test.cc +5 -4
  113. package/deps/rocksdb/rocksdb/db/repair.cc +38 -11
  114. package/deps/rocksdb/rocksdb/db/seqno_time_test.cc +3 -3
  115. package/deps/rocksdb/rocksdb/db/table_cache.cc +68 -51
  116. package/deps/rocksdb/rocksdb/db/table_cache.h +20 -10
  117. package/deps/rocksdb/rocksdb/db/table_cache_sync_and_async.h +2 -1
  118. package/deps/rocksdb/rocksdb/db/table_properties_collector_test.cc +6 -3
  119. package/deps/rocksdb/rocksdb/db/version_builder.cc +9 -5
  120. package/deps/rocksdb/rocksdb/db/version_builder.h +2 -1
  121. package/deps/rocksdb/rocksdb/db/version_builder_test.cc +140 -120
  122. package/deps/rocksdb/rocksdb/db/version_edit.cc +14 -0
  123. package/deps/rocksdb/rocksdb/db/version_edit.h +12 -4
  124. package/deps/rocksdb/rocksdb/db/version_edit_handler.cc +21 -13
  125. package/deps/rocksdb/rocksdb/db/version_edit_handler.h +26 -16
  126. package/deps/rocksdb/rocksdb/db/version_edit_test.cc +9 -9
  127. package/deps/rocksdb/rocksdb/db/version_set.cc +292 -96
  128. package/deps/rocksdb/rocksdb/db/version_set.h +53 -28
  129. package/deps/rocksdb/rocksdb/db/version_set_sync_and_async.h +1 -0
  130. package/deps/rocksdb/rocksdb/db/version_set_test.cc +62 -22
  131. package/deps/rocksdb/rocksdb/db/version_util.h +5 -4
  132. package/deps/rocksdb/rocksdb/db/write_batch.cc +3 -1
  133. package/deps/rocksdb/rocksdb/db_stress_tool/CMakeLists.txt +1 -0
  134. package/deps/rocksdb/rocksdb/db_stress_tool/batched_ops_stress.cc +119 -27
  135. package/deps/rocksdb/rocksdb/db_stress_tool/cf_consistency_stress.cc +123 -0
  136. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_common.h +4 -0
  137. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_driver.cc +7 -2
  138. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_env_wrapper.h +34 -0
  139. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_gflags.cc +13 -0
  140. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_shared_state.h +43 -33
  141. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_test_base.cc +29 -17
  142. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_test_base.h +5 -0
  143. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_tool.cc +6 -1
  144. package/deps/rocksdb/rocksdb/db_stress_tool/expected_state.cc +85 -50
  145. package/deps/rocksdb/rocksdb/db_stress_tool/expected_state.h +96 -54
  146. package/deps/rocksdb/rocksdb/db_stress_tool/expected_value.cc +122 -0
  147. package/deps/rocksdb/rocksdb/db_stress_tool/expected_value.h +206 -0
  148. package/deps/rocksdb/rocksdb/db_stress_tool/multi_ops_txns_stress.cc +9 -1
  149. package/deps/rocksdb/rocksdb/db_stress_tool/multi_ops_txns_stress.h +9 -3
  150. package/deps/rocksdb/rocksdb/db_stress_tool/no_batched_ops_stress.cc +322 -92
  151. package/deps/rocksdb/rocksdb/env/env_posix.cc +12 -8
  152. package/deps/rocksdb/rocksdb/env/env_test.cc +31 -0
  153. package/deps/rocksdb/rocksdb/env/mock_env.cc +1 -1
  154. package/deps/rocksdb/rocksdb/env/unique_id_gen.h +14 -0
  155. package/deps/rocksdb/rocksdb/file/file_prefetch_buffer.cc +1 -1
  156. package/deps/rocksdb/rocksdb/file/file_prefetch_buffer.h +5 -1
  157. package/deps/rocksdb/rocksdb/file/file_util.cc +3 -3
  158. package/deps/rocksdb/rocksdb/file/file_util.h +2 -0
  159. package/deps/rocksdb/rocksdb/file/prefetch_test.cc +89 -0
  160. package/deps/rocksdb/rocksdb/file/random_access_file_reader.cc +22 -7
  161. package/deps/rocksdb/rocksdb/file/random_access_file_reader.h +3 -2
  162. package/deps/rocksdb/rocksdb/file/readahead_raf.cc +1 -1
  163. package/deps/rocksdb/rocksdb/file/sequence_file_reader.cc +1 -1
  164. package/deps/rocksdb/rocksdb/file/writable_file_writer.cc +1 -1
  165. package/deps/rocksdb/rocksdb/include/rocksdb/advanced_cache.h +3 -0
  166. package/deps/rocksdb/rocksdb/include/rocksdb/advanced_options.h +154 -74
  167. package/deps/rocksdb/rocksdb/include/rocksdb/c.h +27 -7
  168. package/deps/rocksdb/rocksdb/include/rocksdb/cache.h +107 -28
  169. package/deps/rocksdb/rocksdb/include/rocksdb/db.h +19 -0
  170. package/deps/rocksdb/rocksdb/include/rocksdb/env.h +8 -0
  171. package/deps/rocksdb/rocksdb/include/rocksdb/file_system.h +2 -0
  172. package/deps/rocksdb/rocksdb/include/rocksdb/memory_allocator.h +7 -1
  173. package/deps/rocksdb/rocksdb/include/rocksdb/options.h +137 -152
  174. package/deps/rocksdb/rocksdb/include/rocksdb/perf_context.h +61 -26
  175. package/deps/rocksdb/rocksdb/include/rocksdb/secondary_cache.h +30 -26
  176. package/deps/rocksdb/rocksdb/include/rocksdb/sst_file_writer.h +33 -16
  177. package/deps/rocksdb/rocksdb/include/rocksdb/statistics.h +87 -8
  178. package/deps/rocksdb/rocksdb/include/rocksdb/table.h +1 -1
  179. package/deps/rocksdb/rocksdb/include/rocksdb/table_properties.h +5 -0
  180. package/deps/rocksdb/rocksdb/include/rocksdb/thread_status.h +1 -0
  181. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/options_util.h +1 -0
  182. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/stackable_db.h +7 -0
  183. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/transaction.h +0 -1
  184. package/deps/rocksdb/rocksdb/include/rocksdb/version.h +2 -2
  185. package/deps/rocksdb/rocksdb/include/rocksdb/write_buffer_manager.h +9 -2
  186. package/deps/rocksdb/rocksdb/logging/env_logger.h +2 -0
  187. package/deps/rocksdb/rocksdb/memory/jemalloc_nodump_allocator.cc +78 -42
  188. package/deps/rocksdb/rocksdb/memory/jemalloc_nodump_allocator.h +14 -9
  189. package/deps/rocksdb/rocksdb/memtable/inlineskiplist_test.cc +1 -0
  190. package/deps/rocksdb/rocksdb/memtable/skiplist_test.cc +1 -0
  191. package/deps/rocksdb/rocksdb/memtable/write_buffer_manager.cc +4 -9
  192. package/deps/rocksdb/rocksdb/microbench/db_basic_bench.cc +19 -11
  193. package/deps/rocksdb/rocksdb/monitoring/instrumented_mutex.h +1 -1
  194. package/deps/rocksdb/rocksdb/monitoring/perf_context.cc +211 -555
  195. package/deps/rocksdb/rocksdb/monitoring/perf_step_timer.h +1 -1
  196. package/deps/rocksdb/rocksdb/monitoring/statistics.cc +36 -2
  197. package/deps/rocksdb/rocksdb/monitoring/thread_status_updater.cc +17 -7
  198. package/deps/rocksdb/rocksdb/monitoring/thread_status_updater.h +10 -7
  199. package/deps/rocksdb/rocksdb/monitoring/thread_status_util.cc +19 -18
  200. package/deps/rocksdb/rocksdb/monitoring/thread_status_util.h +10 -2
  201. package/deps/rocksdb/rocksdb/monitoring/thread_status_util_debug.cc +14 -0
  202. package/deps/rocksdb/rocksdb/options/cf_options.cc +35 -2
  203. package/deps/rocksdb/rocksdb/options/cf_options.h +5 -0
  204. package/deps/rocksdb/rocksdb/options/customizable_test.cc +1 -1
  205. package/deps/rocksdb/rocksdb/options/options.cc +12 -53
  206. package/deps/rocksdb/rocksdb/options/options_helper.cc +4 -0
  207. package/deps/rocksdb/rocksdb/options/options_parser.cc +11 -0
  208. package/deps/rocksdb/rocksdb/options/options_settable_test.cc +32 -4
  209. package/deps/rocksdb/rocksdb/options/options_test.cc +89 -5
  210. package/deps/rocksdb/rocksdb/port/lang.h +27 -0
  211. package/deps/rocksdb/rocksdb/port/stack_trace.cc +67 -24
  212. package/deps/rocksdb/rocksdb/src.mk +2 -0
  213. package/deps/rocksdb/rocksdb/table/block_based/binary_search_index_reader.cc +2 -3
  214. package/deps/rocksdb/rocksdb/table/block_based/block.cc +195 -35
  215. package/deps/rocksdb/rocksdb/table/block_based/block.h +197 -24
  216. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_builder.cc +71 -51
  217. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_builder.h +7 -1
  218. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_factory.cc +4 -6
  219. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_factory.h +3 -0
  220. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_iterator.cc +43 -2
  221. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_iterator.h +36 -6
  222. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader.cc +266 -166
  223. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader.h +44 -14
  224. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader_impl.h +1 -1
  225. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader_sync_and_async.h +63 -56
  226. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader_test.cc +8 -2
  227. package/deps/rocksdb/rocksdb/table/block_based/block_builder.h +4 -2
  228. package/deps/rocksdb/rocksdb/table/block_based/block_cache.cc +10 -0
  229. package/deps/rocksdb/rocksdb/table/block_based/block_cache.h +14 -2
  230. package/deps/rocksdb/rocksdb/table/block_based/block_test.cc +918 -2
  231. package/deps/rocksdb/rocksdb/table/block_based/data_block_hash_index_test.cc +3 -2
  232. package/deps/rocksdb/rocksdb/table/block_based/filter_block.h +10 -9
  233. package/deps/rocksdb/rocksdb/table/block_based/filter_block_reader_common.cc +6 -8
  234. package/deps/rocksdb/rocksdb/table/block_based/filter_block_reader_common.h +2 -2
  235. package/deps/rocksdb/rocksdb/table/block_based/flush_block_policy.cc +1 -1
  236. package/deps/rocksdb/rocksdb/table/block_based/full_filter_block.cc +18 -23
  237. package/deps/rocksdb/rocksdb/table/block_based/full_filter_block.h +8 -8
  238. package/deps/rocksdb/rocksdb/table/block_based/full_filter_block_test.cc +16 -32
  239. package/deps/rocksdb/rocksdb/table/block_based/hash_index_reader.cc +7 -8
  240. package/deps/rocksdb/rocksdb/table/block_based/index_reader_common.cc +4 -5
  241. package/deps/rocksdb/rocksdb/table/block_based/index_reader_common.h +3 -3
  242. package/deps/rocksdb/rocksdb/table/block_based/partitioned_filter_block.cc +46 -53
  243. package/deps/rocksdb/rocksdb/table/block_based/partitioned_filter_block.h +12 -12
  244. package/deps/rocksdb/rocksdb/table/block_based/partitioned_filter_block_test.cc +7 -9
  245. package/deps/rocksdb/rocksdb/table/block_based/partitioned_index_reader.cc +26 -23
  246. package/deps/rocksdb/rocksdb/table/block_based/partitioned_index_reader.h +2 -1
  247. package/deps/rocksdb/rocksdb/table/block_based/reader_common.h +3 -0
  248. package/deps/rocksdb/rocksdb/table/block_based/uncompression_dict_reader.cc +4 -2
  249. package/deps/rocksdb/rocksdb/table/block_based/uncompression_dict_reader.h +3 -2
  250. package/deps/rocksdb/rocksdb/table/block_fetcher.cc +7 -1
  251. package/deps/rocksdb/rocksdb/table/block_fetcher.h +1 -1
  252. package/deps/rocksdb/rocksdb/table/block_fetcher_test.cc +2 -1
  253. package/deps/rocksdb/rocksdb/table/cuckoo/cuckoo_table_builder_test.cc +3 -2
  254. package/deps/rocksdb/rocksdb/table/cuckoo/cuckoo_table_reader.cc +5 -2
  255. package/deps/rocksdb/rocksdb/table/cuckoo/cuckoo_table_reader.h +4 -2
  256. package/deps/rocksdb/rocksdb/table/format.cc +4 -4
  257. package/deps/rocksdb/rocksdb/table/format.h +1 -1
  258. package/deps/rocksdb/rocksdb/table/get_context.cc +1 -1
  259. package/deps/rocksdb/rocksdb/table/meta_blocks.cc +33 -22
  260. package/deps/rocksdb/rocksdb/table/meta_blocks.h +4 -0
  261. package/deps/rocksdb/rocksdb/table/mock_table.cc +4 -2
  262. package/deps/rocksdb/rocksdb/table/persistent_cache_helper.h +1 -1
  263. package/deps/rocksdb/rocksdb/table/persistent_cache_options.h +1 -1
  264. package/deps/rocksdb/rocksdb/table/plain/plain_table_reader.cc +18 -10
  265. package/deps/rocksdb/rocksdb/table/plain/plain_table_reader.h +4 -3
  266. package/deps/rocksdb/rocksdb/table/sst_file_dumper.cc +10 -7
  267. package/deps/rocksdb/rocksdb/table/sst_file_reader.cc +4 -2
  268. package/deps/rocksdb/rocksdb/table/sst_file_writer.cc +11 -0
  269. package/deps/rocksdb/rocksdb/table/table_builder.h +14 -5
  270. package/deps/rocksdb/rocksdb/table/table_properties.cc +2 -0
  271. package/deps/rocksdb/rocksdb/table/table_reader.h +6 -3
  272. package/deps/rocksdb/rocksdb/table/table_reader_bench.cc +1 -1
  273. package/deps/rocksdb/rocksdb/table/table_test.cc +291 -34
  274. package/deps/rocksdb/rocksdb/test_util/secondary_cache_test_util.h +3 -1
  275. package/deps/rocksdb/rocksdb/test_util/testharness.h +5 -0
  276. package/deps/rocksdb/rocksdb/test_util/testutil.cc +2 -2
  277. package/deps/rocksdb/rocksdb/tools/db_bench_tool.cc +33 -17
  278. package/deps/rocksdb/rocksdb/tools/ldb_cmd.cc +3 -1
  279. package/deps/rocksdb/rocksdb/util/bloom_impl.h +2 -2
  280. package/deps/rocksdb/rocksdb/util/compression.h +1 -1
  281. package/deps/rocksdb/rocksdb/util/crc32c.cc +24 -83
  282. package/deps/rocksdb/rocksdb/util/crc32c_arm64.cc +7 -9
  283. package/deps/rocksdb/rocksdb/util/file_checksum_helper.cc +4 -1
  284. package/deps/rocksdb/rocksdb/util/filter_bench.cc +1 -1
  285. package/deps/rocksdb/rocksdb/util/gflags_compat.h +9 -10
  286. package/deps/rocksdb/rocksdb/util/math.h +12 -7
  287. package/deps/rocksdb/rocksdb/util/rate_limiter.cc +16 -18
  288. package/deps/rocksdb/rocksdb/util/rate_limiter_test.cc +46 -2
  289. package/deps/rocksdb/rocksdb/util/ribbon_test.cc +6 -6
  290. package/deps/rocksdb/rocksdb/util/slice_transform_test.cc +12 -7
  291. package/deps/rocksdb/rocksdb/util/stop_watch.h +31 -13
  292. package/deps/rocksdb/rocksdb/util/thread_list_test.cc +2 -0
  293. package/deps/rocksdb/rocksdb/util/thread_operation.h +2 -1
  294. package/deps/rocksdb/rocksdb/util/udt_util.h +77 -0
  295. package/deps/rocksdb/rocksdb/utilities/agg_merge/agg_merge.cc +2 -2
  296. package/deps/rocksdb/rocksdb/utilities/agg_merge/agg_merge_test.cc +1 -1
  297. package/deps/rocksdb/rocksdb/utilities/agg_merge/test_agg_merge.cc +1 -1
  298. package/deps/rocksdb/rocksdb/utilities/backup/backup_engine.cc +1 -1
  299. package/deps/rocksdb/rocksdb/utilities/backup/backup_engine_test.cc +1 -1
  300. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_compaction_filter.h +1 -1
  301. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_db_impl.cc +11 -1
  302. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_db_test.cc +34 -1
  303. package/deps/rocksdb/rocksdb/utilities/options/options_util_test.cc +15 -0
  304. package/deps/rocksdb/rocksdb/utilities/simulator_cache/sim_cache.cc +1 -1
  305. package/deps/rocksdb/rocksdb/utilities/transactions/pessimistic_transaction.cc +5 -1
  306. package/deps/rocksdb/rocksdb/utilities/transactions/transaction_base.cc +29 -1
  307. package/deps/rocksdb/rocksdb/utilities/transactions/transaction_test.cc +0 -1
  308. package/deps/rocksdb/rocksdb/utilities/transactions/write_prepared_transaction_test.cc +0 -1
  309. package/deps/rocksdb/rocksdb/utilities/transactions/write_prepared_txn.cc +6 -1
  310. package/deps/rocksdb/rocksdb/utilities/transactions/write_prepared_txn_db.cc +10 -0
  311. package/deps/rocksdb/rocksdb/utilities/transactions/write_unprepared_txn.cc +6 -1
  312. package/deps/rocksdb/rocksdb/utilities/transactions/write_unprepared_txn_db.cc +5 -0
  313. package/deps/rocksdb/rocksdb/utilities/ttl/db_ttl_impl.cc +5 -0
  314. package/package.json +1 -1
  315. package/prebuilds/darwin-arm64/node.napi.node +0 -0
  316. package/prebuilds/linux-x64/node.napi.node +0 -0
  317. /package/deps/rocksdb/rocksdb/memory/{memory_allocator.h → memory_allocator_impl.h} +0 -0
  318. /package/deps/rocksdb/rocksdb/monitoring/{statistics.h → statistics_impl.h} +0 -0
  319. /package/deps/rocksdb/rocksdb/table/block_based/{flush_block_policy.h → flush_block_policy_impl.h} +0 -0
  320. /package/deps/rocksdb/rocksdb/util/{rate_limiter.h → rate_limiter_impl.h} +0 -0
  321. /package/deps/rocksdb/rocksdb/utilities/agg_merge/{agg_merge.h → agg_merge_impl.h} +0 -0
@@ -164,6 +164,54 @@ bool Reader::ReadRecord(Slice* record, std::string* scratch,
164
164
  }
165
165
  break;
166
166
 
167
+ case kSetCompressionType: {
168
+ if (compression_type_record_read_) {
169
+ ReportCorruption(fragment.size(),
170
+ "read multiple SetCompressionType records");
171
+ }
172
+ if (first_record_read_) {
173
+ ReportCorruption(fragment.size(),
174
+ "SetCompressionType not the first record");
175
+ }
176
+ prospective_record_offset = physical_record_offset;
177
+ scratch->clear();
178
+ last_record_offset_ = prospective_record_offset;
179
+ CompressionTypeRecord compression_record(kNoCompression);
180
+ Status s = compression_record.DecodeFrom(&fragment);
181
+ if (!s.ok()) {
182
+ ReportCorruption(fragment.size(),
183
+ "could not decode SetCompressionType record");
184
+ } else {
185
+ InitCompression(compression_record);
186
+ }
187
+ break;
188
+ }
189
+ case kUserDefinedTimestampSizeType:
190
+ case kRecyclableUserDefinedTimestampSizeType: {
191
+ if (in_fragmented_record && !scratch->empty()) {
192
+ ReportCorruption(
193
+ scratch->size(),
194
+ "user-defined timestamp size record interspersed partial record");
195
+ }
196
+ prospective_record_offset = physical_record_offset;
197
+ scratch->clear();
198
+ last_record_offset_ = prospective_record_offset;
199
+ UserDefinedTimestampSizeRecord ts_record;
200
+ Status s = ts_record.DecodeFrom(&fragment);
201
+ if (!s.ok()) {
202
+ ReportCorruption(
203
+ fragment.size(),
204
+ "could not decode user-defined timestamp size record");
205
+ } else {
206
+ s = UpdateRecordedTimestampSize(
207
+ ts_record.GetUserDefinedTimestampSize());
208
+ if (!s.ok()) {
209
+ ReportCorruption(fragment.size(), s.getState());
210
+ }
211
+ }
212
+ break;
213
+ }
214
+
167
215
  case kBadHeader:
168
216
  if (wal_recovery_mode == WALRecoveryMode::kAbsoluteConsistency ||
169
217
  wal_recovery_mode == WALRecoveryMode::kPointInTimeRecovery) {
@@ -257,29 +305,6 @@ bool Reader::ReadRecord(Slice* record, std::string* scratch,
257
305
  }
258
306
  break;
259
307
 
260
- case kSetCompressionType: {
261
- if (compression_type_record_read_) {
262
- ReportCorruption(fragment.size(),
263
- "read multiple SetCompressionType records");
264
- }
265
- if (first_record_read_) {
266
- ReportCorruption(fragment.size(),
267
- "SetCompressionType not the first record");
268
- }
269
- prospective_record_offset = physical_record_offset;
270
- scratch->clear();
271
- last_record_offset_ = prospective_record_offset;
272
- CompressionTypeRecord compression_record(kNoCompression);
273
- Status s = compression_record.DecodeFrom(&fragment);
274
- if (!s.ok()) {
275
- ReportCorruption(fragment.size(),
276
- "could not decode SetCompressionType record");
277
- } else {
278
- InitCompression(compression_record);
279
- }
280
- break;
281
- }
282
-
283
308
  default: {
284
309
  char buf[40];
285
310
  snprintf(buf, sizeof(buf), "unknown record type %u", record_type);
@@ -444,7 +469,8 @@ unsigned int Reader::ReadPhysicalRecord(Slice* result, size_t* drop_size,
444
469
  const unsigned int type = header[6];
445
470
  const uint32_t length = a | (b << 8);
446
471
  int header_size = kHeaderSize;
447
- if (type >= kRecyclableFullType && type <= kRecyclableLastType) {
472
+ if ((type >= kRecyclableFullType && type <= kRecyclableLastType) ||
473
+ type == kRecyclableUserDefinedTimestampSizeType) {
448
474
  if (end_of_buffer_offset_ - buffer_.size() == 0) {
449
475
  recycled_ = true;
450
476
  }
@@ -500,7 +526,9 @@ unsigned int Reader::ReadPhysicalRecord(Slice* result, size_t* drop_size,
500
526
 
501
527
  buffer_.remove_prefix(header_size + length);
502
528
 
503
- if (!uncompress_ || type == kSetCompressionType) {
529
+ if (!uncompress_ || type == kSetCompressionType ||
530
+ type == kUserDefinedTimestampSizeType ||
531
+ type == kRecyclableUserDefinedTimestampSizeType) {
504
532
  *result = Slice(header + header_size, length);
505
533
  return type;
506
534
  } else {
@@ -567,6 +595,26 @@ void Reader::InitCompression(const CompressionTypeRecord& compression_record) {
567
595
  assert(uncompressed_buffer_);
568
596
  }
569
597
 
598
+ Status Reader::UpdateRecordedTimestampSize(
599
+ const std::vector<std::pair<uint32_t, size_t>>& cf_to_ts_sz) {
600
+ for (const auto& [cf, ts_sz] : cf_to_ts_sz) {
601
+ // Zero user-defined timestamp size are not recorded.
602
+ if (ts_sz == 0) {
603
+ return Status::Corruption(
604
+ "User-defined timestamp size record contains zero timestamp size.");
605
+ }
606
+ // The user-defined timestamp size record for a column family should not be
607
+ // updated in the same log file.
608
+ if (recorded_cf_to_ts_sz_.count(cf) != 0) {
609
+ return Status::Corruption(
610
+ "User-defined timestamp size record contains update to "
611
+ "recorded column family.");
612
+ }
613
+ recorded_cf_to_ts_sz_.insert(std::make_pair(cf, ts_sz));
614
+ }
615
+ return Status::OK();
616
+ }
617
+
570
618
  bool FragmentBufferedReader::ReadRecord(Slice* record, std::string* scratch,
571
619
  WALRecoveryMode /*unused*/,
572
620
  uint64_t* /* checksum */) {
@@ -635,30 +683,6 @@ bool FragmentBufferedReader::ReadRecord(Slice* record, std::string* scratch,
635
683
  }
636
684
  break;
637
685
 
638
- case kBadHeader:
639
- case kBadRecord:
640
- case kEof:
641
- case kOldRecord:
642
- if (in_fragmented_record_) {
643
- ReportCorruption(fragments_.size(), "error in middle of record");
644
- in_fragmented_record_ = false;
645
- fragments_.clear();
646
- }
647
- break;
648
-
649
- case kBadRecordChecksum:
650
- if (recycled_) {
651
- fragments_.clear();
652
- return false;
653
- }
654
- ReportCorruption(drop_size, "checksum mismatch");
655
- if (in_fragmented_record_) {
656
- ReportCorruption(fragments_.size(), "error in middle of record");
657
- in_fragmented_record_ = false;
658
- fragments_.clear();
659
- }
660
- break;
661
-
662
686
  case kSetCompressionType: {
663
687
  if (compression_type_record_read_) {
664
688
  ReportCorruption(fragment.size(),
@@ -683,6 +707,57 @@ bool FragmentBufferedReader::ReadRecord(Slice* record, std::string* scratch,
683
707
  break;
684
708
  }
685
709
 
710
+ case kUserDefinedTimestampSizeType:
711
+ case kRecyclableUserDefinedTimestampSizeType: {
712
+ if (in_fragmented_record_ && !scratch->empty()) {
713
+ ReportCorruption(
714
+ scratch->size(),
715
+ "user-defined timestamp size record interspersed partial record");
716
+ }
717
+ fragments_.clear();
718
+ prospective_record_offset = physical_record_offset;
719
+ last_record_offset_ = prospective_record_offset;
720
+ in_fragmented_record_ = false;
721
+ UserDefinedTimestampSizeRecord ts_record;
722
+ Status s = ts_record.DecodeFrom(&fragment);
723
+ if (!s.ok()) {
724
+ ReportCorruption(
725
+ fragment.size(),
726
+ "could not decode user-defined timestamp size record");
727
+ } else {
728
+ s = UpdateRecordedTimestampSize(
729
+ ts_record.GetUserDefinedTimestampSize());
730
+ if (!s.ok()) {
731
+ ReportCorruption(fragment.size(), s.getState());
732
+ }
733
+ }
734
+ break;
735
+ }
736
+
737
+ case kBadHeader:
738
+ case kBadRecord:
739
+ case kEof:
740
+ case kOldRecord:
741
+ if (in_fragmented_record_) {
742
+ ReportCorruption(fragments_.size(), "error in middle of record");
743
+ in_fragmented_record_ = false;
744
+ fragments_.clear();
745
+ }
746
+ break;
747
+
748
+ case kBadRecordChecksum:
749
+ if (recycled_) {
750
+ fragments_.clear();
751
+ return false;
752
+ }
753
+ ReportCorruption(drop_size, "checksum mismatch");
754
+ if (in_fragmented_record_) {
755
+ ReportCorruption(fragments_.size(), "error in middle of record");
756
+ in_fragmented_record_ = false;
757
+ fragments_.clear();
758
+ }
759
+ break;
760
+
686
761
  default: {
687
762
  char buf[40];
688
763
  snprintf(buf, sizeof(buf), "unknown record type %u",
@@ -770,7 +845,8 @@ bool FragmentBufferedReader::TryReadFragment(
770
845
  const unsigned int type = header[6];
771
846
  const uint32_t length = a | (b << 8);
772
847
  int header_size = kHeaderSize;
773
- if (type >= kRecyclableFullType && type <= kRecyclableLastType) {
848
+ if ((type >= kRecyclableFullType && type <= kRecyclableLastType) ||
849
+ type == kRecyclableUserDefinedTimestampSizeType) {
774
850
  if (end_of_buffer_offset_ - buffer_.size() == 0) {
775
851
  recycled_ = true;
776
852
  }
@@ -822,7 +898,9 @@ bool FragmentBufferedReader::TryReadFragment(
822
898
 
823
899
  buffer_.remove_prefix(header_size + length);
824
900
 
825
- if (!uncompress_ || type == kSetCompressionType) {
901
+ if (!uncompress_ || type == kSetCompressionType ||
902
+ type == kUserDefinedTimestampSizeType ||
903
+ type == kRecyclableUserDefinedTimestampSizeType) {
826
904
  *fragment = Slice(header + header_size, length);
827
905
  *fragment_type_or_err = type;
828
906
  return true;
@@ -11,6 +11,8 @@
11
11
  #include <stdint.h>
12
12
 
13
13
  #include <memory>
14
+ #include <unordered_map>
15
+ #include <vector>
14
16
 
15
17
  #include "db/log_format.h"
16
18
  #include "file/sequence_file_reader.h"
@@ -18,6 +20,7 @@
18
20
  #include "rocksdb/slice.h"
19
21
  #include "rocksdb/status.h"
20
22
  #include "util/compression.h"
23
+ #include "util/udt_util.h"
21
24
  #include "util/xxhash.h"
22
25
 
23
26
  namespace ROCKSDB_NAMESPACE {
@@ -74,6 +77,12 @@ class Reader {
74
77
  WALRecoveryMode::kTolerateCorruptedTailRecords,
75
78
  uint64_t* record_checksum = nullptr);
76
79
 
80
+ // Return the recorded user-defined timestamp size that have been read so
81
+ // far. This only applies to WAL logs.
82
+ const std::unordered_map<uint32_t, size_t>& GetRecordedTimestampSize() const {
83
+ return recorded_cf_to_ts_sz_;
84
+ }
85
+
77
86
  // Returns the physical offset of the last record returned by ReadRecord.
78
87
  //
79
88
  // Undefined before the first call to ReadRecord.
@@ -154,6 +163,10 @@ class Reader {
154
163
  // Used for stream hashing uncompressed buffer in ReadPhysicalRecord()
155
164
  XXH3_state_t* uncompress_hash_state_;
156
165
 
166
+ // The recorded user-defined timestamp sizes that have been read so far. This
167
+ // is only for WAL logs.
168
+ std::unordered_map<uint32_t, size_t> recorded_cf_to_ts_sz_;
169
+
157
170
  // Extend record types with the following special values
158
171
  enum {
159
172
  kEof = kMaxRecordType + 1,
@@ -190,6 +203,9 @@ class Reader {
190
203
  void ReportDrop(size_t bytes, const Status& reason);
191
204
 
192
205
  void InitCompression(const CompressionTypeRecord& compression_record);
206
+
207
+ Status UpdateRecordedTimestampSize(
208
+ const std::vector<std::pair<uint32_t, size_t>>& cf_to_ts_sz);
193
209
  };
194
210
 
195
211
  class FragmentBufferedReader : public Reader {
@@ -45,9 +45,10 @@ static std::string RandomSkewedString(int i, Random* rnd) {
45
45
  return BigString(NumberString(i), rnd->Skewed(17));
46
46
  }
47
47
 
48
- // Param type is tuple<int, bool>
48
+ // Param type is tuple<int, bool, CompressionType>
49
49
  // get<0>(tuple): non-zero if recycling log, zero if regular log
50
50
  // get<1>(tuple): true if allow retry after read EOF, false otherwise
51
+ // get<2>(tuple): type of compression used
51
52
  class LogTest
52
53
  : public ::testing::TestWithParam<std::tuple<int, bool, CompressionType>> {
53
54
  private:
@@ -181,20 +182,30 @@ class LogTest
181
182
 
182
183
  Slice* get_reader_contents() { return &reader_contents_; }
183
184
 
184
- void Write(const std::string& msg) {
185
+ void Write(
186
+ const std::string& msg,
187
+ const std::unordered_map<uint32_t, size_t>* cf_to_ts_sz = nullptr) {
188
+ if (cf_to_ts_sz != nullptr && !cf_to_ts_sz->empty()) {
189
+ ASSERT_OK(writer_->MaybeAddUserDefinedTimestampSizeRecord(*cf_to_ts_sz));
190
+ }
185
191
  ASSERT_OK(writer_->AddRecord(Slice(msg)));
186
192
  }
187
193
 
188
194
  size_t WrittenBytes() const { return dest_contents().size(); }
189
195
 
190
- std::string Read(const WALRecoveryMode wal_recovery_mode =
191
- WALRecoveryMode::kTolerateCorruptedTailRecords) {
196
+ std::string Read(
197
+ const WALRecoveryMode wal_recovery_mode =
198
+ WALRecoveryMode::kTolerateCorruptedTailRecords,
199
+ std::unordered_map<uint32_t, size_t>* cf_to_ts_sz = nullptr) {
192
200
  std::string scratch;
193
201
  Slice record;
194
202
  bool ret = false;
195
203
  uint64_t record_checksum;
196
204
  ret = reader_->ReadRecord(&record, &scratch, wal_recovery_mode,
197
205
  &record_checksum);
206
+ if (cf_to_ts_sz != nullptr) {
207
+ *cf_to_ts_sz = reader_->GetRecordedTimestampSize();
208
+ }
198
209
  if (ret) {
199
210
  if (!allow_retry_read_) {
200
211
  // allow_retry_read_ means using FragmentBufferedReader which does not
@@ -257,6 +268,17 @@ class LogTest
257
268
  return "OK";
258
269
  }
259
270
  }
271
+
272
+ void CheckRecordAndTimestampSize(
273
+ std::string record,
274
+ std::unordered_map<uint32_t, size_t>& expected_ts_sz) {
275
+ std::unordered_map<uint32_t, size_t> recorded_ts_sz;
276
+ ASSERT_EQ(record,
277
+ Read(WALRecoveryMode::
278
+ kTolerateCorruptedTailRecords /* wal_recovery_mode */,
279
+ &recorded_ts_sz));
280
+ EXPECT_EQ(expected_ts_sz, recorded_ts_sz);
281
+ }
260
282
  };
261
283
 
262
284
  TEST_P(LogTest, Empty) { ASSERT_EQ("EOF", Read()); }
@@ -274,6 +296,43 @@ TEST_P(LogTest, ReadWrite) {
274
296
  ASSERT_EQ("EOF", Read()); // Make sure reads at eof work
275
297
  }
276
298
 
299
+ TEST_P(LogTest, ReadWriteWithTimestampSize) {
300
+ std::unordered_map<uint32_t, size_t> ts_sz_one = {
301
+ {1, sizeof(uint64_t)},
302
+ };
303
+ Write("foo", &ts_sz_one);
304
+ Write("bar");
305
+ std::unordered_map<uint32_t, size_t> ts_sz_two = {{2, sizeof(char)}};
306
+ Write("", &ts_sz_two);
307
+ Write("xxxx");
308
+
309
+ CheckRecordAndTimestampSize("foo", ts_sz_one);
310
+ CheckRecordAndTimestampSize("bar", ts_sz_one);
311
+ std::unordered_map<uint32_t, size_t> expected_ts_sz_two;
312
+ // User-defined timestamp size records are accumulated and applied to
313
+ // subsequent records.
314
+ expected_ts_sz_two.insert(ts_sz_one.begin(), ts_sz_one.end());
315
+ expected_ts_sz_two.insert(ts_sz_two.begin(), ts_sz_two.end());
316
+ CheckRecordAndTimestampSize("", expected_ts_sz_two);
317
+ CheckRecordAndTimestampSize("xxxx", expected_ts_sz_two);
318
+ ASSERT_EQ("EOF", Read());
319
+ ASSERT_EQ("EOF", Read()); // Make sure reads at eof work
320
+ }
321
+
322
+ TEST_P(LogTest, ReadWriteWithTimestampSizeZeroTimestampIgnored) {
323
+ std::unordered_map<uint32_t, size_t> ts_sz_one = {{1, sizeof(uint64_t)}};
324
+ Write("foo", &ts_sz_one);
325
+ std::unordered_map<uint32_t, size_t> ts_sz_two(ts_sz_one.begin(),
326
+ ts_sz_one.end());
327
+ ts_sz_two.insert(std::make_pair(2, 0));
328
+ Write("bar", &ts_sz_two);
329
+
330
+ CheckRecordAndTimestampSize("foo", ts_sz_one);
331
+ CheckRecordAndTimestampSize("bar", ts_sz_one);
332
+ ASSERT_EQ("EOF", Read());
333
+ ASSERT_EQ("EOF", Read()); // Make sure reads at eof work
334
+ }
335
+
277
336
  TEST_P(LogTest, ManyBlocks) {
278
337
  for (int i = 0; i < 100000; i++) {
279
338
  Write(NumberString(i));
@@ -685,6 +744,39 @@ TEST_P(LogTest, Recycle) {
685
744
  ASSERT_EQ("EOF", Read());
686
745
  }
687
746
 
747
+ TEST_P(LogTest, RecycleWithTimestampSize) {
748
+ bool recyclable_log = (std::get<0>(GetParam()) != 0);
749
+ if (!recyclable_log) {
750
+ return; // test is only valid for recycled logs
751
+ }
752
+ std::unordered_map<uint32_t, size_t> ts_sz_one = {
753
+ {1, sizeof(uint32_t)},
754
+ };
755
+ Write("foo", &ts_sz_one);
756
+ Write("bar");
757
+ Write("baz");
758
+ Write("bif");
759
+ Write("blitz");
760
+ while (get_reader_contents()->size() < log::kBlockSize * 2) {
761
+ Write("xxxxxxxxxxxxxxxx");
762
+ }
763
+ std::unique_ptr<FSWritableFile> sink(
764
+ new test::OverwritingStringSink(get_reader_contents()));
765
+ std::unique_ptr<WritableFileWriter> dest_holder(new WritableFileWriter(
766
+ std::move(sink), "" /* don't care */, FileOptions()));
767
+ Writer recycle_writer(std::move(dest_holder), 123, true);
768
+ std::unordered_map<uint32_t, size_t> ts_sz_two = {
769
+ {2, sizeof(uint64_t)},
770
+ };
771
+ ASSERT_OK(recycle_writer.MaybeAddUserDefinedTimestampSizeRecord(ts_sz_two));
772
+ ASSERT_OK(recycle_writer.AddRecord(Slice("foooo")));
773
+ ASSERT_OK(recycle_writer.AddRecord(Slice("bar")));
774
+ ASSERT_GE(get_reader_contents()->size(), log::kBlockSize * 2);
775
+ CheckRecordAndTimestampSize("foooo", ts_sz_two);
776
+ CheckRecordAndTimestampSize("bar", ts_sz_two);
777
+ ASSERT_EQ("EOF", Read());
778
+ }
779
+
688
780
  // Do NOT enable compression for this instantiation.
689
781
  INSTANTIATE_TEST_CASE_P(
690
782
  Log, LogTest,
@@ -940,6 +1032,35 @@ TEST_P(CompressionLogTest, ReadWrite) {
940
1032
  ASSERT_EQ("EOF", Read()); // Make sure reads at eof work
941
1033
  }
942
1034
 
1035
+ TEST_P(CompressionLogTest, ReadWriteWithTimestampSize) {
1036
+ CompressionType compression_type = std::get<2>(GetParam());
1037
+ if (!StreamingCompressionTypeSupported(compression_type)) {
1038
+ ROCKSDB_GTEST_SKIP("Test requires support for compression type");
1039
+ return;
1040
+ }
1041
+ ASSERT_OK(SetupTestEnv());
1042
+ std::unordered_map<uint32_t, size_t> ts_sz_one = {
1043
+ {1, sizeof(uint64_t)},
1044
+ };
1045
+ Write("foo", &ts_sz_one);
1046
+ Write("bar");
1047
+ std::unordered_map<uint32_t, size_t> ts_sz_two = {{2, sizeof(char)}};
1048
+ Write("", &ts_sz_two);
1049
+ Write("xxxx");
1050
+
1051
+ CheckRecordAndTimestampSize("foo", ts_sz_one);
1052
+ CheckRecordAndTimestampSize("bar", ts_sz_one);
1053
+ std::unordered_map<uint32_t, size_t> expected_ts_sz_two;
1054
+ // User-defined timestamp size records are accumulated and applied to
1055
+ // subsequent records.
1056
+ expected_ts_sz_two.insert(ts_sz_one.begin(), ts_sz_one.end());
1057
+ expected_ts_sz_two.insert(ts_sz_two.begin(), ts_sz_two.end());
1058
+ CheckRecordAndTimestampSize("", expected_ts_sz_two);
1059
+ CheckRecordAndTimestampSize("xxxx", expected_ts_sz_two);
1060
+ ASSERT_EQ("EOF", Read());
1061
+ ASSERT_EQ("EOF", Read()); // Make sure reads at eof work
1062
+ }
1063
+
943
1064
  TEST_P(CompressionLogTest, ManyBlocks) {
944
1065
  CompressionType compression_type = std::get<2>(GetParam());
945
1066
  if (!StreamingCompressionTypeSupported(compression_type)) {
@@ -16,6 +16,7 @@
16
16
  #include "rocksdb/io_status.h"
17
17
  #include "util/coding.h"
18
18
  #include "util/crc32c.h"
19
+ #include "util/udt_util.h"
19
20
 
20
21
  namespace ROCKSDB_NAMESPACE {
21
22
  namespace log {
@@ -73,7 +74,6 @@ IOStatus Writer::AddRecord(const Slice& slice,
73
74
  // Fragment the record if necessary and emit it. Note that if slice
74
75
  // is empty, we still want to iterate once to emit a single
75
76
  // zero-length record
76
- IOStatus s;
77
77
  bool begin = true;
78
78
  int compress_remaining = 0;
79
79
  bool compress_start = false;
@@ -81,6 +81,8 @@ IOStatus Writer::AddRecord(const Slice& slice,
81
81
  compress_->Reset();
82
82
  compress_start = true;
83
83
  }
84
+
85
+ IOStatus s;
84
86
  do {
85
87
  const int64_t leftover = kBlockSize - block_offset_;
86
88
  assert(leftover >= 0);
@@ -194,6 +196,33 @@ IOStatus Writer::AddCompressionTypeRecord() {
194
196
  return s;
195
197
  }
196
198
 
199
+ IOStatus Writer::MaybeAddUserDefinedTimestampSizeRecord(
200
+ const std::unordered_map<uint32_t, size_t>& cf_to_ts_sz,
201
+ Env::IOPriority rate_limiter_priority) {
202
+ std::vector<std::pair<uint32_t, size_t>> ts_sz_to_record;
203
+ for (const auto& [cf_id, ts_sz] : cf_to_ts_sz) {
204
+ if (recorded_cf_to_ts_sz_.count(cf_id) != 0) {
205
+ // A column family's user-defined timestamp size should not be
206
+ // updated while DB is running.
207
+ assert(recorded_cf_to_ts_sz_[cf_id] == ts_sz);
208
+ } else if (ts_sz != 0) {
209
+ ts_sz_to_record.emplace_back(cf_id, ts_sz);
210
+ recorded_cf_to_ts_sz_.insert(std::make_pair(cf_id, ts_sz));
211
+ }
212
+ }
213
+ if (ts_sz_to_record.empty()) {
214
+ return IOStatus::OK();
215
+ }
216
+
217
+ UserDefinedTimestampSizeRecord record(std::move(ts_sz_to_record));
218
+ std::string encoded;
219
+ record.EncodeTo(&encoded);
220
+ RecordType type = recycle_log_files_ ? kRecyclableUserDefinedTimestampSizeType
221
+ : kUserDefinedTimestampSizeType;
222
+ return EmitPhysicalRecord(type, encoded.data(), encoded.size(),
223
+ rate_limiter_priority);
224
+ }
225
+
197
226
  bool Writer::BufferIsEmpty() { return dest_->BufferIsEmpty(); }
198
227
 
199
228
  IOStatus Writer::EmitPhysicalRecord(RecordType t, const char* ptr, size_t n,
@@ -209,7 +238,8 @@ IOStatus Writer::EmitPhysicalRecord(RecordType t, const char* ptr, size_t n,
209
238
  buf[6] = static_cast<char>(t);
210
239
 
211
240
  uint32_t crc = type_crc_[t];
212
- if (t < kRecyclableFullType || t == kSetCompressionType) {
241
+ if (t < kRecyclableFullType || t == kSetCompressionType ||
242
+ t == kUserDefinedTimestampSizeType) {
213
243
  // Legacy record format
214
244
  assert(block_offset_ + kHeaderSize + n <= kBlockSize);
215
245
  header_size = kHeaderSize;
@@ -10,6 +10,8 @@
10
10
 
11
11
  #include <cstdint>
12
12
  #include <memory>
13
+ #include <unordered_map>
14
+ #include <vector>
13
15
 
14
16
  #include "db/log_format.h"
15
17
  #include "rocksdb/compression_type.h"
@@ -87,6 +89,15 @@ class Writer {
87
89
  Env::IOPriority rate_limiter_priority = Env::IO_TOTAL);
88
90
  IOStatus AddCompressionTypeRecord();
89
91
 
92
+ // If there are column families in `cf_to_ts_sz` not included in
93
+ // `recorded_cf_to_ts_sz_` and its user-defined timestamp size is non-zero,
94
+ // adds a record of type kUserDefinedTimestampSizeType or
95
+ // kRecyclableUserDefinedTimestampSizeType for these column families.
96
+ // This timestamp size record applies to all subsequent records.
97
+ IOStatus MaybeAddUserDefinedTimestampSizeRecord(
98
+ const std::unordered_map<uint32_t, size_t>& cf_to_ts_sz,
99
+ Env::IOPriority rate_limiter_priority = Env::IO_TOTAL);
100
+
90
101
  WritableFileWriter* file() { return dest_.get(); }
91
102
  const WritableFileWriter* file() const { return dest_.get(); }
92
103
 
@@ -122,6 +133,11 @@ class Writer {
122
133
  StreamingCompress* compress_;
123
134
  // Reusable compressed output buffer
124
135
  std::unique_ptr<char[]> compressed_buffer_;
136
+
137
+ // The recorded user-defined timestamp size that have been written so far.
138
+ // Since the user-defined timestamp size cannot be changed while the DB is
139
+ // running, existing entry in this map cannot be updated.
140
+ std::unordered_map<uint32_t, size_t> recorded_cf_to_ts_sz_;
125
141
  };
126
142
 
127
143
  } // namespace log
@@ -26,7 +26,7 @@
26
26
  #include "memory/arena.h"
27
27
  #include "memory/memory_usage.h"
28
28
  #include "monitoring/perf_context_imp.h"
29
- #include "monitoring/statistics.h"
29
+ #include "monitoring/statistics_impl.h"
30
30
  #include "port/lang.h"
31
31
  #include "port/port.h"
32
32
  #include "rocksdb/comparator.h"
@@ -256,7 +256,7 @@ void MemTable::UpdateOldestKeyTime() {
256
256
  }
257
257
 
258
258
  Status MemTable::VerifyEntryChecksum(const char* entry,
259
- size_t protection_bytes_per_key,
259
+ uint32_t protection_bytes_per_key,
260
260
  bool allow_data_in_errors) {
261
261
  if (protection_bytes_per_key == 0) {
262
262
  return Status::OK();
@@ -285,28 +285,11 @@ Status MemTable::VerifyEntryChecksum(const char* entry,
285
285
  Slice value = Slice(value_ptr, value_length);
286
286
 
287
287
  const char* checksum_ptr = value_ptr + value_length;
288
- uint64_t expected = ProtectionInfo64()
289
- .ProtectKVO(user_key, value, type)
290
- .ProtectS(seq)
291
- .GetVal();
292
- bool match = true;
293
- switch (protection_bytes_per_key) {
294
- case 1:
295
- match = static_cast<uint8_t>(checksum_ptr[0]) ==
296
- static_cast<uint8_t>(expected);
297
- break;
298
- case 2:
299
- match = DecodeFixed16(checksum_ptr) == static_cast<uint16_t>(expected);
300
- break;
301
- case 4:
302
- match = DecodeFixed32(checksum_ptr) == static_cast<uint32_t>(expected);
303
- break;
304
- case 8:
305
- match = DecodeFixed64(checksum_ptr) == expected;
306
- break;
307
- default:
308
- assert(false);
309
- }
288
+ bool match =
289
+ ProtectionInfo64()
290
+ .ProtectKVO(user_key, value, type)
291
+ .ProtectS(seq)
292
+ .Verify(static_cast<uint8_t>(protection_bytes_per_key), checksum_ptr);
310
293
  if (!match) {
311
294
  std::string msg(
312
295
  "Corrupted memtable entry, per key-value checksum verification "
@@ -526,7 +509,7 @@ class MemTableIterator : public InternalIterator {
526
509
  bool valid_;
527
510
  bool arena_mode_;
528
511
  bool value_pinned_;
529
- size_t protection_bytes_per_key_;
512
+ uint32_t protection_bytes_per_key_;
530
513
  Status status_;
531
514
  Logger* logger_;
532
515
 
@@ -599,6 +582,7 @@ void MemTable::ConstructFragmentedRangeTombstones() {
599
582
  assert(!IsFragmentedRangeTombstonesConstructed(false));
600
583
  // There should be no concurrent Construction
601
584
  if (!is_range_del_table_empty_.load(std::memory_order_relaxed)) {
585
+ // TODO: plumb Env::IOActivity
602
586
  auto* unfragmented_iter =
603
587
  new MemTableIterator(*this, ReadOptions(), nullptr /* arena */,
604
588
  true /* use_range_del_table */);
@@ -683,28 +667,15 @@ void MemTable::UpdateEntryChecksum(const ProtectionInfoKVOS64* kv_prot_info,
683
667
  return;
684
668
  }
685
669
 
686
- uint64_t checksum = 0;
687
670
  if (kv_prot_info == nullptr) {
688
- checksum =
689
- ProtectionInfo64().ProtectKVO(key, value, type).ProtectS(s).GetVal();
671
+ ProtectionInfo64()
672
+ .ProtectKVO(key, value, type)
673
+ .ProtectS(s)
674
+ .Encode(static_cast<uint8_t>(moptions_.protection_bytes_per_key),
675
+ checksum_ptr);
690
676
  } else {
691
- checksum = kv_prot_info->GetVal();
692
- }
693
- switch (moptions_.protection_bytes_per_key) {
694
- case 1:
695
- checksum_ptr[0] = static_cast<uint8_t>(checksum);
696
- break;
697
- case 2:
698
- EncodeFixed16(checksum_ptr, static_cast<uint16_t>(checksum));
699
- break;
700
- case 4:
701
- EncodeFixed32(checksum_ptr, static_cast<uint32_t>(checksum));
702
- break;
703
- case 8:
704
- EncodeFixed64(checksum_ptr, checksum);
705
- break;
706
- default:
707
- assert(false);
677
+ kv_prot_info->Encode(
678
+ static_cast<uint8_t>(moptions_.protection_bytes_per_key), checksum_ptr);
708
679
  }
709
680
  }
710
681
 
@@ -901,7 +872,7 @@ struct Saver {
901
872
  ReadCallback* callback_;
902
873
  bool* is_blob_index;
903
874
  bool allow_data_in_errors;
904
- size_t protection_bytes_per_key;
875
+ uint32_t protection_bytes_per_key;
905
876
  bool CheckCallback(SequenceNumber _seq) {
906
877
  if (callback_) {
907
878
  return callback_->IsVisible(_seq);