@nxtedition/rocksdb 8.2.8 → 9.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (359)
  1. package/deps/rocksdb/rocksdb/CMakeLists.txt +7 -1
  2. package/deps/rocksdb/rocksdb/Makefile +22 -19
  3. package/deps/rocksdb/rocksdb/TARGETS +8 -0
  4. package/deps/rocksdb/rocksdb/cache/cache_bench_tool.cc +157 -61
  5. package/deps/rocksdb/rocksdb/cache/cache_test.cc +43 -92
  6. package/deps/rocksdb/rocksdb/cache/clock_cache.cc +632 -455
  7. package/deps/rocksdb/rocksdb/cache/clock_cache.h +244 -149
  8. package/deps/rocksdb/rocksdb/cache/compressed_secondary_cache.cc +41 -13
  9. package/deps/rocksdb/rocksdb/cache/compressed_secondary_cache.h +11 -1
  10. package/deps/rocksdb/rocksdb/cache/compressed_secondary_cache_test.cc +216 -17
  11. package/deps/rocksdb/rocksdb/cache/lru_cache.cc +7 -5
  12. package/deps/rocksdb/rocksdb/cache/lru_cache_test.cc +279 -199
  13. package/deps/rocksdb/rocksdb/cache/secondary_cache.cc +2 -1
  14. package/deps/rocksdb/rocksdb/cache/secondary_cache_adapter.cc +159 -8
  15. package/deps/rocksdb/rocksdb/cache/secondary_cache_adapter.h +28 -2
  16. package/deps/rocksdb/rocksdb/cache/sharded_cache.cc +1 -1
  17. package/deps/rocksdb/rocksdb/cache/sharded_cache.h +8 -0
  18. package/deps/rocksdb/rocksdb/crash_test.mk +14 -0
  19. package/deps/rocksdb/rocksdb/db/arena_wrapped_db_iter.cc +3 -1
  20. package/deps/rocksdb/rocksdb/db/blob/blob_file_builder.cc +1 -1
  21. package/deps/rocksdb/rocksdb/db/blob/blob_file_builder_test.cc +1 -1
  22. package/deps/rocksdb/rocksdb/db/blob/blob_file_cache.cc +2 -2
  23. package/deps/rocksdb/rocksdb/db/blob/blob_file_cache.h +1 -1
  24. package/deps/rocksdb/rocksdb/db/blob/blob_file_reader.cc +18 -21
  25. package/deps/rocksdb/rocksdb/db/blob/blob_file_reader.h +1 -2
  26. package/deps/rocksdb/rocksdb/db/blob/blob_file_reader_test.cc +1 -1
  27. package/deps/rocksdb/rocksdb/db/blob/blob_log_sequential_reader.cc +2 -3
  28. package/deps/rocksdb/rocksdb/db/blob/blob_source_test.cc +1 -1
  29. package/deps/rocksdb/rocksdb/db/builder.cc +32 -7
  30. package/deps/rocksdb/rocksdb/db/c.cc +169 -6
  31. package/deps/rocksdb/rocksdb/db/c_test.c +104 -6
  32. package/deps/rocksdb/rocksdb/db/column_family.cc +98 -47
  33. package/deps/rocksdb/rocksdb/db/column_family.h +25 -2
  34. package/deps/rocksdb/rocksdb/db/column_family_test.cc +213 -2
  35. package/deps/rocksdb/rocksdb/db/compact_files_test.cc +4 -1
  36. package/deps/rocksdb/rocksdb/db/compaction/compaction.cc +93 -23
  37. package/deps/rocksdb/rocksdb/db/compaction/compaction.h +33 -9
  38. package/deps/rocksdb/rocksdb/db/compaction/compaction_iterator.cc +7 -6
  39. package/deps/rocksdb/rocksdb/db/compaction/compaction_iterator.h +17 -6
  40. package/deps/rocksdb/rocksdb/db/compaction/compaction_iterator_test.cc +2 -2
  41. package/deps/rocksdb/rocksdb/db/compaction/compaction_job.cc +107 -43
  42. package/deps/rocksdb/rocksdb/db/compaction/compaction_job.h +15 -4
  43. package/deps/rocksdb/rocksdb/db/compaction/compaction_job_stats_test.cc +2 -0
  44. package/deps/rocksdb/rocksdb/db/compaction/compaction_job_test.cc +4 -2
  45. package/deps/rocksdb/rocksdb/db/compaction/compaction_outputs.cc +25 -17
  46. package/deps/rocksdb/rocksdb/db/compaction/compaction_outputs.h +13 -4
  47. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker.cc +11 -11
  48. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_test.cc +29 -4
  49. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_universal.cc +24 -31
  50. package/deps/rocksdb/rocksdb/db/compaction/file_pri.h +3 -1
  51. package/deps/rocksdb/rocksdb/db/compaction/tiered_compaction_test.cc +19 -19
  52. package/deps/rocksdb/rocksdb/db/comparator_db_test.cc +2 -1
  53. package/deps/rocksdb/rocksdb/db/convenience.cc +20 -3
  54. package/deps/rocksdb/rocksdb/db/convenience_impl.h +15 -0
  55. package/deps/rocksdb/rocksdb/db/corruption_test.cc +17 -0
  56. package/deps/rocksdb/rocksdb/db/cuckoo_table_db_test.cc +1 -0
  57. package/deps/rocksdb/rocksdb/db/db_basic_test.cc +17 -3
  58. package/deps/rocksdb/rocksdb/db/db_block_cache_test.cc +5 -0
  59. package/deps/rocksdb/rocksdb/db/db_bloom_filter_test.cc +15 -15
  60. package/deps/rocksdb/rocksdb/db/db_compaction_test.cc +666 -44
  61. package/deps/rocksdb/rocksdb/db/db_filesnapshot.cc +2 -29
  62. package/deps/rocksdb/rocksdb/db/db_flush_test.cc +274 -1
  63. package/deps/rocksdb/rocksdb/db/db_impl/compacted_db_impl.cc +40 -19
  64. package/deps/rocksdb/rocksdb/db/db_impl/compacted_db_impl.h +6 -5
  65. package/deps/rocksdb/rocksdb/db/db_impl/db_impl.cc +250 -116
  66. package/deps/rocksdb/rocksdb/db/db_impl/db_impl.h +51 -23
  67. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_compaction_flush.cc +354 -96
  68. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_debug.cc +6 -3
  69. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_experimental.cc +2 -1
  70. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_files.cc +5 -0
  71. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_open.cc +50 -21
  72. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_readonly.cc +26 -13
  73. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_readonly.h +13 -5
  74. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_secondary.cc +61 -21
  75. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_secondary.h +8 -87
  76. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_write.cc +7 -1
  77. package/deps/rocksdb/rocksdb/db/db_iter.cc +2 -2
  78. package/deps/rocksdb/rocksdb/db/db_iter.h +1 -0
  79. package/deps/rocksdb/rocksdb/db/db_merge_operand_test.cc +4 -11
  80. package/deps/rocksdb/rocksdb/db/db_merge_operator_test.cc +6 -6
  81. package/deps/rocksdb/rocksdb/db/db_options_test.cc +39 -29
  82. package/deps/rocksdb/rocksdb/db/db_properties_test.cc +26 -36
  83. package/deps/rocksdb/rocksdb/db/db_range_del_test.cc +106 -0
  84. package/deps/rocksdb/rocksdb/db/db_rate_limiter_test.cc +12 -3
  85. package/deps/rocksdb/rocksdb/db/db_statistics_test.cc +1 -1
  86. package/deps/rocksdb/rocksdb/db/db_table_properties_test.cc +1 -0
  87. package/deps/rocksdb/rocksdb/db/db_tailing_iter_test.cc +279 -166
  88. package/deps/rocksdb/rocksdb/db/db_test.cc +48 -21
  89. package/deps/rocksdb/rocksdb/db/db_test2.cc +81 -12
  90. package/deps/rocksdb/rocksdb/db/db_test_util.cc +14 -6
  91. package/deps/rocksdb/rocksdb/db/db_test_util.h +40 -0
  92. package/deps/rocksdb/rocksdb/db/db_universal_compaction_test.cc +13 -1
  93. package/deps/rocksdb/rocksdb/db/db_wal_test.cc +233 -0
  94. package/deps/rocksdb/rocksdb/db/db_with_timestamp_basic_test.cc +143 -0
  95. package/deps/rocksdb/rocksdb/db/db_write_buffer_manager_test.cc +6 -6
  96. package/deps/rocksdb/rocksdb/db/db_write_test.cc +2 -2
  97. package/deps/rocksdb/rocksdb/db/dbformat.cc +36 -0
  98. package/deps/rocksdb/rocksdb/db/dbformat.h +169 -20
  99. package/deps/rocksdb/rocksdb/db/dbformat_test.cc +129 -0
  100. package/deps/rocksdb/rocksdb/db/error_handler.cc +16 -0
  101. package/deps/rocksdb/rocksdb/db/error_handler.h +6 -3
  102. package/deps/rocksdb/rocksdb/db/error_handler_fs_test.cc +4 -4
  103. package/deps/rocksdb/rocksdb/db/event_helpers.cc +4 -0
  104. package/deps/rocksdb/rocksdb/db/experimental.cc +2 -1
  105. package/deps/rocksdb/rocksdb/db/external_sst_file_basic_test.cc +4 -4
  106. package/deps/rocksdb/rocksdb/db/external_sst_file_ingestion_job.cc +17 -8
  107. package/deps/rocksdb/rocksdb/db/external_sst_file_test.cc +86 -4
  108. package/deps/rocksdb/rocksdb/db/fault_injection_test.cc +1 -1
  109. package/deps/rocksdb/rocksdb/db/file_indexer.cc +2 -4
  110. package/deps/rocksdb/rocksdb/db/flush_job.cc +101 -11
  111. package/deps/rocksdb/rocksdb/db/flush_job.h +24 -1
  112. package/deps/rocksdb/rocksdb/db/flush_job_test.cc +88 -11
  113. package/deps/rocksdb/rocksdb/db/forward_iterator.cc +2 -3
  114. package/deps/rocksdb/rocksdb/db/import_column_family_job.cc +159 -91
  115. package/deps/rocksdb/rocksdb/db/import_column_family_job.h +19 -10
  116. package/deps/rocksdb/rocksdb/db/import_column_family_test.cc +143 -0
  117. package/deps/rocksdb/rocksdb/db/internal_stats.cc +13 -1
  118. package/deps/rocksdb/rocksdb/db/internal_stats.h +2 -0
  119. package/deps/rocksdb/rocksdb/db/listener_test.cc +2 -1
  120. package/deps/rocksdb/rocksdb/db/log_reader.h +3 -2
  121. package/deps/rocksdb/rocksdb/db/log_test.cc +17 -21
  122. package/deps/rocksdb/rocksdb/db/log_writer.cc +1 -1
  123. package/deps/rocksdb/rocksdb/db/log_writer.h +3 -2
  124. package/deps/rocksdb/rocksdb/db/manual_compaction_test.cc +4 -3
  125. package/deps/rocksdb/rocksdb/db/memtable.cc +52 -13
  126. package/deps/rocksdb/rocksdb/db/memtable.h +45 -1
  127. package/deps/rocksdb/rocksdb/db/memtable_list.cc +44 -10
  128. package/deps/rocksdb/rocksdb/db/memtable_list.h +32 -1
  129. package/deps/rocksdb/rocksdb/db/memtable_list_test.cc +90 -4
  130. package/deps/rocksdb/rocksdb/db/perf_context_test.cc +2 -2
  131. package/deps/rocksdb/rocksdb/db/plain_table_db_test.cc +1 -0
  132. package/deps/rocksdb/rocksdb/db/repair.cc +21 -4
  133. package/deps/rocksdb/rocksdb/db/repair_test.cc +143 -2
  134. package/deps/rocksdb/rocksdb/db/seqno_time_test.cc +5 -4
  135. package/deps/rocksdb/rocksdb/db/table_cache.cc +44 -35
  136. package/deps/rocksdb/rocksdb/db/table_cache.h +6 -6
  137. package/deps/rocksdb/rocksdb/db/table_cache_sync_and_async.h +2 -2
  138. package/deps/rocksdb/rocksdb/db/version_builder.cc +0 -1
  139. package/deps/rocksdb/rocksdb/db/version_builder_test.cc +236 -204
  140. package/deps/rocksdb/rocksdb/db/version_edit.cc +66 -4
  141. package/deps/rocksdb/rocksdb/db/version_edit.h +48 -6
  142. package/deps/rocksdb/rocksdb/db/version_edit_handler.cc +80 -8
  143. package/deps/rocksdb/rocksdb/db/version_edit_handler.h +12 -0
  144. package/deps/rocksdb/rocksdb/db/version_edit_test.cc +86 -17
  145. package/deps/rocksdb/rocksdb/db/version_set.cc +136 -41
  146. package/deps/rocksdb/rocksdb/db/version_set.h +28 -7
  147. package/deps/rocksdb/rocksdb/db/version_set_test.cc +25 -15
  148. package/deps/rocksdb/rocksdb/db/write_batch.cc +11 -0
  149. package/deps/rocksdb/rocksdb/db/write_batch_internal.h +3 -0
  150. package/deps/rocksdb/rocksdb/db/write_batch_test.cc +16 -0
  151. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_common.h +22 -3
  152. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_driver.cc +2 -0
  153. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_env_wrapper.h +42 -0
  154. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_gflags.cc +32 -3
  155. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_shared_state.h +7 -0
  156. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_test_base.cc +247 -120
  157. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_test_base.h +9 -4
  158. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_tool.cc +13 -6
  159. package/deps/rocksdb/rocksdb/db_stress_tool/expected_value.h +2 -0
  160. package/deps/rocksdb/rocksdb/db_stress_tool/multi_ops_txns_stress.cc +15 -27
  161. package/deps/rocksdb/rocksdb/db_stress_tool/no_batched_ops_stress.cc +264 -69
  162. package/deps/rocksdb/rocksdb/env/env.cc +1 -2
  163. package/deps/rocksdb/rocksdb/env/env_encryption.cc +11 -165
  164. package/deps/rocksdb/rocksdb/env/env_encryption_ctr.h +0 -17
  165. package/deps/rocksdb/rocksdb/env/env_posix.cc +6 -2
  166. package/deps/rocksdb/rocksdb/env/env_test.cc +86 -2
  167. package/deps/rocksdb/rocksdb/env/fs_posix.cc +6 -4
  168. package/deps/rocksdb/rocksdb/env/unique_id_gen.cc +78 -0
  169. package/deps/rocksdb/rocksdb/env/unique_id_gen.h +34 -0
  170. package/deps/rocksdb/rocksdb/file/delete_scheduler.cc +1 -0
  171. package/deps/rocksdb/rocksdb/file/delete_scheduler_test.cc +15 -4
  172. package/deps/rocksdb/rocksdb/file/file_prefetch_buffer.cc +52 -43
  173. package/deps/rocksdb/rocksdb/file/file_prefetch_buffer.h +34 -18
  174. package/deps/rocksdb/rocksdb/file/file_util.cc +10 -5
  175. package/deps/rocksdb/rocksdb/file/file_util.h +13 -1
  176. package/deps/rocksdb/rocksdb/file/prefetch_test.cc +724 -79
  177. package/deps/rocksdb/rocksdb/file/random_access_file_reader.cc +64 -33
  178. package/deps/rocksdb/rocksdb/file/random_access_file_reader.h +3 -16
  179. package/deps/rocksdb/rocksdb/file/random_access_file_reader_test.cc +23 -12
  180. package/deps/rocksdb/rocksdb/file/sequence_file_reader.h +3 -0
  181. package/deps/rocksdb/rocksdb/include/rocksdb/advanced_cache.h +2 -1
  182. package/deps/rocksdb/rocksdb/include/rocksdb/advanced_options.h +153 -88
  183. package/deps/rocksdb/rocksdb/include/rocksdb/c.h +70 -2
  184. package/deps/rocksdb/rocksdb/include/rocksdb/cache.h +50 -11
  185. package/deps/rocksdb/rocksdb/include/rocksdb/compaction_job_stats.h +3 -0
  186. package/deps/rocksdb/rocksdb/include/rocksdb/comparator.h +16 -2
  187. package/deps/rocksdb/rocksdb/include/rocksdb/convenience.h +1 -1
  188. package/deps/rocksdb/rocksdb/include/rocksdb/db.h +55 -8
  189. package/deps/rocksdb/rocksdb/include/rocksdb/env.h +32 -4
  190. package/deps/rocksdb/rocksdb/include/rocksdb/env_encryption.h +9 -109
  191. package/deps/rocksdb/rocksdb/include/rocksdb/file_system.h +90 -13
  192. package/deps/rocksdb/rocksdb/include/rocksdb/listener.h +3 -0
  193. package/deps/rocksdb/rocksdb/include/rocksdb/options.h +85 -17
  194. package/deps/rocksdb/rocksdb/include/rocksdb/secondary_cache.h +13 -1
  195. package/deps/rocksdb/rocksdb/include/rocksdb/sst_file_manager.h +2 -1
  196. package/deps/rocksdb/rocksdb/include/rocksdb/sst_file_writer.h +5 -1
  197. package/deps/rocksdb/rocksdb/include/rocksdb/statistics.h +21 -2
  198. package/deps/rocksdb/rocksdb/include/rocksdb/table.h +7 -1
  199. package/deps/rocksdb/rocksdb/include/rocksdb/table_properties.h +6 -0
  200. package/deps/rocksdb/rocksdb/include/rocksdb/thread_status.h +5 -0
  201. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/optimistic_transaction_db.h +33 -2
  202. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/stackable_db.h +14 -0
  203. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/transaction.h +33 -2
  204. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/write_batch_with_index.h +0 -3
  205. package/deps/rocksdb/rocksdb/include/rocksdb/version.h +2 -2
  206. package/deps/rocksdb/rocksdb/include/rocksdb/write_batch.h +3 -0
  207. package/deps/rocksdb/rocksdb/memory/arena_test.cc +18 -11
  208. package/deps/rocksdb/rocksdb/memory/jemalloc_nodump_allocator.cc +2 -1
  209. package/deps/rocksdb/rocksdb/microbench/db_basic_bench.cc +69 -34
  210. package/deps/rocksdb/rocksdb/monitoring/statistics.cc +16 -1
  211. package/deps/rocksdb/rocksdb/monitoring/thread_status_util_debug.cc +10 -0
  212. package/deps/rocksdb/rocksdb/options/cf_options.cc +19 -0
  213. package/deps/rocksdb/rocksdb/options/cf_options.h +10 -2
  214. package/deps/rocksdb/rocksdb/options/customizable_test.cc +2 -1
  215. package/deps/rocksdb/rocksdb/options/db_options.cc +7 -0
  216. package/deps/rocksdb/rocksdb/options/db_options.h +1 -0
  217. package/deps/rocksdb/rocksdb/options/options.cc +15 -1
  218. package/deps/rocksdb/rocksdb/options/options_helper.cc +6 -0
  219. package/deps/rocksdb/rocksdb/options/options_settable_test.cc +11 -3
  220. package/deps/rocksdb/rocksdb/options/options_test.cc +8 -0
  221. package/deps/rocksdb/rocksdb/port/mmap.h +20 -0
  222. package/deps/rocksdb/rocksdb/port/stack_trace.cc +27 -12
  223. package/deps/rocksdb/rocksdb/port/win/env_win.h +1 -1
  224. package/deps/rocksdb/rocksdb/src.mk +3 -0
  225. package/deps/rocksdb/rocksdb/table/block_based/binary_search_index_reader.cc +2 -1
  226. package/deps/rocksdb/rocksdb/table/block_based/block.cc +48 -22
  227. package/deps/rocksdb/rocksdb/table/block_based/block.h +60 -12
  228. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_builder.cc +115 -42
  229. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_factory.cc +6 -5
  230. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_iterator.cc +60 -2
  231. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_iterator.h +2 -0
  232. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader.cc +62 -44
  233. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader.h +36 -14
  234. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader_sync_and_async.h +38 -15
  235. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader_test.cc +219 -51
  236. package/deps/rocksdb/rocksdb/table/block_based/block_builder.cc +41 -8
  237. package/deps/rocksdb/rocksdb/table/block_based/block_builder.h +25 -1
  238. package/deps/rocksdb/rocksdb/table/block_based/block_prefetcher.cc +50 -21
  239. package/deps/rocksdb/rocksdb/table/block_based/block_prefetcher.h +11 -4
  240. package/deps/rocksdb/rocksdb/table/block_based/block_test.cc +195 -55
  241. package/deps/rocksdb/rocksdb/table/block_based/hash_index_reader.cc +1 -1
  242. package/deps/rocksdb/rocksdb/table/block_based/index_builder.cc +31 -16
  243. package/deps/rocksdb/rocksdb/table/block_based/index_builder.h +97 -58
  244. package/deps/rocksdb/rocksdb/table/block_based/index_reader_common.cc +1 -1
  245. package/deps/rocksdb/rocksdb/table/block_based/index_reader_common.h +6 -0
  246. package/deps/rocksdb/rocksdb/table/block_based/partitioned_filter_block.cc +27 -12
  247. package/deps/rocksdb/rocksdb/table/block_based/partitioned_filter_block.h +3 -1
  248. package/deps/rocksdb/rocksdb/table/block_based/partitioned_filter_block_test.cc +114 -70
  249. package/deps/rocksdb/rocksdb/table/block_based/partitioned_index_iterator.cc +1 -2
  250. package/deps/rocksdb/rocksdb/table/block_based/partitioned_index_reader.cc +9 -6
  251. package/deps/rocksdb/rocksdb/table/block_based/reader_common.cc +15 -3
  252. package/deps/rocksdb/rocksdb/table/block_based/reader_common.h +6 -3
  253. package/deps/rocksdb/rocksdb/table/block_fetcher.cc +11 -11
  254. package/deps/rocksdb/rocksdb/table/block_fetcher_test.cc +3 -0
  255. package/deps/rocksdb/rocksdb/table/compaction_merging_iterator.cc +1 -0
  256. package/deps/rocksdb/rocksdb/table/cuckoo/cuckoo_table_builder.cc +6 -2
  257. package/deps/rocksdb/rocksdb/table/cuckoo/cuckoo_table_builder_test.cc +1 -2
  258. package/deps/rocksdb/rocksdb/table/cuckoo/cuckoo_table_reader.cc +2 -3
  259. package/deps/rocksdb/rocksdb/table/format.cc +175 -33
  260. package/deps/rocksdb/rocksdb/table/format.h +63 -10
  261. package/deps/rocksdb/rocksdb/table/merging_iterator.cc +10 -2
  262. package/deps/rocksdb/rocksdb/table/meta_blocks.cc +12 -4
  263. package/deps/rocksdb/rocksdb/table/meta_blocks.h +1 -0
  264. package/deps/rocksdb/rocksdb/table/mock_table.cc +8 -3
  265. package/deps/rocksdb/rocksdb/table/plain/plain_table_builder.cc +10 -5
  266. package/deps/rocksdb/rocksdb/table/plain/plain_table_builder.h +10 -1
  267. package/deps/rocksdb/rocksdb/table/plain/plain_table_key_coding.cc +1 -2
  268. package/deps/rocksdb/rocksdb/table/plain/plain_table_reader.cc +3 -3
  269. package/deps/rocksdb/rocksdb/table/sst_file_dumper.cc +12 -3
  270. package/deps/rocksdb/rocksdb/table/sst_file_writer.cc +26 -1
  271. package/deps/rocksdb/rocksdb/table/table_builder.h +6 -2
  272. package/deps/rocksdb/rocksdb/table/table_properties.cc +6 -0
  273. package/deps/rocksdb/rocksdb/table/table_test.cc +52 -22
  274. package/deps/rocksdb/rocksdb/test_util/secondary_cache_test_util.h +19 -7
  275. package/deps/rocksdb/rocksdb/test_util/sync_point.h +3 -1
  276. package/deps/rocksdb/rocksdb/test_util/testutil.cc +29 -0
  277. package/deps/rocksdb/rocksdb/test_util/testutil.h +19 -0
  278. package/deps/rocksdb/rocksdb/tools/db_bench_tool.cc +65 -26
  279. package/deps/rocksdb/rocksdb/tools/ldb_cmd.cc +8 -5
  280. package/deps/rocksdb/rocksdb/tools/ldb_tool.cc +1 -0
  281. package/deps/rocksdb/rocksdb/tools/reduce_levels_test.cc +1 -0
  282. package/deps/rocksdb/rocksdb/tools/sst_dump_test.cc +0 -1
  283. package/deps/rocksdb/rocksdb/tools/sst_dump_tool.cc +4 -0
  284. package/deps/rocksdb/rocksdb/unreleased_history/README.txt +73 -0
  285. package/deps/rocksdb/rocksdb/unreleased_history/add.sh +27 -0
  286. package/deps/rocksdb/rocksdb/unreleased_history/behavior_changes/.gitkeep +0 -0
  287. package/deps/rocksdb/rocksdb/unreleased_history/bug_fixes/.gitkeep +0 -0
  288. package/deps/rocksdb/rocksdb/unreleased_history/new_features/.gitkeep +0 -0
  289. package/deps/rocksdb/rocksdb/unreleased_history/performance_improvements/.gitkeep +0 -0
  290. package/deps/rocksdb/rocksdb/unreleased_history/public_api_changes/.gitkeep +0 -0
  291. package/deps/rocksdb/rocksdb/unreleased_history/release.sh +104 -0
  292. package/deps/rocksdb/rocksdb/util/async_file_reader.cc +5 -0
  293. package/deps/rocksdb/rocksdb/util/bloom_impl.h +3 -3
  294. package/deps/rocksdb/rocksdb/util/cast_util.h +14 -0
  295. package/deps/rocksdb/rocksdb/util/compaction_job_stats_impl.cc +2 -0
  296. package/deps/rocksdb/rocksdb/util/comparator.cc +29 -7
  297. package/deps/rocksdb/rocksdb/util/compression.cc +4 -4
  298. package/deps/rocksdb/rocksdb/util/compression.h +110 -32
  299. package/deps/rocksdb/rocksdb/util/core_local.h +2 -1
  300. package/deps/rocksdb/rocksdb/util/dynamic_bloom.h +4 -4
  301. package/deps/rocksdb/rocksdb/util/filelock_test.cc +3 -0
  302. package/deps/rocksdb/rocksdb/util/hash.h +7 -3
  303. package/deps/rocksdb/rocksdb/util/hash_test.cc +44 -0
  304. package/deps/rocksdb/rocksdb/util/math.h +58 -6
  305. package/deps/rocksdb/rocksdb/util/math128.h +29 -7
  306. package/deps/rocksdb/rocksdb/util/mutexlock.h +35 -27
  307. package/deps/rocksdb/rocksdb/util/single_thread_executor.h +1 -0
  308. package/deps/rocksdb/rocksdb/util/stop_watch.h +1 -1
  309. package/deps/rocksdb/rocksdb/util/thread_operation.h +8 -1
  310. package/deps/rocksdb/rocksdb/util/udt_util.cc +343 -0
  311. package/deps/rocksdb/rocksdb/util/udt_util.h +173 -1
  312. package/deps/rocksdb/rocksdb/util/udt_util_test.cc +447 -0
  313. package/deps/rocksdb/rocksdb/util/write_batch_util.cc +25 -0
  314. package/deps/rocksdb/rocksdb/util/write_batch_util.h +80 -0
  315. package/deps/rocksdb/rocksdb/utilities/backup/backup_engine_test.cc +4 -4
  316. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_db_impl.cc +69 -25
  317. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_db_impl.h +7 -6
  318. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_db_listener.h +1 -1
  319. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_dump_tool.cc +2 -3
  320. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_file.cc +6 -11
  321. package/deps/rocksdb/rocksdb/utilities/cache_dump_load_impl.h +1 -2
  322. package/deps/rocksdb/rocksdb/utilities/checkpoint/checkpoint_test.cc +4 -5
  323. package/deps/rocksdb/rocksdb/utilities/fault_injection_fs.cc +1 -1
  324. package/deps/rocksdb/rocksdb/utilities/fault_injection_secondary_cache.cc +2 -2
  325. package/deps/rocksdb/rocksdb/utilities/fault_injection_secondary_cache.h +2 -1
  326. package/deps/rocksdb/rocksdb/utilities/option_change_migration/option_change_migration_test.cc +3 -3
  327. package/deps/rocksdb/rocksdb/utilities/persistent_cache/block_cache_tier_file.cc +1 -2
  328. package/deps/rocksdb/rocksdb/utilities/trace/file_trace_reader_writer.cc +2 -3
  329. package/deps/rocksdb/rocksdb/utilities/transactions/lock/point/point_lock_manager_test.cc +2 -2
  330. package/deps/rocksdb/rocksdb/utilities/transactions/lock/point/point_lock_manager_test.h +1 -1
  331. package/deps/rocksdb/rocksdb/utilities/transactions/optimistic_transaction.cc +23 -8
  332. package/deps/rocksdb/rocksdb/utilities/transactions/optimistic_transaction_db_impl.cc +9 -6
  333. package/deps/rocksdb/rocksdb/utilities/transactions/optimistic_transaction_db_impl.h +37 -12
  334. package/deps/rocksdb/rocksdb/utilities/transactions/optimistic_transaction_test.cc +231 -33
  335. package/deps/rocksdb/rocksdb/utilities/transactions/pessimistic_transaction.h +0 -1
  336. package/deps/rocksdb/rocksdb/utilities/transactions/transaction_base.cc +76 -20
  337. package/deps/rocksdb/rocksdb/utilities/transactions/transaction_base.h +18 -9
  338. package/deps/rocksdb/rocksdb/utilities/transactions/transaction_test.cc +40 -23
  339. package/deps/rocksdb/rocksdb/utilities/transactions/transaction_test.h +13 -12
  340. package/deps/rocksdb/rocksdb/utilities/transactions/write_committed_transaction_ts_test.cc +7 -0
  341. package/deps/rocksdb/rocksdb/utilities/transactions/write_prepared_transaction_test.cc +1 -1
  342. package/deps/rocksdb/rocksdb/utilities/transactions/write_prepared_txn.cc +41 -11
  343. package/deps/rocksdb/rocksdb/utilities/transactions/write_prepared_txn.h +6 -3
  344. package/deps/rocksdb/rocksdb/utilities/transactions/write_prepared_txn_db.cc +71 -24
  345. package/deps/rocksdb/rocksdb/utilities/transactions/write_prepared_txn_db.h +19 -4
  346. package/deps/rocksdb/rocksdb/utilities/transactions/write_unprepared_transaction_test.cc +60 -107
  347. package/deps/rocksdb/rocksdb/utilities/transactions/write_unprepared_txn.cc +39 -11
  348. package/deps/rocksdb/rocksdb/utilities/transactions/write_unprepared_txn.h +6 -3
  349. package/deps/rocksdb/rocksdb/utilities/transactions/write_unprepared_txn_db.cc +14 -8
  350. package/deps/rocksdb/rocksdb/utilities/transactions/write_unprepared_txn_db.h +1 -1
  351. package/deps/rocksdb/rocksdb/utilities/ttl/db_ttl_impl.cc +10 -5
  352. package/deps/rocksdb/rocksdb/utilities/ttl/db_ttl_impl.h +1 -1
  353. package/deps/rocksdb/rocksdb/utilities/ttl/ttl_test.cc +1 -1
  354. package/deps/rocksdb/rocksdb/utilities/write_batch_with_index/write_batch_with_index.cc +2 -1
  355. package/deps/rocksdb/rocksdb/utilities/write_batch_with_index/write_batch_with_index_test.cc +6 -6
  356. package/deps/rocksdb/rocksdb.gyp +2 -0
  357. package/package.json +1 -1
  358. package/prebuilds/darwin-arm64/node.napi.node +0 -0
  359. package/prebuilds/linux-x64/node.napi.node +0 -0
@@ -43,6 +43,9 @@ class MemTableListTest : public testing::Test {
43
43
  // Open DB only with default column family
44
44
  ColumnFamilyOptions cf_options;
45
45
  std::vector<ColumnFamilyDescriptor> cf_descs;
46
+ if (udt_enabled_) {
47
+ cf_options.comparator = test::BytewiseComparatorWithU64TsWrapper();
48
+ }
46
49
  cf_descs.emplace_back(kDefaultColumnFamilyName, cf_options);
47
50
  Status s = DB::Open(options, dbname, cf_descs, &handles, &db);
48
51
  EXPECT_OK(s);
@@ -200,6 +203,9 @@ class MemTableListTest : public testing::Test {
200
203
  nullptr /* prep_tracker */, &mutex, file_meta_ptrs,
201
204
  committed_flush_jobs_info, to_delete, nullptr, &log_buffer);
202
205
  }
206
+
207
+ protected:
208
+ bool udt_enabled_ = false;
203
209
  };
204
210
 
205
211
  TEST_F(MemTableListTest, Empty) {
@@ -676,7 +682,7 @@ TEST_F(MemTableListTest, FlushPendingTest) {
676
682
  ASSERT_FALSE(list.imm_flush_needed.load(std::memory_order_acquire));
677
683
 
678
684
  // Revert flush
679
- list.RollbackMemtableFlush(to_flush, 0);
685
+ list.RollbackMemtableFlush(to_flush, false);
680
686
  ASSERT_FALSE(list.IsFlushPending());
681
687
  ASSERT_TRUE(list.imm_flush_needed.load(std::memory_order_acquire));
682
688
  to_flush.clear();
@@ -726,7 +732,7 @@ TEST_F(MemTableListTest, FlushPendingTest) {
726
732
  ASSERT_FALSE(list.imm_flush_needed.load(std::memory_order_acquire));
727
733
 
728
734
  // Rollback first pick of tables
729
- list.RollbackMemtableFlush(to_flush, 0);
735
+ list.RollbackMemtableFlush(to_flush, false);
730
736
  ASSERT_TRUE(list.IsFlushPending());
731
737
  ASSERT_TRUE(list.imm_flush_needed.load(std::memory_order_acquire));
732
738
  to_flush.clear();
@@ -868,7 +874,7 @@ TEST_F(MemTableListTest, FlushPendingTest) {
868
874
  to_delete.clear();
869
875
  }
870
876
 
871
- TEST_F(MemTableListTest, EmptyAtomicFlusTest) {
877
+ TEST_F(MemTableListTest, EmptyAtomicFlushTest) {
872
878
  autovector<MemTableList*> lists;
873
879
  autovector<uint32_t> cf_ids;
874
880
  autovector<const MutableCFOptions*> options_list;
@@ -880,7 +886,7 @@ TEST_F(MemTableListTest, EmptyAtomicFlusTest) {
880
886
  ASSERT_TRUE(to_delete.empty());
881
887
  }
882
888
 
883
- TEST_F(MemTableListTest, AtomicFlusTest) {
889
+ TEST_F(MemTableListTest, AtomicFlushTest) {
884
890
  const int num_cfs = 3;
885
891
  const int num_tables_per_cf = 2;
886
892
  SequenceNumber seq = 1;
@@ -1028,6 +1034,86 @@ TEST_F(MemTableListTest, AtomicFlusTest) {
1028
1034
  }
1029
1035
  }
1030
1036
 
1037
+ class MemTableListWithTimestampTest : public MemTableListTest {
1038
+ public:
1039
+ MemTableListWithTimestampTest() : MemTableListTest() {}
1040
+
1041
+ void SetUp() override { udt_enabled_ = true; }
1042
+ };
1043
+
1044
+ TEST_F(MemTableListWithTimestampTest, GetTableNewestUDT) {
1045
+ const int num_tables = 3;
1046
+ const int num_entries = 5;
1047
+ SequenceNumber seq = 1;
1048
+
1049
+ auto factory = std::make_shared<SkipListFactory>();
1050
+ options.memtable_factory = factory;
1051
+ options.persist_user_defined_timestamps = false;
1052
+ ImmutableOptions ioptions(options);
1053
+ const Comparator* ucmp = test::BytewiseComparatorWithU64TsWrapper();
1054
+ InternalKeyComparator cmp(ucmp);
1055
+ WriteBufferManager wb(options.db_write_buffer_size);
1056
+
1057
+ // Create MemTableList
1058
+ int min_write_buffer_number_to_merge = 1;
1059
+ int max_write_buffer_number_to_maintain = 4;
1060
+ int64_t max_write_buffer_size_to_maintain =
1061
+ 4 * static_cast<int>(options.write_buffer_size);
1062
+ MemTableList list(min_write_buffer_number_to_merge,
1063
+ max_write_buffer_number_to_maintain,
1064
+ max_write_buffer_size_to_maintain);
1065
+
1066
+ // Create some MemTables
1067
+ uint64_t memtable_id = 0;
1068
+ std::vector<MemTable*> tables;
1069
+ MutableCFOptions mutable_cf_options(options);
1070
+ uint64_t current_ts = 0;
1071
+ autovector<MemTable*> to_delete;
1072
+ std::vector<std::string> newest_udts;
1073
+
1074
+ std::string key;
1075
+ std::string write_ts;
1076
+ for (int i = 0; i < num_tables; i++) {
1077
+ MemTable* mem = new MemTable(cmp, ioptions, mutable_cf_options, &wb,
1078
+ kMaxSequenceNumber, 0 /* column_family_id */);
1079
+ mem->SetID(memtable_id++);
1080
+ mem->Ref();
1081
+
1082
+ std::string value;
1083
+ MergeContext merge_context;
1084
+
1085
+ for (int j = 0; j < num_entries; j++) {
1086
+ key = "key1";
1087
+ write_ts.clear();
1088
+ PutFixed64(&write_ts, current_ts);
1089
+ key.append(write_ts);
1090
+ ASSERT_OK(mem->Add(++seq, kTypeValue, key, std::to_string(i),
1091
+ nullptr /* kv_prot_info */));
1092
+ current_ts++;
1093
+ }
1094
+
1095
+ tables.push_back(mem);
1096
+ list.Add(tables.back(), &to_delete);
1097
+ newest_udts.push_back(write_ts);
1098
+ }
1099
+
1100
+ ASSERT_EQ(num_tables, list.NumNotFlushed());
1101
+ ASSERT_TRUE(list.IsFlushPending());
1102
+ std::vector<Slice> tables_newest_udts = list.GetTablesNewestUDT(num_tables);
1103
+ ASSERT_EQ(newest_udts.size(), tables_newest_udts.size());
1104
+ for (size_t i = 0; i < tables_newest_udts.size(); i++) {
1105
+ const Slice& table_newest_udt = tables_newest_udts[i];
1106
+ const Slice expected_newest_udt = newest_udts[i];
1107
+ ASSERT_EQ(expected_newest_udt, table_newest_udt);
1108
+ }
1109
+
1110
+ list.current()->Unref(&to_delete);
1111
+ for (MemTable* m : to_delete) {
1112
+ delete m;
1113
+ }
1114
+ to_delete.clear();
1115
+ }
1116
+
1031
1117
  } // namespace ROCKSDB_NAMESPACE
1032
1118
 
1033
1119
  int main(int argc, char** argv) {
@@ -262,7 +262,7 @@ void ProfileQueries(bool enabled_time = false) {
262
262
  for (const int i : keys) {
263
263
  if (i == kFlushFlag) {
264
264
  FlushOptions fo;
265
- db->Flush(fo);
265
+ ASSERT_OK(db->Flush(fo));
266
266
  continue;
267
267
  }
268
268
 
@@ -1111,7 +1111,7 @@ TEST_F(PerfContextTest, MergeOperandCount) {
1111
1111
  verify();
1112
1112
 
1113
1113
  // Verify counters when reading from table files
1114
- db->Flush(FlushOptions());
1114
+ ASSERT_OK(db->Flush(FlushOptions()));
1115
1115
 
1116
1116
  verify();
1117
1117
  }
@@ -123,6 +123,7 @@ class PlainTableDBTest : public testing::Test,
123
123
  // Return the current option configuration.
124
124
  Options CurrentOptions() {
125
125
  Options options;
126
+ options.level_compaction_dynamic_level_bytes = false;
126
127
 
127
128
  PlainTableOptions plain_table_options;
128
129
  plain_table_options.user_key_len = 0;
@@ -157,6 +157,7 @@ class Repairer {
157
157
 
158
158
  VersionEdit edit;
159
159
  edit.SetComparatorName(opts.comparator->Name());
160
+ edit.SetPersistUserDefinedTimestamps(opts.persist_user_defined_timestamps);
160
161
  edit.SetLogNumber(0);
161
162
  edit.SetColumnFamily(cf_id);
162
163
  ColumnFamilyData* cfd;
@@ -394,9 +395,12 @@ class Repairer {
394
395
  auto cf_mems = new ColumnFamilyMemTablesImpl(vset_.GetColumnFamilySet());
395
396
 
396
397
  // Read all the records and add to a memtable
398
+ const UnorderedMap<uint32_t, size_t>& running_ts_sz =
399
+ vset_.GetRunningColumnFamiliesTimestampSize();
397
400
  std::string scratch;
398
401
  Slice record;
399
402
  WriteBatch batch;
403
+
400
404
  int counter = 0;
401
405
  while (reader.ReadRecord(&record, &scratch)) {
402
406
  if (record.size() < WriteBatchInternal::kHeader) {
@@ -406,8 +410,15 @@ class Repairer {
406
410
  }
407
411
  Status record_status = WriteBatchInternal::SetContents(&batch, record);
408
412
  if (record_status.ok()) {
409
- record_status =
410
- WriteBatchInternal::InsertInto(&batch, cf_mems, nullptr, nullptr);
413
+ const UnorderedMap<uint32_t, size_t>& record_ts_sz =
414
+ reader.GetRecordedTimestampSize();
415
+ record_status = HandleWriteBatchTimestampSizeDifference(
416
+ &batch, running_ts_sz, record_ts_sz,
417
+ TimestampSizeConsistencyMode::kVerifyConsistency);
418
+ if (record_status.ok()) {
419
+ record_status =
420
+ WriteBatchInternal::InsertInto(&batch, cf_mems, nullptr, nullptr);
421
+ }
411
422
  }
412
423
  if (record_status.ok()) {
413
424
  counter += WriteBatchInternal::Count(&batch);
@@ -550,6 +561,8 @@ class Repairer {
550
561
  AddColumnFamily(props->column_family_name, t->column_family_id);
551
562
  }
552
563
  t->meta.oldest_ancester_time = props->creation_time;
564
+ t->meta.user_defined_timestamps_persisted =
565
+ static_cast<bool>(props->user_defined_timestamps_persisted);
553
566
  }
554
567
  if (status.ok()) {
555
568
  uint64_t tail_size = 0;
@@ -679,7 +692,8 @@ class Repairer {
679
692
  &cfd->internal_comparator(), cfd->user_comparator(),
680
693
  cfd->NumberLevels(), cfd->ioptions()->compaction_style,
681
694
  nullptr /* src_vstorage */, cfd->ioptions()->force_consistency_checks,
682
- EpochNumberRequirement::kMightMissing);
695
+ EpochNumberRequirement::kMightMissing, cfd->ioptions()->clock,
696
+ /*bottommost_file_compaction_delay=*/0);
683
697
  Status s;
684
698
  VersionEdit dummy_edit;
685
699
  for (const auto* table : cf_id_and_tables.second) {
@@ -693,7 +707,8 @@ class Repairer {
693
707
  table->meta.oldest_ancester_time, table->meta.file_creation_time,
694
708
  table->meta.epoch_number, table->meta.file_checksum,
695
709
  table->meta.file_checksum_func_name, table->meta.unique_id,
696
- table->meta.compensated_range_deletion_size, table->meta.tail_size);
710
+ table->meta.compensated_range_deletion_size, table->meta.tail_size,
711
+ table->meta.user_defined_timestamps_persisted);
697
712
  }
698
713
  s = dummy_version_builder.Apply(&dummy_edit);
699
714
  if (s.ok()) {
@@ -707,6 +722,8 @@ class Repairer {
707
722
  // recovered epoch numbers
708
723
  VersionEdit edit;
709
724
  edit.SetComparatorName(cfd->user_comparator()->Name());
725
+ edit.SetPersistUserDefinedTimestamps(
726
+ cfd->ioptions()->persist_user_defined_timestamps);
710
727
  edit.SetLogNumber(0);
711
728
  edit.SetNextFile(next_file_number_);
712
729
  edit.SetColumnFamily(cfd->GetID());
@@ -3,17 +3,17 @@
3
3
  // COPYING file in the root directory) and Apache 2.0 License
4
4
  // (found in the LICENSE.Apache file in the root directory).
5
5
 
6
- #include "rocksdb/options.h"
7
-
8
6
  #include <algorithm>
9
7
  #include <string>
10
8
  #include <vector>
11
9
 
12
10
  #include "db/db_impl/db_impl.h"
13
11
  #include "db/db_test_util.h"
12
+ #include "db/db_with_timestamp_test_util.h"
14
13
  #include "file/file_util.h"
15
14
  #include "rocksdb/comparator.h"
16
15
  #include "rocksdb/db.h"
16
+ #include "rocksdb/options.h"
17
17
  #include "rocksdb/transaction_log.h"
18
18
  #include "table/unique_id_impl.h"
19
19
  #include "util/string_util.h"
@@ -315,6 +315,147 @@ TEST_F(RepairTest, UnflushedSst) {
315
315
  ASSERT_EQ(Get("key"), "val");
316
316
  }
317
317
 
318
+ // Test parameters:
319
+ // param 0): paranoid file check
320
+ // param 1): user-defined timestamp test mode
321
+ class RepairTestWithTimestamp
322
+ : public DBBasicTestWithTimestampBase,
323
+ public testing::WithParamInterface<
324
+ std::tuple<bool, test::UserDefinedTimestampTestMode>> {
325
+ public:
326
+ RepairTestWithTimestamp()
327
+ : DBBasicTestWithTimestampBase("repair_test_with_timestamp") {}
328
+
329
+ Status Put(const Slice& key, const Slice& ts, const Slice& value) {
330
+ WriteOptions write_opts;
331
+ return db_->Put(write_opts, handles_[0], key, ts, value);
332
+ }
333
+
334
+ void CheckGet(const ReadOptions& read_opts, const Slice& key,
335
+ const std::string& expected_value,
336
+ const std::string& expected_ts) {
337
+ std::string actual_value;
338
+ std::string actual_ts;
339
+ ASSERT_OK(db_->Get(read_opts, handles_[0], key, &actual_value, &actual_ts));
340
+ ASSERT_EQ(expected_value, actual_value);
341
+ ASSERT_EQ(expected_ts, actual_ts);
342
+ }
343
+
344
+ void CheckFileBoundaries(const Slice& smallest_user_key,
345
+ const Slice& largest_user_key) {
346
+ std::vector<std::vector<FileMetaData>> level_to_files;
347
+ dbfull()->TEST_GetFilesMetaData(dbfull()->DefaultColumnFamily(),
348
+ &level_to_files);
349
+ ASSERT_GT(level_to_files.size(), 1);
350
+ // L0 only has one SST file.
351
+ ASSERT_EQ(level_to_files[0].size(), 1);
352
+ auto file_meta = level_to_files[0][0];
353
+ ASSERT_EQ(smallest_user_key, file_meta.smallest.user_key());
354
+ ASSERT_EQ(largest_user_key, file_meta.largest.user_key());
355
+ }
356
+ };
357
+
358
+ TEST_P(RepairTestWithTimestamp, UnflushedSst) {
359
+ Destroy(last_options_);
360
+
361
+ bool paranoid_file_checks = std::get<0>(GetParam());
362
+ bool persist_udt = test::ShouldPersistUDT(std::get<1>(GetParam()));
363
+ std::string smallest_ukey_without_ts = "bar";
364
+ std::string largest_ukey_without_ts = "foo";
365
+ Options options = CurrentOptions();
366
+ options.env = env_;
367
+ options.create_if_missing = true;
368
+ std::string min_ts;
369
+ std::string write_ts;
370
+ PutFixed64(&min_ts, 0);
371
+ PutFixed64(&write_ts, 1);
372
+ options.comparator = test::BytewiseComparatorWithU64TsWrapper();
373
+ options.persist_user_defined_timestamps = persist_udt;
374
+ if (!persist_udt) {
375
+ options.allow_concurrent_memtable_write = false;
376
+ }
377
+ options.paranoid_file_checks = paranoid_file_checks;
378
+
379
+ ColumnFamilyOptions cf_options(options);
380
+ std::vector<ColumnFamilyDescriptor> column_families;
381
+ column_families.push_back(
382
+ ColumnFamilyDescriptor(kDefaultColumnFamilyName, cf_options));
383
+
384
+ ASSERT_OK(DB::Open(options, dbname_, column_families, &handles_, &db_));
385
+
386
+ ASSERT_OK(Put(smallest_ukey_without_ts, write_ts,
387
+ smallest_ukey_without_ts + ":val"));
388
+ ASSERT_OK(
389
+ Put(largest_ukey_without_ts, write_ts, largest_ukey_without_ts + ":val"));
390
+ VectorLogPtr wal_files;
391
+ ASSERT_OK(dbfull()->GetSortedWalFiles(wal_files));
392
+ ASSERT_EQ(wal_files.size(), 1);
393
+ {
394
+ uint64_t total_ssts_size;
395
+ std::unordered_map<std::string, uint64_t> sst_files;
396
+ ASSERT_OK(GetAllDataFiles(kTableFile, &sst_files, &total_ssts_size));
397
+ ASSERT_EQ(total_ssts_size, 0);
398
+ }
399
+ // Need to get path before Close() deletes db_, but delete it after Close() to
400
+ // ensure Close() didn't change the manifest.
401
+ std::string manifest_path =
402
+ DescriptorFileName(dbname_, dbfull()->TEST_Current_Manifest_FileNo());
403
+
404
+ Close();
405
+ ASSERT_OK(env_->FileExists(manifest_path));
406
+ ASSERT_OK(env_->DeleteFile(manifest_path));
407
+ ASSERT_OK(RepairDB(dbname_, options));
408
+ ASSERT_OK(DB::Open(options, dbname_, column_families, &handles_, &db_));
409
+
410
+ ASSERT_OK(dbfull()->GetSortedWalFiles(wal_files));
411
+ ASSERT_EQ(wal_files.size(), 0);
412
+ {
413
+ uint64_t total_ssts_size;
414
+ std::unordered_map<std::string, uint64_t> sst_files;
415
+ ASSERT_OK(GetAllDataFiles(kTableFile, &sst_files, &total_ssts_size));
416
+ ASSERT_GT(total_ssts_size, 0);
417
+ }
418
+
419
+ // Check file boundaries are correct for different
420
+ // `persist_user_defined_timestamps` option values.
421
+ if (persist_udt) {
422
+ CheckFileBoundaries(smallest_ukey_without_ts + write_ts,
423
+ largest_ukey_without_ts + write_ts);
424
+ } else {
425
+ CheckFileBoundaries(smallest_ukey_without_ts + min_ts,
426
+ largest_ukey_without_ts + min_ts);
427
+ }
428
+
429
+ ReadOptions read_opts;
430
+ Slice read_ts_slice = write_ts;
431
+ read_opts.timestamp = &read_ts_slice;
432
+ if (persist_udt) {
433
+ CheckGet(read_opts, smallest_ukey_without_ts,
434
+ smallest_ukey_without_ts + ":val", write_ts);
435
+ CheckGet(read_opts, largest_ukey_without_ts,
436
+ largest_ukey_without_ts + ":val", write_ts);
437
+ } else {
438
+ // TODO(yuzhangyu): currently when `persist_user_defined_timestamps` is
439
+ // false, ts is unconditionally stripped during flush.
440
+ // When `full_history_ts_low` is set and respected during flush.
441
+ // We should prohibit reading below `full_history_ts_low` all together.
442
+ CheckGet(read_opts, smallest_ukey_without_ts,
443
+ smallest_ukey_without_ts + ":val", min_ts);
444
+ CheckGet(read_opts, largest_ukey_without_ts,
445
+ largest_ukey_without_ts + ":val", min_ts);
446
+ }
447
+ }
448
+
449
+ // Param 0: paranoid file check
450
+ // Param 1: test mode for the user-defined timestamp feature
451
+ INSTANTIATE_TEST_CASE_P(
452
+ UnflushedSst, RepairTestWithTimestamp,
453
+ ::testing::Combine(
454
+ ::testing::Bool(),
455
+ ::testing::Values(
456
+ test::UserDefinedTimestampTestMode::kStripUserDefinedTimestamp,
457
+ test::UserDefinedTimestampTestMode::kNormal)));
458
+
318
459
  TEST_F(RepairTest, SeparateWalDir) {
319
460
  do {
320
461
  Options options = CurrentOptions();
@@ -93,7 +93,7 @@ TEST_F(SeqnoTimeTest, TemperatureBasicUniversal) {
93
93
  }
94
94
  ASSERT_OK(Flush());
95
95
  }
96
- ASSERT_OK(dbfull()->WaitForCompact());
96
+ ASSERT_OK(dbfull()->TEST_WaitForCompact());
97
97
 
98
98
  // All data is hot, only output to penultimate level
99
99
  ASSERT_EQ("0,0,0,0,0,1", FilesPerLevel());
@@ -114,7 +114,7 @@ TEST_F(SeqnoTimeTest, TemperatureBasicUniversal) {
114
114
  });
115
115
  }
116
116
  ASSERT_OK(Flush());
117
- ASSERT_OK(dbfull()->WaitForCompact());
117
+ ASSERT_OK(dbfull()->TEST_WaitForCompact());
118
118
  ASSERT_GT(GetSstSizeHelper(Temperature::kUnknown), 0);
119
119
  ASSERT_EQ(GetSstSizeHelper(Temperature::kCold), 0);
120
120
  }
@@ -128,7 +128,7 @@ TEST_F(SeqnoTimeTest, TemperatureBasicUniversal) {
128
128
  });
129
129
  }
130
130
  ASSERT_OK(Flush());
131
- ASSERT_OK(dbfull()->WaitForCompact());
131
+ ASSERT_OK(dbfull()->TEST_WaitForCompact());
132
132
  }
133
133
 
134
134
  CompactRangeOptions cro;
@@ -226,7 +226,8 @@ TEST_F(SeqnoTimeTest, TemperatureBasicLevel) {
226
226
  }
227
227
  ASSERT_OK(Flush());
228
228
  }
229
- ASSERT_OK(db_->CompactRange(cro, nullptr, nullptr));
229
+ // Second to last level
230
+ MoveFilesToLevel(5);
230
231
  ASSERT_GT(GetSstSizeHelper(Temperature::kUnknown), 0);
231
232
  ASSERT_EQ(GetSstSizeHelper(Temperature::kCold), 0);
232
233
 
@@ -75,7 +75,7 @@ TableCache::TableCache(const ImmutableOptions& ioptions,
75
75
  cache_(cache),
76
76
  immortal_tables_(false),
77
77
  block_cache_tracer_(block_cache_tracer),
78
- loader_mutex_(kLoadConcurency, kGetSliceNPHash64UnseededFnPtr),
78
+ loader_mutex_(kLoadConcurency),
79
79
  io_tracer_(io_tracer),
80
80
  db_session_id_(db_session_id) {
81
81
  if (ioptions_.row_cache) {
@@ -90,7 +90,7 @@ TableCache::~TableCache() {}
90
90
  Status TableCache::GetTableReader(
91
91
  const ReadOptions& ro, const FileOptions& file_options,
92
92
  const InternalKeyComparator& internal_comparator,
93
- const FileMetaData& file_meta, bool sequential_mode, bool record_read_stats,
93
+ const FileMetaData& file_meta, bool sequential_mode,
94
94
  uint8_t block_protection_bytes_per_key, HistogramImpl* file_read_hist,
95
95
  std::unique_ptr<TableReader>* table_reader,
96
96
  const std::shared_ptr<const SliceTransform>& prefix_extractor,
@@ -111,13 +111,17 @@ Status TableCache::GetTableReader(
111
111
  RecordTick(ioptions_.stats, NO_FILE_OPENS);
112
112
  } else if (s.IsPathNotFound()) {
113
113
  fname = Rocks2LevelTableFileName(fname);
114
- s = PrepareIOFromReadOptions(ro, ioptions_.clock, fopts.io_options);
115
- if (s.ok()) {
116
- s = ioptions_.fs->NewRandomAccessFile(fname, file_options, &file,
117
- nullptr);
114
+ // If this file is also not found, we want to use the error message
115
+ // that contains the table file name which is less confusing.
116
+ Status temp_s =
117
+ PrepareIOFromReadOptions(ro, ioptions_.clock, fopts.io_options);
118
+ if (temp_s.ok()) {
119
+ temp_s = ioptions_.fs->NewRandomAccessFile(fname, file_options, &file,
120
+ nullptr);
118
121
  }
119
- if (s.ok()) {
122
+ if (temp_s.ok()) {
120
123
  RecordTick(ioptions_.stats, NO_FILE_OPENS);
124
+ s = temp_s;
121
125
  }
122
126
  }
123
127
 
@@ -125,13 +129,17 @@ Status TableCache::GetTableReader(
125
129
  if (!sequential_mode && ioptions_.advise_random_on_open) {
126
130
  file->Hint(FSRandomAccessFile::kRandom);
127
131
  }
132
+ if (ioptions_.default_temperature != Temperature::kUnknown &&
133
+ file_temperature == Temperature::kUnknown) {
134
+ file_temperature = ioptions_.default_temperature;
135
+ }
128
136
  StopWatch sw(ioptions_.clock, ioptions_.stats, TABLE_OPEN_IO_MICROS);
129
137
  std::unique_ptr<RandomAccessFileReader> file_reader(
130
- new RandomAccessFileReader(
131
- std::move(file), fname, ioptions_.clock, io_tracer_,
132
- record_read_stats ? ioptions_.stats : nullptr, SST_READ_MICROS,
133
- file_read_hist, ioptions_.rate_limiter.get(), ioptions_.listeners,
134
- file_temperature, level == ioptions_.num_levels - 1));
138
+ new RandomAccessFileReader(std::move(file), fname, ioptions_.clock,
139
+ io_tracer_, ioptions_.stats, SST_READ_MICROS,
140
+ file_read_hist, ioptions_.rate_limiter.get(),
141
+ ioptions_.listeners, file_temperature,
142
+ level == ioptions_.num_levels - 1));
135
143
  UniqueId64x2 expected_unique_id;
136
144
  if (ioptions_.verify_sst_unique_id_in_manifest) {
137
145
  expected_unique_id = file_meta.unique_id;
@@ -146,7 +154,8 @@ Status TableCache::GetTableReader(
146
154
  false /* force_direct_prefetch */, level, block_cache_tracer_,
147
155
  max_file_size_for_l0_meta_pin, db_session_id_,
148
156
  file_meta.fd.GetNumber(), expected_unique_id,
149
- file_meta.fd.largest_seqno, file_meta.tail_size),
157
+ file_meta.fd.largest_seqno, file_meta.tail_size,
158
+ file_meta.user_defined_timestamps_persisted),
150
159
  std::move(file_reader), file_meta.fd.GetFileSize(), table_reader,
151
160
  prefetch_index_and_filter_in_cache);
152
161
  TEST_SYNC_POINT("TableCache::GetTableReader:0");
@@ -160,8 +169,8 @@ Status TableCache::FindTable(
160
169
  const FileMetaData& file_meta, TypedHandle** handle,
161
170
  uint8_t block_protection_bytes_per_key,
162
171
  const std::shared_ptr<const SliceTransform>& prefix_extractor,
163
- const bool no_io, bool record_read_stats, HistogramImpl* file_read_hist,
164
- bool skip_filters, int level, bool prefetch_index_and_filter_in_cache,
172
+ const bool no_io, HistogramImpl* file_read_hist, bool skip_filters,
173
+ int level, bool prefetch_index_and_filter_in_cache,
165
174
  size_t max_file_size_for_l0_meta_pin, Temperature file_temperature) {
166
175
  PERF_TIMER_GUARD_WITH_CLOCK(find_table_nanos, ioptions_.clock);
167
176
  uint64_t number = file_meta.fd.GetNumber();
@@ -174,7 +183,7 @@ Status TableCache::FindTable(
174
183
  if (no_io) {
175
184
  return Status::Incomplete("Table not found in table_cache, no_io is set");
176
185
  }
177
- MutexLock load_lock(loader_mutex_.get(key));
186
+ MutexLock load_lock(&loader_mutex_.Get(key));
178
187
  // We check the cache again under loading mutex
179
188
  *handle = cache_.Lookup(key);
180
189
  if (*handle != nullptr) {
@@ -183,7 +192,7 @@ Status TableCache::FindTable(
183
192
 
184
193
  std::unique_ptr<TableReader> table_reader;
185
194
  Status s = GetTableReader(ro, file_options, internal_comparator, file_meta,
186
- false /* sequential mode */, record_read_stats,
195
+ false /* sequential mode */,
187
196
  block_protection_bytes_per_key, file_read_hist,
188
197
  &table_reader, prefix_extractor, skip_filters,
189
198
  level, prefetch_index_and_filter_in_cache,
@@ -232,8 +241,7 @@ InternalIterator* TableCache::NewIterator(
232
241
  s = FindTable(options, file_options, icomparator, file_meta, &handle,
233
242
  block_protection_bytes_per_key, prefix_extractor,
234
243
  options.read_tier == kBlockCacheTier /* no_io */,
235
- !for_compaction /* record_read_stats */, file_read_hist,
236
- skip_filters, level,
244
+ file_read_hist, skip_filters, level,
237
245
  true /* prefetch_index_and_filter_in_cache */,
238
246
  max_file_size_for_l0_meta_pin, file_meta.temperature);
239
247
  if (s.ok()) {
@@ -438,8 +446,8 @@ Status TableCache::Get(
438
446
  s = FindTable(options, file_options_, internal_comparator, file_meta,
439
447
  &handle, block_protection_bytes_per_key, prefix_extractor,
440
448
  options.read_tier == kBlockCacheTier /* no_io */,
441
- true /* record_read_stats */, file_read_hist, skip_filters,
442
- level, true /* prefetch_index_and_filter_in_cache */,
449
+ file_read_hist, skip_filters, level,
450
+ true /* prefetch_index_and_filter_in_cache */,
443
451
  max_file_size_for_l0_meta_pin, file_meta.temperature);
444
452
  if (s.ok()) {
445
453
  t = cache_.Value(handle);
@@ -480,9 +488,12 @@ Status TableCache::Get(
480
488
  RowCacheInterface row_cache{ioptions_.row_cache.get()};
481
489
  size_t charge = row_cache_entry->capacity() + sizeof(std::string);
482
490
  auto row_ptr = new std::string(std::move(*row_cache_entry));
483
- // If row cache is full, it's OK to continue.
484
- row_cache.Insert(row_cache_key.GetUserKey(), row_ptr, charge)
485
- .PermitUncheckedError();
491
+ Status rcs = row_cache.Insert(row_cache_key.GetUserKey(), row_ptr, charge);
492
+ if (!rcs.ok()) {
493
+ // If row cache is full, it's OK to continue, but we keep ownership of
494
+ // row_ptr.
495
+ delete row_ptr;
496
+ }
486
497
  }
487
498
 
488
499
  if (handle != nullptr) {
@@ -541,7 +552,7 @@ Status TableCache::MultiGetFilter(
541
552
  s = FindTable(options, file_options_, internal_comparator, file_meta,
542
553
  &handle, block_protection_bytes_per_key, prefix_extractor,
543
554
  options.read_tier == kBlockCacheTier /* no_io */,
544
- true /* record_read_stats */, file_read_hist,
555
+ file_read_hist,
545
556
  /*skip_filters=*/false, level,
546
557
  true /* prefetch_index_and_filter_in_cache */,
547
558
  /*max_file_size_for_l0_meta_pin=*/0, file_meta.temperature);
@@ -658,11 +669,10 @@ uint64_t TableCache::ApproximateOffsetOf(
658
669
  TableReader* table_reader = file_meta.fd.table_reader;
659
670
  TypedHandle* table_handle = nullptr;
660
671
  if (table_reader == nullptr) {
661
- const bool for_compaction = (caller == TableReaderCaller::kCompaction);
662
- Status s = FindTable(
663
- read_options, file_options_, internal_comparator, file_meta,
664
- &table_handle, block_protection_bytes_per_key, prefix_extractor,
665
- false /* no_io */, !for_compaction /* record_read_stats */);
672
+ Status s =
673
+ FindTable(read_options, file_options_, internal_comparator, file_meta,
674
+ &table_handle, block_protection_bytes_per_key,
675
+ prefix_extractor, false /* no_io */);
666
676
  if (s.ok()) {
667
677
  table_reader = cache_.Value(table_handle);
668
678
  }
@@ -688,11 +698,10 @@ uint64_t TableCache::ApproximateSize(
688
698
  TableReader* table_reader = file_meta.fd.table_reader;
689
699
  TypedHandle* table_handle = nullptr;
690
700
  if (table_reader == nullptr) {
691
- const bool for_compaction = (caller == TableReaderCaller::kCompaction);
692
- Status s = FindTable(
693
- read_options, file_options_, internal_comparator, file_meta,
694
- &table_handle, block_protection_bytes_per_key, prefix_extractor,
695
- false /* no_io */, !for_compaction /* record_read_stats */);
701
+ Status s =
702
+ FindTable(read_options, file_options_, internal_comparator, file_meta,
703
+ &table_handle, block_protection_bytes_per_key,
704
+ prefix_extractor, false /* no_io */);
696
705
  if (s.ok()) {
697
706
  table_reader = cache_.Value(table_handle);
698
707
  }
@@ -171,9 +171,9 @@ class TableCache {
171
171
  const FileMetaData& file_meta, TypedHandle**,
172
172
  uint8_t block_protection_bytes_per_key,
173
173
  const std::shared_ptr<const SliceTransform>& prefix_extractor = nullptr,
174
- const bool no_io = false, bool record_read_stats = true,
175
- HistogramImpl* file_read_hist = nullptr, bool skip_filters = false,
176
- int level = -1, bool prefetch_index_and_filter_in_cache = true,
174
+ const bool no_io = false, HistogramImpl* file_read_hist = nullptr,
175
+ bool skip_filters = false, int level = -1,
176
+ bool prefetch_index_and_filter_in_cache = true,
177
177
  size_t max_file_size_for_l0_meta_pin = 0,
178
178
  Temperature file_temperature = Temperature::kUnknown);
179
179
 
@@ -243,8 +243,8 @@ class TableCache {
243
243
  const ReadOptions& ro, const FileOptions& file_options,
244
244
  const InternalKeyComparator& internal_comparator,
245
245
  const FileMetaData& file_meta, bool sequential_mode,
246
- bool record_read_stats, uint8_t block_protection_bytes_per_key,
247
- HistogramImpl* file_read_hist, std::unique_ptr<TableReader>* table_reader,
246
+ uint8_t block_protection_bytes_per_key, HistogramImpl* file_read_hist,
247
+ std::unique_ptr<TableReader>* table_reader,
248
248
  const std::shared_ptr<const SliceTransform>& prefix_extractor = nullptr,
249
249
  bool skip_filters = false, int level = -1,
250
250
  bool prefetch_index_and_filter_in_cache = true,
@@ -275,7 +275,7 @@ class TableCache {
275
275
  std::string row_cache_id_;
276
276
  bool immortal_tables_;
277
277
  BlockCacheTracer* const block_cache_tracer_;
278
- Striped<port::Mutex, Slice> loader_mutex_;
278
+ Striped<CacheAlignedWrapper<port::Mutex>> loader_mutex_;
279
279
  std::shared_ptr<IOTracer> io_tracer_;
280
280
  std::string db_session_id_;
281
281
  };
@@ -68,8 +68,8 @@ DEFINE_SYNC_AND_ASYNC(Status, TableCache::MultiGet)
68
68
  s = FindTable(options, file_options_, internal_comparator, file_meta,
69
69
  &handle, block_protection_bytes_per_key, prefix_extractor,
70
70
  options.read_tier == kBlockCacheTier /* no_io */,
71
- true /* record_read_stats */, file_read_hist, skip_filters,
72
- level, true /* prefetch_index_and_filter_in_cache */,
71
+ file_read_hist, skip_filters, level,
72
+ true /* prefetch_index_and_filter_in_cache */,
73
73
  0 /*max_file_size_for_l0_meta_pin*/, file_meta.temperature);
74
74
  TEST_SYNC_POINT_CALLBACK("TableCache::MultiGet:FindTable", &s);
75
75
  if (s.ok()) {