@nxtedition/rocksdb 8.2.7 → 9.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (359)
  1. package/deps/rocksdb/rocksdb/CMakeLists.txt +7 -1
  2. package/deps/rocksdb/rocksdb/Makefile +22 -19
  3. package/deps/rocksdb/rocksdb/TARGETS +8 -0
  4. package/deps/rocksdb/rocksdb/cache/cache_bench_tool.cc +157 -61
  5. package/deps/rocksdb/rocksdb/cache/cache_test.cc +43 -92
  6. package/deps/rocksdb/rocksdb/cache/clock_cache.cc +632 -455
  7. package/deps/rocksdb/rocksdb/cache/clock_cache.h +244 -149
  8. package/deps/rocksdb/rocksdb/cache/compressed_secondary_cache.cc +41 -13
  9. package/deps/rocksdb/rocksdb/cache/compressed_secondary_cache.h +11 -1
  10. package/deps/rocksdb/rocksdb/cache/compressed_secondary_cache_test.cc +216 -17
  11. package/deps/rocksdb/rocksdb/cache/lru_cache.cc +7 -5
  12. package/deps/rocksdb/rocksdb/cache/lru_cache_test.cc +279 -199
  13. package/deps/rocksdb/rocksdb/cache/secondary_cache.cc +2 -1
  14. package/deps/rocksdb/rocksdb/cache/secondary_cache_adapter.cc +159 -8
  15. package/deps/rocksdb/rocksdb/cache/secondary_cache_adapter.h +28 -2
  16. package/deps/rocksdb/rocksdb/cache/sharded_cache.cc +1 -1
  17. package/deps/rocksdb/rocksdb/cache/sharded_cache.h +8 -0
  18. package/deps/rocksdb/rocksdb/crash_test.mk +14 -0
  19. package/deps/rocksdb/rocksdb/db/arena_wrapped_db_iter.cc +3 -1
  20. package/deps/rocksdb/rocksdb/db/blob/blob_file_builder.cc +1 -1
  21. package/deps/rocksdb/rocksdb/db/blob/blob_file_builder_test.cc +1 -1
  22. package/deps/rocksdb/rocksdb/db/blob/blob_file_cache.cc +2 -2
  23. package/deps/rocksdb/rocksdb/db/blob/blob_file_cache.h +1 -1
  24. package/deps/rocksdb/rocksdb/db/blob/blob_file_reader.cc +18 -21
  25. package/deps/rocksdb/rocksdb/db/blob/blob_file_reader.h +1 -2
  26. package/deps/rocksdb/rocksdb/db/blob/blob_file_reader_test.cc +1 -1
  27. package/deps/rocksdb/rocksdb/db/blob/blob_log_sequential_reader.cc +2 -3
  28. package/deps/rocksdb/rocksdb/db/blob/blob_source_test.cc +1 -1
  29. package/deps/rocksdb/rocksdb/db/builder.cc +32 -7
  30. package/deps/rocksdb/rocksdb/db/c.cc +169 -6
  31. package/deps/rocksdb/rocksdb/db/c_test.c +104 -6
  32. package/deps/rocksdb/rocksdb/db/column_family.cc +98 -47
  33. package/deps/rocksdb/rocksdb/db/column_family.h +25 -2
  34. package/deps/rocksdb/rocksdb/db/column_family_test.cc +213 -2
  35. package/deps/rocksdb/rocksdb/db/compact_files_test.cc +4 -1
  36. package/deps/rocksdb/rocksdb/db/compaction/compaction.cc +93 -23
  37. package/deps/rocksdb/rocksdb/db/compaction/compaction.h +33 -9
  38. package/deps/rocksdb/rocksdb/db/compaction/compaction_iterator.cc +7 -6
  39. package/deps/rocksdb/rocksdb/db/compaction/compaction_iterator.h +17 -6
  40. package/deps/rocksdb/rocksdb/db/compaction/compaction_iterator_test.cc +2 -2
  41. package/deps/rocksdb/rocksdb/db/compaction/compaction_job.cc +107 -43
  42. package/deps/rocksdb/rocksdb/db/compaction/compaction_job.h +15 -4
  43. package/deps/rocksdb/rocksdb/db/compaction/compaction_job_stats_test.cc +2 -0
  44. package/deps/rocksdb/rocksdb/db/compaction/compaction_job_test.cc +4 -2
  45. package/deps/rocksdb/rocksdb/db/compaction/compaction_outputs.cc +25 -17
  46. package/deps/rocksdb/rocksdb/db/compaction/compaction_outputs.h +13 -4
  47. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker.cc +11 -11
  48. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_test.cc +29 -4
  49. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_universal.cc +24 -31
  50. package/deps/rocksdb/rocksdb/db/compaction/file_pri.h +3 -1
  51. package/deps/rocksdb/rocksdb/db/compaction/tiered_compaction_test.cc +19 -19
  52. package/deps/rocksdb/rocksdb/db/comparator_db_test.cc +2 -1
  53. package/deps/rocksdb/rocksdb/db/convenience.cc +20 -3
  54. package/deps/rocksdb/rocksdb/db/convenience_impl.h +15 -0
  55. package/deps/rocksdb/rocksdb/db/corruption_test.cc +17 -0
  56. package/deps/rocksdb/rocksdb/db/cuckoo_table_db_test.cc +1 -0
  57. package/deps/rocksdb/rocksdb/db/db_basic_test.cc +17 -3
  58. package/deps/rocksdb/rocksdb/db/db_block_cache_test.cc +5 -0
  59. package/deps/rocksdb/rocksdb/db/db_bloom_filter_test.cc +15 -15
  60. package/deps/rocksdb/rocksdb/db/db_compaction_test.cc +666 -44
  61. package/deps/rocksdb/rocksdb/db/db_filesnapshot.cc +2 -29
  62. package/deps/rocksdb/rocksdb/db/db_flush_test.cc +274 -1
  63. package/deps/rocksdb/rocksdb/db/db_impl/compacted_db_impl.cc +40 -19
  64. package/deps/rocksdb/rocksdb/db/db_impl/compacted_db_impl.h +6 -5
  65. package/deps/rocksdb/rocksdb/db/db_impl/db_impl.cc +250 -116
  66. package/deps/rocksdb/rocksdb/db/db_impl/db_impl.h +51 -23
  67. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_compaction_flush.cc +354 -96
  68. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_debug.cc +6 -3
  69. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_experimental.cc +2 -1
  70. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_files.cc +5 -0
  71. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_open.cc +50 -21
  72. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_readonly.cc +26 -13
  73. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_readonly.h +13 -5
  74. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_secondary.cc +61 -21
  75. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_secondary.h +8 -87
  76. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_write.cc +7 -1
  77. package/deps/rocksdb/rocksdb/db/db_iter.cc +2 -2
  78. package/deps/rocksdb/rocksdb/db/db_iter.h +1 -0
  79. package/deps/rocksdb/rocksdb/db/db_merge_operand_test.cc +4 -11
  80. package/deps/rocksdb/rocksdb/db/db_merge_operator_test.cc +6 -6
  81. package/deps/rocksdb/rocksdb/db/db_options_test.cc +39 -29
  82. package/deps/rocksdb/rocksdb/db/db_properties_test.cc +26 -36
  83. package/deps/rocksdb/rocksdb/db/db_range_del_test.cc +106 -0
  84. package/deps/rocksdb/rocksdb/db/db_rate_limiter_test.cc +12 -3
  85. package/deps/rocksdb/rocksdb/db/db_statistics_test.cc +1 -1
  86. package/deps/rocksdb/rocksdb/db/db_table_properties_test.cc +1 -0
  87. package/deps/rocksdb/rocksdb/db/db_tailing_iter_test.cc +279 -166
  88. package/deps/rocksdb/rocksdb/db/db_test.cc +48 -21
  89. package/deps/rocksdb/rocksdb/db/db_test2.cc +81 -12
  90. package/deps/rocksdb/rocksdb/db/db_test_util.cc +14 -6
  91. package/deps/rocksdb/rocksdb/db/db_test_util.h +40 -0
  92. package/deps/rocksdb/rocksdb/db/db_universal_compaction_test.cc +13 -1
  93. package/deps/rocksdb/rocksdb/db/db_wal_test.cc +233 -0
  94. package/deps/rocksdb/rocksdb/db/db_with_timestamp_basic_test.cc +143 -0
  95. package/deps/rocksdb/rocksdb/db/db_write_buffer_manager_test.cc +6 -6
  96. package/deps/rocksdb/rocksdb/db/db_write_test.cc +2 -2
  97. package/deps/rocksdb/rocksdb/db/dbformat.cc +36 -0
  98. package/deps/rocksdb/rocksdb/db/dbformat.h +169 -20
  99. package/deps/rocksdb/rocksdb/db/dbformat_test.cc +129 -0
  100. package/deps/rocksdb/rocksdb/db/error_handler.cc +16 -0
  101. package/deps/rocksdb/rocksdb/db/error_handler.h +6 -3
  102. package/deps/rocksdb/rocksdb/db/error_handler_fs_test.cc +4 -4
  103. package/deps/rocksdb/rocksdb/db/event_helpers.cc +4 -0
  104. package/deps/rocksdb/rocksdb/db/experimental.cc +2 -1
  105. package/deps/rocksdb/rocksdb/db/external_sst_file_basic_test.cc +4 -4
  106. package/deps/rocksdb/rocksdb/db/external_sst_file_ingestion_job.cc +17 -8
  107. package/deps/rocksdb/rocksdb/db/external_sst_file_test.cc +86 -4
  108. package/deps/rocksdb/rocksdb/db/fault_injection_test.cc +1 -1
  109. package/deps/rocksdb/rocksdb/db/file_indexer.cc +2 -4
  110. package/deps/rocksdb/rocksdb/db/flush_job.cc +101 -11
  111. package/deps/rocksdb/rocksdb/db/flush_job.h +24 -1
  112. package/deps/rocksdb/rocksdb/db/flush_job_test.cc +88 -11
  113. package/deps/rocksdb/rocksdb/db/forward_iterator.cc +2 -3
  114. package/deps/rocksdb/rocksdb/db/import_column_family_job.cc +159 -91
  115. package/deps/rocksdb/rocksdb/db/import_column_family_job.h +19 -10
  116. package/deps/rocksdb/rocksdb/db/import_column_family_test.cc +143 -0
  117. package/deps/rocksdb/rocksdb/db/internal_stats.cc +13 -1
  118. package/deps/rocksdb/rocksdb/db/internal_stats.h +2 -0
  119. package/deps/rocksdb/rocksdb/db/listener_test.cc +2 -1
  120. package/deps/rocksdb/rocksdb/db/log_reader.h +3 -2
  121. package/deps/rocksdb/rocksdb/db/log_test.cc +17 -21
  122. package/deps/rocksdb/rocksdb/db/log_writer.cc +1 -1
  123. package/deps/rocksdb/rocksdb/db/log_writer.h +3 -2
  124. package/deps/rocksdb/rocksdb/db/manual_compaction_test.cc +4 -3
  125. package/deps/rocksdb/rocksdb/db/memtable.cc +52 -13
  126. package/deps/rocksdb/rocksdb/db/memtable.h +45 -1
  127. package/deps/rocksdb/rocksdb/db/memtable_list.cc +44 -10
  128. package/deps/rocksdb/rocksdb/db/memtable_list.h +32 -1
  129. package/deps/rocksdb/rocksdb/db/memtable_list_test.cc +90 -4
  130. package/deps/rocksdb/rocksdb/db/perf_context_test.cc +2 -2
  131. package/deps/rocksdb/rocksdb/db/plain_table_db_test.cc +1 -0
  132. package/deps/rocksdb/rocksdb/db/repair.cc +21 -4
  133. package/deps/rocksdb/rocksdb/db/repair_test.cc +143 -2
  134. package/deps/rocksdb/rocksdb/db/seqno_time_test.cc +5 -4
  135. package/deps/rocksdb/rocksdb/db/table_cache.cc +44 -35
  136. package/deps/rocksdb/rocksdb/db/table_cache.h +6 -6
  137. package/deps/rocksdb/rocksdb/db/table_cache_sync_and_async.h +2 -2
  138. package/deps/rocksdb/rocksdb/db/version_builder.cc +0 -1
  139. package/deps/rocksdb/rocksdb/db/version_builder_test.cc +236 -204
  140. package/deps/rocksdb/rocksdb/db/version_edit.cc +66 -4
  141. package/deps/rocksdb/rocksdb/db/version_edit.h +48 -6
  142. package/deps/rocksdb/rocksdb/db/version_edit_handler.cc +80 -8
  143. package/deps/rocksdb/rocksdb/db/version_edit_handler.h +12 -0
  144. package/deps/rocksdb/rocksdb/db/version_edit_test.cc +86 -17
  145. package/deps/rocksdb/rocksdb/db/version_set.cc +136 -41
  146. package/deps/rocksdb/rocksdb/db/version_set.h +28 -7
  147. package/deps/rocksdb/rocksdb/db/version_set_test.cc +25 -15
  148. package/deps/rocksdb/rocksdb/db/write_batch.cc +11 -0
  149. package/deps/rocksdb/rocksdb/db/write_batch_internal.h +3 -0
  150. package/deps/rocksdb/rocksdb/db/write_batch_test.cc +16 -0
  151. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_common.h +22 -3
  152. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_driver.cc +2 -0
  153. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_env_wrapper.h +42 -0
  154. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_gflags.cc +32 -3
  155. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_shared_state.h +7 -0
  156. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_test_base.cc +247 -120
  157. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_test_base.h +9 -4
  158. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_tool.cc +13 -6
  159. package/deps/rocksdb/rocksdb/db_stress_tool/expected_value.h +2 -0
  160. package/deps/rocksdb/rocksdb/db_stress_tool/multi_ops_txns_stress.cc +15 -27
  161. package/deps/rocksdb/rocksdb/db_stress_tool/no_batched_ops_stress.cc +264 -69
  162. package/deps/rocksdb/rocksdb/env/env.cc +1 -2
  163. package/deps/rocksdb/rocksdb/env/env_encryption.cc +11 -165
  164. package/deps/rocksdb/rocksdb/env/env_encryption_ctr.h +0 -17
  165. package/deps/rocksdb/rocksdb/env/env_posix.cc +6 -2
  166. package/deps/rocksdb/rocksdb/env/env_test.cc +86 -2
  167. package/deps/rocksdb/rocksdb/env/fs_posix.cc +6 -4
  168. package/deps/rocksdb/rocksdb/env/unique_id_gen.cc +78 -0
  169. package/deps/rocksdb/rocksdb/env/unique_id_gen.h +34 -0
  170. package/deps/rocksdb/rocksdb/file/delete_scheduler.cc +1 -0
  171. package/deps/rocksdb/rocksdb/file/delete_scheduler_test.cc +15 -4
  172. package/deps/rocksdb/rocksdb/file/file_prefetch_buffer.cc +52 -43
  173. package/deps/rocksdb/rocksdb/file/file_prefetch_buffer.h +34 -18
  174. package/deps/rocksdb/rocksdb/file/file_util.cc +10 -5
  175. package/deps/rocksdb/rocksdb/file/file_util.h +13 -1
  176. package/deps/rocksdb/rocksdb/file/prefetch_test.cc +724 -79
  177. package/deps/rocksdb/rocksdb/file/random_access_file_reader.cc +64 -33
  178. package/deps/rocksdb/rocksdb/file/random_access_file_reader.h +3 -16
  179. package/deps/rocksdb/rocksdb/file/random_access_file_reader_test.cc +23 -12
  180. package/deps/rocksdb/rocksdb/file/sequence_file_reader.h +3 -0
  181. package/deps/rocksdb/rocksdb/include/rocksdb/advanced_cache.h +2 -1
  182. package/deps/rocksdb/rocksdb/include/rocksdb/advanced_options.h +153 -88
  183. package/deps/rocksdb/rocksdb/include/rocksdb/c.h +70 -2
  184. package/deps/rocksdb/rocksdb/include/rocksdb/cache.h +50 -11
  185. package/deps/rocksdb/rocksdb/include/rocksdb/compaction_job_stats.h +3 -0
  186. package/deps/rocksdb/rocksdb/include/rocksdb/comparator.h +16 -2
  187. package/deps/rocksdb/rocksdb/include/rocksdb/convenience.h +1 -1
  188. package/deps/rocksdb/rocksdb/include/rocksdb/db.h +55 -8
  189. package/deps/rocksdb/rocksdb/include/rocksdb/env.h +32 -4
  190. package/deps/rocksdb/rocksdb/include/rocksdb/env_encryption.h +9 -109
  191. package/deps/rocksdb/rocksdb/include/rocksdb/file_system.h +90 -13
  192. package/deps/rocksdb/rocksdb/include/rocksdb/listener.h +3 -0
  193. package/deps/rocksdb/rocksdb/include/rocksdb/options.h +85 -17
  194. package/deps/rocksdb/rocksdb/include/rocksdb/secondary_cache.h +13 -1
  195. package/deps/rocksdb/rocksdb/include/rocksdb/sst_file_manager.h +2 -1
  196. package/deps/rocksdb/rocksdb/include/rocksdb/sst_file_writer.h +5 -1
  197. package/deps/rocksdb/rocksdb/include/rocksdb/statistics.h +21 -2
  198. package/deps/rocksdb/rocksdb/include/rocksdb/table.h +7 -1
  199. package/deps/rocksdb/rocksdb/include/rocksdb/table_properties.h +6 -0
  200. package/deps/rocksdb/rocksdb/include/rocksdb/thread_status.h +5 -0
  201. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/optimistic_transaction_db.h +33 -2
  202. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/stackable_db.h +14 -0
  203. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/transaction.h +33 -2
  204. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/write_batch_with_index.h +0 -3
  205. package/deps/rocksdb/rocksdb/include/rocksdb/version.h +2 -2
  206. package/deps/rocksdb/rocksdb/include/rocksdb/write_batch.h +3 -0
  207. package/deps/rocksdb/rocksdb/memory/arena_test.cc +18 -11
  208. package/deps/rocksdb/rocksdb/memory/jemalloc_nodump_allocator.cc +2 -1
  209. package/deps/rocksdb/rocksdb/microbench/db_basic_bench.cc +69 -34
  210. package/deps/rocksdb/rocksdb/monitoring/statistics.cc +16 -1
  211. package/deps/rocksdb/rocksdb/monitoring/thread_status_util_debug.cc +10 -0
  212. package/deps/rocksdb/rocksdb/options/cf_options.cc +19 -0
  213. package/deps/rocksdb/rocksdb/options/cf_options.h +10 -2
  214. package/deps/rocksdb/rocksdb/options/customizable_test.cc +2 -1
  215. package/deps/rocksdb/rocksdb/options/db_options.cc +7 -0
  216. package/deps/rocksdb/rocksdb/options/db_options.h +1 -0
  217. package/deps/rocksdb/rocksdb/options/options.cc +15 -1
  218. package/deps/rocksdb/rocksdb/options/options_helper.cc +6 -0
  219. package/deps/rocksdb/rocksdb/options/options_settable_test.cc +11 -3
  220. package/deps/rocksdb/rocksdb/options/options_test.cc +8 -0
  221. package/deps/rocksdb/rocksdb/port/mmap.h +20 -0
  222. package/deps/rocksdb/rocksdb/port/stack_trace.cc +27 -12
  223. package/deps/rocksdb/rocksdb/port/win/env_win.h +1 -1
  224. package/deps/rocksdb/rocksdb/src.mk +3 -0
  225. package/deps/rocksdb/rocksdb/table/block_based/binary_search_index_reader.cc +2 -1
  226. package/deps/rocksdb/rocksdb/table/block_based/block.cc +48 -22
  227. package/deps/rocksdb/rocksdb/table/block_based/block.h +60 -12
  228. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_builder.cc +115 -42
  229. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_factory.cc +6 -5
  230. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_iterator.cc +60 -2
  231. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_iterator.h +2 -0
  232. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader.cc +62 -44
  233. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader.h +36 -14
  234. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader_sync_and_async.h +38 -15
  235. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader_test.cc +219 -51
  236. package/deps/rocksdb/rocksdb/table/block_based/block_builder.cc +41 -8
  237. package/deps/rocksdb/rocksdb/table/block_based/block_builder.h +25 -1
  238. package/deps/rocksdb/rocksdb/table/block_based/block_prefetcher.cc +50 -21
  239. package/deps/rocksdb/rocksdb/table/block_based/block_prefetcher.h +11 -4
  240. package/deps/rocksdb/rocksdb/table/block_based/block_test.cc +195 -55
  241. package/deps/rocksdb/rocksdb/table/block_based/hash_index_reader.cc +1 -1
  242. package/deps/rocksdb/rocksdb/table/block_based/index_builder.cc +31 -16
  243. package/deps/rocksdb/rocksdb/table/block_based/index_builder.h +97 -58
  244. package/deps/rocksdb/rocksdb/table/block_based/index_reader_common.cc +1 -1
  245. package/deps/rocksdb/rocksdb/table/block_based/index_reader_common.h +6 -0
  246. package/deps/rocksdb/rocksdb/table/block_based/partitioned_filter_block.cc +27 -12
  247. package/deps/rocksdb/rocksdb/table/block_based/partitioned_filter_block.h +3 -1
  248. package/deps/rocksdb/rocksdb/table/block_based/partitioned_filter_block_test.cc +114 -70
  249. package/deps/rocksdb/rocksdb/table/block_based/partitioned_index_iterator.cc +1 -2
  250. package/deps/rocksdb/rocksdb/table/block_based/partitioned_index_reader.cc +9 -6
  251. package/deps/rocksdb/rocksdb/table/block_based/reader_common.cc +15 -3
  252. package/deps/rocksdb/rocksdb/table/block_based/reader_common.h +6 -3
  253. package/deps/rocksdb/rocksdb/table/block_fetcher.cc +11 -11
  254. package/deps/rocksdb/rocksdb/table/block_fetcher_test.cc +3 -0
  255. package/deps/rocksdb/rocksdb/table/compaction_merging_iterator.cc +1 -0
  256. package/deps/rocksdb/rocksdb/table/cuckoo/cuckoo_table_builder.cc +6 -2
  257. package/deps/rocksdb/rocksdb/table/cuckoo/cuckoo_table_builder_test.cc +1 -2
  258. package/deps/rocksdb/rocksdb/table/cuckoo/cuckoo_table_reader.cc +2 -3
  259. package/deps/rocksdb/rocksdb/table/format.cc +175 -33
  260. package/deps/rocksdb/rocksdb/table/format.h +63 -10
  261. package/deps/rocksdb/rocksdb/table/merging_iterator.cc +10 -2
  262. package/deps/rocksdb/rocksdb/table/meta_blocks.cc +12 -4
  263. package/deps/rocksdb/rocksdb/table/meta_blocks.h +1 -0
  264. package/deps/rocksdb/rocksdb/table/mock_table.cc +8 -3
  265. package/deps/rocksdb/rocksdb/table/plain/plain_table_builder.cc +10 -5
  266. package/deps/rocksdb/rocksdb/table/plain/plain_table_builder.h +10 -1
  267. package/deps/rocksdb/rocksdb/table/plain/plain_table_key_coding.cc +1 -2
  268. package/deps/rocksdb/rocksdb/table/plain/plain_table_reader.cc +3 -3
  269. package/deps/rocksdb/rocksdb/table/sst_file_dumper.cc +12 -3
  270. package/deps/rocksdb/rocksdb/table/sst_file_writer.cc +26 -1
  271. package/deps/rocksdb/rocksdb/table/table_builder.h +6 -2
  272. package/deps/rocksdb/rocksdb/table/table_properties.cc +6 -0
  273. package/deps/rocksdb/rocksdb/table/table_test.cc +52 -22
  274. package/deps/rocksdb/rocksdb/test_util/secondary_cache_test_util.h +19 -7
  275. package/deps/rocksdb/rocksdb/test_util/sync_point.h +3 -1
  276. package/deps/rocksdb/rocksdb/test_util/testutil.cc +29 -0
  277. package/deps/rocksdb/rocksdb/test_util/testutil.h +19 -0
  278. package/deps/rocksdb/rocksdb/tools/db_bench_tool.cc +65 -26
  279. package/deps/rocksdb/rocksdb/tools/ldb_cmd.cc +8 -5
  280. package/deps/rocksdb/rocksdb/tools/ldb_tool.cc +1 -0
  281. package/deps/rocksdb/rocksdb/tools/reduce_levels_test.cc +1 -0
  282. package/deps/rocksdb/rocksdb/tools/sst_dump_test.cc +0 -1
  283. package/deps/rocksdb/rocksdb/tools/sst_dump_tool.cc +4 -0
  284. package/deps/rocksdb/rocksdb/unreleased_history/README.txt +73 -0
  285. package/deps/rocksdb/rocksdb/unreleased_history/add.sh +27 -0
  286. package/deps/rocksdb/rocksdb/unreleased_history/behavior_changes/.gitkeep +0 -0
  287. package/deps/rocksdb/rocksdb/unreleased_history/bug_fixes/.gitkeep +0 -0
  288. package/deps/rocksdb/rocksdb/unreleased_history/new_features/.gitkeep +0 -0
  289. package/deps/rocksdb/rocksdb/unreleased_history/performance_improvements/.gitkeep +0 -0
  290. package/deps/rocksdb/rocksdb/unreleased_history/public_api_changes/.gitkeep +0 -0
  291. package/deps/rocksdb/rocksdb/unreleased_history/release.sh +104 -0
  292. package/deps/rocksdb/rocksdb/util/async_file_reader.cc +5 -0
  293. package/deps/rocksdb/rocksdb/util/bloom_impl.h +3 -3
  294. package/deps/rocksdb/rocksdb/util/cast_util.h +14 -0
  295. package/deps/rocksdb/rocksdb/util/compaction_job_stats_impl.cc +2 -0
  296. package/deps/rocksdb/rocksdb/util/comparator.cc +29 -7
  297. package/deps/rocksdb/rocksdb/util/compression.cc +4 -4
  298. package/deps/rocksdb/rocksdb/util/compression.h +110 -32
  299. package/deps/rocksdb/rocksdb/util/core_local.h +2 -1
  300. package/deps/rocksdb/rocksdb/util/dynamic_bloom.h +4 -4
  301. package/deps/rocksdb/rocksdb/util/filelock_test.cc +3 -0
  302. package/deps/rocksdb/rocksdb/util/hash.h +7 -3
  303. package/deps/rocksdb/rocksdb/util/hash_test.cc +44 -0
  304. package/deps/rocksdb/rocksdb/util/math.h +58 -6
  305. package/deps/rocksdb/rocksdb/util/math128.h +29 -7
  306. package/deps/rocksdb/rocksdb/util/mutexlock.h +35 -27
  307. package/deps/rocksdb/rocksdb/util/single_thread_executor.h +1 -0
  308. package/deps/rocksdb/rocksdb/util/stop_watch.h +1 -1
  309. package/deps/rocksdb/rocksdb/util/thread_operation.h +8 -1
  310. package/deps/rocksdb/rocksdb/util/udt_util.cc +343 -0
  311. package/deps/rocksdb/rocksdb/util/udt_util.h +173 -1
  312. package/deps/rocksdb/rocksdb/util/udt_util_test.cc +447 -0
  313. package/deps/rocksdb/rocksdb/util/write_batch_util.cc +25 -0
  314. package/deps/rocksdb/rocksdb/util/write_batch_util.h +80 -0
  315. package/deps/rocksdb/rocksdb/utilities/backup/backup_engine_test.cc +4 -4
  316. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_db_impl.cc +69 -25
  317. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_db_impl.h +7 -6
  318. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_db_listener.h +1 -1
  319. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_dump_tool.cc +2 -3
  320. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_file.cc +6 -11
  321. package/deps/rocksdb/rocksdb/utilities/cache_dump_load_impl.h +1 -2
  322. package/deps/rocksdb/rocksdb/utilities/checkpoint/checkpoint_test.cc +4 -5
  323. package/deps/rocksdb/rocksdb/utilities/fault_injection_fs.cc +1 -1
  324. package/deps/rocksdb/rocksdb/utilities/fault_injection_secondary_cache.cc +2 -2
  325. package/deps/rocksdb/rocksdb/utilities/fault_injection_secondary_cache.h +2 -1
  326. package/deps/rocksdb/rocksdb/utilities/option_change_migration/option_change_migration_test.cc +3 -3
  327. package/deps/rocksdb/rocksdb/utilities/persistent_cache/block_cache_tier_file.cc +1 -2
  328. package/deps/rocksdb/rocksdb/utilities/trace/file_trace_reader_writer.cc +2 -3
  329. package/deps/rocksdb/rocksdb/utilities/transactions/lock/point/point_lock_manager_test.cc +2 -2
  330. package/deps/rocksdb/rocksdb/utilities/transactions/lock/point/point_lock_manager_test.h +1 -1
  331. package/deps/rocksdb/rocksdb/utilities/transactions/optimistic_transaction.cc +23 -8
  332. package/deps/rocksdb/rocksdb/utilities/transactions/optimistic_transaction_db_impl.cc +9 -6
  333. package/deps/rocksdb/rocksdb/utilities/transactions/optimistic_transaction_db_impl.h +37 -12
  334. package/deps/rocksdb/rocksdb/utilities/transactions/optimistic_transaction_test.cc +231 -33
  335. package/deps/rocksdb/rocksdb/utilities/transactions/pessimistic_transaction.h +0 -1
  336. package/deps/rocksdb/rocksdb/utilities/transactions/transaction_base.cc +76 -20
  337. package/deps/rocksdb/rocksdb/utilities/transactions/transaction_base.h +18 -9
  338. package/deps/rocksdb/rocksdb/utilities/transactions/transaction_test.cc +40 -23
  339. package/deps/rocksdb/rocksdb/utilities/transactions/transaction_test.h +13 -12
  340. package/deps/rocksdb/rocksdb/utilities/transactions/write_committed_transaction_ts_test.cc +7 -0
  341. package/deps/rocksdb/rocksdb/utilities/transactions/write_prepared_transaction_test.cc +1 -1
  342. package/deps/rocksdb/rocksdb/utilities/transactions/write_prepared_txn.cc +41 -11
  343. package/deps/rocksdb/rocksdb/utilities/transactions/write_prepared_txn.h +6 -3
  344. package/deps/rocksdb/rocksdb/utilities/transactions/write_prepared_txn_db.cc +71 -24
  345. package/deps/rocksdb/rocksdb/utilities/transactions/write_prepared_txn_db.h +19 -4
  346. package/deps/rocksdb/rocksdb/utilities/transactions/write_unprepared_transaction_test.cc +60 -107
  347. package/deps/rocksdb/rocksdb/utilities/transactions/write_unprepared_txn.cc +39 -11
  348. package/deps/rocksdb/rocksdb/utilities/transactions/write_unprepared_txn.h +6 -3
  349. package/deps/rocksdb/rocksdb/utilities/transactions/write_unprepared_txn_db.cc +14 -8
  350. package/deps/rocksdb/rocksdb/utilities/transactions/write_unprepared_txn_db.h +1 -1
  351. package/deps/rocksdb/rocksdb/utilities/ttl/db_ttl_impl.cc +10 -5
  352. package/deps/rocksdb/rocksdb/utilities/ttl/db_ttl_impl.h +1 -1
  353. package/deps/rocksdb/rocksdb/utilities/ttl/ttl_test.cc +1 -1
  354. package/deps/rocksdb/rocksdb/utilities/write_batch_with_index/write_batch_with_index.cc +2 -1
  355. package/deps/rocksdb/rocksdb/utilities/write_batch_with_index/write_batch_with_index_test.cc +6 -6
  356. package/deps/rocksdb/rocksdb.gyp +2 -0
  357. package/package.json +1 -1
  358. package/prebuilds/darwin-arm64/node.napi.node +0 -0
  359. package/prebuilds/linux-x64/node.napi.node +0 -0
@@ -538,6 +538,56 @@ TEST_F(ExternalSSTFileTest, Basic) {
538
538
  kRangeDelSkipConfigs));
539
539
  }
540
540
 
541
+ TEST_F(ExternalSSTFileTest, BasicWideColumn) {
542
+ do {
543
+ Options options = CurrentOptions();
544
+
545
+ SstFileWriter sst_file_writer(EnvOptions(), options);
546
+
547
+ // Current file size should be 0 after sst_file_writer init and before opening
548
+ // a file.
549
+ ASSERT_EQ(sst_file_writer.FileSize(), 0);
550
+
551
+ std::string file = sst_files_dir_ + "wide_column_file.sst";
552
+ ASSERT_OK(sst_file_writer.Open(file));
553
+ for (int k = 0; k < 10; k++) {
554
+ std::string val1 = Key(k) + "_attr_1_val";
555
+ std::string val2 = Key(k) + "_attr_2_val";
556
+ WideColumns columns{{"attr_1", val1}, {"attr_2", val2}};
557
+ ASSERT_OK(sst_file_writer.PutEntity(Key(k), columns));
558
+ }
559
+ ExternalSstFileInfo file_info;
560
+ ASSERT_OK(sst_file_writer.Finish(&file_info));
561
+
562
+ // Current file size should be non-zero after a successful write.
563
+ ASSERT_GT(sst_file_writer.FileSize(), 0);
564
+
565
+ ASSERT_EQ(file_info.file_path, file);
566
+ ASSERT_EQ(file_info.num_entries, 10);
567
+ ASSERT_EQ(file_info.smallest_key, Key(0));
568
+ ASSERT_EQ(file_info.largest_key, Key(9));
569
+ ASSERT_EQ(file_info.num_range_del_entries, 0);
570
+ ASSERT_EQ(file_info.smallest_range_del_key, "");
571
+ ASSERT_EQ(file_info.largest_range_del_key, "");
572
+
573
+ DestroyAndReopen(options);
574
+ // Add file using file path
575
+ ASSERT_OK(DeprecatedAddFile({file}));
576
+ ASSERT_EQ(db_->GetLatestSequenceNumber(), 0U);
577
+ for (int k = 0; k < 10; k++) {
578
+ PinnableWideColumns result;
579
+ ASSERT_OK(db_->GetEntity(ReadOptions(), db_->DefaultColumnFamily(),
580
+ Key(k), &result));
581
+ std::string val1 = Key(k) + "_attr_1_val";
582
+ std::string val2 = Key(k) + "_attr_2_val";
583
+ WideColumns expected_columns{{"attr_1", val1}, {"attr_2", val2}};
584
+ ASSERT_EQ(result.columns(), expected_columns);
585
+ }
586
+
587
+ } while (ChangeOptions(kSkipPlainTable | kSkipFIFOCompaction |
588
+ kRangeDelSkipConfigs));
589
+ }
590
+
541
591
  class SstFileWriterCollector : public TablePropertiesCollector {
542
592
  public:
543
593
  explicit SstFileWriterCollector(const std::string prefix) : prefix_(prefix) {
@@ -2160,13 +2210,13 @@ TEST_P(ExternalSSTFileTest, IngestBehind) {
2160
2210
  // Insert 100 -> 200 into the memtable
2161
2211
  for (int i = 100; i <= 200; i++) {
2162
2212
  ASSERT_OK(Put(Key(i), "memtable"));
2163
- true_data[Key(i)] = "memtable";
2164
2213
  }
2165
2214
 
2166
2215
  // Insert 0 -> 20 using IngestExternalFile
2167
2216
  file_data.clear();
2168
2217
  for (int i = 0; i <= 20; i++) {
2169
2218
  file_data.emplace_back(Key(i), "ingest_behind");
2219
+ true_data[Key(i)] = "ingest_behind";
2170
2220
  }
2171
2221
 
2172
2222
  bool allow_global_seqno = true;
@@ -2188,6 +2238,7 @@ TEST_P(ExternalSSTFileTest, IngestBehind) {
2188
2238
 
2189
2239
  options.num_levels = 3;
2190
2240
  DestroyAndReopen(options);
2241
+ true_data.clear();
2191
2242
  // Insert 100 -> 200 into the memtable
2192
2243
  for (int i = 100; i <= 200; i++) {
2193
2244
  ASSERT_OK(Put(Key(i), "memtable"));
@@ -2207,12 +2258,43 @@ TEST_P(ExternalSSTFileTest, IngestBehind) {
2207
2258
  verify_checksums_before_ingest, true /*ingest_behind*/,
2208
2259
  false /*sort_data*/, &true_data));
2209
2260
  ASSERT_EQ("0,1,1", FilesPerLevel());
2261
+ std::vector<std::vector<FileMetaData>> level_to_files;
2262
+ dbfull()->TEST_GetFilesMetaData(db_->DefaultColumnFamily(), &level_to_files);
2263
+ uint64_t ingested_file_number = level_to_files[2][0].fd.GetNumber();
2210
2264
  ASSERT_OK(db_->CompactRange(CompactRangeOptions(), nullptr, nullptr));
2211
- // bottom level should be empty
2212
- ASSERT_EQ("0,1", FilesPerLevel());
2213
-
2265
+ // Last level should not be compacted
2266
+ ASSERT_EQ("0,1,1", FilesPerLevel());
2267
+ dbfull()->TEST_GetFilesMetaData(db_->DefaultColumnFamily(), &level_to_files);
2268
+ ASSERT_EQ(ingested_file_number, level_to_files[2][0].fd.GetNumber());
2214
2269
  size_t kcnt = 0;
2215
2270
  VerifyDBFromMap(true_data, &kcnt, false);
2271
+
2272
+ // Auto-compaction should not include the last level.
2273
+ // Trigger compaction if size amplification exceeds 110%.
2274
+ options.compaction_options_universal.max_size_amplification_percent = 110;
2275
+ options.level0_file_num_compaction_trigger = 4;
2276
+ ASSERT_OK(TryReopen(options));
2277
+ Random rnd(301);
2278
+ for (int i = 0; i < 4; ++i) {
2279
+ for (int j = 0; j < 10; j++) {
2280
+ true_data[Key(j)] = rnd.RandomString(1000);
2281
+ ASSERT_OK(Put(Key(j), true_data[Key(j)]));
2282
+ }
2283
+ ASSERT_OK(Flush());
2284
+ }
2285
+ ASSERT_OK(dbfull()->TEST_WaitForCompact());
2286
+ dbfull()->TEST_GetFilesMetaData(db_->DefaultColumnFamily(), &level_to_files);
2287
+ ASSERT_EQ(1, level_to_files[2].size());
2288
+ ASSERT_EQ(ingested_file_number, level_to_files[2][0].fd.GetNumber());
2289
+
2290
+ // Turning off the option allows DB to compact ingested files.
2291
+ options.allow_ingest_behind = false;
2292
+ ASSERT_OK(TryReopen(options));
2293
+ ASSERT_OK(db_->CompactRange(CompactRangeOptions(), nullptr, nullptr));
2294
+ dbfull()->TEST_GetFilesMetaData(db_->DefaultColumnFamily(), &level_to_files);
2295
+ ASSERT_EQ(1, level_to_files[2].size());
2296
+ ASSERT_NE(ingested_file_number, level_to_files[2][0].fd.GetNumber());
2297
+ VerifyDBFromMap(true_data, &kcnt, false);
2216
2298
  }
2217
2299
 
2218
2300
  TEST_F(ExternalSSTFileTest, SkipBloomFilter) {
@@ -443,7 +443,7 @@ TEST_P(FaultInjectionTest, UninstalledCompaction) {
443
443
  options_.level0_stop_writes_trigger = 1 << 10;
444
444
  options_.level0_slowdown_writes_trigger = 1 << 10;
445
445
  options_.max_background_compactions = 1;
446
- OpenDB();
446
+ ASSERT_OK(OpenDB());
447
447
 
448
448
  if (!sequential_order_) {
449
449
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency({
@@ -56,17 +56,15 @@ void FileIndexer::GetNextLevelIndex(const size_t level, const size_t file_index,
56
56
  } else if (cmp_smallest == 0) {
57
57
  *left_bound = index.smallest_lb;
58
58
  *right_bound = index.smallest_rb;
59
- } else if (cmp_smallest > 0 && cmp_largest < 0) {
59
+ } else if (cmp_largest < 0) {
60
60
  *left_bound = index.smallest_lb;
61
61
  *right_bound = index.largest_rb;
62
62
  } else if (cmp_largest == 0) {
63
63
  *left_bound = index.largest_lb;
64
64
  *right_bound = index.largest_rb;
65
- } else if (cmp_largest > 0) {
65
+ } else {
66
66
  *left_bound = index.largest_lb;
67
67
  *right_bound = level_rb_[level + 1];
68
- } else {
69
- assert(false);
70
68
  }
71
69
 
72
70
  assert(*left_bound >= 0);
@@ -75,8 +75,12 @@ const char* GetFlushReasonString(FlushReason flush_reason) {
75
75
  return "Manual Flush";
76
76
  case FlushReason::kErrorRecovery:
77
77
  return "Error Recovery";
78
+ case FlushReason::kErrorRecoveryRetryFlush:
79
+ return "Error Recovery Retry Flush";
78
80
  case FlushReason::kWalFull:
79
81
  return "WAL Full";
82
+ case FlushReason::kCatchUpAfterErrorRecovery:
83
+ return "Catch Up After Error Recovery";
80
84
  default:
81
85
  return "Invalid";
82
86
  }
@@ -187,6 +191,10 @@ void FlushJob::PickMemTable() {
187
191
  return;
188
192
  }
189
193
 
194
+ // Track effective cutoff user-defined timestamp during flush if
195
+ // user-defined timestamps can be stripped.
196
+ GetEffectiveCutoffUDTForPickedMemTables();
197
+
190
198
  ReportFlushInputSize(mems_);
191
199
 
192
200
  // entries mems are (implicitly) sorted in ascending order by their created
@@ -209,7 +217,8 @@ void FlushJob::PickMemTable() {
209
217
  }
210
218
 
211
219
  Status FlushJob::Run(LogsWithPrepTracker* prep_tracker, FileMetaData* file_meta,
212
- bool* switched_to_mempurge) {
220
+ bool* switched_to_mempurge, bool* skipped_since_bg_error,
221
+ ErrorHandler* error_handler) {
213
222
  TEST_SYNC_POINT("FlushJob::Start");
214
223
  db_mutex_->AssertHeld();
215
224
  assert(pick_memtable_called);
@@ -292,18 +301,37 @@ Status FlushJob::Run(LogsWithPrepTracker* prep_tracker, FileMetaData* file_meta,
292
301
  s = Status::ShutdownInProgress("Database shutdown");
293
302
  }
294
303
 
304
+ if (s.ok()) {
305
+ s = MaybeIncreaseFullHistoryTsLowToAboveCutoffUDT();
306
+ }
307
+
295
308
  if (!s.ok()) {
296
- cfd_->imm()->RollbackMemtableFlush(mems_, meta_.fd.GetNumber());
309
+ cfd_->imm()->RollbackMemtableFlush(
310
+ mems_, /*rollback_succeeding_memtables=*/!db_options_.atomic_flush);
297
311
  } else if (write_manifest_) {
298
- TEST_SYNC_POINT("FlushJob::InstallResults");
299
- // Replace immutable memtable with the generated Table
300
- s = cfd_->imm()->TryInstallMemtableFlushResults(
301
- cfd_, mutable_cf_options_, mems_, prep_tracker, versions_, db_mutex_,
302
- meta_.fd.GetNumber(), &job_context_->memtables_to_free, db_directory_,
303
- log_buffer_, &committed_flush_jobs_info_,
304
- !(mempurge_s.ok()) /* write_edit : true if no mempurge happened (or if aborted),
312
+ assert(!db_options_.atomic_flush);
313
+ if (!db_options_.atomic_flush &&
314
+ flush_reason_ != FlushReason::kErrorRecovery &&
315
+ flush_reason_ != FlushReason::kErrorRecoveryRetryFlush &&
316
+ error_handler && !error_handler->GetBGError().ok() &&
317
+ error_handler->IsBGWorkStopped()) {
318
+ cfd_->imm()->RollbackMemtableFlush(
319
+ mems_, /*rollback_succeeding_memtables=*/!db_options_.atomic_flush);
320
+ s = error_handler->GetBGError();
321
+ if (skipped_since_bg_error) {
322
+ *skipped_since_bg_error = true;
323
+ }
324
+ } else {
325
+ TEST_SYNC_POINT("FlushJob::InstallResults");
326
+ // Replace immutable memtable with the generated Table
327
+ s = cfd_->imm()->TryInstallMemtableFlushResults(
328
+ cfd_, mutable_cf_options_, mems_, prep_tracker, versions_, db_mutex_,
329
+ meta_.fd.GetNumber(), &job_context_->memtables_to_free, db_directory_,
330
+ log_buffer_, &committed_flush_jobs_info_,
331
+ !(mempurge_s.ok()) /* write_edit : true if no mempurge happened (or if aborted),
305
332
  but 'false' if mempurge successful: no new min log number
306
333
  or new level 0 file path to write to manifest. */);
334
+ }
307
335
  }
308
336
 
309
337
  if (s.ok() && file_meta != nullptr) {
@@ -352,6 +380,7 @@ Status FlushJob::Run(LogsWithPrepTracker* prep_tracker, FileMetaData* file_meta,
352
380
  << (IOSTATS(cpu_read_nanos) - prev_cpu_read_nanos);
353
381
  }
354
382
 
383
+ TEST_SYNC_POINT("FlushJob::End");
355
384
  return s;
356
385
  }
357
386
 
@@ -479,6 +508,7 @@ Status FlushJob::MemPurge() {
479
508
  nullptr, ioptions->allow_data_in_errors,
480
509
  ioptions->enforce_single_del_contracts,
481
510
  /*manual_compaction_canceled=*/kManualCompactionCanceledFalse,
511
+ false /* must_count_input_entries */,
482
512
  /*compaction=*/nullptr, compaction_filter.get(),
483
513
  /*shutting_down=*/nullptr, ioptions->info_log, full_history_ts_low);
484
514
 
@@ -849,6 +879,7 @@ Status FlushJob::WriteLevel0Table() {
849
879
  uint64_t total_num_entries = 0, total_num_deletes = 0;
850
880
  uint64_t total_data_size = 0;
851
881
  size_t total_memory_usage = 0;
882
+ uint64_t total_num_range_deletes = 0;
852
883
  // Used for testing:
853
884
  uint64_t mems_size = mems_.size();
854
885
  (void)mems_size; // avoids unused variable error when
@@ -871,15 +902,20 @@ Status FlushJob::WriteLevel0Table() {
871
902
  total_num_deletes += m->num_deletes();
872
903
  total_data_size += m->get_data_size();
873
904
  total_memory_usage += m->ApproximateMemoryUsage();
905
+ total_num_range_deletes += m->num_range_deletes();
874
906
  }
875
907
 
908
+ // TODO(cbi): when memtable is flushed due to number of range deletions
909
+ // hitting limit memtable_max_range_deletions, flush_reason_ is still
910
+ // "Write Buffer Full"; flush_reason_ should be updated accordingly.
876
911
  event_logger_->Log() << "job" << job_context_->job_id << "event"
877
912
  << "flush_started"
878
913
  << "num_memtables" << mems_.size() << "num_entries"
879
914
  << total_num_entries << "num_deletes"
880
915
  << total_num_deletes << "total_data_size"
881
916
  << total_data_size << "memory_usage"
882
- << total_memory_usage << "flush_reason"
917
+ << total_memory_usage << "num_range_deletes"
918
+ << total_num_range_deletes << "flush_reason"
883
919
  << GetFlushReasonString(flush_reason_);
884
920
 
885
921
  {
@@ -947,6 +983,7 @@ Status FlushJob::WriteLevel0Table() {
947
983
  &table_properties_, write_hint, full_history_ts_low,
948
984
  blob_callback_, base_, &num_input_entries,
949
985
  &memtable_payload_bytes, &memtable_garbage_bytes);
986
+ TEST_SYNC_POINT_CALLBACK("FlushJob::WriteLevel0Table:s", &s);
950
987
  // TODO: Cleanup io_status in BuildTable and table builders
951
988
  assert(!s.ok() || io_s.ok());
952
989
  io_s.PermitUncheckedError();
@@ -1008,7 +1045,7 @@ Status FlushJob::WriteLevel0Table() {
1008
1045
  meta_.file_creation_time, meta_.epoch_number,
1009
1046
  meta_.file_checksum, meta_.file_checksum_func_name,
1010
1047
  meta_.unique_id, meta_.compensated_range_deletion_size,
1011
- meta_.tail_size);
1048
+ meta_.tail_size, meta_.user_defined_timestamps_persisted);
1012
1049
  edit_->SetBlobFileAdditions(std::move(blob_file_additions));
1013
1050
  }
1014
1051
  // Piggyback FlushJobInfo on the first flushed memtable.
@@ -1094,4 +1131,57 @@ std::unique_ptr<FlushJobInfo> FlushJob::GetFlushJobInfo() const {
1094
1131
  return info;
1095
1132
  }
1096
1133
 
1134
+ void FlushJob::GetEffectiveCutoffUDTForPickedMemTables() {
1135
+ db_mutex_->AssertHeld();
1136
+ assert(pick_memtable_called);
1137
+ const auto* ucmp = cfd_->internal_comparator().user_comparator();
1138
+ assert(ucmp);
1139
+ const size_t ts_sz = ucmp->timestamp_size();
1140
+ if (db_options_.atomic_flush || ts_sz == 0 ||
1141
+ cfd_->ioptions()->persist_user_defined_timestamps) {
1142
+ return;
1143
+ }
1144
+ for (MemTable* m : mems_) {
1145
+ Slice table_newest_udt = m->GetNewestUDT();
1146
+ // The picked Memtables should have ascending ID, and should have
1147
+ // non-decreasing newest user-defined timestamps.
1148
+ if (!cutoff_udt_.empty()) {
1149
+ assert(table_newest_udt.size() == cutoff_udt_.size());
1150
+ assert(ucmp->CompareTimestamp(table_newest_udt, cutoff_udt_) >= 0);
1151
+ cutoff_udt_.clear();
1152
+ }
1153
+ cutoff_udt_.assign(table_newest_udt.data(), table_newest_udt.size());
1154
+ }
1155
+ }
1156
+
1157
+ Status FlushJob::MaybeIncreaseFullHistoryTsLowToAboveCutoffUDT() {
1158
+ db_mutex_->AssertHeld();
1159
+ const auto* ucmp = cfd_->user_comparator();
1160
+ assert(ucmp);
1161
+ const std::string& full_history_ts_low = cfd_->GetFullHistoryTsLow();
1162
+ // Update full_history_ts_low to right above cutoff udt only if that would
1163
+ // increase it.
1164
+ if (cutoff_udt_.empty() ||
1165
+ (!full_history_ts_low.empty() &&
1166
+ ucmp->CompareTimestamp(cutoff_udt_, full_history_ts_low) < 0)) {
1167
+ return Status::OK();
1168
+ }
1169
+ Slice cutoff_udt_slice = cutoff_udt_;
1170
+ uint64_t cutoff_udt_ts = 0;
1171
+ bool format_res = GetFixed64(&cutoff_udt_slice, &cutoff_udt_ts);
1172
+ assert(format_res);
1173
+ (void)format_res;
1174
+ std::string new_full_history_ts_low;
1175
+ // TODO(yuzhangyu): Add a member to AdvancedColumnFamilyOptions for an
1176
+ // operation to get the next immediately larger user-defined timestamp to
1177
+ // expand this feature to other user-defined timestamp formats.
1178
+ PutFixed64(&new_full_history_ts_low, cutoff_udt_ts + 1);
1179
+ VersionEdit edit;
1180
+ edit.SetColumnFamily(cfd_->GetID());
1181
+ edit.SetFullHistoryTsLow(new_full_history_ts_low);
1182
+ return versions_->LogAndApply(cfd_, *cfd_->GetLatestMutableCFOptions(),
1183
+ ReadOptions(), &edit, db_mutex_,
1184
+ output_file_directory_);
1185
+ }
1186
+
1097
1187
  } // namespace ROCKSDB_NAMESPACE
@@ -83,9 +83,14 @@ class FlushJob {
83
83
  // Require db_mutex held.
84
84
  // Once PickMemTable() is called, either Run() or Cancel() has to be called.
85
85
  void PickMemTable();
86
+ // @param skipped_since_bg_error If not nullptr and if atomic_flush=false,
87
+ // then it is set to true if flush installation is skipped and memtable
88
+ // is rolled back due to existing background error.
86
89
  Status Run(LogsWithPrepTracker* prep_tracker = nullptr,
87
90
  FileMetaData* file_meta = nullptr,
88
- bool* switched_to_mempurge = nullptr);
91
+ bool* switched_to_mempurge = nullptr,
92
+ bool* skipped_since_bg_error = nullptr,
93
+ ErrorHandler* error_handler = nullptr);
89
94
  void Cancel();
90
95
  const autovector<MemTable*>& GetMemTables() const { return mems_; }
91
96
 
@@ -127,6 +132,20 @@ class FlushJob {
127
132
  Env::IOPriority GetRateLimiterPriorityForWrite();
128
133
  std::unique_ptr<FlushJobInfo> GetFlushJobInfo() const;
129
134
 
135
+ // Require db_mutex held.
136
+ // Called only when UDT feature is enabled and
137
+ // `persist_user_defined_timestamps` flag is false. Because we will refrain
138
+ // from flushing as long as there are still UDTs in a memtable that hasn't
139
+ // expired w.r.t `full_history_ts_low`. However, flush is continued if there
140
+ // is risk of entering write stall mode. In that case, we need
141
+ // to track the effective cutoff timestamp below which all the udts are
142
+ // removed because of flush, and use it to increase `full_history_ts_low` if
143
+ // the effective cutoff timestamp is newer. See
144
+ // `MaybeIncreaseFullHistoryTsLowToAboveCutoffUDT` for details.
145
+ void GetEffectiveCutoffUDTForPickedMemTables();
146
+
147
+ Status MaybeIncreaseFullHistoryTsLowToAboveCutoffUDT();
148
+
130
149
  const std::string& dbname_;
131
150
  const std::string db_id_;
132
151
  const std::string db_session_id_;
@@ -195,6 +214,10 @@ class FlushJob {
195
214
  // db mutex
196
215
  const SeqnoToTimeMapping& db_impl_seqno_time_mapping_;
197
216
  SeqnoToTimeMapping seqno_to_time_mapping_;
217
+
218
+ // Keeps track of the newest user-defined timestamp for this flush job if
219
+ // `persist_user_defined_timestamps` flag is false.
220
+ std::string cutoff_udt_;
198
221
  };
199
222
 
200
223
  } // namespace ROCKSDB_NAMESPACE
@@ -70,6 +70,7 @@ class FlushJobTestBase : public testing::Test {
70
70
  new_cf.AddColumnFamily(column_family_names_[i]);
71
71
  new_cf.SetColumnFamily(cf_id++);
72
72
  new_cf.SetComparatorName(ucmp_->Name());
73
+ new_cf.SetPersistUserDefinedTimestamps(persist_udt_);
73
74
  new_cf.SetLogNumber(0);
74
75
  new_cf.SetNextFile(2);
75
76
  new_cf.SetLastSequence(last_seq++);
@@ -117,6 +118,8 @@ class FlushJobTestBase : public testing::Test {
117
118
  db_options_.statistics = CreateDBStatistics();
118
119
 
119
120
  cf_options_.comparator = ucmp_;
121
+ cf_options_.persist_user_defined_timestamps = persist_udt_;
122
+ cf_options_.paranoid_file_checks = paranoid_file_checks_;
120
123
 
121
124
  std::vector<ColumnFamilyDescriptor> column_families;
122
125
  cf_options_.table_factory = mock_table_factory_;
@@ -149,6 +152,9 @@ class FlushJobTestBase : public testing::Test {
149
152
  std::atomic<bool> shutting_down_;
150
153
  std::shared_ptr<mock::MockTableFactory> mock_table_factory_;
151
154
 
155
+ bool persist_udt_ = true;
156
+ bool paranoid_file_checks_ = false;
157
+
152
158
  SeqnoToTimeMapping empty_seqno_to_time_mapping_;
153
159
  };
154
160
 
@@ -600,7 +606,13 @@ TEST_F(FlushJobTest, GetRateLimiterPriorityForWrite) {
600
606
  }
601
607
  }
602
608
 
603
- class FlushJobTimestampTest : public FlushJobTestBase {
609
+ // Test parameters:
610
+ // param 0): paranoid file check
611
+ // param 1): user-defined timestamp test mode
612
+ class FlushJobTimestampTest
613
+ : public FlushJobTestBase,
614
+ public testing::WithParamInterface<
615
+ std::tuple<bool, test::UserDefinedTimestampTestMode>> {
604
616
  public:
605
617
  FlushJobTimestampTest()
606
618
  : FlushJobTestBase(test::PerThreadDBPath("flush_job_ts_gc_test"),
@@ -616,13 +628,40 @@ class FlushJobTimestampTest : public FlushJobTestBase {
616
628
  }
617
629
 
618
630
  protected:
631
+ void SetUp() override {
632
+ paranoid_file_checks_ = std::get<0>(GetParam());
633
+ auto udt_test_mode = std::get<1>(GetParam());
634
+ persist_udt_ = test::ShouldPersistUDT(udt_test_mode);
635
+ FlushJobTestBase::SetUp();
636
+ }
619
637
  static constexpr uint64_t kStartTs = 10;
620
638
  static constexpr SequenceNumber kStartSeq = 0;
621
639
  SequenceNumber curr_seq_{kStartSeq};
622
640
  std::atomic<uint64_t> curr_ts_{kStartTs};
641
+
642
+ void CheckFileMetaData(ColumnFamilyData* cfd,
643
+ const InternalKey& expected_smallest,
644
+ const InternalKey& expected_largest,
645
+ const FileMetaData* meta_from_flush) const {
646
+ ASSERT_EQ(expected_smallest.Encode(), meta_from_flush->smallest.Encode());
647
+ ASSERT_EQ(expected_largest.Encode(), meta_from_flush->largest.Encode());
648
+
649
+ const VersionStorageInfo* storage_info = cfd->current()->storage_info();
650
+ const std::vector<FileMetaData*>& l0_files = storage_info->LevelFiles(0);
651
+
652
+ ASSERT_EQ(l0_files.size(), 1);
653
+ auto installed_file_meta = l0_files[0];
654
+ ASSERT_EQ(expected_smallest.Encode(),
655
+ installed_file_meta->smallest.Encode());
656
+ ASSERT_EQ(expected_largest.Encode(), installed_file_meta->largest.Encode());
657
+ }
658
+ void CheckFullHistoryTsLow(ColumnFamilyData* cfd,
659
+ const std::string& expected_full_history_ts_low) {
660
+ ASSERT_EQ(expected_full_history_ts_low, cfd->GetFullHistoryTsLow());
661
+ }
623
662
  };
624
663
 
625
- TEST_F(FlushJobTimestampTest, AllKeysExpired) {
664
+ TEST_P(FlushJobTimestampTest, AllKeysExpired) {
626
665
  ColumnFamilyData* cfd = versions_->GetColumnFamilySet()->GetDefault();
627
666
  autovector<MemTable*> to_delete;
628
667
 
@@ -650,6 +689,7 @@ TEST_F(FlushJobTimestampTest, AllKeysExpired) {
650
689
  EventLogger event_logger(db_options_.info_log.get());
651
690
  std::string full_history_ts_low;
652
691
  PutFixed64(&full_history_ts_low, std::numeric_limits<uint64_t>::max());
692
+ cfd->SetFullHistoryTsLow(full_history_ts_low);
653
693
  FlushJob flush_job(
654
694
  dbname_, cfd, db_options_, *cfd->GetLatestMutableCFOptions(),
655
695
  std::numeric_limits<uint64_t>::max() /* memtable_id */, env_options_,
@@ -669,17 +709,25 @@ TEST_F(FlushJobTimestampTest, AllKeysExpired) {
669
709
 
670
710
  {
671
711
  std::string key = test::EncodeInt(0);
672
- key.append(test::EncodeInt(curr_ts_.load(std::memory_order_relaxed) - 1));
712
+ if (!persist_udt_) {
713
+ // When `AdvancedColumnFamilyOptions.persist_user_defined_timestamps` flag
714
+ // is set to false, the user-defined timestamp is stripped from user key
715
+ // during flush, making the user key logically contain the minimum
716
+ // timestamp.
717
+ key.append(test::EncodeInt(0));
718
+ } else {
719
+ key.append(test::EncodeInt(curr_ts_.load(std::memory_order_relaxed) - 1));
720
+ }
673
721
  InternalKey ikey(key, curr_seq_ - 1, ValueType::kTypeDeletionWithTimestamp);
674
- ASSERT_EQ(ikey.Encode(), fmeta.smallest.Encode());
675
- ASSERT_EQ(ikey.Encode(), fmeta.largest.Encode());
722
+ CheckFileMetaData(cfd, ikey, ikey, &fmeta);
723
+ CheckFullHistoryTsLow(cfd, full_history_ts_low);
676
724
  }
677
725
 
678
726
  job_context.Clean();
679
727
  ASSERT_TRUE(to_delete.empty());
680
728
  }
681
729
 
682
- TEST_F(FlushJobTimestampTest, NoKeyExpired) {
730
+ TEST_P(FlushJobTimestampTest, NoKeyExpired) {
683
731
  ColumnFamilyData* cfd = versions_->GetColumnFamilySet()->GetDefault();
684
732
  autovector<MemTable*> to_delete;
685
733
 
@@ -703,6 +751,7 @@ TEST_F(FlushJobTimestampTest, NoKeyExpired) {
703
751
  EventLogger event_logger(db_options_.info_log.get());
704
752
  std::string full_history_ts_low;
705
753
  PutFixed64(&full_history_ts_low, 0);
754
+ cfd->SetFullHistoryTsLow(full_history_ts_low);
706
755
  FlushJob flush_job(
707
756
  dbname_, cfd, db_options_, *cfd->GetLatestMutableCFOptions(),
708
757
  std::numeric_limits<uint64_t>::max() /* memtable_id */, env_options_,
@@ -722,18 +771,46 @@ TEST_F(FlushJobTimestampTest, NoKeyExpired) {
722
771
 
723
772
  {
724
773
  std::string ukey = test::EncodeInt(0);
725
- std::string smallest_key =
726
- ukey + test::EncodeInt(curr_ts_.load(std::memory_order_relaxed) - 1);
727
- std::string largest_key = ukey + test::EncodeInt(kStartTs);
774
+ std::string smallest_key;
775
+ std::string largest_key;
776
+ std::string expected_full_history_ts_low;
777
+ if (!persist_udt_) {
778
+ // When `AdvancedColumnFamilyOptions.persist_user_defined_timestamps` flag
779
+ // is set to false, the user-defined timestamp is stripped from user key
780
+ // during flush, making the user key logically contain the minimum
781
+ // timestamp, which is hardcoded to be all zeros for now.
782
+ smallest_key = ukey + test::EncodeInt(0);
783
+ largest_key = ukey + test::EncodeInt(0);
784
+ // When not all keys have expired and `persist_user_defined_timestamps` is
785
+ // false, UDTs will be removed during flush, so `full_history_ts_low` should
786
+ // be automatically increased to above the effective cutoff UDT in the
787
+ // flush.
788
+ PutFixed64(&expected_full_history_ts_low, curr_ts_.fetch_add(1));
789
+ } else {
790
+ smallest_key =
791
+ ukey + test::EncodeInt(curr_ts_.load(std::memory_order_relaxed) - 1);
792
+ largest_key = ukey + test::EncodeInt(kStartTs);
793
+ expected_full_history_ts_low = full_history_ts_low;
794
+ }
728
795
  InternalKey smallest(smallest_key, curr_seq_ - 1, ValueType::kTypeValue);
729
796
  InternalKey largest(largest_key, kStartSeq, ValueType::kTypeValue);
730
- ASSERT_EQ(smallest.Encode(), fmeta.smallest.Encode());
731
- ASSERT_EQ(largest.Encode(), fmeta.largest.Encode());
797
+ CheckFileMetaData(cfd, smallest, largest, &fmeta);
798
+ CheckFullHistoryTsLow(cfd, expected_full_history_ts_low);
732
799
  }
733
800
  job_context.Clean();
734
801
  ASSERT_TRUE(to_delete.empty());
735
802
  }
736
803
 
804
+ // Param 0: paranoid file check
805
+ // Param 1: test mode for the user-defined timestamp feature
806
+ INSTANTIATE_TEST_CASE_P(
807
+ FlushJobTimestampTest, FlushJobTimestampTest,
808
+ ::testing::Combine(
809
+ ::testing::Bool(),
810
+ ::testing::Values(
811
+ test::UserDefinedTimestampTestMode::kStripUserDefinedTimestamp,
812
+ test::UserDefinedTimestampTestMode::kNormal)));
813
+
737
814
  } // namespace ROCKSDB_NAMESPACE
738
815
 
739
816
  int main(int argc, char** argv) {
@@ -241,10 +241,10 @@ ForwardIterator::ForwardIterator(DBImpl* db, const ReadOptions& read_options,
241
241
  if (sv_) {
242
242
  RebuildIterators(false);
243
243
  }
244
- if (!cfd_->ioptions()->env->GetFileSystem()->use_async_io()) {
244
+ if (!CheckFSFeatureSupport(cfd_->ioptions()->env->GetFileSystem().get(),
245
+ FSSupportedOps::kAsyncIO)) {
245
246
  read_options_.async_io = false;
246
247
  }
247
-
248
248
  // immutable_status_ is a local aggregation of the
249
249
  // status of the immutable Iterators.
250
250
  // We have to PermitUncheckedError in case it is never
@@ -1067,4 +1067,3 @@ void ForwardIterator::DeleteIterator(InternalIterator* iter, bool is_arena) {
1067
1067
  }
1068
1068
 
1069
1069
  } // namespace ROCKSDB_NAMESPACE
1070
-