@nxtedition/rocksdb 8.2.7 → 9.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (359)
  1. package/deps/rocksdb/rocksdb/CMakeLists.txt +7 -1
  2. package/deps/rocksdb/rocksdb/Makefile +22 -19
  3. package/deps/rocksdb/rocksdb/TARGETS +8 -0
  4. package/deps/rocksdb/rocksdb/cache/cache_bench_tool.cc +157 -61
  5. package/deps/rocksdb/rocksdb/cache/cache_test.cc +43 -92
  6. package/deps/rocksdb/rocksdb/cache/clock_cache.cc +632 -455
  7. package/deps/rocksdb/rocksdb/cache/clock_cache.h +244 -149
  8. package/deps/rocksdb/rocksdb/cache/compressed_secondary_cache.cc +41 -13
  9. package/deps/rocksdb/rocksdb/cache/compressed_secondary_cache.h +11 -1
  10. package/deps/rocksdb/rocksdb/cache/compressed_secondary_cache_test.cc +216 -17
  11. package/deps/rocksdb/rocksdb/cache/lru_cache.cc +7 -5
  12. package/deps/rocksdb/rocksdb/cache/lru_cache_test.cc +279 -199
  13. package/deps/rocksdb/rocksdb/cache/secondary_cache.cc +2 -1
  14. package/deps/rocksdb/rocksdb/cache/secondary_cache_adapter.cc +159 -8
  15. package/deps/rocksdb/rocksdb/cache/secondary_cache_adapter.h +28 -2
  16. package/deps/rocksdb/rocksdb/cache/sharded_cache.cc +1 -1
  17. package/deps/rocksdb/rocksdb/cache/sharded_cache.h +8 -0
  18. package/deps/rocksdb/rocksdb/crash_test.mk +14 -0
  19. package/deps/rocksdb/rocksdb/db/arena_wrapped_db_iter.cc +3 -1
  20. package/deps/rocksdb/rocksdb/db/blob/blob_file_builder.cc +1 -1
  21. package/deps/rocksdb/rocksdb/db/blob/blob_file_builder_test.cc +1 -1
  22. package/deps/rocksdb/rocksdb/db/blob/blob_file_cache.cc +2 -2
  23. package/deps/rocksdb/rocksdb/db/blob/blob_file_cache.h +1 -1
  24. package/deps/rocksdb/rocksdb/db/blob/blob_file_reader.cc +18 -21
  25. package/deps/rocksdb/rocksdb/db/blob/blob_file_reader.h +1 -2
  26. package/deps/rocksdb/rocksdb/db/blob/blob_file_reader_test.cc +1 -1
  27. package/deps/rocksdb/rocksdb/db/blob/blob_log_sequential_reader.cc +2 -3
  28. package/deps/rocksdb/rocksdb/db/blob/blob_source_test.cc +1 -1
  29. package/deps/rocksdb/rocksdb/db/builder.cc +32 -7
  30. package/deps/rocksdb/rocksdb/db/c.cc +169 -6
  31. package/deps/rocksdb/rocksdb/db/c_test.c +104 -6
  32. package/deps/rocksdb/rocksdb/db/column_family.cc +98 -47
  33. package/deps/rocksdb/rocksdb/db/column_family.h +25 -2
  34. package/deps/rocksdb/rocksdb/db/column_family_test.cc +213 -2
  35. package/deps/rocksdb/rocksdb/db/compact_files_test.cc +4 -1
  36. package/deps/rocksdb/rocksdb/db/compaction/compaction.cc +93 -23
  37. package/deps/rocksdb/rocksdb/db/compaction/compaction.h +33 -9
  38. package/deps/rocksdb/rocksdb/db/compaction/compaction_iterator.cc +7 -6
  39. package/deps/rocksdb/rocksdb/db/compaction/compaction_iterator.h +17 -6
  40. package/deps/rocksdb/rocksdb/db/compaction/compaction_iterator_test.cc +2 -2
  41. package/deps/rocksdb/rocksdb/db/compaction/compaction_job.cc +107 -43
  42. package/deps/rocksdb/rocksdb/db/compaction/compaction_job.h +15 -4
  43. package/deps/rocksdb/rocksdb/db/compaction/compaction_job_stats_test.cc +2 -0
  44. package/deps/rocksdb/rocksdb/db/compaction/compaction_job_test.cc +4 -2
  45. package/deps/rocksdb/rocksdb/db/compaction/compaction_outputs.cc +25 -17
  46. package/deps/rocksdb/rocksdb/db/compaction/compaction_outputs.h +13 -4
  47. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker.cc +11 -11
  48. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_test.cc +29 -4
  49. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_universal.cc +24 -31
  50. package/deps/rocksdb/rocksdb/db/compaction/file_pri.h +3 -1
  51. package/deps/rocksdb/rocksdb/db/compaction/tiered_compaction_test.cc +19 -19
  52. package/deps/rocksdb/rocksdb/db/comparator_db_test.cc +2 -1
  53. package/deps/rocksdb/rocksdb/db/convenience.cc +20 -3
  54. package/deps/rocksdb/rocksdb/db/convenience_impl.h +15 -0
  55. package/deps/rocksdb/rocksdb/db/corruption_test.cc +17 -0
  56. package/deps/rocksdb/rocksdb/db/cuckoo_table_db_test.cc +1 -0
  57. package/deps/rocksdb/rocksdb/db/db_basic_test.cc +17 -3
  58. package/deps/rocksdb/rocksdb/db/db_block_cache_test.cc +5 -0
  59. package/deps/rocksdb/rocksdb/db/db_bloom_filter_test.cc +15 -15
  60. package/deps/rocksdb/rocksdb/db/db_compaction_test.cc +666 -44
  61. package/deps/rocksdb/rocksdb/db/db_filesnapshot.cc +2 -29
  62. package/deps/rocksdb/rocksdb/db/db_flush_test.cc +274 -1
  63. package/deps/rocksdb/rocksdb/db/db_impl/compacted_db_impl.cc +40 -19
  64. package/deps/rocksdb/rocksdb/db/db_impl/compacted_db_impl.h +6 -5
  65. package/deps/rocksdb/rocksdb/db/db_impl/db_impl.cc +250 -116
  66. package/deps/rocksdb/rocksdb/db/db_impl/db_impl.h +51 -23
  67. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_compaction_flush.cc +354 -96
  68. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_debug.cc +6 -3
  69. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_experimental.cc +2 -1
  70. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_files.cc +5 -0
  71. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_open.cc +50 -21
  72. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_readonly.cc +26 -13
  73. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_readonly.h +13 -5
  74. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_secondary.cc +61 -21
  75. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_secondary.h +8 -87
  76. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_write.cc +7 -1
  77. package/deps/rocksdb/rocksdb/db/db_iter.cc +2 -2
  78. package/deps/rocksdb/rocksdb/db/db_iter.h +1 -0
  79. package/deps/rocksdb/rocksdb/db/db_merge_operand_test.cc +4 -11
  80. package/deps/rocksdb/rocksdb/db/db_merge_operator_test.cc +6 -6
  81. package/deps/rocksdb/rocksdb/db/db_options_test.cc +39 -29
  82. package/deps/rocksdb/rocksdb/db/db_properties_test.cc +26 -36
  83. package/deps/rocksdb/rocksdb/db/db_range_del_test.cc +106 -0
  84. package/deps/rocksdb/rocksdb/db/db_rate_limiter_test.cc +12 -3
  85. package/deps/rocksdb/rocksdb/db/db_statistics_test.cc +1 -1
  86. package/deps/rocksdb/rocksdb/db/db_table_properties_test.cc +1 -0
  87. package/deps/rocksdb/rocksdb/db/db_tailing_iter_test.cc +279 -166
  88. package/deps/rocksdb/rocksdb/db/db_test.cc +48 -21
  89. package/deps/rocksdb/rocksdb/db/db_test2.cc +81 -12
  90. package/deps/rocksdb/rocksdb/db/db_test_util.cc +14 -6
  91. package/deps/rocksdb/rocksdb/db/db_test_util.h +40 -0
  92. package/deps/rocksdb/rocksdb/db/db_universal_compaction_test.cc +13 -1
  93. package/deps/rocksdb/rocksdb/db/db_wal_test.cc +233 -0
  94. package/deps/rocksdb/rocksdb/db/db_with_timestamp_basic_test.cc +143 -0
  95. package/deps/rocksdb/rocksdb/db/db_write_buffer_manager_test.cc +6 -6
  96. package/deps/rocksdb/rocksdb/db/db_write_test.cc +2 -2
  97. package/deps/rocksdb/rocksdb/db/dbformat.cc +36 -0
  98. package/deps/rocksdb/rocksdb/db/dbformat.h +169 -20
  99. package/deps/rocksdb/rocksdb/db/dbformat_test.cc +129 -0
  100. package/deps/rocksdb/rocksdb/db/error_handler.cc +16 -0
  101. package/deps/rocksdb/rocksdb/db/error_handler.h +6 -3
  102. package/deps/rocksdb/rocksdb/db/error_handler_fs_test.cc +4 -4
  103. package/deps/rocksdb/rocksdb/db/event_helpers.cc +4 -0
  104. package/deps/rocksdb/rocksdb/db/experimental.cc +2 -1
  105. package/deps/rocksdb/rocksdb/db/external_sst_file_basic_test.cc +4 -4
  106. package/deps/rocksdb/rocksdb/db/external_sst_file_ingestion_job.cc +17 -8
  107. package/deps/rocksdb/rocksdb/db/external_sst_file_test.cc +86 -4
  108. package/deps/rocksdb/rocksdb/db/fault_injection_test.cc +1 -1
  109. package/deps/rocksdb/rocksdb/db/file_indexer.cc +2 -4
  110. package/deps/rocksdb/rocksdb/db/flush_job.cc +101 -11
  111. package/deps/rocksdb/rocksdb/db/flush_job.h +24 -1
  112. package/deps/rocksdb/rocksdb/db/flush_job_test.cc +88 -11
  113. package/deps/rocksdb/rocksdb/db/forward_iterator.cc +2 -3
  114. package/deps/rocksdb/rocksdb/db/import_column_family_job.cc +159 -91
  115. package/deps/rocksdb/rocksdb/db/import_column_family_job.h +19 -10
  116. package/deps/rocksdb/rocksdb/db/import_column_family_test.cc +143 -0
  117. package/deps/rocksdb/rocksdb/db/internal_stats.cc +13 -1
  118. package/deps/rocksdb/rocksdb/db/internal_stats.h +2 -0
  119. package/deps/rocksdb/rocksdb/db/listener_test.cc +2 -1
  120. package/deps/rocksdb/rocksdb/db/log_reader.h +3 -2
  121. package/deps/rocksdb/rocksdb/db/log_test.cc +17 -21
  122. package/deps/rocksdb/rocksdb/db/log_writer.cc +1 -1
  123. package/deps/rocksdb/rocksdb/db/log_writer.h +3 -2
  124. package/deps/rocksdb/rocksdb/db/manual_compaction_test.cc +4 -3
  125. package/deps/rocksdb/rocksdb/db/memtable.cc +52 -13
  126. package/deps/rocksdb/rocksdb/db/memtable.h +45 -1
  127. package/deps/rocksdb/rocksdb/db/memtable_list.cc +44 -10
  128. package/deps/rocksdb/rocksdb/db/memtable_list.h +32 -1
  129. package/deps/rocksdb/rocksdb/db/memtable_list_test.cc +90 -4
  130. package/deps/rocksdb/rocksdb/db/perf_context_test.cc +2 -2
  131. package/deps/rocksdb/rocksdb/db/plain_table_db_test.cc +1 -0
  132. package/deps/rocksdb/rocksdb/db/repair.cc +21 -4
  133. package/deps/rocksdb/rocksdb/db/repair_test.cc +143 -2
  134. package/deps/rocksdb/rocksdb/db/seqno_time_test.cc +5 -4
  135. package/deps/rocksdb/rocksdb/db/table_cache.cc +44 -35
  136. package/deps/rocksdb/rocksdb/db/table_cache.h +6 -6
  137. package/deps/rocksdb/rocksdb/db/table_cache_sync_and_async.h +2 -2
  138. package/deps/rocksdb/rocksdb/db/version_builder.cc +0 -1
  139. package/deps/rocksdb/rocksdb/db/version_builder_test.cc +236 -204
  140. package/deps/rocksdb/rocksdb/db/version_edit.cc +66 -4
  141. package/deps/rocksdb/rocksdb/db/version_edit.h +48 -6
  142. package/deps/rocksdb/rocksdb/db/version_edit_handler.cc +80 -8
  143. package/deps/rocksdb/rocksdb/db/version_edit_handler.h +12 -0
  144. package/deps/rocksdb/rocksdb/db/version_edit_test.cc +86 -17
  145. package/deps/rocksdb/rocksdb/db/version_set.cc +136 -41
  146. package/deps/rocksdb/rocksdb/db/version_set.h +28 -7
  147. package/deps/rocksdb/rocksdb/db/version_set_test.cc +25 -15
  148. package/deps/rocksdb/rocksdb/db/write_batch.cc +11 -0
  149. package/deps/rocksdb/rocksdb/db/write_batch_internal.h +3 -0
  150. package/deps/rocksdb/rocksdb/db/write_batch_test.cc +16 -0
  151. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_common.h +22 -3
  152. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_driver.cc +2 -0
  153. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_env_wrapper.h +42 -0
  154. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_gflags.cc +32 -3
  155. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_shared_state.h +7 -0
  156. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_test_base.cc +247 -120
  157. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_test_base.h +9 -4
  158. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_tool.cc +13 -6
  159. package/deps/rocksdb/rocksdb/db_stress_tool/expected_value.h +2 -0
  160. package/deps/rocksdb/rocksdb/db_stress_tool/multi_ops_txns_stress.cc +15 -27
  161. package/deps/rocksdb/rocksdb/db_stress_tool/no_batched_ops_stress.cc +264 -69
  162. package/deps/rocksdb/rocksdb/env/env.cc +1 -2
  163. package/deps/rocksdb/rocksdb/env/env_encryption.cc +11 -165
  164. package/deps/rocksdb/rocksdb/env/env_encryption_ctr.h +0 -17
  165. package/deps/rocksdb/rocksdb/env/env_posix.cc +6 -2
  166. package/deps/rocksdb/rocksdb/env/env_test.cc +86 -2
  167. package/deps/rocksdb/rocksdb/env/fs_posix.cc +6 -4
  168. package/deps/rocksdb/rocksdb/env/unique_id_gen.cc +78 -0
  169. package/deps/rocksdb/rocksdb/env/unique_id_gen.h +34 -0
  170. package/deps/rocksdb/rocksdb/file/delete_scheduler.cc +1 -0
  171. package/deps/rocksdb/rocksdb/file/delete_scheduler_test.cc +15 -4
  172. package/deps/rocksdb/rocksdb/file/file_prefetch_buffer.cc +52 -43
  173. package/deps/rocksdb/rocksdb/file/file_prefetch_buffer.h +34 -18
  174. package/deps/rocksdb/rocksdb/file/file_util.cc +10 -5
  175. package/deps/rocksdb/rocksdb/file/file_util.h +13 -1
  176. package/deps/rocksdb/rocksdb/file/prefetch_test.cc +724 -79
  177. package/deps/rocksdb/rocksdb/file/random_access_file_reader.cc +64 -33
  178. package/deps/rocksdb/rocksdb/file/random_access_file_reader.h +3 -16
  179. package/deps/rocksdb/rocksdb/file/random_access_file_reader_test.cc +23 -12
  180. package/deps/rocksdb/rocksdb/file/sequence_file_reader.h +3 -0
  181. package/deps/rocksdb/rocksdb/include/rocksdb/advanced_cache.h +2 -1
  182. package/deps/rocksdb/rocksdb/include/rocksdb/advanced_options.h +153 -88
  183. package/deps/rocksdb/rocksdb/include/rocksdb/c.h +70 -2
  184. package/deps/rocksdb/rocksdb/include/rocksdb/cache.h +50 -11
  185. package/deps/rocksdb/rocksdb/include/rocksdb/compaction_job_stats.h +3 -0
  186. package/deps/rocksdb/rocksdb/include/rocksdb/comparator.h +16 -2
  187. package/deps/rocksdb/rocksdb/include/rocksdb/convenience.h +1 -1
  188. package/deps/rocksdb/rocksdb/include/rocksdb/db.h +55 -8
  189. package/deps/rocksdb/rocksdb/include/rocksdb/env.h +32 -4
  190. package/deps/rocksdb/rocksdb/include/rocksdb/env_encryption.h +9 -109
  191. package/deps/rocksdb/rocksdb/include/rocksdb/file_system.h +90 -13
  192. package/deps/rocksdb/rocksdb/include/rocksdb/listener.h +3 -0
  193. package/deps/rocksdb/rocksdb/include/rocksdb/options.h +85 -17
  194. package/deps/rocksdb/rocksdb/include/rocksdb/secondary_cache.h +13 -1
  195. package/deps/rocksdb/rocksdb/include/rocksdb/sst_file_manager.h +2 -1
  196. package/deps/rocksdb/rocksdb/include/rocksdb/sst_file_writer.h +5 -1
  197. package/deps/rocksdb/rocksdb/include/rocksdb/statistics.h +21 -2
  198. package/deps/rocksdb/rocksdb/include/rocksdb/table.h +7 -1
  199. package/deps/rocksdb/rocksdb/include/rocksdb/table_properties.h +6 -0
  200. package/deps/rocksdb/rocksdb/include/rocksdb/thread_status.h +5 -0
  201. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/optimistic_transaction_db.h +33 -2
  202. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/stackable_db.h +14 -0
  203. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/transaction.h +33 -2
  204. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/write_batch_with_index.h +0 -3
  205. package/deps/rocksdb/rocksdb/include/rocksdb/version.h +2 -2
  206. package/deps/rocksdb/rocksdb/include/rocksdb/write_batch.h +3 -0
  207. package/deps/rocksdb/rocksdb/memory/arena_test.cc +18 -11
  208. package/deps/rocksdb/rocksdb/memory/jemalloc_nodump_allocator.cc +2 -1
  209. package/deps/rocksdb/rocksdb/microbench/db_basic_bench.cc +69 -34
  210. package/deps/rocksdb/rocksdb/monitoring/statistics.cc +16 -1
  211. package/deps/rocksdb/rocksdb/monitoring/thread_status_util_debug.cc +10 -0
  212. package/deps/rocksdb/rocksdb/options/cf_options.cc +19 -0
  213. package/deps/rocksdb/rocksdb/options/cf_options.h +10 -2
  214. package/deps/rocksdb/rocksdb/options/customizable_test.cc +2 -1
  215. package/deps/rocksdb/rocksdb/options/db_options.cc +7 -0
  216. package/deps/rocksdb/rocksdb/options/db_options.h +1 -0
  217. package/deps/rocksdb/rocksdb/options/options.cc +15 -1
  218. package/deps/rocksdb/rocksdb/options/options_helper.cc +6 -0
  219. package/deps/rocksdb/rocksdb/options/options_settable_test.cc +11 -3
  220. package/deps/rocksdb/rocksdb/options/options_test.cc +8 -0
  221. package/deps/rocksdb/rocksdb/port/mmap.h +20 -0
  222. package/deps/rocksdb/rocksdb/port/stack_trace.cc +27 -12
  223. package/deps/rocksdb/rocksdb/port/win/env_win.h +1 -1
  224. package/deps/rocksdb/rocksdb/src.mk +3 -0
  225. package/deps/rocksdb/rocksdb/table/block_based/binary_search_index_reader.cc +2 -1
  226. package/deps/rocksdb/rocksdb/table/block_based/block.cc +48 -22
  227. package/deps/rocksdb/rocksdb/table/block_based/block.h +60 -12
  228. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_builder.cc +115 -42
  229. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_factory.cc +6 -5
  230. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_iterator.cc +60 -2
  231. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_iterator.h +2 -0
  232. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader.cc +62 -44
  233. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader.h +36 -14
  234. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader_sync_and_async.h +38 -15
  235. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader_test.cc +219 -51
  236. package/deps/rocksdb/rocksdb/table/block_based/block_builder.cc +41 -8
  237. package/deps/rocksdb/rocksdb/table/block_based/block_builder.h +25 -1
  238. package/deps/rocksdb/rocksdb/table/block_based/block_prefetcher.cc +50 -21
  239. package/deps/rocksdb/rocksdb/table/block_based/block_prefetcher.h +11 -4
  240. package/deps/rocksdb/rocksdb/table/block_based/block_test.cc +195 -55
  241. package/deps/rocksdb/rocksdb/table/block_based/hash_index_reader.cc +1 -1
  242. package/deps/rocksdb/rocksdb/table/block_based/index_builder.cc +31 -16
  243. package/deps/rocksdb/rocksdb/table/block_based/index_builder.h +97 -58
  244. package/deps/rocksdb/rocksdb/table/block_based/index_reader_common.cc +1 -1
  245. package/deps/rocksdb/rocksdb/table/block_based/index_reader_common.h +6 -0
  246. package/deps/rocksdb/rocksdb/table/block_based/partitioned_filter_block.cc +27 -12
  247. package/deps/rocksdb/rocksdb/table/block_based/partitioned_filter_block.h +3 -1
  248. package/deps/rocksdb/rocksdb/table/block_based/partitioned_filter_block_test.cc +114 -70
  249. package/deps/rocksdb/rocksdb/table/block_based/partitioned_index_iterator.cc +1 -2
  250. package/deps/rocksdb/rocksdb/table/block_based/partitioned_index_reader.cc +9 -6
  251. package/deps/rocksdb/rocksdb/table/block_based/reader_common.cc +15 -3
  252. package/deps/rocksdb/rocksdb/table/block_based/reader_common.h +6 -3
  253. package/deps/rocksdb/rocksdb/table/block_fetcher.cc +11 -11
  254. package/deps/rocksdb/rocksdb/table/block_fetcher_test.cc +3 -0
  255. package/deps/rocksdb/rocksdb/table/compaction_merging_iterator.cc +1 -0
  256. package/deps/rocksdb/rocksdb/table/cuckoo/cuckoo_table_builder.cc +6 -2
  257. package/deps/rocksdb/rocksdb/table/cuckoo/cuckoo_table_builder_test.cc +1 -2
  258. package/deps/rocksdb/rocksdb/table/cuckoo/cuckoo_table_reader.cc +2 -3
  259. package/deps/rocksdb/rocksdb/table/format.cc +175 -33
  260. package/deps/rocksdb/rocksdb/table/format.h +63 -10
  261. package/deps/rocksdb/rocksdb/table/merging_iterator.cc +10 -2
  262. package/deps/rocksdb/rocksdb/table/meta_blocks.cc +12 -4
  263. package/deps/rocksdb/rocksdb/table/meta_blocks.h +1 -0
  264. package/deps/rocksdb/rocksdb/table/mock_table.cc +8 -3
  265. package/deps/rocksdb/rocksdb/table/plain/plain_table_builder.cc +10 -5
  266. package/deps/rocksdb/rocksdb/table/plain/plain_table_builder.h +10 -1
  267. package/deps/rocksdb/rocksdb/table/plain/plain_table_key_coding.cc +1 -2
  268. package/deps/rocksdb/rocksdb/table/plain/plain_table_reader.cc +3 -3
  269. package/deps/rocksdb/rocksdb/table/sst_file_dumper.cc +12 -3
  270. package/deps/rocksdb/rocksdb/table/sst_file_writer.cc +26 -1
  271. package/deps/rocksdb/rocksdb/table/table_builder.h +6 -2
  272. package/deps/rocksdb/rocksdb/table/table_properties.cc +6 -0
  273. package/deps/rocksdb/rocksdb/table/table_test.cc +52 -22
  274. package/deps/rocksdb/rocksdb/test_util/secondary_cache_test_util.h +19 -7
  275. package/deps/rocksdb/rocksdb/test_util/sync_point.h +3 -1
  276. package/deps/rocksdb/rocksdb/test_util/testutil.cc +29 -0
  277. package/deps/rocksdb/rocksdb/test_util/testutil.h +19 -0
  278. package/deps/rocksdb/rocksdb/tools/db_bench_tool.cc +65 -26
  279. package/deps/rocksdb/rocksdb/tools/ldb_cmd.cc +8 -5
  280. package/deps/rocksdb/rocksdb/tools/ldb_tool.cc +1 -0
  281. package/deps/rocksdb/rocksdb/tools/reduce_levels_test.cc +1 -0
  282. package/deps/rocksdb/rocksdb/tools/sst_dump_test.cc +0 -1
  283. package/deps/rocksdb/rocksdb/tools/sst_dump_tool.cc +4 -0
  284. package/deps/rocksdb/rocksdb/unreleased_history/README.txt +73 -0
  285. package/deps/rocksdb/rocksdb/unreleased_history/add.sh +27 -0
  286. package/deps/rocksdb/rocksdb/unreleased_history/behavior_changes/.gitkeep +0 -0
  287. package/deps/rocksdb/rocksdb/unreleased_history/bug_fixes/.gitkeep +0 -0
  288. package/deps/rocksdb/rocksdb/unreleased_history/new_features/.gitkeep +0 -0
  289. package/deps/rocksdb/rocksdb/unreleased_history/performance_improvements/.gitkeep +0 -0
  290. package/deps/rocksdb/rocksdb/unreleased_history/public_api_changes/.gitkeep +0 -0
  291. package/deps/rocksdb/rocksdb/unreleased_history/release.sh +104 -0
  292. package/deps/rocksdb/rocksdb/util/async_file_reader.cc +5 -0
  293. package/deps/rocksdb/rocksdb/util/bloom_impl.h +3 -3
  294. package/deps/rocksdb/rocksdb/util/cast_util.h +14 -0
  295. package/deps/rocksdb/rocksdb/util/compaction_job_stats_impl.cc +2 -0
  296. package/deps/rocksdb/rocksdb/util/comparator.cc +29 -7
  297. package/deps/rocksdb/rocksdb/util/compression.cc +4 -4
  298. package/deps/rocksdb/rocksdb/util/compression.h +110 -32
  299. package/deps/rocksdb/rocksdb/util/core_local.h +2 -1
  300. package/deps/rocksdb/rocksdb/util/dynamic_bloom.h +4 -4
  301. package/deps/rocksdb/rocksdb/util/filelock_test.cc +3 -0
  302. package/deps/rocksdb/rocksdb/util/hash.h +7 -3
  303. package/deps/rocksdb/rocksdb/util/hash_test.cc +44 -0
  304. package/deps/rocksdb/rocksdb/util/math.h +58 -6
  305. package/deps/rocksdb/rocksdb/util/math128.h +29 -7
  306. package/deps/rocksdb/rocksdb/util/mutexlock.h +35 -27
  307. package/deps/rocksdb/rocksdb/util/single_thread_executor.h +1 -0
  308. package/deps/rocksdb/rocksdb/util/stop_watch.h +1 -1
  309. package/deps/rocksdb/rocksdb/util/thread_operation.h +8 -1
  310. package/deps/rocksdb/rocksdb/util/udt_util.cc +343 -0
  311. package/deps/rocksdb/rocksdb/util/udt_util.h +173 -1
  312. package/deps/rocksdb/rocksdb/util/udt_util_test.cc +447 -0
  313. package/deps/rocksdb/rocksdb/util/write_batch_util.cc +25 -0
  314. package/deps/rocksdb/rocksdb/util/write_batch_util.h +80 -0
  315. package/deps/rocksdb/rocksdb/utilities/backup/backup_engine_test.cc +4 -4
  316. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_db_impl.cc +69 -25
  317. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_db_impl.h +7 -6
  318. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_db_listener.h +1 -1
  319. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_dump_tool.cc +2 -3
  320. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_file.cc +6 -11
  321. package/deps/rocksdb/rocksdb/utilities/cache_dump_load_impl.h +1 -2
  322. package/deps/rocksdb/rocksdb/utilities/checkpoint/checkpoint_test.cc +4 -5
  323. package/deps/rocksdb/rocksdb/utilities/fault_injection_fs.cc +1 -1
  324. package/deps/rocksdb/rocksdb/utilities/fault_injection_secondary_cache.cc +2 -2
  325. package/deps/rocksdb/rocksdb/utilities/fault_injection_secondary_cache.h +2 -1
  326. package/deps/rocksdb/rocksdb/utilities/option_change_migration/option_change_migration_test.cc +3 -3
  327. package/deps/rocksdb/rocksdb/utilities/persistent_cache/block_cache_tier_file.cc +1 -2
  328. package/deps/rocksdb/rocksdb/utilities/trace/file_trace_reader_writer.cc +2 -3
  329. package/deps/rocksdb/rocksdb/utilities/transactions/lock/point/point_lock_manager_test.cc +2 -2
  330. package/deps/rocksdb/rocksdb/utilities/transactions/lock/point/point_lock_manager_test.h +1 -1
  331. package/deps/rocksdb/rocksdb/utilities/transactions/optimistic_transaction.cc +23 -8
  332. package/deps/rocksdb/rocksdb/utilities/transactions/optimistic_transaction_db_impl.cc +9 -6
  333. package/deps/rocksdb/rocksdb/utilities/transactions/optimistic_transaction_db_impl.h +37 -12
  334. package/deps/rocksdb/rocksdb/utilities/transactions/optimistic_transaction_test.cc +231 -33
  335. package/deps/rocksdb/rocksdb/utilities/transactions/pessimistic_transaction.h +0 -1
  336. package/deps/rocksdb/rocksdb/utilities/transactions/transaction_base.cc +76 -20
  337. package/deps/rocksdb/rocksdb/utilities/transactions/transaction_base.h +18 -9
  338. package/deps/rocksdb/rocksdb/utilities/transactions/transaction_test.cc +40 -23
  339. package/deps/rocksdb/rocksdb/utilities/transactions/transaction_test.h +13 -12
  340. package/deps/rocksdb/rocksdb/utilities/transactions/write_committed_transaction_ts_test.cc +7 -0
  341. package/deps/rocksdb/rocksdb/utilities/transactions/write_prepared_transaction_test.cc +1 -1
  342. package/deps/rocksdb/rocksdb/utilities/transactions/write_prepared_txn.cc +41 -11
  343. package/deps/rocksdb/rocksdb/utilities/transactions/write_prepared_txn.h +6 -3
  344. package/deps/rocksdb/rocksdb/utilities/transactions/write_prepared_txn_db.cc +71 -24
  345. package/deps/rocksdb/rocksdb/utilities/transactions/write_prepared_txn_db.h +19 -4
  346. package/deps/rocksdb/rocksdb/utilities/transactions/write_unprepared_transaction_test.cc +60 -107
  347. package/deps/rocksdb/rocksdb/utilities/transactions/write_unprepared_txn.cc +39 -11
  348. package/deps/rocksdb/rocksdb/utilities/transactions/write_unprepared_txn.h +6 -3
  349. package/deps/rocksdb/rocksdb/utilities/transactions/write_unprepared_txn_db.cc +14 -8
  350. package/deps/rocksdb/rocksdb/utilities/transactions/write_unprepared_txn_db.h +1 -1
  351. package/deps/rocksdb/rocksdb/utilities/ttl/db_ttl_impl.cc +10 -5
  352. package/deps/rocksdb/rocksdb/utilities/ttl/db_ttl_impl.h +1 -1
  353. package/deps/rocksdb/rocksdb/utilities/ttl/ttl_test.cc +1 -1
  354. package/deps/rocksdb/rocksdb/utilities/write_batch_with_index/write_batch_with_index.cc +2 -1
  355. package/deps/rocksdb/rocksdb/utilities/write_batch_with_index/write_batch_with_index_test.cc +6 -6
  356. package/deps/rocksdb/rocksdb.gyp +2 -0
  357. package/package.json +1 -1
  358. package/prebuilds/darwin-arm64/node.napi.node +0 -0
  359. package/prebuilds/linux-x64/node.napi.node +0 -0
@@ -182,9 +182,8 @@ class LogTest
182
182
 
183
183
  Slice* get_reader_contents() { return &reader_contents_; }
184
184
 
185
- void Write(
186
- const std::string& msg,
187
- const std::unordered_map<uint32_t, size_t>* cf_to_ts_sz = nullptr) {
185
+ void Write(const std::string& msg,
186
+ const UnorderedMap<uint32_t, size_t>* cf_to_ts_sz = nullptr) {
188
187
  if (cf_to_ts_sz != nullptr && !cf_to_ts_sz->empty()) {
189
188
  ASSERT_OK(writer_->MaybeAddUserDefinedTimestampSizeRecord(*cf_to_ts_sz));
190
189
  }
@@ -193,10 +192,9 @@ class LogTest
193
192
 
194
193
  size_t WrittenBytes() const { return dest_contents().size(); }
195
194
 
196
- std::string Read(
197
- const WALRecoveryMode wal_recovery_mode =
198
- WALRecoveryMode::kTolerateCorruptedTailRecords,
199
- std::unordered_map<uint32_t, size_t>* cf_to_ts_sz = nullptr) {
195
+ std::string Read(const WALRecoveryMode wal_recovery_mode =
196
+ WALRecoveryMode::kTolerateCorruptedTailRecords,
197
+ UnorderedMap<uint32_t, size_t>* cf_to_ts_sz = nullptr) {
200
198
  std::string scratch;
201
199
  Slice record;
202
200
  bool ret = false;
@@ -270,9 +268,8 @@ class LogTest
270
268
  }
271
269
 
272
270
  void CheckRecordAndTimestampSize(
273
- std::string record,
274
- std::unordered_map<uint32_t, size_t>& expected_ts_sz) {
275
- std::unordered_map<uint32_t, size_t> recorded_ts_sz;
271
+ std::string record, UnorderedMap<uint32_t, size_t>& expected_ts_sz) {
272
+ UnorderedMap<uint32_t, size_t> recorded_ts_sz;
276
273
  ASSERT_EQ(record,
277
274
  Read(WALRecoveryMode::
278
275
  kTolerateCorruptedTailRecords /* wal_recovery_mode */,
@@ -297,18 +294,18 @@ TEST_P(LogTest, ReadWrite) {
297
294
  }
298
295
 
299
296
  TEST_P(LogTest, ReadWriteWithTimestampSize) {
300
- std::unordered_map<uint32_t, size_t> ts_sz_one = {
297
+ UnorderedMap<uint32_t, size_t> ts_sz_one = {
301
298
  {1, sizeof(uint64_t)},
302
299
  };
303
300
  Write("foo", &ts_sz_one);
304
301
  Write("bar");
305
- std::unordered_map<uint32_t, size_t> ts_sz_two = {{2, sizeof(char)}};
302
+ UnorderedMap<uint32_t, size_t> ts_sz_two = {{2, sizeof(char)}};
306
303
  Write("", &ts_sz_two);
307
304
  Write("xxxx");
308
305
 
309
306
  CheckRecordAndTimestampSize("foo", ts_sz_one);
310
307
  CheckRecordAndTimestampSize("bar", ts_sz_one);
311
- std::unordered_map<uint32_t, size_t> expected_ts_sz_two;
308
+ UnorderedMap<uint32_t, size_t> expected_ts_sz_two;
312
309
  // User-defined timestamp size records are accumulated and applied to
313
310
  // subsequent records.
314
311
  expected_ts_sz_two.insert(ts_sz_one.begin(), ts_sz_one.end());
@@ -320,10 +317,9 @@ TEST_P(LogTest, ReadWriteWithTimestampSize) {
320
317
  }
321
318
 
322
319
  TEST_P(LogTest, ReadWriteWithTimestampSizeZeroTimestampIgnored) {
323
- std::unordered_map<uint32_t, size_t> ts_sz_one = {{1, sizeof(uint64_t)}};
320
+ UnorderedMap<uint32_t, size_t> ts_sz_one = {{1, sizeof(uint64_t)}};
324
321
  Write("foo", &ts_sz_one);
325
- std::unordered_map<uint32_t, size_t> ts_sz_two(ts_sz_one.begin(),
326
- ts_sz_one.end());
322
+ UnorderedMap<uint32_t, size_t> ts_sz_two(ts_sz_one.begin(), ts_sz_one.end());
327
323
  ts_sz_two.insert(std::make_pair(2, 0));
328
324
  Write("bar", &ts_sz_two);
329
325
 
@@ -749,7 +745,7 @@ TEST_P(LogTest, RecycleWithTimestampSize) {
749
745
  if (!recyclable_log) {
750
746
  return; // test is only valid for recycled logs
751
747
  }
752
- std::unordered_map<uint32_t, size_t> ts_sz_one = {
748
+ UnorderedMap<uint32_t, size_t> ts_sz_one = {
753
749
  {1, sizeof(uint32_t)},
754
750
  };
755
751
  Write("foo", &ts_sz_one);
@@ -765,7 +761,7 @@ TEST_P(LogTest, RecycleWithTimestampSize) {
765
761
  std::unique_ptr<WritableFileWriter> dest_holder(new WritableFileWriter(
766
762
  std::move(sink), "" /* don't care */, FileOptions()));
767
763
  Writer recycle_writer(std::move(dest_holder), 123, true);
768
- std::unordered_map<uint32_t, size_t> ts_sz_two = {
764
+ UnorderedMap<uint32_t, size_t> ts_sz_two = {
769
765
  {2, sizeof(uint64_t)},
770
766
  };
771
767
  ASSERT_OK(recycle_writer.MaybeAddUserDefinedTimestampSizeRecord(ts_sz_two));
@@ -1039,18 +1035,18 @@ TEST_P(CompressionLogTest, ReadWriteWithTimestampSize) {
1039
1035
  return;
1040
1036
  }
1041
1037
  ASSERT_OK(SetupTestEnv());
1042
- std::unordered_map<uint32_t, size_t> ts_sz_one = {
1038
+ UnorderedMap<uint32_t, size_t> ts_sz_one = {
1043
1039
  {1, sizeof(uint64_t)},
1044
1040
  };
1045
1041
  Write("foo", &ts_sz_one);
1046
1042
  Write("bar");
1047
- std::unordered_map<uint32_t, size_t> ts_sz_two = {{2, sizeof(char)}};
1043
+ UnorderedMap<uint32_t, size_t> ts_sz_two = {{2, sizeof(char)}};
1048
1044
  Write("", &ts_sz_two);
1049
1045
  Write("xxxx");
1050
1046
 
1051
1047
  CheckRecordAndTimestampSize("foo", ts_sz_one);
1052
1048
  CheckRecordAndTimestampSize("bar", ts_sz_one);
1053
- std::unordered_map<uint32_t, size_t> expected_ts_sz_two;
1049
+ UnorderedMap<uint32_t, size_t> expected_ts_sz_two;
1054
1050
  // User-defined timestamp size records are accumulated and applied to
1055
1051
  // subsequent records.
1056
1052
  expected_ts_sz_two.insert(ts_sz_one.begin(), ts_sz_one.end());
@@ -197,7 +197,7 @@ IOStatus Writer::AddCompressionTypeRecord() {
197
197
  }
198
198
 
199
199
  IOStatus Writer::MaybeAddUserDefinedTimestampSizeRecord(
200
- const std::unordered_map<uint32_t, size_t>& cf_to_ts_sz,
200
+ const UnorderedMap<uint32_t, size_t>& cf_to_ts_sz,
201
201
  Env::IOPriority rate_limiter_priority) {
202
202
  std::vector<std::pair<uint32_t, size_t>> ts_sz_to_record;
203
203
  for (const auto& [cf_id, ts_sz] : cf_to_ts_sz) {
@@ -20,6 +20,7 @@
20
20
  #include "rocksdb/slice.h"
21
21
  #include "rocksdb/status.h"
22
22
  #include "util/compression.h"
23
+ #include "util/hash_containers.h"
23
24
 
24
25
  namespace ROCKSDB_NAMESPACE {
25
26
 
@@ -95,7 +96,7 @@ class Writer {
95
96
  // kRecyclableUserDefinedTimestampSizeType for these column families.
96
97
  // This timestamp size record applies to all subsequent records.
97
98
  IOStatus MaybeAddUserDefinedTimestampSizeRecord(
98
- const std::unordered_map<uint32_t, size_t>& cf_to_ts_sz,
99
+ const UnorderedMap<uint32_t, size_t>& cf_to_ts_sz,
99
100
  Env::IOPriority rate_limiter_priority = Env::IO_TOTAL);
100
101
 
101
102
  WritableFileWriter* file() { return dest_.get(); }
@@ -137,7 +138,7 @@ class Writer {
137
138
  // The recorded user-defined timestamp sizes that have been written so far.
138
139
  // Since the user-defined timestamp size cannot be changed while the DB is
139
140
  // running, existing entry in this map cannot be updated.
140
- std::unordered_map<uint32_t, size_t> recorded_cf_to_ts_sz_;
141
+ UnorderedMap<uint32_t, size_t> recorded_cf_to_ts_sz_;
141
142
  };
142
143
 
143
144
  } // namespace log
@@ -190,6 +190,7 @@ TEST_F(ManualCompactionTest, Test) {
190
190
  TEST_F(ManualCompactionTest, SkipLevel) {
191
191
  DB* db;
192
192
  Options options;
193
+ options.level_compaction_dynamic_level_bytes = false;
193
194
  options.num_levels = 3;
194
195
  // Initially, flushed L0 files won't exceed 100.
195
196
  options.level0_file_num_compaction_trigger = 100;
@@ -286,9 +287,9 @@ TEST_F(ManualCompactionTest, SkipLevel) {
286
287
  filter->Reset();
287
288
  ASSERT_OK(db->CompactRange(CompactRangeOptions(), &start, nullptr));
288
289
  ASSERT_EQ(4, filter->NumKeys());
289
- // 1 is first compacted to L1 and then further compacted into [2, 4, 8],
290
- // so finally the logged level for 1 is L1.
291
- ASSERT_EQ(1, filter->KeyLevel("1"));
290
+ // 1 is first compacted from L0 to L1, and then L1 intra level compaction
291
+ // compacts [2, 4, 8] only.
292
+ ASSERT_EQ(0, filter->KeyLevel("1"));
292
293
  ASSERT_EQ(1, filter->KeyLevel("2"));
293
294
  ASSERT_EQ(1, filter->KeyLevel("4"));
294
295
  ASSERT_EQ(1, filter->KeyLevel("8"));
@@ -95,6 +95,7 @@ MemTable::MemTable(const InternalKeyComparator& cmp,
95
95
  data_size_(0),
96
96
  num_entries_(0),
97
97
  num_deletes_(0),
98
+ num_range_deletes_(0),
98
99
  write_buffer_size_(mutable_cf_options.write_buffer_size),
99
100
  flush_in_progress_(false),
100
101
  flush_completed_(false),
@@ -114,7 +115,9 @@ MemTable::MemTable(const InternalKeyComparator& cmp,
114
115
  ioptions.memtable_insert_with_hint_prefix_extractor.get()),
115
116
  oldest_key_time_(std::numeric_limits<uint64_t>::max()),
116
117
  atomic_flush_seqno_(kMaxSequenceNumber),
117
- approximate_memory_usage_(0) {
118
+ approximate_memory_usage_(0),
119
+ memtable_max_range_deletions_(
120
+ mutable_cf_options.memtable_max_range_deletions) {
118
121
  UpdateFlushState();
119
122
  // something went wrong if we need to flush before inserting anything
120
123
  assert(!ShouldScheduleFlush());
@@ -143,6 +146,10 @@ MemTable::MemTable(const InternalKeyComparator& cmp,
143
146
  new_cache.get()),
144
147
  std::memory_order_relaxed);
145
148
  }
149
+ const Comparator* ucmp = cmp.user_comparator();
150
+ assert(ucmp);
151
+ ts_sz_ = ucmp->timestamp_size();
152
+ persist_user_defined_timestamps_ = ioptions.persist_user_defined_timestamps;
146
153
  }
147
154
 
148
155
  MemTable::~MemTable() {
@@ -170,6 +177,14 @@ size_t MemTable::ApproximateMemoryUsage() {
170
177
  }
171
178
 
172
179
  bool MemTable::ShouldFlushNow() {
180
+ // This is set if memtable_max_range_deletions is > 0,
181
+ // and that many range deletions are done
182
+ if (memtable_max_range_deletions_ > 0 &&
183
+ num_range_deletes_.load(std::memory_order_relaxed) >=
184
+ static_cast<uint64_t>(memtable_max_range_deletions_)) {
185
+ return true;
186
+ }
187
+
173
188
  size_t write_buffer_size = write_buffer_size_.load(std::memory_order_relaxed);
174
189
  // Often, we cannot allocate arena blocks that exactly match the
175
190
  // buffer size. Thus we have to decide if we should over-allocate or
@@ -357,7 +372,8 @@ class MemTableIterator : public InternalIterator {
357
372
  !mem.GetImmutableMemTableOptions()->inplace_update_support),
358
373
  protection_bytes_per_key_(mem.moptions_.protection_bytes_per_key),
359
374
  status_(Status::OK()),
360
- logger_(mem.moptions_.info_log) {
375
+ logger_(mem.moptions_.info_log),
376
+ ts_sz_(mem.ts_sz_) {
361
377
  if (use_range_del_table) {
362
378
  iter_ = mem.range_del_table_->GetIterator(arena);
363
379
  } else if (prefix_extractor_ != nullptr && !read_options.total_order_seek &&
@@ -400,8 +416,7 @@ class MemTableIterator : public InternalIterator {
400
416
  PERF_COUNTER_ADD(seek_on_memtable_count, 1);
401
417
  if (bloom_) {
402
418
  // iterator should only use prefix bloom filter
403
- auto ts_sz = comparator_.comparator.user_comparator()->timestamp_size();
404
- Slice user_k_without_ts(ExtractUserKeyAndStripTimestamp(k, ts_sz));
419
+ Slice user_k_without_ts(ExtractUserKeyAndStripTimestamp(k, ts_sz_));
405
420
  if (prefix_extractor_->InDomain(user_k_without_ts)) {
406
421
  if (!bloom_->MayContain(
407
422
  prefix_extractor_->Transform(user_k_without_ts))) {
@@ -421,8 +436,7 @@ class MemTableIterator : public InternalIterator {
421
436
  PERF_TIMER_GUARD(seek_on_memtable_time);
422
437
  PERF_COUNTER_ADD(seek_on_memtable_count, 1);
423
438
  if (bloom_) {
424
- auto ts_sz = comparator_.comparator.user_comparator()->timestamp_size();
425
- Slice user_k_without_ts(ExtractUserKeyAndStripTimestamp(k, ts_sz));
439
+ Slice user_k_without_ts(ExtractUserKeyAndStripTimestamp(k, ts_sz_));
426
440
  if (prefix_extractor_->InDomain(user_k_without_ts)) {
427
441
  if (!bloom_->MayContain(
428
442
  prefix_extractor_->Transform(user_k_without_ts))) {
@@ -512,6 +526,7 @@ class MemTableIterator : public InternalIterator {
512
526
  uint32_t protection_bytes_per_key_;
513
527
  Status status_;
514
528
  Logger* logger_;
529
+ size_t ts_sz_;
515
530
 
516
531
  void VerifyEntryChecksum() {
517
532
  if (protection_bytes_per_key_ > 0 && Valid()) {
@@ -625,8 +640,7 @@ Status MemTable::VerifyEncodedEntry(Slice encoded,
625
640
  if (!GetVarint32(&encoded, &ikey_len)) {
626
641
  return Status::Corruption("Unable to parse internal key length");
627
642
  }
628
- size_t ts_sz = GetInternalKeyComparator().user_comparator()->timestamp_size();
629
- if (ikey_len < 8 + ts_sz) {
643
+ if (ikey_len < 8 + ts_sz_) {
630
644
  return Status::Corruption("Internal key length too short");
631
645
  }
632
646
  if (ikey_len > encoded.size()) {
@@ -725,8 +739,7 @@ Status MemTable::Add(SequenceNumber s, ValueType type,
725
739
  }
726
740
  }
727
741
 
728
- size_t ts_sz = GetInternalKeyComparator().user_comparator()->timestamp_size();
729
- Slice key_without_ts = StripTimestampFromUserKey(key, ts_sz);
742
+ Slice key_without_ts = StripTimestampFromUserKey(key, ts_sz_);
730
743
 
731
744
  if (!allow_concurrent) {
732
745
  // Extract prefix for insert with hint.
@@ -754,6 +767,9 @@ Status MemTable::Add(SequenceNumber s, ValueType type,
754
767
  type == kTypeDeletionWithTimestamp) {
755
768
  num_deletes_.store(num_deletes_.load(std::memory_order_relaxed) + 1,
756
769
  std::memory_order_relaxed);
770
+ } else if (type == kTypeRangeDeletion) {
771
+ uint64_t val = num_range_deletes_.load(std::memory_order_relaxed) + 1;
772
+ num_range_deletes_.store(val, std::memory_order_relaxed);
757
773
  }
758
774
 
759
775
  if (bloom_filter_ && prefix_extractor_ &&
@@ -776,6 +792,9 @@ Status MemTable::Add(SequenceNumber s, ValueType type,
776
792
  assert(first_seqno_.load() >= earliest_seqno_.load());
777
793
  }
778
794
  assert(post_process_info == nullptr);
795
+ // TODO(yuzhangyu): support updating newest UDT for when `allow_concurrent`
796
+ // is true.
797
+ MaybeUpdateNewestUDT(key_slice);
779
798
  UpdateFlushState();
780
799
  } else {
781
800
  bool res = (hint == nullptr)
@@ -810,13 +829,14 @@ Status MemTable::Add(SequenceNumber s, ValueType type,
810
829
  earliest_seqno_.load(std::memory_order_relaxed);
811
830
  while (
812
831
  (cur_earliest_seqno == kMaxSequenceNumber || s < cur_earliest_seqno) &&
813
- !first_seqno_.compare_exchange_weak(cur_earliest_seqno, s)) {
832
+ !earliest_seqno_.compare_exchange_weak(cur_earliest_seqno, s)) {
814
833
  }
815
834
  }
816
835
  if (type == kTypeRangeDeletion) {
817
836
  auto new_cache = std::make_shared<FragmentedRangeTombstoneListCache>();
818
837
  size_t size = cached_range_tombstone_.Size();
819
838
  if (allow_concurrent) {
839
+ post_process_info->num_range_deletes++;
820
840
  range_del_mutex_.lock();
821
841
  }
822
842
  for (size_t i = 0; i < size; ++i) {
@@ -835,6 +855,7 @@ Status MemTable::Add(SequenceNumber s, ValueType type,
835
855
  new_local_cache_ref, new_cache.get()),
836
856
  std::memory_order_relaxed);
837
857
  }
858
+
838
859
  if (allow_concurrent) {
839
860
  range_del_mutex_.unlock();
840
861
  }
@@ -1263,6 +1284,7 @@ bool MemTable::Get(const LookupKey& key, std::string* value,
1263
1284
  // Avoiding recording stats for speed.
1264
1285
  return false;
1265
1286
  }
1287
+
1266
1288
  PERF_TIMER_GUARD(get_from_memtable_time);
1267
1289
 
1268
1290
  std::unique_ptr<FragmentedRangeTombstoneIterator> range_del_iter(
@@ -1286,8 +1308,7 @@ bool MemTable::Get(const LookupKey& key, std::string* value,
1286
1308
  bool found_final_value = false;
1287
1309
  bool merge_in_progress = s->IsMergeInProgress();
1288
1310
  bool may_contain = true;
1289
- size_t ts_sz = GetInternalKeyComparator().user_comparator()->timestamp_size();
1290
- Slice user_key_without_ts = StripTimestampFromUserKey(key.user_key(), ts_sz);
1311
+ Slice user_key_without_ts = StripTimestampFromUserKey(key.user_key(), ts_sz_);
1291
1312
  bool bloom_checked = false;
1292
1313
  if (bloom_filter_) {
1293
1314
  // when both memtable_whole_key_filtering and prefix_extractor_ are set,
@@ -1672,4 +1693,22 @@ uint64_t MemTable::GetMinLogContainingPrepSection() {
1672
1693
  return min_prep_log_referenced_.load();
1673
1694
  }
1674
1695
 
1696
+ void MemTable::MaybeUpdateNewestUDT(const Slice& user_key) {
1697
+ if (ts_sz_ == 0 || persist_user_defined_timestamps_) {
1698
+ return;
1699
+ }
1700
+ const Comparator* ucmp = GetInternalKeyComparator().user_comparator();
1701
+ Slice udt = ExtractTimestampFromUserKey(user_key, ts_sz_);
1702
+ if (newest_udt_.empty() || ucmp->CompareTimestamp(udt, newest_udt_) > 0) {
1703
+ newest_udt_ = udt;
1704
+ }
1705
+ }
1706
+
1707
+ const Slice& MemTable::GetNewestUDT() const {
1708
+ // This path should not be invoked for MemTables that does not enable the UDT
1709
+ // in Memtable only feature.
1710
+ assert(ts_sz_ > 0 && !persist_user_defined_timestamps_);
1711
+ return newest_udt_;
1712
+ }
1713
+
1675
1714
  } // namespace ROCKSDB_NAMESPACE
@@ -68,6 +68,7 @@ struct MemTablePostProcessInfo {
68
68
  uint64_t data_size = 0;
69
69
  uint64_t num_entries = 0;
70
70
  uint64_t num_deletes = 0;
71
+ uint64_t num_range_deletes = 0;
71
72
  };
72
73
 
73
74
  using MultiGetRange = MultiGetContext::Range;
@@ -332,6 +333,10 @@ class MemTable {
332
333
  num_deletes_.fetch_add(update_counters.num_deletes,
333
334
  std::memory_order_relaxed);
334
335
  }
336
+ if (update_counters.num_range_deletes > 0) {
337
+ num_range_deletes_.fetch_add(update_counters.num_range_deletes,
338
+ std::memory_order_relaxed);
339
+ }
335
340
  UpdateFlushState();
336
341
  }
337
342
 
@@ -349,10 +354,21 @@ class MemTable {
349
354
  return num_deletes_.load(std::memory_order_relaxed);
350
355
  }
351
356
 
357
+ // Get total number of range deletions in the mem table.
358
+ // REQUIRES: external synchronization to prevent simultaneous
359
+ // operations on the same MemTable (unless this Memtable is immutable).
360
+ uint64_t num_range_deletes() const {
361
+ return num_range_deletes_.load(std::memory_order_relaxed);
362
+ }
363
+
352
364
  uint64_t get_data_size() const {
353
365
  return data_size_.load(std::memory_order_relaxed);
354
366
  }
355
367
 
368
+ size_t write_buffer_size() const {
369
+ return write_buffer_size_.load(std::memory_order_relaxed);
370
+ }
371
+
356
372
  // Dynamically change the memtable's capacity. If set below the current usage,
357
373
  // the next key added will trigger a flush. Can only increase size when
358
374
  // memtable prefix bloom is disabled, since we can't easily allocate more
@@ -527,6 +543,14 @@ class MemTable {
527
543
  }
528
544
  }
529
545
 
546
+ // Get the newest user-defined timestamp contained in this MemTable. Check
547
+ // `newest_udt_` for what newer means. This method should only be invoked for
548
+ // a MemTable that has enabled the user-defined timestamp feature and set
549
+ // `persist_user_defined_timestamps` to false. The tracked newest UDT will be
550
+ // used by flush job in the background to help check the MemTable's
551
+ // eligibility for Flush.
552
+ const Slice& GetNewestUDT() const;
553
+
530
554
  // Returns Corruption status if verification fails.
531
555
  static Status VerifyEntryChecksum(const char* entry,
532
556
  uint32_t protection_bytes_per_key,
@@ -553,6 +577,7 @@ class MemTable {
553
577
  std::atomic<uint64_t> data_size_;
554
578
  std::atomic<uint64_t> num_entries_;
555
579
  std::atomic<uint64_t> num_deletes_;
580
+ std::atomic<uint64_t> num_range_deletes_;
556
581
 
557
582
  // Dynamically changeable memtable option
558
583
  std::atomic<size_t> write_buffer_size_;
@@ -596,7 +621,7 @@ class MemTable {
596
621
  const SliceTransform* insert_with_hint_prefix_extractor_;
597
622
 
598
623
  // Insert hints for each prefix.
599
- UnorderedMapH<Slice, void*, SliceHasher> insert_hints_;
624
+ UnorderedMapH<Slice, void*, SliceHasher32> insert_hints_;
600
625
 
601
626
  // Timestamp of oldest key
602
627
  std::atomic<uint64_t> oldest_key_time_;
@@ -614,9 +639,26 @@ class MemTable {
614
639
  // Gets refreshed inside `ApproximateMemoryUsage()` or `ShouldFlushNow`
615
640
  std::atomic<uint64_t> approximate_memory_usage_;
616
641
 
642
+ // max range deletions in a memtable, before automatic flushing, 0 for
643
+ // unlimited.
644
+ uint32_t memtable_max_range_deletions_ = 0;
645
+
617
646
  // Flush job info of the current memtable.
618
647
  std::unique_ptr<FlushJobInfo> flush_job_info_;
619
648
 
649
+ // Size in bytes for the user-defined timestamps.
650
+ size_t ts_sz_;
651
+
652
+ // Whether to persist user-defined timestamps
653
+ bool persist_user_defined_timestamps_;
654
+
655
+ // Newest user-defined timestamp contained in this MemTable. For ts1, and ts2
656
+ // if Comparator::CompareTimestamp(ts1, ts2) > 0, ts1 is considered newer than
657
+ // ts2. We track this field for a MemTable if its column family has UDT
658
+ // feature enabled and the `persist_user_defined_timestamps` flag is false.
659
+ // Otherwise, this field just contains an empty Slice.
660
+ Slice newest_udt_;
661
+
620
662
  // Updates flush_state_ using ShouldFlushNow()
621
663
  void UpdateFlushState();
622
664
 
@@ -653,6 +695,8 @@ class MemTable {
653
695
  void UpdateEntryChecksum(const ProtectionInfoKVOS64* kv_prot_info,
654
696
  const Slice& key, const Slice& value, ValueType type,
655
697
  SequenceNumber s, char* checksum_ptr);
698
+
699
+ void MaybeUpdateNewestUDT(const Slice& user_key);
656
700
  };
657
701
 
658
702
  extern const char* EncodeKey(std::string* scratch, const Slice& target);
@@ -434,23 +434,57 @@ void MemTableList::PickMemtablesToFlush(uint64_t max_memtable_id,
434
434
  }
435
435
 
436
436
  void MemTableList::RollbackMemtableFlush(const autovector<MemTable*>& mems,
437
- uint64_t /*file_number*/) {
437
+ bool rollback_succeeding_memtables) {
438
+ TEST_SYNC_POINT("RollbackMemtableFlush");
438
439
  AutoThreadOperationStageUpdater stage_updater(
439
440
  ThreadStatus::STAGE_MEMTABLE_ROLLBACK);
440
- assert(!mems.empty());
441
-
442
- // If the flush was not successful, then just reset state.
443
- // Maybe a succeeding attempt to flush will be successful.
441
+ #ifndef NDEBUG
444
442
  for (MemTable* m : mems) {
445
443
  assert(m->flush_in_progress_);
446
444
  assert(m->file_number_ == 0);
445
+ }
446
+ #endif
447
+
448
+ if (rollback_succeeding_memtables && !mems.empty()) {
449
+ std::list<MemTable*>& memlist = current_->memlist_;
450
+ auto it = memlist.rbegin();
451
+ for (; *it != mems[0] && it != memlist.rend(); ++it) {
452
+ }
453
+ // mems should be in memlist
454
+ assert(*it == mems[0]);
455
+ if (*it == mems[0]) {
456
+ ++it;
457
+ }
458
+ while (it != memlist.rend()) {
459
+ MemTable* m = *it;
460
+ // Only rollback complete, not in-progress,
461
+ // in_progress can be flushes that are still writing SSTs
462
+ if (m->flush_completed_) {
463
+ m->flush_in_progress_ = false;
464
+ m->flush_completed_ = false;
465
+ m->edit_.Clear();
466
+ m->file_number_ = 0;
467
+ num_flush_not_started_++;
468
+ ++it;
469
+ } else {
470
+ break;
471
+ }
472
+ }
473
+ }
447
474
 
448
- m->flush_in_progress_ = false;
449
- m->flush_completed_ = false;
450
- m->edit_.Clear();
451
- num_flush_not_started_++;
475
+ for (MemTable* m : mems) {
476
+ if (m->flush_in_progress_) {
477
+ assert(m->file_number_ == 0);
478
+ m->file_number_ = 0;
479
+ m->flush_in_progress_ = false;
480
+ m->flush_completed_ = false;
481
+ m->edit_.Clear();
482
+ num_flush_not_started_++;
483
+ }
484
+ }
485
+ if (!mems.empty()) {
486
+ imm_flush_needed.store(true, std::memory_order_release);
452
487
  }
453
- imm_flush_needed.store(true, std::memory_order_release);
454
488
  }
455
489
 
456
490
  // Try record a successful flush in the manifest file. It might just return
@@ -271,8 +271,20 @@ class MemTableList {
271
271
 
272
272
  // Reset status of the given memtable list back to pending state so that
273
273
  // they can get picked up again on the next round of flush.
274
+ //
275
+ // @param rollback_succeeding_memtables If true, will rollback adjacent
276
+ // younger memtables whose flush is completed. Specifically, suppose the
277
+ // current immutable memtables are M_0,M_1...M_N ordered from youngest to
278
+ // oldest. Suppose that the youngest memtable in `mems` is M_K. We will try to
279
+ // rollback M_K-1, M_K-2... until the first memtable whose flush is
280
+ // not completed. These are the memtables that would have been installed
281
+ // by this flush job if it were to succeed. This flag is currently used
282
+ // by non atomic_flush rollback.
283
+ // Note that we also do rollback in `write_manifest_cb` by calling
284
+ // `RemoveMemTablesOrRestoreFlags()`. There we rollback the entire batch so
285
+ // it is similar to what we do here with rollback_succeeding_memtables=true.
274
286
  void RollbackMemtableFlush(const autovector<MemTable*>& mems,
275
- uint64_t file_number);
287
+ bool rollback_succeeding_memtables);
276
288
 
277
289
  // Try commit a successful flush in the manifest file. It might just return
278
290
  // Status::OK letting a concurrent flush do the actual recording.
@@ -382,6 +394,25 @@ class MemTableList {
382
394
  return memlist.front()->GetID();
383
395
  }
384
396
 
397
+ // DB mutex held.
398
+ // Gets the newest user-defined timestamp for the Memtables in ascending ID
399
+ // order, up to the `max_memtable_id`. Used by background flush job
400
+ // to check Memtables' eligibility for flush w.r.t retaining UDTs.
401
+ std::vector<Slice> GetTablesNewestUDT(uint64_t max_memtable_id) {
402
+ std::vector<Slice> newest_udts;
403
+ auto& memlist = current_->memlist_;
404
+ // Iterating through the memlist starting at the end, the vector<MemTable*>
405
+ // ret is filled with memtables already sorted in increasing MemTable ID.
406
+ for (auto it = memlist.rbegin(); it != memlist.rend(); ++it) {
407
+ MemTable* m = *it;
408
+ if (m->GetID() > max_memtable_id) {
409
+ break;
410
+ }
411
+ newest_udts.push_back(m->GetNewestUDT());
412
+ }
413
+ return newest_udts;
414
+ }
415
+
385
416
  void AssignAtomicFlushSeq(const SequenceNumber& seq) {
386
417
  const auto& memlist = current_->memlist_;
387
418
  // Scan the memtable list from new to old