@nxtedition/rocksdb 5.2.21 → 5.2.26

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (909) hide show
  1. package/binding.cc +216 -252
  2. package/binding.gyp +78 -72
  3. package/deps/rocksdb/build_version.cc +70 -4
  4. package/deps/rocksdb/rocksdb/CMakeLists.txt +281 -149
  5. package/deps/rocksdb/rocksdb/Makefile +459 -469
  6. package/deps/rocksdb/rocksdb/README.md +4 -4
  7. package/deps/rocksdb/rocksdb/TARGETS +5244 -1500
  8. package/deps/rocksdb/rocksdb/cache/cache.cc +12 -3
  9. package/deps/rocksdb/rocksdb/cache/cache_bench.cc +7 -368
  10. package/deps/rocksdb/rocksdb/cache/cache_bench_tool.cc +924 -0
  11. package/deps/rocksdb/rocksdb/cache/cache_entry_roles.cc +128 -0
  12. package/deps/rocksdb/rocksdb/cache/cache_entry_roles.h +103 -0
  13. package/deps/rocksdb/rocksdb/cache/cache_entry_stats.h +183 -0
  14. package/deps/rocksdb/rocksdb/cache/cache_helpers.h +11 -0
  15. package/deps/rocksdb/rocksdb/cache/cache_key.cc +344 -0
  16. package/deps/rocksdb/rocksdb/cache/cache_key.h +132 -0
  17. package/deps/rocksdb/rocksdb/cache/cache_reservation_manager.cc +183 -0
  18. package/deps/rocksdb/rocksdb/cache/cache_reservation_manager.h +288 -0
  19. package/deps/rocksdb/rocksdb/cache/cache_reservation_manager_test.cc +468 -0
  20. package/deps/rocksdb/rocksdb/cache/cache_test.cc +85 -8
  21. package/deps/rocksdb/rocksdb/cache/clock_cache.cc +121 -51
  22. package/deps/rocksdb/rocksdb/cache/compressed_secondary_cache.cc +171 -0
  23. package/deps/rocksdb/rocksdb/cache/compressed_secondary_cache.h +86 -0
  24. package/deps/rocksdb/rocksdb/cache/compressed_secondary_cache_test.cc +607 -0
  25. package/deps/rocksdb/rocksdb/cache/lru_cache.cc +381 -154
  26. package/deps/rocksdb/rocksdb/cache/lru_cache.h +176 -33
  27. package/deps/rocksdb/rocksdb/cache/lru_cache_test.cc +1659 -3
  28. package/deps/rocksdb/rocksdb/cache/sharded_cache.cc +94 -23
  29. package/deps/rocksdb/rocksdb/cache/sharded_cache.h +49 -28
  30. package/deps/rocksdb/rocksdb/crash_test.mk +93 -0
  31. package/deps/rocksdb/rocksdb/db/arena_wrapped_db_iter.cc +54 -31
  32. package/deps/rocksdb/rocksdb/db/arena_wrapped_db_iter.h +10 -6
  33. package/deps/rocksdb/rocksdb/db/blob/blob_counting_iterator.h +146 -0
  34. package/deps/rocksdb/rocksdb/db/blob/blob_counting_iterator_test.cc +326 -0
  35. package/deps/rocksdb/rocksdb/db/blob/blob_fetcher.cc +34 -0
  36. package/deps/rocksdb/rocksdb/db/blob/blob_fetcher.h +37 -0
  37. package/deps/rocksdb/rocksdb/db/blob/blob_file_addition.cc +4 -2
  38. package/deps/rocksdb/rocksdb/db/blob/blob_file_addition_test.cc +8 -4
  39. package/deps/rocksdb/rocksdb/db/blob/blob_file_builder.cc +99 -40
  40. package/deps/rocksdb/rocksdb/db/blob/blob_file_builder.h +20 -8
  41. package/deps/rocksdb/rocksdb/db/blob/blob_file_builder_test.cc +95 -83
  42. package/deps/rocksdb/rocksdb/db/blob/blob_file_cache.cc +13 -10
  43. package/deps/rocksdb/rocksdb/db/blob/blob_file_cache.h +7 -4
  44. package/deps/rocksdb/rocksdb/db/blob/blob_file_cache_test.cc +37 -37
  45. package/deps/rocksdb/rocksdb/db/blob/blob_file_completion_callback.h +101 -0
  46. package/deps/rocksdb/rocksdb/db/blob/blob_file_meta.cc +8 -1
  47. package/deps/rocksdb/rocksdb/db/blob/blob_file_meta.h +6 -0
  48. package/deps/rocksdb/rocksdb/db/blob/blob_file_reader.cc +209 -44
  49. package/deps/rocksdb/rocksdb/db/blob/blob_file_reader.h +37 -11
  50. package/deps/rocksdb/rocksdb/db/blob/blob_file_reader_test.cc +382 -179
  51. package/deps/rocksdb/rocksdb/db/blob/blob_garbage_meter.cc +100 -0
  52. package/deps/rocksdb/rocksdb/db/blob/blob_garbage_meter.h +102 -0
  53. package/deps/rocksdb/rocksdb/db/blob/blob_garbage_meter_test.cc +196 -0
  54. package/deps/rocksdb/rocksdb/db/blob/blob_index.h +3 -0
  55. package/deps/rocksdb/rocksdb/db/blob/blob_log_format.h +2 -1
  56. package/deps/rocksdb/rocksdb/db/blob/blob_log_sequential_reader.cc +7 -5
  57. package/deps/rocksdb/rocksdb/db/blob/blob_log_sequential_reader.h +10 -3
  58. package/deps/rocksdb/rocksdb/db/blob/blob_log_writer.cc +12 -8
  59. package/deps/rocksdb/rocksdb/db/blob/blob_log_writer.h +5 -5
  60. package/deps/rocksdb/rocksdb/db/blob/db_blob_basic_test.cc +772 -9
  61. package/deps/rocksdb/rocksdb/db/blob/db_blob_compaction_test.cc +730 -0
  62. package/deps/rocksdb/rocksdb/db/blob/db_blob_corruption_test.cc +82 -0
  63. package/deps/rocksdb/rocksdb/db/blob/db_blob_index_test.cc +155 -17
  64. package/deps/rocksdb/rocksdb/db/blob/prefetch_buffer_collection.cc +21 -0
  65. package/deps/rocksdb/rocksdb/db/blob/prefetch_buffer_collection.h +38 -0
  66. package/deps/rocksdb/rocksdb/db/builder.cc +137 -89
  67. package/deps/rocksdb/rocksdb/db/builder.h +16 -37
  68. package/deps/rocksdb/rocksdb/db/c.cc +413 -208
  69. package/deps/rocksdb/rocksdb/db/c_test.c +227 -138
  70. package/deps/rocksdb/rocksdb/db/column_family.cc +118 -103
  71. package/deps/rocksdb/rocksdb/db/column_family.h +86 -44
  72. package/deps/rocksdb/rocksdb/db/column_family_test.cc +38 -24
  73. package/deps/rocksdb/rocksdb/db/compact_files_test.cc +81 -0
  74. package/deps/rocksdb/rocksdb/db/compaction/clipping_iterator.h +275 -0
  75. package/deps/rocksdb/rocksdb/db/compaction/clipping_iterator_test.cc +258 -0
  76. package/deps/rocksdb/rocksdb/db/compaction/compaction.cc +81 -28
  77. package/deps/rocksdb/rocksdb/db/compaction/compaction.h +43 -12
  78. package/deps/rocksdb/rocksdb/db/compaction/compaction_iteration_stats.h +12 -0
  79. package/deps/rocksdb/rocksdb/db/compaction/compaction_iterator.cc +406 -215
  80. package/deps/rocksdb/rocksdb/db/compaction/compaction_iterator.h +147 -50
  81. package/deps/rocksdb/rocksdb/db/compaction/compaction_iterator_test.cc +167 -61
  82. package/deps/rocksdb/rocksdb/db/compaction/compaction_job.cc +1321 -156
  83. package/deps/rocksdb/rocksdb/db/compaction/compaction_job.h +197 -28
  84. package/deps/rocksdb/rocksdb/db/compaction/compaction_job_stats_test.cc +2 -3
  85. package/deps/rocksdb/rocksdb/db/compaction/compaction_job_test.cc +246 -43
  86. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker.cc +65 -26
  87. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker.h +7 -7
  88. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_fifo.cc +122 -9
  89. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_fifo.h +8 -2
  90. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_level.cc +18 -6
  91. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_level.h +1 -1
  92. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_test.cc +536 -44
  93. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_universal.cc +311 -30
  94. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_universal.h +1 -1
  95. package/deps/rocksdb/rocksdb/db/compaction/compaction_service_test.cc +849 -0
  96. package/deps/rocksdb/rocksdb/db/compaction/file_pri.h +92 -0
  97. package/deps/rocksdb/rocksdb/db/compaction/sst_partitioner.cc +46 -0
  98. package/deps/rocksdb/rocksdb/db/comparator_db_test.cc +1 -1
  99. package/deps/rocksdb/rocksdb/db/convenience.cc +6 -3
  100. package/deps/rocksdb/rocksdb/db/corruption_test.cc +383 -28
  101. package/deps/rocksdb/rocksdb/db/cuckoo_table_db_test.cc +7 -2
  102. package/deps/rocksdb/rocksdb/db/db_basic_test.cc +154 -45
  103. package/deps/rocksdb/rocksdb/db/db_block_cache_test.cc +1095 -33
  104. package/deps/rocksdb/rocksdb/db/db_bloom_filter_test.cc +1249 -203
  105. package/deps/rocksdb/rocksdb/db/db_compaction_filter_test.cc +135 -9
  106. package/deps/rocksdb/rocksdb/db/db_compaction_test.cc +1348 -166
  107. package/deps/rocksdb/rocksdb/db/db_dynamic_level_test.cc +3 -5
  108. package/deps/rocksdb/rocksdb/db/db_encryption_test.cc +1 -1
  109. package/deps/rocksdb/rocksdb/db/db_filesnapshot.cc +312 -45
  110. package/deps/rocksdb/rocksdb/db/db_flush_test.cc +1734 -48
  111. package/deps/rocksdb/rocksdb/db/{compacted_db_impl.cc → db_impl/compacted_db_impl.cc} +24 -7
  112. package/deps/rocksdb/rocksdb/db/{compacted_db_impl.h → db_impl/compacted_db_impl.h} +1 -1
  113. package/deps/rocksdb/rocksdb/db/db_impl/db_impl.cc +644 -333
  114. package/deps/rocksdb/rocksdb/db/db_impl/db_impl.h +365 -92
  115. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_compaction_flush.cc +578 -210
  116. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_debug.cc +38 -16
  117. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_experimental.cc +17 -10
  118. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_files.cc +75 -74
  119. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_open.cc +450 -183
  120. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_readonly.cc +42 -9
  121. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_secondary.cc +232 -15
  122. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_secondary.h +42 -4
  123. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_write.cc +297 -100
  124. package/deps/rocksdb/rocksdb/db/db_info_dumper.cc +16 -15
  125. package/deps/rocksdb/rocksdb/db/db_inplace_update_test.cc +31 -1
  126. package/deps/rocksdb/rocksdb/db/db_io_failure_test.cc +6 -5
  127. package/deps/rocksdb/rocksdb/db/db_iter.cc +218 -153
  128. package/deps/rocksdb/rocksdb/db/db_iter.h +14 -12
  129. package/deps/rocksdb/rocksdb/db/db_iter_stress_test.cc +1 -1
  130. package/deps/rocksdb/rocksdb/db/db_iter_test.cc +84 -160
  131. package/deps/rocksdb/rocksdb/db/db_iterator_test.cc +47 -6
  132. package/deps/rocksdb/rocksdb/db/db_kv_checksum_test.cc +204 -0
  133. package/deps/rocksdb/rocksdb/db/db_log_iter_test.cc +21 -13
  134. package/deps/rocksdb/rocksdb/db/db_logical_block_size_cache_test.cc +17 -10
  135. package/deps/rocksdb/rocksdb/db/db_memtable_test.cc +38 -24
  136. package/deps/rocksdb/rocksdb/db/db_merge_operand_test.cc +184 -19
  137. package/deps/rocksdb/rocksdb/db/db_merge_operator_test.cc +1 -1
  138. package/deps/rocksdb/rocksdb/db/db_options_test.cc +183 -3
  139. package/deps/rocksdb/rocksdb/db/db_properties_test.cc +409 -9
  140. package/deps/rocksdb/rocksdb/db/db_range_del_test.cc +92 -23
  141. package/deps/rocksdb/rocksdb/db/db_rate_limiter_test.cc +446 -0
  142. package/deps/rocksdb/rocksdb/db/{db_impl/db_secondary_test.cc → db_secondary_test.cc} +363 -35
  143. package/deps/rocksdb/rocksdb/db/db_sst_test.cc +520 -15
  144. package/deps/rocksdb/rocksdb/db/db_statistics_test.cc +50 -1
  145. package/deps/rocksdb/rocksdb/db/db_table_properties_test.cc +139 -4
  146. package/deps/rocksdb/rocksdb/db/db_tailing_iter_test.cc +1 -1
  147. package/deps/rocksdb/rocksdb/db/db_test.cc +669 -359
  148. package/deps/rocksdb/rocksdb/db/db_test2.cc +2110 -304
  149. package/deps/rocksdb/rocksdb/db/db_test_util.cc +76 -43
  150. package/deps/rocksdb/rocksdb/db/db_test_util.h +231 -103
  151. package/deps/rocksdb/rocksdb/db/db_universal_compaction_test.cc +19 -11
  152. package/deps/rocksdb/rocksdb/db/db_wal_test.cc +490 -71
  153. package/deps/rocksdb/rocksdb/db/db_with_timestamp_basic_test.cc +980 -349
  154. package/deps/rocksdb/rocksdb/db/db_with_timestamp_compaction_test.cc +11 -12
  155. package/deps/rocksdb/rocksdb/db/db_write_buffer_manager_test.cc +793 -0
  156. package/deps/rocksdb/rocksdb/db/db_write_test.cc +2 -1
  157. package/deps/rocksdb/rocksdb/db/dbformat.cc +4 -12
  158. package/deps/rocksdb/rocksdb/db/dbformat.h +28 -18
  159. package/deps/rocksdb/rocksdb/db/dbformat_test.cc +3 -0
  160. package/deps/rocksdb/rocksdb/db/deletefile_test.cc +50 -15
  161. package/deps/rocksdb/rocksdb/db/error_handler.cc +127 -41
  162. package/deps/rocksdb/rocksdb/db/error_handler.h +12 -5
  163. package/deps/rocksdb/rocksdb/db/error_handler_fs_test.cc +524 -255
  164. package/deps/rocksdb/rocksdb/db/event_helpers.cc +136 -11
  165. package/deps/rocksdb/rocksdb/db/event_helpers.h +27 -2
  166. package/deps/rocksdb/rocksdb/db/experimental.cc +100 -0
  167. package/deps/rocksdb/rocksdb/db/external_sst_file_basic_test.cc +307 -4
  168. package/deps/rocksdb/rocksdb/db/external_sst_file_ingestion_job.cc +137 -60
  169. package/deps/rocksdb/rocksdb/db/external_sst_file_ingestion_job.h +12 -8
  170. package/deps/rocksdb/rocksdb/db/external_sst_file_test.cc +86 -55
  171. package/deps/rocksdb/rocksdb/db/fault_injection_test.cc +86 -5
  172. package/deps/rocksdb/rocksdb/db/filename_test.cc +63 -0
  173. package/deps/rocksdb/rocksdb/db/flush_job.cc +619 -64
  174. package/deps/rocksdb/rocksdb/db/flush_job.h +30 -7
  175. package/deps/rocksdb/rocksdb/db/flush_job_test.cc +33 -16
  176. package/deps/rocksdb/rocksdb/db/flush_scheduler.h +2 -1
  177. package/deps/rocksdb/rocksdb/db/forward_iterator.cc +18 -17
  178. package/deps/rocksdb/rocksdb/db/forward_iterator.h +5 -4
  179. package/deps/rocksdb/rocksdb/db/forward_iterator_bench.cc +0 -1
  180. package/deps/rocksdb/rocksdb/db/history_trimming_iterator.h +91 -0
  181. package/deps/rocksdb/rocksdb/db/import_column_family_job.cc +25 -14
  182. package/deps/rocksdb/rocksdb/db/import_column_family_job.h +6 -5
  183. package/deps/rocksdb/rocksdb/db/import_column_family_test.cc +1 -1
  184. package/deps/rocksdb/rocksdb/db/internal_stats.cc +471 -50
  185. package/deps/rocksdb/rocksdb/db/internal_stats.h +129 -25
  186. package/deps/rocksdb/rocksdb/db/job_context.h +22 -9
  187. package/deps/rocksdb/rocksdb/db/kv_checksum.h +394 -0
  188. package/deps/rocksdb/rocksdb/db/listener_test.cc +518 -41
  189. package/deps/rocksdb/rocksdb/db/log_format.h +4 -1
  190. package/deps/rocksdb/rocksdb/db/log_reader.cc +129 -6
  191. package/deps/rocksdb/rocksdb/db/log_reader.h +17 -1
  192. package/deps/rocksdb/rocksdb/db/log_test.cc +161 -11
  193. package/deps/rocksdb/rocksdb/db/log_writer.cc +92 -13
  194. package/deps/rocksdb/rocksdb/db/log_writer.h +18 -5
  195. package/deps/rocksdb/rocksdb/db/logs_with_prep_tracker.h +1 -1
  196. package/deps/rocksdb/rocksdb/db/lookup_key.h +0 -1
  197. package/deps/rocksdb/rocksdb/db/malloc_stats.cc +2 -2
  198. package/deps/rocksdb/rocksdb/db/manual_compaction_test.cc +21 -8
  199. package/deps/rocksdb/rocksdb/db/memtable.cc +144 -54
  200. package/deps/rocksdb/rocksdb/db/memtable.h +72 -15
  201. package/deps/rocksdb/rocksdb/db/memtable_list.cc +95 -47
  202. package/deps/rocksdb/rocksdb/db/memtable_list.h +33 -13
  203. package/deps/rocksdb/rocksdb/db/memtable_list_test.cc +61 -31
  204. package/deps/rocksdb/rocksdb/db/merge_context.h +20 -8
  205. package/deps/rocksdb/rocksdb/db/merge_helper.cc +54 -11
  206. package/deps/rocksdb/rocksdb/db/merge_helper.h +17 -6
  207. package/deps/rocksdb/rocksdb/db/merge_helper_test.cc +13 -7
  208. package/deps/rocksdb/rocksdb/db/merge_test.cc +40 -19
  209. package/deps/rocksdb/rocksdb/db/obsolete_files_test.cc +14 -25
  210. package/deps/rocksdb/rocksdb/db/output_validator.cc +3 -0
  211. package/deps/rocksdb/rocksdb/db/output_validator.h +5 -4
  212. package/deps/rocksdb/rocksdb/db/perf_context_test.cc +32 -28
  213. package/deps/rocksdb/rocksdb/db/periodic_work_scheduler.cc +43 -29
  214. package/deps/rocksdb/rocksdb/db/periodic_work_scheduler.h +9 -7
  215. package/deps/rocksdb/rocksdb/db/periodic_work_scheduler_test.cc +21 -16
  216. package/deps/rocksdb/rocksdb/db/pinned_iterators_manager.h +1 -1
  217. package/deps/rocksdb/rocksdb/db/plain_table_db_test.cc +29 -36
  218. package/deps/rocksdb/rocksdb/db/pre_release_callback.h +1 -2
  219. package/deps/rocksdb/rocksdb/db/prefix_test.cc +4 -4
  220. package/deps/rocksdb/rocksdb/db/range_del_aggregator.h +2 -2
  221. package/deps/rocksdb/rocksdb/db/range_del_aggregator_bench.cc +11 -11
  222. package/deps/rocksdb/rocksdb/db/range_del_aggregator_test.cc +3 -2
  223. package/deps/rocksdb/rocksdb/db/range_tombstone_fragmenter.cc +14 -8
  224. package/deps/rocksdb/rocksdb/db/range_tombstone_fragmenter.h +17 -0
  225. package/deps/rocksdb/rocksdb/db/range_tombstone_fragmenter_test.cc +4 -2
  226. package/deps/rocksdb/rocksdb/db/read_callback.h +1 -0
  227. package/deps/rocksdb/rocksdb/db/repair.cc +87 -58
  228. package/deps/rocksdb/rocksdb/db/repair_test.cc +35 -5
  229. package/deps/rocksdb/rocksdb/db/snapshot_impl.h +2 -1
  230. package/deps/rocksdb/rocksdb/db/table_cache.cc +95 -69
  231. package/deps/rocksdb/rocksdb/db/table_cache.h +63 -53
  232. package/deps/rocksdb/rocksdb/db/table_properties_collector.cc +4 -4
  233. package/deps/rocksdb/rocksdb/db/table_properties_collector.h +78 -10
  234. package/deps/rocksdb/rocksdb/db/table_properties_collector_test.cc +28 -33
  235. package/deps/rocksdb/rocksdb/db/transaction_log_impl.cc +30 -51
  236. package/deps/rocksdb/rocksdb/db/transaction_log_impl.h +12 -8
  237. package/deps/rocksdb/rocksdb/db/version_builder.cc +564 -341
  238. package/deps/rocksdb/rocksdb/db/version_builder.h +8 -8
  239. package/deps/rocksdb/rocksdb/db/version_builder_test.cc +327 -155
  240. package/deps/rocksdb/rocksdb/db/version_edit.cc +89 -27
  241. package/deps/rocksdb/rocksdb/db/version_edit.h +42 -17
  242. package/deps/rocksdb/rocksdb/db/version_edit_handler.cc +324 -43
  243. package/deps/rocksdb/rocksdb/db/version_edit_handler.h +79 -22
  244. package/deps/rocksdb/rocksdb/db/version_edit_test.cc +165 -20
  245. package/deps/rocksdb/rocksdb/db/version_set.cc +935 -1034
  246. package/deps/rocksdb/rocksdb/db/version_set.h +183 -122
  247. package/deps/rocksdb/rocksdb/db/version_set_test.cc +556 -138
  248. package/deps/rocksdb/rocksdb/db/version_util.h +68 -0
  249. package/deps/rocksdb/rocksdb/db/wal_manager.cc +23 -21
  250. package/deps/rocksdb/rocksdb/db/wal_manager.h +5 -2
  251. package/deps/rocksdb/rocksdb/db/wal_manager_test.cc +30 -27
  252. package/deps/rocksdb/rocksdb/db/write_batch.cc +704 -209
  253. package/deps/rocksdb/rocksdb/db/write_batch_internal.h +135 -2
  254. package/deps/rocksdb/rocksdb/db/write_batch_test.cc +209 -5
  255. package/deps/rocksdb/rocksdb/db/write_callback_test.cc +2 -0
  256. package/deps/rocksdb/rocksdb/db/write_controller.cc +47 -54
  257. package/deps/rocksdb/rocksdb/db/write_controller.h +12 -9
  258. package/deps/rocksdb/rocksdb/db/write_controller_test.cc +215 -103
  259. package/deps/rocksdb/rocksdb/db/write_thread.cc +11 -0
  260. package/deps/rocksdb/rocksdb/db/write_thread.h +14 -8
  261. package/deps/rocksdb/rocksdb/db_stress_tool/CMakeLists.txt +7 -4
  262. package/deps/rocksdb/rocksdb/db_stress_tool/batched_ops_stress.cc +10 -3
  263. package/deps/rocksdb/rocksdb/db_stress_tool/cf_consistency_stress.cc +6 -0
  264. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress.cc +1 -1
  265. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_common.cc +19 -2
  266. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_common.h +78 -25
  267. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_compaction_filter.h +13 -2
  268. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_driver.cc +29 -12
  269. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_env_wrapper.h +5 -1
  270. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_gflags.cc +199 -32
  271. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_listener.cc +188 -0
  272. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_listener.h +59 -10
  273. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_shared_state.h +77 -109
  274. package/deps/rocksdb/rocksdb/{third-party/folly/folly/synchronization/WaitOptions.cpp → db_stress_tool/db_stress_stat.cc} +9 -4
  275. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_stat.h +7 -6
  276. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_table_properties_collector.h +1 -0
  277. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_test_base.cc +699 -143
  278. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_test_base.h +20 -2
  279. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_tool.cc +49 -39
  280. package/deps/rocksdb/rocksdb/db_stress_tool/expected_state.cc +631 -0
  281. package/deps/rocksdb/rocksdb/db_stress_tool/expected_state.h +287 -0
  282. package/deps/rocksdb/rocksdb/db_stress_tool/multi_ops_txns_stress.cc +1565 -0
  283. package/deps/rocksdb/rocksdb/db_stress_tool/multi_ops_txns_stress.h +374 -0
  284. package/deps/rocksdb/rocksdb/db_stress_tool/no_batched_ops_stress.cc +149 -18
  285. package/deps/rocksdb/rocksdb/env/composite_env.cc +464 -0
  286. package/deps/rocksdb/rocksdb/env/composite_env_wrapper.h +98 -646
  287. package/deps/rocksdb/rocksdb/env/emulated_clock.h +114 -0
  288. package/deps/rocksdb/rocksdb/env/env.cc +632 -42
  289. package/deps/rocksdb/rocksdb/env/env_basic_test.cc +84 -36
  290. package/deps/rocksdb/rocksdb/env/env_chroot.cc +88 -286
  291. package/deps/rocksdb/rocksdb/env/env_chroot.h +34 -1
  292. package/deps/rocksdb/rocksdb/env/env_encryption.cc +469 -277
  293. package/deps/rocksdb/rocksdb/env/env_encryption_ctr.h +9 -30
  294. package/deps/rocksdb/rocksdb/env/env_posix.cc +110 -119
  295. package/deps/rocksdb/rocksdb/env/env_test.cc +1128 -39
  296. package/deps/rocksdb/rocksdb/env/file_system.cc +147 -8
  297. package/deps/rocksdb/rocksdb/env/file_system_tracer.cc +207 -136
  298. package/deps/rocksdb/rocksdb/env/file_system_tracer.h +86 -54
  299. package/deps/rocksdb/rocksdb/env/fs_posix.cc +192 -64
  300. package/deps/rocksdb/rocksdb/env/fs_readonly.h +107 -0
  301. package/deps/rocksdb/rocksdb/env/fs_remap.cc +339 -0
  302. package/deps/rocksdb/rocksdb/env/fs_remap.h +139 -0
  303. package/deps/rocksdb/rocksdb/env/io_posix.cc +245 -41
  304. package/deps/rocksdb/rocksdb/env/io_posix.h +66 -1
  305. package/deps/rocksdb/rocksdb/env/mock_env.cc +147 -149
  306. package/deps/rocksdb/rocksdb/env/mock_env.h +113 -11
  307. package/deps/rocksdb/rocksdb/env/mock_env_test.cc +2 -4
  308. package/deps/rocksdb/rocksdb/env/unique_id_gen.cc +164 -0
  309. package/deps/rocksdb/rocksdb/env/unique_id_gen.h +71 -0
  310. package/deps/rocksdb/rocksdb/file/delete_scheduler.cc +9 -5
  311. package/deps/rocksdb/rocksdb/file/delete_scheduler.h +6 -4
  312. package/deps/rocksdb/rocksdb/file/delete_scheduler_test.cc +19 -12
  313. package/deps/rocksdb/rocksdb/file/file_prefetch_buffer.cc +459 -70
  314. package/deps/rocksdb/rocksdb/file/file_prefetch_buffer.h +205 -28
  315. package/deps/rocksdb/rocksdb/file/file_util.cc +39 -28
  316. package/deps/rocksdb/rocksdb/file/file_util.h +18 -27
  317. package/deps/rocksdb/rocksdb/file/filename.cc +59 -22
  318. package/deps/rocksdb/rocksdb/file/filename.h +13 -8
  319. package/deps/rocksdb/rocksdb/file/line_file_reader.cc +68 -0
  320. package/deps/rocksdb/rocksdb/file/line_file_reader.h +59 -0
  321. package/deps/rocksdb/rocksdb/file/prefetch_test.cc +1130 -6
  322. package/deps/rocksdb/rocksdb/file/random_access_file_reader.cc +220 -36
  323. package/deps/rocksdb/rocksdb/file/random_access_file_reader.h +69 -17
  324. package/deps/rocksdb/rocksdb/file/random_access_file_reader_test.cc +13 -12
  325. package/deps/rocksdb/rocksdb/file/read_write_util.cc +3 -38
  326. package/deps/rocksdb/rocksdb/file/read_write_util.h +0 -4
  327. package/deps/rocksdb/rocksdb/file/readahead_file_info.h +33 -0
  328. package/deps/rocksdb/rocksdb/file/sequence_file_reader.cc +57 -9
  329. package/deps/rocksdb/rocksdb/file/sequence_file_reader.h +58 -6
  330. package/deps/rocksdb/rocksdb/file/sst_file_manager_impl.cc +29 -54
  331. package/deps/rocksdb/rocksdb/file/sst_file_manager_impl.h +22 -29
  332. package/deps/rocksdb/rocksdb/file/writable_file_writer.cc +424 -50
  333. package/deps/rocksdb/rocksdb/file/writable_file_writer.h +66 -19
  334. package/deps/rocksdb/rocksdb/include/rocksdb/advanced_options.h +157 -66
  335. package/deps/rocksdb/rocksdb/include/rocksdb/c.h +224 -121
  336. package/deps/rocksdb/rocksdb/include/rocksdb/cache.h +333 -30
  337. package/deps/rocksdb/rocksdb/include/rocksdb/cache_bench_tool.h +14 -0
  338. package/deps/rocksdb/rocksdb/include/rocksdb/cleanable.h +1 -1
  339. package/deps/rocksdb/rocksdb/include/rocksdb/compaction_filter.h +90 -50
  340. package/deps/rocksdb/rocksdb/include/rocksdb/compaction_job_stats.h +13 -5
  341. package/deps/rocksdb/rocksdb/include/rocksdb/comparator.h +20 -4
  342. package/deps/rocksdb/rocksdb/include/rocksdb/concurrent_task_limiter.h +8 -3
  343. package/deps/rocksdb/rocksdb/include/rocksdb/configurable.h +53 -12
  344. package/deps/rocksdb/rocksdb/include/rocksdb/convenience.h +31 -6
  345. package/deps/rocksdb/rocksdb/include/rocksdb/customizable.h +102 -7
  346. package/deps/rocksdb/rocksdb/include/rocksdb/data_structure.h +51 -0
  347. package/deps/rocksdb/rocksdb/include/rocksdb/db.h +370 -262
  348. package/deps/rocksdb/rocksdb/include/rocksdb/env.h +286 -87
  349. package/deps/rocksdb/rocksdb/include/rocksdb/env_encryption.h +124 -64
  350. package/deps/rocksdb/rocksdb/include/rocksdb/experimental.h +27 -0
  351. package/deps/rocksdb/rocksdb/include/rocksdb/file_checksum.h +21 -4
  352. package/deps/rocksdb/rocksdb/include/rocksdb/file_system.h +384 -41
  353. package/deps/rocksdb/rocksdb/include/rocksdb/filter_policy.h +111 -143
  354. package/deps/rocksdb/rocksdb/include/rocksdb/flush_block_policy.h +20 -6
  355. package/deps/rocksdb/rocksdb/include/rocksdb/functor_wrapper.h +56 -0
  356. package/deps/rocksdb/rocksdb/include/rocksdb/io_status.h +15 -33
  357. package/deps/rocksdb/rocksdb/include/rocksdb/iostats_context.h +37 -1
  358. package/deps/rocksdb/rocksdb/include/rocksdb/iterator.h +1 -3
  359. package/deps/rocksdb/rocksdb/include/rocksdb/listener.h +314 -26
  360. package/deps/rocksdb/rocksdb/include/rocksdb/memory_allocator.h +11 -7
  361. package/deps/rocksdb/rocksdb/include/rocksdb/memtablerep.h +50 -15
  362. package/deps/rocksdb/rocksdb/include/rocksdb/merge_operator.h +10 -3
  363. package/deps/rocksdb/rocksdb/include/rocksdb/metadata.h +186 -96
  364. package/deps/rocksdb/rocksdb/include/rocksdb/options.h +373 -103
  365. package/deps/rocksdb/rocksdb/include/rocksdb/perf_context.h +13 -3
  366. package/deps/rocksdb/rocksdb/include/rocksdb/persistent_cache.h +2 -2
  367. package/deps/rocksdb/rocksdb/include/rocksdb/rate_limiter.h +37 -7
  368. package/deps/rocksdb/rocksdb/include/rocksdb/rocksdb_namespace.h +6 -0
  369. package/deps/rocksdb/rocksdb/include/rocksdb/secondary_cache.h +87 -0
  370. package/deps/rocksdb/rocksdb/include/rocksdb/slice.h +5 -12
  371. package/deps/rocksdb/rocksdb/include/rocksdb/slice_transform.h +59 -30
  372. package/deps/rocksdb/rocksdb/include/rocksdb/sst_file_manager.h +11 -11
  373. package/deps/rocksdb/rocksdb/include/rocksdb/sst_file_writer.h +22 -0
  374. package/deps/rocksdb/rocksdb/include/rocksdb/sst_partitioner.h +17 -10
  375. package/deps/rocksdb/rocksdb/include/rocksdb/statistics.h +121 -41
  376. package/deps/rocksdb/rocksdb/include/rocksdb/stats_history.h +1 -0
  377. package/deps/rocksdb/rocksdb/include/rocksdb/status.h +114 -136
  378. package/deps/rocksdb/rocksdb/include/rocksdb/system_clock.h +116 -0
  379. package/deps/rocksdb/rocksdb/include/rocksdb/table.h +160 -18
  380. package/deps/rocksdb/rocksdb/include/rocksdb/table_properties.h +57 -15
  381. package/deps/rocksdb/rocksdb/include/rocksdb/thread_status.h +3 -1
  382. package/deps/rocksdb/rocksdb/include/rocksdb/trace_reader_writer.h +10 -6
  383. package/deps/rocksdb/rocksdb/include/rocksdb/trace_record.h +247 -0
  384. package/deps/rocksdb/rocksdb/include/rocksdb/trace_record_result.h +187 -0
  385. package/deps/rocksdb/rocksdb/include/rocksdb/transaction_log.h +1 -1
  386. package/deps/rocksdb/rocksdb/include/rocksdb/types.h +14 -24
  387. package/deps/rocksdb/rocksdb/include/rocksdb/unique_id.h +46 -0
  388. package/deps/rocksdb/rocksdb/include/rocksdb/universal_compaction.h +14 -4
  389. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/agg_merge.h +138 -0
  390. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/backup_engine.h +631 -0
  391. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/cache_dump_load.h +142 -0
  392. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/checkpoint.h +12 -9
  393. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/customizable_util.h +368 -0
  394. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/ldb_cmd.h +24 -0
  395. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/ldb_cmd_execute_result.h +4 -0
  396. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/object_registry.h +418 -63
  397. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/options_type.h +143 -73
  398. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/options_util.h +2 -2
  399. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/replayer.h +87 -0
  400. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/sim_cache.h +2 -2
  401. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/stackable_db.h +43 -5
  402. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/table_properties_collectors.h +18 -23
  403. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/transaction.h +26 -0
  404. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/transaction_db.h +32 -6
  405. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/transaction_db_mutex.h +1 -2
  406. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/write_batch_with_index.h +20 -1
  407. package/deps/rocksdb/rocksdb/include/rocksdb/version.h +30 -3
  408. package/deps/rocksdb/rocksdb/include/rocksdb/wal_filter.h +11 -2
  409. package/deps/rocksdb/rocksdb/include/rocksdb/write_batch.h +89 -11
  410. package/deps/rocksdb/rocksdb/include/rocksdb/write_batch_base.h +11 -0
  411. package/deps/rocksdb/rocksdb/include/rocksdb/write_buffer_manager.h +108 -38
  412. package/deps/rocksdb/rocksdb/logging/auto_roll_logger.cc +40 -23
  413. package/deps/rocksdb/rocksdb/logging/auto_roll_logger.h +12 -5
  414. package/deps/rocksdb/rocksdb/logging/auto_roll_logger_test.cc +100 -49
  415. package/deps/rocksdb/rocksdb/logging/env_logger.h +7 -5
  416. package/deps/rocksdb/rocksdb/logging/env_logger_test.cc +0 -1
  417. package/deps/rocksdb/rocksdb/logging/posix_logger.h +3 -9
  418. package/deps/rocksdb/rocksdb/memory/arena.cc +3 -1
  419. package/deps/rocksdb/rocksdb/memory/arena.h +1 -1
  420. package/deps/rocksdb/rocksdb/memory/jemalloc_nodump_allocator.cc +171 -106
  421. package/deps/rocksdb/rocksdb/memory/jemalloc_nodump_allocator.h +31 -15
  422. package/deps/rocksdb/rocksdb/memory/memkind_kmem_allocator.cc +15 -4
  423. package/deps/rocksdb/rocksdb/memory/memkind_kmem_allocator.h +24 -8
  424. package/deps/rocksdb/rocksdb/memory/memory_allocator.cc +91 -0
  425. package/deps/rocksdb/rocksdb/memory/memory_allocator_test.cc +239 -0
  426. package/deps/rocksdb/rocksdb/memory/memory_usage.h +14 -1
  427. package/deps/rocksdb/rocksdb/memtable/hash_linklist_rep.cc +72 -9
  428. package/deps/rocksdb/rocksdb/memtable/hash_skiplist_rep.cc +52 -6
  429. package/deps/rocksdb/rocksdb/memtable/inlineskiplist.h +53 -0
  430. package/deps/rocksdb/rocksdb/memtable/inlineskiplist_test.cc +5 -5
  431. package/deps/rocksdb/rocksdb/memtable/memtablerep_bench.cc +17 -5
  432. package/deps/rocksdb/rocksdb/memtable/skiplist_test.cc +1 -1
  433. package/deps/rocksdb/rocksdb/memtable/skiplistrep.cc +87 -0
  434. package/deps/rocksdb/rocksdb/memtable/vectorrep.cc +20 -10
  435. package/deps/rocksdb/rocksdb/memtable/write_buffer_manager.cc +148 -94
  436. package/deps/rocksdb/rocksdb/memtable/write_buffer_manager_test.cc +160 -62
  437. package/deps/rocksdb/rocksdb/microbench/CMakeLists.txt +17 -0
  438. package/deps/rocksdb/rocksdb/microbench/README.md +60 -0
  439. package/deps/rocksdb/rocksdb/microbench/db_basic_bench.cc +1360 -0
  440. package/deps/rocksdb/rocksdb/microbench/ribbon_bench.cc +153 -0
  441. package/deps/rocksdb/rocksdb/monitoring/histogram.cc +8 -15
  442. package/deps/rocksdb/rocksdb/monitoring/histogram.h +0 -1
  443. package/deps/rocksdb/rocksdb/monitoring/histogram_test.cc +18 -16
  444. package/deps/rocksdb/rocksdb/monitoring/histogram_windowing.cc +9 -7
  445. package/deps/rocksdb/rocksdb/monitoring/histogram_windowing.h +5 -3
  446. package/deps/rocksdb/rocksdb/monitoring/instrumented_mutex.cc +7 -5
  447. package/deps/rocksdb/rocksdb/monitoring/instrumented_mutex.h +37 -12
  448. package/deps/rocksdb/rocksdb/monitoring/iostats_context.cc +26 -6
  449. package/deps/rocksdb/rocksdb/monitoring/iostats_context_imp.h +6 -10
  450. package/deps/rocksdb/rocksdb/monitoring/perf_context.cc +14 -13
  451. package/deps/rocksdb/rocksdb/monitoring/perf_context_imp.h +19 -20
  452. package/deps/rocksdb/rocksdb/monitoring/perf_step_timer.h +18 -18
  453. package/deps/rocksdb/rocksdb/monitoring/statistics.cc +84 -2
  454. package/deps/rocksdb/rocksdb/monitoring/statistics.h +6 -0
  455. package/deps/rocksdb/rocksdb/monitoring/statistics_test.cc +47 -2
  456. package/deps/rocksdb/rocksdb/monitoring/stats_history_test.cc +67 -54
  457. package/deps/rocksdb/rocksdb/monitoring/thread_status_updater.cc +4 -1
  458. package/deps/rocksdb/rocksdb/monitoring/thread_status_util.cc +2 -1
  459. package/deps/rocksdb/rocksdb/monitoring/thread_status_util_debug.cc +2 -2
  460. package/deps/rocksdb/rocksdb/options/cf_options.cc +280 -212
  461. package/deps/rocksdb/rocksdb/options/cf_options.h +51 -57
  462. package/deps/rocksdb/rocksdb/options/configurable.cc +242 -138
  463. package/deps/rocksdb/rocksdb/options/configurable_helper.h +4 -68
  464. package/deps/rocksdb/rocksdb/options/configurable_test.cc +144 -21
  465. package/deps/rocksdb/rocksdb/options/configurable_test.h +2 -3
  466. package/deps/rocksdb/rocksdb/options/customizable.cc +67 -7
  467. package/deps/rocksdb/rocksdb/options/customizable_test.cc +1773 -151
  468. package/deps/rocksdb/rocksdb/options/db_options.cc +275 -47
  469. package/deps/rocksdb/rocksdb/options/db_options.h +36 -7
  470. package/deps/rocksdb/rocksdb/options/options.cc +49 -17
  471. package/deps/rocksdb/rocksdb/options/options_helper.cc +369 -352
  472. package/deps/rocksdb/rocksdb/options/options_helper.h +23 -23
  473. package/deps/rocksdb/rocksdb/options/options_parser.cc +18 -13
  474. package/deps/rocksdb/rocksdb/options/options_settable_test.cc +67 -54
  475. package/deps/rocksdb/rocksdb/options/options_test.cc +1162 -187
  476. package/deps/rocksdb/rocksdb/plugin/README.md +43 -0
  477. package/deps/rocksdb/rocksdb/port/jemalloc_helper.h +1 -1
  478. package/deps/rocksdb/rocksdb/port/lang.h +52 -0
  479. package/deps/rocksdb/rocksdb/port/port_example.h +1 -1
  480. package/deps/rocksdb/rocksdb/port/port_posix.cc +31 -2
  481. package/deps/rocksdb/rocksdb/port/port_posix.h +20 -2
  482. package/deps/rocksdb/rocksdb/port/stack_trace.cc +20 -4
  483. package/deps/rocksdb/rocksdb/port/sys_time.h +2 -2
  484. package/deps/rocksdb/rocksdb/port/win/env_default.cc +7 -7
  485. package/deps/rocksdb/rocksdb/port/win/env_win.cc +44 -74
  486. package/deps/rocksdb/rocksdb/port/win/env_win.h +25 -23
  487. package/deps/rocksdb/rocksdb/port/win/io_win.cc +32 -34
  488. package/deps/rocksdb/rocksdb/port/win/io_win.h +12 -6
  489. package/deps/rocksdb/rocksdb/port/win/port_win.cc +55 -35
  490. package/deps/rocksdb/rocksdb/port/win/port_win.h +22 -5
  491. package/deps/rocksdb/rocksdb/port/win/win_logger.cc +3 -3
  492. package/deps/rocksdb/rocksdb/port/win/win_logger.h +3 -5
  493. package/deps/rocksdb/rocksdb/port/win/win_thread.cc +7 -1
  494. package/deps/rocksdb/rocksdb/port/win/win_thread.h +12 -17
  495. package/deps/rocksdb/rocksdb/python.mk +9 -0
  496. package/deps/rocksdb/rocksdb/src.mk +82 -34
  497. package/deps/rocksdb/rocksdb/table/adaptive/adaptive_table_factory.cc +3 -4
  498. package/deps/rocksdb/rocksdb/table/adaptive/adaptive_table_factory.h +1 -1
  499. package/deps/rocksdb/rocksdb/table/block_based/block.cc +158 -80
  500. package/deps/rocksdb/rocksdb/table/block_based/block.h +64 -36
  501. package/deps/rocksdb/rocksdb/table/block_based/block_based_filter_block.cc +23 -14
  502. package/deps/rocksdb/rocksdb/table/block_based/block_based_filter_block.h +13 -5
  503. package/deps/rocksdb/rocksdb/table/block_based/block_based_filter_block_test.cc +3 -218
  504. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_builder.cc +603 -328
  505. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_builder.h +28 -22
  506. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_factory.cc +220 -82
  507. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_factory.h +8 -2
  508. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_iterator.cc +3 -4
  509. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_iterator.h +28 -4
  510. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader.cc +598 -492
  511. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader.h +151 -96
  512. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader_impl.h +31 -58
  513. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader_test.cc +330 -92
  514. package/deps/rocksdb/rocksdb/table/block_based/block_builder.cc +50 -19
  515. package/deps/rocksdb/rocksdb/table/block_based/block_builder.h +23 -0
  516. package/deps/rocksdb/rocksdb/table/block_based/block_like_traits.h +226 -0
  517. package/deps/rocksdb/rocksdb/table/block_based/block_prefetcher.cc +56 -22
  518. package/deps/rocksdb/rocksdb/table/block_based/block_prefetcher.h +42 -4
  519. package/deps/rocksdb/rocksdb/table/block_based/block_test.cc +5 -2
  520. package/deps/rocksdb/rocksdb/table/block_based/block_type.h +2 -0
  521. package/deps/rocksdb/rocksdb/table/block_based/cachable_entry.h +34 -20
  522. package/deps/rocksdb/rocksdb/table/block_based/data_block_hash_index_test.cc +9 -10
  523. package/deps/rocksdb/rocksdb/table/block_based/filter_block.h +26 -3
  524. package/deps/rocksdb/rocksdb/table/block_based/filter_block_reader_common.cc +2 -1
  525. package/deps/rocksdb/rocksdb/table/block_based/filter_policy.cc +844 -202
  526. package/deps/rocksdb/rocksdb/table/block_based/filter_policy_internal.h +281 -81
  527. package/deps/rocksdb/rocksdb/table/block_based/flush_block_policy.cc +62 -2
  528. package/deps/rocksdb/rocksdb/table/block_based/flush_block_policy.h +2 -3
  529. package/deps/rocksdb/rocksdb/table/block_based/full_filter_block.cc +28 -7
  530. package/deps/rocksdb/rocksdb/table/block_based/full_filter_block.h +22 -6
  531. package/deps/rocksdb/rocksdb/table/block_based/full_filter_block_test.cc +28 -26
  532. package/deps/rocksdb/rocksdb/table/block_based/hash_index_reader.cc +1 -1
  533. package/deps/rocksdb/rocksdb/table/block_based/index_builder.cc +1 -2
  534. package/deps/rocksdb/rocksdb/table/block_based/index_reader_common.cc +2 -1
  535. package/deps/rocksdb/rocksdb/table/block_based/mock_block_based_table.h +11 -4
  536. package/deps/rocksdb/rocksdb/table/block_based/parsed_full_filter_block.cc +2 -1
  537. package/deps/rocksdb/rocksdb/table/block_based/parsed_full_filter_block.h +2 -0
  538. package/deps/rocksdb/rocksdb/table/block_based/partitioned_filter_block.cc +68 -26
  539. package/deps/rocksdb/rocksdb/table/block_based/partitioned_filter_block.h +44 -9
  540. package/deps/rocksdb/rocksdb/table/block_based/partitioned_filter_block_test.cc +12 -10
  541. package/deps/rocksdb/rocksdb/table/block_based/partitioned_index_iterator.cc +3 -4
  542. package/deps/rocksdb/rocksdb/table/block_based/partitioned_index_iterator.h +23 -4
  543. package/deps/rocksdb/rocksdb/table/block_based/partitioned_index_reader.cc +44 -19
  544. package/deps/rocksdb/rocksdb/table/block_based/partitioned_index_reader.h +5 -1
  545. package/deps/rocksdb/rocksdb/table/block_based/reader_common.cc +16 -28
  546. package/deps/rocksdb/rocksdb/table/block_based/uncompression_dict_reader.cc +7 -4
  547. package/deps/rocksdb/rocksdb/table/block_based/uncompression_dict_reader.h +2 -2
  548. package/deps/rocksdb/rocksdb/table/block_fetcher.cc +77 -57
  549. package/deps/rocksdb/rocksdb/table/block_fetcher.h +23 -12
  550. package/deps/rocksdb/rocksdb/table/block_fetcher_test.cc +43 -56
  551. package/deps/rocksdb/rocksdb/table/cuckoo/cuckoo_table_builder.cc +8 -8
  552. package/deps/rocksdb/rocksdb/table/cuckoo/cuckoo_table_builder.h +2 -1
  553. package/deps/rocksdb/rocksdb/table/cuckoo/cuckoo_table_builder_test.cc +52 -70
  554. package/deps/rocksdb/rocksdb/table/cuckoo/cuckoo_table_factory.cc +5 -8
  555. package/deps/rocksdb/rocksdb/table/cuckoo/cuckoo_table_factory.h +1 -1
  556. package/deps/rocksdb/rocksdb/table/cuckoo/cuckoo_table_reader.cc +17 -11
  557. package/deps/rocksdb/rocksdb/table/cuckoo/cuckoo_table_reader.h +2 -3
  558. package/deps/rocksdb/rocksdb/table/cuckoo/cuckoo_table_reader_test.cc +42 -51
  559. package/deps/rocksdb/rocksdb/table/format.cc +258 -104
  560. package/deps/rocksdb/rocksdb/table/format.h +120 -109
  561. package/deps/rocksdb/rocksdb/table/get_context.cc +97 -65
  562. package/deps/rocksdb/rocksdb/table/get_context.h +19 -12
  563. package/deps/rocksdb/rocksdb/table/internal_iterator.h +14 -0
  564. package/deps/rocksdb/rocksdb/table/iterator_wrapper.h +8 -0
  565. package/deps/rocksdb/rocksdb/table/merger_test.cc +3 -2
  566. package/deps/rocksdb/rocksdb/table/merging_iterator.cc +11 -21
  567. package/deps/rocksdb/rocksdb/table/merging_iterator.h +3 -3
  568. package/deps/rocksdb/rocksdb/table/meta_blocks.cc +176 -171
  569. package/deps/rocksdb/rocksdb/table/meta_blocks.h +47 -33
  570. package/deps/rocksdb/rocksdb/table/mock_table.cc +7 -9
  571. package/deps/rocksdb/rocksdb/table/mock_table.h +3 -2
  572. package/deps/rocksdb/rocksdb/table/multiget_context.h +15 -8
  573. package/deps/rocksdb/rocksdb/table/persistent_cache_helper.cc +22 -29
  574. package/deps/rocksdb/rocksdb/table/persistent_cache_options.h +6 -3
  575. package/deps/rocksdb/rocksdb/table/plain/plain_table_bloom.h +5 -8
  576. package/deps/rocksdb/rocksdb/table/plain/plain_table_builder.cc +29 -26
  577. package/deps/rocksdb/rocksdb/table/plain/plain_table_builder.h +12 -16
  578. package/deps/rocksdb/rocksdb/table/plain/plain_table_factory.cc +145 -69
  579. package/deps/rocksdb/rocksdb/table/plain/plain_table_factory.h +1 -1
  580. package/deps/rocksdb/rocksdb/table/plain/plain_table_index.cc +7 -6
  581. package/deps/rocksdb/rocksdb/table/plain/plain_table_index.h +3 -4
  582. package/deps/rocksdb/rocksdb/table/plain/plain_table_key_coding.cc +3 -1
  583. package/deps/rocksdb/rocksdb/table/plain/plain_table_key_coding.h +1 -1
  584. package/deps/rocksdb/rocksdb/table/plain/plain_table_reader.cc +13 -18
  585. package/deps/rocksdb/rocksdb/table/plain/plain_table_reader.h +4 -9
  586. package/deps/rocksdb/rocksdb/table/sst_file_dumper.cc +55 -37
  587. package/deps/rocksdb/rocksdb/table/sst_file_dumper.h +10 -5
  588. package/deps/rocksdb/rocksdb/table/sst_file_reader.cc +11 -8
  589. package/deps/rocksdb/rocksdb/table/sst_file_reader_test.cc +222 -16
  590. package/deps/rocksdb/rocksdb/table/sst_file_writer.cc +106 -58
  591. package/deps/rocksdb/rocksdb/table/sst_file_writer_collectors.h +6 -5
  592. package/deps/rocksdb/rocksdb/table/table_builder.h +68 -44
  593. package/deps/rocksdb/rocksdb/table/table_factory.cc +37 -10
  594. package/deps/rocksdb/rocksdb/table/table_properties.cc +109 -54
  595. package/deps/rocksdb/rocksdb/table/table_properties_internal.h +4 -20
  596. package/deps/rocksdb/rocksdb/table/table_reader_bench.cc +33 -32
  597. package/deps/rocksdb/rocksdb/table/table_reader_caller.h +2 -0
  598. package/deps/rocksdb/rocksdb/table/table_test.cc +989 -326
  599. package/deps/rocksdb/rocksdb/table/two_level_iterator.cc +4 -0
  600. package/deps/rocksdb/rocksdb/table/unique_id.cc +166 -0
  601. package/deps/rocksdb/rocksdb/table/unique_id_impl.h +59 -0
  602. package/deps/rocksdb/rocksdb/test_util/mock_time_env.cc +1 -1
  603. package/deps/rocksdb/rocksdb/test_util/mock_time_env.h +13 -10
  604. package/deps/rocksdb/rocksdb/test_util/sync_point.cc +1 -2
  605. package/deps/rocksdb/rocksdb/test_util/sync_point.h +35 -16
  606. package/deps/rocksdb/rocksdb/test_util/sync_point_impl.cc +32 -10
  607. package/deps/rocksdb/rocksdb/test_util/sync_point_impl.h +31 -4
  608. package/deps/rocksdb/rocksdb/test_util/testharness.cc +53 -1
  609. package/deps/rocksdb/rocksdb/test_util/testharness.h +67 -3
  610. package/deps/rocksdb/rocksdb/test_util/testutil.cc +236 -66
  611. package/deps/rocksdb/rocksdb/test_util/testutil.h +63 -100
  612. package/deps/rocksdb/rocksdb/test_util/transaction_test_util.cc +12 -1
  613. package/deps/rocksdb/rocksdb/tools/blob_dump.cc +2 -2
  614. package/deps/rocksdb/rocksdb/tools/block_cache_analyzer/block_cache_trace_analyzer.cc +6 -3
  615. package/deps/rocksdb/rocksdb/tools/block_cache_analyzer/block_cache_trace_analyzer.h +1 -0
  616. package/deps/rocksdb/rocksdb/tools/block_cache_analyzer/block_cache_trace_analyzer_test.cc +9 -3
  617. package/deps/rocksdb/rocksdb/tools/db_bench.cc +1 -1
  618. package/deps/rocksdb/rocksdb/tools/db_bench_tool.cc +1420 -611
  619. package/deps/rocksdb/rocksdb/tools/db_bench_tool_test.cc +11 -8
  620. package/deps/rocksdb/rocksdb/tools/db_repl_stress.cc +11 -1
  621. package/deps/rocksdb/rocksdb/tools/io_tracer_parser_test.cc +4 -2
  622. package/deps/rocksdb/rocksdb/tools/io_tracer_parser_tool.cc +46 -22
  623. package/deps/rocksdb/rocksdb/tools/ldb_cmd.cc +655 -179
  624. package/deps/rocksdb/rocksdb/tools/ldb_cmd_impl.h +58 -6
  625. package/deps/rocksdb/rocksdb/tools/ldb_cmd_test.cc +472 -29
  626. package/deps/rocksdb/rocksdb/tools/ldb_tool.cc +23 -2
  627. package/deps/rocksdb/rocksdb/tools/reduce_levels_test.cc +2 -2
  628. package/deps/rocksdb/rocksdb/tools/simulated_hybrid_file_system.cc +246 -0
  629. package/deps/rocksdb/rocksdb/tools/simulated_hybrid_file_system.h +126 -0
  630. package/deps/rocksdb/rocksdb/tools/sst_dump_test.cc +83 -29
  631. package/deps/rocksdb/rocksdb/tools/sst_dump_tool.cc +38 -17
  632. package/deps/rocksdb/rocksdb/tools/trace_analyzer_test.cc +191 -55
  633. package/deps/rocksdb/rocksdb/tools/trace_analyzer_tool.cc +219 -296
  634. package/deps/rocksdb/rocksdb/tools/trace_analyzer_tool.h +87 -53
  635. package/deps/rocksdb/rocksdb/tools/write_stress.cc +8 -7
  636. package/deps/rocksdb/rocksdb/trace_replay/block_cache_tracer.cc +6 -5
  637. package/deps/rocksdb/rocksdb/trace_replay/block_cache_tracer.h +5 -4
  638. package/deps/rocksdb/rocksdb/trace_replay/block_cache_tracer_test.cc +14 -9
  639. package/deps/rocksdb/rocksdb/trace_replay/io_tracer.cc +134 -60
  640. package/deps/rocksdb/rocksdb/trace_replay/io_tracer.h +49 -38
  641. package/deps/rocksdb/rocksdb/trace_replay/io_tracer_test.cc +152 -15
  642. package/deps/rocksdb/rocksdb/trace_replay/trace_record.cc +206 -0
  643. package/deps/rocksdb/rocksdb/trace_replay/trace_record_handler.cc +190 -0
  644. package/deps/rocksdb/rocksdb/trace_replay/trace_record_handler.h +46 -0
  645. package/deps/rocksdb/rocksdb/trace_replay/trace_record_result.cc +146 -0
  646. package/deps/rocksdb/rocksdb/trace_replay/trace_replay.cc +475 -344
  647. package/deps/rocksdb/rocksdb/trace_replay/trace_replay.h +83 -95
  648. package/deps/rocksdb/rocksdb/util/autovector.h +38 -18
  649. package/deps/rocksdb/rocksdb/util/autovector_test.cc +1 -1
  650. package/deps/rocksdb/rocksdb/util/bloom_impl.h +4 -0
  651. package/deps/rocksdb/rocksdb/util/bloom_test.cc +276 -94
  652. package/deps/rocksdb/rocksdb/util/build_version.cc.in +81 -4
  653. package/deps/rocksdb/rocksdb/util/cast_util.h +22 -0
  654. package/deps/rocksdb/rocksdb/util/channel.h +2 -0
  655. package/deps/rocksdb/rocksdb/util/coding.h +1 -33
  656. package/deps/rocksdb/rocksdb/util/compaction_job_stats_impl.cc +8 -0
  657. package/deps/rocksdb/rocksdb/util/comparator.cc +163 -3
  658. package/deps/rocksdb/rocksdb/util/compression.cc +122 -0
  659. package/deps/rocksdb/rocksdb/util/compression.h +212 -7
  660. package/deps/rocksdb/rocksdb/util/compression_context_cache.cc +1 -3
  661. package/deps/rocksdb/rocksdb/util/crc32c.cc +165 -2
  662. package/deps/rocksdb/rocksdb/util/crc32c.h +6 -0
  663. package/deps/rocksdb/rocksdb/util/crc32c_arm64.cc +14 -0
  664. package/deps/rocksdb/rocksdb/util/crc32c_ppc.h +3 -0
  665. package/deps/rocksdb/rocksdb/util/crc32c_test.cc +47 -0
  666. package/deps/rocksdb/rocksdb/util/defer.h +30 -1
  667. package/deps/rocksdb/rocksdb/util/defer_test.cc +11 -0
  668. package/deps/rocksdb/rocksdb/util/duplicate_detector.h +3 -1
  669. package/deps/rocksdb/rocksdb/util/dynamic_bloom.h +3 -3
  670. package/deps/rocksdb/rocksdb/util/dynamic_bloom_test.cc +5 -4
  671. package/deps/rocksdb/rocksdb/util/fastrange.h +2 -0
  672. package/deps/rocksdb/rocksdb/util/file_checksum_helper.cc +36 -0
  673. package/deps/rocksdb/rocksdb/util/file_checksum_helper.h +3 -1
  674. package/deps/rocksdb/rocksdb/util/file_reader_writer_test.cc +512 -52
  675. package/deps/rocksdb/rocksdb/util/filter_bench.cc +65 -10
  676. package/deps/rocksdb/rocksdb/util/gflags_compat.h +6 -1
  677. package/deps/rocksdb/rocksdb/util/hash.cc +121 -3
  678. package/deps/rocksdb/rocksdb/util/hash.h +31 -1
  679. package/deps/rocksdb/rocksdb/util/hash128.h +26 -0
  680. package/deps/rocksdb/rocksdb/util/hash_containers.h +51 -0
  681. package/deps/rocksdb/rocksdb/util/hash_test.cc +194 -2
  682. package/deps/rocksdb/rocksdb/util/heap.h +6 -1
  683. package/deps/rocksdb/rocksdb/util/kv_map.h +1 -1
  684. package/deps/rocksdb/rocksdb/util/log_write_bench.cc +8 -6
  685. package/deps/rocksdb/rocksdb/util/math.h +74 -7
  686. package/deps/rocksdb/rocksdb/util/math128.h +13 -1
  687. package/deps/rocksdb/rocksdb/util/murmurhash.h +3 -3
  688. package/deps/rocksdb/rocksdb/util/random.cc +9 -0
  689. package/deps/rocksdb/rocksdb/util/random.h +6 -0
  690. package/deps/rocksdb/rocksdb/util/rate_limiter.cc +298 -144
  691. package/deps/rocksdb/rocksdb/util/rate_limiter.h +68 -19
  692. package/deps/rocksdb/rocksdb/util/rate_limiter_test.cc +335 -23
  693. package/deps/rocksdb/rocksdb/util/repeatable_thread.h +10 -12
  694. package/deps/rocksdb/rocksdb/util/repeatable_thread_test.cc +18 -15
  695. package/deps/rocksdb/rocksdb/util/ribbon_alg.h +98 -74
  696. package/deps/rocksdb/rocksdb/util/ribbon_config.cc +506 -0
  697. package/deps/rocksdb/rocksdb/util/ribbon_config.h +182 -0
  698. package/deps/rocksdb/rocksdb/util/ribbon_impl.h +154 -79
  699. package/deps/rocksdb/rocksdb/util/ribbon_test.cc +742 -365
  700. package/deps/rocksdb/rocksdb/util/set_comparator.h +2 -0
  701. package/deps/rocksdb/rocksdb/util/slice.cc +198 -35
  702. package/deps/rocksdb/rocksdb/util/slice_test.cc +30 -1
  703. package/deps/rocksdb/rocksdb/util/status.cc +32 -29
  704. package/deps/rocksdb/rocksdb/util/stop_watch.h +18 -18
  705. package/deps/rocksdb/rocksdb/util/string_util.cc +85 -6
  706. package/deps/rocksdb/rocksdb/util/string_util.h +47 -2
  707. package/deps/rocksdb/rocksdb/util/thread_guard.h +41 -0
  708. package/deps/rocksdb/rocksdb/util/thread_local.h +2 -2
  709. package/deps/rocksdb/rocksdb/util/thread_local_test.cc +22 -24
  710. package/deps/rocksdb/rocksdb/util/threadpool_imp.cc +7 -6
  711. package/deps/rocksdb/rocksdb/util/timer.h +55 -46
  712. package/deps/rocksdb/rocksdb/util/timer_test.cc +50 -48
  713. package/deps/rocksdb/rocksdb/util/user_comparator_wrapper.h +4 -0
  714. package/deps/rocksdb/rocksdb/util/vector_iterator.h +31 -15
  715. package/deps/rocksdb/rocksdb/util/work_queue.h +2 -0
  716. package/deps/rocksdb/rocksdb/util/xxhash.cc +35 -1144
  717. package/deps/rocksdb/rocksdb/util/xxhash.h +5117 -373
  718. package/deps/rocksdb/rocksdb/util/xxph3.h +1762 -0
  719. package/deps/rocksdb/rocksdb/utilities/agg_merge/agg_merge.cc +238 -0
  720. package/deps/rocksdb/rocksdb/utilities/agg_merge/agg_merge.h +49 -0
  721. package/deps/rocksdb/rocksdb/utilities/agg_merge/agg_merge_test.cc +134 -0
  722. package/deps/rocksdb/rocksdb/utilities/agg_merge/test_agg_merge.cc +104 -0
  723. package/deps/rocksdb/rocksdb/utilities/agg_merge/test_agg_merge.h +47 -0
  724. package/deps/rocksdb/rocksdb/utilities/backup/backup_engine.cc +3164 -0
  725. package/deps/rocksdb/rocksdb/utilities/backup/backup_engine_impl.h +29 -0
  726. package/deps/rocksdb/rocksdb/utilities/{backupable/backupable_db_test.cc → backup/backup_engine_test.cc} +1679 -485
  727. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_compaction_filter.cc +6 -4
  728. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_compaction_filter.h +14 -9
  729. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_db.cc +2 -0
  730. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_db.h +1 -0
  731. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_db_gc_stats.h +4 -0
  732. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_db_impl.cc +37 -27
  733. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_db_impl.h +8 -4
  734. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_db_impl_filesnapshot.cc +1 -1
  735. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_db_iterator.h +13 -10
  736. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_db_listener.h +5 -0
  737. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_db_test.cc +44 -25
  738. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_dump_tool.cc +3 -4
  739. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_file.cc +27 -19
  740. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_file.h +4 -2
  741. package/deps/rocksdb/rocksdb/utilities/cache_dump_load.cc +69 -0
  742. package/deps/rocksdb/rocksdb/utilities/cache_dump_load_impl.cc +489 -0
  743. package/deps/rocksdb/rocksdb/utilities/cache_dump_load_impl.h +366 -0
  744. package/deps/rocksdb/rocksdb/utilities/cassandra/cassandra_compaction_filter.cc +67 -4
  745. package/deps/rocksdb/rocksdb/utilities/cassandra/cassandra_compaction_filter.h +21 -6
  746. package/deps/rocksdb/rocksdb/utilities/cassandra/cassandra_functional_test.cc +107 -7
  747. package/deps/rocksdb/rocksdb/utilities/cassandra/cassandra_options.h +43 -0
  748. package/deps/rocksdb/rocksdb/utilities/cassandra/format.h +1 -1
  749. package/deps/rocksdb/rocksdb/utilities/cassandra/merge_operator.cc +24 -8
  750. package/deps/rocksdb/rocksdb/utilities/cassandra/merge_operator.h +7 -7
  751. package/deps/rocksdb/rocksdb/utilities/cassandra/serialize.h +5 -0
  752. package/deps/rocksdb/rocksdb/utilities/checkpoint/checkpoint_impl.cc +99 -218
  753. package/deps/rocksdb/rocksdb/utilities/checkpoint/checkpoint_impl.h +8 -24
  754. package/deps/rocksdb/rocksdb/utilities/checkpoint/checkpoint_test.cc +114 -1
  755. package/deps/rocksdb/rocksdb/utilities/compaction_filters/layered_compaction_filter_base.h +6 -2
  756. package/deps/rocksdb/rocksdb/utilities/compaction_filters/remove_emptyvalue_compactionfilter.cc +0 -4
  757. package/deps/rocksdb/rocksdb/utilities/compaction_filters/remove_emptyvalue_compactionfilter.h +7 -6
  758. package/deps/rocksdb/rocksdb/utilities/compaction_filters.cc +56 -0
  759. package/deps/rocksdb/rocksdb/utilities/convenience/info_log_finder.cc +2 -2
  760. package/deps/rocksdb/rocksdb/utilities/counted_fs.cc +355 -0
  761. package/deps/rocksdb/rocksdb/utilities/counted_fs.h +152 -0
  762. package/deps/rocksdb/rocksdb/utilities/env_mirror.cc +13 -0
  763. package/deps/rocksdb/rocksdb/utilities/env_timed.cc +164 -122
  764. package/deps/rocksdb/rocksdb/utilities/env_timed.h +97 -0
  765. package/deps/rocksdb/rocksdb/utilities/fault_injection_env.cc +75 -17
  766. package/deps/rocksdb/rocksdb/utilities/fault_injection_env.h +19 -3
  767. package/deps/rocksdb/rocksdb/utilities/fault_injection_fs.cc +539 -126
  768. package/deps/rocksdb/rocksdb/utilities/fault_injection_fs.h +162 -17
  769. package/deps/rocksdb/rocksdb/utilities/fault_injection_secondary_cache.cc +110 -0
  770. package/deps/rocksdb/rocksdb/utilities/fault_injection_secondary_cache.h +94 -0
  771. package/deps/rocksdb/rocksdb/utilities/memory/memory_test.cc +5 -2
  772. package/deps/rocksdb/rocksdb/utilities/memory_allocators.h +104 -0
  773. package/deps/rocksdb/rocksdb/utilities/merge_operators/bytesxor.h +5 -3
  774. package/deps/rocksdb/rocksdb/utilities/merge_operators/max.cc +4 -1
  775. package/deps/rocksdb/rocksdb/utilities/merge_operators/put.cc +11 -3
  776. package/deps/rocksdb/rocksdb/utilities/merge_operators/sortlist.cc +0 -2
  777. package/deps/rocksdb/rocksdb/utilities/merge_operators/sortlist.h +5 -1
  778. package/deps/rocksdb/rocksdb/utilities/merge_operators/string_append/stringappend.cc +29 -10
  779. package/deps/rocksdb/rocksdb/utilities/merge_operators/string_append/stringappend.h +6 -3
  780. package/deps/rocksdb/rocksdb/utilities/merge_operators/string_append/stringappend2.cc +29 -14
  781. package/deps/rocksdb/rocksdb/utilities/merge_operators/string_append/stringappend2.h +6 -3
  782. package/deps/rocksdb/rocksdb/utilities/merge_operators/string_append/stringappend_test.cc +71 -18
  783. package/deps/rocksdb/rocksdb/utilities/merge_operators/uint64add.cc +15 -9
  784. package/deps/rocksdb/rocksdb/utilities/merge_operators.cc +120 -0
  785. package/deps/rocksdb/rocksdb/utilities/merge_operators.h +3 -23
  786. package/deps/rocksdb/rocksdb/utilities/object_registry.cc +267 -42
  787. package/deps/rocksdb/rocksdb/utilities/object_registry_test.cc +702 -76
  788. package/deps/rocksdb/rocksdb/utilities/option_change_migration/option_change_migration.cc +1 -1
  789. package/deps/rocksdb/rocksdb/utilities/option_change_migration/option_change_migration_test.cc +26 -5
  790. package/deps/rocksdb/rocksdb/utilities/options/options_util.cc +1 -1
  791. package/deps/rocksdb/rocksdb/utilities/options/options_util_test.cc +124 -1
  792. package/deps/rocksdb/rocksdb/utilities/persistent_cache/block_cache_tier.cc +2 -3
  793. package/deps/rocksdb/rocksdb/utilities/persistent_cache/block_cache_tier.h +8 -9
  794. package/deps/rocksdb/rocksdb/utilities/persistent_cache/block_cache_tier_file.cc +15 -13
  795. package/deps/rocksdb/rocksdb/utilities/persistent_cache/block_cache_tier_file.h +1 -1
  796. package/deps/rocksdb/rocksdb/utilities/persistent_cache/block_cache_tier_metadata.h +4 -4
  797. package/deps/rocksdb/rocksdb/utilities/persistent_cache/hash_table_evictable.h +2 -2
  798. package/deps/rocksdb/rocksdb/utilities/persistent_cache/persistent_cache_bench.cc +8 -9
  799. package/deps/rocksdb/rocksdb/utilities/persistent_cache/persistent_cache_test.cc +1 -1
  800. package/deps/rocksdb/rocksdb/utilities/persistent_cache/persistent_cache_tier.h +6 -3
  801. package/deps/rocksdb/rocksdb/utilities/persistent_cache/volatile_tier_impl.h +2 -2
  802. package/deps/rocksdb/rocksdb/utilities/simulator_cache/cache_simulator.cc +3 -0
  803. package/deps/rocksdb/rocksdb/utilities/simulator_cache/cache_simulator_test.cc +2 -0
  804. package/deps/rocksdb/rocksdb/utilities/simulator_cache/sim_cache.cc +43 -35
  805. package/deps/rocksdb/rocksdb/utilities/simulator_cache/sim_cache_test.cc +20 -18
  806. package/deps/rocksdb/rocksdb/utilities/table_properties_collectors/compact_on_deletion_collector.cc +107 -2
  807. package/deps/rocksdb/rocksdb/utilities/trace/file_trace_reader_writer.cc +23 -15
  808. package/deps/rocksdb/rocksdb/utilities/trace/file_trace_reader_writer.h +2 -2
  809. package/deps/rocksdb/rocksdb/utilities/trace/replayer_impl.cc +316 -0
  810. package/deps/rocksdb/rocksdb/utilities/trace/replayer_impl.h +86 -0
  811. package/deps/rocksdb/rocksdb/utilities/transactions/lock/point/point_lock_manager.cc +4 -5
  812. package/deps/rocksdb/rocksdb/utilities/transactions/lock/point/point_lock_manager.h +4 -3
  813. package/deps/rocksdb/rocksdb/utilities/transactions/lock/point/point_lock_manager_test.h +1 -1
  814. package/deps/rocksdb/rocksdb/utilities/transactions/lock/range/range_locking_test.cc +119 -3
  815. package/deps/rocksdb/rocksdb/utilities/transactions/lock/range/range_tree/lib/locktree/locktree.cc +20 -3
  816. package/deps/rocksdb/rocksdb/utilities/transactions/lock/range/range_tree/lib/locktree/locktree.h +20 -0
  817. package/deps/rocksdb/rocksdb/utilities/transactions/lock/range/range_tree/lib/portability/toku_external_pthread.h +3 -2
  818. package/deps/rocksdb/rocksdb/utilities/transactions/lock/range/range_tree/lib/portability/toku_time.h +4 -0
  819. package/deps/rocksdb/rocksdb/utilities/transactions/lock/range/range_tree/range_tree_lock_manager.cc +38 -14
  820. package/deps/rocksdb/rocksdb/utilities/transactions/lock/range/range_tree/range_tree_lock_manager.h +17 -10
  821. package/deps/rocksdb/rocksdb/utilities/transactions/optimistic_transaction_db_impl.h +1 -0
  822. package/deps/rocksdb/rocksdb/utilities/transactions/optimistic_transaction_test.cc +1 -2
  823. package/deps/rocksdb/rocksdb/utilities/transactions/pessimistic_transaction.cc +423 -34
  824. package/deps/rocksdb/rocksdb/utilities/transactions/pessimistic_transaction.h +82 -2
  825. package/deps/rocksdb/rocksdb/utilities/transactions/pessimistic_transaction_db.cc +72 -40
  826. package/deps/rocksdb/rocksdb/utilities/transactions/pessimistic_transaction_db.h +32 -1
  827. package/deps/rocksdb/rocksdb/utilities/transactions/transaction_base.cc +13 -5
  828. package/deps/rocksdb/rocksdb/utilities/transactions/transaction_base.h +7 -3
  829. package/deps/rocksdb/rocksdb/utilities/transactions/transaction_test.cc +207 -43
  830. package/deps/rocksdb/rocksdb/utilities/transactions/transaction_test.h +50 -7
  831. package/deps/rocksdb/rocksdb/utilities/transactions/transaction_util.cc +28 -10
  832. package/deps/rocksdb/rocksdb/utilities/transactions/transaction_util.h +11 -6
  833. package/deps/rocksdb/rocksdb/utilities/transactions/write_committed_transaction_ts_test.cc +516 -0
  834. package/deps/rocksdb/rocksdb/utilities/transactions/write_prepared_transaction_test.cc +506 -15
  835. package/deps/rocksdb/rocksdb/utilities/transactions/write_prepared_txn.cc +27 -13
  836. package/deps/rocksdb/rocksdb/utilities/transactions/write_prepared_txn_db.cc +14 -14
  837. package/deps/rocksdb/rocksdb/utilities/transactions/write_prepared_txn_db.h +3 -0
  838. package/deps/rocksdb/rocksdb/utilities/transactions/write_unprepared_transaction_test.cc +2 -2
  839. package/deps/rocksdb/rocksdb/utilities/transactions/write_unprepared_txn.cc +14 -5
  840. package/deps/rocksdb/rocksdb/utilities/ttl/db_ttl_impl.cc +305 -27
  841. package/deps/rocksdb/rocksdb/utilities/ttl/db_ttl_impl.h +55 -159
  842. package/deps/rocksdb/rocksdb/utilities/ttl/ttl_test.cc +209 -2
  843. package/deps/rocksdb/rocksdb/utilities/wal_filter.cc +23 -0
  844. package/deps/rocksdb/rocksdb/utilities/write_batch_with_index/write_batch_with_index.cc +157 -88
  845. package/deps/rocksdb/rocksdb/utilities/write_batch_with_index/write_batch_with_index_internal.cc +501 -114
  846. package/deps/rocksdb/rocksdb/utilities/write_batch_with_index/write_batch_with_index_internal.h +91 -316
  847. package/deps/rocksdb/rocksdb/utilities/write_batch_with_index/write_batch_with_index_test.cc +1212 -672
  848. package/deps/rocksdb/rocksdb.gyp +425 -446
  849. package/package.json +8 -8
  850. package/prebuilds/darwin-arm64/node.napi.node +0 -0
  851. package/prebuilds/darwin-x86/node.napi.node +0 -0
  852. package/prebuilds/{darwin-x64+arm64 → linux-x64}/node.napi.node +0 -0
  853. package/deps/rocksdb/rocksdb/env/env_hdfs.cc +0 -648
  854. package/deps/rocksdb/rocksdb/hdfs/README +0 -23
  855. package/deps/rocksdb/rocksdb/hdfs/env_hdfs.h +0 -386
  856. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/backupable_db.h +0 -535
  857. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/env_librados.h +0 -175
  858. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/utility_db.h +0 -34
  859. package/deps/rocksdb/rocksdb/memory/memkind_kmem_allocator_test.cc +0 -102
  860. package/deps/rocksdb/rocksdb/memtable/hash_linklist_rep.h +0 -49
  861. package/deps/rocksdb/rocksdb/memtable/hash_skiplist_rep.h +0 -44
  862. package/deps/rocksdb/rocksdb/options/customizable_helper.h +0 -216
  863. package/deps/rocksdb/rocksdb/third-party/folly/folly/CPortability.h +0 -27
  864. package/deps/rocksdb/rocksdb/third-party/folly/folly/ConstexprMath.h +0 -45
  865. package/deps/rocksdb/rocksdb/third-party/folly/folly/Indestructible.h +0 -166
  866. package/deps/rocksdb/rocksdb/third-party/folly/folly/Optional.h +0 -570
  867. package/deps/rocksdb/rocksdb/third-party/folly/folly/Portability.h +0 -92
  868. package/deps/rocksdb/rocksdb/third-party/folly/folly/ScopeGuard.h +0 -54
  869. package/deps/rocksdb/rocksdb/third-party/folly/folly/Traits.h +0 -152
  870. package/deps/rocksdb/rocksdb/third-party/folly/folly/Unit.h +0 -59
  871. package/deps/rocksdb/rocksdb/third-party/folly/folly/Utility.h +0 -141
  872. package/deps/rocksdb/rocksdb/third-party/folly/folly/chrono/Hardware.h +0 -33
  873. package/deps/rocksdb/rocksdb/third-party/folly/folly/container/Array.h +0 -74
  874. package/deps/rocksdb/rocksdb/third-party/folly/folly/detail/Futex-inl.h +0 -117
  875. package/deps/rocksdb/rocksdb/third-party/folly/folly/detail/Futex.cpp +0 -263
  876. package/deps/rocksdb/rocksdb/third-party/folly/folly/detail/Futex.h +0 -96
  877. package/deps/rocksdb/rocksdb/third-party/folly/folly/functional/Invoke.h +0 -40
  878. package/deps/rocksdb/rocksdb/third-party/folly/folly/hash/Hash.h +0 -29
  879. package/deps/rocksdb/rocksdb/third-party/folly/folly/lang/Align.h +0 -144
  880. package/deps/rocksdb/rocksdb/third-party/folly/folly/lang/Bits.h +0 -30
  881. package/deps/rocksdb/rocksdb/third-party/folly/folly/lang/Launder.h +0 -51
  882. package/deps/rocksdb/rocksdb/third-party/folly/folly/portability/Asm.h +0 -28
  883. package/deps/rocksdb/rocksdb/third-party/folly/folly/portability/SysSyscall.h +0 -10
  884. package/deps/rocksdb/rocksdb/third-party/folly/folly/portability/SysTypes.h +0 -26
  885. package/deps/rocksdb/rocksdb/third-party/folly/folly/synchronization/AtomicNotification-inl.h +0 -138
  886. package/deps/rocksdb/rocksdb/third-party/folly/folly/synchronization/AtomicNotification.cpp +0 -23
  887. package/deps/rocksdb/rocksdb/third-party/folly/folly/synchronization/AtomicNotification.h +0 -57
  888. package/deps/rocksdb/rocksdb/third-party/folly/folly/synchronization/AtomicUtil-inl.h +0 -260
  889. package/deps/rocksdb/rocksdb/third-party/folly/folly/synchronization/AtomicUtil.h +0 -52
  890. package/deps/rocksdb/rocksdb/third-party/folly/folly/synchronization/Baton.h +0 -328
  891. package/deps/rocksdb/rocksdb/third-party/folly/folly/synchronization/DistributedMutex-inl.h +0 -1703
  892. package/deps/rocksdb/rocksdb/third-party/folly/folly/synchronization/DistributedMutex.cpp +0 -16
  893. package/deps/rocksdb/rocksdb/third-party/folly/folly/synchronization/DistributedMutex.h +0 -304
  894. package/deps/rocksdb/rocksdb/third-party/folly/folly/synchronization/DistributedMutexSpecializations.h +0 -39
  895. package/deps/rocksdb/rocksdb/third-party/folly/folly/synchronization/ParkingLot.cpp +0 -26
  896. package/deps/rocksdb/rocksdb/third-party/folly/folly/synchronization/ParkingLot.h +0 -318
  897. package/deps/rocksdb/rocksdb/third-party/folly/folly/synchronization/WaitOptions.h +0 -57
  898. package/deps/rocksdb/rocksdb/third-party/folly/folly/synchronization/detail/InlineFunctionRef.h +0 -219
  899. package/deps/rocksdb/rocksdb/third-party/folly/folly/synchronization/detail/ProxyLockable-inl.h +0 -207
  900. package/deps/rocksdb/rocksdb/third-party/folly/folly/synchronization/detail/ProxyLockable.h +0 -164
  901. package/deps/rocksdb/rocksdb/third-party/folly/folly/synchronization/detail/Sleeper.h +0 -57
  902. package/deps/rocksdb/rocksdb/third-party/folly/folly/synchronization/detail/Spin.h +0 -77
  903. package/deps/rocksdb/rocksdb/third-party/folly/folly/synchronization/test/DistributedMutexTest.cpp +0 -1145
  904. package/deps/rocksdb/rocksdb/util/build_version.h +0 -15
  905. package/deps/rocksdb/rocksdb/util/xxh3p.h +0 -1392
  906. package/deps/rocksdb/rocksdb/utilities/backupable/backupable_db.cc +0 -2354
  907. package/deps/rocksdb/rocksdb/utilities/env_librados.cc +0 -1497
  908. package/deps/rocksdb/rocksdb/utilities/env_librados_test.cc +0 -1146
  909. package/prebuilds/linux-x64/node.napi.glibc.node +0 -0
@@ -22,12 +22,13 @@
22
22
  // kTypeColumnFamilySingleDeletion varint32 varstring
23
23
  // kTypeColumnFamilyRangeDeletion varint32 varstring varstring
24
24
  // kTypeColumnFamilyMerge varint32 varstring varstring
25
- // kTypeBeginPrepareXID varstring
26
- // kTypeEndPrepareXID
25
+ // kTypeBeginPrepareXID
26
+ // kTypeEndPrepareXID varstring
27
27
  // kTypeCommitXID varstring
28
+ // kTypeCommitXIDAndTimestamp varstring varstring
28
29
  // kTypeRollbackXID varstring
29
- // kTypeBeginPersistedPrepareXID varstring
30
- // kTypeBeginUnprepareXID varstring
30
+ // kTypeBeginPersistedPrepareXID
31
+ // kTypeBeginUnprepareXID
31
32
  // kTypeNoop
32
33
  // varstring :=
33
34
  // len: varint32
@@ -46,6 +47,7 @@
46
47
  #include "db/db_impl/db_impl.h"
47
48
  #include "db/dbformat.h"
48
49
  #include "db/flush_scheduler.h"
50
+ #include "db/kv_checksum.h"
49
51
  #include "db/memtable.h"
50
52
  #include "db/merge_context.h"
51
53
  #include "db/snapshot_impl.h"
@@ -55,6 +57,7 @@
55
57
  #include "monitoring/statistics.h"
56
58
  #include "port/lang.h"
57
59
  #include "rocksdb/merge_operator.h"
60
+ #include "rocksdb/system_clock.h"
58
61
  #include "util/autovector.h"
59
62
  #include "util/cast_util.h"
60
63
  #include "util/coding.h"
@@ -132,110 +135,16 @@ struct BatchContentClassifier : public WriteBatch::Handler {
132
135
  return Status::OK();
133
136
  }
134
137
 
135
- Status MarkRollback(const Slice&) override {
136
- content_flags |= ContentFlags::HAS_ROLLBACK;
137
- return Status::OK();
138
- }
139
- };
140
-
141
- class TimestampAssigner : public WriteBatch::Handler {
142
- public:
143
- explicit TimestampAssigner(const Slice& ts)
144
- : timestamp_(ts), timestamps_(kEmptyTimestampList) {}
145
- explicit TimestampAssigner(const std::vector<Slice>& ts_list)
146
- : timestamps_(ts_list) {
147
- SanityCheck();
148
- }
149
- ~TimestampAssigner() override {}
150
-
151
- Status PutCF(uint32_t, const Slice& key, const Slice&) override {
152
- AssignTimestamp(key);
153
- ++idx_;
154
- return Status::OK();
155
- }
156
-
157
- Status DeleteCF(uint32_t, const Slice& key) override {
158
- AssignTimestamp(key);
159
- ++idx_;
160
- return Status::OK();
161
- }
162
-
163
- Status SingleDeleteCF(uint32_t, const Slice& key) override {
164
- AssignTimestamp(key);
165
- ++idx_;
166
- return Status::OK();
167
- }
168
-
169
- Status DeleteRangeCF(uint32_t, const Slice& begin_key,
170
- const Slice& end_key) override {
171
- AssignTimestamp(begin_key);
172
- AssignTimestamp(end_key);
173
- ++idx_;
174
- return Status::OK();
175
- }
176
-
177
- Status MergeCF(uint32_t, const Slice& key, const Slice&) override {
178
- AssignTimestamp(key);
179
- ++idx_;
180
- return Status::OK();
181
- }
182
-
183
- Status PutBlobIndexCF(uint32_t, const Slice&, const Slice&) override {
184
- // TODO (yanqin): support blob db in the future.
185
- return Status::OK();
186
- }
187
-
188
- Status MarkBeginPrepare(bool) override {
189
- // TODO (yanqin): support in the future.
190
- return Status::OK();
191
- }
192
-
193
- Status MarkEndPrepare(const Slice&) override {
194
- // TODO (yanqin): support in the future.
195
- return Status::OK();
196
- }
197
-
198
- Status MarkCommit(const Slice&) override {
199
- // TODO (yanqin): support in the future.
138
+ Status MarkCommitWithTimestamp(const Slice&, const Slice&) override {
139
+ content_flags |= ContentFlags::HAS_COMMIT;
200
140
  return Status::OK();
201
141
  }
202
142
 
203
143
  Status MarkRollback(const Slice&) override {
204
- // TODO (yanqin): support in the future.
144
+ content_flags |= ContentFlags::HAS_ROLLBACK;
205
145
  return Status::OK();
206
146
  }
207
-
208
- private:
209
- void SanityCheck() const {
210
- assert(!timestamps_.empty());
211
- #ifndef NDEBUG
212
- const size_t ts_sz = timestamps_[0].size();
213
- for (size_t i = 1; i != timestamps_.size(); ++i) {
214
- assert(ts_sz == timestamps_[i].size());
215
- }
216
- #endif // !NDEBUG
217
- }
218
-
219
- void AssignTimestamp(const Slice& key) {
220
- assert(timestamps_.empty() || idx_ < timestamps_.size());
221
- const Slice& ts = timestamps_.empty() ? timestamp_ : timestamps_[idx_];
222
- size_t ts_sz = ts.size();
223
- char* ptr = const_cast<char*>(key.data() + key.size() - ts_sz);
224
- memcpy(ptr, ts.data(), ts_sz);
225
- }
226
-
227
- static const std::vector<Slice> kEmptyTimestampList;
228
- const Slice timestamp_;
229
- const std::vector<Slice>& timestamps_;
230
- size_t idx_ = 0;
231
-
232
- // No copy or move.
233
- TimestampAssigner(const TimestampAssigner&) = delete;
234
- TimestampAssigner(TimestampAssigner&&) = delete;
235
- TimestampAssigner& operator=(const TimestampAssigner&) = delete;
236
- TimestampAssigner&& operator=(TimestampAssigner&&) = delete;
237
147
  };
238
- const std::vector<Slice> TimestampAssigner::kEmptyTimestampList;
239
148
 
240
149
  } // anon namespace
241
150
 
@@ -244,42 +153,53 @@ struct SavePoints {
244
153
  };
245
154
 
246
155
  WriteBatch::WriteBatch(size_t reserved_bytes, size_t max_bytes)
247
- : content_flags_(0), max_bytes_(max_bytes), rep_(), timestamp_size_(0) {
156
+ : content_flags_(0), max_bytes_(max_bytes), rep_() {
248
157
  rep_.reserve((reserved_bytes > WriteBatchInternal::kHeader)
249
158
  ? reserved_bytes
250
159
  : WriteBatchInternal::kHeader);
251
160
  rep_.resize(WriteBatchInternal::kHeader);
252
161
  }
253
162
 
254
- WriteBatch::WriteBatch(size_t reserved_bytes, size_t max_bytes, size_t ts_sz)
255
- : content_flags_(0), max_bytes_(max_bytes), rep_(), timestamp_size_(ts_sz) {
256
- rep_.reserve((reserved_bytes > WriteBatchInternal::kHeader) ?
257
- reserved_bytes : WriteBatchInternal::kHeader);
163
+ WriteBatch::WriteBatch(size_t reserved_bytes, size_t max_bytes,
164
+ size_t protection_bytes_per_key, size_t default_cf_ts_sz)
165
+ : content_flags_(0),
166
+ max_bytes_(max_bytes),
167
+ default_cf_ts_sz_(default_cf_ts_sz),
168
+ rep_() {
169
+ // Currently `protection_bytes_per_key` can only be enabled at 8 bytes per
170
+ // entry.
171
+ assert(protection_bytes_per_key == 0 || protection_bytes_per_key == 8);
172
+ if (protection_bytes_per_key != 0) {
173
+ prot_info_.reset(new WriteBatch::ProtectionInfo());
174
+ }
175
+ rep_.reserve((reserved_bytes > WriteBatchInternal::kHeader)
176
+ ? reserved_bytes
177
+ : WriteBatchInternal::kHeader);
258
178
  rep_.resize(WriteBatchInternal::kHeader);
259
179
  }
260
180
 
261
181
  WriteBatch::WriteBatch(const std::string& rep)
262
- : content_flags_(ContentFlags::DEFERRED),
263
- max_bytes_(0),
264
- rep_(rep),
265
- timestamp_size_(0) {}
182
+ : content_flags_(ContentFlags::DEFERRED), max_bytes_(0), rep_(rep) {}
266
183
 
267
184
  WriteBatch::WriteBatch(std::string&& rep)
268
185
  : content_flags_(ContentFlags::DEFERRED),
269
186
  max_bytes_(0),
270
- rep_(std::move(rep)),
271
- timestamp_size_(0) {}
187
+ rep_(std::move(rep)) {}
272
188
 
273
189
  WriteBatch::WriteBatch(const WriteBatch& src)
274
190
  : wal_term_point_(src.wal_term_point_),
275
191
  content_flags_(src.content_flags_.load(std::memory_order_relaxed)),
276
192
  max_bytes_(src.max_bytes_),
277
- rep_(src.rep_),
278
- timestamp_size_(src.timestamp_size_) {
193
+ default_cf_ts_sz_(src.default_cf_ts_sz_),
194
+ rep_(src.rep_) {
279
195
  if (src.save_points_ != nullptr) {
280
196
  save_points_.reset(new SavePoints());
281
197
  save_points_->stack = src.save_points_->stack;
282
198
  }
199
+ if (src.prot_info_ != nullptr) {
200
+ prot_info_.reset(new WriteBatch::ProtectionInfo());
201
+ prot_info_->entries_ = src.prot_info_->entries_;
202
+ }
283
203
  }
284
204
 
285
205
  WriteBatch::WriteBatch(WriteBatch&& src) noexcept
@@ -287,8 +207,9 @@ WriteBatch::WriteBatch(WriteBatch&& src) noexcept
287
207
  wal_term_point_(std::move(src.wal_term_point_)),
288
208
  content_flags_(src.content_flags_.load(std::memory_order_relaxed)),
289
209
  max_bytes_(src.max_bytes_),
290
- rep_(std::move(src.rep_)),
291
- timestamp_size_(src.timestamp_size_) {}
210
+ prot_info_(std::move(src.prot_info_)),
211
+ default_cf_ts_sz_(src.default_cf_ts_sz_),
212
+ rep_(std::move(src.rep_)) {}
292
213
 
293
214
  WriteBatch& WriteBatch::operator=(const WriteBatch& src) {
294
215
  if (&src != this) {
@@ -331,7 +252,11 @@ void WriteBatch::Clear() {
331
252
  }
332
253
  }
333
254
 
255
+ if (prot_info_ != nullptr) {
256
+ prot_info_->entries_.clear();
257
+ }
334
258
  wal_term_point_.clear();
259
+ default_cf_ts_sz_ = 0;
335
260
  }
336
261
 
337
262
  uint32_t WriteBatch::Count() const { return WriteBatchInternal::Count(this); }
@@ -359,6 +284,13 @@ void WriteBatch::MarkWalTerminationPoint() {
359
284
  wal_term_point_.content_flags = content_flags_;
360
285
  }
361
286
 
287
+ size_t WriteBatch::GetProtectionBytesPerKey() const {
288
+ if (prot_info_ != nullptr) {
289
+ return prot_info_->GetBytesPerKey();
290
+ }
291
+ return 0;
292
+ }
293
+
362
294
  bool WriteBatch::HasPut() const {
363
295
  return (ComputeContentFlags() & ContentFlags::HAS_PUT) != 0;
364
296
  }
@@ -496,6 +428,11 @@ Status ReadRecordFromWriteBatch(Slice* input, char* tag,
496
428
  return Status::Corruption("bad EndPrepare XID");
497
429
  }
498
430
  break;
431
+ case kTypeCommitXIDAndTimestamp:
432
+ if (!GetLengthPrefixedSlice(input, key)) {
433
+ return Status::Corruption("bad commit timestamp");
434
+ }
435
+ FALLTHROUGH_INTENDED;
499
436
  case kTypeCommitXID:
500
437
  if (!GetLengthPrefixedSlice(input, xid)) {
501
438
  return Status::Corruption("bad Commit XID");
@@ -705,6 +642,16 @@ Status WriteBatchInternal::Iterate(const WriteBatch* wb,
705
642
  assert(s.ok());
706
643
  empty_batch = true;
707
644
  break;
645
+ case kTypeCommitXIDAndTimestamp:
646
+ assert(wb->content_flags_.load(std::memory_order_relaxed) &
647
+ (ContentFlags::DEFERRED | ContentFlags::HAS_COMMIT));
648
+ // key stores the commit timestamp.
649
+ assert(!key.empty());
650
+ s = handler->MarkCommitWithTimestamp(xid, key);
651
+ if (LIKELY(s.ok())) {
652
+ empty_batch = true;
653
+ }
654
+ break;
708
655
  case kTypeRollbackXID:
709
656
  assert(wb->content_flags_.load(std::memory_order_relaxed) &
710
657
  (ContentFlags::DEFERRED | ContentFlags::HAS_ROLLBACK));
@@ -736,7 +683,7 @@ bool WriteBatchInternal::IsLatestPersistentState(const WriteBatch* b) {
736
683
  return b->is_latest_persistent_state_;
737
684
  }
738
685
 
739
- void WriteBatchInternal::SetAsLastestPersistentState(WriteBatch* b) {
686
+ void WriteBatchInternal::SetAsLatestPersistentState(WriteBatch* b) {
740
687
  b->is_latest_persistent_state_ = true;
741
688
  }
742
689
 
@@ -760,6 +707,45 @@ size_t WriteBatchInternal::GetFirstOffset(WriteBatch* /*b*/) {
760
707
  return WriteBatchInternal::kHeader;
761
708
  }
762
709
 
710
+ std::tuple<Status, uint32_t, size_t>
711
+ WriteBatchInternal::GetColumnFamilyIdAndTimestampSize(
712
+ WriteBatch* b, ColumnFamilyHandle* column_family) {
713
+ uint32_t cf_id = GetColumnFamilyID(column_family);
714
+ size_t ts_sz = 0;
715
+ Status s;
716
+ if (column_family) {
717
+ const Comparator* const ucmp = column_family->GetComparator();
718
+ if (ucmp) {
719
+ ts_sz = ucmp->timestamp_size();
720
+ if (0 == cf_id && b->default_cf_ts_sz_ != ts_sz) {
721
+ s = Status::InvalidArgument("Default cf timestamp size mismatch");
722
+ }
723
+ }
724
+ } else if (b->default_cf_ts_sz_ > 0) {
725
+ ts_sz = b->default_cf_ts_sz_;
726
+ }
727
+ return std::make_tuple(s, cf_id, ts_sz);
728
+ }
729
+
730
+ namespace {
731
+ Status CheckColumnFamilyTimestampSize(ColumnFamilyHandle* column_family,
732
+ const Slice& ts) {
733
+ if (!column_family) {
734
+ return Status::InvalidArgument("column family handle cannot be null");
735
+ }
736
+ const Comparator* const ucmp = column_family->GetComparator();
737
+ assert(ucmp);
738
+ size_t cf_ts_sz = ucmp->timestamp_size();
739
+ if (0 == cf_ts_sz) {
740
+ return Status::InvalidArgument("timestamp disabled");
741
+ }
742
+ if (cf_ts_sz != ts.size()) {
743
+ return Status::InvalidArgument("timestamp size mismatch");
744
+ }
745
+ return Status::OK();
746
+ }
747
+ } // namespace
748
+
763
749
  Status WriteBatchInternal::Put(WriteBatch* b, uint32_t column_family_id,
764
750
  const Slice& key, const Slice& value) {
765
751
  if (key.size() > size_t{port::kMaxUint32}) {
@@ -777,25 +763,63 @@ Status WriteBatchInternal::Put(WriteBatch* b, uint32_t column_family_id,
777
763
  b->rep_.push_back(static_cast<char>(kTypeColumnFamilyValue));
778
764
  PutVarint32(&b->rep_, column_family_id);
779
765
  }
780
- if (0 == b->timestamp_size_) {
781
- PutLengthPrefixedSlice(&b->rep_, key);
782
- } else {
783
- PutVarint32(&b->rep_,
784
- static_cast<uint32_t>(key.size() + b->timestamp_size_));
785
- b->rep_.append(key.data(), key.size());
786
- b->rep_.append(b->timestamp_size_, '\0');
787
- }
766
+ PutLengthPrefixedSlice(&b->rep_, key);
788
767
  PutLengthPrefixedSlice(&b->rep_, value);
789
768
  b->content_flags_.store(
790
769
  b->content_flags_.load(std::memory_order_relaxed) | ContentFlags::HAS_PUT,
791
770
  std::memory_order_relaxed);
771
+ if (b->prot_info_ != nullptr) {
772
+ // Technically the optype could've been `kTypeColumnFamilyValue` with the
773
+ // CF ID encoded in the `WriteBatch`. That distinction is unimportant
774
+ // however since we verify CF ID is correct, as well as all other fields
775
+ // (a missing/extra encoded CF ID would corrupt another field). It is
776
+ // convenient to consolidate on `kTypeValue` here as that is what will be
777
+ // inserted into memtable.
778
+ b->prot_info_->entries_.emplace_back(ProtectionInfo64()
779
+ .ProtectKVO(key, value, kTypeValue)
780
+ .ProtectC(column_family_id));
781
+ }
792
782
  return save.commit();
793
783
  }
794
784
 
795
785
  Status WriteBatch::Put(ColumnFamilyHandle* column_family, const Slice& key,
796
786
  const Slice& value) {
797
- return WriteBatchInternal::Put(this, GetColumnFamilyID(column_family), key,
798
- value);
787
+ size_t ts_sz = 0;
788
+ uint32_t cf_id = 0;
789
+ Status s;
790
+
791
+ std::tie(s, cf_id, ts_sz) =
792
+ WriteBatchInternal::GetColumnFamilyIdAndTimestampSize(this,
793
+ column_family);
794
+
795
+ if (!s.ok()) {
796
+ return s;
797
+ }
798
+
799
+ if (0 == ts_sz) {
800
+ return WriteBatchInternal::Put(this, cf_id, key, value);
801
+ }
802
+
803
+ needs_in_place_update_ts_ = true;
804
+ has_key_with_ts_ = true;
805
+ std::string dummy_ts(ts_sz, '\0');
806
+ std::array<Slice, 2> key_with_ts{{key, dummy_ts}};
807
+ return WriteBatchInternal::Put(this, cf_id, SliceParts(key_with_ts.data(), 2),
808
+ SliceParts(&value, 1));
809
+ }
810
+
811
+ Status WriteBatch::Put(ColumnFamilyHandle* column_family, const Slice& key,
812
+ const Slice& ts, const Slice& value) {
813
+ const Status s = CheckColumnFamilyTimestampSize(column_family, ts);
814
+ if (!s.ok()) {
815
+ return s;
816
+ }
817
+ has_key_with_ts_ = true;
818
+ assert(column_family);
819
+ uint32_t cf_id = column_family->GetID();
820
+ std::array<Slice, 2> key_with_ts{{key, ts}};
821
+ return WriteBatchInternal::Put(this, cf_id, SliceParts(key_with_ts.data(), 2),
822
+ SliceParts(&value, 1));
799
823
  }
800
824
 
801
825
  Status WriteBatchInternal::CheckSlicePartsLength(const SliceParts& key,
@@ -833,22 +857,41 @@ Status WriteBatchInternal::Put(WriteBatch* b, uint32_t column_family_id,
833
857
  b->rep_.push_back(static_cast<char>(kTypeColumnFamilyValue));
834
858
  PutVarint32(&b->rep_, column_family_id);
835
859
  }
836
- if (0 == b->timestamp_size_) {
837
- PutLengthPrefixedSliceParts(&b->rep_, key);
838
- } else {
839
- PutLengthPrefixedSlicePartsWithPadding(&b->rep_, key, b->timestamp_size_);
840
- }
860
+ PutLengthPrefixedSliceParts(&b->rep_, key);
841
861
  PutLengthPrefixedSliceParts(&b->rep_, value);
842
862
  b->content_flags_.store(
843
863
  b->content_flags_.load(std::memory_order_relaxed) | ContentFlags::HAS_PUT,
844
864
  std::memory_order_relaxed);
865
+ if (b->prot_info_ != nullptr) {
866
+ // See comment in first `WriteBatchInternal::Put()` overload concerning the
867
+ // `ValueType` argument passed to `ProtectKVO()`.
868
+ b->prot_info_->entries_.emplace_back(ProtectionInfo64()
869
+ .ProtectKVO(key, value, kTypeValue)
870
+ .ProtectC(column_family_id));
871
+ }
845
872
  return save.commit();
846
873
  }
847
874
 
848
875
  Status WriteBatch::Put(ColumnFamilyHandle* column_family, const SliceParts& key,
849
876
  const SliceParts& value) {
850
- return WriteBatchInternal::Put(this, GetColumnFamilyID(column_family), key,
851
- value);
877
+ size_t ts_sz = 0;
878
+ uint32_t cf_id = 0;
879
+ Status s;
880
+
881
+ std::tie(s, cf_id, ts_sz) =
882
+ WriteBatchInternal::GetColumnFamilyIdAndTimestampSize(this,
883
+ column_family);
884
+
885
+ if (!s.ok()) {
886
+ return s;
887
+ }
888
+
889
+ if (ts_sz == 0) {
890
+ return WriteBatchInternal::Put(this, cf_id, key, value);
891
+ }
892
+
893
+ return Status::InvalidArgument(
894
+ "Cannot call this method on column family enabling timestamp");
852
895
  }
853
896
 
854
897
  Status WriteBatchInternal::InsertNoop(WriteBatch* b) {
@@ -897,6 +940,19 @@ Status WriteBatchInternal::MarkCommit(WriteBatch* b, const Slice& xid) {
897
940
  return Status::OK();
898
941
  }
899
942
 
943
+ Status WriteBatchInternal::MarkCommitWithTimestamp(WriteBatch* b,
944
+ const Slice& xid,
945
+ const Slice& commit_ts) {
946
+ assert(!commit_ts.empty());
947
+ b->rep_.push_back(static_cast<char>(kTypeCommitXIDAndTimestamp));
948
+ PutLengthPrefixedSlice(&b->rep_, commit_ts);
949
+ PutLengthPrefixedSlice(&b->rep_, xid);
950
+ b->content_flags_.store(b->content_flags_.load(std::memory_order_relaxed) |
951
+ ContentFlags::HAS_COMMIT,
952
+ std::memory_order_relaxed);
953
+ return Status::OK();
954
+ }
955
+
900
956
  Status WriteBatchInternal::MarkRollback(WriteBatch* b, const Slice& xid) {
901
957
  b->rep_.push_back(static_cast<char>(kTypeRollbackXID));
902
958
  PutLengthPrefixedSlice(&b->rep_, xid);
@@ -916,23 +972,58 @@ Status WriteBatchInternal::Delete(WriteBatch* b, uint32_t column_family_id,
916
972
  b->rep_.push_back(static_cast<char>(kTypeColumnFamilyDeletion));
917
973
  PutVarint32(&b->rep_, column_family_id);
918
974
  }
919
- if (0 == b->timestamp_size_) {
920
- PutLengthPrefixedSlice(&b->rep_, key);
921
- } else {
922
- PutVarint32(&b->rep_,
923
- static_cast<uint32_t>(key.size() + b->timestamp_size_));
924
- b->rep_.append(key.data(), key.size());
925
- b->rep_.append(b->timestamp_size_, '\0');
926
- }
975
+ PutLengthPrefixedSlice(&b->rep_, key);
927
976
  b->content_flags_.store(b->content_flags_.load(std::memory_order_relaxed) |
928
977
  ContentFlags::HAS_DELETE,
929
978
  std::memory_order_relaxed);
979
+ if (b->prot_info_ != nullptr) {
980
+ // See comment in first `WriteBatchInternal::Put()` overload concerning the
981
+ // `ValueType` argument passed to `ProtectKVO()`.
982
+ b->prot_info_->entries_.emplace_back(
983
+ ProtectionInfo64()
984
+ .ProtectKVO(key, "" /* value */, kTypeDeletion)
985
+ .ProtectC(column_family_id));
986
+ }
930
987
  return save.commit();
931
988
  }
932
989
 
933
990
  Status WriteBatch::Delete(ColumnFamilyHandle* column_family, const Slice& key) {
934
- return WriteBatchInternal::Delete(this, GetColumnFamilyID(column_family),
935
- key);
991
+ size_t ts_sz = 0;
992
+ uint32_t cf_id = 0;
993
+ Status s;
994
+
995
+ std::tie(s, cf_id, ts_sz) =
996
+ WriteBatchInternal::GetColumnFamilyIdAndTimestampSize(this,
997
+ column_family);
998
+
999
+ if (!s.ok()) {
1000
+ return s;
1001
+ }
1002
+
1003
+ if (0 == ts_sz) {
1004
+ return WriteBatchInternal::Delete(this, cf_id, key);
1005
+ }
1006
+
1007
+ needs_in_place_update_ts_ = true;
1008
+ has_key_with_ts_ = true;
1009
+ std::string dummy_ts(ts_sz, '\0');
1010
+ std::array<Slice, 2> key_with_ts{{key, dummy_ts}};
1011
+ return WriteBatchInternal::Delete(this, cf_id,
1012
+ SliceParts(key_with_ts.data(), 2));
1013
+ }
1014
+
1015
+ Status WriteBatch::Delete(ColumnFamilyHandle* column_family, const Slice& key,
1016
+ const Slice& ts) {
1017
+ const Status s = CheckColumnFamilyTimestampSize(column_family, ts);
1018
+ if (!s.ok()) {
1019
+ return s;
1020
+ }
1021
+ assert(column_family);
1022
+ has_key_with_ts_ = true;
1023
+ uint32_t cf_id = column_family->GetID();
1024
+ std::array<Slice, 2> key_with_ts{{key, ts}};
1025
+ return WriteBatchInternal::Delete(this, cf_id,
1026
+ SliceParts(key_with_ts.data(), 2));
936
1027
  }
937
1028
 
938
1029
  Status WriteBatchInternal::Delete(WriteBatch* b, uint32_t column_family_id,
@@ -945,21 +1036,43 @@ Status WriteBatchInternal::Delete(WriteBatch* b, uint32_t column_family_id,
945
1036
  b->rep_.push_back(static_cast<char>(kTypeColumnFamilyDeletion));
946
1037
  PutVarint32(&b->rep_, column_family_id);
947
1038
  }
948
- if (0 == b->timestamp_size_) {
949
- PutLengthPrefixedSliceParts(&b->rep_, key);
950
- } else {
951
- PutLengthPrefixedSlicePartsWithPadding(&b->rep_, key, b->timestamp_size_);
952
- }
1039
+ PutLengthPrefixedSliceParts(&b->rep_, key);
953
1040
  b->content_flags_.store(b->content_flags_.load(std::memory_order_relaxed) |
954
1041
  ContentFlags::HAS_DELETE,
955
1042
  std::memory_order_relaxed);
1043
+ if (b->prot_info_ != nullptr) {
1044
+ // See comment in first `WriteBatchInternal::Put()` overload concerning the
1045
+ // `ValueType` argument passed to `ProtectKVO()`.
1046
+ b->prot_info_->entries_.emplace_back(
1047
+ ProtectionInfo64()
1048
+ .ProtectKVO(key,
1049
+ SliceParts(nullptr /* _parts */, 0 /* _num_parts */),
1050
+ kTypeDeletion)
1051
+ .ProtectC(column_family_id));
1052
+ }
956
1053
  return save.commit();
957
1054
  }
958
1055
 
959
1056
  Status WriteBatch::Delete(ColumnFamilyHandle* column_family,
960
1057
  const SliceParts& key) {
961
- return WriteBatchInternal::Delete(this, GetColumnFamilyID(column_family),
962
- key);
1058
+ size_t ts_sz = 0;
1059
+ uint32_t cf_id = 0;
1060
+ Status s;
1061
+
1062
+ std::tie(s, cf_id, ts_sz) =
1063
+ WriteBatchInternal::GetColumnFamilyIdAndTimestampSize(this,
1064
+ column_family);
1065
+
1066
+ if (!s.ok()) {
1067
+ return s;
1068
+ }
1069
+
1070
+ if (0 == ts_sz) {
1071
+ return WriteBatchInternal::Delete(this, cf_id, key);
1072
+ }
1073
+
1074
+ return Status::InvalidArgument(
1075
+ "Cannot call this method on column family enabling timestamp");
963
1076
  }
964
1077
 
965
1078
  Status WriteBatchInternal::SingleDelete(WriteBatch* b,
@@ -977,13 +1090,55 @@ Status WriteBatchInternal::SingleDelete(WriteBatch* b,
977
1090
  b->content_flags_.store(b->content_flags_.load(std::memory_order_relaxed) |
978
1091
  ContentFlags::HAS_SINGLE_DELETE,
979
1092
  std::memory_order_relaxed);
1093
+ if (b->prot_info_ != nullptr) {
1094
+ // See comment in first `WriteBatchInternal::Put()` overload concerning the
1095
+ // `ValueType` argument passed to `ProtectKVO()`.
1096
+ b->prot_info_->entries_.emplace_back(
1097
+ ProtectionInfo64()
1098
+ .ProtectKVO(key, "" /* value */, kTypeSingleDeletion)
1099
+ .ProtectC(column_family_id));
1100
+ }
980
1101
  return save.commit();
981
1102
  }
982
1103
 
983
1104
  Status WriteBatch::SingleDelete(ColumnFamilyHandle* column_family,
984
1105
  const Slice& key) {
985
- return WriteBatchInternal::SingleDelete(
986
- this, GetColumnFamilyID(column_family), key);
1106
+ size_t ts_sz = 0;
1107
+ uint32_t cf_id = 0;
1108
+ Status s;
1109
+
1110
+ std::tie(s, cf_id, ts_sz) =
1111
+ WriteBatchInternal::GetColumnFamilyIdAndTimestampSize(this,
1112
+ column_family);
1113
+
1114
+ if (!s.ok()) {
1115
+ return s;
1116
+ }
1117
+
1118
+ if (0 == ts_sz) {
1119
+ return WriteBatchInternal::SingleDelete(this, cf_id, key);
1120
+ }
1121
+
1122
+ needs_in_place_update_ts_ = true;
1123
+ has_key_with_ts_ = true;
1124
+ std::string dummy_ts(ts_sz, '\0');
1125
+ std::array<Slice, 2> key_with_ts{{key, dummy_ts}};
1126
+ return WriteBatchInternal::SingleDelete(this, cf_id,
1127
+ SliceParts(key_with_ts.data(), 2));
1128
+ }
1129
+
1130
+ Status WriteBatch::SingleDelete(ColumnFamilyHandle* column_family,
1131
+ const Slice& key, const Slice& ts) {
1132
+ const Status s = CheckColumnFamilyTimestampSize(column_family, ts);
1133
+ if (!s.ok()) {
1134
+ return s;
1135
+ }
1136
+ has_key_with_ts_ = true;
1137
+ assert(column_family);
1138
+ uint32_t cf_id = column_family->GetID();
1139
+ std::array<Slice, 2> key_with_ts{{key, ts}};
1140
+ return WriteBatchInternal::SingleDelete(this, cf_id,
1141
+ SliceParts(key_with_ts.data(), 2));
987
1142
  }
988
1143
 
989
1144
  Status WriteBatchInternal::SingleDelete(WriteBatch* b,
@@ -1001,13 +1156,40 @@ Status WriteBatchInternal::SingleDelete(WriteBatch* b,
1001
1156
  b->content_flags_.store(b->content_flags_.load(std::memory_order_relaxed) |
1002
1157
  ContentFlags::HAS_SINGLE_DELETE,
1003
1158
  std::memory_order_relaxed);
1159
+ if (b->prot_info_ != nullptr) {
1160
+ // See comment in first `WriteBatchInternal::Put()` overload concerning the
1161
+ // `ValueType` argument passed to `ProtectKVO()`.
1162
+ b->prot_info_->entries_.emplace_back(
1163
+ ProtectionInfo64()
1164
+ .ProtectKVO(key,
1165
+ SliceParts(nullptr /* _parts */,
1166
+ 0 /* _num_parts */) /* value */,
1167
+ kTypeSingleDeletion)
1168
+ .ProtectC(column_family_id));
1169
+ }
1004
1170
  return save.commit();
1005
1171
  }
1006
1172
 
1007
1173
  Status WriteBatch::SingleDelete(ColumnFamilyHandle* column_family,
1008
1174
  const SliceParts& key) {
1009
- return WriteBatchInternal::SingleDelete(
1010
- this, GetColumnFamilyID(column_family), key);
1175
+ size_t ts_sz = 0;
1176
+ uint32_t cf_id = 0;
1177
+ Status s;
1178
+
1179
+ std::tie(s, cf_id, ts_sz) =
1180
+ WriteBatchInternal::GetColumnFamilyIdAndTimestampSize(this,
1181
+ column_family);
1182
+
1183
+ if (!s.ok()) {
1184
+ return s;
1185
+ }
1186
+
1187
+ if (0 == ts_sz) {
1188
+ return WriteBatchInternal::SingleDelete(this, cf_id, key);
1189
+ }
1190
+
1191
+ return Status::InvalidArgument(
1192
+ "Cannot call this method on column family enabling timestamp");
1011
1193
  }
1012
1194
 
1013
1195
  Status WriteBatchInternal::DeleteRange(WriteBatch* b, uint32_t column_family_id,
@@ -1026,13 +1208,38 @@ Status WriteBatchInternal::DeleteRange(WriteBatch* b, uint32_t column_family_id,
1026
1208
  b->content_flags_.store(b->content_flags_.load(std::memory_order_relaxed) |
1027
1209
  ContentFlags::HAS_DELETE_RANGE,
1028
1210
  std::memory_order_relaxed);
1211
+ if (b->prot_info_ != nullptr) {
1212
+ // See comment in first `WriteBatchInternal::Put()` overload concerning the
1213
+ // `ValueType` argument passed to `ProtectKVO()`.
1214
+ // In `DeleteRange()`, the end key is treated as the value.
1215
+ b->prot_info_->entries_.emplace_back(
1216
+ ProtectionInfo64()
1217
+ .ProtectKVO(begin_key, end_key, kTypeRangeDeletion)
1218
+ .ProtectC(column_family_id));
1219
+ }
1029
1220
  return save.commit();
1030
1221
  }
1031
1222
 
1032
1223
  Status WriteBatch::DeleteRange(ColumnFamilyHandle* column_family,
1033
1224
  const Slice& begin_key, const Slice& end_key) {
1034
- return WriteBatchInternal::DeleteRange(this, GetColumnFamilyID(column_family),
1035
- begin_key, end_key);
1225
+ size_t ts_sz = 0;
1226
+ uint32_t cf_id = 0;
1227
+ Status s;
1228
+
1229
+ std::tie(s, cf_id, ts_sz) =
1230
+ WriteBatchInternal::GetColumnFamilyIdAndTimestampSize(this,
1231
+ column_family);
1232
+
1233
+ if (!s.ok()) {
1234
+ return s;
1235
+ }
1236
+
1237
+ if (0 == ts_sz) {
1238
+ return WriteBatchInternal::DeleteRange(this, cf_id, begin_key, end_key);
1239
+ }
1240
+
1241
+ return Status::InvalidArgument(
1242
+ "Cannot call this method on column family enabling timestamp");
1036
1243
  }
1037
1244
 
1038
1245
  Status WriteBatchInternal::DeleteRange(WriteBatch* b, uint32_t column_family_id,
@@ -1051,14 +1258,39 @@ Status WriteBatchInternal::DeleteRange(WriteBatch* b, uint32_t column_family_id,
1051
1258
  b->content_flags_.store(b->content_flags_.load(std::memory_order_relaxed) |
1052
1259
  ContentFlags::HAS_DELETE_RANGE,
1053
1260
  std::memory_order_relaxed);
1261
+ if (b->prot_info_ != nullptr) {
1262
+ // See comment in first `WriteBatchInternal::Put()` overload concerning the
1263
+ // `ValueType` argument passed to `ProtectKVO()`.
1264
+ // In `DeleteRange()`, the end key is treated as the value.
1265
+ b->prot_info_->entries_.emplace_back(
1266
+ ProtectionInfo64()
1267
+ .ProtectKVO(begin_key, end_key, kTypeRangeDeletion)
1268
+ .ProtectC(column_family_id));
1269
+ }
1054
1270
  return save.commit();
1055
1271
  }
1056
1272
 
1057
1273
  Status WriteBatch::DeleteRange(ColumnFamilyHandle* column_family,
1058
1274
  const SliceParts& begin_key,
1059
1275
  const SliceParts& end_key) {
1060
- return WriteBatchInternal::DeleteRange(this, GetColumnFamilyID(column_family),
1061
- begin_key, end_key);
1276
+ size_t ts_sz = 0;
1277
+ uint32_t cf_id = 0;
1278
+ Status s;
1279
+
1280
+ std::tie(s, cf_id, ts_sz) =
1281
+ WriteBatchInternal::GetColumnFamilyIdAndTimestampSize(this,
1282
+ column_family);
1283
+
1284
+ if (!s.ok()) {
1285
+ return s;
1286
+ }
1287
+
1288
+ if (0 == ts_sz) {
1289
+ return WriteBatchInternal::DeleteRange(this, cf_id, begin_key, end_key);
1290
+ }
1291
+
1292
+ return Status::InvalidArgument(
1293
+ "Cannot call this method on column family enabling timestamp");
1062
1294
  }
1063
1295
 
1064
1296
  Status WriteBatchInternal::Merge(WriteBatch* b, uint32_t column_family_id,
@@ -1083,13 +1315,36 @@ Status WriteBatchInternal::Merge(WriteBatch* b, uint32_t column_family_id,
1083
1315
  b->content_flags_.store(b->content_flags_.load(std::memory_order_relaxed) |
1084
1316
  ContentFlags::HAS_MERGE,
1085
1317
  std::memory_order_relaxed);
1318
+ if (b->prot_info_ != nullptr) {
1319
+ // See comment in first `WriteBatchInternal::Put()` overload concerning the
1320
+ // `ValueType` argument passed to `ProtectKVO()`.
1321
+ b->prot_info_->entries_.emplace_back(ProtectionInfo64()
1322
+ .ProtectKVO(key, value, kTypeMerge)
1323
+ .ProtectC(column_family_id));
1324
+ }
1086
1325
  return save.commit();
1087
1326
  }
1088
1327
 
1089
1328
  Status WriteBatch::Merge(ColumnFamilyHandle* column_family, const Slice& key,
1090
1329
  const Slice& value) {
1091
- return WriteBatchInternal::Merge(this, GetColumnFamilyID(column_family), key,
1092
- value);
1330
+ size_t ts_sz = 0;
1331
+ uint32_t cf_id = 0;
1332
+ Status s;
1333
+
1334
+ std::tie(s, cf_id, ts_sz) =
1335
+ WriteBatchInternal::GetColumnFamilyIdAndTimestampSize(this,
1336
+ column_family);
1337
+
1338
+ if (!s.ok()) {
1339
+ return s;
1340
+ }
1341
+
1342
+ if (0 == ts_sz) {
1343
+ return WriteBatchInternal::Merge(this, cf_id, key, value);
1344
+ }
1345
+
1346
+ return Status::InvalidArgument(
1347
+ "Cannot call this method on column family enabling timestamp");
1093
1348
  }
1094
1349
 
1095
1350
  Status WriteBatchInternal::Merge(WriteBatch* b, uint32_t column_family_id,
@@ -1113,13 +1368,36 @@ Status WriteBatchInternal::Merge(WriteBatch* b, uint32_t column_family_id,
1113
1368
  b->content_flags_.store(b->content_flags_.load(std::memory_order_relaxed) |
1114
1369
  ContentFlags::HAS_MERGE,
1115
1370
  std::memory_order_relaxed);
1371
+ if (b->prot_info_ != nullptr) {
1372
+ // See comment in first `WriteBatchInternal::Put()` overload concerning the
1373
+ // `ValueType` argument passed to `ProtectKVO()`.
1374
+ b->prot_info_->entries_.emplace_back(ProtectionInfo64()
1375
+ .ProtectKVO(key, value, kTypeMerge)
1376
+ .ProtectC(column_family_id));
1377
+ }
1116
1378
  return save.commit();
1117
1379
  }
1118
1380
 
1119
1381
  Status WriteBatch::Merge(ColumnFamilyHandle* column_family,
1120
1382
  const SliceParts& key, const SliceParts& value) {
1121
- return WriteBatchInternal::Merge(this, GetColumnFamilyID(column_family), key,
1122
- value);
1383
+ size_t ts_sz = 0;
1384
+ uint32_t cf_id = 0;
1385
+ Status s;
1386
+
1387
+ std::tie(s, cf_id, ts_sz) =
1388
+ WriteBatchInternal::GetColumnFamilyIdAndTimestampSize(this,
1389
+ column_family);
1390
+
1391
+ if (!s.ok()) {
1392
+ return s;
1393
+ }
1394
+
1395
+ if (0 == ts_sz) {
1396
+ return WriteBatchInternal::Merge(this, cf_id, key, value);
1397
+ }
1398
+
1399
+ return Status::InvalidArgument(
1400
+ "Cannot call this method on column family enabling timestamp");
1123
1401
  }
1124
1402
 
1125
1403
  Status WriteBatchInternal::PutBlobIndex(WriteBatch* b,
@@ -1138,6 +1416,14 @@ Status WriteBatchInternal::PutBlobIndex(WriteBatch* b,
1138
1416
  b->content_flags_.store(b->content_flags_.load(std::memory_order_relaxed) |
1139
1417
  ContentFlags::HAS_BLOB_INDEX,
1140
1418
  std::memory_order_relaxed);
1419
+ if (b->prot_info_ != nullptr) {
1420
+ // See comment in first `WriteBatchInternal::Put()` overload concerning the
1421
+ // `ValueType` argument passed to `ProtectKVO()`.
1422
+ b->prot_info_->entries_.emplace_back(
1423
+ ProtectionInfo64()
1424
+ .ProtectKVO(key, value, kTypeBlobIndex)
1425
+ .ProtectC(column_family_id));
1426
+ }
1141
1427
  return save.commit();
1142
1428
  }
1143
1429
 
@@ -1176,6 +1462,9 @@ Status WriteBatch::RollbackToSavePoint() {
1176
1462
  Clear();
1177
1463
  } else {
1178
1464
  rep_.resize(savepoint.size);
1465
+ if (prot_info_ != nullptr) {
1466
+ prot_info_->entries_.resize(savepoint.count);
1467
+ }
1179
1468
  WriteBatchInternal::SetCount(this, savepoint.count);
1180
1469
  content_flags_.store(savepoint.content_flags, std::memory_order_relaxed);
1181
1470
  }
@@ -1194,14 +1483,15 @@ Status WriteBatch::PopSavePoint() {
1194
1483
  return Status::OK();
1195
1484
  }
1196
1485
 
1197
- Status WriteBatch::AssignTimestamp(const Slice& ts) {
1198
- TimestampAssigner ts_assigner(ts);
1199
- return Iterate(&ts_assigner);
1200
- }
1201
-
1202
- Status WriteBatch::AssignTimestamps(const std::vector<Slice>& ts_list) {
1203
- TimestampAssigner ts_assigner(ts_list);
1204
- return Iterate(&ts_assigner);
1486
+ Status WriteBatch::UpdateTimestamps(
1487
+ const Slice& ts, std::function<size_t(uint32_t)> ts_sz_func) {
1488
+ TimestampUpdater<decltype(ts_sz_func)> ts_updater(prot_info_.get(),
1489
+ std::move(ts_sz_func), ts);
1490
+ const Status s = Iterate(&ts_updater);
1491
+ if (s.ok()) {
1492
+ needs_in_place_update_ts_ = false;
1493
+ }
1494
+ return s;
1205
1495
  }
1206
1496
 
1207
1497
  class MemTableInserter : public WriteBatch::Handler {
@@ -1217,6 +1507,8 @@ class MemTableInserter : public WriteBatch::Handler {
1217
1507
  DBImpl* db_;
1218
1508
  const bool concurrent_memtable_writes_;
1219
1509
  bool post_info_created_;
1510
+ const WriteBatch::ProtectionInfo* prot_info_;
1511
+ size_t prot_info_idx_;
1220
1512
 
1221
1513
  bool* has_valid_writes_;
1222
1514
  // On some (!) platforms just default creating
@@ -1279,6 +1571,16 @@ class MemTableInserter : public WriteBatch::Handler {
1279
1571
  (&duplicate_detector_)->IsDuplicateKeySeq(column_family_id, key, sequence_);
1280
1572
  }
1281
1573
 
1574
+ const ProtectionInfoKVOC64* NextProtectionInfo() {
1575
+ const ProtectionInfoKVOC64* res = nullptr;
1576
+ if (prot_info_ != nullptr) {
1577
+ assert(prot_info_idx_ < prot_info_->entries_.size());
1578
+ res = &prot_info_->entries_[prot_info_idx_];
1579
+ ++prot_info_idx_;
1580
+ }
1581
+ return res;
1582
+ }
1583
+
1282
1584
  protected:
1283
1585
  bool WriteBeforePrepare() const override { return write_before_prepare_; }
1284
1586
  bool WriteAfterCommit() const override { return write_after_commit_; }
@@ -1291,6 +1593,7 @@ class MemTableInserter : public WriteBatch::Handler {
1291
1593
  bool ignore_missing_column_families,
1292
1594
  uint64_t recovering_log_number, DB* db,
1293
1595
  bool concurrent_memtable_writes,
1596
+ const WriteBatch::ProtectionInfo* prot_info,
1294
1597
  bool* has_valid_writes = nullptr, bool seq_per_batch = false,
1295
1598
  bool batch_per_txn = true, bool hint_per_batch = false)
1296
1599
  : sequence_(_sequence),
@@ -1303,6 +1606,8 @@ class MemTableInserter : public WriteBatch::Handler {
1303
1606
  db_(static_cast_with_check<DBImpl>(db)),
1304
1607
  concurrent_memtable_writes_(concurrent_memtable_writes),
1305
1608
  post_info_created_(false),
1609
+ prot_info_(prot_info),
1610
+ prot_info_idx_(0),
1306
1611
  has_valid_writes_(has_valid_writes),
1307
1612
  rebuilding_trx_(nullptr),
1308
1613
  rebuilding_trx_seq_(0),
@@ -1360,6 +1665,10 @@ class MemTableInserter : public WriteBatch::Handler {
1360
1665
  }
1361
1666
 
1362
1667
  void set_log_number_ref(uint64_t log) { log_number_ref_ = log; }
1668
+ void set_prot_info(const WriteBatch::ProtectionInfo* prot_info) {
1669
+ prot_info_ = prot_info;
1670
+ prot_info_idx_ = 0;
1671
+ }
1363
1672
 
1364
1673
  SequenceNumber sequence() const { return sequence_; }
1365
1674
 
@@ -1415,9 +1724,11 @@ class MemTableInserter : public WriteBatch::Handler {
1415
1724
  }
1416
1725
 
1417
1726
  Status PutCFImpl(uint32_t column_family_id, const Slice& key,
1418
- const Slice& value, ValueType value_type) {
1727
+ const Slice& value, ValueType value_type,
1728
+ const ProtectionInfoKVOS64* kv_prot_info) {
1419
1729
  // optimize for non-recovery mode
1420
1730
  if (UNLIKELY(write_after_commit_ && rebuilding_trx_ != nullptr)) {
1731
+ // TODO(ajkr): propagate `ProtectionInfoKVOS64`.
1421
1732
  return WriteBatchInternal::Put(rebuilding_trx_, column_family_id, key,
1422
1733
  value);
1423
1734
  // else insert the values to the memtable right away
@@ -1429,6 +1740,7 @@ class MemTableInserter : public WriteBatch::Handler {
1429
1740
  assert(!write_after_commit_);
1430
1741
  // The CF is probably flushed and hence no need for insert but we still
1431
1742
  // need to keep track of the keys for upcoming rollback/commit.
1743
+ // TODO(ajkr): propagate `ProtectionInfoKVOS64`.
1432
1744
  ret_status = WriteBatchInternal::Put(rebuilding_trx_, column_family_id,
1433
1745
  key, value);
1434
1746
  if (ret_status.ok()) {
@@ -1448,15 +1760,15 @@ class MemTableInserter : public WriteBatch::Handler {
1448
1760
  assert(!seq_per_batch_ || !moptions->inplace_update_support);
1449
1761
  if (!moptions->inplace_update_support) {
1450
1762
  ret_status =
1451
- mem->Add(sequence_, value_type, key, value,
1763
+ mem->Add(sequence_, value_type, key, value, kv_prot_info,
1452
1764
  concurrent_memtable_writes_, get_post_process_info(mem),
1453
1765
  hint_per_batch_ ? &GetHintMap()[mem] : nullptr);
1454
1766
  } else if (moptions->inplace_callback == nullptr) {
1455
1767
  assert(!concurrent_memtable_writes_);
1456
- ret_status = mem->Update(sequence_, key, value);
1768
+ ret_status = mem->Update(sequence_, key, value, kv_prot_info);
1457
1769
  } else {
1458
1770
  assert(!concurrent_memtable_writes_);
1459
- ret_status = mem->UpdateCallback(sequence_, key, value);
1771
+ ret_status = mem->UpdateCallback(sequence_, key, value, kv_prot_info);
1460
1772
  if (ret_status.IsNotFound()) {
1461
1773
  // key not found in memtable. Do sst get, update, add
1462
1774
  SnapshotImpl read_from_snapshot;
@@ -1484,7 +1796,6 @@ class MemTableInserter : public WriteBatch::Handler {
1484
1796
  } else {
1485
1797
  ret_status = Status::OK();
1486
1798
  }
1487
-
1488
1799
  if (ret_status.ok()) {
1489
1800
  UpdateStatus update_status;
1490
1801
  char* prev_buffer = const_cast<char*>(prev_value.c_str());
@@ -1499,16 +1810,35 @@ class MemTableInserter : public WriteBatch::Handler {
1499
1810
  }
1500
1811
  if (update_status == UpdateStatus::UPDATED_INPLACE) {
1501
1812
  assert(get_status.ok());
1502
- // prev_value is updated in-place with final value.
1503
- ret_status = mem->Add(sequence_, value_type, key,
1504
- Slice(prev_buffer, prev_size));
1813
+ if (kv_prot_info != nullptr) {
1814
+ ProtectionInfoKVOS64 updated_kv_prot_info(*kv_prot_info);
1815
+ updated_kv_prot_info.UpdateV(value,
1816
+ Slice(prev_buffer, prev_size));
1817
+ // prev_value is updated in-place with final value.
1818
+ ret_status = mem->Add(sequence_, value_type, key,
1819
+ Slice(prev_buffer, prev_size),
1820
+ &updated_kv_prot_info);
1821
+ } else {
1822
+ ret_status = mem->Add(sequence_, value_type, key,
1823
+ Slice(prev_buffer, prev_size),
1824
+ nullptr /* kv_prot_info */);
1825
+ }
1505
1826
  if (ret_status.ok()) {
1506
1827
  RecordTick(moptions->statistics, NUMBER_KEYS_WRITTEN);
1507
1828
  }
1508
1829
  } else if (update_status == UpdateStatus::UPDATED) {
1509
- // merged_value contains the final value.
1510
- ret_status =
1511
- mem->Add(sequence_, value_type, key, Slice(merged_value));
1830
+ if (kv_prot_info != nullptr) {
1831
+ ProtectionInfoKVOS64 updated_kv_prot_info(*kv_prot_info);
1832
+ updated_kv_prot_info.UpdateV(value, merged_value);
1833
+ // merged_value contains the final value.
1834
+ ret_status = mem->Add(sequence_, value_type, key,
1835
+ Slice(merged_value), &updated_kv_prot_info);
1836
+ } else {
1837
+ // merged_value contains the final value.
1838
+ ret_status =
1839
+ mem->Add(sequence_, value_type, key, Slice(merged_value),
1840
+ nullptr /* kv_prot_info */);
1841
+ }
1512
1842
  if (ret_status.ok()) {
1513
1843
  RecordTick(moptions->statistics, NUMBER_KEYS_WRITTEN);
1514
1844
  }
@@ -1531,6 +1861,7 @@ class MemTableInserter : public WriteBatch::Handler {
1531
1861
  // away. So we only need to add to it when `ret_status.ok()`.
1532
1862
  if (UNLIKELY(ret_status.ok() && rebuilding_trx_ != nullptr)) {
1533
1863
  assert(!write_after_commit_);
1864
+ // TODO(ajkr): propagate `ProtectionInfoKVOS64`.
1534
1865
  ret_status = WriteBatchInternal::Put(rebuilding_trx_, column_family_id,
1535
1866
  key, value);
1536
1867
  }
@@ -1539,15 +1870,25 @@ class MemTableInserter : public WriteBatch::Handler {
1539
1870
 
1540
1871
  Status PutCF(uint32_t column_family_id, const Slice& key,
1541
1872
  const Slice& value) override {
1542
- return PutCFImpl(column_family_id, key, value, kTypeValue);
1873
+ const auto* kv_prot_info = NextProtectionInfo();
1874
+ if (kv_prot_info != nullptr) {
1875
+ // Memtable needs seqno, doesn't need CF ID
1876
+ auto mem_kv_prot_info =
1877
+ kv_prot_info->StripC(column_family_id).ProtectS(sequence_);
1878
+ return PutCFImpl(column_family_id, key, value, kTypeValue,
1879
+ &mem_kv_prot_info);
1880
+ }
1881
+ return PutCFImpl(column_family_id, key, value, kTypeValue,
1882
+ nullptr /* kv_prot_info */);
1543
1883
  }
1544
1884
 
1545
1885
  Status DeleteImpl(uint32_t /*column_family_id*/, const Slice& key,
1546
- const Slice& value, ValueType delete_type) {
1886
+ const Slice& value, ValueType delete_type,
1887
+ const ProtectionInfoKVOS64* kv_prot_info) {
1547
1888
  Status ret_status;
1548
1889
  MemTable* mem = cf_mems_->GetMemTable();
1549
1890
  ret_status =
1550
- mem->Add(sequence_, delete_type, key, value,
1891
+ mem->Add(sequence_, delete_type, key, value, kv_prot_info,
1551
1892
  concurrent_memtable_writes_, get_post_process_info(mem),
1552
1893
  hint_per_batch_ ? &GetHintMap()[mem] : nullptr);
1553
1894
  if (UNLIKELY(ret_status.IsTryAgain())) {
@@ -1562,8 +1903,10 @@ class MemTableInserter : public WriteBatch::Handler {
1562
1903
  }
1563
1904
 
1564
1905
  Status DeleteCF(uint32_t column_family_id, const Slice& key) override {
1906
+ const auto* kv_prot_info = NextProtectionInfo();
1565
1907
  // optimize for non-recovery mode
1566
1908
  if (UNLIKELY(write_after_commit_ && rebuilding_trx_ != nullptr)) {
1909
+ // TODO(ajkr): propagate `ProtectionInfoKVOS64`.
1567
1910
  return WriteBatchInternal::Delete(rebuilding_trx_, column_family_id, key);
1568
1911
  // else insert the values to the memtable right away
1569
1912
  }
@@ -1574,6 +1917,7 @@ class MemTableInserter : public WriteBatch::Handler {
1574
1917
  assert(!write_after_commit_);
1575
1918
  // The CF is probably flushed and hence no need for insert but we still
1576
1919
  // need to keep track of the keys for upcoming rollback/commit.
1920
+ // TODO(ajkr): propagate `ProtectionInfoKVOS64`.
1577
1921
  ret_status =
1578
1922
  WriteBatchInternal::Delete(rebuilding_trx_, column_family_id, key);
1579
1923
  if (ret_status.ok()) {
@@ -1592,7 +1936,16 @@ class MemTableInserter : public WriteBatch::Handler {
1592
1936
  : 0;
1593
1937
  const ValueType delete_type =
1594
1938
  (0 == ts_sz) ? kTypeDeletion : kTypeDeletionWithTimestamp;
1595
- ret_status = DeleteImpl(column_family_id, key, Slice(), delete_type);
1939
+ if (kv_prot_info != nullptr) {
1940
+ auto mem_kv_prot_info =
1941
+ kv_prot_info->StripC(column_family_id).ProtectS(sequence_);
1942
+ mem_kv_prot_info.UpdateO(kTypeDeletion, delete_type);
1943
+ ret_status = DeleteImpl(column_family_id, key, Slice(), delete_type,
1944
+ &mem_kv_prot_info);
1945
+ } else {
1946
+ ret_status = DeleteImpl(column_family_id, key, Slice(), delete_type,
1947
+ nullptr /* kv_prot_info */);
1948
+ }
1596
1949
  // optimize for non-recovery mode
1597
1950
  // If `ret_status` is `TryAgain` then the next (successful) try will add
1598
1951
  // the key to the rebuilding transaction object. If `ret_status` is
@@ -1600,6 +1953,7 @@ class MemTableInserter : public WriteBatch::Handler {
1600
1953
  // away. So we only need to add to it when `ret_status.ok()`.
1601
1954
  if (UNLIKELY(ret_status.ok() && rebuilding_trx_ != nullptr)) {
1602
1955
  assert(!write_after_commit_);
1956
+ // TODO(ajkr): propagate `ProtectionInfoKVOS64`.
1603
1957
  ret_status =
1604
1958
  WriteBatchInternal::Delete(rebuilding_trx_, column_family_id, key);
1605
1959
  }
@@ -1607,8 +1961,10 @@ class MemTableInserter : public WriteBatch::Handler {
1607
1961
  }
1608
1962
 
1609
1963
  Status SingleDeleteCF(uint32_t column_family_id, const Slice& key) override {
1964
+ const auto* kv_prot_info = NextProtectionInfo();
1610
1965
  // optimize for non-recovery mode
1611
1966
  if (UNLIKELY(write_after_commit_ && rebuilding_trx_ != nullptr)) {
1967
+ // TODO(ajkr): propagate `ProtectionInfoKVOS64`.
1612
1968
  return WriteBatchInternal::SingleDelete(rebuilding_trx_, column_family_id,
1613
1969
  key);
1614
1970
  // else insert the values to the memtable right away
@@ -1620,6 +1976,7 @@ class MemTableInserter : public WriteBatch::Handler {
1620
1976
  assert(!write_after_commit_);
1621
1977
  // The CF is probably flushed and hence no need for insert but we still
1622
1978
  // need to keep track of the keys for upcoming rollback/commit.
1979
+ // TODO(ajkr): propagate `ProtectionInfoKVOS64`.
1623
1980
  ret_status = WriteBatchInternal::SingleDelete(rebuilding_trx_,
1624
1981
  column_family_id, key);
1625
1982
  if (ret_status.ok()) {
@@ -1632,8 +1989,15 @@ class MemTableInserter : public WriteBatch::Handler {
1632
1989
  }
1633
1990
  assert(ret_status.ok());
1634
1991
 
1635
- ret_status =
1636
- DeleteImpl(column_family_id, key, Slice(), kTypeSingleDeletion);
1992
+ if (kv_prot_info != nullptr) {
1993
+ auto mem_kv_prot_info =
1994
+ kv_prot_info->StripC(column_family_id).ProtectS(sequence_);
1995
+ ret_status = DeleteImpl(column_family_id, key, Slice(),
1996
+ kTypeSingleDeletion, &mem_kv_prot_info);
1997
+ } else {
1998
+ ret_status = DeleteImpl(column_family_id, key, Slice(),
1999
+ kTypeSingleDeletion, nullptr /* kv_prot_info */);
2000
+ }
1637
2001
  // optimize for non-recovery mode
1638
2002
  // If `ret_status` is `TryAgain` then the next (successful) try will add
1639
2003
  // the key to the rebuilding transaction object. If `ret_status` is
@@ -1641,6 +2005,7 @@ class MemTableInserter : public WriteBatch::Handler {
1641
2005
  // away. So we only need to add to it when `ret_status.ok()`.
1642
2006
  if (UNLIKELY(ret_status.ok() && rebuilding_trx_ != nullptr)) {
1643
2007
  assert(!write_after_commit_);
2008
+ // TODO(ajkr): propagate `ProtectionInfoKVOS64`.
1644
2009
  ret_status = WriteBatchInternal::SingleDelete(rebuilding_trx_,
1645
2010
  column_family_id, key);
1646
2011
  }
@@ -1649,8 +2014,10 @@ class MemTableInserter : public WriteBatch::Handler {
1649
2014
 
1650
2015
  Status DeleteRangeCF(uint32_t column_family_id, const Slice& begin_key,
1651
2016
  const Slice& end_key) override {
2017
+ const auto* kv_prot_info = NextProtectionInfo();
1652
2018
  // optimize for non-recovery mode
1653
2019
  if (UNLIKELY(write_after_commit_ && rebuilding_trx_ != nullptr)) {
2020
+ // TODO(ajkr): propagate `ProtectionInfoKVOS64`.
1654
2021
  return WriteBatchInternal::DeleteRange(rebuilding_trx_, column_family_id,
1655
2022
  begin_key, end_key);
1656
2023
  // else insert the values to the memtable right away
@@ -1662,6 +2029,7 @@ class MemTableInserter : public WriteBatch::Handler {
1662
2029
  assert(!write_after_commit_);
1663
2030
  // The CF is probably flushed and hence no need for insert but we still
1664
2031
  // need to keep track of the keys for upcoming rollback/commit.
2032
+ // TODO(ajkr): propagate `ProtectionInfoKVOS64`.
1665
2033
  ret_status = WriteBatchInternal::DeleteRange(
1666
2034
  rebuilding_trx_, column_family_id, begin_key, end_key);
1667
2035
  if (ret_status.ok()) {
@@ -1704,8 +2072,15 @@ class MemTableInserter : public WriteBatch::Handler {
1704
2072
  }
1705
2073
  }
1706
2074
 
1707
- ret_status =
1708
- DeleteImpl(column_family_id, begin_key, end_key, kTypeRangeDeletion);
2075
+ if (kv_prot_info != nullptr) {
2076
+ auto mem_kv_prot_info =
2077
+ kv_prot_info->StripC(column_family_id).ProtectS(sequence_);
2078
+ ret_status = DeleteImpl(column_family_id, begin_key, end_key,
2079
+ kTypeRangeDeletion, &mem_kv_prot_info);
2080
+ } else {
2081
+ ret_status = DeleteImpl(column_family_id, begin_key, end_key,
2082
+ kTypeRangeDeletion, nullptr /* kv_prot_info */);
2083
+ }
1709
2084
  // optimize for non-recovery mode
1710
2085
  // If `ret_status` is `TryAgain` then the next (successful) try will add
1711
2086
  // the key to the rebuilding transaction object. If `ret_status` is
@@ -1713,6 +2088,7 @@ class MemTableInserter : public WriteBatch::Handler {
1713
2088
  // away. So we only need to add to it when `ret_status.ok()`.
1714
2089
  if (UNLIKELY(!ret_status.IsTryAgain() && rebuilding_trx_ != nullptr)) {
1715
2090
  assert(!write_after_commit_);
2091
+ // TODO(ajkr): propagate `ProtectionInfoKVOS64`.
1716
2092
  ret_status = WriteBatchInternal::DeleteRange(
1717
2093
  rebuilding_trx_, column_family_id, begin_key, end_key);
1718
2094
  }
@@ -1721,8 +2097,10 @@ class MemTableInserter : public WriteBatch::Handler {
1721
2097
 
1722
2098
  Status MergeCF(uint32_t column_family_id, const Slice& key,
1723
2099
  const Slice& value) override {
2100
+ const auto* kv_prot_info = NextProtectionInfo();
1724
2101
  // optimize for non-recovery mode
1725
2102
  if (UNLIKELY(write_after_commit_ && rebuilding_trx_ != nullptr)) {
2103
+ // TODO(ajkr): propagate `ProtectionInfoKVOS64`.
1726
2104
  return WriteBatchInternal::Merge(rebuilding_trx_, column_family_id, key,
1727
2105
  value);
1728
2106
  // else insert the values to the memtable right away
@@ -1734,6 +2112,7 @@ class MemTableInserter : public WriteBatch::Handler {
1734
2112
  assert(!write_after_commit_);
1735
2113
  // The CF is probably flushed and hence no need for insert but we still
1736
2114
  // need to keep track of the keys for upcoming rollback/commit.
2115
+ // TODO(ajkr): propagate `ProtectionInfoKVOS64`.
1737
2116
  ret_status = WriteBatchInternal::Merge(rebuilding_trx_,
1738
2117
  column_family_id, key, value);
1739
2118
  if (ret_status.ok()) {
@@ -1801,10 +2180,10 @@ class MemTableInserter : public WriteBatch::Handler {
1801
2180
  assert(merge_operator);
1802
2181
 
1803
2182
  std::string new_value;
1804
-
1805
2183
  Status merge_status = MergeHelper::TimedFullMerge(
1806
2184
  merge_operator, key, &get_value_slice, {value}, &new_value,
1807
- moptions->info_log, moptions->statistics, Env::Default());
2185
+ moptions->info_log, moptions->statistics,
2186
+ SystemClock::Default().get());
1808
2187
 
1809
2188
  if (!merge_status.ok()) {
1810
2189
  // Failed to merge!
@@ -1813,16 +2192,35 @@ class MemTableInserter : public WriteBatch::Handler {
1813
2192
  } else {
1814
2193
  // 3) Add value to memtable
1815
2194
  assert(!concurrent_memtable_writes_);
1816
- ret_status = mem->Add(sequence_, kTypeValue, key, new_value);
2195
+ if (kv_prot_info != nullptr) {
2196
+ auto merged_kv_prot_info =
2197
+ kv_prot_info->StripC(column_family_id).ProtectS(sequence_);
2198
+ merged_kv_prot_info.UpdateV(value, new_value);
2199
+ merged_kv_prot_info.UpdateO(kTypeMerge, kTypeValue);
2200
+ ret_status = mem->Add(sequence_, kTypeValue, key, new_value,
2201
+ &merged_kv_prot_info);
2202
+ } else {
2203
+ ret_status = mem->Add(sequence_, kTypeValue, key, new_value,
2204
+ nullptr /* kv_prot_info */);
2205
+ }
1817
2206
  }
1818
2207
  }
1819
2208
  }
1820
2209
 
1821
2210
  if (!perform_merge) {
2211
+ assert(ret_status.ok());
1822
2212
  // Add merge operand to memtable
1823
- ret_status =
1824
- mem->Add(sequence_, kTypeMerge, key, value,
1825
- concurrent_memtable_writes_, get_post_process_info(mem));
2213
+ if (kv_prot_info != nullptr) {
2214
+ auto mem_kv_prot_info =
2215
+ kv_prot_info->StripC(column_family_id).ProtectS(sequence_);
2216
+ ret_status =
2217
+ mem->Add(sequence_, kTypeMerge, key, value, &mem_kv_prot_info,
2218
+ concurrent_memtable_writes_, get_post_process_info(mem));
2219
+ } else {
2220
+ ret_status = mem->Add(
2221
+ sequence_, kTypeMerge, key, value, nullptr /* kv_prot_info */,
2222
+ concurrent_memtable_writes_, get_post_process_info(mem));
2223
+ }
1826
2224
  }
1827
2225
 
1828
2226
  if (UNLIKELY(ret_status.IsTryAgain())) {
@@ -1840,6 +2238,7 @@ class MemTableInserter : public WriteBatch::Handler {
1840
2238
  // away. So we only need to add to it when `ret_status.ok()`.
1841
2239
  if (UNLIKELY(ret_status.ok() && rebuilding_trx_ != nullptr)) {
1842
2240
  assert(!write_after_commit_);
2241
+ // TODO(ajkr): propagate `ProtectionInfoKVOS64`.
1843
2242
  ret_status = WriteBatchInternal::Merge(rebuilding_trx_, column_family_id,
1844
2243
  key, value);
1845
2244
  }
@@ -1848,8 +2247,18 @@ class MemTableInserter : public WriteBatch::Handler {
1848
2247
 
1849
2248
  Status PutBlobIndexCF(uint32_t column_family_id, const Slice& key,
1850
2249
  const Slice& value) override {
1851
- // Same as PutCF except for value type.
1852
- return PutCFImpl(column_family_id, key, value, kTypeBlobIndex);
2250
+ const auto* kv_prot_info = NextProtectionInfo();
2251
+ if (kv_prot_info != nullptr) {
2252
+ // Memtable needs seqno, doesn't need CF ID
2253
+ auto mem_kv_prot_info =
2254
+ kv_prot_info->StripC(column_family_id).ProtectS(sequence_);
2255
+ // Same as PutCF except for value type.
2256
+ return PutCFImpl(column_family_id, key, value, kTypeBlobIndex,
2257
+ &mem_kv_prot_info);
2258
+ } else {
2259
+ return PutCFImpl(column_family_id, key, value, kTypeBlobIndex,
2260
+ nullptr /* kv_prot_info */);
2261
+ }
1853
2262
  }
1854
2263
 
1855
2264
  void CheckMemtableFull() {
@@ -1881,8 +2290,8 @@ class MemTableInserter : public WriteBatch::Handler {
1881
2290
  const MemTable* const mem = cfd->mem();
1882
2291
  assert(mem);
1883
2292
 
1884
- if (mem->ApproximateMemoryUsageFast() +
1885
- imm->ApproximateMemoryUsageExcludingLast() >=
2293
+ if (mem->MemoryAllocatedBytes() +
2294
+ imm->MemoryAllocatedBytesExcludingLast() >=
1886
2295
  size_to_maintain &&
1887
2296
  imm->MarkTrimHistoryNeeded()) {
1888
2297
  trim_history_scheduler_->ScheduleWork(cfd);
@@ -1899,6 +2308,7 @@ class MemTableInserter : public WriteBatch::Handler {
1899
2308
  assert(db_);
1900
2309
 
1901
2310
  if (recovering_log_number_ != 0) {
2311
+ db_->mutex()->AssertHeld();
1902
2312
  // during recovery we rebuild a hollow transaction
1903
2313
  // from all encountered prepare sections of the wal
1904
2314
  if (db_->allow_2pc() == false) {
@@ -1929,6 +2339,7 @@ class MemTableInserter : public WriteBatch::Handler {
1929
2339
  assert((rebuilding_trx_ != nullptr) == (recovering_log_number_ != 0));
1930
2340
 
1931
2341
  if (recovering_log_number_ != 0) {
2342
+ db_->mutex()->AssertHeld();
1932
2343
  assert(db_->allow_2pc());
1933
2344
  size_t batch_cnt =
1934
2345
  write_after_commit_
@@ -1949,6 +2360,9 @@ class MemTableInserter : public WriteBatch::Handler {
1949
2360
  }
1950
2361
 
1951
2362
  Status MarkNoop(bool empty_batch) override {
2363
+ if (recovering_log_number_ != 0) {
2364
+ db_->mutex()->AssertHeld();
2365
+ }
1952
2366
  // A hack in pessimistic transaction could result into a noop at the start
1953
2367
  // of the write batch, that should be ignored.
1954
2368
  if (!empty_batch) {
@@ -1967,6 +2381,8 @@ class MemTableInserter : public WriteBatch::Handler {
1967
2381
  Status s;
1968
2382
 
1969
2383
  if (recovering_log_number_ != 0) {
2384
+ // We must hold db mutex in recovery.
2385
+ db_->mutex()->AssertHeld();
1970
2386
  // in recovery when we encounter a commit marker
1971
2387
  // we lookup this transaction in our set of rebuilt transactions
1972
2388
  // and commit.
@@ -2009,6 +2425,72 @@ class MemTableInserter : public WriteBatch::Handler {
2009
2425
  return s;
2010
2426
  }
2011
2427
 
2428
+ Status MarkCommitWithTimestamp(const Slice& name,
2429
+ const Slice& commit_ts) override {
2430
+ assert(db_);
2431
+
2432
+ Status s;
2433
+
2434
+ if (recovering_log_number_ != 0) {
2435
+ // In recovery, db mutex must be held.
2436
+ db_->mutex()->AssertHeld();
2437
+ // in recovery when we encounter a commit marker
2438
+ // we lookup this transaction in our set of rebuilt transactions
2439
+ // and commit.
2440
+ auto trx = db_->GetRecoveredTransaction(name.ToString());
2441
+ // the log containing the prepared section may have
2442
+ // been released in the last incarnation because the
2443
+ // data was flushed to L0
2444
+ if (trx) {
2445
+ // at this point individual CF lognumbers will prevent
2446
+ // duplicate re-insertion of values.
2447
+ assert(0 == log_number_ref_);
2448
+ if (write_after_commit_) {
2449
+ // write_after_commit_ can only have one batch in trx.
2450
+ assert(trx->batches_.size() == 1);
2451
+ const auto& batch_info = trx->batches_.begin()->second;
2452
+ // all inserts must reference this trx log number
2453
+ log_number_ref_ = batch_info.log_number_;
2454
+
2455
+ s = batch_info.batch_->UpdateTimestamps(
2456
+ commit_ts, [this](uint32_t cf) {
2457
+ assert(db_);
2458
+ VersionSet* const vset = db_->GetVersionSet();
2459
+ assert(vset);
2460
+ ColumnFamilySet* const cf_set = vset->GetColumnFamilySet();
2461
+ assert(cf_set);
2462
+ ColumnFamilyData* cfd = cf_set->GetColumnFamily(cf);
2463
+ assert(cfd);
2464
+ const auto* const ucmp = cfd->user_comparator();
2465
+ assert(ucmp);
2466
+ return ucmp->timestamp_size();
2467
+ });
2468
+ if (s.ok()) {
2469
+ s = batch_info.batch_->Iterate(this);
2470
+ log_number_ref_ = 0;
2471
+ }
2472
+ }
2473
+ // else the values are already inserted before the commit
2474
+
2475
+ if (s.ok()) {
2476
+ db_->DeleteRecoveredTransaction(name.ToString());
2477
+ }
2478
+ if (has_valid_writes_) {
2479
+ *has_valid_writes_ = true;
2480
+ }
2481
+ }
2482
+ } else {
2483
+ // When writes are not delayed until commit, there is no connection
2484
+ // between a memtable write and the WAL that supports it. So the commit
2485
+ // need not reference any log as the only log to which it depends.
2486
+ assert(!write_after_commit_ || log_number_ref_ > 0);
2487
+ }
2488
+ constexpr bool batch_boundary = true;
2489
+ MaybeAdvanceSeq(batch_boundary);
2490
+
2491
+ return s;
2492
+ }
2493
+
2012
2494
  Status MarkRollback(const Slice& name) override {
2013
2495
  assert(db_);
2014
2496
 
@@ -2055,8 +2537,8 @@ Status WriteBatchInternal::InsertInto(
2055
2537
  MemTableInserter inserter(
2056
2538
  sequence, memtables, flush_scheduler, trim_history_scheduler,
2057
2539
  ignore_missing_column_families, recovery_log_number, db,
2058
- concurrent_memtable_writes, nullptr /*has_valid_writes*/, seq_per_batch,
2059
- batch_per_txn);
2540
+ concurrent_memtable_writes, nullptr /* prot_info */,
2541
+ nullptr /*has_valid_writes*/, seq_per_batch, batch_per_txn);
2060
2542
  for (auto w : write_group) {
2061
2543
  if (w->CallbackFailed()) {
2062
2544
  continue;
@@ -2069,6 +2551,7 @@ Status WriteBatchInternal::InsertInto(
2069
2551
  }
2070
2552
  SetSequence(w->batch, inserter.sequence());
2071
2553
  inserter.set_log_number_ref(w->log_ref);
2554
+ inserter.set_prot_info(w->batch->prot_info_.get());
2072
2555
  w->status = w->batch->Iterate(&inserter);
2073
2556
  if (!w->status.ok()) {
2074
2557
  return w->status;
@@ -2090,13 +2573,15 @@ Status WriteBatchInternal::InsertInto(
2090
2573
  (void)batch_cnt;
2091
2574
  #endif
2092
2575
  assert(writer->ShouldWriteToMemtable());
2093
- MemTableInserter inserter(
2094
- sequence, memtables, flush_scheduler, trim_history_scheduler,
2095
- ignore_missing_column_families, log_number, db,
2096
- concurrent_memtable_writes, nullptr /*has_valid_writes*/, seq_per_batch,
2097
- batch_per_txn, hint_per_batch);
2576
+ MemTableInserter inserter(sequence, memtables, flush_scheduler,
2577
+ trim_history_scheduler,
2578
+ ignore_missing_column_families, log_number, db,
2579
+ concurrent_memtable_writes, nullptr /* prot_info */,
2580
+ nullptr /*has_valid_writes*/, seq_per_batch,
2581
+ batch_per_txn, hint_per_batch);
2098
2582
  SetSequence(writer->batch, sequence);
2099
2583
  inserter.set_log_number_ref(writer->log_ref);
2584
+ inserter.set_prot_info(writer->batch->prot_info_.get());
2100
2585
  Status s = writer->batch->Iterate(&inserter);
2101
2586
  assert(!seq_per_batch || batch_cnt != 0);
2102
2587
  assert(!seq_per_batch || inserter.sequence() - sequence == batch_cnt);
@@ -2116,8 +2601,8 @@ Status WriteBatchInternal::InsertInto(
2116
2601
  MemTableInserter inserter(Sequence(batch), memtables, flush_scheduler,
2117
2602
  trim_history_scheduler,
2118
2603
  ignore_missing_column_families, log_number, db,
2119
- concurrent_memtable_writes, has_valid_writes,
2120
- seq_per_batch, batch_per_txn);
2604
+ concurrent_memtable_writes, batch->prot_info_.get(),
2605
+ has_valid_writes, seq_per_batch, batch_per_txn);
2121
2606
  Status s = batch->Iterate(&inserter);
2122
2607
  if (next_seq != nullptr) {
2123
2608
  *next_seq = inserter.sequence();
@@ -2130,6 +2615,7 @@ Status WriteBatchInternal::InsertInto(
2130
2615
 
2131
2616
  Status WriteBatchInternal::SetContents(WriteBatch* b, const Slice& contents) {
2132
2617
  assert(contents.size() >= WriteBatchInternal::kHeader);
2618
+ assert(b->prot_info_ == nullptr);
2133
2619
  b->rep_.assign(contents.data(), contents.size());
2134
2620
  b->content_flags_.store(ContentFlags::DEFERRED, std::memory_order_relaxed);
2135
2621
  return Status::OK();
@@ -2137,6 +2623,8 @@ Status WriteBatchInternal::SetContents(WriteBatch* b, const Slice& contents) {
2137
2623
 
2138
2624
  Status WriteBatchInternal::Append(WriteBatch* dst, const WriteBatch* src,
2139
2625
  const bool wal_only) {
2626
+ assert(dst->Count() == 0 ||
2627
+ (dst->prot_info_ == nullptr) == (src->prot_info_ == nullptr));
2140
2628
  size_t src_len;
2141
2629
  int src_count;
2142
2630
  uint32_t src_flags;
@@ -2153,6 +2641,13 @@ Status WriteBatchInternal::Append(WriteBatch* dst, const WriteBatch* src,
2153
2641
  src_flags = src->content_flags_.load(std::memory_order_relaxed);
2154
2642
  }
2155
2643
 
2644
+ if (dst->prot_info_ != nullptr) {
2645
+ std::copy(src->prot_info_->entries_.begin(),
2646
+ src->prot_info_->entries_.begin() + src_count,
2647
+ std::back_inserter(dst->prot_info_->entries_));
2648
+ } else if (src->prot_info_ != nullptr) {
2649
+ dst->prot_info_.reset(new WriteBatch::ProtectionInfo(*src->prot_info_));
2650
+ }
2156
2651
  SetCount(dst, Count(dst) + src_count);
2157
2652
  assert(src->rep_.size() >= WriteBatchInternal::kHeader);
2158
2653
  dst->rep_.append(src->rep_.data() + WriteBatchInternal::kHeader, src_len);