@nxtedition/rocksdb 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (1088) hide show
  1. package/CHANGELOG.md +294 -0
  2. package/LICENSE +21 -0
  3. package/README.md +102 -0
  4. package/UPGRADING.md +91 -0
  5. package/binding.cc +1276 -0
  6. package/binding.gyp +73 -0
  7. package/binding.js +1 -0
  8. package/chained-batch.js +44 -0
  9. package/deps/rocksdb/build_version.cc +4 -0
  10. package/deps/rocksdb/rocksdb/CMakeLists.txt +1356 -0
  11. package/deps/rocksdb/rocksdb/COPYING +339 -0
  12. package/deps/rocksdb/rocksdb/LICENSE.Apache +202 -0
  13. package/deps/rocksdb/rocksdb/LICENSE.leveldb +29 -0
  14. package/deps/rocksdb/rocksdb/Makefile +2521 -0
  15. package/deps/rocksdb/rocksdb/TARGETS +2100 -0
  16. package/deps/rocksdb/rocksdb/cache/cache.cc +63 -0
  17. package/deps/rocksdb/rocksdb/cache/cache_bench.cc +381 -0
  18. package/deps/rocksdb/rocksdb/cache/cache_helpers.h +114 -0
  19. package/deps/rocksdb/rocksdb/cache/cache_test.cc +775 -0
  20. package/deps/rocksdb/rocksdb/cache/clock_cache.cc +769 -0
  21. package/deps/rocksdb/rocksdb/cache/clock_cache.h +16 -0
  22. package/deps/rocksdb/rocksdb/cache/lru_cache.cc +574 -0
  23. package/deps/rocksdb/rocksdb/cache/lru_cache.h +339 -0
  24. package/deps/rocksdb/rocksdb/cache/lru_cache_test.cc +199 -0
  25. package/deps/rocksdb/rocksdb/cache/sharded_cache.cc +162 -0
  26. package/deps/rocksdb/rocksdb/cache/sharded_cache.h +111 -0
  27. package/deps/rocksdb/rocksdb/cmake/RocksDBConfig.cmake.in +54 -0
  28. package/deps/rocksdb/rocksdb/cmake/modules/CxxFlags.cmake +7 -0
  29. package/deps/rocksdb/rocksdb/cmake/modules/FindJeMalloc.cmake +29 -0
  30. package/deps/rocksdb/rocksdb/cmake/modules/FindNUMA.cmake +29 -0
  31. package/deps/rocksdb/rocksdb/cmake/modules/FindSnappy.cmake +29 -0
  32. package/deps/rocksdb/rocksdb/cmake/modules/FindTBB.cmake +33 -0
  33. package/deps/rocksdb/rocksdb/cmake/modules/Findgflags.cmake +29 -0
  34. package/deps/rocksdb/rocksdb/cmake/modules/Findlz4.cmake +29 -0
  35. package/deps/rocksdb/rocksdb/cmake/modules/Findzstd.cmake +29 -0
  36. package/deps/rocksdb/rocksdb/cmake/modules/ReadVersion.cmake +10 -0
  37. package/deps/rocksdb/rocksdb/db/arena_wrapped_db_iter.cc +108 -0
  38. package/deps/rocksdb/rocksdb/db/arena_wrapped_db_iter.h +115 -0
  39. package/deps/rocksdb/rocksdb/db/blob/blob_constants.h +16 -0
  40. package/deps/rocksdb/rocksdb/db/blob/blob_file_addition.cc +154 -0
  41. package/deps/rocksdb/rocksdb/db/blob/blob_file_addition.h +67 -0
  42. package/deps/rocksdb/rocksdb/db/blob/blob_file_addition_test.cc +206 -0
  43. package/deps/rocksdb/rocksdb/db/blob/blob_file_builder.cc +316 -0
  44. package/deps/rocksdb/rocksdb/db/blob/blob_file_builder.h +91 -0
  45. package/deps/rocksdb/rocksdb/db/blob/blob_file_builder_test.cc +660 -0
  46. package/deps/rocksdb/rocksdb/db/blob/blob_file_cache.cc +99 -0
  47. package/deps/rocksdb/rocksdb/db/blob/blob_file_cache.h +49 -0
  48. package/deps/rocksdb/rocksdb/db/blob/blob_file_cache_test.cc +268 -0
  49. package/deps/rocksdb/rocksdb/db/blob/blob_file_garbage.cc +134 -0
  50. package/deps/rocksdb/rocksdb/db/blob/blob_file_garbage.h +57 -0
  51. package/deps/rocksdb/rocksdb/db/blob/blob_file_garbage_test.cc +173 -0
  52. package/deps/rocksdb/rocksdb/db/blob/blob_file_meta.cc +55 -0
  53. package/deps/rocksdb/rocksdb/db/blob/blob_file_meta.h +164 -0
  54. package/deps/rocksdb/rocksdb/db/blob/blob_file_reader.cc +423 -0
  55. package/deps/rocksdb/rocksdb/db/blob/blob_file_reader.h +81 -0
  56. package/deps/rocksdb/rocksdb/db/blob/blob_file_reader_test.cc +771 -0
  57. package/deps/rocksdb/rocksdb/db/blob/blob_index.h +184 -0
  58. package/deps/rocksdb/rocksdb/db/blob/blob_log_format.cc +145 -0
  59. package/deps/rocksdb/rocksdb/db/blob/blob_log_format.h +148 -0
  60. package/deps/rocksdb/rocksdb/db/blob/blob_log_sequential_reader.cc +132 -0
  61. package/deps/rocksdb/rocksdb/db/blob/blob_log_sequential_reader.h +76 -0
  62. package/deps/rocksdb/rocksdb/db/blob/blob_log_writer.cc +168 -0
  63. package/deps/rocksdb/rocksdb/db/blob/blob_log_writer.h +83 -0
  64. package/deps/rocksdb/rocksdb/db/blob/db_blob_basic_test.cc +307 -0
  65. package/deps/rocksdb/rocksdb/db/blob/db_blob_index_test.cc +464 -0
  66. package/deps/rocksdb/rocksdb/db/builder.cc +358 -0
  67. package/deps/rocksdb/rocksdb/db/builder.h +95 -0
  68. package/deps/rocksdb/rocksdb/db/c.cc +5281 -0
  69. package/deps/rocksdb/rocksdb/db/c_test.c +2883 -0
  70. package/deps/rocksdb/rocksdb/db/column_family.cc +1602 -0
  71. package/deps/rocksdb/rocksdb/db/column_family.h +787 -0
  72. package/deps/rocksdb/rocksdb/db/column_family_test.cc +3427 -0
  73. package/deps/rocksdb/rocksdb/db/compact_files_test.cc +425 -0
  74. package/deps/rocksdb/rocksdb/db/compacted_db_impl.cc +169 -0
  75. package/deps/rocksdb/rocksdb/db/compacted_db_impl.h +118 -0
  76. package/deps/rocksdb/rocksdb/db/compaction/compaction.cc +591 -0
  77. package/deps/rocksdb/rocksdb/db/compaction/compaction.h +389 -0
  78. package/deps/rocksdb/rocksdb/db/compaction/compaction_iteration_stats.h +37 -0
  79. package/deps/rocksdb/rocksdb/db/compaction/compaction_iterator.cc +1023 -0
  80. package/deps/rocksdb/rocksdb/db/compaction/compaction_iterator.h +353 -0
  81. package/deps/rocksdb/rocksdb/db/compaction/compaction_iterator_test.cc +1254 -0
  82. package/deps/rocksdb/rocksdb/db/compaction/compaction_job.cc +1917 -0
  83. package/deps/rocksdb/rocksdb/db/compaction/compaction_job.h +208 -0
  84. package/deps/rocksdb/rocksdb/db/compaction/compaction_job_stats_test.cc +1037 -0
  85. package/deps/rocksdb/rocksdb/db/compaction/compaction_job_test.cc +1224 -0
  86. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker.cc +1135 -0
  87. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker.h +318 -0
  88. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_fifo.cc +255 -0
  89. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_fifo.h +57 -0
  90. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_level.cc +510 -0
  91. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_level.h +33 -0
  92. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_test.cc +2190 -0
  93. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_universal.cc +1103 -0
  94. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_universal.h +32 -0
  95. package/deps/rocksdb/rocksdb/db/compaction/sst_partitioner.cc +44 -0
  96. package/deps/rocksdb/rocksdb/db/comparator_db_test.cc +660 -0
  97. package/deps/rocksdb/rocksdb/db/convenience.cc +78 -0
  98. package/deps/rocksdb/rocksdb/db/corruption_test.cc +921 -0
  99. package/deps/rocksdb/rocksdb/db/cuckoo_table_db_test.cc +359 -0
  100. package/deps/rocksdb/rocksdb/db/db_basic_test.cc +3820 -0
  101. package/deps/rocksdb/rocksdb/db/db_block_cache_test.cc +1058 -0
  102. package/deps/rocksdb/rocksdb/db/db_bloom_filter_test.cc +2128 -0
  103. package/deps/rocksdb/rocksdb/db/db_compaction_filter_test.cc +851 -0
  104. package/deps/rocksdb/rocksdb/db/db_compaction_test.cc +6292 -0
  105. package/deps/rocksdb/rocksdb/db/db_dynamic_level_test.cc +509 -0
  106. package/deps/rocksdb/rocksdb/db/db_encryption_test.cc +130 -0
  107. package/deps/rocksdb/rocksdb/db/db_filesnapshot.cc +137 -0
  108. package/deps/rocksdb/rocksdb/db/db_flush_test.cc +1119 -0
  109. package/deps/rocksdb/rocksdb/db/db_impl/db_impl.cc +5057 -0
  110. package/deps/rocksdb/rocksdb/db/db_impl/db_impl.h +2274 -0
  111. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_compaction_flush.cc +3421 -0
  112. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_debug.cc +298 -0
  113. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_experimental.cc +151 -0
  114. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_files.cc +967 -0
  115. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_open.cc +1806 -0
  116. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_readonly.cc +270 -0
  117. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_readonly.h +146 -0
  118. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_secondary.cc +683 -0
  119. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_secondary.h +333 -0
  120. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_write.cc +2024 -0
  121. package/deps/rocksdb/rocksdb/db/db_impl/db_secondary_test.cc +932 -0
  122. package/deps/rocksdb/rocksdb/db/db_info_dumper.cc +137 -0
  123. package/deps/rocksdb/rocksdb/db/db_info_dumper.h +15 -0
  124. package/deps/rocksdb/rocksdb/db/db_inplace_update_test.cc +178 -0
  125. package/deps/rocksdb/rocksdb/db/db_io_failure_test.cc +592 -0
  126. package/deps/rocksdb/rocksdb/db/db_iter.cc +1493 -0
  127. package/deps/rocksdb/rocksdb/db/db_iter.h +390 -0
  128. package/deps/rocksdb/rocksdb/db/db_iter_stress_test.cc +657 -0
  129. package/deps/rocksdb/rocksdb/db/db_iter_test.cc +3268 -0
  130. package/deps/rocksdb/rocksdb/db/db_iterator_test.cc +3197 -0
  131. package/deps/rocksdb/rocksdb/db/db_log_iter_test.cc +299 -0
  132. package/deps/rocksdb/rocksdb/db/db_logical_block_size_cache_test.cc +513 -0
  133. package/deps/rocksdb/rocksdb/db/db_memtable_test.cc +329 -0
  134. package/deps/rocksdb/rocksdb/db/db_merge_operand_test.cc +241 -0
  135. package/deps/rocksdb/rocksdb/db/db_merge_operator_test.cc +671 -0
  136. package/deps/rocksdb/rocksdb/db/db_options_test.cc +1022 -0
  137. package/deps/rocksdb/rocksdb/db/db_properties_test.cc +1723 -0
  138. package/deps/rocksdb/rocksdb/db/db_range_del_test.cc +1694 -0
  139. package/deps/rocksdb/rocksdb/db/db_sst_test.cc +1261 -0
  140. package/deps/rocksdb/rocksdb/db/db_statistics_test.cc +164 -0
  141. package/deps/rocksdb/rocksdb/db/db_table_properties_test.cc +488 -0
  142. package/deps/rocksdb/rocksdb/db/db_tailing_iter_test.cc +567 -0
  143. package/deps/rocksdb/rocksdb/db/db_test.cc +6736 -0
  144. package/deps/rocksdb/rocksdb/db/db_test2.cc +5408 -0
  145. package/deps/rocksdb/rocksdb/db/db_test_util.cc +1633 -0
  146. package/deps/rocksdb/rocksdb/db/db_test_util.h +1194 -0
  147. package/deps/rocksdb/rocksdb/db/db_universal_compaction_test.cc +2235 -0
  148. package/deps/rocksdb/rocksdb/db/db_wal_test.cc +1780 -0
  149. package/deps/rocksdb/rocksdb/db/db_with_timestamp_basic_test.cc +2520 -0
  150. package/deps/rocksdb/rocksdb/db/db_with_timestamp_compaction_test.cc +119 -0
  151. package/deps/rocksdb/rocksdb/db/db_write_test.cc +465 -0
  152. package/deps/rocksdb/rocksdb/db/dbformat.cc +222 -0
  153. package/deps/rocksdb/rocksdb/db/dbformat.h +786 -0
  154. package/deps/rocksdb/rocksdb/db/dbformat_test.cc +206 -0
  155. package/deps/rocksdb/rocksdb/db/deletefile_test.cc +580 -0
  156. package/deps/rocksdb/rocksdb/db/error_handler.cc +726 -0
  157. package/deps/rocksdb/rocksdb/db/error_handler.h +117 -0
  158. package/deps/rocksdb/rocksdb/db/error_handler_fs_test.cc +2598 -0
  159. package/deps/rocksdb/rocksdb/db/event_helpers.cc +233 -0
  160. package/deps/rocksdb/rocksdb/db/event_helpers.h +57 -0
  161. package/deps/rocksdb/rocksdb/db/experimental.cc +50 -0
  162. package/deps/rocksdb/rocksdb/db/external_sst_file_basic_test.cc +1559 -0
  163. package/deps/rocksdb/rocksdb/db/external_sst_file_ingestion_job.cc +910 -0
  164. package/deps/rocksdb/rocksdb/db/external_sst_file_ingestion_job.h +195 -0
  165. package/deps/rocksdb/rocksdb/db/external_sst_file_test.cc +2936 -0
  166. package/deps/rocksdb/rocksdb/db/fault_injection_test.cc +556 -0
  167. package/deps/rocksdb/rocksdb/db/file_indexer.cc +216 -0
  168. package/deps/rocksdb/rocksdb/db/file_indexer.h +142 -0
  169. package/deps/rocksdb/rocksdb/db/file_indexer_test.cc +350 -0
  170. package/deps/rocksdb/rocksdb/db/filename_test.cc +179 -0
  171. package/deps/rocksdb/rocksdb/db/flush_job.cc +514 -0
  172. package/deps/rocksdb/rocksdb/db/flush_job.h +169 -0
  173. package/deps/rocksdb/rocksdb/db/flush_job_test.cc +647 -0
  174. package/deps/rocksdb/rocksdb/db/flush_scheduler.cc +86 -0
  175. package/deps/rocksdb/rocksdb/db/flush_scheduler.h +54 -0
  176. package/deps/rocksdb/rocksdb/db/forward_iterator.cc +1023 -0
  177. package/deps/rocksdb/rocksdb/db/forward_iterator.h +163 -0
  178. package/deps/rocksdb/rocksdb/db/forward_iterator_bench.cc +377 -0
  179. package/deps/rocksdb/rocksdb/db/import_column_family_job.cc +282 -0
  180. package/deps/rocksdb/rocksdb/db/import_column_family_job.h +75 -0
  181. package/deps/rocksdb/rocksdb/db/import_column_family_test.cc +632 -0
  182. package/deps/rocksdb/rocksdb/db/internal_stats.cc +1461 -0
  183. package/deps/rocksdb/rocksdb/db/internal_stats.h +712 -0
  184. package/deps/rocksdb/rocksdb/db/job_context.h +226 -0
  185. package/deps/rocksdb/rocksdb/db/listener_test.cc +1118 -0
  186. package/deps/rocksdb/rocksdb/db/log_format.h +48 -0
  187. package/deps/rocksdb/rocksdb/db/log_reader.cc +654 -0
  188. package/deps/rocksdb/rocksdb/db/log_reader.h +192 -0
  189. package/deps/rocksdb/rocksdb/db/log_test.cc +901 -0
  190. package/deps/rocksdb/rocksdb/db/log_writer.cc +164 -0
  191. package/deps/rocksdb/rocksdb/db/log_writer.h +115 -0
  192. package/deps/rocksdb/rocksdb/db/logs_with_prep_tracker.cc +67 -0
  193. package/deps/rocksdb/rocksdb/db/logs_with_prep_tracker.h +63 -0
  194. package/deps/rocksdb/rocksdb/db/lookup_key.h +66 -0
  195. package/deps/rocksdb/rocksdb/db/malloc_stats.cc +54 -0
  196. package/deps/rocksdb/rocksdb/db/malloc_stats.h +24 -0
  197. package/deps/rocksdb/rocksdb/db/manual_compaction_test.cc +296 -0
  198. package/deps/rocksdb/rocksdb/db/memtable.cc +1169 -0
  199. package/deps/rocksdb/rocksdb/db/memtable.h +554 -0
  200. package/deps/rocksdb/rocksdb/db/memtable_list.cc +888 -0
  201. package/deps/rocksdb/rocksdb/db/memtable_list.h +438 -0
  202. package/deps/rocksdb/rocksdb/db/memtable_list_test.cc +935 -0
  203. package/deps/rocksdb/rocksdb/db/merge_context.h +134 -0
  204. package/deps/rocksdb/rocksdb/db/merge_helper.cc +421 -0
  205. package/deps/rocksdb/rocksdb/db/merge_helper.h +197 -0
  206. package/deps/rocksdb/rocksdb/db/merge_helper_test.cc +290 -0
  207. package/deps/rocksdb/rocksdb/db/merge_operator.cc +86 -0
  208. package/deps/rocksdb/rocksdb/db/merge_test.cc +608 -0
  209. package/deps/rocksdb/rocksdb/db/obsolete_files_test.cc +338 -0
  210. package/deps/rocksdb/rocksdb/db/options_file_test.cc +119 -0
  211. package/deps/rocksdb/rocksdb/db/output_validator.cc +30 -0
  212. package/deps/rocksdb/rocksdb/db/output_validator.h +47 -0
  213. package/deps/rocksdb/rocksdb/db/perf_context_test.cc +993 -0
  214. package/deps/rocksdb/rocksdb/db/periodic_work_scheduler.cc +113 -0
  215. package/deps/rocksdb/rocksdb/db/periodic_work_scheduler.h +76 -0
  216. package/deps/rocksdb/rocksdb/db/periodic_work_scheduler_test.cc +231 -0
  217. package/deps/rocksdb/rocksdb/db/pinned_iterators_manager.h +87 -0
  218. package/deps/rocksdb/rocksdb/db/plain_table_db_test.cc +1374 -0
  219. package/deps/rocksdb/rocksdb/db/pre_release_callback.h +38 -0
  220. package/deps/rocksdb/rocksdb/db/prefix_test.cc +910 -0
  221. package/deps/rocksdb/rocksdb/db/range_del_aggregator.cc +489 -0
  222. package/deps/rocksdb/rocksdb/db/range_del_aggregator.h +446 -0
  223. package/deps/rocksdb/rocksdb/db/range_del_aggregator_bench.cc +260 -0
  224. package/deps/rocksdb/rocksdb/db/range_del_aggregator_test.cc +709 -0
  225. package/deps/rocksdb/rocksdb/db/range_tombstone_fragmenter.cc +439 -0
  226. package/deps/rocksdb/rocksdb/db/range_tombstone_fragmenter.h +256 -0
  227. package/deps/rocksdb/rocksdb/db/range_tombstone_fragmenter_test.cc +552 -0
  228. package/deps/rocksdb/rocksdb/db/read_callback.h +53 -0
  229. package/deps/rocksdb/rocksdb/db/repair.cc +722 -0
  230. package/deps/rocksdb/rocksdb/db/repair_test.cc +390 -0
  231. package/deps/rocksdb/rocksdb/db/snapshot_checker.h +61 -0
  232. package/deps/rocksdb/rocksdb/db/snapshot_impl.cc +26 -0
  233. package/deps/rocksdb/rocksdb/db/snapshot_impl.h +167 -0
  234. package/deps/rocksdb/rocksdb/db/table_cache.cc +704 -0
  235. package/deps/rocksdb/rocksdb/db/table_cache.h +233 -0
  236. package/deps/rocksdb/rocksdb/db/table_properties_collector.cc +75 -0
  237. package/deps/rocksdb/rocksdb/db/table_properties_collector.h +107 -0
  238. package/deps/rocksdb/rocksdb/db/table_properties_collector_test.cc +517 -0
  239. package/deps/rocksdb/rocksdb/db/transaction_log_impl.cc +318 -0
  240. package/deps/rocksdb/rocksdb/db/transaction_log_impl.h +128 -0
  241. package/deps/rocksdb/rocksdb/db/trim_history_scheduler.cc +54 -0
  242. package/deps/rocksdb/rocksdb/db/trim_history_scheduler.h +44 -0
  243. package/deps/rocksdb/rocksdb/db/version_builder.cc +1078 -0
  244. package/deps/rocksdb/rocksdb/db/version_builder.h +69 -0
  245. package/deps/rocksdb/rocksdb/db/version_builder_test.cc +1551 -0
  246. package/deps/rocksdb/rocksdb/db/version_edit.cc +955 -0
  247. package/deps/rocksdb/rocksdb/db/version_edit.h +609 -0
  248. package/deps/rocksdb/rocksdb/db/version_edit_handler.cc +699 -0
  249. package/deps/rocksdb/rocksdb/db/version_edit_handler.h +252 -0
  250. package/deps/rocksdb/rocksdb/db/version_edit_test.cc +597 -0
  251. package/deps/rocksdb/rocksdb/db/version_set.cc +6333 -0
  252. package/deps/rocksdb/rocksdb/db/version_set.h +1485 -0
  253. package/deps/rocksdb/rocksdb/db/version_set_test.cc +3035 -0
  254. package/deps/rocksdb/rocksdb/db/wal_edit.cc +204 -0
  255. package/deps/rocksdb/rocksdb/db/wal_edit.h +166 -0
  256. package/deps/rocksdb/rocksdb/db/wal_edit_test.cc +214 -0
  257. package/deps/rocksdb/rocksdb/db/wal_manager.cc +517 -0
  258. package/deps/rocksdb/rocksdb/db/wal_manager.h +119 -0
  259. package/deps/rocksdb/rocksdb/db/wal_manager_test.cc +340 -0
  260. package/deps/rocksdb/rocksdb/db/write_batch.cc +2174 -0
  261. package/deps/rocksdb/rocksdb/db/write_batch_base.cc +94 -0
  262. package/deps/rocksdb/rocksdb/db/write_batch_internal.h +250 -0
  263. package/deps/rocksdb/rocksdb/db/write_batch_test.cc +907 -0
  264. package/deps/rocksdb/rocksdb/db/write_callback.h +27 -0
  265. package/deps/rocksdb/rocksdb/db/write_callback_test.cc +457 -0
  266. package/deps/rocksdb/rocksdb/db/write_controller.cc +128 -0
  267. package/deps/rocksdb/rocksdb/db/write_controller.h +144 -0
  268. package/deps/rocksdb/rocksdb/db/write_controller_test.cc +135 -0
  269. package/deps/rocksdb/rocksdb/db/write_thread.cc +796 -0
  270. package/deps/rocksdb/rocksdb/db/write_thread.h +433 -0
  271. package/deps/rocksdb/rocksdb/db_stress_tool/CMakeLists.txt +14 -0
  272. package/deps/rocksdb/rocksdb/db_stress_tool/batched_ops_stress.cc +341 -0
  273. package/deps/rocksdb/rocksdb/db_stress_tool/cf_consistency_stress.cc +520 -0
  274. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress.cc +23 -0
  275. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_common.cc +337 -0
  276. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_common.h +554 -0
  277. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_compaction_filter.h +79 -0
  278. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_driver.cc +173 -0
  279. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_driver.h +17 -0
  280. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_env_wrapper.h +38 -0
  281. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_gflags.cc +763 -0
  282. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_listener.h +222 -0
  283. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_shared_state.cc +27 -0
  284. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_shared_state.h +428 -0
  285. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_stat.h +218 -0
  286. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_table_properties_collector.h +64 -0
  287. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_test_base.cc +2430 -0
  288. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_test_base.h +237 -0
  289. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_tool.cc +343 -0
  290. package/deps/rocksdb/rocksdb/db_stress_tool/no_batched_ops_stress.cc +800 -0
  291. package/deps/rocksdb/rocksdb/env/composite_env_wrapper.h +920 -0
  292. package/deps/rocksdb/rocksdb/env/env.cc +733 -0
  293. package/deps/rocksdb/rocksdb/env/env_basic_test.cc +352 -0
  294. package/deps/rocksdb/rocksdb/env/env_chroot.cc +346 -0
  295. package/deps/rocksdb/rocksdb/env/env_chroot.h +22 -0
  296. package/deps/rocksdb/rocksdb/env/env_encryption.cc +1148 -0
  297. package/deps/rocksdb/rocksdb/env/env_encryption_ctr.h +137 -0
  298. package/deps/rocksdb/rocksdb/env/env_hdfs.cc +648 -0
  299. package/deps/rocksdb/rocksdb/env/env_posix.cc +514 -0
  300. package/deps/rocksdb/rocksdb/env/env_test.cc +2230 -0
  301. package/deps/rocksdb/rocksdb/env/file_system.cc +132 -0
  302. package/deps/rocksdb/rocksdb/env/file_system_tracer.cc +448 -0
  303. package/deps/rocksdb/rocksdb/env/file_system_tracer.h +415 -0
  304. package/deps/rocksdb/rocksdb/env/fs_posix.cc +1086 -0
  305. package/deps/rocksdb/rocksdb/env/io_posix.cc +1499 -0
  306. package/deps/rocksdb/rocksdb/env/io_posix.h +402 -0
  307. package/deps/rocksdb/rocksdb/env/io_posix_test.cc +140 -0
  308. package/deps/rocksdb/rocksdb/env/mock_env.cc +1066 -0
  309. package/deps/rocksdb/rocksdb/env/mock_env.h +41 -0
  310. package/deps/rocksdb/rocksdb/env/mock_env_test.cc +85 -0
  311. package/deps/rocksdb/rocksdb/file/delete_scheduler.cc +402 -0
  312. package/deps/rocksdb/rocksdb/file/delete_scheduler.h +150 -0
  313. package/deps/rocksdb/rocksdb/file/delete_scheduler_test.cc +717 -0
  314. package/deps/rocksdb/rocksdb/file/file_prefetch_buffer.cc +156 -0
  315. package/deps/rocksdb/rocksdb/file/file_prefetch_buffer.h +99 -0
  316. package/deps/rocksdb/rocksdb/file/file_util.cc +268 -0
  317. package/deps/rocksdb/rocksdb/file/file_util.h +96 -0
  318. package/deps/rocksdb/rocksdb/file/filename.cc +473 -0
  319. package/deps/rocksdb/rocksdb/file/filename.h +182 -0
  320. package/deps/rocksdb/rocksdb/file/prefetch_test.cc +188 -0
  321. package/deps/rocksdb/rocksdb/file/random_access_file_reader.cc +315 -0
  322. package/deps/rocksdb/rocksdb/file/random_access_file_reader.h +142 -0
  323. package/deps/rocksdb/rocksdb/file/random_access_file_reader_test.cc +482 -0
  324. package/deps/rocksdb/rocksdb/file/read_write_util.cc +67 -0
  325. package/deps/rocksdb/rocksdb/file/read_write_util.h +34 -0
  326. package/deps/rocksdb/rocksdb/file/readahead_raf.cc +169 -0
  327. package/deps/rocksdb/rocksdb/file/readahead_raf.h +29 -0
  328. package/deps/rocksdb/rocksdb/file/sequence_file_reader.cc +237 -0
  329. package/deps/rocksdb/rocksdb/file/sequence_file_reader.h +63 -0
  330. package/deps/rocksdb/rocksdb/file/sst_file_manager_impl.cc +552 -0
  331. package/deps/rocksdb/rocksdb/file/sst_file_manager_impl.h +203 -0
  332. package/deps/rocksdb/rocksdb/file/writable_file_writer.cc +523 -0
  333. package/deps/rocksdb/rocksdb/file/writable_file_writer.h +251 -0
  334. package/deps/rocksdb/rocksdb/hdfs/env_hdfs.h +386 -0
  335. package/deps/rocksdb/rocksdb/include/rocksdb/advanced_options.h +839 -0
  336. package/deps/rocksdb/rocksdb/include/rocksdb/c.h +2218 -0
  337. package/deps/rocksdb/rocksdb/include/rocksdb/cache.h +294 -0
  338. package/deps/rocksdb/rocksdb/include/rocksdb/cleanable.h +71 -0
  339. package/deps/rocksdb/rocksdb/include/rocksdb/compaction_filter.h +214 -0
  340. package/deps/rocksdb/rocksdb/include/rocksdb/compaction_job_stats.h +98 -0
  341. package/deps/rocksdb/rocksdb/include/rocksdb/comparator.h +137 -0
  342. package/deps/rocksdb/rocksdb/include/rocksdb/compression_type.h +40 -0
  343. package/deps/rocksdb/rocksdb/include/rocksdb/concurrent_task_limiter.h +46 -0
  344. package/deps/rocksdb/rocksdb/include/rocksdb/configurable.h +359 -0
  345. package/deps/rocksdb/rocksdb/include/rocksdb/convenience.h +499 -0
  346. package/deps/rocksdb/rocksdb/include/rocksdb/customizable.h +138 -0
  347. package/deps/rocksdb/rocksdb/include/rocksdb/db.h +1697 -0
  348. package/deps/rocksdb/rocksdb/include/rocksdb/db_bench_tool.h +11 -0
  349. package/deps/rocksdb/rocksdb/include/rocksdb/db_dump_tool.h +45 -0
  350. package/deps/rocksdb/rocksdb/include/rocksdb/db_stress_tool.h +11 -0
  351. package/deps/rocksdb/rocksdb/include/rocksdb/env.h +1671 -0
  352. package/deps/rocksdb/rocksdb/include/rocksdb/env_encryption.h +405 -0
  353. package/deps/rocksdb/rocksdb/include/rocksdb/experimental.h +29 -0
  354. package/deps/rocksdb/rocksdb/include/rocksdb/file_checksum.h +129 -0
  355. package/deps/rocksdb/rocksdb/include/rocksdb/file_system.h +1472 -0
  356. package/deps/rocksdb/rocksdb/include/rocksdb/filter_policy.h +238 -0
  357. package/deps/rocksdb/rocksdb/include/rocksdb/flush_block_policy.h +61 -0
  358. package/deps/rocksdb/rocksdb/include/rocksdb/io_status.h +269 -0
  359. package/deps/rocksdb/rocksdb/include/rocksdb/iostats_context.h +56 -0
  360. package/deps/rocksdb/rocksdb/include/rocksdb/iterator.h +128 -0
  361. package/deps/rocksdb/rocksdb/include/rocksdb/ldb_tool.h +43 -0
  362. package/deps/rocksdb/rocksdb/include/rocksdb/listener.h +556 -0
  363. package/deps/rocksdb/rocksdb/include/rocksdb/memory_allocator.h +77 -0
  364. package/deps/rocksdb/rocksdb/include/rocksdb/memtablerep.h +385 -0
  365. package/deps/rocksdb/rocksdb/include/rocksdb/merge_operator.h +257 -0
  366. package/deps/rocksdb/rocksdb/include/rocksdb/metadata.h +155 -0
  367. package/deps/rocksdb/rocksdb/include/rocksdb/options.h +1702 -0
  368. package/deps/rocksdb/rocksdb/include/rocksdb/perf_context.h +237 -0
  369. package/deps/rocksdb/rocksdb/include/rocksdb/perf_level.h +35 -0
  370. package/deps/rocksdb/rocksdb/include/rocksdb/persistent_cache.h +73 -0
  371. package/deps/rocksdb/rocksdb/include/rocksdb/rate_limiter.h +139 -0
  372. package/deps/rocksdb/rocksdb/include/rocksdb/rocksdb_namespace.h +10 -0
  373. package/deps/rocksdb/rocksdb/include/rocksdb/slice.h +269 -0
  374. package/deps/rocksdb/rocksdb/include/rocksdb/slice_transform.h +103 -0
  375. package/deps/rocksdb/rocksdb/include/rocksdb/snapshot.h +48 -0
  376. package/deps/rocksdb/rocksdb/include/rocksdb/sst_dump_tool.h +19 -0
  377. package/deps/rocksdb/rocksdb/include/rocksdb/sst_file_manager.h +136 -0
  378. package/deps/rocksdb/rocksdb/include/rocksdb/sst_file_reader.h +47 -0
  379. package/deps/rocksdb/rocksdb/include/rocksdb/sst_file_writer.h +145 -0
  380. package/deps/rocksdb/rocksdb/include/rocksdb/sst_partitioner.h +135 -0
  381. package/deps/rocksdb/rocksdb/include/rocksdb/statistics.h +592 -0
  382. package/deps/rocksdb/rocksdb/include/rocksdb/stats_history.h +69 -0
  383. package/deps/rocksdb/rocksdb/include/rocksdb/status.h +608 -0
  384. package/deps/rocksdb/rocksdb/include/rocksdb/table.h +711 -0
  385. package/deps/rocksdb/rocksdb/include/rocksdb/table_properties.h +280 -0
  386. package/deps/rocksdb/rocksdb/include/rocksdb/thread_status.h +188 -0
  387. package/deps/rocksdb/rocksdb/include/rocksdb/threadpool.h +58 -0
  388. package/deps/rocksdb/rocksdb/include/rocksdb/trace_reader_writer.h +48 -0
  389. package/deps/rocksdb/rocksdb/include/rocksdb/transaction_log.h +121 -0
  390. package/deps/rocksdb/rocksdb/include/rocksdb/types.h +74 -0
  391. package/deps/rocksdb/rocksdb/include/rocksdb/universal_compaction.h +86 -0
  392. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/backupable_db.h +535 -0
  393. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/checkpoint.h +61 -0
  394. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/convenience.h +10 -0
  395. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/db_ttl.h +72 -0
  396. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/debug.h +49 -0
  397. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/env_librados.h +175 -0
  398. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/env_mirror.h +180 -0
  399. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/info_log_finder.h +19 -0
  400. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/ldb_cmd.h +288 -0
  401. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/ldb_cmd_execute_result.h +71 -0
  402. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/leveldb_options.h +145 -0
  403. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/lua/rocks_lua_custom_library.h +43 -0
  404. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/lua/rocks_lua_util.h +55 -0
  405. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/memory_util.h +50 -0
  406. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/object_registry.h +205 -0
  407. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/optimistic_transaction_db.h +100 -0
  408. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/option_change_migration.h +19 -0
  409. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/options_type.h +876 -0
  410. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/options_util.h +128 -0
  411. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/sim_cache.h +94 -0
  412. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/stackable_db.h +504 -0
  413. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/table_properties_collectors.h +95 -0
  414. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/transaction.h +626 -0
  415. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/transaction_db.h +432 -0
  416. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/transaction_db_mutex.h +92 -0
  417. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/utility_db.h +34 -0
  418. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/write_batch_with_index.h +279 -0
  419. package/deps/rocksdb/rocksdb/include/rocksdb/version.h +16 -0
  420. package/deps/rocksdb/rocksdb/include/rocksdb/wal_filter.h +102 -0
  421. package/deps/rocksdb/rocksdb/include/rocksdb/write_batch.h +377 -0
  422. package/deps/rocksdb/rocksdb/include/rocksdb/write_batch_base.h +127 -0
  423. package/deps/rocksdb/rocksdb/include/rocksdb/write_buffer_manager.h +106 -0
  424. package/deps/rocksdb/rocksdb/logging/auto_roll_logger.cc +300 -0
  425. package/deps/rocksdb/rocksdb/logging/auto_roll_logger.h +165 -0
  426. package/deps/rocksdb/rocksdb/logging/auto_roll_logger_test.cc +684 -0
  427. package/deps/rocksdb/rocksdb/logging/env_logger.h +165 -0
  428. package/deps/rocksdb/rocksdb/logging/env_logger_test.cc +162 -0
  429. package/deps/rocksdb/rocksdb/logging/event_logger.cc +70 -0
  430. package/deps/rocksdb/rocksdb/logging/event_logger.h +203 -0
  431. package/deps/rocksdb/rocksdb/logging/event_logger_test.cc +43 -0
  432. package/deps/rocksdb/rocksdb/logging/log_buffer.cc +92 -0
  433. package/deps/rocksdb/rocksdb/logging/log_buffer.h +56 -0
  434. package/deps/rocksdb/rocksdb/logging/logging.h +68 -0
  435. package/deps/rocksdb/rocksdb/logging/posix_logger.h +185 -0
  436. package/deps/rocksdb/rocksdb/memory/allocator.h +57 -0
  437. package/deps/rocksdb/rocksdb/memory/arena.cc +233 -0
  438. package/deps/rocksdb/rocksdb/memory/arena.h +141 -0
  439. package/deps/rocksdb/rocksdb/memory/arena_test.cc +204 -0
  440. package/deps/rocksdb/rocksdb/memory/concurrent_arena.cc +47 -0
  441. package/deps/rocksdb/rocksdb/memory/concurrent_arena.h +218 -0
  442. package/deps/rocksdb/rocksdb/memory/jemalloc_nodump_allocator.cc +206 -0
  443. package/deps/rocksdb/rocksdb/memory/jemalloc_nodump_allocator.h +78 -0
  444. package/deps/rocksdb/rocksdb/memory/memkind_kmem_allocator.cc +33 -0
  445. package/deps/rocksdb/rocksdb/memory/memkind_kmem_allocator.h +27 -0
  446. package/deps/rocksdb/rocksdb/memory/memkind_kmem_allocator_test.cc +102 -0
  447. package/deps/rocksdb/rocksdb/memory/memory_allocator.h +38 -0
  448. package/deps/rocksdb/rocksdb/memory/memory_usage.h +25 -0
  449. package/deps/rocksdb/rocksdb/memtable/alloc_tracker.cc +62 -0
  450. package/deps/rocksdb/rocksdb/memtable/hash_linklist_rep.cc +844 -0
  451. package/deps/rocksdb/rocksdb/memtable/hash_linklist_rep.h +49 -0
  452. package/deps/rocksdb/rocksdb/memtable/hash_skiplist_rep.cc +349 -0
  453. package/deps/rocksdb/rocksdb/memtable/hash_skiplist_rep.h +44 -0
  454. package/deps/rocksdb/rocksdb/memtable/inlineskiplist.h +997 -0
  455. package/deps/rocksdb/rocksdb/memtable/inlineskiplist_test.cc +663 -0
  456. package/deps/rocksdb/rocksdb/memtable/memtablerep_bench.cc +677 -0
  457. package/deps/rocksdb/rocksdb/memtable/skiplist.h +496 -0
  458. package/deps/rocksdb/rocksdb/memtable/skiplist_test.cc +388 -0
  459. package/deps/rocksdb/rocksdb/memtable/skiplistrep.cc +280 -0
  460. package/deps/rocksdb/rocksdb/memtable/stl_wrappers.h +33 -0
  461. package/deps/rocksdb/rocksdb/memtable/vectorrep.cc +301 -0
  462. package/deps/rocksdb/rocksdb/memtable/write_buffer_manager.cc +148 -0
  463. package/deps/rocksdb/rocksdb/memtable/write_buffer_manager_test.cc +203 -0
  464. package/deps/rocksdb/rocksdb/monitoring/file_read_sample.h +23 -0
  465. package/deps/rocksdb/rocksdb/monitoring/histogram.cc +287 -0
  466. package/deps/rocksdb/rocksdb/monitoring/histogram.h +149 -0
  467. package/deps/rocksdb/rocksdb/monitoring/histogram_test.cc +231 -0
  468. package/deps/rocksdb/rocksdb/monitoring/histogram_windowing.cc +200 -0
  469. package/deps/rocksdb/rocksdb/monitoring/histogram_windowing.h +84 -0
  470. package/deps/rocksdb/rocksdb/monitoring/in_memory_stats_history.cc +49 -0
  471. package/deps/rocksdb/rocksdb/monitoring/in_memory_stats_history.h +74 -0
  472. package/deps/rocksdb/rocksdb/monitoring/instrumented_mutex.cc +71 -0
  473. package/deps/rocksdb/rocksdb/monitoring/instrumented_mutex.h +98 -0
  474. package/deps/rocksdb/rocksdb/monitoring/iostats_context.cc +62 -0
  475. package/deps/rocksdb/rocksdb/monitoring/iostats_context_imp.h +60 -0
  476. package/deps/rocksdb/rocksdb/monitoring/iostats_context_test.cc +29 -0
  477. package/deps/rocksdb/rocksdb/monitoring/perf_context.cc +566 -0
  478. package/deps/rocksdb/rocksdb/monitoring/perf_context_imp.h +97 -0
  479. package/deps/rocksdb/rocksdb/monitoring/perf_level.cc +28 -0
  480. package/deps/rocksdb/rocksdb/monitoring/perf_level_imp.h +18 -0
  481. package/deps/rocksdb/rocksdb/monitoring/perf_step_timer.h +79 -0
  482. package/deps/rocksdb/rocksdb/monitoring/persistent_stats_history.cc +169 -0
  483. package/deps/rocksdb/rocksdb/monitoring/persistent_stats_history.h +83 -0
  484. package/deps/rocksdb/rocksdb/monitoring/statistics.cc +431 -0
  485. package/deps/rocksdb/rocksdb/monitoring/statistics.h +138 -0
  486. package/deps/rocksdb/rocksdb/monitoring/statistics_test.cc +47 -0
  487. package/deps/rocksdb/rocksdb/monitoring/stats_history_test.cc +652 -0
  488. package/deps/rocksdb/rocksdb/monitoring/thread_status_impl.cc +163 -0
  489. package/deps/rocksdb/rocksdb/monitoring/thread_status_updater.cc +314 -0
  490. package/deps/rocksdb/rocksdb/monitoring/thread_status_updater.h +233 -0
  491. package/deps/rocksdb/rocksdb/monitoring/thread_status_updater_debug.cc +43 -0
  492. package/deps/rocksdb/rocksdb/monitoring/thread_status_util.cc +206 -0
  493. package/deps/rocksdb/rocksdb/monitoring/thread_status_util.h +134 -0
  494. package/deps/rocksdb/rocksdb/monitoring/thread_status_util_debug.cc +32 -0
  495. package/deps/rocksdb/rocksdb/options/cf_options.cc +1026 -0
  496. package/deps/rocksdb/rocksdb/options/cf_options.h +308 -0
  497. package/deps/rocksdb/rocksdb/options/configurable.cc +681 -0
  498. package/deps/rocksdb/rocksdb/options/configurable_helper.h +251 -0
  499. package/deps/rocksdb/rocksdb/options/configurable_test.cc +757 -0
  500. package/deps/rocksdb/rocksdb/options/configurable_test.h +127 -0
  501. package/deps/rocksdb/rocksdb/options/customizable.cc +77 -0
  502. package/deps/rocksdb/rocksdb/options/customizable_helper.h +216 -0
  503. package/deps/rocksdb/rocksdb/options/customizable_test.cc +625 -0
  504. package/deps/rocksdb/rocksdb/options/db_options.cc +835 -0
  505. package/deps/rocksdb/rocksdb/options/db_options.h +126 -0
  506. package/deps/rocksdb/rocksdb/options/options.cc +664 -0
  507. package/deps/rocksdb/rocksdb/options/options_helper.cc +1391 -0
  508. package/deps/rocksdb/rocksdb/options/options_helper.h +118 -0
  509. package/deps/rocksdb/rocksdb/options/options_parser.cc +721 -0
  510. package/deps/rocksdb/rocksdb/options/options_parser.h +151 -0
  511. package/deps/rocksdb/rocksdb/options/options_settable_test.cc +583 -0
  512. package/deps/rocksdb/rocksdb/options/options_test.cc +3794 -0
  513. package/deps/rocksdb/rocksdb/port/jemalloc_helper.h +106 -0
  514. package/deps/rocksdb/rocksdb/port/lang.h +16 -0
  515. package/deps/rocksdb/rocksdb/port/likely.h +18 -0
  516. package/deps/rocksdb/rocksdb/port/malloc.h +17 -0
  517. package/deps/rocksdb/rocksdb/port/port.h +21 -0
  518. package/deps/rocksdb/rocksdb/port/port_dirent.h +44 -0
  519. package/deps/rocksdb/rocksdb/port/port_example.h +101 -0
  520. package/deps/rocksdb/rocksdb/port/port_posix.cc +266 -0
  521. package/deps/rocksdb/rocksdb/port/port_posix.h +223 -0
  522. package/deps/rocksdb/rocksdb/port/stack_trace.cc +179 -0
  523. package/deps/rocksdb/rocksdb/port/stack_trace.h +28 -0
  524. package/deps/rocksdb/rocksdb/port/sys_time.h +47 -0
  525. package/deps/rocksdb/rocksdb/port/util_logger.h +20 -0
  526. package/deps/rocksdb/rocksdb/port/win/env_default.cc +45 -0
  527. package/deps/rocksdb/rocksdb/port/win/env_win.cc +1449 -0
  528. package/deps/rocksdb/rocksdb/port/win/env_win.h +294 -0
  529. package/deps/rocksdb/rocksdb/port/win/io_win.cc +1084 -0
  530. package/deps/rocksdb/rocksdb/port/win/io_win.h +494 -0
  531. package/deps/rocksdb/rocksdb/port/win/port_win.cc +283 -0
  532. package/deps/rocksdb/rocksdb/port/win/port_win.h +411 -0
  533. package/deps/rocksdb/rocksdb/port/win/win_jemalloc.cc +79 -0
  534. package/deps/rocksdb/rocksdb/port/win/win_logger.cc +194 -0
  535. package/deps/rocksdb/rocksdb/port/win/win_logger.h +67 -0
  536. package/deps/rocksdb/rocksdb/port/win/win_thread.cc +183 -0
  537. package/deps/rocksdb/rocksdb/port/win/win_thread.h +122 -0
  538. package/deps/rocksdb/rocksdb/port/win/xpress_win.cc +221 -0
  539. package/deps/rocksdb/rocksdb/port/win/xpress_win.h +26 -0
  540. package/deps/rocksdb/rocksdb/port/xpress.h +17 -0
  541. package/deps/rocksdb/rocksdb/src.mk +631 -0
  542. package/deps/rocksdb/rocksdb/table/adaptive/adaptive_table_factory.cc +126 -0
  543. package/deps/rocksdb/rocksdb/table/adaptive/adaptive_table_factory.h +57 -0
  544. package/deps/rocksdb/rocksdb/table/block_based/binary_search_index_reader.cc +73 -0
  545. package/deps/rocksdb/rocksdb/table/block_based/binary_search_index_reader.h +48 -0
  546. package/deps/rocksdb/rocksdb/table/block_based/block.cc +1049 -0
  547. package/deps/rocksdb/rocksdb/table/block_based/block.h +720 -0
  548. package/deps/rocksdb/rocksdb/table/block_based/block_based_filter_block.cc +348 -0
  549. package/deps/rocksdb/rocksdb/table/block_based/block_based_filter_block.h +119 -0
  550. package/deps/rocksdb/rocksdb/table/block_based/block_based_filter_block_test.cc +434 -0
  551. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_builder.cc +1835 -0
  552. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_builder.h +193 -0
  553. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_factory.cc +839 -0
  554. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_factory.h +95 -0
  555. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_iterator.cc +383 -0
  556. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_iterator.h +251 -0
  557. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader.cc +3563 -0
  558. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader.h +681 -0
  559. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader_impl.h +190 -0
  560. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader_test.cc +347 -0
  561. package/deps/rocksdb/rocksdb/table/block_based/block_builder.cc +201 -0
  562. package/deps/rocksdb/rocksdb/table/block_based/block_builder.h +78 -0
  563. package/deps/rocksdb/rocksdb/table/block_based/block_prefetcher.cc +66 -0
  564. package/deps/rocksdb/rocksdb/table/block_based/block_prefetcher.h +32 -0
  565. package/deps/rocksdb/rocksdb/table/block_based/block_prefix_index.cc +232 -0
  566. package/deps/rocksdb/rocksdb/table/block_based/block_prefix_index.h +66 -0
  567. package/deps/rocksdb/rocksdb/table/block_based/block_test.cc +623 -0
  568. package/deps/rocksdb/rocksdb/table/block_based/block_type.h +30 -0
  569. package/deps/rocksdb/rocksdb/table/block_based/cachable_entry.h +220 -0
  570. package/deps/rocksdb/rocksdb/table/block_based/data_block_footer.cc +59 -0
  571. package/deps/rocksdb/rocksdb/table/block_based/data_block_footer.h +25 -0
  572. package/deps/rocksdb/rocksdb/table/block_based/data_block_hash_index.cc +93 -0
  573. package/deps/rocksdb/rocksdb/table/block_based/data_block_hash_index.h +136 -0
  574. package/deps/rocksdb/rocksdb/table/block_based/data_block_hash_index_test.cc +717 -0
  575. package/deps/rocksdb/rocksdb/table/block_based/filter_block.h +180 -0
  576. package/deps/rocksdb/rocksdb/table/block_based/filter_block_reader_common.cc +102 -0
  577. package/deps/rocksdb/rocksdb/table/block_based/filter_block_reader_common.h +55 -0
  578. package/deps/rocksdb/rocksdb/table/block_based/filter_policy.cc +1407 -0
  579. package/deps/rocksdb/rocksdb/table/block_based/filter_policy_internal.h +168 -0
  580. package/deps/rocksdb/rocksdb/table/block_based/flush_block_policy.cc +88 -0
  581. package/deps/rocksdb/rocksdb/table/block_based/flush_block_policy.h +41 -0
  582. package/deps/rocksdb/rocksdb/table/block_based/full_filter_block.cc +344 -0
  583. package/deps/rocksdb/rocksdb/table/block_based/full_filter_block.h +139 -0
  584. package/deps/rocksdb/rocksdb/table/block_based/full_filter_block_test.cc +333 -0
  585. package/deps/rocksdb/rocksdb/table/block_based/hash_index_reader.cc +147 -0
  586. package/deps/rocksdb/rocksdb/table/block_based/hash_index_reader.h +49 -0
  587. package/deps/rocksdb/rocksdb/table/block_based/index_builder.cc +248 -0
  588. package/deps/rocksdb/rocksdb/table/block_based/index_builder.h +444 -0
  589. package/deps/rocksdb/rocksdb/table/block_based/index_reader_common.cc +54 -0
  590. package/deps/rocksdb/rocksdb/table/block_based/index_reader_common.h +85 -0
  591. package/deps/rocksdb/rocksdb/table/block_based/mock_block_based_table.h +56 -0
  592. package/deps/rocksdb/rocksdb/table/block_based/parsed_full_filter_block.cc +22 -0
  593. package/deps/rocksdb/rocksdb/table/block_based/parsed_full_filter_block.h +40 -0
  594. package/deps/rocksdb/rocksdb/table/block_based/partitioned_filter_block.cc +521 -0
  595. package/deps/rocksdb/rocksdb/table/block_based/partitioned_filter_block.h +144 -0
  596. package/deps/rocksdb/rocksdb/table/block_based/partitioned_filter_block_test.cc +424 -0
  597. package/deps/rocksdb/rocksdb/table/block_based/partitioned_index_iterator.cc +163 -0
  598. package/deps/rocksdb/rocksdb/table/block_based/partitioned_index_iterator.h +142 -0
  599. package/deps/rocksdb/rocksdb/table/block_based/partitioned_index_reader.cc +186 -0
  600. package/deps/rocksdb/rocksdb/table/block_based/partitioned_index_reader.h +51 -0
  601. package/deps/rocksdb/rocksdb/table/block_based/reader_common.cc +64 -0
  602. package/deps/rocksdb/rocksdb/table/block_based/reader_common.h +38 -0
  603. package/deps/rocksdb/rocksdb/table/block_based/uncompression_dict_reader.cc +120 -0
  604. package/deps/rocksdb/rocksdb/table/block_based/uncompression_dict_reader.h +59 -0
  605. package/deps/rocksdb/rocksdb/table/block_fetcher.cc +324 -0
  606. package/deps/rocksdb/rocksdb/table/block_fetcher.h +129 -0
  607. package/deps/rocksdb/rocksdb/table/block_fetcher_test.cc +534 -0
  608. package/deps/rocksdb/rocksdb/table/cleanable_test.cc +277 -0
  609. package/deps/rocksdb/rocksdb/table/cuckoo/cuckoo_table_builder.cc +543 -0
  610. package/deps/rocksdb/rocksdb/table/cuckoo/cuckoo_table_builder.h +136 -0
  611. package/deps/rocksdb/rocksdb/table/cuckoo/cuckoo_table_builder_test.cc +663 -0
  612. package/deps/rocksdb/rocksdb/table/cuckoo/cuckoo_table_factory.cc +107 -0
  613. package/deps/rocksdb/rocksdb/table/cuckoo/cuckoo_table_factory.h +81 -0
  614. package/deps/rocksdb/rocksdb/table/cuckoo/cuckoo_table_reader.cc +404 -0
  615. package/deps/rocksdb/rocksdb/table/cuckoo/cuckoo_table_reader.h +101 -0
  616. package/deps/rocksdb/rocksdb/table/cuckoo/cuckoo_table_reader_test.cc +585 -0
  617. package/deps/rocksdb/rocksdb/table/format.cc +422 -0
  618. package/deps/rocksdb/rocksdb/table/format.h +348 -0
  619. package/deps/rocksdb/rocksdb/table/get_context.cc +408 -0
  620. package/deps/rocksdb/rocksdb/table/get_context.h +212 -0
  621. package/deps/rocksdb/rocksdb/table/internal_iterator.h +205 -0
  622. package/deps/rocksdb/rocksdb/table/iter_heap.h +42 -0
  623. package/deps/rocksdb/rocksdb/table/iterator.cc +210 -0
  624. package/deps/rocksdb/rocksdb/table/iterator_wrapper.h +180 -0
  625. package/deps/rocksdb/rocksdb/table/merger_test.cc +180 -0
  626. package/deps/rocksdb/rocksdb/table/merging_iterator.cc +481 -0
  627. package/deps/rocksdb/rocksdb/table/merging_iterator.h +64 -0
  628. package/deps/rocksdb/rocksdb/table/meta_blocks.cc +541 -0
  629. package/deps/rocksdb/rocksdb/table/meta_blocks.h +154 -0
  630. package/deps/rocksdb/rocksdb/table/mock_table.cc +328 -0
  631. package/deps/rocksdb/rocksdb/table/mock_table.h +89 -0
  632. package/deps/rocksdb/rocksdb/table/multiget_context.h +282 -0
  633. package/deps/rocksdb/rocksdb/table/persistent_cache_helper.cc +116 -0
  634. package/deps/rocksdb/rocksdb/table/persistent_cache_helper.h +44 -0
  635. package/deps/rocksdb/rocksdb/table/persistent_cache_options.h +34 -0
  636. package/deps/rocksdb/rocksdb/table/plain/plain_table_bloom.cc +78 -0
  637. package/deps/rocksdb/rocksdb/table/plain/plain_table_bloom.h +135 -0
  638. package/deps/rocksdb/rocksdb/table/plain/plain_table_builder.cc +332 -0
  639. package/deps/rocksdb/rocksdb/table/plain/plain_table_builder.h +153 -0
  640. package/deps/rocksdb/rocksdb/table/plain/plain_table_factory.cc +263 -0
  641. package/deps/rocksdb/rocksdb/table/plain/plain_table_factory.h +182 -0
  642. package/deps/rocksdb/rocksdb/table/plain/plain_table_index.cc +211 -0
  643. package/deps/rocksdb/rocksdb/table/plain/plain_table_index.h +249 -0
  644. package/deps/rocksdb/rocksdb/table/plain/plain_table_key_coding.cc +506 -0
  645. package/deps/rocksdb/rocksdb/table/plain/plain_table_key_coding.h +201 -0
  646. package/deps/rocksdb/rocksdb/table/plain/plain_table_reader.cc +781 -0
  647. package/deps/rocksdb/rocksdb/table/plain/plain_table_reader.h +247 -0
  648. package/deps/rocksdb/rocksdb/table/scoped_arena_iterator.h +61 -0
  649. package/deps/rocksdb/rocksdb/table/sst_file_dumper.cc +502 -0
  650. package/deps/rocksdb/rocksdb/table/sst_file_dumper.h +96 -0
  651. package/deps/rocksdb/rocksdb/table/sst_file_reader.cc +98 -0
  652. package/deps/rocksdb/rocksdb/table/sst_file_reader_test.cc +228 -0
  653. package/deps/rocksdb/rocksdb/table/sst_file_writer.cc +340 -0
  654. package/deps/rocksdb/rocksdb/table/sst_file_writer_collectors.h +94 -0
  655. package/deps/rocksdb/rocksdb/table/table_builder.h +203 -0
  656. package/deps/rocksdb/rocksdb/table/table_factory.cc +38 -0
  657. package/deps/rocksdb/rocksdb/table/table_properties.cc +300 -0
  658. package/deps/rocksdb/rocksdb/table/table_properties_internal.h +30 -0
  659. package/deps/rocksdb/rocksdb/table/table_reader.h +147 -0
  660. package/deps/rocksdb/rocksdb/table/table_reader_bench.cc +347 -0
  661. package/deps/rocksdb/rocksdb/table/table_reader_caller.h +39 -0
  662. package/deps/rocksdb/rocksdb/table/table_test.cc +4769 -0
  663. package/deps/rocksdb/rocksdb/table/two_level_iterator.cc +215 -0
  664. package/deps/rocksdb/rocksdb/table/two_level_iterator.h +43 -0
  665. package/deps/rocksdb/rocksdb/test_util/mock_time_env.cc +38 -0
  666. package/deps/rocksdb/rocksdb/test_util/mock_time_env.h +74 -0
  667. package/deps/rocksdb/rocksdb/test_util/sync_point.cc +93 -0
  668. package/deps/rocksdb/rocksdb/test_util/sync_point.h +161 -0
  669. package/deps/rocksdb/rocksdb/test_util/sync_point_impl.cc +129 -0
  670. package/deps/rocksdb/rocksdb/test_util/sync_point_impl.h +74 -0
  671. package/deps/rocksdb/rocksdb/test_util/testharness.cc +56 -0
  672. package/deps/rocksdb/rocksdb/test_util/testharness.h +53 -0
  673. package/deps/rocksdb/rocksdb/test_util/testutil.cc +566 -0
  674. package/deps/rocksdb/rocksdb/test_util/testutil.h +887 -0
  675. package/deps/rocksdb/rocksdb/test_util/testutil_test.cc +43 -0
  676. package/deps/rocksdb/rocksdb/test_util/transaction_test_util.cc +388 -0
  677. package/deps/rocksdb/rocksdb/test_util/transaction_test_util.h +132 -0
  678. package/deps/rocksdb/rocksdb/third-party/folly/folly/CPortability.h +27 -0
  679. package/deps/rocksdb/rocksdb/third-party/folly/folly/ConstexprMath.h +45 -0
  680. package/deps/rocksdb/rocksdb/third-party/folly/folly/Indestructible.h +166 -0
  681. package/deps/rocksdb/rocksdb/third-party/folly/folly/Optional.h +570 -0
  682. package/deps/rocksdb/rocksdb/third-party/folly/folly/Portability.h +92 -0
  683. package/deps/rocksdb/rocksdb/third-party/folly/folly/ScopeGuard.h +54 -0
  684. package/deps/rocksdb/rocksdb/third-party/folly/folly/Traits.h +152 -0
  685. package/deps/rocksdb/rocksdb/third-party/folly/folly/Unit.h +59 -0
  686. package/deps/rocksdb/rocksdb/third-party/folly/folly/Utility.h +141 -0
  687. package/deps/rocksdb/rocksdb/third-party/folly/folly/chrono/Hardware.h +33 -0
  688. package/deps/rocksdb/rocksdb/third-party/folly/folly/container/Array.h +74 -0
  689. package/deps/rocksdb/rocksdb/third-party/folly/folly/detail/Futex-inl.h +117 -0
  690. package/deps/rocksdb/rocksdb/third-party/folly/folly/detail/Futex.cpp +263 -0
  691. package/deps/rocksdb/rocksdb/third-party/folly/folly/detail/Futex.h +96 -0
  692. package/deps/rocksdb/rocksdb/third-party/folly/folly/functional/Invoke.h +40 -0
  693. package/deps/rocksdb/rocksdb/third-party/folly/folly/hash/Hash.h +29 -0
  694. package/deps/rocksdb/rocksdb/third-party/folly/folly/lang/Align.h +144 -0
  695. package/deps/rocksdb/rocksdb/third-party/folly/folly/lang/Bits.h +30 -0
  696. package/deps/rocksdb/rocksdb/third-party/folly/folly/lang/Launder.h +51 -0
  697. package/deps/rocksdb/rocksdb/third-party/folly/folly/portability/Asm.h +28 -0
  698. package/deps/rocksdb/rocksdb/third-party/folly/folly/portability/SysSyscall.h +10 -0
  699. package/deps/rocksdb/rocksdb/third-party/folly/folly/portability/SysTypes.h +26 -0
  700. package/deps/rocksdb/rocksdb/third-party/folly/folly/synchronization/AtomicNotification-inl.h +138 -0
  701. package/deps/rocksdb/rocksdb/third-party/folly/folly/synchronization/AtomicNotification.cpp +23 -0
  702. package/deps/rocksdb/rocksdb/third-party/folly/folly/synchronization/AtomicNotification.h +57 -0
  703. package/deps/rocksdb/rocksdb/third-party/folly/folly/synchronization/AtomicUtil-inl.h +260 -0
  704. package/deps/rocksdb/rocksdb/third-party/folly/folly/synchronization/AtomicUtil.h +52 -0
  705. package/deps/rocksdb/rocksdb/third-party/folly/folly/synchronization/Baton.h +328 -0
  706. package/deps/rocksdb/rocksdb/third-party/folly/folly/synchronization/DistributedMutex-inl.h +1703 -0
  707. package/deps/rocksdb/rocksdb/third-party/folly/folly/synchronization/DistributedMutex.cpp +16 -0
  708. package/deps/rocksdb/rocksdb/third-party/folly/folly/synchronization/DistributedMutex.h +304 -0
  709. package/deps/rocksdb/rocksdb/third-party/folly/folly/synchronization/DistributedMutexSpecializations.h +39 -0
  710. package/deps/rocksdb/rocksdb/third-party/folly/folly/synchronization/ParkingLot.cpp +26 -0
  711. package/deps/rocksdb/rocksdb/third-party/folly/folly/synchronization/ParkingLot.h +318 -0
  712. package/deps/rocksdb/rocksdb/third-party/folly/folly/synchronization/WaitOptions.cpp +12 -0
  713. package/deps/rocksdb/rocksdb/third-party/folly/folly/synchronization/WaitOptions.h +57 -0
  714. package/deps/rocksdb/rocksdb/third-party/folly/folly/synchronization/detail/InlineFunctionRef.h +219 -0
  715. package/deps/rocksdb/rocksdb/third-party/folly/folly/synchronization/detail/ProxyLockable-inl.h +207 -0
  716. package/deps/rocksdb/rocksdb/third-party/folly/folly/synchronization/detail/ProxyLockable.h +164 -0
  717. package/deps/rocksdb/rocksdb/third-party/folly/folly/synchronization/detail/Sleeper.h +57 -0
  718. package/deps/rocksdb/rocksdb/third-party/folly/folly/synchronization/detail/Spin.h +77 -0
  719. package/deps/rocksdb/rocksdb/third-party/gcc/ppc-asm.h +390 -0
  720. package/deps/rocksdb/rocksdb/thirdparty.inc +268 -0
  721. package/deps/rocksdb/rocksdb/tools/CMakeLists.txt +30 -0
  722. package/deps/rocksdb/rocksdb/tools/blob_dump.cc +110 -0
  723. package/deps/rocksdb/rocksdb/tools/block_cache_analyzer/__init__.py +2 -0
  724. package/deps/rocksdb/rocksdb/tools/block_cache_analyzer/block_cache_pysim.py +2000 -0
  725. package/deps/rocksdb/rocksdb/tools/block_cache_analyzer/block_cache_pysim.sh +156 -0
  726. package/deps/rocksdb/rocksdb/tools/block_cache_analyzer/block_cache_pysim_test.py +734 -0
  727. package/deps/rocksdb/rocksdb/tools/block_cache_analyzer/block_cache_trace_analyzer.cc +2307 -0
  728. package/deps/rocksdb/rocksdb/tools/block_cache_analyzer/block_cache_trace_analyzer.h +395 -0
  729. package/deps/rocksdb/rocksdb/tools/block_cache_analyzer/block_cache_trace_analyzer_plot.py +721 -0
  730. package/deps/rocksdb/rocksdb/tools/block_cache_analyzer/block_cache_trace_analyzer_test.cc +719 -0
  731. package/deps/rocksdb/rocksdb/tools/block_cache_analyzer/block_cache_trace_analyzer_tool.cc +25 -0
  732. package/deps/rocksdb/rocksdb/tools/db_bench.cc +21 -0
  733. package/deps/rocksdb/rocksdb/tools/db_bench_tool.cc +7416 -0
  734. package/deps/rocksdb/rocksdb/tools/db_bench_tool_test.cc +328 -0
  735. package/deps/rocksdb/rocksdb/tools/db_repl_stress.cc +130 -0
  736. package/deps/rocksdb/rocksdb/tools/db_sanity_test.cc +297 -0
  737. package/deps/rocksdb/rocksdb/tools/dump/db_dump_tool.cc +259 -0
  738. package/deps/rocksdb/rocksdb/tools/dump/rocksdb_dump.cc +63 -0
  739. package/deps/rocksdb/rocksdb/tools/dump/rocksdb_undump.cc +62 -0
  740. package/deps/rocksdb/rocksdb/tools/io_tracer_parser.cc +25 -0
  741. package/deps/rocksdb/rocksdb/tools/io_tracer_parser_test.cc +187 -0
  742. package/deps/rocksdb/rocksdb/tools/io_tracer_parser_tool.cc +120 -0
  743. package/deps/rocksdb/rocksdb/tools/io_tracer_parser_tool.h +40 -0
  744. package/deps/rocksdb/rocksdb/tools/ldb.cc +21 -0
  745. package/deps/rocksdb/rocksdb/tools/ldb_cmd.cc +3609 -0
  746. package/deps/rocksdb/rocksdb/tools/ldb_cmd_impl.h +665 -0
  747. package/deps/rocksdb/rocksdb/tools/ldb_cmd_test.cc +746 -0
  748. package/deps/rocksdb/rocksdb/tools/ldb_tool.cc +159 -0
  749. package/deps/rocksdb/rocksdb/tools/reduce_levels_test.cc +221 -0
  750. package/deps/rocksdb/rocksdb/tools/sst_dump.cc +20 -0
  751. package/deps/rocksdb/rocksdb/tools/sst_dump_test.cc +427 -0
  752. package/deps/rocksdb/rocksdb/tools/sst_dump_tool.cc +541 -0
  753. package/deps/rocksdb/rocksdb/tools/trace_analyzer.cc +25 -0
  754. package/deps/rocksdb/rocksdb/tools/trace_analyzer_test.cc +752 -0
  755. package/deps/rocksdb/rocksdb/tools/trace_analyzer_tool.cc +2001 -0
  756. package/deps/rocksdb/rocksdb/tools/trace_analyzer_tool.h +292 -0
  757. package/deps/rocksdb/rocksdb/tools/write_stress.cc +305 -0
  758. package/deps/rocksdb/rocksdb/trace_replay/block_cache_tracer.cc +496 -0
  759. package/deps/rocksdb/rocksdb/trace_replay/block_cache_tracer.h +294 -0
  760. package/deps/rocksdb/rocksdb/trace_replay/block_cache_tracer_test.cc +379 -0
  761. package/deps/rocksdb/rocksdb/trace_replay/io_tracer.cc +229 -0
  762. package/deps/rocksdb/rocksdb/trace_replay/io_tracer.h +174 -0
  763. package/deps/rocksdb/rocksdb/trace_replay/io_tracer_test.cc +215 -0
  764. package/deps/rocksdb/rocksdb/trace_replay/trace_replay.cc +491 -0
  765. package/deps/rocksdb/rocksdb/trace_replay/trace_replay.h +195 -0
  766. package/deps/rocksdb/rocksdb/util/aligned_buffer.h +255 -0
  767. package/deps/rocksdb/rocksdb/util/autovector.h +367 -0
  768. package/deps/rocksdb/rocksdb/util/autovector_test.cc +330 -0
  769. package/deps/rocksdb/rocksdb/util/bloom_impl.h +485 -0
  770. package/deps/rocksdb/rocksdb/util/bloom_test.cc +1191 -0
  771. package/deps/rocksdb/rocksdb/util/build_version.cc.in +5 -0
  772. package/deps/rocksdb/rocksdb/util/build_version.h +15 -0
  773. package/deps/rocksdb/rocksdb/util/cast_util.h +20 -0
  774. package/deps/rocksdb/rocksdb/util/channel.h +67 -0
  775. package/deps/rocksdb/rocksdb/util/coding.cc +89 -0
  776. package/deps/rocksdb/rocksdb/util/coding.h +419 -0
  777. package/deps/rocksdb/rocksdb/util/coding_lean.h +101 -0
  778. package/deps/rocksdb/rocksdb/util/coding_test.cc +217 -0
  779. package/deps/rocksdb/rocksdb/util/compaction_job_stats_impl.cc +92 -0
  780. package/deps/rocksdb/rocksdb/util/comparator.cc +219 -0
  781. package/deps/rocksdb/rocksdb/util/compression.h +1529 -0
  782. package/deps/rocksdb/rocksdb/util/compression_context_cache.cc +108 -0
  783. package/deps/rocksdb/rocksdb/util/compression_context_cache.h +47 -0
  784. package/deps/rocksdb/rocksdb/util/concurrent_task_limiter_impl.cc +67 -0
  785. package/deps/rocksdb/rocksdb/util/concurrent_task_limiter_impl.h +67 -0
  786. package/deps/rocksdb/rocksdb/util/core_local.h +83 -0
  787. package/deps/rocksdb/rocksdb/util/crc32c.cc +1283 -0
  788. package/deps/rocksdb/rocksdb/util/crc32c.h +51 -0
  789. package/deps/rocksdb/rocksdb/util/crc32c_arm64.cc +169 -0
  790. package/deps/rocksdb/rocksdb/util/crc32c_arm64.h +50 -0
  791. package/deps/rocksdb/rocksdb/util/crc32c_ppc.c +94 -0
  792. package/deps/rocksdb/rocksdb/util/crc32c_ppc.h +19 -0
  793. package/deps/rocksdb/rocksdb/util/crc32c_ppc_asm.S +756 -0
  794. package/deps/rocksdb/rocksdb/util/crc32c_ppc_constants.h +900 -0
  795. package/deps/rocksdb/rocksdb/util/crc32c_test.cc +180 -0
  796. package/deps/rocksdb/rocksdb/util/defer.h +52 -0
  797. package/deps/rocksdb/rocksdb/util/defer_test.cc +39 -0
  798. package/deps/rocksdb/rocksdb/util/duplicate_detector.h +68 -0
  799. package/deps/rocksdb/rocksdb/util/dynamic_bloom.cc +70 -0
  800. package/deps/rocksdb/rocksdb/util/dynamic_bloom.h +214 -0
  801. package/deps/rocksdb/rocksdb/util/dynamic_bloom_test.cc +323 -0
  802. package/deps/rocksdb/rocksdb/util/fastrange.h +112 -0
  803. package/deps/rocksdb/rocksdb/util/file_checksum_helper.cc +136 -0
  804. package/deps/rocksdb/rocksdb/util/file_checksum_helper.h +98 -0
  805. package/deps/rocksdb/rocksdb/util/file_reader_writer_test.cc +449 -0
  806. package/deps/rocksdb/rocksdb/util/filelock_test.cc +152 -0
  807. package/deps/rocksdb/rocksdb/util/filter_bench.cc +781 -0
  808. package/deps/rocksdb/rocksdb/util/gflags_compat.h +20 -0
  809. package/deps/rocksdb/rocksdb/util/hash.cc +83 -0
  810. package/deps/rocksdb/rocksdb/util/hash.h +107 -0
  811. package/deps/rocksdb/rocksdb/util/hash_map.h +67 -0
  812. package/deps/rocksdb/rocksdb/util/hash_test.cc +593 -0
  813. package/deps/rocksdb/rocksdb/util/heap.h +166 -0
  814. package/deps/rocksdb/rocksdb/util/heap_test.cc +139 -0
  815. package/deps/rocksdb/rocksdb/util/kv_map.h +33 -0
  816. package/deps/rocksdb/rocksdb/util/log_write_bench.cc +86 -0
  817. package/deps/rocksdb/rocksdb/util/math.h +186 -0
  818. package/deps/rocksdb/rocksdb/util/math128.h +298 -0
  819. package/deps/rocksdb/rocksdb/util/murmurhash.cc +191 -0
  820. package/deps/rocksdb/rocksdb/util/murmurhash.h +42 -0
  821. package/deps/rocksdb/rocksdb/util/mutexlock.h +186 -0
  822. package/deps/rocksdb/rocksdb/util/ppc-opcode.h +27 -0
  823. package/deps/rocksdb/rocksdb/util/random.cc +56 -0
  824. package/deps/rocksdb/rocksdb/util/random.h +186 -0
  825. package/deps/rocksdb/rocksdb/util/random_test.cc +105 -0
  826. package/deps/rocksdb/rocksdb/util/rate_limiter.cc +340 -0
  827. package/deps/rocksdb/rocksdb/util/rate_limiter.h +113 -0
  828. package/deps/rocksdb/rocksdb/util/rate_limiter_test.cc +251 -0
  829. package/deps/rocksdb/rocksdb/util/repeatable_thread.h +151 -0
  830. package/deps/rocksdb/rocksdb/util/repeatable_thread_test.cc +107 -0
  831. package/deps/rocksdb/rocksdb/util/ribbon_alg.h +1201 -0
  832. package/deps/rocksdb/rocksdb/util/ribbon_impl.h +1062 -0
  833. package/deps/rocksdb/rocksdb/util/ribbon_test.cc +931 -0
  834. package/deps/rocksdb/rocksdb/util/set_comparator.h +22 -0
  835. package/deps/rocksdb/rocksdb/util/slice.cc +243 -0
  836. package/deps/rocksdb/rocksdb/util/slice_test.cc +163 -0
  837. package/deps/rocksdb/rocksdb/util/slice_transform_test.cc +153 -0
  838. package/deps/rocksdb/rocksdb/util/status.cc +149 -0
  839. package/deps/rocksdb/rocksdb/util/stderr_logger.h +31 -0
  840. package/deps/rocksdb/rocksdb/util/stop_watch.h +118 -0
  841. package/deps/rocksdb/rocksdb/util/string_util.cc +422 -0
  842. package/deps/rocksdb/rocksdb/util/string_util.h +144 -0
  843. package/deps/rocksdb/rocksdb/util/thread_list_test.cc +354 -0
  844. package/deps/rocksdb/rocksdb/util/thread_local.cc +554 -0
  845. package/deps/rocksdb/rocksdb/util/thread_local.h +101 -0
  846. package/deps/rocksdb/rocksdb/util/thread_local_test.cc +583 -0
  847. package/deps/rocksdb/rocksdb/util/thread_operation.h +121 -0
  848. package/deps/rocksdb/rocksdb/util/threadpool_imp.cc +506 -0
  849. package/deps/rocksdb/rocksdb/util/threadpool_imp.h +112 -0
  850. package/deps/rocksdb/rocksdb/util/timer.h +331 -0
  851. package/deps/rocksdb/rocksdb/util/timer_queue.h +230 -0
  852. package/deps/rocksdb/rocksdb/util/timer_queue_test.cc +72 -0
  853. package/deps/rocksdb/rocksdb/util/timer_test.cc +399 -0
  854. package/deps/rocksdb/rocksdb/util/user_comparator_wrapper.h +80 -0
  855. package/deps/rocksdb/rocksdb/util/vector_iterator.h +101 -0
  856. package/deps/rocksdb/rocksdb/util/work_queue.h +148 -0
  857. package/deps/rocksdb/rocksdb/util/work_queue_test.cc +268 -0
  858. package/deps/rocksdb/rocksdb/util/xxh3p.h +1392 -0
  859. package/deps/rocksdb/rocksdb/util/xxhash.cc +1158 -0
  860. package/deps/rocksdb/rocksdb/util/xxhash.h +598 -0
  861. package/deps/rocksdb/rocksdb/utilities/backupable/backupable_db.cc +2354 -0
  862. package/deps/rocksdb/rocksdb/utilities/backupable/backupable_db_test.cc +2955 -0
  863. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_compaction_filter.cc +488 -0
  864. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_compaction_filter.h +199 -0
  865. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_db.cc +112 -0
  866. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_db.h +266 -0
  867. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_db_gc_stats.h +52 -0
  868. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_db_impl.cc +2167 -0
  869. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_db_impl.h +500 -0
  870. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_db_impl_filesnapshot.cc +113 -0
  871. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_db_iterator.h +147 -0
  872. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_db_listener.h +66 -0
  873. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_db_test.cc +2386 -0
  874. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_dump_tool.cc +281 -0
  875. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_dump_tool.h +58 -0
  876. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_file.cc +314 -0
  877. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_file.h +244 -0
  878. package/deps/rocksdb/rocksdb/utilities/cassandra/cassandra_compaction_filter.cc +47 -0
  879. package/deps/rocksdb/rocksdb/utilities/cassandra/cassandra_compaction_filter.h +42 -0
  880. package/deps/rocksdb/rocksdb/utilities/cassandra/cassandra_format_test.cc +375 -0
  881. package/deps/rocksdb/rocksdb/utilities/cassandra/cassandra_functional_test.cc +327 -0
  882. package/deps/rocksdb/rocksdb/utilities/cassandra/cassandra_row_merge_test.cc +114 -0
  883. package/deps/rocksdb/rocksdb/utilities/cassandra/cassandra_serialize_test.cc +187 -0
  884. package/deps/rocksdb/rocksdb/utilities/cassandra/format.cc +390 -0
  885. package/deps/rocksdb/rocksdb/utilities/cassandra/format.h +184 -0
  886. package/deps/rocksdb/rocksdb/utilities/cassandra/merge_operator.cc +67 -0
  887. package/deps/rocksdb/rocksdb/utilities/cassandra/merge_operator.h +44 -0
  888. package/deps/rocksdb/rocksdb/utilities/cassandra/serialize.h +75 -0
  889. package/deps/rocksdb/rocksdb/utilities/cassandra/test_utils.cc +72 -0
  890. package/deps/rocksdb/rocksdb/utilities/cassandra/test_utils.h +43 -0
  891. package/deps/rocksdb/rocksdb/utilities/checkpoint/checkpoint_impl.cc +588 -0
  892. package/deps/rocksdb/rocksdb/utilities/checkpoint/checkpoint_impl.h +82 -0
  893. package/deps/rocksdb/rocksdb/utilities/checkpoint/checkpoint_test.cc +821 -0
  894. package/deps/rocksdb/rocksdb/utilities/compaction_filters/layered_compaction_filter_base.h +37 -0
  895. package/deps/rocksdb/rocksdb/utilities/compaction_filters/remove_emptyvalue_compactionfilter.cc +29 -0
  896. package/deps/rocksdb/rocksdb/utilities/compaction_filters/remove_emptyvalue_compactionfilter.h +27 -0
  897. package/deps/rocksdb/rocksdb/utilities/convenience/info_log_finder.cc +25 -0
  898. package/deps/rocksdb/rocksdb/utilities/debug.cc +82 -0
  899. package/deps/rocksdb/rocksdb/utilities/env_librados.cc +1497 -0
  900. package/deps/rocksdb/rocksdb/utilities/env_librados_test.cc +1146 -0
  901. package/deps/rocksdb/rocksdb/utilities/env_mirror.cc +262 -0
  902. package/deps/rocksdb/rocksdb/utilities/env_mirror_test.cc +223 -0
  903. package/deps/rocksdb/rocksdb/utilities/env_timed.cc +145 -0
  904. package/deps/rocksdb/rocksdb/utilities/env_timed_test.cc +44 -0
  905. package/deps/rocksdb/rocksdb/utilities/fault_injection_env.cc +490 -0
  906. package/deps/rocksdb/rocksdb/utilities/fault_injection_env.h +242 -0
  907. package/deps/rocksdb/rocksdb/utilities/fault_injection_fs.cc +581 -0
  908. package/deps/rocksdb/rocksdb/utilities/fault_injection_fs.h +437 -0
  909. package/deps/rocksdb/rocksdb/utilities/leveldb_options/leveldb_options.cc +56 -0
  910. package/deps/rocksdb/rocksdb/utilities/memory/memory_test.cc +275 -0
  911. package/deps/rocksdb/rocksdb/utilities/memory/memory_util.cc +52 -0
  912. package/deps/rocksdb/rocksdb/utilities/merge_operators/bytesxor.cc +59 -0
  913. package/deps/rocksdb/rocksdb/utilities/merge_operators/bytesxor.h +39 -0
  914. package/deps/rocksdb/rocksdb/utilities/merge_operators/max.cc +77 -0
  915. package/deps/rocksdb/rocksdb/utilities/merge_operators/put.cc +83 -0
  916. package/deps/rocksdb/rocksdb/utilities/merge_operators/sortlist.cc +97 -0
  917. package/deps/rocksdb/rocksdb/utilities/merge_operators/sortlist.h +38 -0
  918. package/deps/rocksdb/rocksdb/utilities/merge_operators/string_append/stringappend.cc +59 -0
  919. package/deps/rocksdb/rocksdb/utilities/merge_operators/string_append/stringappend.h +31 -0
  920. package/deps/rocksdb/rocksdb/utilities/merge_operators/string_append/stringappend2.cc +117 -0
  921. package/deps/rocksdb/rocksdb/utilities/merge_operators/string_append/stringappend2.h +49 -0
  922. package/deps/rocksdb/rocksdb/utilities/merge_operators/string_append/stringappend_test.cc +598 -0
  923. package/deps/rocksdb/rocksdb/utilities/merge_operators/uint64add.cc +69 -0
  924. package/deps/rocksdb/rocksdb/utilities/merge_operators.h +55 -0
  925. package/deps/rocksdb/rocksdb/utilities/object_registry.cc +87 -0
  926. package/deps/rocksdb/rocksdb/utilities/object_registry_test.cc +174 -0
  927. package/deps/rocksdb/rocksdb/utilities/option_change_migration/option_change_migration.cc +168 -0
  928. package/deps/rocksdb/rocksdb/utilities/option_change_migration/option_change_migration_test.cc +431 -0
  929. package/deps/rocksdb/rocksdb/utilities/options/options_util.cc +159 -0
  930. package/deps/rocksdb/rocksdb/utilities/options/options_util_test.cc +655 -0
  931. package/deps/rocksdb/rocksdb/utilities/persistent_cache/block_cache_tier.cc +425 -0
  932. package/deps/rocksdb/rocksdb/utilities/persistent_cache/block_cache_tier.h +156 -0
  933. package/deps/rocksdb/rocksdb/utilities/persistent_cache/block_cache_tier_file.cc +609 -0
  934. package/deps/rocksdb/rocksdb/utilities/persistent_cache/block_cache_tier_file.h +296 -0
  935. package/deps/rocksdb/rocksdb/utilities/persistent_cache/block_cache_tier_file_buffer.h +127 -0
  936. package/deps/rocksdb/rocksdb/utilities/persistent_cache/block_cache_tier_metadata.cc +86 -0
  937. package/deps/rocksdb/rocksdb/utilities/persistent_cache/block_cache_tier_metadata.h +125 -0
  938. package/deps/rocksdb/rocksdb/utilities/persistent_cache/hash_table.h +238 -0
  939. package/deps/rocksdb/rocksdb/utilities/persistent_cache/hash_table_bench.cc +308 -0
  940. package/deps/rocksdb/rocksdb/utilities/persistent_cache/hash_table_evictable.h +168 -0
  941. package/deps/rocksdb/rocksdb/utilities/persistent_cache/hash_table_test.cc +160 -0
  942. package/deps/rocksdb/rocksdb/utilities/persistent_cache/lrulist.h +174 -0
  943. package/deps/rocksdb/rocksdb/utilities/persistent_cache/persistent_cache_bench.cc +360 -0
  944. package/deps/rocksdb/rocksdb/utilities/persistent_cache/persistent_cache_test.cc +456 -0
  945. package/deps/rocksdb/rocksdb/utilities/persistent_cache/persistent_cache_test.h +286 -0
  946. package/deps/rocksdb/rocksdb/utilities/persistent_cache/persistent_cache_tier.cc +167 -0
  947. package/deps/rocksdb/rocksdb/utilities/persistent_cache/persistent_cache_tier.h +339 -0
  948. package/deps/rocksdb/rocksdb/utilities/persistent_cache/persistent_cache_util.h +67 -0
  949. package/deps/rocksdb/rocksdb/utilities/persistent_cache/volatile_tier_impl.cc +140 -0
  950. package/deps/rocksdb/rocksdb/utilities/persistent_cache/volatile_tier_impl.h +142 -0
  951. package/deps/rocksdb/rocksdb/utilities/simulator_cache/cache_simulator.cc +285 -0
  952. package/deps/rocksdb/rocksdb/utilities/simulator_cache/cache_simulator.h +231 -0
  953. package/deps/rocksdb/rocksdb/utilities/simulator_cache/cache_simulator_test.cc +494 -0
  954. package/deps/rocksdb/rocksdb/utilities/simulator_cache/sim_cache.cc +356 -0
  955. package/deps/rocksdb/rocksdb/utilities/simulator_cache/sim_cache_test.cc +224 -0
  956. package/deps/rocksdb/rocksdb/utilities/table_properties_collectors/compact_on_deletion_collector.cc +122 -0
  957. package/deps/rocksdb/rocksdb/utilities/table_properties_collectors/compact_on_deletion_collector.h +72 -0
  958. package/deps/rocksdb/rocksdb/utilities/table_properties_collectors/compact_on_deletion_collector_test.cc +244 -0
  959. package/deps/rocksdb/rocksdb/utilities/trace/file_trace_reader_writer.cc +125 -0
  960. package/deps/rocksdb/rocksdb/utilities/trace/file_trace_reader_writer.h +48 -0
  961. package/deps/rocksdb/rocksdb/utilities/transactions/lock/lock_manager.cc +29 -0
  962. package/deps/rocksdb/rocksdb/utilities/transactions/lock/lock_manager.h +82 -0
  963. package/deps/rocksdb/rocksdb/utilities/transactions/lock/lock_tracker.h +209 -0
  964. package/deps/rocksdb/rocksdb/utilities/transactions/lock/point/point_lock_manager.cc +720 -0
  965. package/deps/rocksdb/rocksdb/utilities/transactions/lock/point/point_lock_manager.h +223 -0
  966. package/deps/rocksdb/rocksdb/utilities/transactions/lock/point/point_lock_manager_test.cc +181 -0
  967. package/deps/rocksdb/rocksdb/utilities/transactions/lock/point/point_lock_manager_test.h +319 -0
  968. package/deps/rocksdb/rocksdb/utilities/transactions/lock/point/point_lock_tracker.cc +270 -0
  969. package/deps/rocksdb/rocksdb/utilities/transactions/lock/point/point_lock_tracker.h +99 -0
  970. package/deps/rocksdb/rocksdb/utilities/transactions/lock/range/range_lock_manager.h +30 -0
  971. package/deps/rocksdb/rocksdb/utilities/transactions/lock/range/range_locking_test.cc +306 -0
  972. package/deps/rocksdb/rocksdb/utilities/transactions/lock/range/range_tree/lib/COPYING.AGPLv3 +661 -0
  973. package/deps/rocksdb/rocksdb/utilities/transactions/lock/range/range_tree/lib/COPYING.APACHEv2 +174 -0
  974. package/deps/rocksdb/rocksdb/utilities/transactions/lock/range/range_tree/lib/COPYING.GPLv2 +339 -0
  975. package/deps/rocksdb/rocksdb/utilities/transactions/lock/range/range_tree/lib/db.h +76 -0
  976. package/deps/rocksdb/rocksdb/utilities/transactions/lock/range/range_tree/lib/ft/comparator.h +138 -0
  977. package/deps/rocksdb/rocksdb/utilities/transactions/lock/range/range_tree/lib/ft/ft-status.h +102 -0
  978. package/deps/rocksdb/rocksdb/utilities/transactions/lock/range/range_tree/lib/locktree/concurrent_tree.cc +139 -0
  979. package/deps/rocksdb/rocksdb/utilities/transactions/lock/range/range_tree/lib/locktree/concurrent_tree.h +174 -0
  980. package/deps/rocksdb/rocksdb/utilities/transactions/lock/range/range_tree/lib/locktree/keyrange.cc +222 -0
  981. package/deps/rocksdb/rocksdb/utilities/transactions/lock/range/range_tree/lib/locktree/keyrange.h +141 -0
  982. package/deps/rocksdb/rocksdb/utilities/transactions/lock/range/range_tree/lib/locktree/lock_request.cc +525 -0
  983. package/deps/rocksdb/rocksdb/utilities/transactions/lock/range/range_tree/lib/locktree/lock_request.h +253 -0
  984. package/deps/rocksdb/rocksdb/utilities/transactions/lock/range/range_tree/lib/locktree/locktree.cc +1007 -0
  985. package/deps/rocksdb/rocksdb/utilities/transactions/lock/range/range_tree/lib/locktree/locktree.h +560 -0
  986. package/deps/rocksdb/rocksdb/utilities/transactions/lock/range/range_tree/lib/locktree/manager.cc +527 -0
  987. package/deps/rocksdb/rocksdb/utilities/transactions/lock/range/range_tree/lib/locktree/range_buffer.cc +265 -0
  988. package/deps/rocksdb/rocksdb/utilities/transactions/lock/range/range_tree/lib/locktree/range_buffer.h +178 -0
  989. package/deps/rocksdb/rocksdb/utilities/transactions/lock/range/range_tree/lib/locktree/treenode.cc +520 -0
  990. package/deps/rocksdb/rocksdb/utilities/transactions/lock/range/range_tree/lib/locktree/treenode.h +302 -0
  991. package/deps/rocksdb/rocksdb/utilities/transactions/lock/range/range_tree/lib/locktree/txnid_set.cc +120 -0
  992. package/deps/rocksdb/rocksdb/utilities/transactions/lock/range/range_tree/lib/locktree/txnid_set.h +92 -0
  993. package/deps/rocksdb/rocksdb/utilities/transactions/lock/range/range_tree/lib/locktree/wfg.cc +213 -0
  994. package/deps/rocksdb/rocksdb/utilities/transactions/lock/range/range_tree/lib/locktree/wfg.h +124 -0
  995. package/deps/rocksdb/rocksdb/utilities/transactions/lock/range/range_tree/lib/portability/memory.h +215 -0
  996. package/deps/rocksdb/rocksdb/utilities/transactions/lock/range/range_tree/lib/portability/toku_assert_subst.h +39 -0
  997. package/deps/rocksdb/rocksdb/utilities/transactions/lock/range/range_tree/lib/portability/toku_atomic.h +130 -0
  998. package/deps/rocksdb/rocksdb/utilities/transactions/lock/range/range_tree/lib/portability/toku_external_pthread.h +82 -0
  999. package/deps/rocksdb/rocksdb/utilities/transactions/lock/range/range_tree/lib/portability/toku_instrumentation.h +286 -0
  1000. package/deps/rocksdb/rocksdb/utilities/transactions/lock/range/range_tree/lib/portability/toku_portability.h +87 -0
  1001. package/deps/rocksdb/rocksdb/utilities/transactions/lock/range/range_tree/lib/portability/toku_pthread.h +520 -0
  1002. package/deps/rocksdb/rocksdb/utilities/transactions/lock/range/range_tree/lib/portability/toku_race_tools.h +179 -0
  1003. package/deps/rocksdb/rocksdb/utilities/transactions/lock/range/range_tree/lib/portability/toku_time.h +172 -0
  1004. package/deps/rocksdb/rocksdb/utilities/transactions/lock/range/range_tree/lib/portability/txn_subst.h +27 -0
  1005. package/deps/rocksdb/rocksdb/utilities/transactions/lock/range/range_tree/lib/standalone_port.cc +132 -0
  1006. package/deps/rocksdb/rocksdb/utilities/transactions/lock/range/range_tree/lib/util/dbt.cc +153 -0
  1007. package/deps/rocksdb/rocksdb/utilities/transactions/lock/range/range_tree/lib/util/dbt.h +98 -0
  1008. package/deps/rocksdb/rocksdb/utilities/transactions/lock/range/range_tree/lib/util/growable_array.h +144 -0
  1009. package/deps/rocksdb/rocksdb/utilities/transactions/lock/range/range_tree/lib/util/memarena.cc +201 -0
  1010. package/deps/rocksdb/rocksdb/utilities/transactions/lock/range/range_tree/lib/util/memarena.h +141 -0
  1011. package/deps/rocksdb/rocksdb/utilities/transactions/lock/range/range_tree/lib/util/omt.h +794 -0
  1012. package/deps/rocksdb/rocksdb/utilities/transactions/lock/range/range_tree/lib/util/omt_impl.h +1295 -0
  1013. package/deps/rocksdb/rocksdb/utilities/transactions/lock/range/range_tree/lib/util/partitioned_counter.h +165 -0
  1014. package/deps/rocksdb/rocksdb/utilities/transactions/lock/range/range_tree/lib/util/status.h +76 -0
  1015. package/deps/rocksdb/rocksdb/utilities/transactions/lock/range/range_tree/range_tree_lock_manager.cc +479 -0
  1016. package/deps/rocksdb/rocksdb/utilities/transactions/lock/range/range_tree/range_tree_lock_manager.h +130 -0
  1017. package/deps/rocksdb/rocksdb/utilities/transactions/lock/range/range_tree/range_tree_lock_tracker.cc +156 -0
  1018. package/deps/rocksdb/rocksdb/utilities/transactions/lock/range/range_tree/range_tree_lock_tracker.h +146 -0
  1019. package/deps/rocksdb/rocksdb/utilities/transactions/optimistic_transaction.cc +196 -0
  1020. package/deps/rocksdb/rocksdb/utilities/transactions/optimistic_transaction.h +101 -0
  1021. package/deps/rocksdb/rocksdb/utilities/transactions/optimistic_transaction_db_impl.cc +111 -0
  1022. package/deps/rocksdb/rocksdb/utilities/transactions/optimistic_transaction_db_impl.h +87 -0
  1023. package/deps/rocksdb/rocksdb/utilities/transactions/optimistic_transaction_test.cc +1418 -0
  1024. package/deps/rocksdb/rocksdb/utilities/transactions/pessimistic_transaction.cc +752 -0
  1025. package/deps/rocksdb/rocksdb/utilities/transactions/pessimistic_transaction.h +232 -0
  1026. package/deps/rocksdb/rocksdb/utilities/transactions/pessimistic_transaction_db.cc +628 -0
  1027. package/deps/rocksdb/rocksdb/utilities/transactions/pessimistic_transaction_db.h +228 -0
  1028. package/deps/rocksdb/rocksdb/utilities/transactions/snapshot_checker.cc +49 -0
  1029. package/deps/rocksdb/rocksdb/utilities/transactions/transaction_base.cc +678 -0
  1030. package/deps/rocksdb/rocksdb/utilities/transactions/transaction_base.h +373 -0
  1031. package/deps/rocksdb/rocksdb/utilities/transactions/transaction_db_mutex_impl.cc +135 -0
  1032. package/deps/rocksdb/rocksdb/utilities/transactions/transaction_db_mutex_impl.h +26 -0
  1033. package/deps/rocksdb/rocksdb/utilities/transactions/transaction_test.cc +6350 -0
  1034. package/deps/rocksdb/rocksdb/utilities/transactions/transaction_test.h +522 -0
  1035. package/deps/rocksdb/rocksdb/utilities/transactions/transaction_util.cc +188 -0
  1036. package/deps/rocksdb/rocksdb/utilities/transactions/transaction_util.h +80 -0
  1037. package/deps/rocksdb/rocksdb/utilities/transactions/write_prepared_transaction_test.cc +3531 -0
  1038. package/deps/rocksdb/rocksdb/utilities/transactions/write_prepared_txn.cc +483 -0
  1039. package/deps/rocksdb/rocksdb/utilities/transactions/write_prepared_txn.h +119 -0
  1040. package/deps/rocksdb/rocksdb/utilities/transactions/write_prepared_txn_db.cc +999 -0
  1041. package/deps/rocksdb/rocksdb/utilities/transactions/write_prepared_txn_db.h +1109 -0
  1042. package/deps/rocksdb/rocksdb/utilities/transactions/write_unprepared_transaction_test.cc +786 -0
  1043. package/deps/rocksdb/rocksdb/utilities/transactions/write_unprepared_txn.cc +1039 -0
  1044. package/deps/rocksdb/rocksdb/utilities/transactions/write_unprepared_txn.h +341 -0
  1045. package/deps/rocksdb/rocksdb/utilities/transactions/write_unprepared_txn_db.cc +470 -0
  1046. package/deps/rocksdb/rocksdb/utilities/transactions/write_unprepared_txn_db.h +108 -0
  1047. package/deps/rocksdb/rocksdb/utilities/ttl/db_ttl_impl.cc +332 -0
  1048. package/deps/rocksdb/rocksdb/utilities/ttl/db_ttl_impl.h +353 -0
  1049. package/deps/rocksdb/rocksdb/utilities/ttl/ttl_test.cc +703 -0
  1050. package/deps/rocksdb/rocksdb/utilities/util_merge_operators_test.cc +99 -0
  1051. package/deps/rocksdb/rocksdb/utilities/write_batch_with_index/write_batch_with_index.cc +617 -0
  1052. package/deps/rocksdb/rocksdb/utilities/write_batch_with_index/write_batch_with_index_internal.cc +345 -0
  1053. package/deps/rocksdb/rocksdb/utilities/write_batch_with_index/write_batch_with_index_internal.h +569 -0
  1054. package/deps/rocksdb/rocksdb/utilities/write_batch_with_index/write_batch_with_index_test.cc +1867 -0
  1055. package/deps/rocksdb/rocksdb.gyp +475 -0
  1056. package/deps/snappy/freebsd/config.h +135 -0
  1057. package/deps/snappy/freebsd/snappy-stubs-public.h +100 -0
  1058. package/deps/snappy/linux/config.h +135 -0
  1059. package/deps/snappy/linux/snappy-stubs-public.h +100 -0
  1060. package/deps/snappy/mac/config.h +137 -0
  1061. package/deps/snappy/mac/snappy-stubs-public.h +100 -0
  1062. package/deps/snappy/openbsd/config.h +135 -0
  1063. package/deps/snappy/openbsd/snappy-stubs-public.h +100 -0
  1064. package/deps/snappy/snappy-1.1.7/COPYING +54 -0
  1065. package/deps/snappy/snappy-1.1.7/cmake/SnappyConfig.cmake +1 -0
  1066. package/deps/snappy/snappy-1.1.7/cmake/config.h.in +62 -0
  1067. package/deps/snappy/snappy-1.1.7/snappy-c.cc +90 -0
  1068. package/deps/snappy/snappy-1.1.7/snappy-c.h +138 -0
  1069. package/deps/snappy/snappy-1.1.7/snappy-internal.h +224 -0
  1070. package/deps/snappy/snappy-1.1.7/snappy-sinksource.cc +104 -0
  1071. package/deps/snappy/snappy-1.1.7/snappy-sinksource.h +182 -0
  1072. package/deps/snappy/snappy-1.1.7/snappy-stubs-internal.cc +42 -0
  1073. package/deps/snappy/snappy-1.1.7/snappy-stubs-internal.h +561 -0
  1074. package/deps/snappy/snappy-1.1.7/snappy-stubs-public.h.in +94 -0
  1075. package/deps/snappy/snappy-1.1.7/snappy-test.cc +612 -0
  1076. package/deps/snappy/snappy-1.1.7/snappy-test.h +573 -0
  1077. package/deps/snappy/snappy-1.1.7/snappy.cc +1515 -0
  1078. package/deps/snappy/snappy-1.1.7/snappy.h +203 -0
  1079. package/deps/snappy/snappy-1.1.7/snappy_unittest.cc +1410 -0
  1080. package/deps/snappy/snappy.gyp +90 -0
  1081. package/deps/snappy/solaris/config.h +135 -0
  1082. package/deps/snappy/solaris/snappy-stubs-public.h +100 -0
  1083. package/deps/snappy/win32/config.h +29 -0
  1084. package/deps/snappy/win32/snappy-stubs-public.h +100 -0
  1085. package/iterator.js +55 -0
  1086. package/leveldown.js +113 -0
  1087. package/package-lock.json +23687 -0
  1088. package/package.json +70 -0
@@ -0,0 +1,3531 @@
1
+ // Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
2
+ // This source code is licensed under both the GPLv2 (found in the
3
+ // COPYING file in the root directory) and Apache 2.0 License
4
+ // (found in the LICENSE.Apache file in the root directory).
5
+
6
+ #ifndef ROCKSDB_LITE
7
+
8
+ #include <algorithm>
9
+ #include <atomic>
10
+ #include <cinttypes>
11
+ #include <functional>
12
+ #include <string>
13
+ #include <thread>
14
+
15
+ #include "db/db_impl/db_impl.h"
16
+ #include "db/dbformat.h"
17
+ #include "port/port.h"
18
+ #include "rocksdb/db.h"
19
+ #include "rocksdb/options.h"
20
+ #include "rocksdb/types.h"
21
+ #include "rocksdb/utilities/debug.h"
22
+ #include "rocksdb/utilities/transaction.h"
23
+ #include "rocksdb/utilities/transaction_db.h"
24
+ #include "table/mock_table.h"
25
+ #include "test_util/sync_point.h"
26
+ #include "test_util/testharness.h"
27
+ #include "test_util/testutil.h"
28
+ #include "test_util/transaction_test_util.h"
29
+ #include "util/mutexlock.h"
30
+ #include "util/random.h"
31
+ #include "util/string_util.h"
32
+ #include "utilities/fault_injection_env.h"
33
+ #include "utilities/merge_operators.h"
34
+ #include "utilities/merge_operators/string_append/stringappend.h"
35
+ #include "utilities/transactions/pessimistic_transaction_db.h"
36
+ #include "utilities/transactions/transaction_test.h"
37
+ #include "utilities/transactions/write_prepared_txn_db.h"
38
+
39
+ using std::string;
40
+
41
+ namespace ROCKSDB_NAMESPACE {
42
+
43
+ using CommitEntry = WritePreparedTxnDB::CommitEntry;
44
+ using CommitEntry64b = WritePreparedTxnDB::CommitEntry64b;
45
+ using CommitEntry64bFormat = WritePreparedTxnDB::CommitEntry64bFormat;
46
+
47
+ TEST(PreparedHeap, BasicsTest) {
48
+ WritePreparedTxnDB::PreparedHeap heap;
49
+ {
50
+ MutexLock ml(heap.push_pop_mutex());
51
+ heap.push(14l);
52
+ // Test with one element
53
+ ASSERT_EQ(14l, heap.top());
54
+ heap.push(24l);
55
+ heap.push(34l);
56
+ // Test that old min is still on top
57
+ ASSERT_EQ(14l, heap.top());
58
+ heap.push(44l);
59
+ heap.push(54l);
60
+ heap.push(64l);
61
+ heap.push(74l);
62
+ heap.push(84l);
63
+ }
64
+ // Test that old min is still on top
65
+ ASSERT_EQ(14l, heap.top());
66
+ heap.erase(24l);
67
+ // Test that old min is still on top
68
+ ASSERT_EQ(14l, heap.top());
69
+ heap.erase(14l);
70
+ // Test that the new comes to the top after multiple erase
71
+ ASSERT_EQ(34l, heap.top());
72
+ heap.erase(34l);
73
+ // Test that the new comes to the top after single erase
74
+ ASSERT_EQ(44l, heap.top());
75
+ heap.erase(54l);
76
+ ASSERT_EQ(44l, heap.top());
77
+ heap.pop(); // pop 44l
78
+ // Test that the erased items are ignored after pop
79
+ ASSERT_EQ(64l, heap.top());
80
+ heap.erase(44l);
81
+ // Test that erasing an already popped item would work
82
+ ASSERT_EQ(64l, heap.top());
83
+ heap.erase(84l);
84
+ ASSERT_EQ(64l, heap.top());
85
+ {
86
+ MutexLock ml(heap.push_pop_mutex());
87
+ heap.push(85l);
88
+ heap.push(86l);
89
+ heap.push(87l);
90
+ heap.push(88l);
91
+ heap.push(89l);
92
+ }
93
+ heap.erase(87l);
94
+ heap.erase(85l);
95
+ heap.erase(89l);
96
+ heap.erase(86l);
97
+ heap.erase(88l);
98
+ // Test top remains the same after a random order of many erases
99
+ ASSERT_EQ(64l, heap.top());
100
+ heap.pop();
101
+ // Test that pop works with a series of random pending erases
102
+ ASSERT_EQ(74l, heap.top());
103
+ ASSERT_FALSE(heap.empty());
104
+ heap.pop();
105
+ // Test that empty works
106
+ ASSERT_TRUE(heap.empty());
107
+ }
108
+
109
+ // This is a scenario reconstructed from a buggy trace. Test that the bug does
110
+ // not resurface again.
111
+ TEST(PreparedHeap, EmptyAtTheEnd) {
112
+ WritePreparedTxnDB::PreparedHeap heap;
113
+ {
114
+ MutexLock ml(heap.push_pop_mutex());
115
+ heap.push(40l);
116
+ }
117
+ ASSERT_EQ(40l, heap.top());
118
+ // Although not a recommended scenario, we must be resilient against erase
119
+ // without a prior push.
120
+ heap.erase(50l);
121
+ ASSERT_EQ(40l, heap.top());
122
+ {
123
+ MutexLock ml(heap.push_pop_mutex());
124
+ heap.push(60l);
125
+ }
126
+ ASSERT_EQ(40l, heap.top());
127
+
128
+ heap.erase(60l);
129
+ ASSERT_EQ(40l, heap.top());
130
+ heap.erase(40l);
131
+ ASSERT_TRUE(heap.empty());
132
+
133
+ {
134
+ MutexLock ml(heap.push_pop_mutex());
135
+ heap.push(40l);
136
+ }
137
+ ASSERT_EQ(40l, heap.top());
138
+ heap.erase(50l);
139
+ ASSERT_EQ(40l, heap.top());
140
+ {
141
+ MutexLock ml(heap.push_pop_mutex());
142
+ heap.push(60l);
143
+ }
144
+ ASSERT_EQ(40l, heap.top());
145
+
146
+ heap.erase(40l);
147
+ // Test that the erase has not emptied the heap (we had a bug doing that)
148
+ ASSERT_FALSE(heap.empty());
149
+ ASSERT_EQ(60l, heap.top());
150
+ heap.erase(60l);
151
+ ASSERT_TRUE(heap.empty());
152
+ }
153
+
154
+ // Generate random order of PreparedHeap access and test that the heap will be
155
+ // successfully emptied at the end.
156
+ TEST(PreparedHeap, Concurrent) {
157
+ const size_t t_cnt = 10;
158
+ ROCKSDB_NAMESPACE::port::Thread t[t_cnt + 1];
159
+ WritePreparedTxnDB::PreparedHeap heap;
160
+ port::RWMutex prepared_mutex;
161
+ std::atomic<size_t> last;
162
+
163
+ for (size_t n = 0; n < 100; n++) {
164
+ last = 0;
165
+ t[0] = ROCKSDB_NAMESPACE::port::Thread([&]() {
166
+ Random rnd(1103);
167
+ for (size_t seq = 1; seq <= t_cnt; seq++) {
168
+ // This is not recommended usage but we should be resilient against it.
169
+ bool skip_push = rnd.OneIn(5);
170
+ if (!skip_push) {
171
+ MutexLock ml(heap.push_pop_mutex());
172
+ std::this_thread::yield();
173
+ heap.push(seq);
174
+ last.store(seq);
175
+ }
176
+ }
177
+ });
178
+ for (size_t i = 1; i <= t_cnt; i++) {
179
+ t[i] =
180
+ ROCKSDB_NAMESPACE::port::Thread([&heap, &prepared_mutex, &last, i]() {
181
+ auto seq = i;
182
+ do {
183
+ std::this_thread::yield();
184
+ } while (last.load() < seq);
185
+ WriteLock wl(&prepared_mutex);
186
+ heap.erase(seq);
187
+ });
188
+ }
189
+ for (size_t i = 0; i <= t_cnt; i++) {
190
+ t[i].join();
191
+ }
192
+ ASSERT_TRUE(heap.empty());
193
+ }
194
+ }
195
+
196
+ // Test that WriteBatchWithIndex correctly counts the number of sub-batches
197
+ TEST(WriteBatchWithIndex, SubBatchCnt) {
198
+ ColumnFamilyOptions cf_options;
199
+ std::string cf_name = "two";
200
+ DB* db;
201
+ Options options;
202
+ options.create_if_missing = true;
203
+ const std::string dbname = test::PerThreadDBPath("transaction_testdb");
204
+ EXPECT_OK(DestroyDB(dbname, options));
205
+ ASSERT_OK(DB::Open(options, dbname, &db));
206
+ ColumnFamilyHandle* cf_handle = nullptr;
207
+ ASSERT_OK(db->CreateColumnFamily(cf_options, cf_name, &cf_handle));
208
+ WriteOptions write_options;
209
+ size_t batch_cnt = 1;
210
+ size_t save_points = 0;
211
+ std::vector<size_t> batch_cnt_at;
212
+ WriteBatchWithIndex batch(db->DefaultColumnFamily()->GetComparator(), 0, true,
213
+ 0);
214
+ ASSERT_EQ(batch_cnt, batch.SubBatchCnt());
215
+ batch_cnt_at.push_back(batch_cnt);
216
+ batch.SetSavePoint();
217
+ save_points++;
218
+ ASSERT_OK(batch.Put(Slice("key"), Slice("value")));
219
+ ASSERT_EQ(batch_cnt, batch.SubBatchCnt());
220
+ batch_cnt_at.push_back(batch_cnt);
221
+ batch.SetSavePoint();
222
+ save_points++;
223
+ ASSERT_OK(batch.Put(Slice("key2"), Slice("value2")));
224
+ ASSERT_EQ(batch_cnt, batch.SubBatchCnt());
225
+ // duplicate the keys
226
+ batch_cnt_at.push_back(batch_cnt);
227
+ batch.SetSavePoint();
228
+ save_points++;
229
+ ASSERT_OK(batch.Put(Slice("key"), Slice("value3")));
230
+ batch_cnt++;
231
+ ASSERT_EQ(batch_cnt, batch.SubBatchCnt());
232
+ // duplicate the 2nd key. It should not be counted duplicate since a
233
+ // sub-patch is cut after the last duplicate.
234
+ batch_cnt_at.push_back(batch_cnt);
235
+ batch.SetSavePoint();
236
+ save_points++;
237
+ ASSERT_OK(batch.Put(Slice("key2"), Slice("value4")));
238
+ ASSERT_EQ(batch_cnt, batch.SubBatchCnt());
239
+ // duplicate the keys but in a different cf. It should not be counted as
240
+ // duplicate keys
241
+ batch_cnt_at.push_back(batch_cnt);
242
+ batch.SetSavePoint();
243
+ save_points++;
244
+ ASSERT_OK(batch.Put(cf_handle, Slice("key"), Slice("value5")));
245
+ ASSERT_EQ(batch_cnt, batch.SubBatchCnt());
246
+
247
+ // Test that the number of sub-batches matches what we count with
248
+ // SubBatchCounter
249
+ std::map<uint32_t, const Comparator*> comparators;
250
+ comparators[0] = db->DefaultColumnFamily()->GetComparator();
251
+ comparators[cf_handle->GetID()] = cf_handle->GetComparator();
252
+ SubBatchCounter counter(comparators);
253
+ ASSERT_OK(batch.GetWriteBatch()->Iterate(&counter));
254
+ ASSERT_EQ(batch_cnt, counter.BatchCount());
255
+
256
+ // Test that RollbackToSavePoint will properly resets the number of
257
+ // sub-batches
258
+ for (size_t i = save_points; i > 0; i--) {
259
+ ASSERT_OK(batch.RollbackToSavePoint());
260
+ ASSERT_EQ(batch_cnt_at[i - 1], batch.SubBatchCnt());
261
+ }
262
+
263
+ // Test the count is right with random batches
264
+ {
265
+ const size_t TOTAL_KEYS = 20; // 20 ~= 10 to cause a few randoms
266
+ Random rnd(1131);
267
+ std::string keys[TOTAL_KEYS];
268
+ for (size_t k = 0; k < TOTAL_KEYS; k++) {
269
+ int len = static_cast<int>(rnd.Uniform(50));
270
+ keys[k] = test::RandomKey(&rnd, len);
271
+ }
272
+ for (size_t i = 0; i < 1000; i++) { // 1000 random batches
273
+ WriteBatchWithIndex rndbatch(db->DefaultColumnFamily()->GetComparator(),
274
+ 0, true, 0);
275
+ for (size_t k = 0; k < 10; k++) { // 10 key per batch
276
+ size_t ki = static_cast<size_t>(rnd.Uniform(TOTAL_KEYS));
277
+ Slice key = Slice(keys[ki]);
278
+ std::string tmp = rnd.RandomString(16);
279
+ Slice value = Slice(tmp);
280
+ ASSERT_OK(rndbatch.Put(key, value));
281
+ }
282
+ SubBatchCounter batch_counter(comparators);
283
+ ASSERT_OK(rndbatch.GetWriteBatch()->Iterate(&batch_counter));
284
+ ASSERT_EQ(rndbatch.SubBatchCnt(), batch_counter.BatchCount());
285
+ }
286
+ }
287
+
288
+ delete cf_handle;
289
+ delete db;
290
+ }
291
+
292
+ TEST(CommitEntry64b, BasicTest) {
293
+ const size_t INDEX_BITS = static_cast<size_t>(21);
294
+ const size_t INDEX_SIZE = static_cast<size_t>(1ull << INDEX_BITS);
295
+ const CommitEntry64bFormat FORMAT(static_cast<size_t>(INDEX_BITS));
296
+
297
+ // zero-initialized CommitEntry64b should indicate an empty entry
298
+ CommitEntry64b empty_entry64b;
299
+ uint64_t empty_index = 11ul;
300
+ CommitEntry empty_entry;
301
+ bool ok = empty_entry64b.Parse(empty_index, &empty_entry, FORMAT);
302
+ ASSERT_FALSE(ok);
303
+
304
+ // the zero entry is reserved for un-initialized entries
305
+ const size_t MAX_COMMIT = (1 << FORMAT.COMMIT_BITS) - 1 - 1;
306
+ // Samples over the numbers that are covered by that many index bits
307
+ std::array<uint64_t, 4> is = {{0, 1, INDEX_SIZE / 2 + 1, INDEX_SIZE - 1}};
308
+ // Samples over the numbers that are covered by that many commit bits
309
+ std::array<uint64_t, 4> ds = {{0, 1, MAX_COMMIT / 2 + 1, MAX_COMMIT}};
310
+ // Iterate over prepare numbers that have i) cover all bits of a sequence
311
+ // number, and ii) include some bits that fall into the range of index or
312
+ // commit bits
313
+ for (uint64_t base = 1; base < kMaxSequenceNumber; base *= 2) {
314
+ for (uint64_t i : is) {
315
+ for (uint64_t d : ds) {
316
+ uint64_t p = base + i + d;
317
+ for (uint64_t c : {p, p + d / 2, p + d}) {
318
+ uint64_t index = p % INDEX_SIZE;
319
+ CommitEntry before(p, c), after;
320
+ CommitEntry64b entry64b(before, FORMAT);
321
+ ok = entry64b.Parse(index, &after, FORMAT);
322
+ ASSERT_TRUE(ok);
323
+ if (!(before == after)) {
324
+ printf("base %" PRIu64 " i %" PRIu64 " d %" PRIu64 " p %" PRIu64
325
+ " c %" PRIu64 " index %" PRIu64 "\n",
326
+ base, i, d, p, c, index);
327
+ }
328
+ ASSERT_EQ(before, after);
329
+ }
330
+ }
331
+ }
332
+ }
333
+ }
334
+
335
+ class WritePreparedTxnDBMock : public WritePreparedTxnDB {
336
+ public:
337
+ WritePreparedTxnDBMock(DBImpl* db_impl, TransactionDBOptions& opt)
338
+ : WritePreparedTxnDB(db_impl, opt) {}
339
+ void SetDBSnapshots(const std::vector<SequenceNumber>& snapshots) {
340
+ snapshots_ = snapshots;
341
+ }
342
+ void TakeSnapshot(SequenceNumber seq) { snapshots_.push_back(seq); }
343
+
344
+ protected:
345
+ const std::vector<SequenceNumber> GetSnapshotListFromDB(
346
+ SequenceNumber /* unused */) override {
347
+ return snapshots_;
348
+ }
349
+
350
+ private:
351
+ std::vector<SequenceNumber> snapshots_;
352
+ };
353
+
354
+ class WritePreparedTransactionTestBase : public TransactionTestBase {  // Shared helpers for the write-prepared transaction fixtures below.
355
+ public:
356
+ WritePreparedTransactionTestBase(bool use_stackable_db, bool two_write_queue,
357
+ TxnDBWritePolicy write_policy,
358
+ WriteOrdering write_ordering)
359
+ : TransactionTestBase(use_stackable_db, two_write_queue, write_policy,  // All four arguments are forwarded unchanged to the base fixture.
360
+ write_ordering){};
361
+
362
+ protected:
363
+ void UpdateTransactionDBOptions(size_t snapshot_cache_bits,  // Overload: sets both cache bit widths on txn_db_options.
364
+ size_t commit_cache_bits) {
365
+ txn_db_options.wp_snapshot_cache_bits = snapshot_cache_bits;
366
+ txn_db_options.wp_commit_cache_bits = commit_cache_bits;
367
+ }
368
+ void UpdateTransactionDBOptions(size_t snapshot_cache_bits) {  // Overload: sets only wp_snapshot_cache_bits; wp_commit_cache_bits is not touched.
369
+ txn_db_options.wp_snapshot_cache_bits = snapshot_cache_bits;
370
+ }
371
+ // If expect_update is set, check if it actually updated old_commit_map_. If
372
+ // it did not and yet suggested not to check the next snapshot, do the
373
+ // opposite to check if it was not a bad suggestion.
374
+ void MaybeUpdateOldCommitMapTestWithNext(uint64_t prepare, uint64_t commit,
375
+ uint64_t snapshot,
376
+ uint64_t next_snapshot,
377
+ bool expect_update) {
378
+ WritePreparedTxnDB* wp_db = dynamic_cast<WritePreparedTxnDB*>(db);  // NOTE(review): the cast result is dereferenced below without a null check.
379
+ // reset old_commit_map_empty_ so that its value indicates whether
380
+ // old_commit_map_ was updated
381
+ wp_db->old_commit_map_empty_ = true;
382
+ bool check_next = wp_db->MaybeUpdateOldCommitMap(prepare, commit, snapshot,
383
+ snapshot < next_snapshot);  // Last argument is true iff next_snapshot is larger than snapshot.
384
+ if (expect_update == wp_db->old_commit_map_empty_) {  // Expectation violated: print the inputs before the EXPECT below reports it.
385
+ printf("prepare: %" PRIu64 " commit: %" PRIu64 " snapshot: %" PRIu64
386
+ " next: %" PRIu64 "\n",
387
+ prepare, commit, snapshot, next_snapshot);
388
+ }
389
+ EXPECT_EQ(!expect_update, wp_db->old_commit_map_empty_);
390
+ if (!check_next && wp_db->old_commit_map_empty_) {  // Nothing was recorded and the call advised skipping the next snapshot.
391
+ // do the opposite to make sure it was not a bad suggestion
392
+ const bool dont_care_bool = true;
393
+ wp_db->MaybeUpdateOldCommitMap(prepare, commit, next_snapshot,
394
+ dont_care_bool);
395
+ if (!wp_db->old_commit_map_empty_) {  // The next snapshot did need the entry: print the inputs.
396
+ printf("prepare: %" PRIu64 " commit: %" PRIu64 " snapshot: %" PRIu64
397
+ " next: %" PRIu64 "\n",
398
+ prepare, commit, snapshot, next_snapshot);
399
+ }
400
+ EXPECT_TRUE(wp_db->old_commit_map_empty_);
401
+ }
402
+ }
403
+
404
+ // Test that a CheckAgainstSnapshots thread reading old_snapshots will not
405
+ // miss a snapshot because of a concurrent update by UpdateSnapshots that is
406
+ // writing new_snapshots. Both threads are broken at two points. The sync
407
+ // points to enforce them are specified by a1, a2, b1, and b2. CommitEntry
408
+ // entry is expected to be vital for one of the snapshots that is common
409
+ // between the old and new list of snapshots.
410
+ void SnapshotConcurrentAccessTestInternal(
411
+ WritePreparedTxnDB* wp_db,
412
+ const std::vector<SequenceNumber>& old_snapshots,
413
+ const std::vector<SequenceNumber>& new_snapshots, CommitEntry& entry,
414
+ SequenceNumber& version, size_t a1, size_t a2, size_t b1, size_t b2) {  // version is an in/out counter: it is pre-incremented before each reset call below.
415
+ // First reset the snapshot list
416
+ const std::vector<SequenceNumber> empty_snapshots;
417
+ wp_db->old_commit_map_empty_ = true;
418
+ wp_db->UpdateSnapshots(empty_snapshots, ++version);
419
+ // Then initialize it with the old_snapshots
420
+ wp_db->UpdateSnapshots(old_snapshots, ++version);
421
+
422
+ // Starting from the first thread, cut each thread at two points
423
+ ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency({  // Each pair names two sync points; a1/a2 index CheckAgainstSnapshots points, b1/b2 index UpdateSnapshots points.
424
+ {"WritePreparedTxnDB::CheckAgainstSnapshots:p:" + std::to_string(a1),
425
+ "WritePreparedTxnDB::UpdateSnapshots:s:start"},
426
+ {"WritePreparedTxnDB::UpdateSnapshots:p:" + std::to_string(b1),
427
+ "WritePreparedTxnDB::CheckAgainstSnapshots:s:" + std::to_string(a1)},
428
+ {"WritePreparedTxnDB::CheckAgainstSnapshots:p:" + std::to_string(a2),
429
+ "WritePreparedTxnDB::UpdateSnapshots:s:" + std::to_string(b1)},
430
+ {"WritePreparedTxnDB::UpdateSnapshots:p:" + std::to_string(b2),
431
+ "WritePreparedTxnDB::CheckAgainstSnapshots:s:" + std::to_string(a2)},
432
+ {"WritePreparedTxnDB::CheckAgainstSnapshots:p:end",
433
+ "WritePreparedTxnDB::UpdateSnapshots:s:" + std::to_string(b2)},
434
+ });
435
+ ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();
436
+ {
437
+ ASSERT_TRUE(wp_db->old_commit_map_empty_);
438
+ ROCKSDB_NAMESPACE::port::Thread t1(  // t1: installs new_snapshots using the current version (not incremented here).
439
+ [&]() { wp_db->UpdateSnapshots(new_snapshots, version); });
440
+ ROCKSDB_NAMESPACE::port::Thread t2(  // t2: checks entry against the snapshot list concurrently with t1.
441
+ [&]() { wp_db->CheckAgainstSnapshots(entry); });
442
+ t1.join();
443
+ t2.join();
444
+ ASSERT_FALSE(wp_db->old_commit_map_empty_);  // The flag must have been cleared during the concurrent run.
445
+ }
446
+ ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing();
447
+
448
+ wp_db->old_commit_map_empty_ = true;  // Reset the state exactly as at the top before the second scenario.
449
+ wp_db->UpdateSnapshots(empty_snapshots, ++version);
450
+ wp_db->UpdateSnapshots(old_snapshots, ++version);
451
+ // Starting from the second thread, cut each thread at two points
452
+ ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency({  // Same shape as above with the roles swapped: a1/a2 now index UpdateSnapshots points, b1/b2 CheckAgainstSnapshots points.
453
+ {"WritePreparedTxnDB::UpdateSnapshots:p:" + std::to_string(a1),
454
+ "WritePreparedTxnDB::CheckAgainstSnapshots:s:start"},
455
+ {"WritePreparedTxnDB::CheckAgainstSnapshots:p:" + std::to_string(b1),
456
+ "WritePreparedTxnDB::UpdateSnapshots:s:" + std::to_string(a1)},
457
+ {"WritePreparedTxnDB::UpdateSnapshots:p:" + std::to_string(a2),
458
+ "WritePreparedTxnDB::CheckAgainstSnapshots:s:" + std::to_string(b1)},
459
+ {"WritePreparedTxnDB::CheckAgainstSnapshots:p:" + std::to_string(b2),
460
+ "WritePreparedTxnDB::UpdateSnapshots:s:" + std::to_string(a2)},
461
+ {"WritePreparedTxnDB::UpdateSnapshots:p:end",
462
+ "WritePreparedTxnDB::CheckAgainstSnapshots:s:" + std::to_string(b2)},
463
+ });
464
+ ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();
465
+ {
466
+ ASSERT_TRUE(wp_db->old_commit_map_empty_);
467
+ ROCKSDB_NAMESPACE::port::Thread t1(  // t1: installs new_snapshots.
468
+ [&]() { wp_db->UpdateSnapshots(new_snapshots, version); });
469
+ ROCKSDB_NAMESPACE::port::Thread t2(  // t2: checks entry against the snapshot list.
470
+ [&]() { wp_db->CheckAgainstSnapshots(entry); });
471
+ t1.join();
472
+ t2.join();
473
+ ASSERT_FALSE(wp_db->old_commit_map_empty_);  // Same post-condition as in the first scenario.
474
+ }
475
+ ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing();
476
+ }
477
+
478
+ // Verify value of keys: each key in data must read back as its mapped value through both Get and MultiGet; a mapped value of NOT_FOUND stands for a key that must be absent.
479
+ void VerifyKeys(const std::unordered_map<std::string, std::string>& data,
480
+ const Snapshot* snapshot = nullptr) {  // snapshot is stored in ReadOptions::snapshot for every read below.
481
+ std::string value;
482
+ ReadOptions read_options;
483
+ read_options.snapshot = snapshot;
484
+ for (auto& kv : data) {
485
+ auto s = db->Get(read_options, kv.first, &value);
486
+ ASSERT_TRUE(s.ok() || s.IsNotFound());  // Any status other than OK / NotFound fails the test.
487
+ if (s.ok()) {
488
+ if (kv.second != value) {  // Print the offending key before the assertion below fails.
489
+ printf("key = %s\n", kv.first.c_str());
490
+ }
491
+ ASSERT_EQ(kv.second, value);
492
+ } else {
493
+ ASSERT_EQ(kv.second, "NOT_FOUND");
494
+ }
495
+
496
+ // Try with MultiGet API too
497
+ std::vector<std::string> values;
498
+ auto s_vec = db->MultiGet(read_options, {db->DefaultColumnFamily()},
499
+ {kv.first}, &values);
500
+ ASSERT_EQ(1, values.size());
501
+ ASSERT_EQ(1, s_vec.size());
502
+ s = s_vec[0];
503
+ ASSERT_TRUE(s.ok() || s.IsNotFound());
504
+ if (s.ok()) {
505
+ ASSERT_TRUE(kv.second == values[0]);
506
+ } else {
507
+ ASSERT_EQ(kv.second, "NOT_FOUND");
508
+ }
509
+ }
510
+ }
511
+
512
+ // Verify all versions of keys: the versions returned by GetAllKeyVersions for the range spanned by expected_versions must match it entry by entry.
513
+ void VerifyInternalKeys(const std::vector<KeyVersion>& expected_versions) {
514
+ std::vector<KeyVersion> versions;
515
+ const size_t kMaxKeys = 100000;
516
+ ASSERT_OK(GetAllKeyVersions(db, expected_versions.front().user_key,  // NOTE(review): front()/back() require expected_versions to be non-empty.
517
+ expected_versions.back().user_key, kMaxKeys,
518
+ &versions));
519
+ ASSERT_EQ(expected_versions.size(), versions.size());
520
+ for (size_t i = 0; i < versions.size(); i++) {
521
+ ASSERT_EQ(expected_versions[i].user_key, versions[i].user_key);
522
+ ASSERT_EQ(expected_versions[i].sequence, versions[i].sequence);
523
+ ASSERT_EQ(expected_versions[i].type, versions[i].type);
524
+ if (versions[i].type != kTypeDeletion &&  // Values are compared only for entries that are not deletions.
525
+ versions[i].type != kTypeSingleDeletion) {
526
+ ASSERT_EQ(expected_versions[i].value, versions[i].value);
527
+ }
528
+ // Range delete not supported.
529
+ ASSERT_NE(expected_versions[i].type, kTypeRangeDeletion);
530
+ }
531
+ }
532
+ };
533
+
534
+ class WritePreparedTransactionTest  // Value-parameterized fixture used by the TEST_P cases in this file.
535
+ : public WritePreparedTransactionTestBase,
536
+ virtual public ::testing::WithParamInterface<
537
+ std::tuple<bool, bool, TxnDBWritePolicy, WriteOrdering>> {
538
+ public:
539
+ WritePreparedTransactionTest()
540
+ : WritePreparedTransactionTestBase(
541
+ std::get<0>(GetParam()), std::get<1>(GetParam()),  // Tuple fields 0..3 map, in order, to use_stackable_db, two_write_queue,
542
+ std::get<2>(GetParam()), std::get<3>(GetParam())){};  // write_policy and write_ordering of the base constructor.
543
+ };
544
+
545
+ #ifndef ROCKSDB_VALGRIND_RUN
546
+ class SnapshotConcurrentAccessTest  // Value-parameterized fixture whose 6-tuple adds a split id and split count to the four base parameters.
547
+ : public WritePreparedTransactionTestBase,
548
+ virtual public ::testing::WithParamInterface<std::tuple<
549
+ bool, bool, TxnDBWritePolicy, WriteOrdering, size_t, size_t>> {
550
+ public:
551
+ SnapshotConcurrentAccessTest()
552
+ : WritePreparedTransactionTestBase(
553
+ std::get<0>(GetParam()), std::get<1>(GetParam()),  // Fields 0..3 go to the base fixture constructor.
554
+ std::get<2>(GetParam()), std::get<3>(GetParam())),
555
+ split_id_(std::get<4>(GetParam())),  // Field 4: which split this instance runs.
556
+ split_cnt_(std::get<5>(GetParam())){};  // Field 5: total number of splits.
557
+
558
+ protected:
559
+ // A test is split into split_cnt_ tests, each identified with split_id_ where
560
+ // 0 <= split_id_ < split_cnt_
561
+ size_t split_id_;
562
+ size_t split_cnt_;
563
+ };
564
+ #endif // ROCKSDB_VALGRIND_RUN
565
+
566
+ class SeqAdvanceConcurrentTest  // Value-parameterized fixture whose 6-tuple adds a split id and split count to the four base parameters.
567
+ : public WritePreparedTransactionTestBase,
568
+ virtual public ::testing::WithParamInterface<std::tuple<
569
+ bool, bool, TxnDBWritePolicy, WriteOrdering, size_t, size_t>> {
570
+ public:
571
+ SeqAdvanceConcurrentTest()
572
+ : WritePreparedTransactionTestBase(
573
+ std::get<0>(GetParam()), std::get<1>(GetParam()),  // Fields 0..3 go to the base fixture constructor.
574
+ std::get<2>(GetParam()), std::get<3>(GetParam())),
575
+ split_id_(std::get<4>(GetParam())),  // Field 4: which split this instance runs.
576
+ split_cnt_(std::get<5>(GetParam())) {  // Field 5: total number of splits.
577
+ special_env.skip_fsync_ = true;  // presumably skips fsync to keep the test fast — TODO confirm against special_env
578
+ };
579
+
580
+ protected:
581
+ // A test is split into split_cnt_ tests, each identified with split_id_ where
582
+ // 0 <= split_id_ < split_cnt_
583
+ size_t split_id_;
584
+ size_t split_cnt_;
585
+ };
586
+
587
+ INSTANTIATE_TEST_CASE_P(  // Instantiates WritePreparedTransactionTest with three parameter tuples.
588
+ WritePreparedTransaction, WritePreparedTransactionTest,
589
+ ::testing::Values(  // Tuple order: use_stackable_db, two_write_queue, write_policy, write_ordering.
590
+ std::make_tuple(false, false, WRITE_PREPARED, kOrderedWrite),
591
+ std::make_tuple(false, true, WRITE_PREPARED, kOrderedWrite),
592
+ std::make_tuple(false, true, WRITE_PREPARED, kUnorderedWrite)));
593
+
594
+ #ifndef ROCKSDB_VALGRIND_RUN
595
+ INSTANTIATE_TEST_CASE_P(  // Two write queues: split ids 0..19 of 20, once with kOrderedWrite and once with kUnorderedWrite.
596
+ TwoWriteQueues, SnapshotConcurrentAccessTest,
597
+ ::testing::Values(  // Tuple order: use_stackable_db, two_write_queue, write_policy, write_ordering, split_id, split_cnt.
598
+ std::make_tuple(false, true, WRITE_PREPARED, kOrderedWrite, 0, 20),
599
+ std::make_tuple(false, true, WRITE_PREPARED, kOrderedWrite, 1, 20),
600
+ std::make_tuple(false, true, WRITE_PREPARED, kOrderedWrite, 2, 20),
601
+ std::make_tuple(false, true, WRITE_PREPARED, kOrderedWrite, 3, 20),
602
+ std::make_tuple(false, true, WRITE_PREPARED, kOrderedWrite, 4, 20),
603
+ std::make_tuple(false, true, WRITE_PREPARED, kOrderedWrite, 5, 20),
604
+ std::make_tuple(false, true, WRITE_PREPARED, kOrderedWrite, 6, 20),
605
+ std::make_tuple(false, true, WRITE_PREPARED, kOrderedWrite, 7, 20),
606
+ std::make_tuple(false, true, WRITE_PREPARED, kOrderedWrite, 8, 20),
607
+ std::make_tuple(false, true, WRITE_PREPARED, kOrderedWrite, 9, 20),
608
+ std::make_tuple(false, true, WRITE_PREPARED, kOrderedWrite, 10, 20),
609
+ std::make_tuple(false, true, WRITE_PREPARED, kOrderedWrite, 11, 20),
610
+ std::make_tuple(false, true, WRITE_PREPARED, kOrderedWrite, 12, 20),
611
+ std::make_tuple(false, true, WRITE_PREPARED, kOrderedWrite, 13, 20),
612
+ std::make_tuple(false, true, WRITE_PREPARED, kOrderedWrite, 14, 20),
613
+ std::make_tuple(false, true, WRITE_PREPARED, kOrderedWrite, 15, 20),
614
+ std::make_tuple(false, true, WRITE_PREPARED, kOrderedWrite, 16, 20),
615
+ std::make_tuple(false, true, WRITE_PREPARED, kOrderedWrite, 17, 20),
616
+ std::make_tuple(false, true, WRITE_PREPARED, kOrderedWrite, 18, 20),
617
+ std::make_tuple(false, true, WRITE_PREPARED, kOrderedWrite, 19, 20),
618
+
619
+ std::make_tuple(false, true, WRITE_PREPARED, kUnorderedWrite, 0, 20),
620
+ std::make_tuple(false, true, WRITE_PREPARED, kUnorderedWrite, 1, 20),
621
+ std::make_tuple(false, true, WRITE_PREPARED, kUnorderedWrite, 2, 20),
622
+ std::make_tuple(false, true, WRITE_PREPARED, kUnorderedWrite, 3, 20),
623
+ std::make_tuple(false, true, WRITE_PREPARED, kUnorderedWrite, 4, 20),
624
+ std::make_tuple(false, true, WRITE_PREPARED, kUnorderedWrite, 5, 20),
625
+ std::make_tuple(false, true, WRITE_PREPARED, kUnorderedWrite, 6, 20),
626
+ std::make_tuple(false, true, WRITE_PREPARED, kUnorderedWrite, 7, 20),
627
+ std::make_tuple(false, true, WRITE_PREPARED, kUnorderedWrite, 8, 20),
628
+ std::make_tuple(false, true, WRITE_PREPARED, kUnorderedWrite, 9, 20),
629
+ std::make_tuple(false, true, WRITE_PREPARED, kUnorderedWrite, 10, 20),
630
+ std::make_tuple(false, true, WRITE_PREPARED, kUnorderedWrite, 11, 20),
631
+ std::make_tuple(false, true, WRITE_PREPARED, kUnorderedWrite, 12, 20),
632
+ std::make_tuple(false, true, WRITE_PREPARED, kUnorderedWrite, 13, 20),
633
+ std::make_tuple(false, true, WRITE_PREPARED, kUnorderedWrite, 14, 20),
634
+ std::make_tuple(false, true, WRITE_PREPARED, kUnorderedWrite, 15, 20),
635
+ std::make_tuple(false, true, WRITE_PREPARED, kUnorderedWrite, 16, 20),
636
+ std::make_tuple(false, true, WRITE_PREPARED, kUnorderedWrite, 17, 20),
637
+ std::make_tuple(false, true, WRITE_PREPARED, kUnorderedWrite, 18, 20),
638
+ std::make_tuple(false, true, WRITE_PREPARED, kUnorderedWrite, 19, 20)));
639
+
640
+ INSTANTIATE_TEST_CASE_P(  // One write queue: split ids 0..19 of 20, kOrderedWrite only.
641
+ OneWriteQueue, SnapshotConcurrentAccessTest,
642
+ ::testing::Values(  // Tuple order: use_stackable_db, two_write_queue, write_policy, write_ordering, split_id, split_cnt.
643
+ std::make_tuple(false, false, WRITE_PREPARED, kOrderedWrite, 0, 20),
644
+ std::make_tuple(false, false, WRITE_PREPARED, kOrderedWrite, 1, 20),
645
+ std::make_tuple(false, false, WRITE_PREPARED, kOrderedWrite, 2, 20),
646
+ std::make_tuple(false, false, WRITE_PREPARED, kOrderedWrite, 3, 20),
647
+ std::make_tuple(false, false, WRITE_PREPARED, kOrderedWrite, 4, 20),
648
+ std::make_tuple(false, false, WRITE_PREPARED, kOrderedWrite, 5, 20),
649
+ std::make_tuple(false, false, WRITE_PREPARED, kOrderedWrite, 6, 20),
650
+ std::make_tuple(false, false, WRITE_PREPARED, kOrderedWrite, 7, 20),
651
+ std::make_tuple(false, false, WRITE_PREPARED, kOrderedWrite, 8, 20),
652
+ std::make_tuple(false, false, WRITE_PREPARED, kOrderedWrite, 9, 20),
653
+ std::make_tuple(false, false, WRITE_PREPARED, kOrderedWrite, 10, 20),
654
+ std::make_tuple(false, false, WRITE_PREPARED, kOrderedWrite, 11, 20),
655
+ std::make_tuple(false, false, WRITE_PREPARED, kOrderedWrite, 12, 20),
656
+ std::make_tuple(false, false, WRITE_PREPARED, kOrderedWrite, 13, 20),
657
+ std::make_tuple(false, false, WRITE_PREPARED, kOrderedWrite, 14, 20),
658
+ std::make_tuple(false, false, WRITE_PREPARED, kOrderedWrite, 15, 20),
659
+ std::make_tuple(false, false, WRITE_PREPARED, kOrderedWrite, 16, 20),
660
+ std::make_tuple(false, false, WRITE_PREPARED, kOrderedWrite, 17, 20),
661
+ std::make_tuple(false, false, WRITE_PREPARED, kOrderedWrite, 18, 20),
662
+ std::make_tuple(false, false, WRITE_PREPARED, kOrderedWrite, 19, 20)));
663
+
664
+ INSTANTIATE_TEST_CASE_P(  // Two write queues: split ids 0..9 of 10, once with kOrderedWrite and once with kUnorderedWrite.
665
+ TwoWriteQueues, SeqAdvanceConcurrentTest,
666
+ ::testing::Values(  // Tuple order: use_stackable_db, two_write_queue, write_policy, write_ordering, split_id, split_cnt.
667
+ std::make_tuple(false, true, WRITE_PREPARED, kOrderedWrite, 0, 10),
668
+ std::make_tuple(false, true, WRITE_PREPARED, kOrderedWrite, 1, 10),
669
+ std::make_tuple(false, true, WRITE_PREPARED, kOrderedWrite, 2, 10),
670
+ std::make_tuple(false, true, WRITE_PREPARED, kOrderedWrite, 3, 10),
671
+ std::make_tuple(false, true, WRITE_PREPARED, kOrderedWrite, 4, 10),
672
+ std::make_tuple(false, true, WRITE_PREPARED, kOrderedWrite, 5, 10),
673
+ std::make_tuple(false, true, WRITE_PREPARED, kOrderedWrite, 6, 10),
674
+ std::make_tuple(false, true, WRITE_PREPARED, kOrderedWrite, 7, 10),
675
+ std::make_tuple(false, true, WRITE_PREPARED, kOrderedWrite, 8, 10),
676
+ std::make_tuple(false, true, WRITE_PREPARED, kOrderedWrite, 9, 10),
677
+ std::make_tuple(false, true, WRITE_PREPARED, kUnorderedWrite, 0, 10),
678
+ std::make_tuple(false, true, WRITE_PREPARED, kUnorderedWrite, 1, 10),
679
+ std::make_tuple(false, true, WRITE_PREPARED, kUnorderedWrite, 2, 10),
680
+ std::make_tuple(false, true, WRITE_PREPARED, kUnorderedWrite, 3, 10),
681
+ std::make_tuple(false, true, WRITE_PREPARED, kUnorderedWrite, 4, 10),
682
+ std::make_tuple(false, true, WRITE_PREPARED, kUnorderedWrite, 5, 10),
683
+ std::make_tuple(false, true, WRITE_PREPARED, kUnorderedWrite, 6, 10),
684
+ std::make_tuple(false, true, WRITE_PREPARED, kUnorderedWrite, 7, 10),
685
+ std::make_tuple(false, true, WRITE_PREPARED, kUnorderedWrite, 8, 10),
686
+ std::make_tuple(false, true, WRITE_PREPARED, kUnorderedWrite, 9, 10)));
687
+
688
+ INSTANTIATE_TEST_CASE_P(  // One write queue: split ids 0..9 of 10, kOrderedWrite only.
689
+ OneWriteQueue, SeqAdvanceConcurrentTest,
690
+ ::testing::Values(  // Tuple order: use_stackable_db, two_write_queue, write_policy, write_ordering, split_id, split_cnt.
691
+ std::make_tuple(false, false, WRITE_PREPARED, kOrderedWrite, 0, 10),
692
+ std::make_tuple(false, false, WRITE_PREPARED, kOrderedWrite, 1, 10),
693
+ std::make_tuple(false, false, WRITE_PREPARED, kOrderedWrite, 2, 10),
694
+ std::make_tuple(false, false, WRITE_PREPARED, kOrderedWrite, 3, 10),
695
+ std::make_tuple(false, false, WRITE_PREPARED, kOrderedWrite, 4, 10),
696
+ std::make_tuple(false, false, WRITE_PREPARED, kOrderedWrite, 5, 10),
697
+ std::make_tuple(false, false, WRITE_PREPARED, kOrderedWrite, 6, 10),
698
+ std::make_tuple(false, false, WRITE_PREPARED, kOrderedWrite, 7, 10),
699
+ std::make_tuple(false, false, WRITE_PREPARED, kOrderedWrite, 8, 10),
700
+ std::make_tuple(false, false, WRITE_PREPARED, kOrderedWrite, 9, 10)));
701
+ #endif // ROCKSDB_VALGRIND_RUN
702
+
703
+ TEST_P(WritePreparedTransactionTest, CommitMap) {  // Exercises add, lookup, exchange and overwrite of a single commit-cache slot.
704
+ WritePreparedTxnDB* wp_db = dynamic_cast<WritePreparedTxnDB*>(db);
705
+ ASSERT_NE(wp_db, nullptr);
706
+ ASSERT_NE(wp_db->db_impl_, nullptr);
707
+ size_t size = wp_db->COMMIT_CACHE_SIZE;
708
+ CommitEntry c = {5, 12}, e;  // c is the entry under test; e receives whatever the calls below read back or evict.
709
+ bool evicted = wp_db->AddCommitEntry(c.prep_seq % size, c, &e);  // The slot index is prep_seq modulo the cache size.
710
+ ASSERT_FALSE(evicted);
711
+
712
+ // Should be able to read the same value
713
+ CommitEntry64b dont_care;
714
+ bool found = wp_db->GetCommitEntry(c.prep_seq % size, &dont_care, &e);
715
+ ASSERT_TRUE(found);
716
+ ASSERT_EQ(c, e);
717
+ // Should be able to distinguish between overlapping entries
718
+ found = wp_db->GetCommitEntry((c.prep_seq + size) % size, &dont_care, &e);  // Same slot index as c.prep_seq % size.
719
+ ASSERT_TRUE(found);
720
+ ASSERT_NE(c.prep_seq + size, e.prep_seq);  // The entry read back must not carry the overlapping prep_seq.
721
+ // Should be able to detect non-existent entry
722
+ found = wp_db->GetCommitEntry((c.prep_seq + 1) % size, &dont_care, &e);
723
+ ASSERT_FALSE(found);
724
+
725
+ // Reject an invalid exchange
726
+ CommitEntry e2 = {c.prep_seq + size, c.commit_seq + size};
727
+ CommitEntry64b e2_64b(e2, wp_db->FORMAT);
728
+ bool exchanged = wp_db->ExchangeCommitEntry(e2.prep_seq % size, e2_64b, e);  // presumably the second argument is the expected current slot content — TODO confirm against ExchangeCommitEntry
729
+ ASSERT_FALSE(exchanged);
730
+ // check whether it did actually reject that
731
+ found = wp_db->GetCommitEntry(e2.prep_seq % size, &dont_care, &e);
732
+ ASSERT_TRUE(found);
733
+ ASSERT_EQ(c, e);
734
+
735
+ // Accept a valid exchange
736
+ CommitEntry64b c_64b(c, wp_db->FORMAT);
737
+ CommitEntry e3 = {c.prep_seq + size, c.commit_seq + size + 1};
738
+ exchanged = wp_db->ExchangeCommitEntry(c.prep_seq % size, c_64b, e3);
739
+ ASSERT_TRUE(exchanged);
740
+ // check whether it actually accepted that
741
+ found = wp_db->GetCommitEntry(c.prep_seq % size, &dont_care, &e);
742
+ ASSERT_TRUE(found);
743
+ ASSERT_EQ(e3, e);
744
+
745
+ // Rewrite an entry
746
+ CommitEntry e4 = {e3.prep_seq + size, e3.commit_seq + size + 1};
747
+ evicted = wp_db->AddCommitEntry(e4.prep_seq % size, e4, &e);
748
+ ASSERT_TRUE(evicted);
749
+ ASSERT_EQ(e3, e);  // The evicted entry handed back must be the previous occupant e3.
750
+ found = wp_db->GetCommitEntry(e4.prep_seq % size, &dont_care, &e);
751
+ ASSERT_TRUE(found);
752
+ ASSERT_EQ(e4, e);
753
+ }
754
+
755
+ TEST_P(WritePreparedTransactionTest, MaybeUpdateOldCommitMap) {  // Drives MaybeUpdateOldCommitMapTestWithNext over a table of prepare/commit/snapshot combinations.
756
+ // If prepare <= snapshot < commit we should keep the entry around since its
757
+ // nonexistence could be interpreted as committed in the snapshot while it is
758
+ // not true. We keep such entries around by adding them to the
759
+ // old_commit_map_.
760
+ uint64_t p /*prepare*/, c /*commit*/, s /*snapshot*/, ns /*next_snapshot*/;
761
+ p = 10l, c = 15l, s = 20l, ns = 21l;
762
+ MaybeUpdateOldCommitMapTestWithNext(p, c, s, ns, false);  // commit < snapshot: no update expected.
763
+ // If we do not expect the old commit map to be updated, try also with a next
764
+ // snapshot that is expected to update the old commit map. This would test
765
+ // that MaybeUpdateOldCommitMap would not prevent us from checking the next
766
+ // snapshot that must be checked.
767
+ p = 10l, c = 15l, s = 20l, ns = 11l;
768
+ MaybeUpdateOldCommitMapTestWithNext(p, c, s, ns, false);
769
+
770
+ p = 10l, c = 20l, s = 20l, ns = 19l;
771
+ MaybeUpdateOldCommitMapTestWithNext(p, c, s, ns, false);  // commit == snapshot: no update expected.
772
+ p = 10l, c = 20l, s = 20l, ns = 21l;
773
+ MaybeUpdateOldCommitMapTestWithNext(p, c, s, ns, false);
774
+
775
+ p = 20l, c = 20l, s = 20l, ns = 21l;
776
+ MaybeUpdateOldCommitMapTestWithNext(p, c, s, ns, false);  // prepare == commit == snapshot: no update expected.
777
+ p = 20l, c = 20l, s = 20l, ns = 19l;
778
+ MaybeUpdateOldCommitMapTestWithNext(p, c, s, ns, false);
779
+
780
+ p = 10l, c = 25l, s = 20l, ns = 21l;
781
+ MaybeUpdateOldCommitMapTestWithNext(p, c, s, ns, true);  // prepare < snapshot < commit: update expected.
782
+
783
+ p = 20l, c = 25l, s = 20l, ns = 21l;
784
+ MaybeUpdateOldCommitMapTestWithNext(p, c, s, ns, true);  // prepare == snapshot < commit: update expected.
785
+
786
+ p = 21l, c = 25l, s = 20l, ns = 22l;
787
+ MaybeUpdateOldCommitMapTestWithNext(p, c, s, ns, false);  // prepare > snapshot: no update expected.
788
+ p = 21l, c = 25l, s = 20l, ns = 19l;
789
+ MaybeUpdateOldCommitMapTestWithNext(p, c, s, ns, false);
790
+ }
791
+
792
+ // Trigger the condition where some old memtables are skipped when doing
793
+ // TransactionUtil::CheckKey(), and make sure the result is still correct.
794
+ TEST_P(WritePreparedTransactionTest, CheckKeySkipOldMemtable) {  // Runs the same scenario twice: once after a real flush, once after only switching the memtable.
795
+ const int kAttemptHistoryMemtable = 0;
796
+ const int kAttemptImmMemTable = 1;
797
+ for (int attempt = kAttemptHistoryMemtable; attempt <= kAttemptImmMemTable;
798
+ attempt++) {
799
+ options.max_write_buffer_number_to_maintain = 3;  // presumably keeps flushed memtables around as history — TODO confirm
800
+ ASSERT_OK(ReOpen());
801
+
802
+ WriteOptions write_options;
803
+ ReadOptions read_options;
804
+ TransactionOptions txn_options;
805
+ txn_options.set_snapshot = true;
806
+ string value;  // NOTE(review): unqualified string relies on a declaration outside this view.
807
+
808
+ ASSERT_OK(db->Put(write_options, Slice("foo"), Slice("bar")));
809
+ ASSERT_OK(db->Put(write_options, Slice("foo2"), Slice("bar")));
810
+
811
+ Transaction* txn = db->BeginTransaction(write_options, txn_options);
812
+ ASSERT_TRUE(txn != nullptr);
813
+ ASSERT_OK(txn->SetName("txn"));
814
+
815
+ Transaction* txn2 = db->BeginTransaction(write_options, txn_options);
816
+ ASSERT_TRUE(txn2 != nullptr);
817
+ ASSERT_OK(txn2->SetName("txn2"));
818
+
819
+ // This transaction is created to cause potential conflict.
820
+ Transaction* txn_x = db->BeginTransaction(write_options);  // NOTE(review): unlike the other transactions, txn_x is not checked against nullptr.
821
+ ASSERT_OK(txn_x->SetName("txn_x"));
822
+ ASSERT_OK(txn_x->Put(Slice("foo"), Slice("bar3")));
823
+ ASSERT_OK(txn_x->Prepare());
824
+
825
+ // Create snapshots after the prepare, but there should still
826
+ // be a conflict when trying to read "foo".
827
+
828
+ if (attempt == kAttemptImmMemTable) {
829
+ // For the second attempt, hold flush from beginning. The memtable
830
+ // will be switched to immutable after calling TEST_SwitchMemtable()
831
+ // while CheckKey() is called.
832
+ ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency(
833
+ {{"WritePreparedTransactionTest.CheckKeySkipOldMemtable",
834
+ "FlushJob::Start"}});
835
+ ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();
836
+ }
837
+
838
+ // force a memtable flush. The memtable should still be kept
839
+ FlushOptions flush_ops;
840
+ if (attempt == kAttemptHistoryMemtable) {
841
+ ASSERT_OK(db->Flush(flush_ops));
842
+ } else {
843
+ ASSERT_EQ(attempt, kAttemptImmMemTable);
844
+ DBImpl* db_impl = static_cast<DBImpl*>(db->GetRootDB());
845
+ ASSERT_OK(db_impl->TEST_SwitchMemtable());
846
+ }
847
+ uint64_t num_imm_mems;
848
+ ASSERT_TRUE(db->GetIntProperty(DB::Properties::kNumImmutableMemTable,
849
+ &num_imm_mems));
850
+ if (attempt == kAttemptHistoryMemtable) {
851
+ ASSERT_EQ(0, num_imm_mems);  // After the real flush no immutable memtable is expected.
852
+ } else {
853
+ ASSERT_EQ(attempt, kAttemptImmMemTable);
854
+ ASSERT_EQ(1, num_imm_mems);  // After the switch exactly one immutable memtable is expected.
855
+ }
856
+
857
+ // Put something in active memtable
858
+ ASSERT_OK(db->Put(write_options, Slice("foo3"), Slice("bar")));
859
+
860
+ // Create txn3 after flushing, but this transaction also needs to
861
+ // check all memtables because they contain uncommitted data.
862
+ Transaction* txn3 = db->BeginTransaction(write_options, txn_options);
863
+ ASSERT_TRUE(txn3 != nullptr);
864
+ ASSERT_OK(txn3->SetName("txn3"));
865
+
866
+ // Commit the pending write
867
+ ASSERT_OK(txn_x->Commit());
868
+
869
+ // Call GetForUpdate on txn3, txn and txn2. txn and txn3 are expected to
870
+ // report Busy while txn2 succeeds. In all cases, both memtables are queried.
871
+ SetPerfLevel(PerfLevel::kEnableCount);
872
+ get_perf_context()->Reset();
873
+ ASSERT_TRUE(txn3->GetForUpdate(read_options, "foo", &value).IsBusy());
874
+ // We should have checked two memtables, active and either immutable
875
+ // or history memtable, depending on the test case.
876
+ ASSERT_EQ(2, get_perf_context()->get_from_memtable_count);
877
+
878
+ get_perf_context()->Reset();
879
+ ASSERT_TRUE(txn->GetForUpdate(read_options, "foo", &value).IsBusy());
880
+ // We should have checked two memtables, active and either immutable
881
+ // or history memtable, depending on the test case.
882
+ ASSERT_EQ(2, get_perf_context()->get_from_memtable_count);
883
+
884
+ get_perf_context()->Reset();
885
+ ASSERT_OK(txn2->GetForUpdate(read_options, "foo2", &value));
886
+ ASSERT_EQ(value, "bar");
887
+ // We should have checked two memtables, and since there is no
888
+ // conflict, another Get() will be made and fetch the data from
889
+ // DB. If it is in immutable memtable, two extra memtable reads
890
+ // will be issued. If it is not (in history), only one will
891
+ // be made, which is to the active memtable.
892
+ if (attempt == kAttemptHistoryMemtable) {
893
+ ASSERT_EQ(3, get_perf_context()->get_from_memtable_count);
894
+ } else {
895
+ ASSERT_EQ(attempt, kAttemptImmMemTable);
896
+ ASSERT_EQ(4, get_perf_context()->get_from_memtable_count);
897
+ }
898
+
899
+ Transaction* txn4 = db->BeginTransaction(write_options, txn_options);  // Begun after txn_x committed.
900
+ ASSERT_TRUE(txn4 != nullptr);
901
+ ASSERT_OK(txn4->SetName("txn4"));
902
+ get_perf_context()->Reset();
903
+ ASSERT_OK(txn4->GetForUpdate(read_options, "foo", &value));
904
+ if (attempt == kAttemptHistoryMemtable) {
905
+ // Active memtable will be checked in snapshot validation and when
906
+ // getting the value.
907
+ ASSERT_EQ(2, get_perf_context()->get_from_memtable_count);
908
+ } else {
909
+ // Only active memtable will be checked in snapshot validation but
910
+ // both the active and the immutable memtable will be queried when
911
+ // getting the value.
912
+ ASSERT_EQ(attempt, kAttemptImmMemTable);
913
+ ASSERT_EQ(3, get_perf_context()->get_from_memtable_count);
914
+ }
915
+
916
+ ASSERT_OK(txn2->Commit());
917
+ ASSERT_OK(txn4->Commit());
918
+
919
+ TEST_SYNC_POINT("WritePreparedTransactionTest.CheckKeySkipOldMemtable");  // Fires the sync point paired with FlushJob::Start in the dependency loaded for the second attempt.
920
+ ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing();
921
+
922
+ SetPerfLevel(PerfLevel::kDisable);
923
+
924
+ delete txn;  // txn and txn3 are deleted without having been committed.
925
+ delete txn2;
926
+ delete txn3;
927
+ delete txn4;
928
+ delete txn_x;
929
+ }
930
+ }
931
+
932
+ // Reproduce the bug with two snapshots with the same sequence number and test
933
+ // that the release of the first snapshot will not affect the reads by the other
934
+ // snapshot
935
+ TEST_P(WritePreparedTransactionTest, DoubleSnapshot) {
936
+ TransactionOptions txn_options;
937
+ Status s;
938
+
939
+ // Insert initial value
940
+ ASSERT_OK(db->Put(WriteOptions(), "key", "value1"));
941
+
942
+ WritePreparedTxnDB* wp_db = dynamic_cast<WritePreparedTxnDB*>(db);  // NOTE(review): the cast result is used below without a null check.
943
+ Transaction* txn =
944
+ wp_db->BeginTransaction(WriteOptions(), txn_options, nullptr);
945
+ ASSERT_OK(txn->SetName("txn"));
946
+ ASSERT_OK(txn->Put("key", "value2"));
947
+ ASSERT_OK(txn->Prepare());
948
+ // Three snapshots with the same seq number
949
+ const Snapshot* snapshot0 = wp_db->GetSnapshot();
950
+ const Snapshot* snapshot1 = wp_db->GetSnapshot();
951
+ const Snapshot* snapshot2 = wp_db->GetSnapshot();
952
+ ASSERT_OK(txn->Commit());
953
+ SequenceNumber cache_size = wp_db->COMMIT_CACHE_SIZE;
954
+ SequenceNumber overlap_seq = txn->GetId() + cache_size;  // The txn id plus the commit cache size; used below to force an eviction.
955
+ delete txn;
956
+
957
+ // 4th snapshot with a larger seq
958
+ const Snapshot* snapshot3 = wp_db->GetSnapshot();
959
+ // Cause an eviction to advance max evicted seq number
960
+ // This also fetches the 4 snapshots from db since their seq is lower than the
961
+ // new max
962
+ wp_db->AddCommitted(overlap_seq, overlap_seq);
963
+
964
+ ReadOptions ropt;
965
+ // It should see the value before commit
966
+ ropt.snapshot = snapshot2;  // All reads below go through snapshot2, which was taken before the commit.
967
+ PinnableSlice pinnable_val;
968
+ s = wp_db->Get(ropt, wp_db->DefaultColumnFamily(), "key", &pinnable_val);
969
+ ASSERT_OK(s);
970
+ ASSERT_TRUE(pinnable_val == "value1");
971
+ pinnable_val.Reset();
972
+
973
+ wp_db->ReleaseSnapshot(snapshot1);  // Release one of the three snapshots that share a sequence number.
974
+
975
+ // It should still see the value before commit
976
+ s = wp_db->Get(ropt, wp_db->DefaultColumnFamily(), "key", &pinnable_val);
977
+ ASSERT_OK(s);
978
+ ASSERT_TRUE(pinnable_val == "value1");
979
+ pinnable_val.Reset();
980
+
981
+ // Cause an eviction to advance max evicted seq number and trigger updating
982
+ // the snapshot list
983
+ overlap_seq += cache_size;
984
+ wp_db->AddCommitted(overlap_seq, overlap_seq);
985
+
986
+ // It should still see the value before commit
987
+ s = wp_db->Get(ropt, wp_db->DefaultColumnFamily(), "key", &pinnable_val);
988
+ ASSERT_OK(s);
989
+ ASSERT_TRUE(pinnable_val == "value1");
990
+ pinnable_val.Reset();
991
+
992
+ wp_db->ReleaseSnapshot(snapshot0);  // Release the remaining snapshots taken by this test.
993
+ wp_db->ReleaseSnapshot(snapshot2);
994
+ wp_db->ReleaseSnapshot(snapshot3);
995
+ }
996
+
997
+ size_t UniqueCnt(std::vector<SequenceNumber> vec) {  // Returns the number of distinct values in vec; vec is taken by value, so the caller's vector is copied.
998
+ std::set<SequenceNumber> aset;
999
+ for (auto i : vec) {
1000
+ aset.insert(i);  // Duplicates collapse in the set.
1001
+ }
1002
+ return aset.size();
1003
+ }
1004
+ // Test that the entries in old_commit_map_ get garbage collected properly
1005
+ TEST_P(WritePreparedTransactionTest, OldCommitMapGC) {  // Uses WritePreparedTxnDBMock so the snapshot list is fully controlled by the test.
1006
+ const size_t snapshot_cache_bits = 0;
1007
+ const size_t commit_cache_bits = 0;  // presumably 0 bits gives a single-entry commit cache, so each commit evicts the previous one — TODO confirm
1008
+ DBImpl* mock_db = new DBImpl(options, dbname);  // NOTE(review): raw new; ownership presumably passes to wp_db — TODO confirm
1009
+ UpdateTransactionDBOptions(snapshot_cache_bits, commit_cache_bits);
1010
+ std::unique_ptr<WritePreparedTxnDBMock> wp_db(
1011
+ new WritePreparedTxnDBMock(mock_db, txn_db_options));
1012
+
1013
+ SequenceNumber seq = 0;  // Running sequence number advanced by hand throughout the test.
1014
+ // Take the first snapshot that overlaps with two txn
1015
+ auto prep_seq = ++seq;
1016
+ wp_db->AddPrepared(prep_seq);
1017
+ auto prep_seq2 = ++seq;
1018
+ wp_db->AddPrepared(prep_seq2);
1019
+ auto snap_seq1 = seq;
1020
+ wp_db->TakeSnapshot(snap_seq1);
1021
+ auto commit_seq = ++seq;
1022
+ wp_db->AddCommitted(prep_seq, commit_seq);
1023
+ wp_db->RemovePrepared(prep_seq);
1024
+ auto commit_seq2 = ++seq;
1025
+ wp_db->AddCommitted(prep_seq2, commit_seq2);
1026
+ wp_db->RemovePrepared(prep_seq2);
1027
+ // Take the 2nd and 3rd snapshot that overlap with the same txn
1028
+ prep_seq = ++seq;
1029
+ wp_db->AddPrepared(prep_seq);
1030
+ auto snap_seq2 = seq;
1031
+ wp_db->TakeSnapshot(snap_seq2);
1032
+ seq++;
1033
+ auto snap_seq3 = seq;
1034
+ wp_db->TakeSnapshot(snap_seq3);
1035
+ seq++;
1036
+ commit_seq = ++seq;
1037
+ wp_db->AddCommitted(prep_seq, commit_seq);
1038
+ wp_db->RemovePrepared(prep_seq);
1039
+ // Make sure max_evicted_seq_ will be larger than 2nd snapshot by evicting the
1040
+ // only item in the commit_cache_ via another commit.
1041
+ prep_seq = ++seq;
1042
+ wp_db->AddPrepared(prep_seq);
1043
+ commit_seq = ++seq;
1044
+ wp_db->AddCommitted(prep_seq, commit_seq);
1045
+ wp_db->RemovePrepared(prep_seq);
1046
+
1047
+ // Verify that the evicted commit entries for all snapshots are in the
1048
+ // old_commit_map_
1049
+ {
1050
+ ASSERT_FALSE(wp_db->old_commit_map_empty_.load());
1051
+ ReadLock rl(&wp_db->old_commit_map_mutex_);
1052
+ ASSERT_EQ(3, wp_db->old_commit_map_.size());  // One map entry per snapshot taken above.
1053
+ ASSERT_EQ(2, UniqueCnt(wp_db->old_commit_map_[snap_seq1]));  // The first snapshot overlaps two transactions.
1054
+ ASSERT_EQ(1, UniqueCnt(wp_db->old_commit_map_[snap_seq2]));
1055
+ ASSERT_EQ(1, UniqueCnt(wp_db->old_commit_map_[snap_seq3]));
1056
+ }
1057
+
1058
+ // Verify that the 2nd snapshot is cleaned up after the release
1059
+ wp_db->ReleaseSnapshotInternal(snap_seq2);
1060
+ {
1061
+ ASSERT_FALSE(wp_db->old_commit_map_empty_.load());
1062
+ ReadLock rl(&wp_db->old_commit_map_mutex_);
1063
+ ASSERT_EQ(2, wp_db->old_commit_map_.size());
1064
+ ASSERT_EQ(2, UniqueCnt(wp_db->old_commit_map_[snap_seq1]));
1065
+ ASSERT_EQ(1, UniqueCnt(wp_db->old_commit_map_[snap_seq3]));
1066
+ }
1067
+
1068
+ // Verify that the 1st snapshot is cleaned up after the release
1069
+ wp_db->ReleaseSnapshotInternal(snap_seq1);
1070
+ {
1071
+ ASSERT_FALSE(wp_db->old_commit_map_empty_.load());
1072
+ ReadLock rl(&wp_db->old_commit_map_mutex_);
1073
+ ASSERT_EQ(1, wp_db->old_commit_map_.size());
1074
+ ASSERT_EQ(1, UniqueCnt(wp_db->old_commit_map_[snap_seq3]));
1075
+ }
1076
+
1077
+ // Verify that the 3rd snapshot is cleaned up after the release
1078
+ wp_db->ReleaseSnapshotInternal(snap_seq3);
1079
+ {
1080
+ ASSERT_TRUE(wp_db->old_commit_map_empty_.load());  // With the last snapshot released the empty flag must be set again.
1081
+ ReadLock rl(&wp_db->old_commit_map_mutex_);
1082
+ ASSERT_EQ(0, wp_db->old_commit_map_.size());
1083
+ }
1084
+ }
1085
+
1086
+ TEST_P(WritePreparedTransactionTest, CheckAgainstSnapshots) {
1087
+ std::vector<SequenceNumber> snapshots = {100l, 200l, 300l, 400l, 500l,
1088
+ 600l, 700l, 800l, 900l};
1089
+ const size_t snapshot_cache_bits = 2;
1090
+ const uint64_t cache_size = 1ul << snapshot_cache_bits;
1091
+ // Safety check to express the intended size in the test. Can be adjusted if
1092
+ // the snapshots lists changed.
1093
+ ASSERT_EQ((1ul << snapshot_cache_bits) * 2 + 1, snapshots.size());
1094
+ DBImpl* mock_db = new DBImpl(options, dbname);
1095
+ UpdateTransactionDBOptions(snapshot_cache_bits);
1096
+ std::unique_ptr<WritePreparedTxnDBMock> wp_db(
1097
+ new WritePreparedTxnDBMock(mock_db, txn_db_options));
1098
+ SequenceNumber version = 1000l;
1099
+ ASSERT_EQ(0, wp_db->snapshots_total_);
1100
+ wp_db->UpdateSnapshots(snapshots, version);
1101
+ ASSERT_EQ(snapshots.size(), wp_db->snapshots_total_);
1102
+ // seq numbers are chosen so that we have two of them between each two
1103
+ // snapshots. If the diff of two consecutive seq is more than 5, there is a
1104
+ // snapshot between them.
1105
+ std::vector<SequenceNumber> seqs = {50l, 55l, 150l, 155l, 250l, 255l, 350l,
1106
+ 355l, 450l, 455l, 550l, 555l, 650l, 655l,
1107
+ 750l, 755l, 850l, 855l, 950l, 955l};
1108
+ ASSERT_GT(seqs.size(), 1);
1109
+ for (size_t i = 0; i + 1 < seqs.size(); i++) {
1110
+ wp_db->old_commit_map_empty_ = true; // reset
1111
+ CommitEntry commit_entry = {seqs[i], seqs[i + 1]};
1112
+ wp_db->CheckAgainstSnapshots(commit_entry);
1113
+ // Expect update if there is snapshot in between the prepare and commit
1114
+ bool expect_update = commit_entry.commit_seq - commit_entry.prep_seq > 5 &&
1115
+ commit_entry.commit_seq >= snapshots.front() &&
1116
+ commit_entry.prep_seq <= snapshots.back();
1117
+ ASSERT_EQ(expect_update, !wp_db->old_commit_map_empty_);
1118
+ }
1119
+
1120
+ // Test that search will include multiple snapshot from snapshot cache
1121
+ {
1122
+ // exclude first and last item in the cache
1123
+ CommitEntry commit_entry = {snapshots.front() + 1,
1124
+ snapshots[cache_size - 1] - 1};
1125
+ wp_db->old_commit_map_empty_ = true; // reset
1126
+ wp_db->old_commit_map_.clear();
1127
+ wp_db->CheckAgainstSnapshots(commit_entry);
1128
+ ASSERT_EQ(wp_db->old_commit_map_.size(), cache_size - 2);
1129
+ }
1130
+
1131
+ // Test that search will include multiple snapshot from old snapshots
1132
+ {
1133
+ // include two in the middle
1134
+ CommitEntry commit_entry = {snapshots[cache_size] + 1,
1135
+ snapshots[cache_size + 2] + 1};
1136
+ wp_db->old_commit_map_empty_ = true; // reset
1137
+ wp_db->old_commit_map_.clear();
1138
+ wp_db->CheckAgainstSnapshots(commit_entry);
1139
+ ASSERT_EQ(wp_db->old_commit_map_.size(), 2);
1140
+ }
1141
+
1142
+ // Test that search will include both snapshot cache and old snapshots
1143
+ // Case 1: includes all in snapshot cache
1144
+ {
1145
+ CommitEntry commit_entry = {snapshots.front() - 1, snapshots.back() + 1};
1146
+ wp_db->old_commit_map_empty_ = true; // reset
1147
+ wp_db->old_commit_map_.clear();
1148
+ wp_db->CheckAgainstSnapshots(commit_entry);
1149
+ ASSERT_EQ(wp_db->old_commit_map_.size(), snapshots.size());
1150
+ }
1151
+
1152
+ // Case 2: includes all snapshot caches except the smallest
1153
+ {
1154
+ CommitEntry commit_entry = {snapshots.front() + 1, snapshots.back() + 1};
1155
+ wp_db->old_commit_map_empty_ = true; // reset
1156
+ wp_db->old_commit_map_.clear();
1157
+ wp_db->CheckAgainstSnapshots(commit_entry);
1158
+ ASSERT_EQ(wp_db->old_commit_map_.size(), snapshots.size() - 1);
1159
+ }
1160
+
1161
+ // Case 3: includes only the largest of snapshot cache
1162
+ {
1163
+ CommitEntry commit_entry = {snapshots[cache_size - 1] - 1,
1164
+ snapshots.back() + 1};
1165
+ wp_db->old_commit_map_empty_ = true; // reset
1166
+ wp_db->old_commit_map_.clear();
1167
+ wp_db->CheckAgainstSnapshots(commit_entry);
1168
+ ASSERT_EQ(wp_db->old_commit_map_.size(), snapshots.size() - cache_size + 1);
1169
+ }
1170
+ }
1171
+
1172
+ // This test is too slow for travis
1173
+ #ifndef TRAVIS
1174
+ #ifndef ROCKSDB_VALGRIND_RUN
1175
+ // Test that CheckAgainstSnapshots will not miss a live snapshot if it is run in
1176
+ // parallel with UpdateSnapshots.
1177
+ TEST_P(SnapshotConcurrentAccessTest, SnapshotConcurrentAccess) {
1178
+ // We have a sync point in the method under test after checking each snapshot.
1179
+ // If you increase the max number of snapshots in this test, more sync points
1180
+ // in the methods must also be added.
1181
+ const std::vector<SequenceNumber> snapshots = {10l, 20l, 30l, 40l, 50l,
1182
+ 60l, 70l, 80l, 90l, 100l};
1183
+ const size_t snapshot_cache_bits = 2;
1184
+ // Safety check to express the intended size in the test. Can be adjusted if
1185
+ // the snapshots lists changed.
1186
+ ASSERT_EQ((1ul << snapshot_cache_bits) * 2 + 2, snapshots.size());
1187
+ SequenceNumber version = 1000l;
1188
+ // Choose the cache size so that the new snapshot list could replace all the
1189
+ // existing items in the cache and also have some overflow.
1190
+ DBImpl* mock_db = new DBImpl(options, dbname);
1191
+ UpdateTransactionDBOptions(snapshot_cache_bits);
1192
+ std::unique_ptr<WritePreparedTxnDBMock> wp_db(
1193
+ new WritePreparedTxnDBMock(mock_db, txn_db_options));
1194
+ const size_t extra = 2;
1195
+ size_t loop_id = 0;
1196
+ // Add up to extra items that do not fit into the cache
1197
+ for (size_t old_size = 1; old_size <= wp_db->SNAPSHOT_CACHE_SIZE + extra;
1198
+ old_size++) {
1199
+ const std::vector<SequenceNumber> old_snapshots(
1200
+ snapshots.begin(), snapshots.begin() + old_size);
1201
+
1202
+ // Each member of old snapshot might or might not appear in the new list. We
1203
+ // create a common_snapshots for each combination.
1204
+ size_t new_comb_cnt = size_t(1) << old_size;
1205
+ for (size_t new_comb = 0; new_comb < new_comb_cnt; new_comb++, loop_id++) {
1206
+ if (loop_id % split_cnt_ != split_id_) continue;
1207
+ printf("."); // To signal progress
1208
+ fflush(stdout);
1209
+ std::vector<SequenceNumber> common_snapshots;
1210
+ for (size_t i = 0; i < old_snapshots.size(); i++) {
1211
+ if (IsInCombination(i, new_comb)) {
1212
+ common_snapshots.push_back(old_snapshots[i]);
1213
+ }
1214
+ }
1215
+ // And add some new snapshots to the common list
1216
+ for (size_t added_snapshots = 0;
1217
+ added_snapshots <= snapshots.size() - old_snapshots.size();
1218
+ added_snapshots++) {
1219
+ std::vector<SequenceNumber> new_snapshots = common_snapshots;
1220
+ for (size_t i = 0; i < added_snapshots; i++) {
1221
+ new_snapshots.push_back(snapshots[old_snapshots.size() + i]);
1222
+ }
1223
+ for (auto it = common_snapshots.begin(); it != common_snapshots.end();
1224
+ ++it) {
1225
+ auto snapshot = *it;
1226
+ // Create a commit entry that is around the snapshot and thus should
1227
+ // be not be discarded
1228
+ CommitEntry entry = {static_cast<uint64_t>(snapshot - 1),
1229
+ snapshot + 1};
1230
+ // The critical part is when iterating the snapshot cache. Afterwards,
1231
+ // we are operating under the lock
1232
+ size_t a_range =
1233
+ std::min(old_snapshots.size(), wp_db->SNAPSHOT_CACHE_SIZE) + 1;
1234
+ size_t b_range =
1235
+ std::min(new_snapshots.size(), wp_db->SNAPSHOT_CACHE_SIZE) + 1;
1236
+ // Break each thread at two points
1237
+ for (size_t a1 = 1; a1 <= a_range; a1++) {
1238
+ for (size_t a2 = a1 + 1; a2 <= a_range; a2++) {
1239
+ for (size_t b1 = 1; b1 <= b_range; b1++) {
1240
+ for (size_t b2 = b1 + 1; b2 <= b_range; b2++) {
1241
+ SnapshotConcurrentAccessTestInternal(
1242
+ wp_db.get(), old_snapshots, new_snapshots, entry, version,
1243
+ a1, a2, b1, b2);
1244
+ }
1245
+ }
1246
+ }
1247
+ }
1248
+ }
1249
+ }
1250
+ }
1251
+ }
1252
+ printf("\n");
1253
+ }
1254
+ #endif // ROCKSDB_VALGRIND_RUN
1255
+ #endif // TRAVIS
1256
+
1257
+ // This test clarifies the contract of AdvanceMaxEvictedSeq method
1258
+ TEST_P(WritePreparedTransactionTest, AdvanceMaxEvictedSeqBasic) {
1259
+ DBImpl* mock_db = new DBImpl(options, dbname);
1260
+ std::unique_ptr<WritePreparedTxnDBMock> wp_db(
1261
+ new WritePreparedTxnDBMock(mock_db, txn_db_options));
1262
+
1263
+ // 1. Set the initial values for max, prepared, and snapshots
1264
+ SequenceNumber zero_max = 0l;
1265
+ // Set the initial list of prepared txns
1266
+ const std::vector<SequenceNumber> initial_prepared = {10, 30, 50, 100,
1267
+ 150, 200, 250};
1268
+ for (auto p : initial_prepared) {
1269
+ wp_db->AddPrepared(p);
1270
+ }
1271
+ // This updates the max value and also set old prepared
1272
+ SequenceNumber init_max = 100;
1273
+ wp_db->AdvanceMaxEvictedSeq(zero_max, init_max);
1274
+ const std::vector<SequenceNumber> initial_snapshots = {20, 40};
1275
+ wp_db->SetDBSnapshots(initial_snapshots);
1276
+ // This will update the internal cache of snapshots from the DB
1277
+ wp_db->UpdateSnapshots(initial_snapshots, init_max);
1278
+
1279
+ // 2. Invoke AdvanceMaxEvictedSeq
1280
+ const std::vector<SequenceNumber> latest_snapshots = {20, 110, 220, 300};
1281
+ wp_db->SetDBSnapshots(latest_snapshots);
1282
+ SequenceNumber new_max = 200;
1283
+ wp_db->AdvanceMaxEvictedSeq(init_max, new_max);
1284
+
1285
+ // 3. Verify that the state matches with AdvanceMaxEvictedSeq contract
1286
+ // a. max should be updated to new_max
1287
+ ASSERT_EQ(wp_db->max_evicted_seq_, new_max);
1288
+ // b. delayed prepared should contain every txn <= max and prepared should
1289
+ // only contain txns > max
1290
+ auto it = initial_prepared.begin();
1291
+ for (; it != initial_prepared.end() && *it <= new_max; ++it) {
1292
+ ASSERT_EQ(1, wp_db->delayed_prepared_.erase(*it));
1293
+ }
1294
+ ASSERT_TRUE(wp_db->delayed_prepared_.empty());
1295
+ for (; it != initial_prepared.end() && !wp_db->prepared_txns_.empty();
1296
+ ++it, wp_db->prepared_txns_.pop()) {
1297
+ ASSERT_EQ(*it, wp_db->prepared_txns_.top());
1298
+ }
1299
+ ASSERT_TRUE(it == initial_prepared.end());
1300
+ ASSERT_TRUE(wp_db->prepared_txns_.empty());
1301
+ // c. snapshots should contain everything below new_max
1302
+ auto sit = latest_snapshots.begin();
1303
+ for (size_t i = 0; sit != latest_snapshots.end() && *sit <= new_max &&
1304
+ i < wp_db->snapshots_total_;
1305
+ sit++, i++) {
1306
+ ASSERT_TRUE(i < wp_db->snapshots_total_);
1307
+ // This test is in small scale and the list of snapshots are assumed to be
1308
+ // within the cache size limit. This is just a safety check to double check
1309
+ // that assumption.
1310
+ ASSERT_TRUE(i < wp_db->SNAPSHOT_CACHE_SIZE);
1311
+ ASSERT_EQ(*sit, wp_db->snapshot_cache_[i]);
1312
+ }
1313
+ }
1314
+
1315
+ // A new snapshot should always be always larger than max_evicted_seq_
1316
+ // Otherwise the snapshot does not go through AdvanceMaxEvictedSeq
1317
+ TEST_P(WritePreparedTransactionTest, NewSnapshotLargerThanMax) {
1318
+ WriteOptions woptions;
1319
+ TransactionOptions txn_options;
1320
+ WritePreparedTxnDB* wp_db = dynamic_cast<WritePreparedTxnDB*>(db);
1321
+ Transaction* txn0 = db->BeginTransaction(woptions, txn_options);
1322
+ ASSERT_OK(txn0->Put(Slice("key"), Slice("value")));
1323
+ ASSERT_OK(txn0->Commit());
1324
+ const SequenceNumber seq = txn0->GetId(); // is also prepare seq
1325
+ delete txn0;
1326
+ std::vector<Transaction*> txns;
1327
+ // Inc seq without committing anything
1328
+ for (int i = 0; i < 10; i++) {
1329
+ Transaction* txn = db->BeginTransaction(woptions, txn_options);
1330
+ ASSERT_OK(txn->SetName("xid" + std::to_string(i)));
1331
+ ASSERT_OK(txn->Put(Slice("key" + std::to_string(i)), Slice("value")));
1332
+ ASSERT_OK(txn->Prepare());
1333
+ txns.push_back(txn);
1334
+ }
1335
+
1336
+ // The new commit is seq + 10
1337
+ ASSERT_OK(db->Put(woptions, "key", "value"));
1338
+ auto snap = wp_db->GetSnapshot();
1339
+ const SequenceNumber last_seq = snap->GetSequenceNumber();
1340
+ wp_db->ReleaseSnapshot(snap);
1341
+ ASSERT_LT(seq, last_seq);
1342
+ // Otherwise our test is not effective
1343
+ ASSERT_LT(last_seq - seq, wp_db->INC_STEP_FOR_MAX_EVICTED);
1344
+
1345
+ // Evict seq out of commit cache
1346
+ const SequenceNumber overwrite_seq = seq + wp_db->COMMIT_CACHE_SIZE;
1347
+ // Check that the next write could make max go beyond last
1348
+ auto last_max = wp_db->max_evicted_seq_.load();
1349
+ wp_db->AddCommitted(overwrite_seq, overwrite_seq);
1350
+ // Check that eviction has advanced the max
1351
+ ASSERT_LT(last_max, wp_db->max_evicted_seq_.load());
1352
+ // Check that the new max has not advanced the last seq
1353
+ ASSERT_LT(wp_db->max_evicted_seq_.load(), last_seq);
1354
+ for (auto txn : txns) {
1355
+ txn->Rollback();
1356
+ delete txn;
1357
+ }
1358
+ }
1359
+
1360
+ // A new snapshot should always be always larger than max_evicted_seq_
1361
+ // In very rare cases max could be below last published seq. Test that
1362
+ // taking snapshot will wait for max to catch up.
1363
+ TEST_P(WritePreparedTransactionTest, MaxCatchupWithNewSnapshot) {
1364
+ const size_t snapshot_cache_bits = 7; // same as default
1365
+ const size_t commit_cache_bits = 0; // only 1 entry => frequent eviction
1366
+ UpdateTransactionDBOptions(snapshot_cache_bits, commit_cache_bits);
1367
+ ASSERT_OK(ReOpen());
1368
+ WriteOptions woptions;
1369
+ WritePreparedTxnDB* wp_db = dynamic_cast<WritePreparedTxnDB*>(db);
1370
+
1371
+ const int writes = 50;
1372
+ const int batch_cnt = 4;
1373
+ ROCKSDB_NAMESPACE::port::Thread t1([&]() {
1374
+ for (int i = 0; i < writes; i++) {
1375
+ WriteBatch batch;
1376
+ // For duplicate keys cause 4 commit entries, each evicting an entry that
1377
+ // is not published yet, thus causing max evicted seq go higher than last
1378
+ // published.
1379
+ for (int b = 0; b < batch_cnt; b++) {
1380
+ ASSERT_OK(batch.Put("foo", "foo"));
1381
+ }
1382
+ ASSERT_OK(db->Write(woptions, &batch));
1383
+ }
1384
+ });
1385
+
1386
+ ROCKSDB_NAMESPACE::port::Thread t2([&]() {
1387
+ while (wp_db->max_evicted_seq_ == 0) { // wait for insert thread
1388
+ std::this_thread::yield();
1389
+ }
1390
+ for (int i = 0; i < 10; i++) {
1391
+ SequenceNumber max_lower_bound = wp_db->max_evicted_seq_;
1392
+ auto snap = db->GetSnapshot();
1393
+ if (snap->GetSequenceNumber() != 0) {
1394
+ // Value of max_evicted_seq_ when snapshot was taken in unknown. We thus
1395
+ // compare with the lower bound instead as an approximation.
1396
+ ASSERT_LT(max_lower_bound, snap->GetSequenceNumber());
1397
+ } // seq 0 is ok to be less than max since nothing is visible to it
1398
+ db->ReleaseSnapshot(snap);
1399
+ }
1400
+ });
1401
+
1402
+ t1.join();
1403
+ t2.join();
1404
+
1405
+ // Make sure that the test has worked and seq number has advanced as we
1406
+ // thought
1407
+ auto snap = db->GetSnapshot();
1408
+ ASSERT_GT(snap->GetSequenceNumber(), batch_cnt * writes - 1);
1409
+ db->ReleaseSnapshot(snap);
1410
+ }
1411
+
1412
+ // Test that reads without snapshots would not hit an undefined state
1413
+ TEST_P(WritePreparedTransactionTest, MaxCatchupWithUnbackedSnapshot) {
1414
+ const size_t snapshot_cache_bits = 7; // same as default
1415
+ const size_t commit_cache_bits = 0; // only 1 entry => frequent eviction
1416
+ UpdateTransactionDBOptions(snapshot_cache_bits, commit_cache_bits);
1417
+ ASSERT_OK(ReOpen());
1418
+ WriteOptions woptions;
1419
+ WritePreparedTxnDB* wp_db = dynamic_cast<WritePreparedTxnDB*>(db);
1420
+
1421
+ const int writes = 50;
1422
+ ROCKSDB_NAMESPACE::port::Thread t1([&]() {
1423
+ for (int i = 0; i < writes; i++) {
1424
+ WriteBatch batch;
1425
+ ASSERT_OK(batch.Put("key", "foo"));
1426
+ ASSERT_OK(db->Write(woptions, &batch));
1427
+ }
1428
+ });
1429
+
1430
+ ROCKSDB_NAMESPACE::port::Thread t2([&]() {
1431
+ while (wp_db->max_evicted_seq_ == 0) { // wait for insert thread
1432
+ std::this_thread::yield();
1433
+ }
1434
+ ReadOptions ropt;
1435
+ PinnableSlice pinnable_val;
1436
+ TransactionOptions txn_options;
1437
+ for (int i = 0; i < 10; i++) {
1438
+ auto s = db->Get(ropt, db->DefaultColumnFamily(), "key", &pinnable_val);
1439
+ ASSERT_TRUE(s.ok() || s.IsTryAgain());
1440
+ pinnable_val.Reset();
1441
+ Transaction* txn = db->BeginTransaction(woptions, txn_options);
1442
+ s = txn->Get(ropt, db->DefaultColumnFamily(), "key", &pinnable_val);
1443
+ ASSERT_TRUE(s.ok() || s.IsTryAgain());
1444
+ pinnable_val.Reset();
1445
+ std::vector<std::string> values;
1446
+ auto s_vec =
1447
+ txn->MultiGet(ropt, {db->DefaultColumnFamily()}, {"key"}, &values);
1448
+ ASSERT_EQ(1, values.size());
1449
+ ASSERT_EQ(1, s_vec.size());
1450
+ s = s_vec[0];
1451
+ ASSERT_TRUE(s.ok() || s.IsTryAgain());
1452
+ Slice key("key");
1453
+ txn->MultiGet(ropt, db->DefaultColumnFamily(), 1, &key, &pinnable_val, &s,
1454
+ true);
1455
+ ASSERT_TRUE(s.ok() || s.IsTryAgain());
1456
+ delete txn;
1457
+ }
1458
+ });
1459
+
1460
+ t1.join();
1461
+ t2.join();
1462
+
1463
+ // Make sure that the test has worked and seq number has advanced as we
1464
+ // thought
1465
+ auto snap = db->GetSnapshot();
1466
+ ASSERT_GT(snap->GetSequenceNumber(), writes - 1);
1467
+ db->ReleaseSnapshot(snap);
1468
+ }
1469
+
1470
+ // Check that old_commit_map_ cleanup works correctly if the snapshot equals
1471
+ // max_evicted_seq_.
1472
+ TEST_P(WritePreparedTransactionTest, CleanupSnapshotEqualToMax) {
1473
+ const size_t snapshot_cache_bits = 7; // same as default
1474
+ const size_t commit_cache_bits = 0; // only 1 entry => frequent eviction
1475
+ UpdateTransactionDBOptions(snapshot_cache_bits, commit_cache_bits);
1476
+ ASSERT_OK(ReOpen());
1477
+ WriteOptions woptions;
1478
+ WritePreparedTxnDB* wp_db = dynamic_cast<WritePreparedTxnDB*>(db);
1479
+ // Insert something to increase seq
1480
+ ASSERT_OK(db->Put(woptions, "key", "value"));
1481
+ auto snap = db->GetSnapshot();
1482
+ auto snap_seq = snap->GetSequenceNumber();
1483
+ // Another insert should trigger eviction + load snapshot from db
1484
+ ASSERT_OK(db->Put(woptions, "key", "value"));
1485
+ // This is the scenario that we check agaisnt
1486
+ ASSERT_EQ(snap_seq, wp_db->max_evicted_seq_);
1487
+ // old_commit_map_ now has some data that needs gc
1488
+ ASSERT_EQ(1, wp_db->snapshots_total_);
1489
+ ASSERT_EQ(1, wp_db->old_commit_map_.size());
1490
+
1491
+ db->ReleaseSnapshot(snap);
1492
+
1493
+ // Another insert should trigger eviction + load snapshot from db
1494
+ ASSERT_OK(db->Put(woptions, "key", "value"));
1495
+
1496
+ // the snapshot and related metadata must be properly garbage collected
1497
+ ASSERT_EQ(0, wp_db->snapshots_total_);
1498
+ ASSERT_TRUE(wp_db->snapshots_all_.empty());
1499
+ ASSERT_EQ(0, wp_db->old_commit_map_.size());
1500
+ }
1501
+
1502
+ TEST_P(WritePreparedTransactionTest, AdvanceSeqByOne) {
1503
+ auto snap = db->GetSnapshot();
1504
+ auto seq1 = snap->GetSequenceNumber();
1505
+ db->ReleaseSnapshot(snap);
1506
+
1507
+ WritePreparedTxnDB* wp_db = dynamic_cast<WritePreparedTxnDB*>(db);
1508
+ wp_db->AdvanceSeqByOne();
1509
+
1510
+ snap = db->GetSnapshot();
1511
+ auto seq2 = snap->GetSequenceNumber();
1512
+ db->ReleaseSnapshot(snap);
1513
+
1514
+ ASSERT_LT(seq1, seq2);
1515
+ }
1516
+
1517
+ // Test that the txn Initilize calls the overridden functions
1518
+ TEST_P(WritePreparedTransactionTest, TxnInitialize) {
1519
+ TransactionOptions txn_options;
1520
+ WriteOptions write_options;
1521
+ ASSERT_OK(db->Put(write_options, "key", "value"));
1522
+ Transaction* txn0 = db->BeginTransaction(write_options, txn_options);
1523
+ ASSERT_OK(txn0->SetName("xid"));
1524
+ ASSERT_OK(txn0->Put(Slice("key"), Slice("value1")));
1525
+ ASSERT_OK(txn0->Prepare());
1526
+
1527
+ // SetSnapshot is overridden to update min_uncommitted_
1528
+ txn_options.set_snapshot = true;
1529
+ Transaction* txn1 = db->BeginTransaction(write_options, txn_options);
1530
+ auto snap = txn1->GetSnapshot();
1531
+ auto snap_impl = reinterpret_cast<const SnapshotImpl*>(snap);
1532
+ // If ::Initialize calls the overriden SetSnapshot, min_uncommitted_ must be
1533
+ // udpated
1534
+ ASSERT_GT(snap_impl->min_uncommitted_, kMinUnCommittedSeq);
1535
+
1536
+ ASSERT_OK(txn0->Rollback());
1537
+ ASSERT_OK(txn1->Rollback());
1538
+ delete txn0;
1539
+ delete txn1;
1540
+ }
1541
+
1542
+ // This tests that transactions with duplicate keys perform correctly after max
1543
+ // is advancing their prepared sequence numbers. This will not be the case if
1544
+ // for example the txn does not add the prepared seq for the second sub-batch to
1545
+ // the PreparedHeap structure.
1546
+ TEST_P(WritePreparedTransactionTest, AdvanceMaxEvictedSeqWithDuplicates) {
1547
+ const size_t snapshot_cache_bits = 7; // same as default
1548
+ const size_t commit_cache_bits = 1; // disable commit cache
1549
+ UpdateTransactionDBOptions(snapshot_cache_bits, commit_cache_bits);
1550
+ ASSERT_OK(ReOpen());
1551
+
1552
+ ReadOptions ropt;
1553
+ PinnableSlice pinnable_val;
1554
+ WriteOptions write_options;
1555
+ TransactionOptions txn_options;
1556
+ Transaction* txn0 = db->BeginTransaction(write_options, txn_options);
1557
+ ASSERT_OK(txn0->SetName("xid"));
1558
+ ASSERT_OK(txn0->Put(Slice("key"), Slice("value1")));
1559
+ ASSERT_OK(txn0->Put(Slice("key"), Slice("value2")));
1560
+ ASSERT_OK(txn0->Prepare());
1561
+
1562
+ ASSERT_OK(db->Put(write_options, "key2", "value"));
1563
+ // Will cause max advance due to disabled commit cache
1564
+ ASSERT_OK(db->Put(write_options, "key3", "value"));
1565
+
1566
+ auto s = db->Get(ropt, db->DefaultColumnFamily(), "key", &pinnable_val);
1567
+ ASSERT_TRUE(s.IsNotFound());
1568
+ delete txn0;
1569
+
1570
+ WritePreparedTxnDB* wp_db = dynamic_cast<WritePreparedTxnDB*>(db);
1571
+ ASSERT_OK(wp_db->db_impl_->FlushWAL(true));
1572
+ wp_db->TEST_Crash();
1573
+ ASSERT_OK(ReOpenNoDelete());
1574
+ ASSERT_NE(db, nullptr);
1575
+ s = db->Get(ropt, db->DefaultColumnFamily(), "key", &pinnable_val);
1576
+ ASSERT_TRUE(s.IsNotFound());
1577
+
1578
+ txn0 = db->GetTransactionByName("xid");
1579
+ ASSERT_OK(txn0->Rollback());
1580
+ delete txn0;
1581
+ }
1582
+
1583
+ #ifndef ROCKSDB_VALGRIND_RUN
1584
+ // Stress SmallestUnCommittedSeq, which reads from both prepared_txns_ and
1585
+ // delayed_prepared_, when is run concurrently with advancing max_evicted_seq,
1586
+ // which moves prepared txns from prepared_txns_ to delayed_prepared_.
1587
+ TEST_P(WritePreparedTransactionTest, SmallestUnCommittedSeq) {
1588
+ const size_t snapshot_cache_bits = 7; // same as default
1589
+ const size_t commit_cache_bits = 1; // disable commit cache
1590
+ UpdateTransactionDBOptions(snapshot_cache_bits, commit_cache_bits);
1591
+ ASSERT_OK(ReOpen());
1592
+ WritePreparedTxnDB* wp_db = dynamic_cast<WritePreparedTxnDB*>(db);
1593
+ ReadOptions ropt;
1594
+ PinnableSlice pinnable_val;
1595
+ WriteOptions write_options;
1596
+ TransactionOptions txn_options;
1597
+ std::vector<Transaction*> txns, committed_txns;
1598
+
1599
+ const int cnt = 100;
1600
+ for (int i = 0; i < cnt; i++) {
1601
+ Transaction* txn = db->BeginTransaction(write_options, txn_options);
1602
+ ASSERT_OK(txn->SetName("xid" + ToString(i)));
1603
+ auto key = "key1" + ToString(i);
1604
+ auto value = "value1" + ToString(i);
1605
+ ASSERT_OK(txn->Put(Slice(key), Slice(value)));
1606
+ ASSERT_OK(txn->Prepare());
1607
+ txns.push_back(txn);
1608
+ }
1609
+
1610
+ port::Mutex mutex;
1611
+ Random rnd(1103);
1612
+ ROCKSDB_NAMESPACE::port::Thread commit_thread([&]() {
1613
+ for (int i = 0; i < cnt; i++) {
1614
+ uint32_t index = rnd.Uniform(cnt - i);
1615
+ Transaction* txn;
1616
+ {
1617
+ MutexLock l(&mutex);
1618
+ txn = txns[index];
1619
+ txns.erase(txns.begin() + index);
1620
+ }
1621
+ // Since commit cache is practically disabled, commit results in immediate
1622
+ // advance in max_evicted_seq_ and subsequently moving some prepared txns
1623
+ // to delayed_prepared_.
1624
+ ASSERT_OK(txn->Commit());
1625
+ committed_txns.push_back(txn);
1626
+ }
1627
+ });
1628
+ ROCKSDB_NAMESPACE::port::Thread read_thread([&]() {
1629
+ while (1) {
1630
+ MutexLock l(&mutex);
1631
+ if (txns.empty()) {
1632
+ break;
1633
+ }
1634
+ auto min_uncommitted = wp_db->SmallestUnCommittedSeq();
1635
+ ASSERT_LE(min_uncommitted, (*txns.begin())->GetId());
1636
+ }
1637
+ });
1638
+
1639
+ commit_thread.join();
1640
+ read_thread.join();
1641
+ for (auto txn : committed_txns) {
1642
+ delete txn;
1643
+ }
1644
+ }
1645
+ #endif // ROCKSDB_VALGRIND_RUN
1646
+
1647
+ TEST_P(SeqAdvanceConcurrentTest, SeqAdvanceConcurrent) {
1648
+ // Given the sequential run of txns, with this timeout we should never see a
1649
+ // deadlock nor a timeout unless we have a key conflict, which should be
1650
+ // almost infeasible.
1651
+ txn_db_options.transaction_lock_timeout = 1000;
1652
+ txn_db_options.default_lock_timeout = 1000;
1653
+ ASSERT_OK(ReOpen());
1654
+ FlushOptions fopt;
1655
+
1656
+ // Number of different txn types we use in this test
1657
+ const size_t type_cnt = 5;
1658
+ // The size of the first write group
1659
+ // TODO(myabandeh): This should be increase for pre-release tests
1660
+ const size_t first_group_size = 2;
1661
+ // Total number of txns we run in each test
1662
+ // TODO(myabandeh): This should be increase for pre-release tests
1663
+ const size_t txn_cnt = first_group_size + 1;
1664
+
1665
+ size_t base[txn_cnt + 1] = {
1666
+ 1,
1667
+ };
1668
+ for (size_t bi = 1; bi <= txn_cnt; bi++) {
1669
+ base[bi] = base[bi - 1] * type_cnt;
1670
+ }
1671
+ const size_t max_n = static_cast<size_t>(std::pow(type_cnt, txn_cnt));
1672
+ printf("Number of cases being tested is %" ROCKSDB_PRIszt "\n", max_n);
1673
+ for (size_t n = 0; n < max_n; n++) {
1674
+ if (n > 0) {
1675
+ ASSERT_OK(ReOpen());
1676
+ }
1677
+
1678
+ if (n % split_cnt_ != split_id_) continue;
1679
+ if (n % 1000 == 0) {
1680
+ printf("Tested %" ROCKSDB_PRIszt " cases so far\n", n);
1681
+ }
1682
+ DBImpl* db_impl = static_cast_with_check<DBImpl>(db->GetRootDB());
1683
+ auto seq = db_impl->TEST_GetLastVisibleSequence();
1684
+ with_empty_commits = 0;
1685
+ exp_seq = seq;
1686
+ // This is increased before writing the batch for commit
1687
+ commit_writes = 0;
1688
+ // This is increased before txn starts linking if it expects to do a commit
1689
+ // eventually
1690
+ expected_commits = 0;
1691
+ std::vector<port::Thread> threads;
1692
+
1693
+ linked = 0;
1694
+ std::atomic<bool> batch_formed(false);
1695
+ ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
1696
+ "WriteThread::EnterAsBatchGroupLeader:End",
1697
+ [&](void* /*arg*/) { batch_formed = true; });
1698
+ ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
1699
+ "WriteThread::JoinBatchGroup:Wait", [&](void* /*arg*/) {
1700
+ linked++;
1701
+ if (linked == 1) {
1702
+ // Wait until the others are linked too.
1703
+ while (linked < first_group_size) {
1704
+ }
1705
+ } else if (linked == 1 + first_group_size) {
1706
+ // Make the 2nd batch of the rest of writes plus any followup
1707
+ // commits from the first batch
1708
+ while (linked < txn_cnt + commit_writes) {
1709
+ }
1710
+ }
1711
+ // Then we will have one or more batches consisting of follow-up
1712
+ // commits from the 2nd batch. There is a bit of non-determinism here
1713
+ // but it should be tolerable.
1714
+ });
1715
+
1716
+ ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();
1717
+ for (size_t bi = 0; bi < txn_cnt; bi++) {
1718
+ // get the bi-th digit in number system based on type_cnt
1719
+ size_t d = (n % base[bi + 1]) / base[bi];
1720
+ switch (d) {
1721
+ case 0:
1722
+ threads.emplace_back(txn_t0, bi);
1723
+ break;
1724
+ case 1:
1725
+ threads.emplace_back(txn_t1, bi);
1726
+ break;
1727
+ case 2:
1728
+ threads.emplace_back(txn_t2, bi);
1729
+ break;
1730
+ case 3:
1731
+ threads.emplace_back(txn_t3, bi);
1732
+ break;
1733
+ case 4:
1734
+ threads.emplace_back(txn_t3, bi);
1735
+ break;
1736
+ default:
1737
+ FAIL();
1738
+ }
1739
+ // wait to be linked
1740
+ while (linked.load() <= bi) {
1741
+ }
1742
+ // after a queue of size first_group_size
1743
+ if (bi + 1 == first_group_size) {
1744
+ while (!batch_formed) {
1745
+ }
1746
+ // to make it more deterministic, wait until the commits are linked
1747
+ while (linked.load() <= bi + expected_commits) {
1748
+ }
1749
+ }
1750
+ }
1751
+ for (auto& t : threads) {
1752
+ t.join();
1753
+ }
1754
+ if (options.two_write_queues) {
1755
+ // In this case none of the above scheduling tricks to deterministically
1756
+ // form merged batches works because the writes go to separate queues.
1757
+ // This would result in different write groups in each run of the test. We
1758
+ // still keep the test since although non-deterministic and hard to debug,
1759
+ // it is still useful to have.
1760
+ // TODO(myabandeh): Add a deterministic unit test for two_write_queues
1761
+ }
1762
+
1763
+ // Check if memtable inserts advanced seq number as expected
1764
+ seq = db_impl->TEST_GetLastVisibleSequence();
1765
+ ASSERT_EQ(exp_seq, seq);
1766
+
1767
+ ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing();
1768
+ ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->ClearAllCallBacks();
1769
+
1770
+ // Check if recovery preserves the last sequence number
1771
+ ASSERT_OK(db_impl->FlushWAL(true));
1772
+ ASSERT_OK(ReOpenNoDelete());
1773
+ ASSERT_NE(db, nullptr);
1774
+ db_impl = static_cast_with_check<DBImpl>(db->GetRootDB());
1775
+ seq = db_impl->TEST_GetLastVisibleSequence();
1776
+ ASSERT_LE(exp_seq, seq + with_empty_commits);
1777
+
1778
+ // Check if flush preserves the last sequence number
1779
+ ASSERT_OK(db_impl->Flush(fopt));
1780
+ seq = db_impl->GetLatestSequenceNumber();
1781
+ ASSERT_LE(exp_seq, seq + with_empty_commits);
1782
+
1783
+ // Check if recovery after flush preserves the last sequence number
1784
+ ASSERT_OK(db_impl->FlushWAL(true));
1785
+ ASSERT_OK(ReOpenNoDelete());
1786
+ ASSERT_NE(db, nullptr);
1787
+ db_impl = static_cast_with_check<DBImpl>(db->GetRootDB());
1788
+ seq = db_impl->GetLatestSequenceNumber();
1789
+ ASSERT_LE(exp_seq, seq + with_empty_commits);
1790
+ }
1791
+ }
1792
+
1793
+ // Run a couple of different txns among them some uncommitted. Restart the db at
1794
+ // a couple points to check whether the list of uncommitted txns are recovered
1795
+ // properly.
1796
+ TEST_P(WritePreparedTransactionTest, BasicRecovery) {
1797
+ options.disable_auto_compactions = true;
1798
+ ASSERT_OK(ReOpen());
1799
+ WritePreparedTxnDB* wp_db = dynamic_cast<WritePreparedTxnDB*>(db);
1800
+
1801
+ txn_t0(0);
1802
+
1803
+ TransactionOptions txn_options;
1804
+ WriteOptions write_options;
1805
+ size_t index = 1000;
1806
+ Transaction* txn0 = db->BeginTransaction(write_options, txn_options);
1807
+ auto istr0 = std::to_string(index);
1808
+ auto s = txn0->SetName("xid" + istr0);
1809
+ ASSERT_OK(s);
1810
+ s = txn0->Put(Slice("foo0" + istr0), Slice("bar0" + istr0));
1811
+ ASSERT_OK(s);
1812
+ s = txn0->Prepare();
1813
+ ASSERT_OK(s);
1814
+ auto prep_seq_0 = txn0->GetId();
1815
+
1816
+ txn_t1(0);
1817
+
1818
+ index++;
1819
+ Transaction* txn1 = db->BeginTransaction(write_options, txn_options);
1820
+ auto istr1 = std::to_string(index);
1821
+ s = txn1->SetName("xid" + istr1);
1822
+ ASSERT_OK(s);
1823
+ s = txn1->Put(Slice("foo1" + istr1), Slice("bar"));
1824
+ ASSERT_OK(s);
1825
+ s = txn1->Prepare();
1826
+ ASSERT_OK(s);
1827
+ auto prep_seq_1 = txn1->GetId();
1828
+
1829
+ txn_t2(0);
1830
+
1831
+ ReadOptions ropt;
1832
+ PinnableSlice pinnable_val;
1833
+ // Check the value is not committed before restart
1834
+ s = db->Get(ropt, db->DefaultColumnFamily(), "foo0" + istr0, &pinnable_val);
1835
+ ASSERT_TRUE(s.IsNotFound());
1836
+ pinnable_val.Reset();
1837
+
1838
+ delete txn0;
1839
+ delete txn1;
1840
+ ASSERT_OK(wp_db->db_impl_->FlushWAL(true));
1841
+ wp_db->TEST_Crash();
1842
+ ASSERT_OK(ReOpenNoDelete());
1843
+ ASSERT_NE(db, nullptr);
1844
+ wp_db = dynamic_cast<WritePreparedTxnDB*>(db);
1845
+ // After recovery, all the uncommitted txns (0 and 1) should be inserted into
1846
+ // delayed_prepared_
1847
+ ASSERT_TRUE(wp_db->prepared_txns_.empty());
1848
+ ASSERT_FALSE(wp_db->delayed_prepared_empty_);
1849
+ ASSERT_LE(prep_seq_0, wp_db->max_evicted_seq_);
1850
+ ASSERT_LE(prep_seq_1, wp_db->max_evicted_seq_);
1851
+ {
1852
+ ReadLock rl(&wp_db->prepared_mutex_);
1853
+ ASSERT_EQ(2, wp_db->delayed_prepared_.size());
1854
+ ASSERT_TRUE(wp_db->delayed_prepared_.find(prep_seq_0) !=
1855
+ wp_db->delayed_prepared_.end());
1856
+ ASSERT_TRUE(wp_db->delayed_prepared_.find(prep_seq_1) !=
1857
+ wp_db->delayed_prepared_.end());
1858
+ }
1859
+
1860
+ // Check the value is still not committed after restart
1861
+ s = db->Get(ropt, db->DefaultColumnFamily(), "foo0" + istr0, &pinnable_val);
1862
+ ASSERT_TRUE(s.IsNotFound());
1863
+ pinnable_val.Reset();
1864
+
1865
+ txn_t3(0);
1866
+
1867
+ // Test that a recovered txn will be properly marked committed for the next
1868
+ // recovery
1869
+ txn1 = db->GetTransactionByName("xid" + istr1);
1870
+ ASSERT_NE(txn1, nullptr);
1871
+ ASSERT_OK(txn1->Commit());
1872
+ delete txn1;
1873
+
1874
+ index++;
1875
+ Transaction* txn2 = db->BeginTransaction(write_options, txn_options);
1876
+ auto istr2 = std::to_string(index);
1877
+ s = txn2->SetName("xid" + istr2);
1878
+ ASSERT_OK(s);
1879
+ s = txn2->Put(Slice("foo2" + istr2), Slice("bar"));
1880
+ ASSERT_OK(s);
1881
+ s = txn2->Prepare();
1882
+ ASSERT_OK(s);
1883
+ auto prep_seq_2 = txn2->GetId();
1884
+
1885
+ delete txn2;
1886
+ ASSERT_OK(wp_db->db_impl_->FlushWAL(true));
1887
+ wp_db->TEST_Crash();
1888
+ ASSERT_OK(ReOpenNoDelete());
1889
+ ASSERT_NE(db, nullptr);
1890
+ wp_db = dynamic_cast<WritePreparedTxnDB*>(db);
1891
+ ASSERT_TRUE(wp_db->prepared_txns_.empty());
1892
+ ASSERT_FALSE(wp_db->delayed_prepared_empty_);
1893
+
1894
+ // 0 and 2 are prepared and 1 is committed
1895
+ {
1896
+ ReadLock rl(&wp_db->prepared_mutex_);
1897
+ ASSERT_EQ(2, wp_db->delayed_prepared_.size());
1898
+ const auto& end = wp_db->delayed_prepared_.end();
1899
+ ASSERT_NE(wp_db->delayed_prepared_.find(prep_seq_0), end);
1900
+ ASSERT_EQ(wp_db->delayed_prepared_.find(prep_seq_1), end);
1901
+ ASSERT_NE(wp_db->delayed_prepared_.find(prep_seq_2), end);
1902
+ }
1903
+ ASSERT_LE(prep_seq_0, wp_db->max_evicted_seq_);
1904
+ ASSERT_LE(prep_seq_2, wp_db->max_evicted_seq_);
1905
+
1906
+ // Commit all the remaining txns
1907
+ txn0 = db->GetTransactionByName("xid" + istr0);
1908
+ ASSERT_NE(txn0, nullptr);
1909
+ ASSERT_OK(txn0->Commit());
1910
+ txn2 = db->GetTransactionByName("xid" + istr2);
1911
+ ASSERT_NE(txn2, nullptr);
1912
+ ASSERT_OK(txn2->Commit());
1913
+
1914
+ // Check the value is committed after commit
1915
+ s = db->Get(ropt, db->DefaultColumnFamily(), "foo0" + istr0, &pinnable_val);
1916
+ ASSERT_TRUE(s.ok());
1917
+ ASSERT_TRUE(pinnable_val == ("bar0" + istr0));
1918
+ pinnable_val.Reset();
1919
+
1920
+ delete txn0;
1921
+ delete txn2;
1922
+ ASSERT_OK(wp_db->db_impl_->FlushWAL(true));
1923
+ ASSERT_OK(ReOpenNoDelete());
1924
+ ASSERT_NE(db, nullptr);
1925
+ wp_db = dynamic_cast<WritePreparedTxnDB*>(db);
1926
+ ASSERT_TRUE(wp_db->prepared_txns_.empty());
1927
+ ASSERT_TRUE(wp_db->delayed_prepared_empty_);
1928
+
1929
+ // Check the value is still committed after recovery
1930
+ s = db->Get(ropt, db->DefaultColumnFamily(), "foo0" + istr0, &pinnable_val);
1931
+ ASSERT_TRUE(s.ok());
1932
+ ASSERT_TRUE(pinnable_val == ("bar0" + istr0));
1933
+ pinnable_val.Reset();
1934
+ }
1935
+
1936
+ // After recovery the commit map is empty while the max is set. The code would
1937
+ // go through a different path which requires a separate test. Test that the
1938
+ // committed data before the restart is visible to all snapshots.
1939
+ TEST_P(WritePreparedTransactionTest, IsInSnapshotEmptyMap) {
1940
+ for (bool end_with_prepare : {false, true}) {
1941
+ ASSERT_OK(ReOpen());
1942
+ WriteOptions woptions;
1943
+ ASSERT_OK(db->Put(woptions, "key", "value"));
1944
+ ASSERT_OK(db->Put(woptions, "key", "value"));
1945
+ ASSERT_OK(db->Put(woptions, "key", "value"));
1946
+ SequenceNumber prepare_seq = kMaxSequenceNumber;
1947
+ if (end_with_prepare) {
1948
+ TransactionOptions txn_options;
1949
+ Transaction* txn = db->BeginTransaction(woptions, txn_options);
1950
+ ASSERT_OK(txn->SetName("xid0"));
1951
+ ASSERT_OK(txn->Prepare());
1952
+ prepare_seq = txn->GetId();
1953
+ delete txn;
1954
+ }
1955
+ dynamic_cast<WritePreparedTxnDB*>(db)->TEST_Crash();
1956
+ auto db_impl = static_cast_with_check<DBImpl>(db->GetRootDB());
1957
+ ASSERT_OK(db_impl->FlushWAL(true));
1958
+ ASSERT_OK(ReOpenNoDelete());
1959
+ WritePreparedTxnDB* wp_db = dynamic_cast<WritePreparedTxnDB*>(db);
1960
+ ASSERT_NE(wp_db, nullptr);
1961
+ ASSERT_GT(wp_db->max_evicted_seq_, 0); // max after recovery
1962
+ // Take a snapshot right after recovery
1963
+ const Snapshot* snap = db->GetSnapshot();
1964
+ auto snap_seq = snap->GetSequenceNumber();
1965
+ ASSERT_GT(snap_seq, 0);
1966
+
1967
+ for (SequenceNumber seq = 0;
1968
+ seq <= wp_db->max_evicted_seq_ && seq != prepare_seq; seq++) {
1969
+ ASSERT_TRUE(wp_db->IsInSnapshot(seq, snap_seq));
1970
+ }
1971
+ if (end_with_prepare) {
1972
+ ASSERT_FALSE(wp_db->IsInSnapshot(prepare_seq, snap_seq));
1973
+ }
1974
+ // trivial check
1975
+ ASSERT_FALSE(wp_db->IsInSnapshot(snap_seq + 1, snap_seq));
1976
+
1977
+ db->ReleaseSnapshot(snap);
1978
+
1979
+ ASSERT_OK(db->Put(woptions, "key", "value"));
1980
+ // Take a snapshot after some writes
1981
+ snap = db->GetSnapshot();
1982
+ snap_seq = snap->GetSequenceNumber();
1983
+ for (SequenceNumber seq = 0;
1984
+ seq <= wp_db->max_evicted_seq_ && seq != prepare_seq; seq++) {
1985
+ ASSERT_TRUE(wp_db->IsInSnapshot(seq, snap_seq));
1986
+ }
1987
+ if (end_with_prepare) {
1988
+ ASSERT_FALSE(wp_db->IsInSnapshot(prepare_seq, snap_seq));
1989
+ }
1990
+ // trivial check
1991
+ ASSERT_FALSE(wp_db->IsInSnapshot(snap_seq + 1, snap_seq));
1992
+
1993
+ db->ReleaseSnapshot(snap);
1994
+ }
1995
+ }
1996
+
1997
+ // Shows the contract of IsInSnapshot when called on invalid/released snapshots
1998
+ TEST_P(WritePreparedTransactionTest, IsInSnapshotReleased) {
1999
+ WritePreparedTxnDB* wp_db = dynamic_cast<WritePreparedTxnDB*>(db);
2000
+ WriteOptions woptions;
2001
+ ASSERT_OK(db->Put(woptions, "key", "value"));
2002
+ // snap seq = 1
2003
+ const Snapshot* snap1 = db->GetSnapshot();
2004
+ ASSERT_OK(db->Put(woptions, "key", "value"));
2005
+ ASSERT_OK(db->Put(woptions, "key", "value"));
2006
+ // snap seq = 3
2007
+ const Snapshot* snap2 = db->GetSnapshot();
2008
+ const SequenceNumber seq = 1;
2009
+ // Evict seq out of commit cache
2010
+ size_t overwrite_seq = wp_db->COMMIT_CACHE_SIZE + seq;
2011
+ wp_db->AddCommitted(overwrite_seq, overwrite_seq);
2012
+ SequenceNumber snap_seq;
2013
+ uint64_t min_uncommitted = kMinUnCommittedSeq;
2014
+ bool released;
2015
+
2016
+ released = false;
2017
+ snap_seq = snap1->GetSequenceNumber();
2018
+ ASSERT_LE(seq, snap_seq);
2019
+ // Valid snapshot lower than max
2020
+ ASSERT_LE(snap_seq, wp_db->max_evicted_seq_);
2021
+ ASSERT_TRUE(wp_db->IsInSnapshot(seq, snap_seq, min_uncommitted, &released));
2022
+ ASSERT_FALSE(released);
2023
+
2024
+ released = false;
2025
+ snap_seq = snap1->GetSequenceNumber();
2026
+ // Invalid snapshot lower than max
2027
+ ASSERT_LE(snap_seq + 1, wp_db->max_evicted_seq_);
2028
+ ASSERT_TRUE(
2029
+ wp_db->IsInSnapshot(seq, snap_seq + 1, min_uncommitted, &released));
2030
+ ASSERT_TRUE(released);
2031
+
2032
+ db->ReleaseSnapshot(snap1);
2033
+
2034
+ released = false;
2035
+ // Released snapshot lower than max
2036
+ ASSERT_TRUE(wp_db->IsInSnapshot(seq, snap_seq, min_uncommitted, &released));
2037
+ // The release does not take effect until the next max advance
2038
+ ASSERT_FALSE(released);
2039
+
2040
+ released = false;
2041
+ // Invalid snapshot lower than max
2042
+ ASSERT_TRUE(
2043
+ wp_db->IsInSnapshot(seq, snap_seq + 1, min_uncommitted, &released));
2044
+ ASSERT_TRUE(released);
2045
+
2046
+ // This makes the snapshot release take effect in the txn db structures
2047
+ wp_db->AdvanceMaxEvictedSeq(wp_db->max_evicted_seq_,
2048
+ wp_db->max_evicted_seq_ + 1);
2049
+
2050
+ released = false;
2051
+ // Released snapshot lower than max
2052
+ ASSERT_TRUE(wp_db->IsInSnapshot(seq, snap_seq, min_uncommitted, &released));
2053
+ ASSERT_TRUE(released);
2054
+
2055
+ released = false;
2056
+ // Invalid snapshot lower than max
2057
+ ASSERT_TRUE(
2058
+ wp_db->IsInSnapshot(seq, snap_seq + 1, min_uncommitted, &released));
2059
+ ASSERT_TRUE(released);
2060
+
2061
+ snap_seq = snap2->GetSequenceNumber();
2062
+
2063
+ released = false;
2064
+ // Unreleased snapshot lower than max
2065
+ ASSERT_TRUE(wp_db->IsInSnapshot(seq, snap_seq, min_uncommitted, &released));
2066
+ ASSERT_FALSE(released);
2067
+
2068
+ db->ReleaseSnapshot(snap2);
2069
+ }
2070
+
2071
+ // Test WritePreparedTxnDB's IsInSnapshot against different ordering of
2072
+ // snapshot, max_committed_seq_, prepared, and commit entries.
2073
+ TEST_P(WritePreparedTransactionTest, IsInSnapshot) {
2074
+ WriteOptions wo;
2075
+ // Use small commit cache to trigger lots of eviction and fast advance of
2076
+ // max_evicted_seq_
2077
+ const size_t commit_cache_bits = 3;
2078
+ // Same for snapshot cache size
2079
+ const size_t snapshot_cache_bits = 2;
2080
+
2081
+ // Take some preliminary snapshots first. This is to stress the data structure
2082
+ // that holds the old snapshots as it will be designed to be efficient when
2083
+ // only a few snapshots are below the max_evicted_seq_.
2084
+ for (int max_snapshots = 1; max_snapshots < 20; max_snapshots++) {
2085
+ // Leave some gap between the preliminary snapshots and the final snapshot
2086
+ // that we check. This should also test different overlapping scenarios
2087
+ // between the last snapshot and the commits.
2088
+ for (int max_gap = 1; max_gap < 10; max_gap++) {
2089
+ // Since we do not actually write to db, we mock the seq as it would be
2090
+ // increased by the db. The only exception is that we need db seq to
2091
+ // advance for our snapshots, for which we apply a dummy put each time we
2092
+ // increase our mock of seq.
2093
+ uint64_t seq = 0;
2094
+ // At each step we prepare a txn and then we commit it in the next txn.
2095
+ // This emulates the consecutive transactions that write to the same key
2096
+ uint64_t cur_txn = 0;
2097
+ // Number of snapshots taken so far
2098
+ int num_snapshots = 0;
2099
+ // Number of gaps applied so far
2100
+ int gap_cnt = 0;
2101
+ // The final snapshot that we will inspect
2102
+ uint64_t snapshot = 0;
2103
+ bool found_committed = false;
2104
+ // To stress the data structure that maintains prepared txns, we add a new
2105
+ // prepared txn at each cycle. These are not meant to be committed for
2106
+ // snapshot inspection.
2107
+ std::set<uint64_t> prepared;
2108
+ // We keep the list of txns committed before we take the last snapshot.
2109
+ // These should be the only seq numbers that will be found in the snapshot
2110
+ std::set<uint64_t> committed_before;
2111
+ // The set of commit seq numbers to be excluded from IsInSnapshot queries
2112
+ std::set<uint64_t> commit_seqs;
2113
+ DBImpl* mock_db = new DBImpl(options, dbname);
2114
+ UpdateTransactionDBOptions(snapshot_cache_bits, commit_cache_bits);
2115
+ std::unique_ptr<WritePreparedTxnDBMock> wp_db(
2116
+ new WritePreparedTxnDBMock(mock_db, txn_db_options));
2117
+ // We continue until max advances a bit beyond the snapshot.
2118
+ while (!snapshot || wp_db->max_evicted_seq_ < snapshot + 100) {
2119
+ // do prepare for a transaction
2120
+ seq++;
2121
+ wp_db->AddPrepared(seq);
2122
+ prepared.insert(seq);
2123
+
2124
+ // If cur_txn is not started, do prepare for it.
2125
+ if (!cur_txn) {
2126
+ seq++;
2127
+ cur_txn = seq;
2128
+ wp_db->AddPrepared(cur_txn);
2129
+ } else { // else commit it
2130
+ seq++;
2131
+ wp_db->AddCommitted(cur_txn, seq);
2132
+ wp_db->RemovePrepared(cur_txn);
2133
+ commit_seqs.insert(seq);
2134
+ if (!snapshot) {
2135
+ committed_before.insert(cur_txn);
2136
+ }
2137
+ cur_txn = 0;
2138
+ }
2139
+
2140
+ if (num_snapshots < max_snapshots - 1) {
2141
+ // Take preliminary snapshots
2142
+ wp_db->TakeSnapshot(seq);
2143
+ num_snapshots++;
2144
+ } else if (gap_cnt < max_gap) {
2145
+ // Wait for some gap before taking the final snapshot
2146
+ gap_cnt++;
2147
+ } else if (!snapshot) {
2148
+ // Take the final snapshot if it is not already taken
2149
+ snapshot = seq;
2150
+ wp_db->TakeSnapshot(snapshot);
2151
+ num_snapshots++;
2152
+ }
2153
+
2154
+ // If the snapshot is taken, verify seq numbers visible to it. We redo
2155
+ // it at each cycle to test that the system is still sound when
2156
+ // max_evicted_seq_ advances.
2157
+ if (snapshot) {
2158
+ for (uint64_t s = 1;
2159
+ s <= seq && commit_seqs.find(s) == commit_seqs.end(); s++) {
2160
+ bool was_committed =
2161
+ (committed_before.find(s) != committed_before.end());
2162
+ bool is_in_snapshot = wp_db->IsInSnapshot(s, snapshot);
2163
+ if (was_committed != is_in_snapshot) {
2164
+ printf("max_snapshots %d max_gap %d seq %" PRIu64 " max %" PRIu64
2165
+ " snapshot %" PRIu64
2166
+ " gap_cnt %d num_snapshots %d s %" PRIu64 "\n",
2167
+ max_snapshots, max_gap, seq,
2168
+ wp_db->max_evicted_seq_.load(), snapshot, gap_cnt,
2169
+ num_snapshots, s);
2170
+ }
2171
+ ASSERT_EQ(was_committed, is_in_snapshot);
2172
+ found_committed = found_committed || is_in_snapshot;
2173
+ }
2174
+ }
2175
+ }
2176
+ // Safety check to make sure the test actually ran
2177
+ ASSERT_TRUE(found_committed);
2178
+ // As an extra check, check if prepared set will be properly empty after
2179
+ // they are committed.
2180
+ if (cur_txn) {
2181
+ wp_db->AddCommitted(cur_txn, seq);
2182
+ wp_db->RemovePrepared(cur_txn);
2183
+ }
2184
+ for (auto p : prepared) {
2185
+ wp_db->AddCommitted(p, seq);
2186
+ wp_db->RemovePrepared(p);
2187
+ }
2188
+ ASSERT_TRUE(wp_db->delayed_prepared_.empty());
2189
+ ASSERT_TRUE(wp_db->prepared_txns_.empty());
2190
+ }
2191
+ }
2192
+ }
2193
+
2194
+ void ASSERT_SAME(ReadOptions roptions, TransactionDB* db, Status exp_s,
2195
+ PinnableSlice& exp_v, Slice key) {
2196
+ Status s;
2197
+ PinnableSlice v;
2198
+ s = db->Get(roptions, db->DefaultColumnFamily(), key, &v);
2199
+ ASSERT_EQ(exp_s, s);
2200
+ ASSERT_TRUE(s.ok() || s.IsNotFound());
2201
+ if (s.ok()) {
2202
+ ASSERT_TRUE(exp_v == v);
2203
+ }
2204
+
2205
+ // Try with MultiGet API too
2206
+ std::vector<std::string> values;
2207
+ auto s_vec =
2208
+ db->MultiGet(roptions, {db->DefaultColumnFamily()}, {key}, &values);
2209
+ ASSERT_EQ(1, values.size());
2210
+ ASSERT_EQ(1, s_vec.size());
2211
+ s = s_vec[0];
2212
+ ASSERT_EQ(exp_s, s);
2213
+ ASSERT_TRUE(s.ok() || s.IsNotFound());
2214
+ if (s.ok()) {
2215
+ ASSERT_TRUE(exp_v == values[0]);
2216
+ }
2217
+ }
2218
+
2219
+ void ASSERT_SAME(TransactionDB* db, Status exp_s, PinnableSlice& exp_v,
2220
+ Slice key) {
2221
+ ASSERT_SAME(ReadOptions(), db, exp_s, exp_v, key);
2222
+ }
2223
+
2224
+ TEST_P(WritePreparedTransactionTest, Rollback) {
2225
+ ReadOptions roptions;
2226
+ WriteOptions woptions;
2227
+ TransactionOptions txn_options;
2228
+ const size_t num_keys = 4;
2229
+ const size_t num_values = 5;
2230
+ for (size_t ikey = 1; ikey <= num_keys; ikey++) {
2231
+ for (size_t ivalue = 0; ivalue < num_values; ivalue++) {
2232
+ for (bool crash : {false, true}) {
2233
+ ASSERT_OK(ReOpen());
2234
+ WritePreparedTxnDB* wp_db = dynamic_cast<WritePreparedTxnDB*>(db);
2235
+ std::string key_str = "key" + ToString(ikey);
2236
+ switch (ivalue) {
2237
+ case 0:
2238
+ break;
2239
+ case 1:
2240
+ ASSERT_OK(db->Put(woptions, key_str, "initvalue1"));
2241
+ break;
2242
+ case 2:
2243
+ ASSERT_OK(db->Merge(woptions, key_str, "initvalue2"));
2244
+ break;
2245
+ case 3:
2246
+ ASSERT_OK(db->Delete(woptions, key_str));
2247
+ break;
2248
+ case 4:
2249
+ ASSERT_OK(db->SingleDelete(woptions, key_str));
2250
+ break;
2251
+ default:
2252
+ FAIL();
2253
+ }
2254
+
2255
+ PinnableSlice v1;
2256
+ auto s1 =
2257
+ db->Get(roptions, db->DefaultColumnFamily(), Slice("key1"), &v1);
2258
+ PinnableSlice v2;
2259
+ auto s2 =
2260
+ db->Get(roptions, db->DefaultColumnFamily(), Slice("key2"), &v2);
2261
+ PinnableSlice v3;
2262
+ auto s3 =
2263
+ db->Get(roptions, db->DefaultColumnFamily(), Slice("key3"), &v3);
2264
+ PinnableSlice v4;
2265
+ auto s4 =
2266
+ db->Get(roptions, db->DefaultColumnFamily(), Slice("key4"), &v4);
2267
+ Transaction* txn = db->BeginTransaction(woptions, txn_options);
2268
+ auto s = txn->SetName("xid0");
2269
+ ASSERT_OK(s);
2270
+ s = txn->Put(Slice("key1"), Slice("value1"));
2271
+ ASSERT_OK(s);
2272
+ s = txn->Merge(Slice("key2"), Slice("value2"));
2273
+ ASSERT_OK(s);
2274
+ s = txn->Delete(Slice("key3"));
2275
+ ASSERT_OK(s);
2276
+ s = txn->SingleDelete(Slice("key4"));
2277
+ ASSERT_OK(s);
2278
+ s = txn->Prepare();
2279
+ ASSERT_OK(s);
2280
+
2281
+ {
2282
+ ReadLock rl(&wp_db->prepared_mutex_);
2283
+ ASSERT_FALSE(wp_db->prepared_txns_.empty());
2284
+ ASSERT_EQ(txn->GetId(), wp_db->prepared_txns_.top());
2285
+ }
2286
+
2287
+ ASSERT_SAME(db, s1, v1, "key1");
2288
+ ASSERT_SAME(db, s2, v2, "key2");
2289
+ ASSERT_SAME(db, s3, v3, "key3");
2290
+ ASSERT_SAME(db, s4, v4, "key4");
2291
+
2292
+ if (crash) {
2293
+ delete txn;
2294
+ auto db_impl = static_cast_with_check<DBImpl>(db->GetRootDB());
2295
+ ASSERT_OK(db_impl->FlushWAL(true));
2296
+ dynamic_cast<WritePreparedTxnDB*>(db)->TEST_Crash();
2297
+ ASSERT_OK(ReOpenNoDelete());
2298
+ ASSERT_NE(db, nullptr);
2299
+ wp_db = dynamic_cast<WritePreparedTxnDB*>(db);
2300
+ txn = db->GetTransactionByName("xid0");
2301
+ ASSERT_FALSE(wp_db->delayed_prepared_empty_);
2302
+ ReadLock rl(&wp_db->prepared_mutex_);
2303
+ ASSERT_TRUE(wp_db->prepared_txns_.empty());
2304
+ ASSERT_FALSE(wp_db->delayed_prepared_.empty());
2305
+ ASSERT_TRUE(wp_db->delayed_prepared_.find(txn->GetId()) !=
2306
+ wp_db->delayed_prepared_.end());
2307
+ }
2308
+
2309
+ ASSERT_SAME(db, s1, v1, "key1");
2310
+ ASSERT_SAME(db, s2, v2, "key2");
2311
+ ASSERT_SAME(db, s3, v3, "key3");
2312
+ ASSERT_SAME(db, s4, v4, "key4");
2313
+
2314
+ s = txn->Rollback();
2315
+ ASSERT_OK(s);
2316
+
2317
+ {
2318
+ ASSERT_TRUE(wp_db->delayed_prepared_empty_);
2319
+ ReadLock rl(&wp_db->prepared_mutex_);
2320
+ ASSERT_TRUE(wp_db->prepared_txns_.empty());
2321
+ ASSERT_TRUE(wp_db->delayed_prepared_.empty());
2322
+ }
2323
+
2324
+ ASSERT_SAME(db, s1, v1, "key1");
2325
+ ASSERT_SAME(db, s2, v2, "key2");
2326
+ ASSERT_SAME(db, s3, v3, "key3");
2327
+ ASSERT_SAME(db, s4, v4, "key4");
2328
+ delete txn;
2329
+ }
2330
+ }
2331
+ }
2332
+ }
2333
+
2334
+ TEST_P(WritePreparedTransactionTest, DisableGCDuringRecovery) {
2335
+ // Use large buffer to avoid memtable flush after 1024 insertions
2336
+ options.write_buffer_size = 1024 * 1024;
2337
+ ASSERT_OK(ReOpen());
2338
+ std::vector<KeyVersion> versions;
2339
+ uint64_t seq = 0;
2340
+ for (uint64_t i = 1; i <= 1024; i++) {
2341
+ std::string v = "bar" + ToString(i);
2342
+ ASSERT_OK(db->Put(WriteOptions(), "foo", v));
2343
+ VerifyKeys({{"foo", v}});
2344
+ seq++; // one for the key/value
2345
+ KeyVersion kv = {"foo", v, seq, kTypeValue};
2346
+ if (options.two_write_queues) {
2347
+ seq++; // one for the commit
2348
+ }
2349
+ versions.emplace_back(kv);
2350
+ }
2351
+ std::reverse(std::begin(versions), std::end(versions));
2352
+ VerifyInternalKeys(versions);
2353
+ DBImpl* db_impl = static_cast_with_check<DBImpl>(db->GetRootDB());
2354
+ ASSERT_OK(db_impl->FlushWAL(true));
2355
+ // Use small buffer to ensure memtable flush during recovery
2356
+ options.write_buffer_size = 1024;
2357
+ ASSERT_OK(ReOpenNoDelete());
2358
+ VerifyInternalKeys(versions);
2359
+ }
2360
+
2361
+ TEST_P(WritePreparedTransactionTest, SequenceNumberZero) {
2362
+ ASSERT_OK(db->Put(WriteOptions(), "foo", "bar"));
2363
+ VerifyKeys({{"foo", "bar"}});
2364
+ const Snapshot* snapshot = db->GetSnapshot();
2365
+ ASSERT_OK(db->Flush(FlushOptions()));
2366
+ // Dummy keys to keep compaction from trivially moving files and bypassing the
2367
+ // actual compaction logic.
2368
+ ASSERT_OK(db->Put(WriteOptions(), "a", "dummy"));
2369
+ ASSERT_OK(db->Put(WriteOptions(), "z", "dummy"));
2370
+ ASSERT_OK(db->CompactRange(CompactRangeOptions(), nullptr, nullptr));
2371
+ // Compaction will output keys with sequence number 0 if they are visible to
2372
+ // the earliest snapshot. Make sure IsInSnapshot() reports sequence number 0
2373
+ // as visible to any snapshot.
2374
+ VerifyKeys({{"foo", "bar"}});
2375
+ VerifyKeys({{"foo", "bar"}}, snapshot);
2376
+ VerifyInternalKeys({{"foo", "bar", 0, kTypeValue}});
2377
+ db->ReleaseSnapshot(snapshot);
2378
+ }
2379
+
2380
+ // Compaction should not remove a key if it is not committed, and should
2381
+ // proceed with older versions of the key as-if the new version doesn't exist.
2382
+ TEST_P(WritePreparedTransactionTest, CompactionShouldKeepUncommittedKeys) {
2383
+ options.disable_auto_compactions = true;
2384
+ ASSERT_OK(ReOpen());
2385
+ DBImpl* db_impl = static_cast_with_check<DBImpl>(db->GetRootDB());
2386
+ // Snapshots to keep keys from being evicted.
2387
+ std::vector<const Snapshot*> snapshots;
2388
+ // Keep track of expected sequence number.
2389
+ SequenceNumber expected_seq = 0;
2390
+
2391
+ auto add_key = [&](std::function<Status()> func) {
2392
+ ASSERT_OK(func());
2393
+ expected_seq++;
2394
+ if (options.two_write_queues) {
2395
+ expected_seq++; // 1 for commit
2396
+ }
2397
+ ASSERT_EQ(expected_seq, db_impl->TEST_GetLastVisibleSequence());
2398
+ snapshots.push_back(db->GetSnapshot());
2399
+ };
2400
+
2401
+ // Each key here represents a standalone test case.
2402
+ add_key([&]() { return db->Put(WriteOptions(), "key1", "value1_1"); });
2403
+ add_key([&]() { return db->Put(WriteOptions(), "key2", "value2_1"); });
2404
+ add_key([&]() { return db->Put(WriteOptions(), "key3", "value3_1"); });
2405
+ add_key([&]() { return db->Put(WriteOptions(), "key4", "value4_1"); });
2406
+ add_key([&]() { return db->Merge(WriteOptions(), "key5", "value5_1"); });
2407
+ add_key([&]() { return db->Merge(WriteOptions(), "key5", "value5_2"); });
2408
+ add_key([&]() { return db->Put(WriteOptions(), "key6", "value6_1"); });
2409
+ add_key([&]() { return db->Put(WriteOptions(), "key7", "value7_1"); });
2410
+ ASSERT_OK(db->Flush(FlushOptions()));
2411
+ add_key([&]() { return db->Delete(WriteOptions(), "key6"); });
2412
+ add_key([&]() { return db->SingleDelete(WriteOptions(), "key7"); });
2413
+
2414
+ auto* transaction = db->BeginTransaction(WriteOptions());
2415
+ ASSERT_OK(transaction->SetName("txn"));
2416
+ ASSERT_OK(transaction->Put("key1", "value1_2"));
2417
+ ASSERT_OK(transaction->Delete("key2"));
2418
+ ASSERT_OK(transaction->SingleDelete("key3"));
2419
+ ASSERT_OK(transaction->Merge("key4", "value4_2"));
2420
+ ASSERT_OK(transaction->Merge("key5", "value5_3"));
2421
+ ASSERT_OK(transaction->Put("key6", "value6_2"));
2422
+ ASSERT_OK(transaction->Put("key7", "value7_2"));
2423
+ // Prepare but not commit.
2424
+ ASSERT_OK(transaction->Prepare());
2425
+ ASSERT_EQ(++expected_seq, db->GetLatestSequenceNumber());
2426
+ ASSERT_OK(db->Flush(FlushOptions()));
2427
+ for (auto* s : snapshots) {
2428
+ db->ReleaseSnapshot(s);
2429
+ }
2430
+ // Dummy keys to keep compaction from trivially moving files and bypassing the
2431
+ // actual compaction logic.
2432
+ ASSERT_OK(db->Put(WriteOptions(), "a", "dummy"));
2433
+ ASSERT_OK(db->Put(WriteOptions(), "z", "dummy"));
2434
+ ASSERT_OK(db->CompactRange(CompactRangeOptions(), nullptr, nullptr));
2435
+ VerifyKeys({
2436
+ {"key1", "value1_1"},
2437
+ {"key2", "value2_1"},
2438
+ {"key3", "value3_1"},
2439
+ {"key4", "value4_1"},
2440
+ {"key5", "value5_1,value5_2"},
2441
+ {"key6", "NOT_FOUND"},
2442
+ {"key7", "NOT_FOUND"},
2443
+ });
2444
+ VerifyInternalKeys({
2445
+ {"key1", "value1_2", expected_seq, kTypeValue},
2446
+ {"key1", "value1_1", 0, kTypeValue},
2447
+ {"key2", "", expected_seq, kTypeDeletion},
2448
+ {"key2", "value2_1", 0, kTypeValue},
2449
+ {"key3", "", expected_seq, kTypeSingleDeletion},
2450
+ {"key3", "value3_1", 0, kTypeValue},
2451
+ {"key4", "value4_2", expected_seq, kTypeMerge},
2452
+ {"key4", "value4_1", 0, kTypeValue},
2453
+ {"key5", "value5_3", expected_seq, kTypeMerge},
2454
+ {"key5", "value5_1,value5_2", 0, kTypeValue},
2455
+ {"key6", "value6_2", expected_seq, kTypeValue},
2456
+ {"key7", "value7_2", expected_seq, kTypeValue},
2457
+ });
2458
+ ASSERT_OK(transaction->Commit());
2459
+ VerifyKeys({
2460
+ {"key1", "value1_2"},
2461
+ {"key2", "NOT_FOUND"},
2462
+ {"key3", "NOT_FOUND"},
2463
+ {"key4", "value4_1,value4_2"},
2464
+ {"key5", "value5_1,value5_2,value5_3"},
2465
+ {"key6", "value6_2"},
2466
+ {"key7", "value7_2"},
2467
+ });
2468
+ delete transaction;
2469
+ }
2470
+
2471
+ // Compaction should keep keys visible to a snapshot based on commit sequence,
2472
+ // not just prepare sequence.
2473
+ TEST_P(WritePreparedTransactionTest, CompactionShouldKeepSnapshotVisibleKeys) {
2474
+ options.disable_auto_compactions = true;
2475
+ ASSERT_OK(ReOpen());
2476
+ // Keep track of expected sequence number.
2477
+ SequenceNumber expected_seq = 0;
2478
+ auto* txn1 = db->BeginTransaction(WriteOptions());
2479
+ ASSERT_OK(txn1->SetName("txn1"));
2480
+ ASSERT_OK(txn1->Put("key1", "value1_1"));
2481
+ ASSERT_OK(txn1->Prepare());
2482
+ ASSERT_EQ(++expected_seq, db->GetLatestSequenceNumber());
2483
+ ASSERT_OK(txn1->Commit());
2484
+ DBImpl* db_impl = static_cast_with_check<DBImpl>(db->GetRootDB());
2485
+ ASSERT_EQ(++expected_seq, db_impl->TEST_GetLastVisibleSequence());
2486
+ delete txn1;
2487
+ // Take a snapshot to keep keys from being evicted before compaction.
2488
+ const Snapshot* snapshot1 = db->GetSnapshot();
2489
+ auto* txn2 = db->BeginTransaction(WriteOptions());
2490
+ ASSERT_OK(txn2->SetName("txn2"));
2491
+ ASSERT_OK(txn2->Put("key2", "value2_1"));
2492
+ ASSERT_OK(txn2->Prepare());
2493
+ ASSERT_EQ(++expected_seq, db->GetLatestSequenceNumber());
2494
+ // txn1 commits before snapshot2 and is visible to snapshot2.
2495
+ // txn2 commits after snapshot2 and is not visible.
2496
+ const Snapshot* snapshot2 = db->GetSnapshot();
2497
+ ASSERT_OK(txn2->Commit());
2498
+ ASSERT_EQ(++expected_seq, db_impl->TEST_GetLastVisibleSequence());
2499
+ delete txn2;
2500
+ // Take a snapshot to keep keys from being evicted before compaction.
2501
+ const Snapshot* snapshot3 = db->GetSnapshot();
2502
+ ASSERT_OK(db->Put(WriteOptions(), "key1", "value1_2"));
2503
+ expected_seq++; // 1 for write
2504
+ SequenceNumber seq1 = expected_seq;
2505
+ if (options.two_write_queues) {
2506
+ expected_seq++; // 1 for commit
2507
+ }
2508
+ ASSERT_EQ(expected_seq, db_impl->TEST_GetLastVisibleSequence());
2509
+ ASSERT_OK(db->Put(WriteOptions(), "key2", "value2_2"));
2510
+ expected_seq++; // 1 for write
2511
+ SequenceNumber seq2 = expected_seq;
2512
+ if (options.two_write_queues) {
2513
+ expected_seq++; // 1 for commit
2514
+ }
2515
+ ASSERT_EQ(expected_seq, db_impl->TEST_GetLastVisibleSequence());
2516
+ ASSERT_OK(db->Flush(FlushOptions()));
2517
+ db->ReleaseSnapshot(snapshot1);
2518
+ db->ReleaseSnapshot(snapshot3);
2519
+ // Dummy keys to keep compaction from trivially moving files and bypassing the
2520
+ // actual compaction logic.
2521
+ ASSERT_OK(db->Put(WriteOptions(), "a", "dummy"));
2522
+ ASSERT_OK(db->Put(WriteOptions(), "z", "dummy"));
2523
+ ASSERT_OK(db->CompactRange(CompactRangeOptions(), nullptr, nullptr));
2524
+ VerifyKeys({{"key1", "value1_2"}, {"key2", "value2_2"}});
2525
+ VerifyKeys({{"key1", "value1_1"}, {"key2", "NOT_FOUND"}}, snapshot2);
2526
+ VerifyInternalKeys({
2527
+ {"key1", "value1_2", seq1, kTypeValue},
2528
+ // "value1_1" is visible to snapshot2. Also keys at bottom level visible
2529
+ // to earliest snapshot will output with seq = 0.
2530
+ {"key1", "value1_1", 0, kTypeValue},
2531
+ {"key2", "value2_2", seq2, kTypeValue},
2532
+ });
2533
+ db->ReleaseSnapshot(snapshot2);
2534
+ }
2535
+
2536
+ TEST_P(WritePreparedTransactionTest, SmallestUncommittedOptimization) {
2537
+ const size_t snapshot_cache_bits = 7; // same as default
2538
+ const size_t commit_cache_bits = 0; // minimum commit cache
2539
+ for (bool has_recent_prepare : {true, false}) {
2540
+ UpdateTransactionDBOptions(snapshot_cache_bits, commit_cache_bits);
2541
+ ASSERT_OK(ReOpen());
2542
+
2543
+ ASSERT_OK(db->Put(WriteOptions(), "key1", "value1"));
2544
+ auto* transaction =
2545
+ db->BeginTransaction(WriteOptions(), TransactionOptions(), nullptr);
2546
+ ASSERT_OK(transaction->SetName("txn"));
2547
+ ASSERT_OK(transaction->Delete("key1"));
2548
+ ASSERT_OK(transaction->Prepare());
2549
+ // snapshot1 should get min_uncommitted from prepared_txns_ heap.
2550
+ auto snapshot1 = db->GetSnapshot();
2551
+ ASSERT_EQ(transaction->GetId(),
2552
+ ((SnapshotImpl*)snapshot1)->min_uncommitted_);
2553
+ // Add a commit to advance max_evicted_seq and move the prepared transaction
2554
+ // into delayed_prepared_ set.
2555
+ ASSERT_OK(db->Put(WriteOptions(), "key2", "value2"));
2556
+ Transaction* txn2 = nullptr;
2557
+ if (has_recent_prepare) {
2558
+ txn2 =
2559
+ db->BeginTransaction(WriteOptions(), TransactionOptions(), nullptr);
2560
+ ASSERT_OK(txn2->SetName("txn2"));
2561
+ ASSERT_OK(txn2->Put("key3", "value3"));
2562
+ ASSERT_OK(txn2->Prepare());
2563
+ }
2564
+ // snapshot2 should get min_uncommitted from delayed_prepared_ set.
2565
+ auto snapshot2 = db->GetSnapshot();
2566
+ ASSERT_EQ(transaction->GetId(),
2567
+ ((SnapshotImpl*)snapshot2)->min_uncommitted_);
2568
+ ASSERT_OK(transaction->Commit());
2569
+ delete transaction;
2570
+ if (has_recent_prepare) {
2571
+ ASSERT_OK(txn2->Commit());
2572
+ delete txn2;
2573
+ }
2574
+ VerifyKeys({{"key1", "NOT_FOUND"}});
2575
+ VerifyKeys({{"key1", "value1"}}, snapshot1);
2576
+ VerifyKeys({{"key1", "value1"}}, snapshot2);
2577
+ db->ReleaseSnapshot(snapshot1);
2578
+ db->ReleaseSnapshot(snapshot2);
2579
+ }
2580
+ }
2581
+
2582
+ // Insert two values, v1 and v2, for a key. Between the prepare and the commit
2583
+ // of v2, take two snapshots, s1 and s2. Release s1 during compaction.
2584
+ // Test to make sure compaction doesn't get confused and think s1 can see both
2585
+ // values, and thus compact out the older value by mistake.
2586
+ TEST_P(WritePreparedTransactionTest, ReleaseSnapshotDuringCompaction) {
2587
+ const size_t snapshot_cache_bits = 7; // same as default
2588
+ const size_t commit_cache_bits = 0; // minimum commit cache
2589
+ UpdateTransactionDBOptions(snapshot_cache_bits, commit_cache_bits);
2590
+ ASSERT_OK(ReOpen());
2591
+
2592
+ ASSERT_OK(db->Put(WriteOptions(), "key1", "value1_1"));
2593
+ auto* transaction =
2594
+ db->BeginTransaction(WriteOptions(), TransactionOptions(), nullptr);
2595
+ ASSERT_OK(transaction->SetName("txn"));
2596
+ ASSERT_OK(transaction->Put("key1", "value1_2"));
2597
+ ASSERT_OK(transaction->Prepare());
2598
+ auto snapshot1 = db->GetSnapshot(); // taken after Prepare() and before Commit()
2599
+ // Increment the sequence number so that the two snapshots differ.
2600
+ ASSERT_OK(db->Put(WriteOptions(), "key2", "value2"));
2601
+ auto snapshot2 = db->GetSnapshot(); // also taken before Commit()
2602
+ ASSERT_OK(transaction->Commit());
2603
+ delete transaction;
2604
+ VerifyKeys({{"key1", "value1_2"}});
2605
+ VerifyKeys({{"key1", "value1_1"}}, snapshot1);
2606
+ VerifyKeys({{"key1", "value1_1"}}, snapshot2);
2607
+ // Add a flush to keep compaction from falling back to a trivial move.
2608
+
2609
+ auto callback = [&](void*) {
2610
+ // Release snapshot1 after CompactionIterator init.
2611
+ // CompactionIterator needs to figure out that the earliest snapshot
2612
+ // that can see key1:value1_2 is kMaxSequenceNumber, not
2613
+ // snapshot1 or snapshot2.
2614
+ db->ReleaseSnapshot(snapshot1);
2615
+ // Add some keys to advance max_evicted_seq.
2616
+ ASSERT_OK(db->Put(WriteOptions(), "key3", "value3"));
2617
+ ASSERT_OK(db->Put(WriteOptions(), "key4", "value4"));
2618
+ };
2619
+ SyncPoint::GetInstance()->SetCallBack("CompactionIterator:AfterInit",
2620
+ callback);
2621
+ SyncPoint::GetInstance()->EnableProcessing();
2622
+
2623
+ ASSERT_OK(db->Flush(FlushOptions())); // expected to hit the sync point registered above
2624
+ VerifyKeys({{"key1", "value1_2"}});
2625
+ VerifyKeys({{"key1", "value1_1"}}, snapshot2); // snapshot2 is still held and must keep seeing the old value
2626
+ db->ReleaseSnapshot(snapshot2);
2627
+ SyncPoint::GetInstance()->ClearAllCallBacks();
2628
+ }
2629
+
2630
+ // Insert two values, v1 and v2, for a key. Take two snapshots, s1 and s2,
2631
+ // after committing v2. Release s1 during compaction, right after compaction
2632
+ // processes v2 and before it processes v1. Test to make sure compaction doesn't
2633
+ // get confused and believe v1 and v2 are visible to different snapshots
2634
+ // (v1 by s2, v2 by s1) and refuse to compact out v1.
2635
+ TEST_P(WritePreparedTransactionTest, ReleaseSnapshotDuringCompaction2) {
2636
+ const size_t snapshot_cache_bits = 7; // same as default
2637
+ const size_t commit_cache_bits = 0; // minimum commit cache
2638
+ UpdateTransactionDBOptions(snapshot_cache_bits, commit_cache_bits);
2639
+ ASSERT_OK(ReOpen());
2640
+
2641
+ ASSERT_OK(db->Put(WriteOptions(), "key1", "value1"));
2642
+ ASSERT_OK(db->Put(WriteOptions(), "key1", "value2"));
2643
+ SequenceNumber v2_seq = db->GetLatestSequenceNumber(); // read right after writing value2
2644
+ auto* s1 = db->GetSnapshot();
2645
+ // Advance the sequence number so that s1 and s2 differ.
2646
+ ASSERT_OK(db->Put(WriteOptions(), "key2", "dummy"));
2647
+ auto* s2 = db->GetSnapshot();
2648
+
2649
+ int count_value = 0; // number of "key1" entries seen by the callback so far
2650
+ auto callback = [&](void* arg) {
2651
+ auto* ikey = reinterpret_cast<ParsedInternalKey*>(arg); // the sync point argument is read as a ParsedInternalKey
2652
+ if (ikey->user_key == "key1") {
2653
+ count_value++;
2654
+ if (count_value == 2) {
2655
+ // Second "key1" entry, i.e. processing v1.
2656
+ db->ReleaseSnapshot(s1);
2657
+ // Add some keys to advance max_evicted_seq and update
2658
+ // old_commit_map.
2659
+ ASSERT_OK(db->Put(WriteOptions(), "key3", "dummy"));
2660
+ ASSERT_OK(db->Put(WriteOptions(), "key4", "dummy"));
2661
+ }
2662
+ }
2663
+ };
2664
+ SyncPoint::GetInstance()->SetCallBack("CompactionIterator:ProcessKV",
2665
+ callback);
2666
+ SyncPoint::GetInstance()->EnableProcessing();
2667
+
2668
+ ASSERT_OK(db->Flush(FlushOptions()));
2669
+ // value1 should be compacted out; only value2 at v2_seq remains.
2670
+ VerifyInternalKeys({{"key1", "value2", v2_seq, kTypeValue}});
2671
+
2672
+ // cleanup
2673
+ db->ReleaseSnapshot(s2);
2674
+ SyncPoint::GetInstance()->ClearAllCallBacks();
2675
+ }
2676
+
2677
+ // Insert two values, v1 and v2, for a key. Insert another dummy key
2678
+ // so as to evict the commit cache for v2, while v1 is still in commit cache.
2679
+ // Take two snapshots, s1 and s2. Release s1 during compaction.
2680
+ // Even though the commit cache for v2 is evicted and old_commit_map doesn't have
2681
+ // s1 (it is released), the test expects v1 to be compacted out.
2682
+ // TODO(myabandeh): how can we be sure that the v2's commit info is evicted
2683
+ // (and not v1's)? Instead of putting a dummy, we can directly call
2684
+ // AddCommitted(v2_seq + cache_size, ...) to evict v2's entry from commit cache.
2685
+ TEST_P(WritePreparedTransactionTest, ReleaseSnapshotDuringCompaction3) {
2686
+ const size_t snapshot_cache_bits = 7; // same as default
2687
+ const size_t commit_cache_bits = 1; // commit cache size = 2
2688
+ UpdateTransactionDBOptions(snapshot_cache_bits, commit_cache_bits);
2689
+ ASSERT_OK(ReOpen());
2690
+
2691
+ // Add a dummy key to evict v2 commit cache, but keep v1 commit cache.
2692
+ // It also advances max_evicted_seq and can trigger old_commit_map cleanup.
2693
+ auto add_dummy = [&]() {
2694
+ auto* txn_dummy =
2695
+ db->BeginTransaction(WriteOptions(), TransactionOptions(), nullptr);
2696
+ ASSERT_OK(txn_dummy->SetName("txn_dummy"));
2697
+ ASSERT_OK(txn_dummy->Put("dummy", "dummy"));
2698
+ ASSERT_OK(txn_dummy->Prepare());
2699
+ ASSERT_OK(txn_dummy->Commit());
2700
+ delete txn_dummy;
2701
+ };
2702
+
2703
+ ASSERT_OK(db->Put(WriteOptions(), "key1", "value1"));
2704
+ auto* txn =
2705
+ db->BeginTransaction(WriteOptions(), TransactionOptions(), nullptr);
2706
+ ASSERT_OK(txn->SetName("txn"));
2707
+ ASSERT_OK(txn->Put("key1", "value2"));
2708
+ ASSERT_OK(txn->Prepare());
2709
+ // TODO(myabandeh): replace it with GetId()?
2710
+ auto v2_seq = db->GetLatestSequenceNumber(); // read after Prepare() and before Commit()
2711
+ ASSERT_OK(txn->Commit());
2712
+ delete txn;
2713
+ auto* s1 = db->GetSnapshot();
2714
+ // Dummy key to advance sequence number.
2715
+ add_dummy();
2716
+ auto* s2 = db->GetSnapshot();
2717
+
2718
+ auto callback = [&](void*) {
2719
+ db->ReleaseSnapshot(s1);
2720
+ // Add some dummy entries to trigger s1 being cleaned up from old_commit_map.
2721
+ add_dummy();
2722
+ add_dummy();
2723
+ };
2724
+ SyncPoint::GetInstance()->SetCallBack("CompactionIterator:AfterInit",
2725
+ callback);
2726
+ SyncPoint::GetInstance()->EnableProcessing();
2727
+
2728
+ ASSERT_OK(db->Flush(FlushOptions()));
2729
+ // value1 should be compacted out; only value2 at v2_seq remains.
2730
+ VerifyInternalKeys({{"key1", "value2", v2_seq, kTypeValue}});
2731
+
2732
+ db->ReleaseSnapshot(s2);
2733
+ SyncPoint::GetInstance()->ClearAllCallBacks();
2734
+ }
2735
+
2736
+ TEST_P(WritePreparedTransactionTest, ReleaseEarliestSnapshotDuringCompaction) { // releases snapshot1 from the AfterInit callback while snapshot2 is still held
2737
+ const size_t snapshot_cache_bits = 7; // same as default
2738
+ const size_t commit_cache_bits = 0; // minimum commit cache
2739
+ UpdateTransactionDBOptions(snapshot_cache_bits, commit_cache_bits);
2740
+ ASSERT_OK(ReOpen());
2741
+
2742
+ ASSERT_OK(db->Put(WriteOptions(), "key1", "value1"));
2743
+ auto* transaction =
2744
+ db->BeginTransaction(WriteOptions(), TransactionOptions(), nullptr);
2745
+ ASSERT_OK(transaction->SetName("txn"));
2746
+ ASSERT_OK(transaction->Delete("key1"));
2747
+ ASSERT_OK(transaction->Prepare());
2748
+ SequenceNumber del_seq = db->GetLatestSequenceNumber(); // read right after preparing the delete
2749
+ auto snapshot1 = db->GetSnapshot();
2750
+ // Increment the sequence number so that the two snapshots differ.
2751
+ ASSERT_OK(db->Put(WriteOptions(), "key2", "value2"));
2752
+ auto snapshot2 = db->GetSnapshot();
2753
+ ASSERT_OK(transaction->Commit());
2754
+ delete transaction;
2755
+ VerifyKeys({{"key1", "NOT_FOUND"}});
2756
+ VerifyKeys({{"key1", "value1"}}, snapshot1);
2757
+ VerifyKeys({{"key1", "value1"}}, snapshot2);
2758
+ ASSERT_OK(db->Flush(FlushOptions())); // runs before the callback below is registered
2759
+
2760
+ auto callback = [&](void* compaction) {
2761
+ // Release snapshot1 after CompactionIterator init.
2762
+ // CompactionIterator needs to double check and find out that snapshot2 is
2763
+ // now the earliest existing snapshot.
2764
+ if (compaction != nullptr) { // only act when the sync point passes a non-null argument
2765
+ db->ReleaseSnapshot(snapshot1);
2766
+ // Add some keys to advance max_evicted_seq.
2767
+ ASSERT_OK(db->Put(WriteOptions(), "key3", "value3"));
2768
+ ASSERT_OK(db->Put(WriteOptions(), "key4", "value4"));
2769
+ }
2770
+ };
2771
+ SyncPoint::GetInstance()->SetCallBack("CompactionIterator:AfterInit",
2772
+ callback);
2773
+ SyncPoint::GetInstance()->EnableProcessing();
2774
+
2775
+ // Dummy keys to keep compaction from trivially moving files and bypassing
2776
+ // the actual compaction logic.
2777
+ ASSERT_OK(db->Put(WriteOptions(), "a", "dummy"));
2778
+ ASSERT_OK(db->Put(WriteOptions(), "z", "dummy"));
2779
+ ASSERT_OK(db->CompactRange(CompactRangeOptions(), nullptr, nullptr));
2780
+ // Only verify for key1. Both the put and delete for the key should be kept.
2781
+ // Since the delete tombstone is not visible to snapshot2, we need to keep
2782
+ // at least one version of the key, for write-conflict check.
2783
+ VerifyInternalKeys({{"key1", "", del_seq, kTypeDeletion},
2784
+ {"key1", "value1", 0, kTypeValue}});
2785
+ db->ReleaseSnapshot(snapshot2);
2786
+ SyncPoint::GetInstance()->ClearAllCallBacks();
2787
+ }
2788
+
2789
+ // A more complex, randomized test to verify that compaction/flush keeps the
2790
+ // keys visible to snapshots.
2791
+ TEST_P(WritePreparedTransactionTest,
2792
+ CompactionKeepSnapshotVisibleKeysRandomized) {
2793
+ constexpr size_t kNumTransactions = 10;
2794
+ constexpr size_t kNumIterations = 1000;
2795
+
2796
+ std::vector<Transaction*> transactions(kNumTransactions, nullptr); // prepared-but-uncommitted txn per key slot, or nullptr
2797
+ std::vector<size_t> versions(kNumTransactions, 0); // last committed version number per key slot
2798
+ std::unordered_map<std::string, std::string> current_data; // expected committed key -> value
2799
+ std::vector<const Snapshot*> snapshots;
2800
+ std::vector<std::unordered_map<std::string, std::string>> snapshot_data; // expected data for snapshots[i]
2801
+
2802
+ Random rnd(1103); // fixed seed
2803
+ options.disable_auto_compactions = true;
2804
+ ASSERT_OK(ReOpen());
2805
+
2806
+ for (size_t i = 0; i < kNumTransactions; i++) {
2807
+ std::string key = "key" + ToString(i);
2808
+ std::string value = "value0";
2809
+ ASSERT_OK(db->Put(WriteOptions(), key, value));
2810
+ current_data[key] = value;
2811
+ }
2812
+ VerifyKeys(current_data);
2813
+
2814
+ for (size_t iter = 0; iter < kNumIterations; iter++) {
2815
+ auto r = rnd.Next() % (kNumTransactions + 1); // r == kNumTransactions selects the "take a snapshot" action
2816
+ if (r < kNumTransactions) {
2817
+ std::string key = "key" + ToString(r);
2818
+ if (transactions[r] == nullptr) { // no pending txn for this slot: prepare the next version
2819
+ std::string value = "value" + ToString(versions[r] + 1);
2820
+ auto* txn = db->BeginTransaction(WriteOptions());
2821
+ ASSERT_OK(txn->SetName("txn" + ToString(r)));
2822
+ ASSERT_OK(txn->Put(key, value));
2823
+ ASSERT_OK(txn->Prepare());
2824
+ transactions[r] = txn;
2825
+ } else { // a prepared txn exists: commit it and record the new expected value
2826
+ std::string value = "value" + ToString(++versions[r]);
2827
+ ASSERT_OK(transactions[r]->Commit());
2828
+ delete transactions[r];
2829
+ transactions[r] = nullptr;
2830
+ current_data[key] = value;
2831
+ }
2832
+ } else { // take a snapshot and remember the data it is expected to see
2833
+ auto* snapshot = db->GetSnapshot();
2834
+ VerifyKeys(current_data, snapshot);
2835
+ snapshots.push_back(snapshot);
2836
+ snapshot_data.push_back(current_data);
2837
+ }
2838
+ VerifyKeys(current_data);
2839
+ }
2840
+ // Take a last snapshot to test compaction with uncommitted prepared
2841
+ // transactions.
2842
+ snapshots.push_back(db->GetSnapshot());
2843
+ snapshot_data.push_back(current_data);
2844
+
2845
+ ASSERT_EQ(snapshots.size(), snapshot_data.size());
2846
+ for (size_t i = 0; i < snapshots.size(); i++) {
2847
+ VerifyKeys(snapshot_data[i], snapshots[i]);
2848
+ }
2849
+ ASSERT_OK(db->Flush(FlushOptions()));
2850
+ for (size_t i = 0; i < snapshots.size(); i++) {
2851
+ VerifyKeys(snapshot_data[i], snapshots[i]);
2852
+ }
2853
+ // Dummy keys to keep compaction from trivially moving files and bypassing
2854
+ // the actual compaction logic.
2855
+ ASSERT_OK(db->Put(WriteOptions(), "a", "dummy"));
2856
+ ASSERT_OK(db->Put(WriteOptions(), "z", "dummy"));
2857
+ ASSERT_OK(db->CompactRange(CompactRangeOptions(), nullptr, nullptr));
2858
+ for (size_t i = 0; i < snapshots.size(); i++) {
2859
+ VerifyKeys(snapshot_data[i], snapshots[i]);
2860
+ }
2861
+ // cleanup: commit any still-prepared transactions and release all snapshots
2862
+ for (size_t i = 0; i < kNumTransactions; i++) {
2863
+ if (transactions[i] == nullptr) {
2864
+ continue;
2865
+ }
2866
+ ASSERT_OK(transactions[i]->Commit());
2867
+ delete transactions[i];
2868
+ }
2869
+ for (size_t i = 0; i < snapshots.size(); i++) {
2870
+ db->ReleaseSnapshot(snapshots[i]);
2871
+ }
2872
+ }
2873
+
2874
+ // Compaction should not apply the optimization of outputting a key with
2875
+ // sequence number 0 if the key is not visible to the earliest snapshot, based
2876
+ // on its commit sequence number.
2877
+ TEST_P(WritePreparedTransactionTest,
2878
+ CompactionShouldKeepSequenceForUncommittedKeys) {
2879
+ options.disable_auto_compactions = true;
2880
+ ASSERT_OK(ReOpen());
2881
+ // Keep track of expected sequence number.
2882
+ SequenceNumber expected_seq = 0;
2883
+ auto* transaction = db->BeginTransaction(WriteOptions());
2884
+ ASSERT_OK(transaction->SetName("txn"));
2885
+ ASSERT_OK(transaction->Put("key1", "value1"));
2886
+ ASSERT_OK(transaction->Prepare());
2887
+ ASSERT_EQ(++expected_seq, db->GetLatestSequenceNumber());
2888
+ SequenceNumber seq1 = expected_seq; // sequence number of the prepared "key1"
2889
+ ASSERT_OK(db->Put(WriteOptions(), "key2", "value2"));
2890
+ DBImpl* db_impl = static_cast_with_check<DBImpl>(db->GetRootDB());
2891
+ expected_seq++; // one for data
2892
+ if (options.two_write_queues) {
2893
+ expected_seq++; // one for commit
2894
+ }
2895
+ ASSERT_EQ(expected_seq, db_impl->TEST_GetLastVisibleSequence());
2896
+ ASSERT_OK(db->Flush(FlushOptions()));
2897
+ // Dummy keys to keep compaction from trivially moving files and bypassing
2898
+ // the actual compaction logic.
2899
+ ASSERT_OK(db->Put(WriteOptions(), "a", "dummy"));
2900
+ ASSERT_OK(db->Put(WriteOptions(), "z", "dummy"));
2901
+ ASSERT_OK(db->CompactRange(CompactRangeOptions(), nullptr, nullptr));
2902
+ VerifyKeys({
2903
+ {"key1", "NOT_FOUND"},
2904
+ {"key2", "value2"},
2905
+ });
2906
+ VerifyInternalKeys({
2907
+ // "key1" has not been committed. It keeps its sequence number.
2908
+ {"key1", "value1", seq1, kTypeValue},
2909
+ // "key2" is committed and output with seq = 0.
2910
+ {"key2", "value2", 0, kTypeValue},
2911
+ });
2912
+ ASSERT_OK(transaction->Commit());
2913
+ VerifyKeys({
2914
+ {"key1", "value1"},
2915
+ {"key2", "value2"},
2916
+ });
2917
+ delete transaction;
2918
+ }
2919
+
2920
+ TEST_P(WritePreparedTransactionTest, CommitAndSnapshotDuringCompaction) { // takes a snapshot and commits a prepared txn from inside the AfterInit callback
2921
+ options.disable_auto_compactions = true;
2922
+ ASSERT_OK(ReOpen());
2923
+
2924
+ const Snapshot* snapshot = nullptr; // assigned by the callback below
2925
+ ASSERT_OK(db->Put(WriteOptions(), "key1", "value1"));
2926
+ auto* txn = db->BeginTransaction(WriteOptions());
2927
+ ASSERT_OK(txn->SetName("txn"));
2928
+ ASSERT_OK(txn->Put("key1", "value2"));
2929
+ ASSERT_OK(txn->Prepare());
2930
+
2931
+ auto callback = [&](void*) {
2932
+ // The snapshot is taken after compaction starts. It should be taken into
2933
+ // consideration for whether to compact out value1.
2934
+ snapshot = db->GetSnapshot();
2935
+ ASSERT_OK(txn->Commit());
2936
+ delete txn; // NOTE(review): the callback owns txn's deletion, so it must run exactly once
2937
+ };
2938
+ SyncPoint::GetInstance()->SetCallBack("CompactionIterator:AfterInit",
2939
+ callback);
2940
+ SyncPoint::GetInstance()->EnableProcessing();
2941
+ ASSERT_OK(db->Flush(FlushOptions()));
2942
+ ASSERT_NE(nullptr, snapshot); // proves the callback ran during the flush
2943
+ VerifyKeys({{"key1", "value2"}});
2944
+ VerifyKeys({{"key1", "value1"}}, snapshot);
2945
+ db->ReleaseSnapshot(snapshot); // NOTE(review): unlike sibling tests, the callback is not cleared here; presumably fixture teardown resets SyncPoint - confirm
2946
+ }
2947
+
2948
+ TEST_P(WritePreparedTransactionTest, Iterate) { // checks iterator positioning around a prepared, then committed, write to "foo"
2949
+ auto verify_state = [](Iterator* iter, const std::string& key,
2950
+ const std::string& value) { // asserts iter is valid, OK, and positioned on (key, value)
2951
+ ASSERT_TRUE(iter->Valid());
2952
+ ASSERT_OK(iter->status());
2953
+ ASSERT_EQ(key, iter->key().ToString());
2954
+ ASSERT_EQ(value, iter->value().ToString());
2955
+ };
2956
+
2957
+ auto verify_iter = [&](const std::string& expected_val) {
2958
+ // Get an iterator from a concurrent transaction and make sure it has the
2959
+ // same view as an iterator from the DB.
2960
+ auto* txn = db->BeginTransaction(WriteOptions());
2961
+
2962
+ for (int i = 0; i < 2; i++) {
2963
+ Iterator* iter = (i == 0)
2964
+ ? db->NewIterator(ReadOptions())
2965
+ : txn->GetIterator(ReadOptions()); // i == 0: DB iterator, i == 1: transaction iterator
2966
+ // Seek
2967
+ iter->Seek("foo");
2968
+ verify_state(iter, "foo", expected_val);
2969
+ // Next
2970
+ iter->Seek("a");
2971
+ verify_state(iter, "a", "va");
2972
+ iter->Next();
2973
+ verify_state(iter, "foo", expected_val);
2974
+ // SeekForPrev
2975
+ iter->SeekForPrev("y");
2976
+ verify_state(iter, "foo", expected_val);
2977
+ // Prev
2978
+ iter->SeekForPrev("z");
2979
+ verify_state(iter, "z", "vz");
2980
+ iter->Prev();
2981
+ verify_state(iter, "foo", expected_val);
2982
+ delete iter;
2983
+ }
2984
+ delete txn;
2985
+ };
2986
+
2987
+ ASSERT_OK(db->Put(WriteOptions(), "foo", "v1"));
2988
+ auto* transaction = db->BeginTransaction(WriteOptions());
2989
+ ASSERT_OK(transaction->SetName("txn"));
2990
+ ASSERT_OK(transaction->Put("foo", "v2"));
2991
+ ASSERT_OK(transaction->Prepare());
2992
+ VerifyKeys({{"foo", "v1"}}); // the prepared but uncommitted v2 must not be visible
2993
+ // Dummy keys on both sides of "foo" so Next()/Prev() have neighbours.
2994
+ ASSERT_OK(db->Put(WriteOptions(), "a", "va"));
2995
+ ASSERT_OK(db->Put(WriteOptions(), "z", "vz"));
2996
+ verify_iter("v1");
2997
+ ASSERT_OK(transaction->Commit());
2998
+ VerifyKeys({{"foo", "v2"}});
2999
+ verify_iter("v2");
3000
+ delete transaction;
3001
+ }
3002
+
3003
+ TEST_P(WritePreparedTransactionTest, IteratorRefreshNotSupported) { // Refresh() on a DB iterator is expected to report NotSupported
3004
+ Iterator* iter = db->NewIterator(ReadOptions());
3005
+ ASSERT_OK(iter->status()); // the iterator itself must be healthy before Refresh() is tried
3006
+ ASSERT_TRUE(iter->Refresh().IsNotSupported());
3007
+ delete iter;
3008
+ }
3009
+
3010
+ // Committing a delayed prepared has two non-atomic steps: update commit cache,
3011
+ // remove seq from delayed_prepared_. The read in IsInSnapshot also involves two
3012
+ // non-atomic steps of checking these two data structures. This test breaks each
3013
+ // in the middle to ensure correctness in spite of non-atomic execution.
3014
+ // Note: This test is limited to the case where snapshot is larger than the
3015
+ // max_evicted_seq_.
3016
+ TEST_P(WritePreparedTransactionTest, NonAtomicCommitOfDelayedPrepared) {
3017
+ const size_t snapshot_cache_bits = 7; // same as default
3018
+ const size_t commit_cache_bits = 3; // 8 entries
3019
+ for (auto split_read : {true, false}) {
3020
+ std::vector<bool> split_options = {false};
3021
+ if (split_read) {
3022
+ // Also test for break before mutex
3023
+ split_options.push_back(true);
3024
+ }
3025
+ for (auto split_before_mutex : split_options) {
3026
+ UpdateTransactionDBOptions(snapshot_cache_bits, commit_cache_bits);
3027
+ ASSERT_OK(ReOpen());
3028
+ WritePreparedTxnDB* wp_db = dynamic_cast<WritePreparedTxnDB*>(db);
3029
+ DBImpl* db_impl = static_cast_with_check<DBImpl>(db->GetRootDB());
3030
+ // Fill up the commit cache
3031
+ std::string init_value("value1");
3032
+ for (int i = 0; i < 10; i++) {
3033
+ ASSERT_OK(db->Put(WriteOptions(), Slice("key1"), Slice(init_value)));
3034
+ }
3035
+ // Prepare a transaction but do not commit it
3036
+ Transaction* txn =
3037
+ db->BeginTransaction(WriteOptions(), TransactionOptions());
3038
+ ASSERT_OK(txn->SetName("xid"));
3039
+ ASSERT_OK(txn->Put(Slice("key1"), Slice("value2")));
3040
+ ASSERT_OK(txn->Prepare());
3041
+ // Commit a bunch of entries to advance max evicted seq and make the
3042
+ // prepared a delayed prepared
3043
+ for (int i = 0; i < 10; i++) {
3044
+ ASSERT_OK(db->Put(WriteOptions(), Slice("key3"), Slice("value3")));
3045
+ }
3046
+ // The snapshot should not see the delayed prepared entry
3047
+ auto snap = db->GetSnapshot();
3048
+
3049
+ if (split_read) {
3050
+ if (split_before_mutex) {
3051
+ // split before acquiring prepared_mutex_
3052
+ ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency( // presumably each {A, B} pair makes B wait for A - confirm against SyncPoint
3053
+ {{"WritePreparedTxnDB::IsInSnapshot:prepared_mutex_:pause",
3054
+ "AtomicCommitOfDelayedPrepared:Commit:before"},
3055
+ {"AtomicCommitOfDelayedPrepared:Commit:after",
3056
+ "WritePreparedTxnDB::IsInSnapshot:prepared_mutex_:resume"}});
3057
+ } else {
3058
+ // split right after reading from the commit cache
3059
+ ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency(
3060
+ {{"WritePreparedTxnDB::IsInSnapshot:GetCommitEntry:pause",
3061
+ "AtomicCommitOfDelayedPrepared:Commit:before"},
3062
+ {"AtomicCommitOfDelayedPrepared:Commit:after",
3063
+ "WritePreparedTxnDB::IsInSnapshot:GetCommitEntry:resume"}});
3064
+ }
3065
+ } else { // split commit
3066
+ // split right before removing from delayed_prepared_
3067
+ ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency(
3068
+ {{"WritePreparedTxnDB::RemovePrepared:pause",
3069
+ "AtomicCommitOfDelayedPrepared:Read:before"},
3070
+ {"AtomicCommitOfDelayedPrepared:Read:after",
3071
+ "WritePreparedTxnDB::RemovePrepared:resume"}});
3072
+ }
3073
+ SyncPoint::GetInstance()->EnableProcessing();
3074
+
3075
+ ROCKSDB_NAMESPACE::port::Thread commit_thread([&]() {
3076
+ TEST_SYNC_POINT("AtomicCommitOfDelayedPrepared:Commit:before");
3077
+ ASSERT_OK(txn->Commit());
3078
+ if (split_before_mutex) {
3079
+ // Do a bunch of inserts to evict the commit entry from the cache. This
3080
+ // would prevent the 2nd look into commit cache under prepared_mutex_
3081
+ // from seeing the commit entry.
3082
+ auto seq = db_impl->TEST_GetLastVisibleSequence();
3083
+ size_t tries = 0;
3084
+ while (wp_db->max_evicted_seq_ < seq && tries < 50) {
3085
+ ASSERT_OK(db->Put(WriteOptions(), Slice("key3"), Slice("value3")));
3086
+ tries++;
3087
+ };
3088
+ ASSERT_LT(tries, 50); // fails if max_evicted_seq_ did not catch up within the retry budget
3089
+ }
3090
+ TEST_SYNC_POINT("AtomicCommitOfDelayedPrepared:Commit:after");
3091
+ delete txn;
3092
+ });
3093
+
3094
+ ROCKSDB_NAMESPACE::port::Thread read_thread([&]() {
3095
+ TEST_SYNC_POINT("AtomicCommitOfDelayedPrepared:Read:before");
3096
+ ReadOptions roptions;
3097
+ roptions.snapshot = snap;
3098
+ PinnableSlice value;
3099
+ auto s = db->Get(roptions, db->DefaultColumnFamily(), "key1", &value);
3100
+ ASSERT_OK(s);
3101
+ // It should not see the commit of delayed prepared
3102
+ ASSERT_TRUE(value == init_value);
3103
+ TEST_SYNC_POINT("AtomicCommitOfDelayedPrepared:Read:after");
3104
+ db->ReleaseSnapshot(snap);
3105
+ });
3106
+
3107
+ read_thread.join();
3108
+ commit_thread.join();
3109
+ ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing();
3110
+ ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->ClearAllCallBacks();
3111
+ } // for split_before_mutex
3112
+ } // for split_read
3113
+ }
3114
+
3115
+ // When max evicted seq advances past a prepared seq, it involves two updates:
3116
+ // i) adding prepared seq to delayed_prepared_, ii) updating max_evicted_seq_.
3117
+ // ::IsInSnapshot also reads these two values in a non-atomic way. This test
3118
+ // ensures correctness if the update occurs after ::IsInSnapshot reads
3119
+ // delayed_prepared_empty_ and before it reads max_evicted_seq_.
3120
+ // Note: this test focuses on read snapshot larger than max_evicted_seq_.
3121
+ TEST_P(WritePreparedTransactionTest, NonAtomicUpdateOfDelayedPrepared) {
3122
+ const size_t snapshot_cache_bits = 7; // same as default
3123
+ const size_t commit_cache_bits = 3; // 8 entries
3124
+ UpdateTransactionDBOptions(snapshot_cache_bits, commit_cache_bits);
3125
+ ASSERT_OK(ReOpen());
3126
+ WritePreparedTxnDB* wp_db = dynamic_cast<WritePreparedTxnDB*>(db);
3127
+ // Fill up the commit cache
3128
+ std::string init_value("value1");
3129
+ for (int i = 0; i < 10; i++) {
3130
+ ASSERT_OK(db->Put(WriteOptions(), Slice("key1"), Slice(init_value)));
3131
+ }
3132
+ // Prepare a transaction but do not commit it
3133
+ Transaction* txn = db->BeginTransaction(WriteOptions(), TransactionOptions());
3134
+ ASSERT_OK(txn->SetName("xid"));
3135
+ ASSERT_OK(txn->Put(Slice("key1"), Slice("value2")));
3136
+ ASSERT_OK(txn->Prepare());
3137
+ // Create a gap between prepare seq and snapshot seq
3138
+ ASSERT_OK(db->Put(WriteOptions(), Slice("key3"), Slice("value3")));
3139
+ ASSERT_OK(db->Put(WriteOptions(), Slice("key3"), Slice("value3")));
3140
+ // The snapshot should not see the delayed prepared entry
3141
+ auto snap = db->GetSnapshot();
3142
+ ASSERT_LT(txn->GetId(), snap->GetSequenceNumber());
3143
+
3144
+ // split right after reading delayed_prepared_empty_
3145
+ ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency( // presumably each {A, B} pair makes B wait for A - confirm against SyncPoint
3146
+ {{"WritePreparedTxnDB::IsInSnapshot:delayed_prepared_empty_:pause",
3147
+ "AtomicUpdateOfDelayedPrepared:before"},
3148
+ {"AtomicUpdateOfDelayedPrepared:after",
3149
+ "WritePreparedTxnDB::IsInSnapshot:delayed_prepared_empty_:resume"}});
3150
+ SyncPoint::GetInstance()->EnableProcessing();
3151
+
3152
+ ROCKSDB_NAMESPACE::port::Thread commit_thread([&]() {
3153
+ TEST_SYNC_POINT("AtomicUpdateOfDelayedPrepared:before");
3154
+ // Commit a bunch of entries to advance max evicted seq and make the
3155
+ // prepared a delayed prepared
3156
+ size_t tries = 0;
3157
+ while (wp_db->max_evicted_seq_ < txn->GetId() && tries < 50) {
3158
+ ASSERT_OK(db->Put(WriteOptions(), Slice("key3"), Slice("value3")));
3159
+ tries++;
3160
+ };
3161
+ ASSERT_LT(tries, 50); // fails if max_evicted_seq_ did not reach the prepare seq within the retry budget
3162
+ // This is the case on which the test focuses
3163
+ ASSERT_LT(wp_db->max_evicted_seq_, snap->GetSequenceNumber());
3164
+ TEST_SYNC_POINT("AtomicUpdateOfDelayedPrepared:after");
3165
+ });
3166
+
3167
+ ROCKSDB_NAMESPACE::port::Thread read_thread([&]() {
3168
+ ReadOptions roptions;
3169
+ roptions.snapshot = snap;
3170
+ PinnableSlice value;
3171
+ auto s = db->Get(roptions, db->DefaultColumnFamily(), "key1", &value);
3172
+ ASSERT_OK(s);
3173
+ // It should not see the uncommitted value of delayed prepared
3174
+ ASSERT_TRUE(value == init_value);
3175
+ db->ReleaseSnapshot(snap);
3176
+ });
3177
+
3178
+ read_thread.join();
3179
+ commit_thread.join();
3180
+ ASSERT_OK(txn->Commit()); // the txn is committed only after both threads have finished
3181
+ delete txn;
3182
+ ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing();
3183
+ ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->ClearAllCallBacks();
3184
+ }
3185
+
3186
+ // Eviction from commit cache and update of max evicted seq are two non-atomic
3187
+ // steps. Similarly the read of max_evicted_seq_ in ::IsInSnapshot and reading
3188
+ // from commit cache are two non-atomic steps. This tests the case where the
3189
+ // update occurs after reading max_evicted_seq_ and before reading the commit
3190
+ // cache. Note: the test focuses on snapshot larger than max_evicted_seq_
3191
+ TEST_P(WritePreparedTransactionTest, NonAtomicUpdateOfMaxEvictedSeq) {
3192
+ const size_t snapshot_cache_bits = 7; // same as default
3193
+ const size_t commit_cache_bits = 3; // 8 entries
3194
+ UpdateTransactionDBOptions(snapshot_cache_bits, commit_cache_bits);
3195
+ ASSERT_OK(ReOpen());
3196
+ WritePreparedTxnDB* wp_db = dynamic_cast<WritePreparedTxnDB*>(db);
3197
+ // Fill up the commit cache
3198
+ std::string init_value("value1");
3199
+ std::string last_value("value_final");
3200
+ for (int i = 0; i < 10; i++) {
3201
+ ASSERT_OK(db->Put(WriteOptions(), Slice("key1"), Slice(init_value)));
3202
+ }
3203
+ // Do an uncommitted write to prevent min_uncommitted optimization
3204
+ Transaction* txn1 =
3205
+ db->BeginTransaction(WriteOptions(), TransactionOptions());
3206
+ ASSERT_OK(txn1->SetName("xid1"));
3207
+ ASSERT_OK(txn1->Put(Slice("key0"), last_value));
3208
+ ASSERT_OK(txn1->Prepare());
3209
+ // Do a write with prepare to get the prepare seq
3210
+ Transaction* txn = db->BeginTransaction(WriteOptions(), TransactionOptions());
3211
+ ASSERT_OK(txn->SetName("xid"));
3212
+ ASSERT_OK(txn->Put(Slice("key1"), last_value));
3213
+ ASSERT_OK(txn->Prepare());
3214
+ ASSERT_OK(txn->Commit()); // txn stays allocated because GetId() is still read below
3215
+ // Create a gap between commit entry and snapshot seq
3216
+ ASSERT_OK(db->Put(WriteOptions(), Slice("key3"), Slice("value3")));
3217
+ ASSERT_OK(db->Put(WriteOptions(), Slice("key3"), Slice("value3")));
3218
+ // The snapshot should see the last commit
3219
+ auto snap = db->GetSnapshot();
3220
+ ASSERT_LE(txn->GetId(), snap->GetSequenceNumber());
3221
+
3222
+ // split right after reading max_evicted_seq_
3223
+ ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency( // presumably each {A, B} pair makes B wait for A - confirm against SyncPoint
3224
+ {{"WritePreparedTxnDB::IsInSnapshot:max_evicted_seq_:pause",
3225
+ "NonAtomicUpdateOfMaxEvictedSeq:before"},
3226
+ {"NonAtomicUpdateOfMaxEvictedSeq:after",
3227
+ "WritePreparedTxnDB::IsInSnapshot:max_evicted_seq_:resume"}});
3228
+ SyncPoint::GetInstance()->EnableProcessing();
3229
+
3230
+ ROCKSDB_NAMESPACE::port::Thread commit_thread([&]() {
3231
+ TEST_SYNC_POINT("NonAtomicUpdateOfMaxEvictedSeq:before");
3232
+ // Commit a bunch of entries to advance max evicted seq to at least txn->GetId()
3233
+ size_t tries = 0;
3234
+ while (wp_db->max_evicted_seq_ < txn->GetId() && tries < 50) {
3235
+ ASSERT_OK(db->Put(WriteOptions(), Slice("key3"), Slice("value3")));
3236
+ tries++;
3237
+ };
3238
+ ASSERT_LT(tries, 50); // fails if max_evicted_seq_ did not reach txn's id within the retry budget
3239
+ // This is the case on which the test focuses
3240
+ ASSERT_LT(wp_db->max_evicted_seq_, snap->GetSequenceNumber());
3241
+ TEST_SYNC_POINT("NonAtomicUpdateOfMaxEvictedSeq:after");
3242
+ });
3243
+
3244
+ ROCKSDB_NAMESPACE::port::Thread read_thread([&]() {
3245
+ ReadOptions roptions;
3246
+ roptions.snapshot = snap;
3247
+ PinnableSlice value;
3248
+ auto s = db->Get(roptions, db->DefaultColumnFamily(), "key1", &value);
3249
+ ASSERT_OK(s);
3250
+ // It should see the committed value of the evicted entry
3251
+ ASSERT_TRUE(value == last_value);
3252
+ db->ReleaseSnapshot(snap);
3253
+ });
3254
+
3255
+ read_thread.join();
3256
+ commit_thread.join();
3257
+ delete txn;
3258
+ ASSERT_OK(txn1->Commit());
3259
+ delete txn1;
3260
+ ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing();
3261
+ ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->ClearAllCallBacks();
3262
+ }
3263
+
3264
+ // Test adding a prepared seq when max_evicted_seq_ has already gone beyond
3265
+ // it. The test focuses on a race condition between the AddPrepared and
3266
+ // AdvanceMaxEvictedSeq functions.
3267
+ TEST_P(WritePreparedTransactionTest, AddPreparedBeforeMax) {
3268
+ if (!options.two_write_queues) {
3269
+ // This test is only for two write queues
3270
+ return;
3271
+ }
3272
+ const size_t snapshot_cache_bits = 7; // same as default
3273
+ // 1 entry to advance max after the 2nd commit
3274
+ const size_t commit_cache_bits = 0;
3275
+ UpdateTransactionDBOptions(snapshot_cache_bits, commit_cache_bits);
3276
+ ASSERT_OK(ReOpen());
3277
+ WritePreparedTxnDB* wp_db = dynamic_cast<WritePreparedTxnDB*>(db);
3278
+ std::string some_value("value_some");
3279
+ std::string uncommitted_value("value_uncommitted");
3280
+ // Prepare two uncommitted transactions
3281
+ Transaction* txn1 =
3282
+ db->BeginTransaction(WriteOptions(), TransactionOptions());
3283
+ ASSERT_OK(txn1->SetName("xid1"));
3284
+ ASSERT_OK(txn1->Put(Slice("key1"), some_value));
3285
+ ASSERT_OK(txn1->Prepare());
3286
+ Transaction* txn2 =
3287
+ db->BeginTransaction(WriteOptions(), TransactionOptions());
3288
+ ASSERT_OK(txn2->SetName("xid2"));
3289
+ ASSERT_OK(txn2->Put(Slice("key2"), some_value));
3290
+ ASSERT_OK(txn2->Prepare());
3291
+ // Start the txn here so the other thread could get its id
3292
+ Transaction* txn = db->BeginTransaction(WriteOptions(), TransactionOptions());
3293
+ ASSERT_OK(txn->SetName("xid"));
3294
+ ASSERT_OK(txn->Put(Slice("key0"), uncommitted_value));
3295
+ port::Mutex txn_mutex_; // held around txn->Prepare() in write_thread and txn->GetId() in read_thread
3296
+
3297
+ // t1) Insert prepared entry, t2) commit other entries to advance max
3298
+ // evicted seq and finish checking the existing prepared entries, t1)
3299
+ // AddPrepared, t2) update max_evicted_seq_
3300
+ ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency({ // presumably each {A, B} pair makes B wait for A - confirm against SyncPoint
3301
+ {"AddPreparedCallback::AddPrepared::begin:pause",
3302
+ "AddPreparedBeforeMax::read_thread:start"},
3303
+ {"AdvanceMaxEvictedSeq::update_max:pause",
3304
+ "AddPreparedCallback::AddPrepared::begin:resume"},
3305
+ {"AddPreparedCallback::AddPrepared::end",
3306
+ "AdvanceMaxEvictedSeq::update_max:resume"},
3307
+ });
3308
+ SyncPoint::GetInstance()->EnableProcessing();
3309
+
3310
+ ROCKSDB_NAMESPACE::port::Thread write_thread([&]() {
3311
+ txn_mutex_.Lock();
3312
+ ASSERT_OK(txn->Prepare());
3313
+ txn_mutex_.Unlock();
3314
+ });
3315
+
3316
+ ROCKSDB_NAMESPACE::port::Thread read_thread([&]() {
3317
+ TEST_SYNC_POINT("AddPreparedBeforeMax::read_thread:start");
3318
+ // Publish seq number with a commit
3319
+ ASSERT_OK(txn1->Commit());
3320
+ // Since the commit cache size is one, the 2nd commit evicts the 1st one and
3321
+ // invokes AdvanceMaxEvictedSeq
3322
+ ASSERT_OK(txn2->Commit());
3323
+
3324
+ ReadOptions roptions;
3325
+ PinnableSlice value;
3326
+ // The snapshot should not see the uncommitted value from write_thread
3327
+ auto snap = db->GetSnapshot();
3328
+ ASSERT_LT(wp_db->max_evicted_seq_, snap->GetSequenceNumber());
3329
+ // This is the scenario that we test for
3330
+ txn_mutex_.Lock();
3331
+ ASSERT_GT(wp_db->max_evicted_seq_, txn->GetId());
3332
+ txn_mutex_.Unlock();
3333
+ roptions.snapshot = snap;
3334
+ auto s = db->Get(roptions, db->DefaultColumnFamily(), "key0", &value);
3335
+ ASSERT_TRUE(s.IsNotFound()); // "key0" is only written by the still-uncommitted txn
3336
+ db->ReleaseSnapshot(snap);
3337
+ });
3338
+
3339
+ read_thread.join();
3340
+ write_thread.join();
3341
+ delete txn1;
3342
+ delete txn2;
3343
+ ASSERT_OK(txn->Commit());
3344
+ delete txn;
3345
+ ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing();
3346
+ ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->ClearAllCallBacks();
3347
+ }
3348
+
3349
+ // When an old prepared entry gets committed, there is a gap between the time
3350
+ // that it is published and when it is cleaned up from old_prepared_. This test
3351
+ // stresses such cases: a snapshot taken from the RemovePrepared:Start callback must keep returning the committed value of key2.
3352
+ TEST_P(WritePreparedTransactionTest, CommitOfDelayedPrepared) {
3353
+ const size_t snapshot_cache_bits = 7; // same as default
3354
+ for (const size_t commit_cache_bits : {0, 2, 3}) {
3355
+ for (const size_t sub_batch_cnt : {1, 2, 3}) {
3356
+ UpdateTransactionDBOptions(snapshot_cache_bits, commit_cache_bits);
3357
+ ASSERT_OK(ReOpen());
3358
+ std::atomic<const Snapshot*> snap = {nullptr};
3359
+ std::atomic<SequenceNumber> exp_prepare = {0};
3360
+ ROCKSDB_NAMESPACE::port::Thread callback_thread;
3361
+ // Read under snap: filled in by snap_callback, compared in write_thread after callback_thread.join()
3362
+ PinnableSlice value;
3363
+ // Take a snapshot after publish and before RemovePrepared:Start, then read key2 with it
3364
+ auto snap_callback = [&]() {
3365
+ ASSERT_EQ(nullptr, snap.load());
3366
+ snap.store(db->GetSnapshot());
3367
+ ReadOptions roptions;
3368
+ roptions.snapshot = snap.load();
3369
+ auto s = db->Get(roptions, db->DefaultColumnFamily(), "key2", &value);
3370
+ ASSERT_OK(s);
3371
+ };
3372
+ auto callback = [&](void* param) {
3373
+ SequenceNumber prep_seq = *((SequenceNumber*)param);
3374
+ if (prep_seq == exp_prepare.load()) { // only for write_thread
3375
+ // We need to spawn a thread to avoid deadlock since getting a
3376
+ // snapshot might end up calling AdvanceSeqByOne which needs joining
3377
+ // the write queue.
3378
+ callback_thread = ROCKSDB_NAMESPACE::port::Thread(snap_callback);
3379
+ TEST_SYNC_POINT("callback:end");
3380
+ }
3381
+ };
3382
+ // Wait for the first snapshot to be taken in GetSnapshotInternal. Although
3383
+ // it might be updated before GetSnapshotInternal finishes, this should
3384
+ // cover most of the cases.
3385
+ ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency({
3386
+ {"WritePreparedTxnDB::GetSnapshotInternal:first", "callback:end"},
3387
+ });
3388
+ SyncPoint::GetInstance()->SetCallBack("RemovePrepared:Start", callback);
3389
+ SyncPoint::GetInstance()->EnableProcessing();
3390
+ // Thread to cause frequent evictions by writing key1 repeatedly
3391
+ ROCKSDB_NAMESPACE::port::Thread eviction_thread([&]() {
3392
+ // Too many txns might cause commit_seq - prepare_seq in another thread
3393
+ // to go beyond DELTA_UPPERBOUND, hence the bounded iteration count
3394
+ for (int i = 0; i < 25 * (1 << commit_cache_bits); i++) {
3395
+ ASSERT_OK(db->Put(WriteOptions(), Slice("key1"), Slice("value1")));
3396
+ }
3397
+ });
3398
+ ROCKSDB_NAMESPACE::port::Thread write_thread([&]() {
3399
+ for (int i = 0; i < 25 * (1 << commit_cache_bits); i++) {
3400
+ Transaction* txn =
3401
+ db->BeginTransaction(WriteOptions(), TransactionOptions());
3402
+ ASSERT_OK(txn->SetName("xid"));
3403
+ std::string val_str = "value" + ToString(i);
3404
+ for (size_t b = 0; b < sub_batch_cnt; b++) {
3405
+ ASSERT_OK(txn->Put(Slice("key2"), val_str));
3406
+ }
3407
+ ASSERT_OK(txn->Prepare());
3408
+ // Let an eviction kick in
3409
+ std::this_thread::yield();
3410
+
3411
+ exp_prepare.store(txn->GetId());
3412
+ ASSERT_OK(txn->Commit());
3413
+ delete txn;
3414
+ // Wait for the snapshot taking that is triggered by
3415
+ // RemovePrepared:Start callback (assumes the callback fired for this txn, otherwise callback_thread was never started)
3416
+ callback_thread.join();
3417
+
3418
+ // Read with the snapshot taken before delayed_prepared_ cleanup
3419
+ ReadOptions roptions;
3420
+ roptions.snapshot = snap.load();
3421
+ ASSERT_NE(nullptr, roptions.snapshot);
3422
+ PinnableSlice value2;
3423
+ auto s =
3424
+ db->Get(roptions, db->DefaultColumnFamily(), "key2", &value2);
3425
+ ASSERT_OK(s);
3426
+ // It should see its own write
3427
+ ASSERT_TRUE(val_str == value2);
3428
+ // The value read by snapshot should not change between snap_callback's read and this one
3429
+ ASSERT_STREQ(value2.ToString().c_str(), value.ToString().c_str());
3430
+
3431
+ db->ReleaseSnapshot(roptions.snapshot);
3432
+ snap.store(nullptr);
3433
+ }
3434
+ });
3435
+ write_thread.join();
3436
+ eviction_thread.join();
3437
+ ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing();
3438
+ ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->ClearAllCallBacks();
3439
+ }
3440
+ }
3441
+ }
3442
+
3443
+ // Test that updating the commit map will not affect the existing snapshots: a reader takes a snapshot and reads "key" while a writer commits it.
3444
+ TEST_P(WritePreparedTransactionTest, AtomicCommit) {
3445
+ for (bool skip_prepare : {true, false}) {  // true: plain db->Put(); false: named txn with Prepare() + Commit()
3446
+ ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency({  // orders the writer's AddCommitted sync points against the reader's
3447
+ {"WritePreparedTxnDB::AddCommitted:start",
3448
+ "AtomicCommit::GetSnapshot:start"},
3449
+ {"AtomicCommit::Get:end",
3450
+ "WritePreparedTxnDB::AddCommitted:start:pause"},
3451
+ {"WritePreparedTxnDB::AddCommitted:end", "AtomicCommit::Get2:start"},
3452
+ {"AtomicCommit::Get2:end",
3453
+ "WritePreparedTxnDB::AddCommitted:end:pause:"},  // NOTE(review): trailing ':' differs from the "start:pause" name above - confirm it matches the sync point in AddCommitted
3454
+ });
3455
+ ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();
3456
+ ROCKSDB_NAMESPACE::port::Thread write_thread([&]() {
3457
+ if (skip_prepare) {
3458
+ ASSERT_OK(db->Put(WriteOptions(), Slice("key"), Slice("value")));
3459
+ } else {
3460
+ Transaction* txn =
3461
+ db->BeginTransaction(WriteOptions(), TransactionOptions());
3462
+ ASSERT_OK(txn->SetName("xid"));
3463
+ ASSERT_OK(txn->Put(Slice("key"), Slice("value")));
3464
+ ASSERT_OK(txn->Prepare());
3465
+ ASSERT_OK(txn->Commit());
3466
+ delete txn;
3467
+ }
3468
+ });
3469
+ ROCKSDB_NAMESPACE::port::Thread read_thread([&]() {
3470
+ ReadOptions roptions;
3471
+ TEST_SYNC_POINT("AtomicCommit::GetSnapshot:start");
3472
+ roptions.snapshot = db->GetSnapshot();
3473
+ PinnableSlice val;
3474
+ auto s = db->Get(roptions, db->DefaultColumnFamily(), "key", &val);  // first read under the snapshot; result kept in (s, val)
3475
+ TEST_SYNC_POINT("AtomicCommit::Get:end");
3476
+ TEST_SYNC_POINT("AtomicCommit::Get2:start");
3477
+ ASSERT_SAME(roptions, db, s, val, "key");  // helper defined outside this section; presumably re-reads "key" and compares with (s, val) - confirm
3478
+ TEST_SYNC_POINT("AtomicCommit::Get2:end");
3479
+ db->ReleaseSnapshot(roptions.snapshot);
3480
+ });
3481
+ read_thread.join();
3482
+ write_thread.join();
3483
+ ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing();
3484
+ }
3485
+ }
3486
+
3487
+ // Switching the write policy from WriteCommitted to WritePrepared is expected
3488
+ // to work after a clean shutdown, since that leaves the WAL empty.
3489
+ TEST_P(WritePreparedTransactionTest, WP_WC_DBBackwardCompatibility) {
3490
+ const bool wal_is_empty = true;
3491
+ CrossCompatibilityTest(WRITE_COMMITTED, WRITE_PREPARED, wal_is_empty);
3492
+ }
3493
+
3494
+ // Switching the write policy from WriteCommitted to WritePrepared is expected
3495
+ // to fail fast when the WAL has not been emptied in between.
3496
+ TEST_P(WritePreparedTransactionTest, WP_WC_WALBackwardIncompatibility) {
3497
+ const bool wal_is_empty = false;
3498
+ CrossCompatibilityTest(WRITE_COMMITTED, WRITE_PREPARED, wal_is_empty);
3499
+ }
3500
+
3501
+ // Switching the write policy from WritePrepared back to WriteCommitted is
3502
+ // expected to work after a clean shutdown, since that leaves the WAL empty.
3503
+ TEST_P(WritePreparedTransactionTest, WC_WP_ForwardCompatibility) {
3504
+ const bool wal_is_empty = true;
3505
+ CrossCompatibilityTest(WRITE_PREPARED, WRITE_COMMITTED, wal_is_empty);
3506
+ }
3507
+
3508
+ // Switching the write policy from WritePrepared back to WriteCommitted is
3509
+ // expected to fail fast when the WAL has not been emptied in between.
3510
+ TEST_P(WritePreparedTransactionTest, WC_WP_WALForwardIncompatibility) {
3511
+ const bool wal_is_empty = false;
3512
+ CrossCompatibilityTest(WRITE_PREPARED, WRITE_COMMITTED, wal_is_empty);
3513
+ }
3514
+
3515
+ } // namespace ROCKSDB_NAMESPACE
3516
+
3517
+ int main(int argc, char** argv) {  // test-binary entry point for the branch that builds the tests above
3518
+ ::testing::InitGoogleTest(&argc, argv);  // let GoogleTest consume its own command-line flags
3519
+ return RUN_ALL_TESTS();  // the result of the test run becomes the process exit code
3520
+ }
3521
+
3522
+ #else
3523
+ #include <stdio.h>
3524
+
3525
+ int main(int /*argc*/, char** /*argv*/) {  // ROCKSDB_LITE stub: no tests are run
3526
+ fputs("SKIPPED as Transactions are not supported in ROCKSDB_LITE\n",
3527
+ stderr);
3528
+ return 0;  // exit successfully so the skipped suite is not reported as a failure
3529
+ }
3530
+
3531
+ #endif // ROCKSDB_LITE