@nxtedition/rocksdb 8.2.7 → 9.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (359)
  1. package/deps/rocksdb/rocksdb/CMakeLists.txt +7 -1
  2. package/deps/rocksdb/rocksdb/Makefile +22 -19
  3. package/deps/rocksdb/rocksdb/TARGETS +8 -0
  4. package/deps/rocksdb/rocksdb/cache/cache_bench_tool.cc +157 -61
  5. package/deps/rocksdb/rocksdb/cache/cache_test.cc +43 -92
  6. package/deps/rocksdb/rocksdb/cache/clock_cache.cc +632 -455
  7. package/deps/rocksdb/rocksdb/cache/clock_cache.h +244 -149
  8. package/deps/rocksdb/rocksdb/cache/compressed_secondary_cache.cc +41 -13
  9. package/deps/rocksdb/rocksdb/cache/compressed_secondary_cache.h +11 -1
  10. package/deps/rocksdb/rocksdb/cache/compressed_secondary_cache_test.cc +216 -17
  11. package/deps/rocksdb/rocksdb/cache/lru_cache.cc +7 -5
  12. package/deps/rocksdb/rocksdb/cache/lru_cache_test.cc +279 -199
  13. package/deps/rocksdb/rocksdb/cache/secondary_cache.cc +2 -1
  14. package/deps/rocksdb/rocksdb/cache/secondary_cache_adapter.cc +159 -8
  15. package/deps/rocksdb/rocksdb/cache/secondary_cache_adapter.h +28 -2
  16. package/deps/rocksdb/rocksdb/cache/sharded_cache.cc +1 -1
  17. package/deps/rocksdb/rocksdb/cache/sharded_cache.h +8 -0
  18. package/deps/rocksdb/rocksdb/crash_test.mk +14 -0
  19. package/deps/rocksdb/rocksdb/db/arena_wrapped_db_iter.cc +3 -1
  20. package/deps/rocksdb/rocksdb/db/blob/blob_file_builder.cc +1 -1
  21. package/deps/rocksdb/rocksdb/db/blob/blob_file_builder_test.cc +1 -1
  22. package/deps/rocksdb/rocksdb/db/blob/blob_file_cache.cc +2 -2
  23. package/deps/rocksdb/rocksdb/db/blob/blob_file_cache.h +1 -1
  24. package/deps/rocksdb/rocksdb/db/blob/blob_file_reader.cc +18 -21
  25. package/deps/rocksdb/rocksdb/db/blob/blob_file_reader.h +1 -2
  26. package/deps/rocksdb/rocksdb/db/blob/blob_file_reader_test.cc +1 -1
  27. package/deps/rocksdb/rocksdb/db/blob/blob_log_sequential_reader.cc +2 -3
  28. package/deps/rocksdb/rocksdb/db/blob/blob_source_test.cc +1 -1
  29. package/deps/rocksdb/rocksdb/db/builder.cc +32 -7
  30. package/deps/rocksdb/rocksdb/db/c.cc +169 -6
  31. package/deps/rocksdb/rocksdb/db/c_test.c +104 -6
  32. package/deps/rocksdb/rocksdb/db/column_family.cc +98 -47
  33. package/deps/rocksdb/rocksdb/db/column_family.h +25 -2
  34. package/deps/rocksdb/rocksdb/db/column_family_test.cc +213 -2
  35. package/deps/rocksdb/rocksdb/db/compact_files_test.cc +4 -1
  36. package/deps/rocksdb/rocksdb/db/compaction/compaction.cc +93 -23
  37. package/deps/rocksdb/rocksdb/db/compaction/compaction.h +33 -9
  38. package/deps/rocksdb/rocksdb/db/compaction/compaction_iterator.cc +7 -6
  39. package/deps/rocksdb/rocksdb/db/compaction/compaction_iterator.h +17 -6
  40. package/deps/rocksdb/rocksdb/db/compaction/compaction_iterator_test.cc +2 -2
  41. package/deps/rocksdb/rocksdb/db/compaction/compaction_job.cc +107 -43
  42. package/deps/rocksdb/rocksdb/db/compaction/compaction_job.h +15 -4
  43. package/deps/rocksdb/rocksdb/db/compaction/compaction_job_stats_test.cc +2 -0
  44. package/deps/rocksdb/rocksdb/db/compaction/compaction_job_test.cc +4 -2
  45. package/deps/rocksdb/rocksdb/db/compaction/compaction_outputs.cc +25 -17
  46. package/deps/rocksdb/rocksdb/db/compaction/compaction_outputs.h +13 -4
  47. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker.cc +11 -11
  48. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_test.cc +29 -4
  49. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_universal.cc +24 -31
  50. package/deps/rocksdb/rocksdb/db/compaction/file_pri.h +3 -1
  51. package/deps/rocksdb/rocksdb/db/compaction/tiered_compaction_test.cc +19 -19
  52. package/deps/rocksdb/rocksdb/db/comparator_db_test.cc +2 -1
  53. package/deps/rocksdb/rocksdb/db/convenience.cc +20 -3
  54. package/deps/rocksdb/rocksdb/db/convenience_impl.h +15 -0
  55. package/deps/rocksdb/rocksdb/db/corruption_test.cc +17 -0
  56. package/deps/rocksdb/rocksdb/db/cuckoo_table_db_test.cc +1 -0
  57. package/deps/rocksdb/rocksdb/db/db_basic_test.cc +17 -3
  58. package/deps/rocksdb/rocksdb/db/db_block_cache_test.cc +5 -0
  59. package/deps/rocksdb/rocksdb/db/db_bloom_filter_test.cc +15 -15
  60. package/deps/rocksdb/rocksdb/db/db_compaction_test.cc +666 -44
  61. package/deps/rocksdb/rocksdb/db/db_filesnapshot.cc +2 -29
  62. package/deps/rocksdb/rocksdb/db/db_flush_test.cc +274 -1
  63. package/deps/rocksdb/rocksdb/db/db_impl/compacted_db_impl.cc +40 -19
  64. package/deps/rocksdb/rocksdb/db/db_impl/compacted_db_impl.h +6 -5
  65. package/deps/rocksdb/rocksdb/db/db_impl/db_impl.cc +250 -116
  66. package/deps/rocksdb/rocksdb/db/db_impl/db_impl.h +51 -23
  67. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_compaction_flush.cc +354 -96
  68. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_debug.cc +6 -3
  69. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_experimental.cc +2 -1
  70. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_files.cc +5 -0
  71. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_open.cc +50 -21
  72. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_readonly.cc +26 -13
  73. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_readonly.h +13 -5
  74. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_secondary.cc +61 -21
  75. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_secondary.h +8 -87
  76. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_write.cc +7 -1
  77. package/deps/rocksdb/rocksdb/db/db_iter.cc +2 -2
  78. package/deps/rocksdb/rocksdb/db/db_iter.h +1 -0
  79. package/deps/rocksdb/rocksdb/db/db_merge_operand_test.cc +4 -11
  80. package/deps/rocksdb/rocksdb/db/db_merge_operator_test.cc +6 -6
  81. package/deps/rocksdb/rocksdb/db/db_options_test.cc +39 -29
  82. package/deps/rocksdb/rocksdb/db/db_properties_test.cc +26 -36
  83. package/deps/rocksdb/rocksdb/db/db_range_del_test.cc +106 -0
  84. package/deps/rocksdb/rocksdb/db/db_rate_limiter_test.cc +12 -3
  85. package/deps/rocksdb/rocksdb/db/db_statistics_test.cc +1 -1
  86. package/deps/rocksdb/rocksdb/db/db_table_properties_test.cc +1 -0
  87. package/deps/rocksdb/rocksdb/db/db_tailing_iter_test.cc +279 -166
  88. package/deps/rocksdb/rocksdb/db/db_test.cc +48 -21
  89. package/deps/rocksdb/rocksdb/db/db_test2.cc +81 -12
  90. package/deps/rocksdb/rocksdb/db/db_test_util.cc +14 -6
  91. package/deps/rocksdb/rocksdb/db/db_test_util.h +40 -0
  92. package/deps/rocksdb/rocksdb/db/db_universal_compaction_test.cc +13 -1
  93. package/deps/rocksdb/rocksdb/db/db_wal_test.cc +233 -0
  94. package/deps/rocksdb/rocksdb/db/db_with_timestamp_basic_test.cc +143 -0
  95. package/deps/rocksdb/rocksdb/db/db_write_buffer_manager_test.cc +6 -6
  96. package/deps/rocksdb/rocksdb/db/db_write_test.cc +2 -2
  97. package/deps/rocksdb/rocksdb/db/dbformat.cc +36 -0
  98. package/deps/rocksdb/rocksdb/db/dbformat.h +169 -20
  99. package/deps/rocksdb/rocksdb/db/dbformat_test.cc +129 -0
  100. package/deps/rocksdb/rocksdb/db/error_handler.cc +16 -0
  101. package/deps/rocksdb/rocksdb/db/error_handler.h +6 -3
  102. package/deps/rocksdb/rocksdb/db/error_handler_fs_test.cc +4 -4
  103. package/deps/rocksdb/rocksdb/db/event_helpers.cc +4 -0
  104. package/deps/rocksdb/rocksdb/db/experimental.cc +2 -1
  105. package/deps/rocksdb/rocksdb/db/external_sst_file_basic_test.cc +4 -4
  106. package/deps/rocksdb/rocksdb/db/external_sst_file_ingestion_job.cc +17 -8
  107. package/deps/rocksdb/rocksdb/db/external_sst_file_test.cc +86 -4
  108. package/deps/rocksdb/rocksdb/db/fault_injection_test.cc +1 -1
  109. package/deps/rocksdb/rocksdb/db/file_indexer.cc +2 -4
  110. package/deps/rocksdb/rocksdb/db/flush_job.cc +101 -11
  111. package/deps/rocksdb/rocksdb/db/flush_job.h +24 -1
  112. package/deps/rocksdb/rocksdb/db/flush_job_test.cc +88 -11
  113. package/deps/rocksdb/rocksdb/db/forward_iterator.cc +2 -3
  114. package/deps/rocksdb/rocksdb/db/import_column_family_job.cc +159 -91
  115. package/deps/rocksdb/rocksdb/db/import_column_family_job.h +19 -10
  116. package/deps/rocksdb/rocksdb/db/import_column_family_test.cc +143 -0
  117. package/deps/rocksdb/rocksdb/db/internal_stats.cc +13 -1
  118. package/deps/rocksdb/rocksdb/db/internal_stats.h +2 -0
  119. package/deps/rocksdb/rocksdb/db/listener_test.cc +2 -1
  120. package/deps/rocksdb/rocksdb/db/log_reader.h +3 -2
  121. package/deps/rocksdb/rocksdb/db/log_test.cc +17 -21
  122. package/deps/rocksdb/rocksdb/db/log_writer.cc +1 -1
  123. package/deps/rocksdb/rocksdb/db/log_writer.h +3 -2
  124. package/deps/rocksdb/rocksdb/db/manual_compaction_test.cc +4 -3
  125. package/deps/rocksdb/rocksdb/db/memtable.cc +52 -13
  126. package/deps/rocksdb/rocksdb/db/memtable.h +45 -1
  127. package/deps/rocksdb/rocksdb/db/memtable_list.cc +44 -10
  128. package/deps/rocksdb/rocksdb/db/memtable_list.h +32 -1
  129. package/deps/rocksdb/rocksdb/db/memtable_list_test.cc +90 -4
  130. package/deps/rocksdb/rocksdb/db/perf_context_test.cc +2 -2
  131. package/deps/rocksdb/rocksdb/db/plain_table_db_test.cc +1 -0
  132. package/deps/rocksdb/rocksdb/db/repair.cc +21 -4
  133. package/deps/rocksdb/rocksdb/db/repair_test.cc +143 -2
  134. package/deps/rocksdb/rocksdb/db/seqno_time_test.cc +5 -4
  135. package/deps/rocksdb/rocksdb/db/table_cache.cc +44 -35
  136. package/deps/rocksdb/rocksdb/db/table_cache.h +6 -6
  137. package/deps/rocksdb/rocksdb/db/table_cache_sync_and_async.h +2 -2
  138. package/deps/rocksdb/rocksdb/db/version_builder.cc +0 -1
  139. package/deps/rocksdb/rocksdb/db/version_builder_test.cc +236 -204
  140. package/deps/rocksdb/rocksdb/db/version_edit.cc +66 -4
  141. package/deps/rocksdb/rocksdb/db/version_edit.h +48 -6
  142. package/deps/rocksdb/rocksdb/db/version_edit_handler.cc +80 -8
  143. package/deps/rocksdb/rocksdb/db/version_edit_handler.h +12 -0
  144. package/deps/rocksdb/rocksdb/db/version_edit_test.cc +86 -17
  145. package/deps/rocksdb/rocksdb/db/version_set.cc +136 -41
  146. package/deps/rocksdb/rocksdb/db/version_set.h +28 -7
  147. package/deps/rocksdb/rocksdb/db/version_set_test.cc +25 -15
  148. package/deps/rocksdb/rocksdb/db/write_batch.cc +11 -0
  149. package/deps/rocksdb/rocksdb/db/write_batch_internal.h +3 -0
  150. package/deps/rocksdb/rocksdb/db/write_batch_test.cc +16 -0
  151. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_common.h +22 -3
  152. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_driver.cc +2 -0
  153. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_env_wrapper.h +42 -0
  154. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_gflags.cc +32 -3
  155. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_shared_state.h +7 -0
  156. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_test_base.cc +247 -120
  157. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_test_base.h +9 -4
  158. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_tool.cc +13 -6
  159. package/deps/rocksdb/rocksdb/db_stress_tool/expected_value.h +2 -0
  160. package/deps/rocksdb/rocksdb/db_stress_tool/multi_ops_txns_stress.cc +15 -27
  161. package/deps/rocksdb/rocksdb/db_stress_tool/no_batched_ops_stress.cc +264 -69
  162. package/deps/rocksdb/rocksdb/env/env.cc +1 -2
  163. package/deps/rocksdb/rocksdb/env/env_encryption.cc +11 -165
  164. package/deps/rocksdb/rocksdb/env/env_encryption_ctr.h +0 -17
  165. package/deps/rocksdb/rocksdb/env/env_posix.cc +6 -2
  166. package/deps/rocksdb/rocksdb/env/env_test.cc +86 -2
  167. package/deps/rocksdb/rocksdb/env/fs_posix.cc +6 -4
  168. package/deps/rocksdb/rocksdb/env/unique_id_gen.cc +78 -0
  169. package/deps/rocksdb/rocksdb/env/unique_id_gen.h +34 -0
  170. package/deps/rocksdb/rocksdb/file/delete_scheduler.cc +1 -0
  171. package/deps/rocksdb/rocksdb/file/delete_scheduler_test.cc +15 -4
  172. package/deps/rocksdb/rocksdb/file/file_prefetch_buffer.cc +52 -43
  173. package/deps/rocksdb/rocksdb/file/file_prefetch_buffer.h +34 -18
  174. package/deps/rocksdb/rocksdb/file/file_util.cc +10 -5
  175. package/deps/rocksdb/rocksdb/file/file_util.h +13 -1
  176. package/deps/rocksdb/rocksdb/file/prefetch_test.cc +724 -79
  177. package/deps/rocksdb/rocksdb/file/random_access_file_reader.cc +64 -33
  178. package/deps/rocksdb/rocksdb/file/random_access_file_reader.h +3 -16
  179. package/deps/rocksdb/rocksdb/file/random_access_file_reader_test.cc +23 -12
  180. package/deps/rocksdb/rocksdb/file/sequence_file_reader.h +3 -0
  181. package/deps/rocksdb/rocksdb/include/rocksdb/advanced_cache.h +2 -1
  182. package/deps/rocksdb/rocksdb/include/rocksdb/advanced_options.h +153 -88
  183. package/deps/rocksdb/rocksdb/include/rocksdb/c.h +70 -2
  184. package/deps/rocksdb/rocksdb/include/rocksdb/cache.h +50 -11
  185. package/deps/rocksdb/rocksdb/include/rocksdb/compaction_job_stats.h +3 -0
  186. package/deps/rocksdb/rocksdb/include/rocksdb/comparator.h +16 -2
  187. package/deps/rocksdb/rocksdb/include/rocksdb/convenience.h +1 -1
  188. package/deps/rocksdb/rocksdb/include/rocksdb/db.h +55 -8
  189. package/deps/rocksdb/rocksdb/include/rocksdb/env.h +32 -4
  190. package/deps/rocksdb/rocksdb/include/rocksdb/env_encryption.h +9 -109
  191. package/deps/rocksdb/rocksdb/include/rocksdb/file_system.h +90 -13
  192. package/deps/rocksdb/rocksdb/include/rocksdb/listener.h +3 -0
  193. package/deps/rocksdb/rocksdb/include/rocksdb/options.h +85 -17
  194. package/deps/rocksdb/rocksdb/include/rocksdb/secondary_cache.h +13 -1
  195. package/deps/rocksdb/rocksdb/include/rocksdb/sst_file_manager.h +2 -1
  196. package/deps/rocksdb/rocksdb/include/rocksdb/sst_file_writer.h +5 -1
  197. package/deps/rocksdb/rocksdb/include/rocksdb/statistics.h +21 -2
  198. package/deps/rocksdb/rocksdb/include/rocksdb/table.h +7 -1
  199. package/deps/rocksdb/rocksdb/include/rocksdb/table_properties.h +6 -0
  200. package/deps/rocksdb/rocksdb/include/rocksdb/thread_status.h +5 -0
  201. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/optimistic_transaction_db.h +33 -2
  202. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/stackable_db.h +14 -0
  203. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/transaction.h +33 -2
  204. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/write_batch_with_index.h +0 -3
  205. package/deps/rocksdb/rocksdb/include/rocksdb/version.h +2 -2
  206. package/deps/rocksdb/rocksdb/include/rocksdb/write_batch.h +3 -0
  207. package/deps/rocksdb/rocksdb/memory/arena_test.cc +18 -11
  208. package/deps/rocksdb/rocksdb/memory/jemalloc_nodump_allocator.cc +2 -1
  209. package/deps/rocksdb/rocksdb/microbench/db_basic_bench.cc +69 -34
  210. package/deps/rocksdb/rocksdb/monitoring/statistics.cc +16 -1
  211. package/deps/rocksdb/rocksdb/monitoring/thread_status_util_debug.cc +10 -0
  212. package/deps/rocksdb/rocksdb/options/cf_options.cc +19 -0
  213. package/deps/rocksdb/rocksdb/options/cf_options.h +10 -2
  214. package/deps/rocksdb/rocksdb/options/customizable_test.cc +2 -1
  215. package/deps/rocksdb/rocksdb/options/db_options.cc +7 -0
  216. package/deps/rocksdb/rocksdb/options/db_options.h +1 -0
  217. package/deps/rocksdb/rocksdb/options/options.cc +15 -1
  218. package/deps/rocksdb/rocksdb/options/options_helper.cc +6 -0
  219. package/deps/rocksdb/rocksdb/options/options_settable_test.cc +11 -3
  220. package/deps/rocksdb/rocksdb/options/options_test.cc +8 -0
  221. package/deps/rocksdb/rocksdb/port/mmap.h +20 -0
  222. package/deps/rocksdb/rocksdb/port/stack_trace.cc +27 -12
  223. package/deps/rocksdb/rocksdb/port/win/env_win.h +1 -1
  224. package/deps/rocksdb/rocksdb/src.mk +3 -0
  225. package/deps/rocksdb/rocksdb/table/block_based/binary_search_index_reader.cc +2 -1
  226. package/deps/rocksdb/rocksdb/table/block_based/block.cc +48 -22
  227. package/deps/rocksdb/rocksdb/table/block_based/block.h +60 -12
  228. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_builder.cc +115 -42
  229. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_factory.cc +6 -5
  230. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_iterator.cc +60 -2
  231. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_iterator.h +2 -0
  232. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader.cc +62 -44
  233. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader.h +36 -14
  234. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader_sync_and_async.h +38 -15
  235. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader_test.cc +219 -51
  236. package/deps/rocksdb/rocksdb/table/block_based/block_builder.cc +41 -8
  237. package/deps/rocksdb/rocksdb/table/block_based/block_builder.h +25 -1
  238. package/deps/rocksdb/rocksdb/table/block_based/block_prefetcher.cc +50 -21
  239. package/deps/rocksdb/rocksdb/table/block_based/block_prefetcher.h +11 -4
  240. package/deps/rocksdb/rocksdb/table/block_based/block_test.cc +195 -55
  241. package/deps/rocksdb/rocksdb/table/block_based/hash_index_reader.cc +1 -1
  242. package/deps/rocksdb/rocksdb/table/block_based/index_builder.cc +31 -16
  243. package/deps/rocksdb/rocksdb/table/block_based/index_builder.h +97 -58
  244. package/deps/rocksdb/rocksdb/table/block_based/index_reader_common.cc +1 -1
  245. package/deps/rocksdb/rocksdb/table/block_based/index_reader_common.h +6 -0
  246. package/deps/rocksdb/rocksdb/table/block_based/partitioned_filter_block.cc +27 -12
  247. package/deps/rocksdb/rocksdb/table/block_based/partitioned_filter_block.h +3 -1
  248. package/deps/rocksdb/rocksdb/table/block_based/partitioned_filter_block_test.cc +114 -70
  249. package/deps/rocksdb/rocksdb/table/block_based/partitioned_index_iterator.cc +1 -2
  250. package/deps/rocksdb/rocksdb/table/block_based/partitioned_index_reader.cc +9 -6
  251. package/deps/rocksdb/rocksdb/table/block_based/reader_common.cc +15 -3
  252. package/deps/rocksdb/rocksdb/table/block_based/reader_common.h +6 -3
  253. package/deps/rocksdb/rocksdb/table/block_fetcher.cc +11 -11
  254. package/deps/rocksdb/rocksdb/table/block_fetcher_test.cc +3 -0
  255. package/deps/rocksdb/rocksdb/table/compaction_merging_iterator.cc +1 -0
  256. package/deps/rocksdb/rocksdb/table/cuckoo/cuckoo_table_builder.cc +6 -2
  257. package/deps/rocksdb/rocksdb/table/cuckoo/cuckoo_table_builder_test.cc +1 -2
  258. package/deps/rocksdb/rocksdb/table/cuckoo/cuckoo_table_reader.cc +2 -3
  259. package/deps/rocksdb/rocksdb/table/format.cc +175 -33
  260. package/deps/rocksdb/rocksdb/table/format.h +63 -10
  261. package/deps/rocksdb/rocksdb/table/merging_iterator.cc +10 -2
  262. package/deps/rocksdb/rocksdb/table/meta_blocks.cc +12 -4
  263. package/deps/rocksdb/rocksdb/table/meta_blocks.h +1 -0
  264. package/deps/rocksdb/rocksdb/table/mock_table.cc +8 -3
  265. package/deps/rocksdb/rocksdb/table/plain/plain_table_builder.cc +10 -5
  266. package/deps/rocksdb/rocksdb/table/plain/plain_table_builder.h +10 -1
  267. package/deps/rocksdb/rocksdb/table/plain/plain_table_key_coding.cc +1 -2
  268. package/deps/rocksdb/rocksdb/table/plain/plain_table_reader.cc +3 -3
  269. package/deps/rocksdb/rocksdb/table/sst_file_dumper.cc +12 -3
  270. package/deps/rocksdb/rocksdb/table/sst_file_writer.cc +26 -1
  271. package/deps/rocksdb/rocksdb/table/table_builder.h +6 -2
  272. package/deps/rocksdb/rocksdb/table/table_properties.cc +6 -0
  273. package/deps/rocksdb/rocksdb/table/table_test.cc +52 -22
  274. package/deps/rocksdb/rocksdb/test_util/secondary_cache_test_util.h +19 -7
  275. package/deps/rocksdb/rocksdb/test_util/sync_point.h +3 -1
  276. package/deps/rocksdb/rocksdb/test_util/testutil.cc +29 -0
  277. package/deps/rocksdb/rocksdb/test_util/testutil.h +19 -0
  278. package/deps/rocksdb/rocksdb/tools/db_bench_tool.cc +65 -26
  279. package/deps/rocksdb/rocksdb/tools/ldb_cmd.cc +8 -5
  280. package/deps/rocksdb/rocksdb/tools/ldb_tool.cc +1 -0
  281. package/deps/rocksdb/rocksdb/tools/reduce_levels_test.cc +1 -0
  282. package/deps/rocksdb/rocksdb/tools/sst_dump_test.cc +0 -1
  283. package/deps/rocksdb/rocksdb/tools/sst_dump_tool.cc +4 -0
  284. package/deps/rocksdb/rocksdb/unreleased_history/README.txt +73 -0
  285. package/deps/rocksdb/rocksdb/unreleased_history/add.sh +27 -0
  286. package/deps/rocksdb/rocksdb/unreleased_history/behavior_changes/.gitkeep +0 -0
  287. package/deps/rocksdb/rocksdb/unreleased_history/bug_fixes/.gitkeep +0 -0
  288. package/deps/rocksdb/rocksdb/unreleased_history/new_features/.gitkeep +0 -0
  289. package/deps/rocksdb/rocksdb/unreleased_history/performance_improvements/.gitkeep +0 -0
  290. package/deps/rocksdb/rocksdb/unreleased_history/public_api_changes/.gitkeep +0 -0
  291. package/deps/rocksdb/rocksdb/unreleased_history/release.sh +104 -0
  292. package/deps/rocksdb/rocksdb/util/async_file_reader.cc +5 -0
  293. package/deps/rocksdb/rocksdb/util/bloom_impl.h +3 -3
  294. package/deps/rocksdb/rocksdb/util/cast_util.h +14 -0
  295. package/deps/rocksdb/rocksdb/util/compaction_job_stats_impl.cc +2 -0
  296. package/deps/rocksdb/rocksdb/util/comparator.cc +29 -7
  297. package/deps/rocksdb/rocksdb/util/compression.cc +4 -4
  298. package/deps/rocksdb/rocksdb/util/compression.h +110 -32
  299. package/deps/rocksdb/rocksdb/util/core_local.h +2 -1
  300. package/deps/rocksdb/rocksdb/util/dynamic_bloom.h +4 -4
  301. package/deps/rocksdb/rocksdb/util/filelock_test.cc +3 -0
  302. package/deps/rocksdb/rocksdb/util/hash.h +7 -3
  303. package/deps/rocksdb/rocksdb/util/hash_test.cc +44 -0
  304. package/deps/rocksdb/rocksdb/util/math.h +58 -6
  305. package/deps/rocksdb/rocksdb/util/math128.h +29 -7
  306. package/deps/rocksdb/rocksdb/util/mutexlock.h +35 -27
  307. package/deps/rocksdb/rocksdb/util/single_thread_executor.h +1 -0
  308. package/deps/rocksdb/rocksdb/util/stop_watch.h +1 -1
  309. package/deps/rocksdb/rocksdb/util/thread_operation.h +8 -1
  310. package/deps/rocksdb/rocksdb/util/udt_util.cc +343 -0
  311. package/deps/rocksdb/rocksdb/util/udt_util.h +173 -1
  312. package/deps/rocksdb/rocksdb/util/udt_util_test.cc +447 -0
  313. package/deps/rocksdb/rocksdb/util/write_batch_util.cc +25 -0
  314. package/deps/rocksdb/rocksdb/util/write_batch_util.h +80 -0
  315. package/deps/rocksdb/rocksdb/utilities/backup/backup_engine_test.cc +4 -4
  316. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_db_impl.cc +69 -25
  317. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_db_impl.h +7 -6
  318. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_db_listener.h +1 -1
  319. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_dump_tool.cc +2 -3
  320. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_file.cc +6 -11
  321. package/deps/rocksdb/rocksdb/utilities/cache_dump_load_impl.h +1 -2
  322. package/deps/rocksdb/rocksdb/utilities/checkpoint/checkpoint_test.cc +4 -5
  323. package/deps/rocksdb/rocksdb/utilities/fault_injection_fs.cc +1 -1
  324. package/deps/rocksdb/rocksdb/utilities/fault_injection_secondary_cache.cc +2 -2
  325. package/deps/rocksdb/rocksdb/utilities/fault_injection_secondary_cache.h +2 -1
  326. package/deps/rocksdb/rocksdb/utilities/option_change_migration/option_change_migration_test.cc +3 -3
  327. package/deps/rocksdb/rocksdb/utilities/persistent_cache/block_cache_tier_file.cc +1 -2
  328. package/deps/rocksdb/rocksdb/utilities/trace/file_trace_reader_writer.cc +2 -3
  329. package/deps/rocksdb/rocksdb/utilities/transactions/lock/point/point_lock_manager_test.cc +2 -2
  330. package/deps/rocksdb/rocksdb/utilities/transactions/lock/point/point_lock_manager_test.h +1 -1
  331. package/deps/rocksdb/rocksdb/utilities/transactions/optimistic_transaction.cc +23 -8
  332. package/deps/rocksdb/rocksdb/utilities/transactions/optimistic_transaction_db_impl.cc +9 -6
  333. package/deps/rocksdb/rocksdb/utilities/transactions/optimistic_transaction_db_impl.h +37 -12
  334. package/deps/rocksdb/rocksdb/utilities/transactions/optimistic_transaction_test.cc +231 -33
  335. package/deps/rocksdb/rocksdb/utilities/transactions/pessimistic_transaction.h +0 -1
  336. package/deps/rocksdb/rocksdb/utilities/transactions/transaction_base.cc +76 -20
  337. package/deps/rocksdb/rocksdb/utilities/transactions/transaction_base.h +18 -9
  338. package/deps/rocksdb/rocksdb/utilities/transactions/transaction_test.cc +40 -23
  339. package/deps/rocksdb/rocksdb/utilities/transactions/transaction_test.h +13 -12
  340. package/deps/rocksdb/rocksdb/utilities/transactions/write_committed_transaction_ts_test.cc +7 -0
  341. package/deps/rocksdb/rocksdb/utilities/transactions/write_prepared_transaction_test.cc +1 -1
  342. package/deps/rocksdb/rocksdb/utilities/transactions/write_prepared_txn.cc +41 -11
  343. package/deps/rocksdb/rocksdb/utilities/transactions/write_prepared_txn.h +6 -3
  344. package/deps/rocksdb/rocksdb/utilities/transactions/write_prepared_txn_db.cc +71 -24
  345. package/deps/rocksdb/rocksdb/utilities/transactions/write_prepared_txn_db.h +19 -4
  346. package/deps/rocksdb/rocksdb/utilities/transactions/write_unprepared_transaction_test.cc +60 -107
  347. package/deps/rocksdb/rocksdb/utilities/transactions/write_unprepared_txn.cc +39 -11
  348. package/deps/rocksdb/rocksdb/utilities/transactions/write_unprepared_txn.h +6 -3
  349. package/deps/rocksdb/rocksdb/utilities/transactions/write_unprepared_txn_db.cc +14 -8
  350. package/deps/rocksdb/rocksdb/utilities/transactions/write_unprepared_txn_db.h +1 -1
  351. package/deps/rocksdb/rocksdb/utilities/ttl/db_ttl_impl.cc +10 -5
  352. package/deps/rocksdb/rocksdb/utilities/ttl/db_ttl_impl.h +1 -1
  353. package/deps/rocksdb/rocksdb/utilities/ttl/ttl_test.cc +1 -1
  354. package/deps/rocksdb/rocksdb/utilities/write_batch_with_index/write_batch_with_index.cc +2 -1
  355. package/deps/rocksdb/rocksdb/utilities/write_batch_with_index/write_batch_with_index_test.cc +6 -6
  356. package/deps/rocksdb/rocksdb.gyp +2 -0
  357. package/package.json +1 -1
  358. package/prebuilds/darwin-arm64/node.napi.node +0 -0
  359. package/prebuilds/linux-x64/node.napi.node +0 -0
@@ -269,7 +269,13 @@ Status BlobDBImpl::Open(std::vector<ColumnFamilyHandle*>* handles) {
269
269
  // Add trash files in blob dir to file delete scheduler.
270
270
  SstFileManagerImpl* sfm = static_cast<SstFileManagerImpl*>(
271
271
  db_impl_->immutable_db_options().sst_file_manager.get());
272
- DeleteScheduler::CleanupDirectory(env_, sfm, blob_dir_);
272
+ s = DeleteScheduler::CleanupDirectory(env_, sfm, blob_dir_);
273
+ if (!s.ok()) {
274
+ ROCKS_LOG_ERROR(db_options_.info_log,
275
+ "Failed to clean up directory %s, status: %s",
276
+ blob_dir_.c_str(), s.ToString().c_str());
277
+ return s;
278
+ }
273
279
 
274
280
  UpdateLiveSSTSize();
275
281
 
@@ -1142,7 +1148,7 @@ Slice BlobDBImpl::GetCompressedSlice(const Slice& raw,
1142
1148
  StopWatch compression_sw(clock_, statistics_, BLOB_DB_COMPRESSION_MICROS);
1143
1149
  CompressionType type = bdb_options_.compression;
1144
1150
  CompressionOptions opts;
1145
- CompressionContext context(type);
1151
+ CompressionContext context(type, opts);
1146
1152
  CompressionInfo info(opts, context, CompressionDict::GetEmptyDict(), type,
1147
1153
  0 /* sample_for_compression */);
1148
1154
  CompressBlock(raw, info, &type, kBlockBasedTableVersionFormat, false,
@@ -1384,28 +1390,46 @@ Status BlobDBImpl::AppendBlob(const std::shared_ptr<BlobFile>& bfile,
1384
1390
  return s;
1385
1391
  }
1386
1392
 
1387
- std::vector<Status> BlobDBImpl::MultiGet(const ReadOptions& read_options,
1393
+ std::vector<Status> BlobDBImpl::MultiGet(const ReadOptions& _read_options,
1388
1394
  const std::vector<Slice>& keys,
1389
1395
  std::vector<std::string>* values) {
1390
1396
  StopWatch multiget_sw(clock_, statistics_, BLOB_DB_MULTIGET_MICROS);
1391
1397
  RecordTick(statistics_, BLOB_DB_NUM_MULTIGET);
1392
1398
  // Get a snapshot to avoid blob file get deleted between we
1393
1399
  // fetch and index entry and reading from the file.
1394
- ReadOptions ro(read_options);
1395
- bool snapshot_created = SetSnapshotIfNeeded(&ro);
1396
-
1397
1400
  std::vector<Status> statuses;
1398
- statuses.reserve(keys.size());
1401
+ std::size_t num_keys = keys.size();
1402
+ statuses.reserve(num_keys);
1403
+
1404
+ if (_read_options.io_activity != Env::IOActivity::kUnknown &&
1405
+ _read_options.io_activity != Env::IOActivity::kMultiGet) {
1406
+ Status s = Status::InvalidArgument(
1407
+ "Can only call MultiGet with `ReadOptions::io_activity` is "
1408
+ "`Env::IOActivity::kUnknown` or `Env::IOActivity::kMultiGet`");
1409
+
1410
+ for (size_t i = 0; i < num_keys; ++i) {
1411
+ statuses.push_back(s);
1412
+ }
1413
+ return statuses;
1414
+ }
1415
+
1416
+ ReadOptions read_options(_read_options);
1417
+ if (read_options.io_activity == Env::IOActivity::kUnknown) {
1418
+ read_options.io_activity = Env::IOActivity::kMultiGet;
1419
+ }
1420
+ bool snapshot_created = SetSnapshotIfNeeded(&read_options);
1421
+
1399
1422
  values->clear();
1400
1423
  values->reserve(keys.size());
1401
1424
  PinnableSlice value;
1402
1425
  for (size_t i = 0; i < keys.size(); i++) {
1403
- statuses.push_back(Get(ro, DefaultColumnFamily(), keys[i], &value));
1426
+ statuses.push_back(
1427
+ GetImpl(read_options, DefaultColumnFamily(), keys[i], &value));
1404
1428
  values->push_back(value.ToString());
1405
1429
  value.Reset();
1406
1430
  }
1407
1431
  if (snapshot_created) {
1408
- db_->ReleaseSnapshot(ro.snapshot);
1432
+ db_->ReleaseSnapshot(read_options.snapshot);
1409
1433
  }
1410
1434
  return statuses;
1411
1435
  }
@@ -1544,12 +1568,12 @@ Status BlobDBImpl::GetRawBlobFromFile(const Slice& key, uint64_t file_number,
1544
1568
  if (reader->use_direct_io()) {
1545
1569
  s = reader->Read(IOOptions(), record_offset,
1546
1570
  static_cast<size_t>(record_size), &blob_record, nullptr,
1547
- &aligned_buf, Env::IO_TOTAL /* rate_limiter_priority */);
1571
+ &aligned_buf);
1548
1572
  } else {
1549
1573
  buf.reserve(static_cast<size_t>(record_size));
1550
1574
  s = reader->Read(IOOptions(), record_offset,
1551
1575
  static_cast<size_t>(record_size), &blob_record, &buf[0],
1552
- nullptr, Env::IO_TOTAL /* rate_limiter_priority */);
1576
+ nullptr);
1553
1577
  }
1554
1578
  RecordTick(statistics_, BLOB_DB_BLOB_FILE_BYTES_READ, blob_record.size());
1555
1579
  }
@@ -1609,16 +1633,36 @@ Status BlobDBImpl::GetRawBlobFromFile(const Slice& key, uint64_t file_number,
1609
1633
  return Status::OK();
1610
1634
  }
1611
1635
 
1612
- Status BlobDBImpl::Get(const ReadOptions& read_options,
1636
+ Status BlobDBImpl::Get(const ReadOptions& _read_options,
1613
1637
  ColumnFamilyHandle* column_family, const Slice& key,
1614
1638
  PinnableSlice* value) {
1615
- return Get(read_options, column_family, key, value,
1616
- static_cast<uint64_t*>(nullptr) /*expiration*/);
1639
+ if (_read_options.io_activity != Env::IOActivity::kUnknown &&
1640
+ _read_options.io_activity != Env::IOActivity::kGet) {
1641
+ return Status::InvalidArgument(
1642
+ "Can only call Get with `ReadOptions::io_activity` is "
1643
+ "`Env::IOActivity::kUnknown` or `Env::IOActivity::kGet`");
1644
+ }
1645
+ ReadOptions read_options(_read_options);
1646
+ if (read_options.io_activity == Env::IOActivity::kUnknown) {
1647
+ read_options.io_activity = Env::IOActivity::kGet;
1648
+ }
1649
+ return GetImpl(read_options, column_family, key, value);
1617
1650
  }
1618
1651
 
1619
- Status BlobDBImpl::Get(const ReadOptions& read_options,
1652
+ Status BlobDBImpl::Get(const ReadOptions& _read_options,
1620
1653
  ColumnFamilyHandle* column_family, const Slice& key,
1621
1654
  PinnableSlice* value, uint64_t* expiration) {
1655
+ if (_read_options.io_activity != Env::IOActivity::kUnknown &&
1656
+ _read_options.io_activity != Env::IOActivity::kGet) {
1657
+ return Status::InvalidArgument(
1658
+ "Can only call Get with `ReadOptions::io_activity` is "
1659
+ "`Env::IOActivity::kUnknown` or `Env::IOActivity::kGet`");
1660
+ }
1661
+ ReadOptions read_options(_read_options);
1662
+ if (read_options.io_activity == Env::IOActivity::kUnknown) {
1663
+ read_options.io_activity = Env::IOActivity::kGet;
1664
+ }
1665
+
1622
1666
  StopWatch get_sw(clock_, statistics_, BLOB_DB_GET_MICROS);
1623
1667
  RecordTick(statistics_, BLOB_DB_NUM_GET);
1624
1668
  return GetImpl(read_options, column_family, key, value, expiration);
@@ -1631,11 +1675,6 @@ Status BlobDBImpl::GetImpl(const ReadOptions& read_options,
1631
1675
  return Status::NotSupported(
1632
1676
  "Blob DB doesn't support non-default column family.");
1633
1677
  }
1634
- if (read_options.io_activity != Env::IOActivity::kUnknown) {
1635
- return Status::InvalidArgument(
1636
- "Cannot call Get with `ReadOptions::io_activity` != "
1637
- "`Env::IOActivity::kUnknown`");
1638
- }
1639
1678
  // Get a snapshot to avoid blob file get deleted between we
1640
1679
  // fetch and index entry and reading from the file.
1641
1680
  // TODO(yiwu): For Get() retry if file not found would be a simpler strategy.
@@ -1882,7 +1921,7 @@ std::pair<bool, int64_t> BlobDBImpl::EvictExpiredFiles(bool aborted) {
1882
1921
  }
1883
1922
 
1884
1923
  if (!blob_file->Immutable()) {
1885
- CloseBlobFile(blob_file);
1924
+ CloseBlobFile(blob_file).PermitUncheckedError();
1886
1925
  }
1887
1926
 
1888
1927
  assert(blob_file->Immutable());
@@ -2040,11 +2079,16 @@ void BlobDBImpl::CopyBlobFiles(
2040
2079
  }
2041
2080
  }
2042
2081
 
2043
- Iterator* BlobDBImpl::NewIterator(const ReadOptions& read_options) {
2044
- if (read_options.io_activity != Env::IOActivity::kUnknown) {
2082
+ Iterator* BlobDBImpl::NewIterator(const ReadOptions& _read_options) {
2083
+ if (_read_options.io_activity != Env::IOActivity::kUnknown &&
2084
+ _read_options.io_activity != Env::IOActivity::kDBIterator) {
2045
2085
  return NewErrorIterator(Status::InvalidArgument(
2046
- "Cannot call NewIterator with `ReadOptions::io_activity` != "
2047
- "`Env::IOActivity::kUnknown`"));
2086
+ "Can only call NewIterator with `ReadOptions::io_activity` is "
2087
+ "`Env::IOActivity::kUnknown` or `Env::IOActivity::kDBIterator`"));
2088
+ }
2089
+ ReadOptions read_options(_read_options);
2090
+ if (read_options.io_activity == Env::IOActivity::kUnknown) {
2091
+ read_options.io_activity = Env::IOActivity::kDBIterator;
2048
2092
  }
2049
2093
  auto* cfd =
2050
2094
  static_cast_with_check<ColumnFamilyHandleImpl>(DefaultColumnFamily())
@@ -103,12 +103,13 @@ class BlobDBImpl : public BlobDB {
103
103
  const Slice& value) override;
104
104
 
105
105
  using BlobDB::Get;
106
- Status Get(const ReadOptions& read_options, ColumnFamilyHandle* column_family,
107
- const Slice& key, PinnableSlice* value) override;
106
+ Status Get(const ReadOptions& _read_options,
107
+ ColumnFamilyHandle* column_family, const Slice& key,
108
+ PinnableSlice* value) override;
108
109
 
109
- Status Get(const ReadOptions& read_options, ColumnFamilyHandle* column_family,
110
- const Slice& key, PinnableSlice* value,
111
- uint64_t* expiration) override;
110
+ Status Get(const ReadOptions& _read_options,
111
+ ColumnFamilyHandle* column_family, const Slice& key,
112
+ PinnableSlice* value, uint64_t* expiration) override;
112
113
 
113
114
  using BlobDB::NewIterator;
114
115
  virtual Iterator* NewIterator(const ReadOptions& read_options) override;
@@ -123,7 +124,7 @@ class BlobDBImpl : public BlobDB {
123
124
 
124
125
  using BlobDB::MultiGet;
125
126
  virtual std::vector<Status> MultiGet(
126
- const ReadOptions& read_options, const std::vector<Slice>& keys,
127
+ const ReadOptions& _read_options, const std::vector<Slice>& keys,
127
128
  std::vector<std::string>* values) override;
128
129
 
129
130
  using BlobDB::Write;
@@ -22,7 +22,7 @@ class BlobDBListener : public EventListener {
22
22
 
23
23
  void OnFlushBegin(DB* /*db*/, const FlushJobInfo& /*info*/) override {
24
24
  assert(blob_db_impl_ != nullptr);
25
- blob_db_impl_->SyncBlobFiles();
25
+ blob_db_impl_->SyncBlobFiles().PermitUncheckedError();
26
26
  }
27
27
 
28
28
  void OnFlushCompleted(DB* /*db*/, const FlushJobInfo& /*info*/) override {
@@ -102,8 +102,8 @@ Status BlobDumpTool::Read(uint64_t offset, size_t size, Slice* result) {
102
102
  }
103
103
  buffer_.reset(new char[buffer_size_]);
104
104
  }
105
- Status s = reader_->Read(IOOptions(), offset, size, result, buffer_.get(),
106
- nullptr, Env::IO_TOTAL /* rate_limiter_priority */);
105
+ Status s =
106
+ reader_->Read(IOOptions(), offset, size, result, buffer_.get(), nullptr);
107
107
  if (!s.ok()) {
108
108
  return s;
109
109
  }
@@ -277,4 +277,3 @@ std::string BlobDumpTool::GetString(std::pair<T, T> p) {
277
277
 
278
278
  } // namespace blob_db
279
279
  } // namespace ROCKSDB_NAMESPACE
280
-
@@ -114,13 +114,11 @@ Status BlobFile::ReadFooter(BlobLogFooter* bf) {
114
114
  // TODO: rate limit reading footers from blob files.
115
115
  if (ra_file_reader_->use_direct_io()) {
116
116
  s = ra_file_reader_->Read(IOOptions(), footer_offset, BlobLogFooter::kSize,
117
- &result, nullptr, &aligned_buf,
118
- Env::IO_TOTAL /* rate_limiter_priority */);
117
+ &result, nullptr, &aligned_buf);
119
118
  } else {
120
119
  buf.reserve(BlobLogFooter::kSize + 10);
121
120
  s = ra_file_reader_->Read(IOOptions(), footer_offset, BlobLogFooter::kSize,
122
- &result, &buf[0], nullptr,
123
- Env::IO_TOTAL /* rate_limiter_priority */);
121
+ &result, &buf[0], nullptr);
124
122
  }
125
123
  if (!s.ok()) return s;
126
124
  if (result.size() != BlobLogFooter::kSize) {
@@ -238,13 +236,11 @@ Status BlobFile::ReadMetadata(const std::shared_ptr<FileSystem>& fs,
238
236
  // TODO: rate limit reading headers from blob files.
239
237
  if (file_reader->use_direct_io()) {
240
238
  s = file_reader->Read(IOOptions(), 0, BlobLogHeader::kSize, &header_slice,
241
- nullptr, &aligned_buf,
242
- Env::IO_TOTAL /* rate_limiter_priority */);
239
+ nullptr, &aligned_buf);
243
240
  } else {
244
241
  header_buf.reserve(BlobLogHeader::kSize);
245
242
  s = file_reader->Read(IOOptions(), 0, BlobLogHeader::kSize, &header_slice,
246
- &header_buf[0], nullptr,
247
- Env::IO_TOTAL /* rate_limiter_priority */);
243
+ &header_buf[0], nullptr);
248
244
  }
249
245
  if (!s.ok()) {
250
246
  ROCKS_LOG_ERROR(
@@ -281,13 +277,12 @@ Status BlobFile::ReadMetadata(const std::shared_ptr<FileSystem>& fs,
281
277
  if (file_reader->use_direct_io()) {
282
278
  s = file_reader->Read(IOOptions(), file_size - BlobLogFooter::kSize,
283
279
  BlobLogFooter::kSize, &footer_slice, nullptr,
284
- &aligned_buf,
285
- Env::IO_TOTAL /* rate_limiter_priority */);
280
+ &aligned_buf);
286
281
  } else {
287
282
  footer_buf.reserve(BlobLogFooter::kSize);
288
283
  s = file_reader->Read(IOOptions(), file_size - BlobLogFooter::kSize,
289
284
  BlobLogFooter::kSize, &footer_slice, &footer_buf[0],
290
- nullptr, Env::IO_TOTAL /* rate_limiter_priority */);
285
+ nullptr);
291
286
  }
292
287
  if (!s.ok()) {
293
288
  ROCKS_LOG_ERROR(
@@ -249,8 +249,7 @@ class FromFileCacheDumpReader : public CacheDumpReader {
249
249
 
250
250
  while (to_read > 0) {
251
251
  io_s = file_reader_->Read(IOOptions(), offset_, to_read, &result_,
252
- buffer_, nullptr,
253
- Env::IO_TOTAL /* rate_limiter_priority */);
252
+ buffer_, nullptr);
254
253
  if (!io_s.ok()) {
255
254
  return io_s;
256
255
  }
@@ -460,8 +460,8 @@ TEST_F(CheckpointTest, CheckpointCF) {
460
460
  Options options = CurrentOptions();
461
461
  CreateAndReopenWithCF({"one", "two", "three", "four", "five"}, options);
462
462
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency(
463
- {{"CheckpointTest::CheckpointCF:2", "DBImpl::GetLiveFiles:2"},
464
- {"DBImpl::GetLiveFiles:1", "CheckpointTest::CheckpointCF:1"}});
463
+ {{"CheckpointTest::CheckpointCF:2", "DBImpl::FlushAllColumnFamilies:2"},
464
+ {"DBImpl::FlushAllColumnFamilies:1", "CheckpointTest::CheckpointCF:1"}});
465
465
 
466
466
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();
467
467
 
@@ -925,7 +925,7 @@ TEST_F(CheckpointTest, CheckpointWithDbPath) {
925
925
  options.db_paths.emplace_back(dbname_ + "_2", 0);
926
926
  Reopen(options);
927
927
  ASSERT_OK(Put("key1", "val1"));
928
- Flush();
928
+ ASSERT_OK(Flush());
929
929
  Checkpoint* checkpoint;
930
930
  ASSERT_OK(Checkpoint::Create(db_, &checkpoint));
931
931
  // Currently not supported
@@ -968,7 +968,7 @@ TEST_F(CheckpointTest, PutRaceWithCheckpointTrackedWalSync) {
968
968
 
969
969
  // Simulate full loss of unsynced data. This drops "key2" -> "val2" from the
970
970
  // DB WAL.
971
- fault_env->DropUnsyncedFileData();
971
+ ASSERT_OK(fault_env->DropUnsyncedFileData());
972
972
 
973
973
  // Before the bug fix, reopening the DB would fail because the MANIFEST's
974
974
  // AddWal entry indicated the WAL should be synced through "key2" -> "val2".
@@ -985,4 +985,3 @@ int main(int argc, char** argv) {
985
985
  ::testing::InitGoogleTest(&argc, argv);
986
986
  return RUN_ALL_TESTS();
987
987
  }
988
-
@@ -242,6 +242,7 @@ IOStatus TestFSWritableFile::PositionedAppend(
242
242
  IOStatus TestFSWritableFile::Close(const IOOptions& options,
243
243
  IODebugContext* dbg) {
244
244
  MutexLock l(&mutex_);
245
+ fs_->WritableFileClosed(state_);
245
246
  if (!fs_->IsFilesystemActive()) {
246
247
  return fs_->GetError();
247
248
  }
@@ -263,7 +264,6 @@ IOStatus TestFSWritableFile::Close(const IOOptions& options,
263
264
  io_s = target_->Close(options, dbg);
264
265
  }
265
266
  if (io_s.ok()) {
266
- fs_->WritableFileClosed(state_);
267
267
  IOStatus in_s = fs_->InjectMetadataWriteError();
268
268
  if (!in_s.ok()) {
269
269
  return in_s;
@@ -78,13 +78,13 @@ FaultInjectionSecondaryCache::GetErrorContext() {
78
78
 
79
79
  Status FaultInjectionSecondaryCache::Insert(
80
80
  const Slice& key, Cache::ObjectPtr value,
81
- const Cache::CacheItemHelper* helper) {
81
+ const Cache::CacheItemHelper* helper, bool force_insert) {
82
82
  ErrorContext* ctx = GetErrorContext();
83
83
  if (ctx->rand.OneIn(prob_)) {
84
84
  return Status::IOError();
85
85
  }
86
86
 
87
- return base_->Insert(key, value, helper);
87
+ return base_->Insert(key, value, helper, force_insert);
88
88
  }
89
89
 
90
90
  std::unique_ptr<SecondaryCacheResultHandle>
@@ -32,7 +32,8 @@ class FaultInjectionSecondaryCache : public SecondaryCache {
32
32
  const char* Name() const override { return "FaultInjectionSecondaryCache"; }
33
33
 
34
34
  Status Insert(const Slice& key, Cache::ObjectPtr value,
35
- const Cache::CacheItemHelper* helper) override;
35
+ const Cache::CacheItemHelper* helper,
36
+ bool force_insert) override;
36
37
 
37
38
  std::unique_ptr<SecondaryCacheResultHandle> Lookup(
38
39
  const Slice& key, const Cache::CacheItemHelper* helper,
@@ -229,7 +229,7 @@ TEST_P(DBOptionChangeMigrationTests, Migrate3) {
229
229
  for (int i = 0; i < 50; i++) {
230
230
  ASSERT_OK(Put(Key(num * 100 + i), rnd.RandomString(900)));
231
231
  }
232
- Flush();
232
+ ASSERT_OK(Flush());
233
233
  ASSERT_OK(dbfull()->TEST_WaitForCompact());
234
234
  if (num == 9) {
235
235
  // Issue a full compaction to generate some zero-out files
@@ -313,7 +313,7 @@ TEST_P(DBOptionChangeMigrationTests, Migrate4) {
313
313
  for (int i = 0; i < 50; i++) {
314
314
  ASSERT_OK(Put(Key(num * 100 + i), rnd.RandomString(900)));
315
315
  }
316
- Flush();
316
+ ASSERT_OK(Flush());
317
317
  ASSERT_OK(dbfull()->TEST_WaitForCompact());
318
318
  if (num == 9) {
319
319
  // Issue a full compaction to generate some zero-out files
@@ -496,7 +496,7 @@ TEST_F(DBOptionChangeMigrationTest, CompactedSrcToUniversal) {
496
496
  ASSERT_OK(Put(Key(num * 100 + i), rnd.RandomString(900)));
497
497
  }
498
498
  }
499
- Flush();
499
+ ASSERT_OK(Flush());
500
500
  CompactRangeOptions cro;
501
501
  cro.bottommost_level_compaction = BottommostLevelCompaction::kForce;
502
502
  ASSERT_OK(dbfull()->CompactRange(cro, nullptr, nullptr));
@@ -236,7 +236,7 @@ bool RandomAccessCacheFile::Read(const LBA& lba, Slice* key, Slice* val,
236
236
 
237
237
  Slice result;
238
238
  Status s = freader_->Read(IOOptions(), lba.off_, lba.size_, &result, scratch,
239
- nullptr, Env::IO_TOTAL /* rate_limiter_priority */);
239
+ nullptr);
240
240
  if (!s.ok()) {
241
241
  Error(log_, "Error reading from file %s. %s", Path().c_str(),
242
242
  s.ToString().c_str());
@@ -605,4 +605,3 @@ void ThreadedWriter::DispatchIO(const IO& io) {
605
605
  }
606
606
 
607
607
  } // namespace ROCKSDB_NAMESPACE
608
-
@@ -42,8 +42,7 @@ Status FileTraceReader::Reset() {
42
42
  Status FileTraceReader::Read(std::string* data) {
43
43
  assert(file_reader_ != nullptr);
44
44
  Status s = file_reader_->Read(IOOptions(), offset_, kTraceMetadataSize,
45
- &result_, buffer_, nullptr,
46
- Env::IO_TOTAL /* rate_limiter_priority */);
45
+ &result_, buffer_, nullptr);
47
46
  if (!s.ok()) {
48
47
  return s;
49
48
  }
@@ -68,7 +67,7 @@ Status FileTraceReader::Read(std::string* data) {
68
67
  bytes_to_read > kBufferSize ? kBufferSize : bytes_to_read;
69
68
  while (to_read > 0) {
70
69
  s = file_reader_->Read(IOOptions(), offset_, to_read, &result_, buffer_,
71
- nullptr, Env::IO_TOTAL /* rate_limiter_priority */);
70
+ nullptr);
72
71
  if (!s.ok()) {
73
72
  return s;
74
73
  }
@@ -128,14 +128,14 @@ TEST_F(PointLockManagerTest, DeadlockDepthExceeded) {
128
128
  port::Thread t1 = BlockUntilWaitingTxn(wait_sync_point_name_, [&]() {
129
129
  ASSERT_OK(locker_->TryLock(txn2, 1, "k2", env_, true));
130
130
  // block because txn1 is holding a lock on k1.
131
- locker_->TryLock(txn2, 1, "k1", env_, true);
131
+ ASSERT_OK(locker_->TryLock(txn2, 1, "k1", env_, true));
132
132
  });
133
133
 
134
134
  ASSERT_OK(locker_->TryLock(txn3, 1, "k3", env_, true));
135
135
 
136
136
  port::Thread t2 = BlockUntilWaitingTxn(wait_sync_point_name_, [&]() {
137
137
  // block because txn3 is holding a lock on k1.
138
- locker_->TryLock(txn4, 1, "k3", env_, true);
138
+ ASSERT_OK(locker_->TryLock(txn4, 1, "k3", env_, true));
139
139
  });
140
140
 
141
141
  auto s = locker_->TryLock(txn3, 1, "k2", env_, true);
@@ -244,7 +244,7 @@ TEST_P(AnyLockManagerTest, Deadlock) {
244
244
  // txn1 tries to lock k2, will block forever.
245
245
  port::Thread t = BlockUntilWaitingTxn(wait_sync_point_name_, [&]() {
246
246
  // block because txn2 is holding a lock on k2.
247
- locker_->TryLock(txn1, 1, "k2", env_, true);
247
+ ASSERT_OK(locker_->TryLock(txn1, 1, "k2", env_, true));
248
248
  });
249
249
 
250
250
  auto s = locker_->TryLock(txn2, 1, "k1", env_, true);
@@ -6,6 +6,7 @@
6
6
 
7
7
  #include "utilities/transactions/optimistic_transaction.h"
8
8
 
9
+ #include <cstdint>
9
10
  #include <string>
10
11
 
11
12
  #include "db/column_family.h"
@@ -15,6 +16,7 @@
15
16
  #include "rocksdb/status.h"
16
17
  #include "rocksdb/utilities/optimistic_transaction_db.h"
17
18
  #include "util/cast_util.h"
19
+ #include "util/defer.h"
18
20
  #include "util/string_util.h"
19
21
  #include "utilities/transactions/lock/point/point_lock_tracker.h"
20
22
  #include "utilities/transactions/optimistic_transaction.h"
@@ -96,28 +98,42 @@ Status OptimisticTransaction::CommitWithParallelValidate() {
96
98
  assert(txn_db_impl);
97
99
  DBImpl* db_impl = static_cast_with_check<DBImpl>(db_->GetRootDB());
98
100
  assert(db_impl);
99
- const size_t space = txn_db_impl->GetLockBucketsSize();
100
- std::set<size_t> lk_idxes;
101
- std::vector<std::unique_lock<std::mutex>> lks;
101
+ std::set<port::Mutex*> lk_ptrs;
102
102
  std::unique_ptr<LockTracker::ColumnFamilyIterator> cf_it(
103
103
  tracked_locks_->GetColumnFamilyIterator());
104
104
  assert(cf_it != nullptr);
105
105
  while (cf_it->HasNext()) {
106
106
  ColumnFamilyId cf = cf_it->Next();
107
+
108
+ // To avoid the same key(s) contending across CFs or DBs, seed the
109
+ // hash independently.
110
+ uint64_t seed = reinterpret_cast<uintptr_t>(db_impl) +
111
+ uint64_t{0xb83c07fbc6ced699} /*random prime*/ * cf;
112
+
107
113
  std::unique_ptr<LockTracker::KeyIterator> key_it(
108
114
  tracked_locks_->GetKeyIterator(cf));
109
115
  assert(key_it != nullptr);
110
116
  while (key_it->HasNext()) {
111
- const std::string& key = key_it->Next();
112
- lk_idxes.insert(FastRange64(GetSliceNPHash64(key), space));
117
+ auto lock_bucket_ptr = &txn_db_impl->GetLockBucket(key_it->Next(), seed);
118
+ TEST_SYNC_POINT_CALLBACK(
119
+ "OptimisticTransaction::CommitWithParallelValidate::lock_bucket_ptr",
120
+ lock_bucket_ptr);
121
+ lk_ptrs.insert(lock_bucket_ptr);
113
122
  }
114
123
  }
115
124
  // NOTE: in a single txn, all bucket-locks are taken in ascending order.
116
125
  // In this way, txns from different threads all obey this rule so that
117
126
  // deadlock can be avoided.
118
- for (auto v : lk_idxes) {
119
- lks.emplace_back(txn_db_impl->LockBucket(v));
127
+ for (auto v : lk_ptrs) {
128
+ // WART: if an exception is thrown during a Lock(), previously locked will
129
+ // not be Unlock()ed. But a vector of MutexLock is likely inefficient.
130
+ v->Lock();
120
131
  }
132
+ Defer unlocks([&]() {
133
+ for (auto v : lk_ptrs) {
134
+ v->Unlock();
135
+ }
136
+ });
121
137
 
122
138
  Status s = TransactionUtil::CheckKeysForConflicts(db_impl, *tracked_locks_,
123
139
  true /* cache_only */);
@@ -191,4 +207,3 @@ Status OptimisticTransaction::SetName(const TransactionName& /* unused */) {
191
207
  }
192
208
 
193
209
  } // namespace ROCKSDB_NAMESPACE
194
-
@@ -17,6 +17,15 @@
17
17
 
18
18
  namespace ROCKSDB_NAMESPACE {
19
19
 
20
+ std::shared_ptr<OccLockBuckets> MakeSharedOccLockBuckets(size_t bucket_count,
21
+ bool cache_aligned) {
22
+ if (cache_aligned) {
23
+ return std::make_shared<OccLockBucketsImpl<true>>(bucket_count);
24
+ } else {
25
+ return std::make_shared<OccLockBucketsImpl<false>>(bucket_count);
26
+ }
27
+ }
28
+
20
29
  Transaction* OptimisticTransactionDBImpl::BeginTransaction(
21
30
  const WriteOptions& write_options,
22
31
  const OptimisticTransactionOptions& txn_options, Transaction* old_txn) {
@@ -28,12 +37,6 @@ Transaction* OptimisticTransactionDBImpl::BeginTransaction(
28
37
  }
29
38
  }
30
39
 
31
- std::unique_lock<std::mutex> OptimisticTransactionDBImpl::LockBucket(
32
- size_t idx) {
33
- assert(idx < bucketed_locks_.size());
34
- return std::unique_lock<std::mutex>(*bucketed_locks_[idx]);
35
- }
36
-
37
40
  Status OptimisticTransactionDB::Open(const Options& options,
38
41
  const std::string& dbname,
39
42
  OptimisticTransactionDB** dbptr) {
@@ -6,15 +6,41 @@
6
6
  #pragma once
7
7
 
8
8
  #include <algorithm>
9
- #include <mutex>
9
+ #include <cstdint>
10
+ #include <memory>
10
11
  #include <vector>
11
12
 
12
13
  #include "rocksdb/db.h"
13
14
  #include "rocksdb/options.h"
14
15
  #include "rocksdb/utilities/optimistic_transaction_db.h"
16
+ #include "util/cast_util.h"
17
+ #include "util/mutexlock.h"
15
18
 
16
19
  namespace ROCKSDB_NAMESPACE {
17
20
 
21
+ class OccLockBucketsImplBase : public OccLockBuckets {
22
+ public:
23
+ virtual port::Mutex& GetLockBucket(const Slice& key, uint64_t seed) = 0;
24
+ };
25
+
26
+ template <bool cache_aligned>
27
+ class OccLockBucketsImpl : public OccLockBucketsImplBase {
28
+ public:
29
+ explicit OccLockBucketsImpl(size_t bucket_count) : locks_(bucket_count) {}
30
+ port::Mutex& GetLockBucket(const Slice& key, uint64_t seed) override {
31
+ return locks_.Get(key, seed);
32
+ }
33
+ size_t ApproximateMemoryUsage() const override {
34
+ return locks_.ApproximateMemoryUsage();
35
+ }
36
+
37
+ private:
38
+ // TODO: investigate optionally using folly::MicroLock to majorly save space
39
+ using M = std::conditional_t<cache_aligned, CacheAlignedWrapper<port::Mutex>,
40
+ port::Mutex>;
41
+ Striped<M> locks_;
42
+ };
43
+
18
44
  class OptimisticTransactionDBImpl : public OptimisticTransactionDB {
19
45
  public:
20
46
  explicit OptimisticTransactionDBImpl(
@@ -24,12 +50,13 @@ class OptimisticTransactionDBImpl : public OptimisticTransactionDB {
24
50
  db_owner_(take_ownership),
25
51
  validate_policy_(occ_options.validate_policy) {
26
52
  if (validate_policy_ == OccValidationPolicy::kValidateParallel) {
27
- uint32_t bucket_size = std::max(16u, occ_options.occ_lock_buckets);
28
- bucketed_locks_.reserve(bucket_size);
29
- for (size_t i = 0; i < bucket_size; ++i) {
30
- bucketed_locks_.emplace_back(
31
- std::unique_ptr<std::mutex>(new std::mutex));
53
+ auto bucketed_locks = occ_options.shared_lock_buckets;
54
+ if (!bucketed_locks) {
55
+ uint32_t bucket_count = std::max(16u, occ_options.occ_lock_buckets);
56
+ bucketed_locks = MakeSharedOccLockBuckets(bucket_count);
32
57
  }
58
+ bucketed_locks_ = static_cast_with_check<OccLockBucketsImplBase>(
59
+ std::move(bucketed_locks));
33
60
  }
34
61
  }
35
62
 
@@ -62,16 +89,14 @@ class OptimisticTransactionDBImpl : public OptimisticTransactionDB {
62
89
  return OptimisticTransactionDB::Write(write_opts, batch);
63
90
  }
64
91
 
65
- size_t GetLockBucketsSize() const { return bucketed_locks_.size(); }
66
-
67
92
  OccValidationPolicy GetValidatePolicy() const { return validate_policy_; }
68
93
 
69
- std::unique_lock<std::mutex> LockBucket(size_t idx);
94
+ port::Mutex& GetLockBucket(const Slice& key, uint64_t seed) {
95
+ return bucketed_locks_->GetLockBucket(key, seed);
96
+ }
70
97
 
71
98
  private:
72
- // NOTE: used in validation phase. Each key is hashed into some
73
- // bucket. We then take the lock in the hash value order to avoid deadlock.
74
- std::vector<std::unique_ptr<std::mutex>> bucketed_locks_;
99
+ std::shared_ptr<OccLockBucketsImplBase> bucketed_locks_;
75
100
 
76
101
  bool db_owner_;
77
102